LlamaIndex local RAG learning
A small experiment: index a toy Python file with LlamaIndex, embed and answer with local Ollama models, and compare the default QA prompt against a customized one.

calculator.py, the corpus that gets indexed:
```python
def add_numbers(a, b):
    return a + b


def subtract_numbers(a, b):
    return a - b


def multiply_numbers(a, b):
    return a * b


def divide_numbers(a, b):
    if b != 0:
        return a / b
    return "Division by zero error"
```
The RAG script: it loads calculator.py, splits it into code-aware chunks, indexes them, and runs the same query with the default and a customized QA prompt:
```python
from langchain_core.prompts import ChatPromptTemplate
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import CodeSplitter
from llama_index.core.prompts import LangchainPromptTemplate
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

# Global settings: both the embedding model and the LLM run on a local Ollama server
Settings.embed_model = OllamaEmbedding(model_name="mxbai-embed-large")
Settings.llm = Ollama(model="llama3.2")

query = "give me the function that starts with letter 'mul'"

# Split the source file into code-aware chunks and index them in an in-memory vector store
loader = SimpleDirectoryReader(input_files=["calculator.py"])
documents = loader.load_data()
splitter = CodeSplitter(language="python")
nodes = splitter.get_nodes_from_documents(documents)
index = VectorStoreIndex(nodes)
query_engine = index.as_query_engine()

# Default QA prompt template
response = query_engine.query(query)
print(f"\n***Default prompt***\nQuery: '{query}'\nResponse:\n{response}\n")

# Customized QA prompt template
template = """
You are an expert programmer who writes simple, concise, well-documented code using best practices.
Use the context retrieved for the question to produce a new function in modern Python syntax, with a docstring and unit tests.
Context: {context}
Question: {question}"""
lc_prompt_tmpl = LangchainPromptTemplate(
    template=ChatPromptTemplate.from_template(template),
    template_var_mappings={"query_str": "question", "context_str": "context"},
)
query_engine.update_prompts({"response_synthesizer:text_qa_template": lc_prompt_tmpl})
response = query_engine.query(query)
print(f"\n***Customized prompt***\nQuery: '{query}'\nSuggestion:\n{response}\n")
```
Sample run output:
***Default prompt***
Query: 'give me the function that starts with letter 'mul''
Response:
multiply_numbers

***Customized prompt***
Query: 'give me the function that starts with letter 'mul''
Suggestion:
```python
"""
Mathematical Operations Module

This module provides functions for basic mathematical operations.

Functions:
    multiply_numbers(a, b): Returns the product of two numbers.
"""

def multiply_numbers(a, b):
    """
    Returns the product of two numbers.

    Args:
        a (float): The first number.
        b (float): The second number.

    Returns:
        float: The product of a and b.

    Raises:
        TypeError: If a or b is not a number.
    """
    try:
        return a * b
    except TypeError:
        raise TypeError("Both inputs must be numbers")
```

Test units:
```python
import unittest


class TestMathOperations(unittest.TestCase):
    def test_multiply_numbers(self):
        self.assertEqual(multiply_numbers(2, 3), 6)
        self.assertEqual(multiply_numbers(-1, 2), -2)
        self.assertEqual(multiply_numbers(0, 10), 0)

    def test_multiply_numbers_error(self):
        with self.assertRaises(TypeError):
            multiply_numbers('a', 3)


if __name__ == '__main__':
    unittest.main()
```
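To double-check that update_prompts actually replaced the template, query engines expose their prompt dictionary via LlamaIndex's get_prompts(); a minimal sketch:

```python
# List the templates registered on the engine; after update_prompts, the
# "response_synthesizer:text_qa_template" key should map to the
# Langchain-wrapped template.
for key, prompt in query_engine.get_prompts().items():
    print(key, type(prompt).__name__)
```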
requirements.in:
chromadb
langchain-community
llama-index
llama-index-embeddings-ollama
llama-index-llms-langchain
llama-index-llms-ollama
tree-sitter-languages
tree-sitter<0.22
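Beyond the Python packages, the script assumes a local Ollama server with both models available. A minimal sketch that pulls them with the ollama client pinned below (model names taken from the script's Settings):

```python
import ollama

# Download the embedding model and the LLM used in Settings; a no-op if
# they are already present on the local Ollama server.
for model in ("mxbai-embed-large", "llama3.2"):
    ollama.pull(model)
```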
requirements.txt, pinned by pip-compile:
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
#    pip-compile --output-file=llama-index/requirements.txt llama-index/requirements.in
#
aiohappyeyeballs==2.4.4
    # via aiohttp
aiohttp==3.11.11
    # via
    #   langchain
    #   langchain-community
    #   llama-index-core
aiosignal==1.3.2
    # via aiohttp
annotated-types==0.7.0
    # via pydantic
anyio==4.8.0
    # via
    #   httpx
    #   openai
    #   starlette
    #   watchfiles
asgiref==3.8.1
    # via opentelemetry-instrumentation-asgi
attrs==24.3.0
    # via aiohttp
backoff==2.2.1
    # via posthog
bcrypt==4.2.1
    # via chromadb
beautifulsoup4==4.12.3
    # via llama-index-readers-file
build==1.2.2.post1
    # via chromadb
cachetools==5.5.1
    # via google-auth
certifi==2024.12.14
    # via
    #   httpcore
    #   httpx
    #   kubernetes
    #   llama-cloud
    #   requests
charset-normalizer==3.4.1
    # via requests
chroma-hnswlib==0.7.6
    # via chromadb
chromadb==0.6.3
    # via -r llama-index/requirements.in
click==8.1.8
    # via
    #   llama-parse
    #   nltk
    #   typer
    #   uvicorn
coloredlogs==15.0.1
    # via onnxruntime
dataclasses-json==0.6.7
    # via
    #   langchain-community
    #   llama-index-core
deprecated==1.2.15
    # via
    #   llama-index-core
    #   opentelemetry-api
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-semantic-conventions
dirtyjson==1.0.8
    # via llama-index-core
distro==1.9.0
    # via openai
durationpy==0.9
    # via kubernetes
fastapi==0.115.6
    # via chromadb
filelock==3.17.0
    # via huggingface-hub
filetype==1.2.0
    # via llama-index-core
flatbuffers==25.1.21
    # via onnxruntime
frozenlist==1.5.0
    # via
    #   aiohttp
    #   aiosignal
fsspec==2024.12.0
    # via
    #   huggingface-hub
    #   llama-index-core
google-auth==2.37.0
    # via kubernetes
googleapis-common-protos==1.66.0
    # via opentelemetry-exporter-otlp-proto-grpc
greenlet==3.1.1
    # via sqlalchemy
grpcio==1.69.0
    # via
    #   chromadb
    #   opentelemetry-exporter-otlp-proto-grpc
h11==0.14.0
    # via
    #   httpcore
    #   uvicorn
httpcore==1.0.7
    # via httpx
httptools==0.6.4
    # via uvicorn
httpx==0.28.1
    # via
    #   chromadb
    #   langsmith
    #   llama-cloud
    #   llama-index-core
    #   ollama
    #   openai
httpx-sse==0.4.0
    # via langchain-community
huggingface-hub==0.27.1
    # via tokenizers
humanfriendly==10.0
    # via coloredlogs
idna==3.10
    # via
    #   anyio
    #   httpx
    #   requests
    #   yarl
importlib-metadata==8.5.0
    # via opentelemetry-api
importlib-resources==6.5.2
    # via chromadb
jiter==0.8.2
    # via openai
joblib==1.4.2
    # via nltk
jsonpatch==1.33
    # via langchain-core
jsonpointer==3.0.0
    # via jsonpatch
kubernetes==31.0.0
    # via chromadb
langchain==0.3.15
    # via
    #   langchain-community
    #   llama-index-llms-langchain
langchain-community==0.3.15
    # via -r llama-index/requirements.in
langchain-core==0.3.31
    # via
    #   langchain
    #   langchain-community
    #   langchain-text-splitters
langchain-text-splitters==0.3.5
    # via langchain
langsmith==0.3.1
    # via
    #   langchain
    #   langchain-community
    #   langchain-core
llama-cloud==0.1.10
    # via llama-index-indices-managed-llama-cloud
llama-index==0.12.12
    # via -r llama-index/requirements.in
llama-index-agent-openai==0.4.2
    # via
    #   llama-index
    #   llama-index-program-openai
llama-index-cli==0.4.0
    # via llama-index
llama-index-core==0.12.12
    # via
    #   llama-index
    #   llama-index-agent-openai
    #   llama-index-cli
    #   llama-index-embeddings-ollama
    #   llama-index-embeddings-openai
    #   llama-index-indices-managed-llama-cloud
    #   llama-index-llms-langchain
    #   llama-index-llms-ollama
    #   llama-index-llms-openai
    #   llama-index-multi-modal-llms-openai
    #   llama-index-program-openai
    #   llama-index-question-gen-openai
    #   llama-index-readers-file
    #   llama-index-readers-llama-parse
    #   llama-parse
llama-index-embeddings-ollama==0.5.0
    # via -r llama-index/requirements.in
llama-index-embeddings-openai==0.3.1
    # via
    #   llama-index
    #   llama-index-cli
llama-index-indices-managed-llama-cloud==0.6.4
    # via llama-index
llama-index-llms-langchain==0.5.1
    # via -r llama-index/requirements.in
llama-index-llms-ollama==0.5.0
    # via -r llama-index/requirements.in
llama-index-llms-openai==0.3.14
    # via
    #   llama-index
    #   llama-index-agent-openai
    #   llama-index-cli
    #   llama-index-multi-modal-llms-openai
    #   llama-index-program-openai
    #   llama-index-question-gen-openai
llama-index-multi-modal-llms-openai==0.4.2
    # via llama-index
llama-index-program-openai==0.3.1
    # via
    #   llama-index
    #   llama-index-question-gen-openai
llama-index-question-gen-openai==0.3.0
    # via llama-index
llama-index-readers-file==0.4.3
    # via llama-index
llama-index-readers-llama-parse==0.4.0
    # via llama-index
llama-parse==0.5.19
    # via llama-index-readers-llama-parse
markdown-it-py==3.0.0
    # via rich
marshmallow==3.25.1
    # via dataclasses-json
mdurl==0.1.2
    # via markdown-it-py
mmh3==5.0.1
    # via chromadb
monotonic==1.6
    # via posthog
mpmath==1.3.0
    # via sympy
multidict==6.1.0
    # via
    #   aiohttp
    #   yarl
mypy-extensions==1.0.0
    # via typing-inspect
nest-asyncio==1.6.0
    # via llama-index-core
networkx==3.4.2
    # via llama-index-core
nltk==3.9.1
    # via
    #   llama-index
    #   llama-index-core
numpy==2.2.2
    # via
    #   chroma-hnswlib
    #   chromadb
    #   langchain
    #   langchain-community
    #   llama-index-core
    #   onnxruntime
    #   pandas
oauthlib==3.2.2
    # via
    #   kubernetes
    #   requests-oauthlib
ollama==0.4.7
    # via
    #   llama-index-embeddings-ollama
    #   llama-index-llms-ollama
onnxruntime==1.20.1
    # via chromadb
openai==1.60.0
    # via
    #   llama-index-agent-openai
    #   llama-index-embeddings-openai
    #   llama-index-llms-openai
opentelemetry-api==1.29.0
    # via
    #   chromadb
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-asgi
    #   opentelemetry-instrumentation-fastapi
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.29.0
    # via opentelemetry-exporter-otlp-proto-grpc
opentelemetry-exporter-otlp-proto-grpc==1.29.0
    # via chromadb
opentelemetry-instrumentation==0.50b0
    # via
    #   opentelemetry-instrumentation-asgi
    #   opentelemetry-instrumentation-fastapi
opentelemetry-instrumentation-asgi==0.50b0
    # via opentelemetry-instrumentation-fastapi
opentelemetry-instrumentation-fastapi==0.50b0
    # via chromadb
opentelemetry-proto==1.29.0
    # via
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-grpc
opentelemetry-sdk==1.29.0
    # via
    #   chromadb
    #   opentelemetry-exporter-otlp-proto-grpc
opentelemetry-semantic-conventions==0.50b0
    # via
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-asgi
    #   opentelemetry-instrumentation-fastapi
    #   opentelemetry-sdk
opentelemetry-util-http==0.50b0
    # via
    #   opentelemetry-instrumentation-asgi
    #   opentelemetry-instrumentation-fastapi
orjson==3.10.15
    # via
    #   chromadb
    #   langsmith
overrides==7.7.0
    # via chromadb
packaging==24.2
    # via
    #   build
    #   huggingface-hub
    #   langchain-core
    #   marshmallow
    #   onnxruntime
    #   opentelemetry-instrumentation
pandas==2.2.3
    # via llama-index-readers-file
pillow==11.1.0
    # via llama-index-core
posthog==3.9.2
    # via chromadb
propcache==0.2.1
    # via
    #   aiohttp
    #   yarl
protobuf==5.29.3
    # via
    #   googleapis-common-protos
    #   onnxruntime
    #   opentelemetry-proto
pyasn1==0.6.1
    # via
    #   pyasn1-modules
    #   rsa
pyasn1-modules==0.4.1
    # via google-auth
pydantic==2.10.5
    # via
    #   chromadb
    #   fastapi
    #   langchain
    #   langchain-core
    #   langsmith
    #   llama-cloud
    #   llama-index-core
    #   llama-parse
    #   ollama
    #   openai
    #   pydantic-settings
pydantic-core==2.27.2
    # via pydantic
pydantic-settings==2.7.1
    # via langchain-community
pygments==2.19.1
    # via rich
pypdf==5.1.0
    # via llama-index-readers-file
pypika==0.48.9
    # via chromadb
pyproject-hooks==1.2.0
    # via build
python-dateutil==2.9.0.post0
    # via
    #   kubernetes
    #   pandas
    #   posthog
python-dotenv==1.0.1
    # via
    #   pydantic-settings
    #   uvicorn
pytz==2024.2
    # via pandas
pyyaml==6.0.2
    # via
    #   chromadb
    #   huggingface-hub
    #   kubernetes
    #   langchain
    #   langchain-community
    #   langchain-core
    #   llama-index-core
    #   uvicorn
regex==2024.11.6
    # via
    #   nltk
    #   tiktoken
requests==2.32.3
    # via
    #   huggingface-hub
    #   kubernetes
    #   langchain
    #   langchain-community
    #   langsmith
    #   llama-index-core
    #   posthog
    #   requests-oauthlib
    #   requests-toolbelt
    #   tiktoken
requests-oauthlib==2.0.0
    # via kubernetes
requests-toolbelt==1.0.0
    # via langsmith
rich==13.9.4
    # via
    #   chromadb
    #   typer
rsa==4.9
    # via google-auth
shellingham==1.5.4
    # via typer
six==1.17.0
    # via
    #   kubernetes
    #   posthog
    #   python-dateutil
sniffio==1.3.1
    # via
    #   anyio
    #   openai
soupsieve==2.6
    # via beautifulsoup4
sqlalchemy[asyncio]==2.0.37
    # via
    #   langchain
    #   langchain-community
    #   llama-index-core
starlette==0.41.3
    # via fastapi
striprtf==0.0.26
    # via llama-index-readers-file
sympy==1.13.3
    # via onnxruntime
tenacity==9.0.0
    # via
    #   chromadb
    #   langchain
    #   langchain-community
    #   langchain-core
    #   llama-index-core
tiktoken==0.8.0
    # via llama-index-core
tokenizers==0.21.0
    # via chromadb
tqdm==4.67.1
    # via
    #   chromadb
    #   huggingface-hub
    #   llama-index-core
    #   nltk
    #   openai
tree-sitter==0.21.3
    # via
    #   -r llama-index/requirements.in
    #   tree-sitter-languages
tree-sitter-languages==1.10.2
    # via -r llama-index/requirements.in
typer==0.15.1
    # via chromadb
typing-extensions==4.12.2
    # via
    #   anyio
    #   chromadb
    #   fastapi
    #   huggingface-hub
    #   langchain-core
    #   llama-index-core
    #   openai
    #   opentelemetry-sdk
    #   pydantic
    #   pydantic-core
    #   sqlalchemy
    #   typer
    #   typing-inspect
typing-inspect==0.9.0
    # via
    #   dataclasses-json
    #   llama-index-core
tzdata==2025.1
    # via pandas
urllib3==2.3.0
    # via
    #   kubernetes
    #   requests
uvicorn[standard]==0.34.0
    # via chromadb
uvloop==0.21.0
    # via uvicorn
watchfiles==1.0.4
    # via uvicorn
websocket-client==1.8.0
    # via kubernetes
websockets==14.2
    # via uvicorn
wrapt==1.17.2
    # via
    #   deprecated
    #   llama-index-core
    #   opentelemetry-instrumentation
yarl==1.18.3
    # via aiohttp
zipp==3.21.0
    # via importlib-metadata
zstandard==0.23.0
    # via langsmith