Skip to content

Instantly share code, notes, and snippets.

@electrocucaracha
Last active January 22, 2025 21:21
Show Gist options
  • Save electrocucaracha/addd14dddae246c29356a470272b19d4 to your computer and use it in GitHub Desktop.
Save electrocucaracha/addd14dddae246c29356a470272b19d4 to your computer and use it in GitHub Desktop.
Llama Index Local RAG learning
def add_numbers(a, b):
    """Return the sum of the two operands."""
    total = a + b
    return total
def subtract_numbers(a, b):
    """Return the result of subtracting the second operand from the first."""
    difference = a - b
    return difference
def multiply_numbers(a, b):
    """Return the product of the two operands."""
    product = a * b
    return product
def divide_numbers(a, b):
    """Return a divided by b.

    When b is zero, the sentinel string "Division by zero error" is
    returned instead of raising (kept for backward compatibility with
    existing callers that check for this string).
    """
    if b == 0:
        return "Division by zero error"
    return a / b
import os
import chromadb
from langchain.prompts import ChatPromptTemplate
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import CodeSplitter
from llama_index.core.prompts import LangchainPromptTemplate
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
# Global settings: use local Ollama models for both embedding and generation.
Settings.embed_model = OllamaEmbedding(model_name="mxbai-embed-large")
Settings.llm = Ollama(model="llama3.2")

query = "give me the function that starts with letter 'mul'"

# Load the calculator source, split it into syntax-aware code chunks, and
# build a vector index over them.
# NOTE(review): despite the original comment, no ChromaDB collection is wired
# in here — VectorStoreIndex falls back to its default in-memory vector store,
# and the `chromadb` import is unused. Pass a ChromaVectorStore via a
# StorageContext if persistence is actually wanted.
loader = SimpleDirectoryReader(input_files=["calculator.py"])
documents = loader.load_data()
splitter = CodeSplitter(language="python")
nodes = splitter.get_nodes_from_documents(documents)
index = VectorStoreIndex(nodes)
query_engine = index.as_query_engine()

# First pass: query with the default QA prompt template.
response = query_engine.query(query)
print(f"\n***Default prompt***\nQuery: '{query}'\nResponse:\n{response}\n")

# Second pass: swap in a customized QA prompt. The LangChain template's
# {question}/{context} variables are mapped onto LlamaIndex's
# query_str/context_str template variables.
# (Fixed typo in the prompt text: "withdocstring" -> "with docstring".)
template = """
You are an expert programmer that writes simple, concise well-documented code using best practices.
Consider the retrieved context generated by the question to produce a new function using latest python syntax with docstring and test units.
Context: {context}
Question: {question}"""
lc_prompt_tmpl = LangchainPromptTemplate(
    template=ChatPromptTemplate.from_template(template),
    template_var_mappings={"query_str": "question", "context_str": "context"},
)
query_engine.update_prompts({"response_synthesizer:text_qa_template": lc_prompt_tmpl})
response = query_engine.query(query)
print(f"\n***Customized prompt***\nQuery: '{query}'\nSuggestion:\n{response}\n")
***Default prompt***
Query: 'give me the function that starts with letter 'mul''
Response:
multiply_numbers
***Customized prompt***
Query: 'give me the function that starts with letter 'mul''
Suggestion:
```python
"""
Mathematical Operations Module
This module provides functions for basic mathematical operations.
Functions:
multiply_numbers(a, b): Returns the product of two numbers.
"""
def multiply_numbers(a, b):
"""
Returns the product of two numbers.
Args:
a (float): The first number.
b (float): The second number.
Returns:
float: The product of a and b.
Raises:
TypeError: If a or b is not a number.
"""
try:
return a * b
except TypeError:
raise TypeError("Both inputs must be numbers")
```
Test units:
```python
import unittest
class TestMathOperations(unittest.TestCase):
def test_multiply_numbers(self):
self.assertEqual(multiply_numbers(2, 3), 6)
self.assertEqual(multiply_numbers(-1, 2), -2)
self.assertEqual(multiply_numbers(0, 10), 0)
def test_multiply_numbers_error(self):
with self.assertRaises(TypeError):
multiply_numbers('a', 3)
if __name__ == '__main__':
unittest.main()
```
chromadb
langchain-community
llama-index
llama-index-embeddings-ollama
llama-index-llms-langchain
llama-index-llms-ollama
tree-sitter-languages
tree-sitter<0.22
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --output-file=llama-index/requirements.txt llama-index/requirements.in
#
aiohappyeyeballs==2.4.4
# via aiohttp
aiohttp==3.11.11
# via
# langchain
# langchain-community
# llama-index-core
aiosignal==1.3.2
# via aiohttp
annotated-types==0.7.0
# via pydantic
anyio==4.8.0
# via
# httpx
# openai
# starlette
# watchfiles
asgiref==3.8.1
# via opentelemetry-instrumentation-asgi
attrs==24.3.0
# via aiohttp
backoff==2.2.1
# via posthog
bcrypt==4.2.1
# via chromadb
beautifulsoup4==4.12.3
# via llama-index-readers-file
build==1.2.2.post1
# via chromadb
cachetools==5.5.1
# via google-auth
certifi==2024.12.14
# via
# httpcore
# httpx
# kubernetes
# llama-cloud
# requests
charset-normalizer==3.4.1
# via requests
chroma-hnswlib==0.7.6
# via chromadb
chromadb==0.6.3
# via -r llama-index/requirements.in
click==8.1.8
# via
# llama-parse
# nltk
# typer
# uvicorn
coloredlogs==15.0.1
# via onnxruntime
dataclasses-json==0.6.7
# via
# langchain-community
# llama-index-core
deprecated==1.2.15
# via
# llama-index-core
# opentelemetry-api
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-semantic-conventions
dirtyjson==1.0.8
# via llama-index-core
distro==1.9.0
# via openai
durationpy==0.9
# via kubernetes
fastapi==0.115.6
# via chromadb
filelock==3.17.0
# via huggingface-hub
filetype==1.2.0
# via llama-index-core
flatbuffers==25.1.21
# via onnxruntime
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
fsspec==2024.12.0
# via
# huggingface-hub
# llama-index-core
google-auth==2.37.0
# via kubernetes
googleapis-common-protos==1.66.0
# via opentelemetry-exporter-otlp-proto-grpc
greenlet==3.1.1
# via sqlalchemy
grpcio==1.69.0
# via
# chromadb
# opentelemetry-exporter-otlp-proto-grpc
h11==0.14.0
# via
# httpcore
# uvicorn
httpcore==1.0.7
# via httpx
httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via
# chromadb
# langsmith
# llama-cloud
# llama-index-core
# ollama
# openai
httpx-sse==0.4.0
# via langchain-community
huggingface-hub==0.27.1
# via tokenizers
humanfriendly==10.0
# via coloredlogs
idna==3.10
# via
# anyio
# httpx
# requests
# yarl
importlib-metadata==8.5.0
# via opentelemetry-api
importlib-resources==6.5.2
# via chromadb
jiter==0.8.2
# via openai
joblib==1.4.2
# via nltk
jsonpatch==1.33
# via langchain-core
jsonpointer==3.0.0
# via jsonpatch
kubernetes==31.0.0
# via chromadb
langchain==0.3.15
# via
# langchain-community
# llama-index-llms-langchain
langchain-community==0.3.15
# via -r llama-index/requirements.in
langchain-core==0.3.31
# via
# langchain
# langchain-community
# langchain-text-splitters
langchain-text-splitters==0.3.5
# via langchain
langsmith==0.3.1
# via
# langchain
# langchain-community
# langchain-core
llama-cloud==0.1.10
# via llama-index-indices-managed-llama-cloud
llama-index==0.12.12
# via -r llama-index/requirements.in
llama-index-agent-openai==0.4.2
# via
# llama-index
# llama-index-program-openai
llama-index-cli==0.4.0
# via llama-index
llama-index-core==0.12.12
# via
# llama-index
# llama-index-agent-openai
# llama-index-cli
# llama-index-embeddings-ollama
# llama-index-embeddings-openai
# llama-index-indices-managed-llama-cloud
# llama-index-llms-langchain
# llama-index-llms-ollama
# llama-index-llms-openai
# llama-index-multi-modal-llms-openai
# llama-index-program-openai
# llama-index-question-gen-openai
# llama-index-readers-file
# llama-index-readers-llama-parse
# llama-parse
llama-index-embeddings-ollama==0.5.0
# via -r llama-index/requirements.in
llama-index-embeddings-openai==0.3.1
# via
# llama-index
# llama-index-cli
llama-index-indices-managed-llama-cloud==0.6.4
# via llama-index
llama-index-llms-langchain==0.5.1
# via -r llama-index/requirements.in
llama-index-llms-ollama==0.5.0
# via -r llama-index/requirements.in
llama-index-llms-openai==0.3.14
# via
# llama-index
# llama-index-agent-openai
# llama-index-cli
# llama-index-multi-modal-llms-openai
# llama-index-program-openai
# llama-index-question-gen-openai
llama-index-multi-modal-llms-openai==0.4.2
# via llama-index
llama-index-program-openai==0.3.1
# via
# llama-index
# llama-index-question-gen-openai
llama-index-question-gen-openai==0.3.0
# via llama-index
llama-index-readers-file==0.4.3
# via llama-index
llama-index-readers-llama-parse==0.4.0
# via llama-index
llama-parse==0.5.19
# via llama-index-readers-llama-parse
markdown-it-py==3.0.0
# via rich
marshmallow==3.25.1
# via dataclasses-json
mdurl==0.1.2
# via markdown-it-py
mmh3==5.0.1
# via chromadb
monotonic==1.6
# via posthog
mpmath==1.3.0
# via sympy
multidict==6.1.0
# via
# aiohttp
# yarl
mypy-extensions==1.0.0
# via typing-inspect
nest-asyncio==1.6.0
# via llama-index-core
networkx==3.4.2
# via llama-index-core
nltk==3.9.1
# via
# llama-index
# llama-index-core
numpy==2.2.2
# via
# chroma-hnswlib
# chromadb
# langchain
# langchain-community
# llama-index-core
# onnxruntime
# pandas
oauthlib==3.2.2
# via
# kubernetes
# requests-oauthlib
ollama==0.4.7
# via
# llama-index-embeddings-ollama
# llama-index-llms-ollama
onnxruntime==1.20.1
# via chromadb
openai==1.60.0
# via
# llama-index-agent-openai
# llama-index-embeddings-openai
# llama-index-llms-openai
opentelemetry-api==1.29.0
# via
# chromadb
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-instrumentation
# opentelemetry-instrumentation-asgi
# opentelemetry-instrumentation-fastapi
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.29.0
# via opentelemetry-exporter-otlp-proto-grpc
opentelemetry-exporter-otlp-proto-grpc==1.29.0
# via chromadb
opentelemetry-instrumentation==0.50b0
# via
# opentelemetry-instrumentation-asgi
# opentelemetry-instrumentation-fastapi
opentelemetry-instrumentation-asgi==0.50b0
# via opentelemetry-instrumentation-fastapi
opentelemetry-instrumentation-fastapi==0.50b0
# via chromadb
opentelemetry-proto==1.29.0
# via
# opentelemetry-exporter-otlp-proto-common
# opentelemetry-exporter-otlp-proto-grpc
opentelemetry-sdk==1.29.0
# via
# chromadb
# opentelemetry-exporter-otlp-proto-grpc
opentelemetry-semantic-conventions==0.50b0
# via
# opentelemetry-instrumentation
# opentelemetry-instrumentation-asgi
# opentelemetry-instrumentation-fastapi
# opentelemetry-sdk
opentelemetry-util-http==0.50b0
# via
# opentelemetry-instrumentation-asgi
# opentelemetry-instrumentation-fastapi
orjson==3.10.15
# via
# chromadb
# langsmith
overrides==7.7.0
# via chromadb
packaging==24.2
# via
# build
# huggingface-hub
# langchain-core
# marshmallow
# onnxruntime
# opentelemetry-instrumentation
pandas==2.2.3
# via llama-index-readers-file
pillow==11.1.0
# via llama-index-core
posthog==3.9.2
# via chromadb
propcache==0.2.1
# via
# aiohttp
# yarl
protobuf==5.29.3
# via
# googleapis-common-protos
# onnxruntime
# opentelemetry-proto
pyasn1==0.6.1
# via
# pyasn1-modules
# rsa
pyasn1-modules==0.4.1
# via google-auth
pydantic==2.10.5
# via
# chromadb
# fastapi
# langchain
# langchain-core
# langsmith
# llama-cloud
# llama-index-core
# llama-parse
# ollama
# openai
# pydantic-settings
pydantic-core==2.27.2
# via pydantic
pydantic-settings==2.7.1
# via langchain-community
pygments==2.19.1
# via rich
pypdf==5.1.0
# via llama-index-readers-file
pypika==0.48.9
# via chromadb
pyproject-hooks==1.2.0
# via build
python-dateutil==2.9.0.post0
# via
# kubernetes
# pandas
# posthog
python-dotenv==1.0.1
# via
# pydantic-settings
# uvicorn
pytz==2024.2
# via pandas
pyyaml==6.0.2
# via
# chromadb
# huggingface-hub
# kubernetes
# langchain
# langchain-community
# langchain-core
# llama-index-core
# uvicorn
regex==2024.11.6
# via
# nltk
# tiktoken
requests==2.32.3
# via
# huggingface-hub
# kubernetes
# langchain
# langchain-community
# langsmith
# llama-index-core
# posthog
# requests-oauthlib
# requests-toolbelt
# tiktoken
requests-oauthlib==2.0.0
# via kubernetes
requests-toolbelt==1.0.0
# via langsmith
rich==13.9.4
# via
# chromadb
# typer
rsa==4.9
# via google-auth
shellingham==1.5.4
# via typer
six==1.17.0
# via
# kubernetes
# posthog
# python-dateutil
sniffio==1.3.1
# via
# anyio
# openai
soupsieve==2.6
# via beautifulsoup4
sqlalchemy[asyncio]==2.0.37
# via
# langchain
# langchain-community
# llama-index-core
starlette==0.41.3
# via fastapi
striprtf==0.0.26
# via llama-index-readers-file
sympy==1.13.3
# via onnxruntime
tenacity==9.0.0
# via
# chromadb
# langchain
# langchain-community
# langchain-core
# llama-index-core
tiktoken==0.8.0
# via llama-index-core
tokenizers==0.21.0
# via chromadb
tqdm==4.67.1
# via
# chromadb
# huggingface-hub
# llama-index-core
# nltk
# openai
tree-sitter==0.21.3
# via
# -r llama-index/requirements.in
# tree-sitter-languages
tree-sitter-languages==1.10.2
# via -r llama-index/requirements.in
typer==0.15.1
# via chromadb
typing-extensions==4.12.2
# via
# anyio
# chromadb
# fastapi
# huggingface-hub
# langchain-core
# llama-index-core
# openai
# opentelemetry-sdk
# pydantic
# pydantic-core
# sqlalchemy
# typer
# typing-inspect
typing-inspect==0.9.0
# via
# dataclasses-json
# llama-index-core
tzdata==2025.1
# via pandas
urllib3==2.3.0
# via
# kubernetes
# requests
uvicorn[standard]==0.34.0
# via chromadb
uvloop==0.21.0
# via uvicorn
watchfiles==1.0.4
# via uvicorn
websocket-client==1.8.0
# via kubernetes
websockets==14.2
# via uvicorn
wrapt==1.17.2
# via
# deprecated
# llama-index-core
# opentelemetry-instrumentation
yarl==1.18.3
# via aiohttp
zipp==3.21.0
# via importlib-metadata
zstandard==0.23.0
# via langsmith
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment