@jamiekt
Created July 13, 2025 21:44
Building an offline personal RAG AI
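
The gist is two scripts: an indexing script (data_processing.py below) that chunks Markdown notes and embeds them into a local Chroma store, and an interactive query script that answers questions over that store with a local LLM. Before running either one, a preflight check along these lines (a minimal sketch, assuming the same base URL and model names used in the scripts) confirms that Ollama is reachable and that the required models have been pulled.

# Preflight sketch: fail fast with a connection or "model not found" error
# if Ollama is not running or a model has not been pulled yet.
from langchain_ollama import OllamaEmbeddings, OllamaLLM

OLLAMA_BASE_URL = "http://host.docker.internal:11434"

print(len(OllamaEmbeddings(model="nomic-embed-text", base_url=OLLAMA_BASE_URL).embed_query("ping")))
print(OllamaLLM(model="llama3", base_url=OLLAMA_BASE_URL).invoke("Reply with the word pong"))
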
# data_processing.py (referenced by the query script below): load Markdown notes,
# split them into chunks and embed them into a local Chroma vector store.
import os
import time

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma

OLLAMA_BASE_URL = "http://host.docker.internal:11434"
EMBEDDING_MODEL_NAME = "nomic-embed-text"


def load_and_process_notes(notes_dir="../notes"):
    loader = DirectoryLoader(
        notes_dir,
        glob="**/*.md",
        loader_cls=TextLoader,  # Simple text loader for Markdown
        loader_kwargs={"autodetect_encoding": True}  # Helps with various encodings
    )
    documents = loader.load()
    print(f"Loaded {len(documents)} documents.")

    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,   # Max characters per chunk
        chunk_overlap=200  # Overlap to maintain context between chunks
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split into {len(chunks)} chunks.")
    return chunks


def create_embeddings_and_vectorstore(chunks, persist_directory="./chroma_db", batch_size=100, retry_delay=5):
    # Create embeddings using a local Ollama embedding model.
    # Ensure 'nomic-embed-text' is pulled in Ollama (e.g., docker exec -it ollama ollama pull nomic-embed-text).
    embeddings_model = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME, base_url=OLLAMA_BASE_URL)

    # Initialize ChromaDB. Constructing Chroma with a persist_directory loads the
    # existing collection if one is already on disk, or creates an empty one
    # otherwise; chunks are then added incrementally in both cases.
    if os.path.exists(persist_directory) and os.listdir(persist_directory):
        print(f"Loading existing vector store from {persist_directory}...")
    else:
        print(f"Creating new vector store at {persist_directory}...")
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings_model)

    num_chunks = len(chunks)
    print(f"Starting to embed and add {num_chunks} chunks in batches of {batch_size}...")
    total_time_elapsed = 0.0

    for i in range(0, num_chunks, batch_size):
        batch = chunks[i:i + batch_size]
        print(f"Processing batch {i // batch_size + 1}/{(num_chunks + batch_size - 1) // batch_size} ({len(batch)} chunks)...")

        retries = 3
        for attempt in range(retries):
            try:
                start_time = time.time()
                # Use add_documents to embed the batch and add it to the vector store
                vectorstore.add_documents(batch)
                end_time = time.time()
                batch_time = end_time - start_time
                total_time_elapsed += batch_time
                print(f"Successfully added {len(batch)} chunks in {batch_time:.2f} seconds. Total processed: {i + len(batch)}")
                print_time_elapsed(total_time_elapsed)
                break  # Break out of the retry loop on success
            except Exception as e:
                print(f"Error processing batch {i // batch_size + 1} on attempt {attempt + 1}: {e}")
                print(f"Waiting {retry_delay} seconds before retrying...")
                time.sleep(retry_delay)
                if attempt == retries - 1:
                    print(f"Failed to process batch {i // batch_size + 1} after {retries} attempts. Skipping this batch.")
                    # Optionally, log the failed chunks to a file for later review:
                    # with open("failed_chunks.log", "a") as f:
                    #     for chunk in batch:
                    #         f.write(f"Failed: {chunk.page_content[:100]}...\n")

    print(f"Embedding and vector store update complete. Total time: {total_time_elapsed:.2f} seconds.")
    return vectorstore


def print_time_elapsed(total_time_elapsed_in_seconds: float):
    # Print the total time elapsed in a friendly format
    if total_time_elapsed_in_seconds < 60:
        print(f"Total time elapsed: {total_time_elapsed_in_seconds:.2f} seconds.")
    elif total_time_elapsed_in_seconds < 3600:
        minutes = total_time_elapsed_in_seconds // 60
        seconds = total_time_elapsed_in_seconds % 60
        print(f"Total time elapsed: {int(minutes)} min {int(seconds)} sec.")
    else:
        hours = total_time_elapsed_in_seconds // 3600
        minutes = (total_time_elapsed_in_seconds % 3600) // 60
        seconds = total_time_elapsed_in_seconds % 60
        print(f"Total time elapsed: {int(hours)} hr {int(minutes)} min {int(seconds)} sec.")


if __name__ == "__main__":
    # This assumes your 'notes' directory is a sibling of the directory containing this file.
    # Adjust the path as necessary based on your project structure.
    notes_repo_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'notes'))
    print(f"Starting data processing for notes in: {notes_repo_path}")

    # Ensure your Ollama container is running and the 'nomic-embed-text' model is pulled
    note_chunks = load_and_process_notes(notes_dir=notes_repo_path)
    vector_db = create_embeddings_and_vectorstore(note_chunks, persist_directory="./chroma_db", batch_size=100)
    print("Data processing complete. Your notes are ready to be queried!")
# Query script: load the persisted vector store built by data_processing.py and
# answer questions about the notes with a local LLM, citing source files.
import os

from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Configuration constants
PERSIST_DIRECTORY = "./chroma_db"
OLLAMA_BASE_URL = "http://host.docker.internal:11434"  # Default Ollama API address
LLM_MODEL_NAME = "llama3"  # The LLM you pulled with Ollama


def setup_rag_chain():
    # Load the embedding model (must match the one used for indexing)
    embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url=OLLAMA_BASE_URL)

    # Load the persistent vector store
    if not os.path.exists(PERSIST_DIRECTORY):
        print(f"Error: Vector store not found at {PERSIST_DIRECTORY}.")
        print("Please run data_processing.py first to create the vector store.")
        return None
    vectorstore = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)
    print(f"Loaded vector store from {PERSIST_DIRECTORY}.")

    # Set up the retriever to fetch relevant documents.
    # k=3 means it will retrieve the top 3 most relevant chunks.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Initialize the local LLM (ensure it's pulled in Ollama)
    llm = OllamaLLM(model=LLM_MODEL_NAME, base_url=OLLAMA_BASE_URL)
    print(f"Initialized LLM: {LLM_MODEL_NAME}")

    # Define a custom prompt template for RAG.
    # This guides the LLM on how to use the retrieved context.
    prompt_template = """Use the following pieces of context from my notes to answer the user's question.
If you don't know the answer based *only* on the provided context, just say that you don't know, don't try to make up an answer.
Cite the source filenames of the notes if possible.

Context:
{context}

Question: {question}

Helpful Answer:"""
    QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template)

    # Create the RAG chain
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,  # So we can show the original notes used
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
    )
    return qa_chain


if __name__ == "__main__":
    qa_chain = setup_rag_chain()
    if qa_chain:
        print("RAG system ready. Ask a question about your notes (type 'exit' to quit):")
        while True:
            query = input("\nYour question: ")
            if query.lower() == 'exit':
                break
            try:
                result = qa_chain.invoke({"query": query})
                print("\nAnswer:", result["result"])

                # Print sources if available
                if result.get("source_documents"):
                    print("\nSources (from your notes):")
                    for doc in result["source_documents"]:
                        # LangChain adds metadata like 'source' (filename)
                        source_path = doc.metadata.get('source', 'Unknown Source')
                        # Extract just the filename for cleaner output
                        filename = os.path.basename(source_path)
                        print(f"- {filename}")
                else:
                    print("\nNo specific sources found in your notes for this answer.")
            except Exception as e:
                print(f"An error occurred: {e}")
                print("Make sure your Ollama container is running and the specified LLM/embedding models are pulled.")