Building an offline personal RAG AI
data_processing.py — loads the Markdown notes, splits them into chunks, and embeds them into a local Chroma vector store via Ollama:

```python
import os
import time

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma

OLLAMA_BASE_URL = "http://host.docker.internal:11434"
EMBEDDING_MODEL_NAME = "nomic-embed-text"


def load_and_process_notes(notes_dir="../notes"):
    loader = DirectoryLoader(
        notes_dir,
        glob="**/*.md",
        loader_cls=TextLoader,  # Simple text loader for Markdown
        loader_kwargs={"autodetect_encoding": True}  # Helps with various encodings
    )
    documents = loader.load()
    print(f"Loaded {len(documents)} documents.")

    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,   # Max characters per chunk
        chunk_overlap=200  # Overlap to maintain context between chunks
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split into {len(chunks)} chunks.")
    return chunks


def create_embeddings_and_vectorstore(chunks, persist_directory="./chroma_db", batch_size=100, retry_delay=5):
    # Create embeddings using a local Ollama embedding model.
    # Ensure 'nomic-embed-text' is pulled in Ollama (e.g., docker exec -it ollama ollama pull nomic-embed-text)
    embeddings_model = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME, base_url=OLLAMA_BASE_URL)

    # Initialize ChromaDB. If the persist directory already has data, the existing
    # vector store is reused; otherwise a new one is created. Chunks are added
    # incrementally in either case.
    if os.path.exists(persist_directory) and os.listdir(persist_directory):
        print(f"Loading existing vector store from {persist_directory}...")
    else:
        print(f"Creating new vector store at {persist_directory}...")
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings_model)

    num_chunks = len(chunks)
    print(f"Starting to embed and add {num_chunks} chunks in batches of {batch_size}...")
    total_time_elapsed = 0.0

    for i in range(0, num_chunks, batch_size):
        batch = chunks[i:i + batch_size]
        batch_number = i // batch_size + 1
        total_batches = (num_chunks + batch_size - 1) // batch_size
        print(f"Processing batch {batch_number}/{total_batches} ({len(batch)} chunks)...")

        retries = 3
        for attempt in range(retries):
            try:
                start_time = time.time()
                # Use add_documents to add the batch to the vector store
                vectorstore.add_documents(batch)
                end_time = time.time()
                batch_time = end_time - start_time
                total_time_elapsed += batch_time
                print(f"Successfully added {len(batch)} chunks in {batch_time:.2f} seconds. Total processed: {i + len(batch)}")
                print_time_elapsed(total_time_elapsed)
                break  # Break out of retry loop on success
            except Exception as e:
                print(f"Error processing batch {batch_number} on attempt {attempt + 1}: {e}")
                print(f"Waiting {retry_delay} seconds before retrying...")
                time.sleep(retry_delay)
                if attempt == retries - 1:
                    print(f"Failed to process batch {batch_number} after {retries} attempts. Skipping this batch.")
                    # Optionally, log these failed chunks to a file for later review
                    # with open("failed_chunks.log", "a") as f:
                    #     for chunk in batch:
                    #         f.write(f"Failed: {chunk.page_content[:100]}...\n")

    print(f"Embedding and vector store update complete. Total time: {total_time_elapsed:.2f} seconds.")
    return vectorstore


def print_time_elapsed(total_time_elapsed_in_seconds: float):
    # Print total time elapsed in a friendly format
    if total_time_elapsed_in_seconds < 60:
        print(f"Total time elapsed: {total_time_elapsed_in_seconds:.2f} seconds.")
    elif total_time_elapsed_in_seconds < 3600:
        minutes = total_time_elapsed_in_seconds // 60
        seconds = total_time_elapsed_in_seconds % 60
        print(f"Total time elapsed: {int(minutes)} min {int(seconds)} sec.")
    else:
        hours = total_time_elapsed_in_seconds // 3600
        minutes = (total_time_elapsed_in_seconds % 3600) // 60
        seconds = total_time_elapsed_in_seconds % 60
        print(f"Total time elapsed: {int(hours)} hr {int(minutes)} min {int(seconds)} sec.")


if __name__ == "__main__":
    # This assumes your 'notes' directory is a sibling of the directory containing this file.
    # Adjust the path as necessary based on your project structure.
    notes_repo_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'notes'))
    print(f"Starting data processing for notes in: {notes_repo_path}")

    # Ensure your Ollama container is running and the 'nomic-embed-text' model is pulled
    note_chunks = load_and_process_notes(notes_dir=notes_repo_path)
    vector_db = create_embeddings_and_vectorstore(note_chunks, persist_directory="./chroma_db", batch_size=100)
    print("Data processing complete. Your notes are ready to be queried!")
```
The query script — loads the same vector store, hooks it up to a local LLM through a RetrievalQA chain, and runs an interactive prompt loop:

```python
import os

from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Configuration constants
PERSIST_DIRECTORY = "./chroma_db"
OLLAMA_BASE_URL = "http://host.docker.internal:11434"  # Default Ollama API address
LLM_MODEL_NAME = "llama3"  # The LLM you pulled with Ollama


def setup_rag_chain():
    # Load the embedding model (must match the one used for indexing)
    embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url=OLLAMA_BASE_URL)

    # Load the persistent vector store
    if not os.path.exists(PERSIST_DIRECTORY):
        print(f"Error: Vector store not found at {PERSIST_DIRECTORY}.")
        print("Please run data_processing.py first to create the vector store.")
        return None
    vectorstore = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)
    print(f"Loaded vector store from {PERSIST_DIRECTORY}.")

    # Set up the retriever to fetch relevant documents.
    # k=3 means it will retrieve the top 3 most relevant chunks.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Initialize the local LLM (ensure it's pulled in Ollama)
    llm = OllamaLLM(model=LLM_MODEL_NAME, base_url=OLLAMA_BASE_URL)
    print(f"Initialized LLM: {LLM_MODEL_NAME}")

    # Define a custom prompt template for RAG.
    # This guides the LLM on how to use the retrieved context.
    prompt_template = """Use the following pieces of context from my notes to answer the user's question.
If you don't know the answer based *only* on the provided context, just say that you don't know; don't try to make up an answer.
Cite the source filenames of the notes if possible.

Context:
{context}

Question: {question}

Helpful Answer:"""
    QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template)

    # Create the RAG chain
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,  # So we can show the original notes used
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
    )
    return qa_chain


if __name__ == "__main__":
    qa_chain = setup_rag_chain()
    if qa_chain:
        print("RAG system ready. Ask a question about your notes (type 'exit' to quit):")
        while True:
            query = input("\nYour question: ")
            if query.lower() == 'exit':
                break
            try:
                result = qa_chain.invoke({"query": query})
                print("\nAnswer:", result["result"])

                # Print sources if available
                if result.get("source_documents"):
                    print("\nSources (from your notes):")
                    for doc in result["source_documents"]:
                        # LangChain adds metadata like 'source' (the file path)
                        source_path = doc.metadata.get('source', 'Unknown Source')
                        # Extract just the filename for cleaner output
                        filename = os.path.basename(source_path)
                        print(f"- {filename}")
                else:
                    print("\nNo specific sources found in your notes for this answer.")
            except Exception as e:
                print(f"An error occurred: {e}")
                print("Make sure your Ollama container is running and the specified LLM/embedding models are pulled.")
```