hursh-desai · January 13, 2023 10:17
diff --git a/agent.py b/agent.py
 import os
 import re
 import faiss
 from langchain import FAISS
 import obsidiantools.api as otools
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.chains.qa_with_sources import load_qa_with_sources_chain
 from langchain.llms import OpenAI

 # Respect a key that is already exported in the environment; the in-file
 # placeholder is only a fallback for when none is set.
 os.environ.setdefault("OPENAI_API_KEY", 'sk-********')
 # Directory passed to obsidiantools as the vault root.
 dirpath = '/Users/hursh/<vault_name>'
 # Connect to the vault and gather its contents once, at import time.
 vault = otools.Vault(dirpath).connect().gather()

 # Embedding client handed to FAISS when the index is built further down.
 embeddings = OpenAIEmbeddings() # type: ignore
 def markdown_to_dict(markdown_text):
     """Split a markdown note into a ``{header title: section text}`` dict.

     A line is a header when it starts with one or more ``#`` followed by
     whitespace. Every other line is appended (with a trailing newline) to
     the section of the most recent header.

     Args:
         markdown_text: Full text of the note.

     Returns:
         A dict in document order. Keys are header titles with the leading
         ``#`` marker (and an optional closing ``#`` run) removed. Text that
         appears before the first header is stored under the key ``None``;
         a note without any header is returned as ``{None: <whole text>}``.
         Sections whose titles are identical are concatenated instead of
         overwriting each other. ``<...>`` spans are removed from the text.
     """
     sections = {}

     def store(header, body):
         # Only the heading marker is stripped, so a title such as "C# notes"
         # keeps its inner '#'.
         if header is None:
             key = None
         else:
             title = header.lstrip('#').strip()
             # Drop an optional closing run of hashes ("## Title ##").
             key = re.sub(r'(^|\s+)#+$', '', title)
         cleaned = re.sub(r'<.*?>', '', body)
         # Merge repeated titles so an earlier section is never lost.
         if key in sections:
             sections[key] += cleaned
         else:
             sections[key] = cleaned

     current_header = None
     buffer = []
     for line in markdown_text.split('\n'):
         if re.match(r'^#+\s', line):
             # Flush the section that just ended. Text before the first
             # header is kept only when it has real content.
             if current_header is not None or any(part.strip() for part in buffer):
                 store(current_header, ''.join(buffer))
             current_header = line
             buffer = []
         else:
             buffer.append(line + '\n')

     # Flush the last section (or the whole text when no header was found).
     store(current_header, ''.join(buffer))
     return sections
 # Note metadata table; only rows whose 'rel_filepath' is set are indexed.
 df = vault.get_note_metadata()
 all_text = []
 all_metadata = []
 # Only the index labels are needed, so iterate them directly instead of
 # materialising every row with iterrows().
 for index in df.loc[df['rel_filepath'].notna()].index:
     sections = markdown_to_dict(vault.get_source_text(index))
     for header, body in sections.items():
         # Whitespace-only sections (e.g. a header immediately followed by a
         # sub-header) carry nothing to embed or retrieve.
         if not body.strip():
             continue
         all_text.append(body)
         # 'source' is what the QA chain reports back: "<note>-<header>".
         all_metadata.append({'source': f'{index}-{header}'})
 # Embed every collected section and build the FAISS index over them.
 docsearch = FAISS.from_texts(all_text, embeddings, metadatas=all_metadata)
 # Question-answering chain with sources, using an OpenAI LLM at temperature 0.
 chain = load_qa_with_sources_chain(OpenAI(temperature=0))

 def print_answer(question):
     """Print the QA chain's answer to ``question``.

     The 4 indexed sections most similar to the question are retrieved from
     ``docsearch`` and passed to ``chain`` as its input documents; only the
     chain's ``output_text`` is printed.
     """
     matches = docsearch.similarity_search(question, k=4)
     payload = {"input_documents": matches, "question": question}
     response = chain(payload, return_only_outputs=True)
     print(response["output_text"])
 # Run the whole pipeline once with a sample question.
 print_answer('What is the meaning of life?')
	import os
	import re
	import faiss
	from langchain import FAISS
	import obsidiantools.api as otools
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.chains.qa_with_sources import load_qa_with_sources_chain
	from langchain.llms import OpenAI

	# Respect a key that is already exported in the environment; the in-file
	# placeholder is only a fallback for when none is set.
	os.environ.setdefault("OPENAI_API_KEY", 'sk-********')
	# Directory passed to obsidiantools as the vault root.
	dirpath = '/Users/hursh/<vault_name>'
	# Connect to the vault and gather its contents once, at import time.
	vault = otools.Vault(dirpath).connect().gather()

	# Embedding client handed to FAISS when the index is built further down.
	embeddings = OpenAIEmbeddings() # type: ignore
	def markdown_to_dict(markdown_text):
	    """Split a markdown note into a ``{header title: section text}`` dict.

	    A line is a header when it starts with one or more ``#`` followed by
	    whitespace. Every other line is appended (with a trailing newline) to
	    the section of the most recent header.

	    Args:
	        markdown_text: Full text of the note.

	    Returns:
	        A dict in document order. Keys are header titles with the leading
	        ``#`` marker (and an optional closing ``#`` run) removed. Text that
	        appears before the first header is stored under the key ``None``;
	        a note without any header is returned as ``{None: <whole text>}``.
	        Sections whose titles are identical are concatenated instead of
	        overwriting each other. ``<...>`` spans are removed from the text.
	    """
	    sections = {}

	    def store(header, body):
	        # Only the heading marker is stripped, so a title such as "C# notes"
	        # keeps its inner '#'.
	        if header is None:
	            key = None
	        else:
	            title = header.lstrip('#').strip()
	            # Drop an optional closing run of hashes ("## Title ##").
	            key = re.sub(r'(^|\s+)#+$', '', title)
	        cleaned = re.sub(r'<.*?>', '', body)
	        # Merge repeated titles so an earlier section is never lost.
	        if key in sections:
	            sections[key] += cleaned
	        else:
	            sections[key] = cleaned

	    current_header = None
	    buffer = []
	    for line in markdown_text.split('\n'):
	        if re.match(r'^#+\s', line):
	            # Flush the section that just ended. Text before the first
	            # header is kept only when it has real content.
	            if current_header is not None or any(part.strip() for part in buffer):
	                store(current_header, ''.join(buffer))
	            current_header = line
	            buffer = []
	        else:
	            buffer.append(line + '\n')

	    # Flush the last section (or the whole text when no header was found).
	    store(current_header, ''.join(buffer))
	    return sections
	# Note metadata table; only rows whose 'rel_filepath' is set are indexed.
	df = vault.get_note_metadata()
	all_text = []
	all_metadata = []
	# Only the index labels are needed, so iterate them directly instead of
	# materialising every row with iterrows().
	for index in df.loc[df['rel_filepath'].notna()].index:
	    sections = markdown_to_dict(vault.get_source_text(index))
	    for header, body in sections.items():
	        # Whitespace-only sections (e.g. a header immediately followed by a
	        # sub-header) carry nothing to embed or retrieve.
	        if not body.strip():
	            continue
	        all_text.append(body)
	        # 'source' is what the QA chain reports back: "<note>-<header>".
	        all_metadata.append({'source': f'{index}-{header}'})
	# Embed every collected section and build the FAISS index over them.
	docsearch = FAISS.from_texts(all_text, embeddings, metadatas=all_metadata)
	# Question-answering chain with sources, using an OpenAI LLM at temperature 0.
	chain = load_qa_with_sources_chain(OpenAI(temperature=0))

	def print_answer(question):
	    """Print the QA chain's answer to ``question``.

	    The 4 indexed sections most similar to the question are retrieved from
	    ``docsearch`` and passed to ``chain`` as its input documents; only the
	    chain's ``output_text`` is printed.
	    """
	    print(
	        chain(
	            {
	                "input_documents": docsearch.similarity_search(question, k=4),
	                "question": question,
	            },
	            return_only_outputs=True,
	        )["output_text"]
	    )
	# Run the whole pipeline once with a sample question.
	print_answer('What is the meaning of life?')