Last active
February 10, 2025 10:47
-
-
Save khaerulumam42/241712c67534ed632ff355c8e4ed5b7c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from typing import List | |
from dotenv import load_dotenv | |
from langchain_openai import OpenAIEmbeddings | |
from langchain_community.vectorstores.pgvector import DistanceStrategy | |
from langchain_community.vectorstores import PGVector | |
from langchain.docstore.document import Document | |
# Load settings from a .env file. override=True asks python-dotenv to let
# values from the file replace variables already set in the environment.
load_dotenv(override=True)

# Configure OpenAI API key
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise ValueError('Please set OPENAI_API_KEY environment variable')

# Build the database connection string from the PGVECTOR_* variables.
# PGVECTOR_PORT falls back to PostgreSQL's default port (5432) when it is
# unset or empty; calling int() on a missing value would otherwise fail at
# import time with an unhelpful TypeError.
CONNECTION_STRING = PGVector.connection_string_from_db_params(
    driver='psycopg2',
    host=os.environ.get('PGVECTOR_HOST'),
    port=int(os.environ.get('PGVECTOR_PORT') or 5432),
    database=os.environ.get('PGVECTOR_DATABASE'),
    user=os.environ.get('PGVECTOR_USER'),
    password=os.environ.get('PGVECTOR_PASSWORD'),
)

# Configure embeddings model
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
    model='text-embedding-3-small'
)
def create_pgvector_store(
    docs: List[Document],
    collection_name: str = 'sample_kredivo_faq',
    distance_strategy: DistanceStrategy = DistanceStrategy.COSINE
) -> PGVector:
    """
    Create a PGVector store with the given documents.

    This function used to be defined twice in a row with different default
    collection names ('documents', then 'sample_kredivo_faq'). The second
    definition replaced the first when the module was loaded, so its default
    is the one kept here.

    Args:
        docs: List of Document objects to store
        collection_name: Name for the collection in the database
        distance_strategy: Distance strategy for similarity search

    Returns:
        PGVector store instance

    Raises:
        Exception: Any error raised while building the store is printed
            and then re-raised unchanged.
    """
    try:
        # Delegate to PGVector, using the module-level embeddings model and
        # connection string.
        vector_store = PGVector.from_documents(
            documents=docs,
            embedding=embeddings,
            collection_name=collection_name,
            connection_string=CONNECTION_STRING,
            distance_strategy=distance_strategy,
        )
        print(f'Successfully created PGVector store with {len(docs)} documents')
        return vector_store
    except Exception as e:
        # Report the failure but leave the handling to the caller.
        print(f'Error creating PGVector store: {str(e)}')
        raise
def search_similar_documents(
    vector_store: PGVector,
    query: str,
    k: int = 4
) -> List[Document]:
    """
    Run a similarity search against a vector store.

    Args:
        vector_store: Store whose ``similarity_search`` method is called
        query: Text to search for
        k: Number of results requested from the store

    Returns:
        The value returned by ``vector_store.similarity_search(query, k=k)``

    Raises:
        Exception: Any error raised by the store is printed and then
            re-raised unchanged.
    """
    try:
        return vector_store.similarity_search(query, k=k)
    except Exception as err:
        # Report the failure but leave the handling to the caller.
        print(f'Error searching documents: {str(err)}')
        raise
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment