Created
December 13, 2023 17:43
-
-
Save JitendraZaa/f04ed4c5cfca6c97b97cf57f38eeeab1 to your computer and use it in GitHub Desktop.
Python code that connects to Salesforce and uses OpenAI to generate embeddings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import openai | |
import numpy as np | |
import pandas as pd | |
import requests | |
from ast import literal_eval | |
def _fetch_salesforce_leads(timeout):
    """Log in to Salesforce and return the Lead records from one SOQL query.

    Credentials and host names are read from the ``SF_*`` environment
    variables. ``timeout`` (seconds) is applied to both HTTP requests.

    Raises:
        requests.HTTPError: if the login or the query returns an error status.
    """
    sf_instance = os.getenv('SF_INSTANCE')
    sf_instance_afterlogin = os.getenv('SF_INSTANCE_AFTERLOGIN')
    auth_url = f'https://{sf_instance}/services/oauth2/token'
    query_url = f'https://{sf_instance_afterlogin}/services/data/v57.0/query?q=SELECT+Id,Name,Company,Title,LeadSource,Email,Status+FROM+Lead'
    auth_payload = {
        'grant_type': 'password',
        'client_id': os.getenv('SF_CLIENT_ID'),
        'client_secret': os.getenv('SF_CLIENT_SECRET'),
        'username': os.getenv('SF_USERNAME'),
        # The security token is appended to the password. Default both to ''
        # so an unset variable does not raise TypeError on None + str.
        'password': os.getenv('SF_PASSWORD', '') + os.getenv('SF_SECURITY_TOKEN', ''),
    }

    auth_response = requests.post(auth_url, data=auth_payload, timeout=timeout)
    # Fail with the HTTP status instead of a KeyError on 'access_token'.
    auth_response.raise_for_status()
    access_token = auth_response.json()['access_token']

    headers = {'Authorization': f'Bearer {access_token}'}
    query_response = requests.get(query_url, headers=headers, timeout=timeout)
    query_response.raise_for_status()
    # NOTE(review): only this single response is read; if the API pages large
    # result sets, later pages are not fetched -- confirm whether that matters.
    return query_response.json().get('records', [])


def lambda_handler(event, context):
    """Rank Salesforce leads by semantic similarity to a question.

    Fetches Lead records from Salesforce, embeds each lead's "Name Email"
    text and the question with ``get_embeddings_for_text``, and returns up to
    100 leads ordered by descending cosine similarity.

    Args:
        event: Invocation payload; the optional ``'question'`` key holds the
            text to match (defaults to ``'Default Question'``).
        context: Lambda context object; unused.

    Returns:
        A dict with ``'statusCode'`` 200 and a ``'body'`` dict holding the
        ``'question'`` and ``'results'``, a list of
        ``{'Lead': <name>, 'Similarity': <score rounded to 4 places>}``.

    Raises:
        requests.HTTPError: if the Salesforce login or query fails.
    """
    question = event.get('question', 'Default Question')
    openai.api_key = os.getenv('OPENAI_API_KEY')

    leads = _fetch_salesforce_leads(timeout=30)
    if not leads:
        # Nothing to rank; skip the embedding calls entirely.
        return {
            'statusCode': 200,
            'body': {'question': question, 'results': []}
        }

    # One embedding per lead, so every similarity score belongs to exactly one
    # lead. (Embedding fixed-size chunks of the concatenated text would yield
    # scores that cannot be paired with individual leads.) This costs one
    # embedding request per lead.
    lead_embeddings = np.asarray(
        [get_embeddings_for_text(f"{lead['Name']} {lead['Email']}") for lead in leads],
        dtype=float,
    )
    question_embedding = np.asarray(get_embeddings_for_text(question), dtype=float)

    # Cosine similarity of the question against every lead in one pass:
    # row-wise dot products divided by the product of the vector norms.
    similarities = (lead_embeddings @ question_embedding) / (
        np.linalg.norm(lead_embeddings, axis=1) * np.linalg.norm(question_embedding)
    )

    results = sorted(zip(leads, similarities), key=lambda pair: pair[1], reverse=True)
    top_results = results[:100]
    response_data = [
        {'Lead': lead['Name'], 'Similarity': round(float(similarity), 4)}
        for lead, similarity in top_results
    ]
    return {
        'statusCode': 200,
        'body': {'question': question, 'results': response_data}
    }
def cosine_similarity(A, B):
    """Return the cosine similarity of vectors ``A`` and ``B``.

    Computed as ``dot(A, B) / (||A|| * ||B||)``. The norm comes from
    ``np.linalg.norm``; a bare ``norm`` is not defined in this module.

    Args:
        A: First vector (any sequence accepted by NumPy).
        B: Second vector, same length as ``A``.

    Returns:
        The similarity as a NumPy float in ``[-1, 1]``; ``nan`` if either
        vector has zero length (division by zero).
    """
    return np.dot(A, B) / (np.linalg.norm(A) * np.linalg.norm(B))
def get_embeddings_for_text(input_term):
    """Request an embedding for ``input_term`` from the OpenAI API.

    Calls ``openai.Embedding.create`` with the ``text-embedding-ada-002``
    model and returns the ``'embedding'`` entry of the first item in the
    response's ``'data'`` list.
    """
    response = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=input_term,
    )
    first_item = response['data'][0]
    return first_item['embedding']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment