Skip to content

Instantly share code, notes, and snippets.

@dexhunter
Created April 14, 2025 13:19
Show Gist options
  • Save dexhunter/1f4ae637c71b5146b061b353d4780702 to your computer and use it in GitHub Desktop.
Save dexhunter/1f4ae637c71b5146b061b353d4780702 to your computer and use it in GitHub Desktop.
import os
from openai import OpenAI, APIError, APITimeoutError
import copy
import json
# The OpenAI client needs an API key; abort early with a clear message when
# the OPENAI_API_KEY environment variable is missing or empty.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    # Raising SystemExit with a string prints it to stderr and exits with
    # status 1. Unlike the ``exit()`` helper, it does not rely on the ``site``
    # module having injected that name into builtins.
    raise SystemExit("Error: OPENAI_API_KEY environment variable not set.")
client = OpenAI(api_key=api_key)
# Models exercised by this script, in the order they are tested.
MODELS_TO_TEST = [
    "o1-mini",
    "gpt-4o",
    "o1-preview",
]

# Conversation template. Each model run works on its own deep copy, so the
# per-model tweaks below never leak into the next run.
base_messages = [
    # Instruction message: its role and content are overwritten per model.
    {
        "role": "system",
        "content": "You are a helpful assistant.",
    },
    # Default user prompt asking for the greeting (replaced for gpt-4o).
    {
        "role": "user",
        "content": "Output a greeting 'hello'.",
    },
]
# JSON-schema description of the arguments accepted by ``output_greeting``:
# an object with one required string property, ``greeting``.
_greeting_parameters = {
    "type": "object",
    "properties": {
        "greeting": {
            "type": "string",
            "description": "The greeting message, e.g., hello",
        },
    },
    "required": ["greeting"],
}

# Function-calling tool spec offered to the o1 models.
greeting_tool_spec = {
    "type": "function",
    "function": {
        "name": "output_greeting",
        "description": "Outputs a greeting.",
        "parameters": _greeting_parameters,
    },
}
def _build_request(model_name):
    """Build the per-model request configuration.

    Returns a ``(messages, api_kwargs, test_type)`` tuple, or ``None`` when no
    test configuration is defined for *model_name*. ``messages`` is a deep
    copy of ``base_messages``, so per-model edits never touch the template.
    """
    messages = copy.deepcopy(base_messages)

    if model_name.startswith("o1-"):
        # o1 models: send the instructions with the 'user' role and force a
        # call to the ``output_greeting`` tool.
        messages[0]["role"] = "user"  # Use 'user' role for o1 system prompt
        messages[0]["content"] = "You are a helpful assistant that uses tools to provide responses."
        api_kwargs = {
            "tools": [greeting_tool_spec],
            "tool_choice": {"type": "function", "function": {"name": "output_greeting"}},
        }
        return messages, api_kwargs, "tools"

    if model_name == "gpt-4o":
        # gpt-4o: request JSON mode and ask for the JSON object explicitly.
        messages[0]["role"] = "system"
        messages[0]["content"] = "You are a helpful assistant designed to output JSON."
        messages[1]["content"] = "Provide a JSON object with a single key 'greeting' and value 'hello'. Don't include markdown."
        api_kwargs = {"response_format": {"type": "json_object"}}
        return messages, api_kwargs, "response_format: json_object"

    return None


def _has_greeting(raw_json):
    """Return True when *raw_json* parses to a JSON object with a 'greeting' key."""
    try:
        parsed = json.loads(raw_json)
    except json.JSONDecodeError:
        return False
    # Require a JSON object: on a JSON string ``in`` would be a substring
    # test, and on a number it would raise TypeError.
    return isinstance(parsed, dict) and "greeting" in parsed


def _validate_response(response, test_type):
    """Check *response* against the expectation for *test_type*.

    Returns ``(valid, output_content)`` where ``output_content`` is the text
    to report: the tool-call arguments or message content when present,
    otherwise a string dump of the received message (or ``""`` when the
    response has no choices at all).
    """
    if not response.choices:
        return False, ""
    message = response.choices[0].message

    if test_type == "tools":
        tool_calls = message.tool_calls
        if not tool_calls or tool_calls[0].function.name != "output_greeting":
            # Log what was received instead of leaving the report blank.
            return False, str(message)
        arguments = tool_calls[0].function.arguments
        return _has_greeting(arguments), arguments

    if test_type == "response_format: json_object":
        content = message.content
        if not content:
            return False, ""
        return _has_greeting(content), content

    return False, ""


for model_name in MODELS_TO_TEST:
    print(f"--- Testing model: {model_name} ---")

    # --- Configure based on model ---
    request = _build_request(model_name)
    if request is None:
        print(f"Skipping model {model_name} - test configuration not defined.")
        continue
    messages, api_kwargs, test_type = request
    print(f"(Using role '{messages[0]['role']}', testing with {test_type})")

    try:
        print(f"Attempting call to {model_name}...")
        response = client.chat.completions.create(
            model=model_name,
            messages=messages,
            **api_kwargs,
        )

        # --- Validate response based on test type ---
        valid, output_content = _validate_response(response, test_type)
        if valid:
            print(f"\nSuccess! Model {model_name} worked as expected with {test_type}.")
            print(f"Output: {output_content}")
        else:
            print(f"\nFailure! Model {model_name} did not return the expected output using {test_type}.")
            print(f"Received: {output_content}")
    except APITimeoutError as e:
        print(f"\nAPITimeoutError encountered for model {model_name}: {e}")
    except APIError as e:
        # Only HTTP-status errors carry ``status_code``; connection-level
        # APIErrors do not, so read it defensively rather than letting the
        # handler itself raise AttributeError.
        status_code = getattr(e, "status_code", None)
        print(f"\nAPIError encountered for model {model_name}: {status_code} {e.code} - {e.message}")
    except Exception as e:
        # Top-level boundary: report the failure and move on to the next model.
        print(f"\nAn unexpected error occurred for model {model_name}: {e}")

    print("-" * (len(model_name) + 28) + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment