Skip to content

Instantly share code, notes, and snippets.

@nehiljain
Last active October 29, 2024 08:47
Show Gist options
  • Save nehiljain/fd18c2cfee30a5c46922e62704f9f5af to your computer and use it in GitHub Desktop.
Save nehiljain/fd18c2cfee30a5c46922e62704f9f5af to your computer and use it in GitHub Desktop.
Create Synthetic Data using FireworksAI Llama models
from fireworks.client import Fireworks
import os
from dotenv import load_dotenv
import random
import json
from pydantic import BaseModel, Field
from enum import Enum
class DateTimeCategory(str, Enum):
    """Topic areas a generated example may belong to.

    The ``str`` mixin makes every member an instance of ``str`` that compares
    equal to its value, and lets ``DateTimeCategory("Date Parsing")`` look a
    member up from the plain string (raising ``ValueError`` for anything
    that is not listed here).
    """

    BASIC_OPERATIONS = "Basic Operations"
    TIMEZONE_HANDLING = "Timezone Handling"
    DATE_PARSING = "Date Parsing"
    DATE_FORMATTING = "Date Formatting"
    DATE_CALCULATIONS = "Date Calculations"
    PENDULUM_FEATURES = "Pendulum Features"
class ComplexityLevel(str, Enum):
    """Difficulty tiers a generated example may be labelled with.

    Members are ``str`` instances equal to their value, so
    ``ComplexityLevel("Basic")`` resolves a plain string to its member and
    raises ``ValueError`` for any other text.
    """

    BASIC = "Basic"
    INTERMEDIATE = "Intermediate"
    ADVANCED = "Advanced"
class CodeSnippetType(str, Enum):
    """Which library flavour a generated snippet is labelled with.

    Members are ``str`` instances equal to their value, so
    ``CodeSnippetType("pendulum")`` resolves a plain string to its member
    and raises ``ValueError`` for any other text.
    """

    DATETIME = "datetime"
    PENDULUM = "pendulum"
    HYBRID = "hybrid"
# Pull settings from a local .env file (when present) into the process
# environment so FIREWORKS_API_KEY can be read below.
load_dotenv()

# The key is looked up once at module load; a missing variable yields None,
# which is handed to the client as-is.
client = Fireworks(api_key=os.environ.get("FIREWORKS_API_KEY"))

# Model used for every request in this script.
model_name = "accounts/fireworks/models/llama-v3p1-405b-instruct"

# One-off request issued at module load; the reply text is printed.
# NOTE(review): this looks like a connectivity/credentials smoke test -- it
# runs (and costs a request) every time the module is loaded.
response = client.chat.completions.create(
    messages=[{"role": "user", "content": "Who are you?"}],
    model=model_name,
)
print(response.choices[0].message.content)
class DateTimeExample(BaseModel):
    """One generated code example, built from the model's JSON reply.

    The field names match the keys the system prompt in
    ``generate_datetime_example`` asks the model to return.
    """

    title: str
    description: str
    python_code: str
    expected_output: str

    # The three labelled fields below are declared as plain ``str``; the
    # permitted values are only spelled out in ``description``.
    # NOTE(review): ``enum_values`` is not a documented ``Field`` parameter;
    # it is passed through as an extra keyword. Confirm how the installed
    # pydantic version treats extra ``Field`` keywords.
    category: str = Field(
        ...,
        description=(
            "One of: Basic Operations, Timezone Handling, Date Parsing, "
            "Date Formatting, Date Calculations, Pendulum Features"
        ),
        enum_values=list(DateTimeCategory),
    )
    complexity: str = Field(
        ...,
        description="One of: Basic, Intermediate, Advanced",
        enum_values=list(ComplexityLevel),
    )
    snippet_type: str = Field(
        ...,
        description="One of: datetime, pendulum, hybrid",
        enum_values=list(CodeSnippetType),
    )
# Vocabulary the prompt builder samples from. Each list is derived from the
# matching enum (definition order is preserved) instead of repeating the
# literals, so the prompt wording and the validation enums cannot drift apart.
categories = [member.value for member in DateTimeCategory]
complexities = [member.value for member in ComplexityLevel]
snippet_types = [member.value for member in CodeSnippetType]

# Alternative phrasings of the same request. The {selected_*} placeholders
# are filled with str.format() using one entry from each list above.
prompt_templates = [
    "Generate a {selected_complexity} {selected_snippet_type} code example about {selected_category}.",
    "Provide a {selected_snippet_type} code snippet demonstrating {selected_category} at a {selected_complexity} level.",
    "Create an example using {selected_snippet_type} that illustrates {selected_category}, with {selected_complexity} complexity.",
]
def generate_datetime_example():
    """Request one code example from the model and return it validated.

    A category, complexity, snippet type and prompt template are each drawn
    with ``random.choice`` and combined into the user prompt. The model is
    asked for a JSON object (``response_format={"type": "json_object"}``);
    its reply is parsed, the three labelled fields are checked against
    ``DateTimeCategory``, ``ComplexityLevel`` and ``CodeSnippetType``, and
    the result is wrapped in a ``DateTimeExample``.

    Returns:
        DateTimeExample: The parsed and validated example.

    Raises:
        Exception: Any failure while parsing or validating the reply
            (invalid JSON, a missing key, a value outside the enums, a
            model construction error) is printed and then re-raised
            unchanged. Errors raised by the API call itself propagate
            without being printed.
    """
    # The four draws happen in a fixed order (category, complexity, snippet
    # type, template) so a seeded ``random`` reproduces the same prompts.
    selected_category = random.choice(categories)
    selected_complexity = random.choice(complexities)
    selected_snippet_type = random.choice(snippet_types)
    prompt = random.choice(prompt_templates).format(
        selected_category=selected_category,
        selected_complexity=selected_complexity,
        selected_snippet_type=selected_snippet_type,
    )

    # The continuation lines are deliberately flush-left: any leading
    # whitespace here would become part of the text sent to the model.
    system_prompt = """You are a Python datetime expert creating practical code examples.
You must return a JSON object with exactly these fields:
{
"title": "string - brief title",
"description": "string - detailed description",
"python_code": "string - working code example",
"expected_output": "string - example output",
"category": "one of: Basic Operations, Timezone Handling, Date Parsing, Date Formatting, Date Calculations, Pendulum Features",
"complexity": "one of: Basic, Intermediate, Advanced",
"snippet_type": "one of: datetime, pendulum, hybrid"
}"""

    response = client.chat.completions.create(
        model=model_name,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": f"Generate a complete code example following this prompt: {prompt}",
            },
        ],
    )

    # Reply key -> enum whose constructor raises ValueError for any value it
    # does not define. Checked in this order, so the first offending field
    # is the one reported.
    # NOTE(review): the reply's labels are validated against the enums but
    # are not compared with the values that were requested in the prompt.
    enum_checks = (
        ("category", DateTimeCategory),
        ("complexity", ComplexityLevel),
        ("snippet_type", CodeSnippetType),
    )
    try:
        example_data = json.loads(response.choices[0].message.content)
        for field_name, enum_cls in enum_checks:
            example_data[field_name] = enum_cls(example_data[field_name])
        return DateTimeExample(**example_data)
    except Exception as exc:
        # Report the problem, then let the caller decide what to do with it.
        print(f"Error generating example: {exc}")
        raise
def main(num_examples=2, output_path="datetime_examples.jsonl"):
    """Generate examples and write them to a JSON Lines file.

    Args:
        num_examples: How many examples to generate; each one is produced
            by a separate call to ``generate_datetime_example()``.
        output_path: File to write, one JSON object per line. It is opened
            in ``"w"`` mode, so existing content is replaced. Defaults to
            the path that was previously hard-coded.

    Raises:
        Exception: Whatever ``generate_datetime_example()`` raises. Lines
            written before the failure stay in the file.
    """
    with open(output_path, "w", encoding="utf-8") as f:
        for index in range(1, num_examples + 1):
            example = generate_datetime_example()
            # Serialise first, then write the whole line at once, so a
            # serialisation error cannot leave a half-written record.
            # NOTE(review): ``.dict()` is the pydantic v1 spelling; confirm
            # against the installed pydantic version.
            f.write(json.dumps(example.dict()) + "\n")
            print(f"Generated example {index}/{num_examples}: {example.title}")
    print(f"Generated and saved {num_examples} examples to {output_path}")
# Call main() with its defaults only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment