Last active
October 29, 2024 08:47
-
-
Save nehiljain/fd18c2cfee30a5c46922e62704f9f5af to your computer and use it in GitHub Desktop.
Create Synthetic Data using FireworksAI Llama models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fireworks.client import Fireworks | |
import os | |
from dotenv import load_dotenv | |
import random | |
import json | |
from pydantic import BaseModel, Field | |
from enum import Enum | |
class DateTimeCategory(str, Enum):
    """Topic area that a generated code example is classified under.

    Members also subclass ``str``, so each one compares equal to its
    human-readable value (e.g. ``"Date Parsing"``).
    """

    BASIC_OPERATIONS = "Basic Operations"
    TIMEZONE_HANDLING = "Timezone Handling"
    DATE_PARSING = "Date Parsing"
    DATE_FORMATTING = "Date Formatting"
    DATE_CALCULATIONS = "Date Calculations"
    PENDULUM_FEATURES = "Pendulum Features"
class ComplexityLevel(str, Enum):
    """Difficulty label attached to a generated code example.

    Members also subclass ``str``, so each one compares equal to its value.
    """

    BASIC = "Basic"
    INTERMEDIATE = "Intermediate"
    ADVANCED = "Advanced"
class CodeSnippetType(str, Enum):
    """Which library label a generated snippet carries.

    Members also subclass ``str``, so each one compares equal to its value.
    """

    DATETIME = "datetime"
    PENDULUM = "pendulum"
    HYBRID = "hybrid"
# Populate the process environment before the API key is read below
# (load_dotenv comes from python-dotenv; presumably it reads a local .env file).
load_dotenv()

# Shared client and model identifier used by every request in this module.
client = Fireworks(api_key=os.getenv("FIREWORKS_API_KEY"))
model_name = "accounts/fireworks/models/llama-v3p1-405b-instruct"

# Smoke test: send one trivial chat request and print the reply.
# NOTE(review): this runs at import time, so importing the module performs a
# network call; consider moving it under the __main__ guard if unintended.
response = client.chat.completions.create(
    model=model_name,
    messages=[{
        "role": "user",
        "content": "Who are you?",
    }],
)
print(response.choices[0].message.content)
class DateTimeExample(BaseModel):
    """One generated code example together with its classification labels.

    The three classification fields are typed with the module's enums so
    that pydantic itself rejects values outside the allowed sets.  Because
    those enums subclass ``str``, plain strings such as ``"Basic"`` are still
    accepted on input and the stored members compare equal to their string
    values.
    """

    # Free-text fields produced by the model.
    title: str
    description: str
    python_code: str
    expected_output: str

    # Classification labels; membership is enforced by the enum annotations
    # (the previous ``enum_values=`` keyword was not a pydantic Field option
    # and did not constrain anything).
    category: DateTimeCategory = Field(
        ...,
        description="One of: Basic Operations, Timezone Handling, Date Parsing, Date Formatting, Date Calculations, Pendulum Features",
    )
    complexity: ComplexityLevel = Field(
        ...,
        description="One of: Basic, Intermediate, Advanced",
    )
    snippet_type: CodeSnippetType = Field(
        ...,
        description="One of: datetime, pendulum, hybrid",
    )
# Sampling pools for prompt construction.  They are derived from the enums so
# the allowed values are declared in exactly one place; each list holds plain
# strings in enum definition order.
categories = [category.value for category in DateTimeCategory]
complexities = [level.value for level in ComplexityLevel]
snippet_types = [kind.value for kind in CodeSnippetType]

# Alternative phrasings of the same request; one is picked at random per
# example.  Placeholders are filled with str.format().
prompt_templates = [
    "Generate a {selected_complexity} {selected_snippet_type} code example about {selected_category}.",
    "Provide a {selected_snippet_type} code snippet demonstrating {selected_category} at a {selected_complexity} level.",
    "Create an example using {selected_snippet_type} that illustrates {selected_category}, with {selected_complexity} complexity.",
]
def generate_datetime_example():
    """Request one randomly-parameterised example from the model and validate it.

    A category, complexity level, snippet type and prompt template are each
    drawn with ``random.choice`` (in that order) and combined into the user
    prompt.  The model is asked for a JSON object; the reply is parsed, its
    ``category``, ``complexity`` and ``snippet_type`` values are checked
    against the module's enums, and the result is wrapped in a
    ``DateTimeExample``.

    Returns:
        DateTimeExample: the validated example.

    Raises:
        Exception: any failure while reading, parsing or validating the reply
            is printed and then re-raised unchanged (for instance
            ``ValueError`` for malformed JSON or an unknown enum value, or
            ``KeyError`` when a classification field is missing).
    """
    selected_category = random.choice(categories)
    selected_complexity = random.choice(complexities)
    selected_snippet_type = random.choice(snippet_types)
    prompt_template = random.choice(prompt_templates)
    prompt = prompt_template.format(
        selected_category=selected_category,
        selected_complexity=selected_complexity,
        selected_snippet_type=selected_snippet_type,
    )

    # Spelled out line by line so the text sent to the model does not depend
    # on how this function happens to be indented.
    system_prompt = "\n".join([
        "You are a Python datetime expert creating practical code examples.",
        "You must return a JSON object with exactly these fields:",
        "{",
        '"title": "string - brief title",',
        '"description": "string - detailed description",',
        '"python_code": "string - working code example",',
        '"expected_output": "string - example output",',
        '"category": "one of: Basic Operations, Timezone Handling, Date Parsing, Date Formatting, Date Calculations, Pendulum Features",',
        '"complexity": "one of: Basic, Intermediate, Advanced",',
        '"snippet_type": "one of: datetime, pendulum, hybrid"',
        "}",
    ])

    response = client.chat.completions.create(
        model=model_name,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Generate a complete code example following this prompt: {prompt}"},
        ],
    )

    try:
        example_data = json.loads(response.choices[0].message.content)
        # Validate the category, complexity and snippet_type match our enums;
        # an unknown value raises ValueError, a missing key raises KeyError.
        for field_name, enum_cls in (
            ("category", DateTimeCategory),
            ("complexity", ComplexityLevel),
            ("snippet_type", CodeSnippetType),
        ):
            example_data[field_name] = enum_cls(example_data[field_name])
        return DateTimeExample(**example_data)
    except Exception as e:
        # Report the failure, then let the caller decide how to handle it.
        print(f"Error generating example: {e}")
        raise
def main(num_examples=2):
    """Generate examples and write them to ``datetime_examples.jsonl``.

    The output file is opened in write mode (truncating any previous
    content) and receives one JSON object per line.  A failure in any single
    generation propagates and stops the run.

    Args:
        num_examples: How many examples to generate; defaults to 2.  With 0
            the file is still created, but left empty.
    """
    with open("datetime_examples.jsonl", "w", encoding="utf-8") as f:
        for index in range(1, num_examples + 1):
            example = generate_datetime_example()
            # pydantic v2 deprecates .dict() in favour of .model_dump();
            # fall back to .dict() when running against pydantic v1.
            to_dict = getattr(example, "model_dump", None) or example.dict
            json.dump(to_dict(), f)
            f.write("\n")
            print(f"Generated example {index}/{num_examples}: {example.title}")
    print(f"Generated and saved {num_examples} examples to datetime_examples.jsonl")
# Run the generator with its default settings when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment