|
import json |
|
import traceback |
|
# Was this helpful? I have another open source project you can check out if you're interested at https://github.com/e-p-armstrong/augmentoolkit/tree/master |
|
def _has_part_type(parts, content_type):
    """Return True if any dict in *parts* carries the given ``content_type``."""
    return any(isinstance(p, dict) and p.get('content_type') == content_type
               for p in parts)


def _part_text(part):
    """Extract displayable text from a message part (plain str or dict part)."""
    return part if isinstance(part, str) else part['content']


def process_openai_data(input_file, output_file, system_prompt_func):
    """Convert a ChatGPT data export into ShareGPT-style JSONL.

    Reads the export JSON (a list of conversation objects) from
    ``input_file`` and writes one JSON object per line to ``output_file``,
    each of the form ``{"conversations": [{"from": ..., "value": ...}, ...]}``
    with a leading system turn.

    Args:
        input_file: Path to the exported ``conversations.json``.
        output_file: Path of the JSONL file to write.
        system_prompt_func: Callable taking the conversation title and
            returning the system-prompt string to prepend.

    NOTE(review): when a message contains a code or image part, the mapping
    loop is abandoned (``break``) but any turns collected before that point
    are still written — preserved from the original; confirm this truncation
    is intended rather than skipping the whole conversation.
    """
    # encoding='utf-8' so chat text round-trips regardless of platform default.
    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'w', encoding='utf-8') as f_out:
        data = json.load(f_in)
        for obj in data:
            try:
                system_prompt = system_prompt_func(obj['title'])
                conversation = [{"from": "system", "value": system_prompt}]
                for node in obj['mapping'].values():
                    message = node.get('message')
                    if not message:
                        continue
                    role = message['author']['role']
                    # Only user/assistant turns are exported; other roles
                    # (e.g. system/tool nodes) are ignored.
                    if role not in ('user', 'assistant'):
                        continue
                    parts = message['content']['parts']
                    # Truncate conversations containing code (user side) or
                    # image attachments (either side).
                    if role == 'user' and _has_part_type(parts, 'code'):
                        break
                    if _has_part_type(parts, 'image_asset_pointer'):
                        break
                    tag = 'human' if role == 'user' else 'gpt'
                    conversation.append({"from": tag, "value": _part_text(parts[0])})
                # Only write conversations with at least one turn beyond
                # the system prompt.
                if len(conversation) > 1:
                    f_out.write(json.dumps({"conversations": conversation}) + '\n')
            except Exception as e:
                # Best-effort: a malformed conversation object must not
                # abort the whole export run.
                print(f"Error processing conversation: {str(e)}")
                traceback.print_exc()
|
|
|
def generate_system_prompt(title):
    """Build the default system prompt announcing the conversation's topic."""
    return "The following conversation is related to the topic: {}".format(title)
|
|
|
def _main():
    """Script entry point: convert the default export to ShareGPT JSONL."""
    # Default locations of the raw ChatGPT export and the converted output.
    source_path = 'gpt_data_export/conversations.json'
    destination_path = 'gpt_data_export.json'
    process_openai_data(source_path, destination_path, generate_system_prompt)


if __name__ == "__main__":
    _main()