|
import json |
|
import traceback |
|
# Was this helpful? I have another open source project you can check out if you're interested at https://github.com/e-p-armstrong/augmentoolkit/tree/master |
|
def _has_part_type(parts, content_type):
    """Return True if any dict in *parts* carries the given ``content_type``."""
    return any(isinstance(p, dict) and p.get('content_type') == content_type
               for p in parts)


def _part_text(part):
    """Extract displayable text from a message part (plain str or dict part)."""
    return part if isinstance(part, str) else part['content']


def process_openai_data(input_file, output_file, system_prompt_func):
    """Convert a ChatGPT data export into ShareGPT-style JSONL.

    Reads the export JSON (a list of conversation objects) from
    ``input_file`` and writes one JSON object per line to ``output_file``,
    each of the form ``{"conversations": [{"from": ..., "value": ...}, ...]}``
    with a leading system turn.

    Args:
        input_file: Path to the exported ``conversations.json``.
        output_file: Path of the JSONL file to write.
        system_prompt_func: Callable taking the conversation title and
            returning the system-prompt string to prepend.

    NOTE(review): when a message contains a code or image part, the mapping
    loop is abandoned (``break``) but any turns collected before that point
    are still written — preserved from the original; confirm this truncation
    is intended rather than skipping the whole conversation.
    """
    # encoding='utf-8' so chat text round-trips regardless of platform default.
    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'w', encoding='utf-8') as f_out:
        data = json.load(f_in)
        for obj in data:
            try:
                system_prompt = system_prompt_func(obj['title'])
                conversation = [{"from": "system", "value": system_prompt}]
                for node in obj['mapping'].values():
                    message = node.get('message')
                    if not message:
                        continue
                    role = message['author']['role']
                    # Only user/assistant turns are exported; other roles
                    # (e.g. system/tool nodes) are ignored.
                    if role not in ('user', 'assistant'):
                        continue
                    parts = message['content']['parts']
                    # Truncate conversations containing code (user side) or
                    # image attachments (either side).
                    if role == 'user' and _has_part_type(parts, 'code'):
                        break
                    if _has_part_type(parts, 'image_asset_pointer'):
                        break
                    tag = 'human' if role == 'user' else 'gpt'
                    conversation.append({"from": tag, "value": _part_text(parts[0])})
                # Only write conversations with at least one turn beyond
                # the system prompt.
                if len(conversation) > 1:
                    f_out.write(json.dumps({"conversations": conversation}) + '\n')
            except Exception as e:
                # Best-effort: a malformed conversation object must not
                # abort the whole export run.
                print(f"Error processing conversation: {str(e)}")
                traceback.print_exc()
|
|
|
def generate_system_prompt(title):
    """Build the default system prompt announcing the conversation's topic."""
    return "The following conversation is related to the topic: {}".format(title)
|
|
|
def _main():
    """Script entry point: convert the default export to ShareGPT JSONL."""
    # Default locations of the raw ChatGPT export and the converted output.
    source_path = 'gpt_data_export/conversations.json'
    destination_path = 'gpt_data_export.json'
    process_openai_data(source_path, destination_path, generate_system_prompt)


if __name__ == "__main__":
    _main()