altryne · October 18, 2024 00:38
diff --git a/openai_audio_streaming.py b/openai_audio_streaming.py
 import base64
 import os
 from openai import OpenAI
 import pyaudio
 import wave
 import weave

 # Weave needs a W&B API key to track the audio. Use setdefault so a key that
 # is already exported in the environment is not wiped by the empty
 # placeholder; replace "" to set one here (exporting it in the shell is
 # safer than keeping a secret in source).
 os.environ.setdefault("WANDB_API_KEY", "")
 # The OpenAI key is taken from the OPENAI_API_KEY environment variable.
 client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

 weave.init('openai-audio-chat')
 @weave.op
 def get_audio_stream():
     """Stream a spoken chat completion, play it live, and save it to ``output.wav``.

     Requests a streamed ``gpt-4o-audio-preview`` completion with ``pcm16``
     audio, decodes each base64 audio fragment as it arrives, and writes it
     both to a PyAudio output stream and to ``output.wav``.

     Returns:
         wave.Wave_read: ``output.wav`` reopened for reading. The handle is
         returned open (presumably so the ``weave.op`` wrapper can record the
         audio -- confirm against the Weave docs); the caller owns it.

     Raises:
         Whatever the OpenAI client, PyAudio, or ``wave`` raise. The audio
         device is released before the exception propagates.
     """
     # OpenAI documents "pcm16" output as 24 kHz, 16-bit, mono PCM. Playback
     # and the WAV header must both use that rate, otherwise the speech is
     # reproduced slowed down and lower-pitched.
     sample_rate = 24000
     channels = 1
     sample_width = 2  # bytes per sample (16-bit)

     p = pyaudio.PyAudio()
     stream = None
     try:
         stream = p.open(
             format=pyaudio.paInt16,
             channels=channels,
             rate=sample_rate,
             output=True,
             frames_per_buffer=4096,
         )

         with wave.open("output.wav", "wb") as wav_file:
             wav_file.setnchannels(channels)
             wav_file.setsampwidth(sample_width)
             wav_file.setframerate(sample_rate)

             completion = client.chat.completions.create(
                 model="gpt-4o-audio-preview",
                 modalities=["text", "audio"],
                 audio={"voice": "fable", "format": "pcm16"},
                 stream=True,
                 messages=[
                     {"role": "system", "content": "You're the fastest counter in the world"},
                     {"role": "user", "content": "Count to 13 super super slow, announciate each number with a dramatic flair, chnging up accents as you go along. British, french, german, spanish, etc."},
                 ],
             )

             for chunk in completion:
                 choices = getattr(chunk, "choices", None)
                 if not choices:
                     continue

                 # Guard against deltas without usable audio: the attribute
                 # may be absent or None, and a fragment may have no "data".
                 audio = getattr(choices[0].delta, "audio", None)
                 if not audio:
                     continue
                 if isinstance(audio, dict):
                     encoded = audio.get("data")
                 else:
                     encoded = getattr(audio, "data", None)
                 if encoded is None:
                     continue

                 audio_data = base64.b64decode(encoded)
                 stream.write(audio_data)  # play immediately
                 wav_file.writeframes(audio_data)  # and keep a copy on disk
     finally:
         # Always release the audio device, even if the request or the
         # playback fails part-way through.
         if stream is not None:
             stream.stop_stream()
             stream.close()
         p.terminate()

     return wave.open("output.wav", "rb")

 # Call the function: this runs at module load, playing the streamed audio
 # and writing output.wav. The returned wave reader is not kept.
 get_audio_stream()
diff --git a/requirements.txt b/requirements.txt
 openai
 weave
 pyaudio
	import base64
	import os
	from openai import OpenAI
	import pyaudio
	import wave
	import weave

	# Weave needs a W&B API key to track the audio. Use setdefault so a key that
	# is already exported in the environment is not wiped by the empty
	# placeholder; replace "" to set one here (exporting it in the shell is
	# safer than keeping a secret in source).
	os.environ.setdefault("WANDB_API_KEY", "")
	# The OpenAI key is taken from the OPENAI_API_KEY environment variable.
	client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

	weave.init('openai-audio-chat')
	@weave.op
	def get_audio_stream():
	    """Stream a spoken chat completion, play it live, and save it to ``output.wav``.

	    Requests a streamed ``gpt-4o-audio-preview`` completion with ``pcm16``
	    audio, decodes each base64 audio fragment as it arrives, and writes it
	    both to a PyAudio output stream and to ``output.wav``.

	    Returns:
	        wave.Wave_read: ``output.wav`` reopened for reading. The handle is
	        returned open (presumably so the ``weave.op`` wrapper can record the
	        audio -- confirm against the Weave docs); the caller owns it.

	    Raises:
	        Whatever the OpenAI client, PyAudio, or ``wave`` raise. The audio
	        device is released before the exception propagates.
	    """
	    # OpenAI documents "pcm16" output as 24 kHz, 16-bit, mono PCM. Playback
	    # and the WAV header must both use that rate, otherwise the speech is
	    # reproduced slowed down and lower-pitched.
	    sample_rate = 24000
	    channels = 1
	    sample_width = 2  # bytes per sample (16-bit)

	    p = pyaudio.PyAudio()
	    stream = None
	    try:
	        stream = p.open(
	            format=pyaudio.paInt16,
	            channels=channels,
	            rate=sample_rate,
	            output=True,
	            frames_per_buffer=4096,
	        )

	        with wave.open("output.wav", "wb") as wav_file:
	            wav_file.setnchannels(channels)
	            wav_file.setsampwidth(sample_width)
	            wav_file.setframerate(sample_rate)

	            completion = client.chat.completions.create(
	                model="gpt-4o-audio-preview",
	                modalities=["text", "audio"],
	                audio={"voice": "fable", "format": "pcm16"},
	                stream=True,
	                messages=[
	                    {"role": "system", "content": "You're the fastest counter in the world"},
	                    {"role": "user", "content": "Count to 13 super super slow, announciate each number with a dramatic flair, chnging up accents as you go along. British, french, german, spanish, etc."},
	                ],
	            )

	            for chunk in completion:
	                choices = getattr(chunk, "choices", None)
	                if not choices:
	                    continue

	                # Guard against deltas without usable audio: the attribute
	                # may be absent or None, and a fragment may have no "data".
	                audio = getattr(choices[0].delta, "audio", None)
	                if not audio:
	                    continue
	                if isinstance(audio, dict):
	                    encoded = audio.get("data")
	                else:
	                    encoded = getattr(audio, "data", None)
	                if encoded is None:
	                    continue

	                audio_data = base64.b64decode(encoded)
	                stream.write(audio_data)  # play immediately
	                wav_file.writeframes(audio_data)  # and keep a copy on disk
	    finally:
	        # Always release the audio device, even if the request or the
	        # playback fails part-way through.
	        if stream is not None:
	            stream.stop_stream()
	            stream.close()
	        p.terminate()

	    return wave.open("output.wav", "rb")

	# Call the function: this runs at module load, playing the streamed audio
	# and writing output.wav. The returned wave reader is not kept.
	get_audio_stream()