Created
October 21, 2024 02:04
-
-
Save hololeo/0d1677adf84041c230545c2eb5be7f1c to your computer and use it in GitHub Desktop.
Calling a local Ollama model via LiteLLM and streaming the response to stdout.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from litellm import completion | |
def stream_response(response):
    """Yield the non-empty text fragments of a streaming completion.

    Each chunk is expected to carry its text under
    ``choices[0].delta.content``; chunks with a missing or empty
    content field are skipped.
    """
    fragments = (c['choices'][0]['delta'].get('content', '') for c in response)
    yield from (text for text in fragments if text)
def main():
    """Stream a chat completion from a local Ollama model and print it live.

    Sends one user prompt to ``ollama/llama3.2:latest`` through LiteLLM
    (pointed at the default local Ollama endpoint) and writes each
    streamed fragment to stdout as it arrives.
    """
    user_message = {"content": "how to achieve world peace in 1 number. not 42", "role": "user"}
    response = completion(
        model="ollama/llama3.2:latest",
        messages=[user_message],
        api_base="http://localhost:11434",
        stream=True,
    )
    # Print fragments as soon as they arrive; flush keeps output live.
    for fragment in stream_response(response):
        print(fragment, end='', flush=True)
    print()  # finish with a newline once the stream ends


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment