Created
March 6, 2025 09:44
-
-
Save glowinthedark/4757697e1cd2c816a1b48f7f0735fd72 to your computer and use it in GitHub Desktop.
towerinstruct translate English to French
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import json
import os
import re
import sys
from pathlib import Path

# install with: pip3 install openai — see https://pypi.org/project/openai/
from openai import OpenAI
# Endpoint of the local server the script talks to through the OpenAI client.
BASE_URL = "http://localhost:1234/v1"
MODEL = "towerinstruct-13b-v0.1"

# Zero-width split point in front of every <p> / <h3> tag, so each fragment
# keeps its own opening tag.
PATTERN_PAR = re.compile(r"(?=<p>|<h3>)")


def split_fragments(source_text):
    """Split *source_text* before each ``<p>``/``<h3>`` tag.

    Splitting on a zero-width match yields an empty leading fragment when the
    text starts with one of the tags; empty and whitespace-only fragments are
    dropped so they are never sent to the model.
    """
    return [part for part in PATTERN_PAR.split(source_text) if part.strip()]


def build_prompt(fragment):
    """Return the user prompt asking for a translation of *fragment*."""
    return f"Translate English to French:\nEnglish:\n{fragment}\n"


def output_paths(input_file):
    """Return ``(html_path, json_path)`` derived from *input_file*.

    Both paths replace the input's suffix with one that embeds ``MODEL``.
    """
    html_path = input_file.with_suffix(f"._FR_{MODEL}_LM_STUDIO.html")
    json_path = input_file.with_suffix(f"._FR_{MODEL}_LM_STUDIO.json")
    return html_path, json_path


def save_log(json_path, data):
    """Write *data* (list of ``[source, translation]`` pairs) to *json_path*.

    The document is serialised to a string first, so a serialisation error
    cannot leave a truncated file behind.
    """
    payload = json.dumps(data, indent=1, ensure_ascii=False)
    json_path.write_text(payload, encoding="utf-8")


def translate_fragment(client, fragment):
    """Ask the model to translate *fragment* and return the reply text.

    Returns an empty string when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": build_prompt(fragment)}],
        stream=False,
        temperature=0.2,
        # NOTE(review): -1 is presumably the server's "no limit" value — confirm.
        max_tokens=-1,
    )
    # message.content can be None; normalise so callers can always .strip().
    return response.choices[0].message.content or ""


def main(argv=None):
    """Translate the HTML file named on the command line, fragment by fragment.

    Each translation is appended to the HTML output file and recorded, together
    with its source fragment, in the JSON log, which is rewritten after every
    fragment so progress survives an interruption.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 if the input file does not
        exist, 2 on wrong usage.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        print(f"usage: {Path(sys.argv[0]).name} INPUT_FILE", file=sys.stderr)
        return 2

    input_file = Path(args[0])
    if not input_file.exists():
        print(f'{input_file} not found!')
        return 1

    output, output_json = output_paths(input_file)
    if not output_json.exists():
        save_log(output_json, [])

    # Load the existing log once instead of re-reading it for every fragment.
    with output_json.open(mode='r', encoding='utf-8') as jr:
        data = json.load(fp=jr) or []

    # The OpenAI client refuses to start without an API key; fall back to a
    # placeholder when OPENAI_API_KEY is not set.
    client = OpenAI(
        base_url=BASE_URL,
        api_key=os.environ.get("OPENAI_API_KEY") or "lm-studio",
    )

    source_text = input_file.read_text(encoding='utf-8')
    for fragment in split_fragments(source_text):
        content = translate_fragment(client, fragment)
        print(f'🇬🇧 {fragment}')
        print(f"🇫🇷 {content}")

        with output.open("a", encoding="utf-8") as fout:
            fout.write(f"{content}\n")

        # Build the record before touching the file so a failure here cannot
        # wipe the translations already logged.
        data.append([fragment.strip(), content.strip()])
        save_log(output_json, data)

    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment