Created
August 21, 2019 23:33
-
-
Save thebabush/67aabe335fe4894629b7172aaba4e2c7 to your computer and use it in GitHub Desktop.
translate
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Quick'n'dirty script to translate subs using Yandex. | |
It all started because I didn't have enough time to finish a Japanese movie | |
that I was watching heading back home from DEF CON 27. | |
Anyway, I managed to get a copy of the movie (**cough cough**) but my 日本語 | |
is really bad and the only subs I could find were in Chinese. | |
So I decided to bear the automatic translation for the remaining 30 minutes | |
that I had left. | |
Usage: ./translate_str.py /path/to/subs.srt zh-en | |
(c) 2019 Paolo Montesel | |
""" | |
import re | |
import sys | |
import yandex_translate | |
# Placeholder: replace the string below with your own Yandex Translate API key.
# Kept as a string so the module can be loaded before a key is filled in.
API_KEY = 'YOUR_YANDEX_API_KEY'

# Layout of one printed cue: sequence number, "start --> end" timing line,
# then the text. The caller's print() appends the newline that yields the
# blank separator line between cues.
TMPL = r'''{}
{} --> {}
{}
'''
def main(fp, lan):
    """Translate every cue of an SRT subtitle file and print the result.

    The file content is split into (sequence number, start time, end time,
    text) cues. Each cue's text is sent to the Yandex translator and the
    translated cue is printed to stdout using ``TMPL``.

    Args:
        fp: Path of the subtitle file to read.
        lan: Language argument handed to ``translate.translate`` as-is
            (the module docstring's usage example passes ``zh-en``).

    Raises:
        ValueError: If the translator does not return exactly one text for
            a cue.
    """
    translate = yandex_translate.YandexTranslate(API_KEY)

    with open(fp, 'r') as f:
        # The three capture groups make re.split() return
        # [prefix, seq, start, end, text, seq, start, end, text, ...].
        parts = re.split(r'(\d+)\n(\S+) --> (\S+)\s+', f.read())

    # parts[0] is whatever precedes the first cue (empty string, BOM, stray
    # lines); drop it by position instead of guessing from the cue number.
    fields = parts[1:]

    cues = zip(fields[0::4], fields[1::4], fields[2::4], fields[3::4])
    for seq, begin, end, text in cues:
        seq = int(seq)
        translated = translate.translate(text.strip(), lan)['text']
        # Validate before printing so a malformed response never produces a
        # half-correct cue (and the check survives ``python -O``).
        if len(translated) != 1:
            raise ValueError(
                'expected exactly one translation for cue {}, got {}'.format(
                    seq, len(translated)))
        print(TMPL.format(seq, begin, end, translated[0]))
if __name__ == '__main__':
    # Both the subtitle path and the language pair are required; fail with a
    # usage hint instead of a bare IndexError when either is missing.
    if len(sys.argv) < 3:
        sys.exit('Usage: {} /path/to/subs.srt zh-en'.format(sys.argv[0]))
    main(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment