Last active
August 6, 2020 23:39
-
-
Save motbob/d38feff5e8e15ae01deb338eb822ac59 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tinycss | |
import re | |
import datetime | |
import math | |
# Put this script in the same folder as the .css and .vtt files.
# If you don't know how to get the .css, you'll need to find a different parser.
SUBS_FILENAME = 'subs.vtt'
CSS_FILENAME = 'css.css'

# Obviously this input method is absurdly clunky. Sorry.
# The whole subtitle file is read into memory as one string.
with open(SUBS_FILENAME, mode='r', encoding="utf-8") as subs_file:
    subs = subs_file.read()

# Parse the companion stylesheet with tinycss; its rules are classified below.
parser = tinycss.make_parser('page3')
stylesheet = parser.parse_stylesheet_file(CSS_FILENAME)

# Collects one summary dict per CSS rule.
css_rules = list()
# HIDIVE assigns certain text formatting to a bunch of lines at once. Our goal
# is to collect those formatting rules (there are usually fewer than 10 of them
# per script) and store them in a way that can be matched to individual
# subtitle lines later.
#
# Classification used here:
#   * "Swis721 BT" font + yellow      -> dialogue
#   * "Swis721 BT" font, not yellow   -> maybe dialogue (decided later, per cue)
#   * any other font                  -> sign
# This has generally worked out pretty well in testing. Basically, for dialogue
# lines we ignore the HIDIVE positioning, and for signs we try to replicate the
# positioning as closely as possible. Warning: be wary about rendering songs
# like dialogue (you should test with something like the araburu ED if you try).

# Rules that make text bigger or smaller than normal,
# e.g. {color:#FFFFFF; ... font-size:.84em;}
font_size_regx = re.compile(r'font-size: ([\d\.]+)em')

for css_rule in stylesheet.rules:
    # Every check below is a substring test on the text form of the rule's
    # declarations.
    # NOTE(review): the exact spelling matched here ('color: yellow', ...)
    # depends on how tinycss renders declarations via str() - confirm.
    rule = str(css_rule.declarations)

    uses_dialogue_font = 'font-family: "Swis721 BT"' in rule
    is_yellow = 'color: yellow' in rule

    # Flags are kept as 0/1 integers because later code compares them with 1.
    dialogueRule = 1 if (uses_dialogue_font and is_yellow) else 0
    maybeDialogueRule = 1 if (uses_dialogue_font and not is_yellow) else 0
    signRule = 0 if uses_dialogue_font else 1
    italicRule = 1 if 'font-style: italic' in rule else 0

    # Font scale relative to the base size; 1 when the rule sets no em size.
    font_size_match = font_size_regx.search(rule)
    if font_size_match is not None:
        font_multiplier = float(font_size_match.group(1))
    else:
        font_multiplier = 1

    css_rules.append({
        'selector': css_rule.selector.as_css(),
        'dialogue': dialogueRule,
        'maybedialogue': maybeDialogueRule,
        'sign': signRule,
        'italic': italicRule,
        'font_multiplier': font_multiplier,
    })
# A cue timing line: start and end timestamps (H:MM:SS.mmm) separated by "-->".
# Whatever follows the end timestamp is captured as the cue's positioning text.
time_regx = re.compile(r'\s*(\d+):(\d{2}):(\d{2})\.(\d{3})\s*-->\s*(\d+):(\d{2}):(\d{2})\.(\d{3})\s*(.*)')
# Cue text wrapped in a class span: <c.NAME_N>text</c>. "NAME_" and the single
# digit N are captured separately, followed by the text itself.
dialogue_regx = re.compile(r'<c\.(.+?_)(\d)>(.+?)</c>')

# get_line is 1 while the next line is expected to hold the text of the cue
# whose timing line was just seen.
get_line = 0
lines = []
line_meta_dict = None
subs_lines = subs.split('\n')

for sub_line in subs_lines:
    if get_line == 1:
        text_match = dialogue_regx.match(sub_line)
        if text_match is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError('Unrecognised cue text after timing line: %r' % sub_line)
        line_moniker_pre, line_moniker_post, line_text = text_match.groups()

        # Find the CSS rule whose selector mentions this cue's class; when
        # several selectors match, the last one wins.
        # NOTE(review): if no selector matches, the rule found for the previous
        # cue is reused - confirm that this fallback is intended.
        moniker = line_moniker_pre + line_moniker_post
        for j in css_rules:
            if moniker in j['selector']:
                line_meta_dict = j
        if line_meta_dict is None:
            raise ValueError('No CSS rule matches subtitle class %r' % moniker)

        if (line_moniker_post == '1') or (line_meta_dict['sign'] == 1):
            # Class suffix 1, or any sign: start a new output entry that
            # combines the cue timing/positioning with the rule's flags.
            lines.append(tentative_dict)
            lines[-1].update(line_meta_dict)
            lines[-1].update({'line_text': line_text})
        else:
            # Otherwise this cue continues the previous entry: append its text
            # after an ASS line break ("\N").
            lines[-1].update({'line_text': lines[-1]['line_text'] + '\\N' + line_text})
        get_line = 0

    time_match = time_regx.match(sub_line)
    if time_match is not None:
        s_h, s_m, s_s, s_ms, e_h, e_m, e_s, e_ms, positioning = time_match.groups()
        # Times are stored as float seconds.
        start_time = float(int(s_h)*3600 + int(s_m)*60 + int(s_s)) + (float(s_ms)/1000)
        end_time = float(int(e_h)*3600 + int(e_m)*60 + int(e_s)) + (float(e_ms)/1000)
        tentative_dict = {'start_time': start_time, 'end_time': end_time, 'positioning': positioning}
        get_line = 1
# These settings deal with aspect ratio. Keep things around 720p. Standard
# margins for 1280x720 are 150. You do NOT need to change these if your video
# is 16:9.
width = '1280'
height = '720'
fontsize = '48'
margins = '150'

# HIDIVE's player is 16:9. So if you have 4:3 video, your positioning will be
# off unless the player width is derived from the height instead.
if 1.7 <= float(width) / float(height) <= 1.8:
    playerwidth = float(width)
else:
    playerwidth = float(height) * 1.78

# Header of the .ass output: script info, the three styles (Default,
# Default-alt, Songs/Signs) and the column layout of the [Events] section.
# Event lines are appended to this string further down.
terxt = "".join([
    "[Script Info]\n",
    "ScriptType: v4.00+\n",
    "WrapStyle: 0\n",
    "PlayResX: ", width, "\n",
    "PlayResY: ", height, "\n",
    "ScaledBorderAndShadow: yes\n",
    "YCbCr Matrix: TV.709\n",
    "\n",
    "[V4+ Styles]\n",
    "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n",
    "Style: Default,Gandhi Sans,", fontsize, ",&H00FFFFFF,&H000000FF,&H00000000,&HA0000000,-1,0,0,0,100,100,0,0,1,2.4,1,2,", margins, ",", margins, ",40,1\n",
    "Style: Default-alt,Gandhi Sans,", fontsize, ",&H00FFFFFF,&H000000FF,&H00582209,&HA0000000,-1,0,0,0,100,100,0,0,1,2.4,1,2,", margins, ",", margins, ",40,1\n",
    "Style: Songs/Signs,Gandhi Sans,", fontsize, ",&H00FFFFFF,&H000000FF,&H00000000,&HA0000000,-1,0,0,0,100,100,0,0,1,2.4,1,2,", margins, ",", margins, ",40,1\n",
    "\n",
    "[Events]\n",
    "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
])
def get_timestamps_from_seconds(seconds_float):
    """Split a time in seconds into the two parts of an ASS timestamp.

    The value is rounded to hundredths of a second as a whole, so a fraction
    that rounds up to 1.00 carries into the seconds (1.996 -> "0:00:02", "00")
    instead of being dropped. Hours are not wrapped at 24.

    Args:
        seconds_float: Non-negative time offset in seconds.

    Returns:
        A tuple (hms, cs): hms is "H:MM:SS" and cs is the two-digit
        hundredths-of-a-second string. Callers join them with ".".
    """
    # Round once, on the full value, then split the resulting decimal text.
    whole_text, centiseconds = ('%.2f' % seconds_float).split('.')
    minutes, secs = divmod(int(whole_text), 60)
    hours, minutes = divmod(minutes, 60)
    hms = '%d:%02d:%02d' % (hours, minutes, secs)
    return hms, centiseconds
# First pass: emit dialogue events. An entry is rendered as dialogue when its
# rule is flagged as dialogue, or when it is "maybe dialogue" and its cue
# settings carry no horizontal "position:N%" value.
position_regx = re.compile(r'position:([-\d]+)%')
for entry in lines:
    has_position = position_regx.search(entry['positioning']) is not None
    if not (entry['dialogue'] == 1 or (entry['maybedialogue'] == 1 and not has_position)):
        continue

    starthms, startms = get_timestamps_from_seconds(entry['start_time'])
    endhms, endms = get_timestamps_from_seconds(entry['end_time'])

    # Optional ASS override tags: italics, and \an8 for "maybe dialogue"
    # entries.
    italic = '\\i1' if entry['italic'] == 1 else ''
    orientation = '\\an8' if entry['maybedialogue'] == 1 else ''
    tags = ''
    if orientation != '' or italic != '':
        tags = '{' + italic + orientation + '}'

    newline = 'Dialogue: 10,' + starthms + '.' + startms + ',' + endhms + '.' + endms + ',Default,,0,0,0,,' + tags + entry['line_text'] + '\n'
    terxt = terxt + newline
# The reason there are two loops is that dialogue should be kept separate from
# signs/songs in the .ass.
# Second pass: emit sign/song events. An entry is rendered here when its rule
# is flagged as a sign, or when it is "maybe dialogue" and its cue settings
# carry a horizontal "position:N%" value.
position_regx = re.compile(r'position:([-\d]+)%')
line_regx = re.compile(r'line:([-\d]+)%')
for entry in lines:
    positioning = entry['positioning']
    position_match = position_regx.search(positioning)
    if not (entry['sign'] == 1 or (entry['maybedialogue'] == 1 and position_match is not None)):
        continue

    starthms, startms = get_timestamps_from_seconds(entry['start_time'])
    endhms, endms = get_timestamps_from_seconds(entry['end_time'])

    if position_match is not None:
        # If there is horizontal positioning data in the line, we use it...
        # The value is taken from the same match used for the check above, so
        # negative percentages are handled as well.
        x = int(position_match.group(1))
        # Percentage of the player width, shifted by half the difference
        # between the video width and the player width.
        x = int((playerwidth * x/100) - (0.5 * abs(float(width) - playerwidth)))
        # Change the margin so that text can't go past the right edge of the screen.
        right_margin = str(max([1, x - int(margins)]))
        an = '7'
    else:
        # ...and if there isn't, we center the text.
        x = int(float(width)/2)
        an = '5'
        right_margin = '0'

    line_match = line_regx.search(positioning)
    if line_match is None:
        # The vertical position is required to build \pos(); fail readably.
        raise ValueError('Cue settings have no "line:N%%" value: %r' % positioning)
    y = int(line_match.group(1))
    y = int(int(height) * y/100)

    newfontsize = str(int(float(fontsize) * entry['font_multiplier']))
    newline = 'Dialogue: 0,' + starthms + '.' + startms + ',' + endhms + '.' + endms + ',Songs/Signs,,0,' + right_margin + ',0,,' + '{\\pos(' + str(x) + ',' + str(y) + ')\\an' + an + '\\fs' + newfontsize + '}' + entry['line_text'] + '\n'
    terxt = terxt + newline
# Aegisub freaks out sometimes when no BOM is set, hence the "utf-8-sig" encoding.
with open("out_araburu.ass", mode="w", encoding="utf-8-sig") as ass_file:
    ass_file.write(terxt)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment