Skip to content

Instantly share code, notes, and snippets.

@aerickt
Last active November 22, 2024 23:52
Show Gist options
  • Save aerickt/03871d801534067066d4587cd61144eb to your computer and use it in GitHub Desktop.
Save aerickt/03871d801534067066d4587cd61144eb to your computer and use it in GitHub Desktop.
basic viet.py for Plover
# This is a (work-in-progress) python dictionary implementation of my Vietnamese steno system: https://github.com/aerickt/steno-dictionaries/wiki/Vietnamese-Steno
# As this dictionary generates words on the fly, it is not (or shouldn't be) necessary to add missing entries.
# Therefore, it is a better implementation of my Vietnamese steno system than the json dictionary.
# However, this is very work-in-progress. I particularly don't have a great way to test the tone placement system.
# This dictionary also has some minor differences from the viet.json implementation. For words such as qua, this system
# only treats the q (KW) as the initial consonant -- the ua must be entered as the vowel. Alternatively, you can treat qu
# as a separate initial consonant by using a different initial chord (KWR).
# This dictionary also does not support fingerspelling yet. However, a json dictionary may be provided instead to keep this
# python dictionary readable (I'm not sure if I am able to add fingerspelling in a competent way).
import re
# Only consider single stroke outlines
LONGEST_KEY = 1

# Lookup tables mapping each chord group of a stroke to the text it produces.
# Keys are chords as produced by split_stroke(): left-hand chords are bare,
# right-hand-only chords carry a leading "-". The empty chord "" is a valid
# entry in every group and contributes nothing to the word.
chord_space = {
    # Define initial consonant chords
    "initial": {
        "S": "s",
        "STKPW": "gi",
        "STPH": "ng",
        "STPHR": "ngh",
        "SR": "v",
        "T": "t",
        "TK": "đ",
        "TKP": "d",
        "TKPW": "g",
        "TKPWH": "gh",
        "TKR": "đr",
        "TP": "ph",
        "TPH": "n",
        "TPHR": "nh",
        "TH": "th",
        "TR": "tr",
        "K": "k",
        "KP": "x",
        "KW": "q",
        "KWR": "qu",
        "KH": "kh",
        "KHR": "ch",
        "KR": "c",
        "P": "p",
        "PW": "b",
        "PH": "m",
        "H": "h",
        "HR": "l",
        "R": "r",
        "": "",
    },
    # Define vowel chords
    "vowel": {
        "A": "a",
        "AO": "ao",
        "AOE": "ưu",
        "AOEU": "oo",
        "AOEUFR": "uyu",
        "AOEUR": "uya",
        "AOU": "oa",
        "AOF": "oai",
        "AOFR": "oă",
        "AOR": "oay",
        "A*R": "ya",
        "AE": "ê",
        "AEU": "êu",
        "AEUR": "yêu",
        "AEF": "iê",
        "AEFR": "uê",
        "AER": "yê",
        "AU": "au",
        "AUF": "ia",
        "AUFR": "uă",
        "AUR": "ưa",
        "AF": "ai",
        "AFR": "ă",
        "AR": "ay",
        "O": "o",
        "O*E": "oe",
        "O*UR": "uơ",
        "OE": "ô",
        "OEU": "uô",
        "OEUF": "uôi",
        "OEUFR": "uâ",
        "OEF": "ôi",
        "OER": "oeo",
        "OU": "ơ",
        "OUF": "ơi",
        "OUFR": "ươi",
        "OUR": "ươ",
        "OF": "oi",
        "OFR": "ươu",
        "OR": "ua",
        "*EUFR": "uây",
        "E": "e",
        "EU": "iêu",
        "EUF": "ưi",
        "EUFR": "âu",
        "EUR": "uyê",
        "EF": "eo",
        "EFR": "â",
        "ER": "ây",
        "U": "u",
        "UF": "ui",
        "UFR": "uy",
        "UR": "ư",
        "-F": "i",
        "-FR": "iu",
        "-R": "y",
        "": "",
    },
    # Define ending consonant chords
    "final": {
        "-P": "p",
        "-PB": "nh",
        "-PBLG": "ch",
        "-PL": "m",
        "-PLG": "mh",
        "-B": "h",
        "-BG": "c",
        "-L": "n",
        "-LG": "ng",
        "-G": "t",
        "": "",
    },
    # Define tone chords. Each value is a Unicode combining mark that is
    # inserted directly after the letter carrying the tone.
    # The acute and grave marks use U+0301 / U+0300 rather than the
    # deprecated (but canonically equivalent) tone marks U+0341 / U+0340.
    "tone": {
        "-T": "\u0301",   # combining acute accent
        "-TS": "\u0303",  # combining tilde
        "-S": "\u0300",   # combining grave accent
        "-D": "\u0309",   # combining hook above
        "-Z": "\u0323",   # combining dot below
        "": "",
    },
}
def lookup(key):
    """Translate a single-stroke outline into a Vietnamese syllable.

    The stroke is split into its four chord groups (initial, vowel, final
    and tone, in that order). The word is then assembled as: initial
    consonant + vowel letters carrying the tone mark + final consonant.

    Args:
        key: Sequence of stroke strings; must contain exactly LONGEST_KEY
            (one) stroke.

    Returns:
        The generated word as a string.

    Raises:
        KeyError: If the outline is not exactly one stroke long, if any
            chord group is not defined in chord_space, or if the stroke
            produces no text at all.
    """
    # Reject anything that is not a single stroke (this also turns an empty
    # outline into a KeyError rather than an IndexError on key[0]).
    if len(key) != LONGEST_KEY:
        raise KeyError(key)

    stroke_chords = split_stroke(key[0])

    # Every chord group must be a known chord, otherwise there is no entry.
    for group in ("initial", "vowel", "final", "tone"):
        if stroke_chords[group] not in chord_space[group]:
            raise KeyError(key)

    # The initial and final consonants are constant; only the vowel part
    # depends on the tone, so it is built by get_tone_vowel().
    word = "".join((
        chord_space["initial"][stroke_chords["initial"]],
        get_tone_vowel(
            stroke_chords["vowel"],
            stroke_chords["final"],
            stroke_chords["tone"],
        ),
        chord_space["final"][stroke_chords["final"]],
    ))

    # A stroke in which every group is empty would otherwise "translate" to
    # an empty string; treat it as a missing entry instead.
    if not word:
        raise KeyError(key)
    return word
# Matches a whole stroke as 7 groups: initial consonant keys, left-hand
# vowels, star, right-hand vowels, the F/R keys, final consonant keys and
# tone keys. An optional "-" may follow the left-hand vowels.
_STROKE_PATTERN = re.compile(
    r'(S?T?K?P?W?H?R?)(A?O?)-?(\*?)(E?U?)(F?R?)(P?B?L?G?)(T?S?D?Z?)'
)


# Split a stroke into the 4 groups of interest
def split_stroke(stroke):
    """Split a stroke string into its initial, vowel, final and tone chords.

    Args:
        stroke: A single stroke string, e.g. "STPHEURL" or "STKPW-S".

    Returns:
        A dict with the keys "initial", "vowel", "final" and "tone". Absent
        groups are empty strings. A non-empty vowel, final or tone chord
        that does not begin with a vowel key or the star is prefixed with
        "-" so that it matches the keys used in chord_space.

    Raises:
        KeyError: If the stroke contains anything the pattern does not
            cover (unknown keys, keys out of order). The whole stroke must
            match; a partial match would silently drop the remaining keys.
    """
    match = _STROKE_PATTERN.fullmatch(stroke)
    if match is None:
        raise KeyError(stroke)
    initial, left_vowels, star, right_vowels, modifiers, final, tone = match.groups()

    # Consolidate the 7 regex groups into the 4 that are needed. The F/R
    # keys count as part of the vowel chord.
    groups = {
        "initial": initial,
        "vowel": left_vowels + star + right_vowels + modifiers,
        "final": final,
        "tone": tone,
    }

    # Prepend a hyphen to right-hand-only chords
    for name in ("vowel", "final", "tone"):
        chord = groups[name]
        if chord and chord[0] not in "AOEU*":
            groups[name] = "-" + chord
    return groups
# Letters that already carry a diacritic take the tone mark in preference to
# plain letters. When several are present, the one listed last wins (so the
# tone lands on "ơ" in "ươ").
_TONE_CARRIERS = ("ă", "â", "ê", "ô", "ư", "ơ")


# Using a vowel chord and a tone chord as input, return the vowel letters with the correct tone diacritic
def get_tone_vowel(vowel_chord, final_chord, tone_chord):
    """Return the vowel letters of a syllable with the tone mark inserted.

    The tone is a combining character, so it is inserted immediately after
    the letter that should carry it.

    Args:
        vowel_chord: Key into chord_space["vowel"].
        final_chord: The final consonant chord; only whether it is empty
            matters here.
        tone_chord: Key into chord_space["tone"].

    Returns:
        The vowel letters, with the tone mark spliced in when a tone is
        present. With an empty vowel the result is just the tone mark, so
        that it attaches to the preceding letter (useful for words like gì).

    Raises:
        KeyError: If either chord is missing from chord_space.
    """
    tone = chord_space["tone"][tone_chord]
    vowel_letters = chord_space["vowel"][vowel_chord]

    # Early return if no tone
    if not tone:
        return vowel_letters

    letter_count = len(vowel_letters)
    if letter_count <= 1:
        # Zero letters: the mark goes first, landing on the previous letter.
        # One letter: the mark goes right after that letter.
        tone_position = letter_count
    else:
        carrier_positions = [
            vowel_letters.find(letter) + 1
            for letter in _TONE_CARRIERS
            if letter in vowel_letters
        ]
        if carrier_positions:
            # An already-accented letter takes the tone
            tone_position = carrier_positions[-1]
        elif final_chord:
            # Ending consonant present: tone on the last letter
            tone_position = letter_count
        else:
            # No ending consonant: tone on the second last letter
            tone_position = letter_count - 1

    # Merge the vowel letters and diacritic in the right position
    return vowel_letters[:tone_position] + tone + vowel_letters[tone_position:]
# for i in ["T", "TS", "S", "D", "Z"]:
# print(lookup(["STPHEURL" + i]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment