Last active
February 9, 2018 14:12
-
-
Save xi/8a2774c2e82682fa34a4cd1e621fcf5a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Get translations from leo.org on the command line.""" | |
import argparse | |
import itertools | |
import collections | |
import xml.etree.ElementTree as ET | |
import requests | |
# Version string reported by the --version / -V command-line flag.
__version__ = '0.0.0'
# Accepted values for the --vocab option; the chosen value is inserted into
# the leo.org query URL, and the first entry is the default.
# NOTE(review): presumably each code pairs a language with German ("de") --
# confirm against the leo.org API.
VOCABS = ['ende', 'frde', 'esde', 'itde', 'chde', 'rude', 'ptde', 'plde']
def get_text(node):
    """Return the concatenated text of *node* and all of its descendants.

    Args:
        node: An ``xml.etree.ElementTree.Element``, or ``None``.

    Returns:
        The text content in document order, with markup removed.  An empty
        string is returned for ``None`` so that the result of a failed
        ``Element.find()`` lookup can be passed in directly.
    """
    if node is None:
        return ''
    return ''.join(node.itertext())
def get_data(vocab, query):
    """Fetch translations for *query* from leo.org and group them by section.

    Args:
        vocab: Dictionary code inserted into the request URL (see ``VOCABS``).
        query: Search term, sent as the ``search`` query parameter.

    Returns:
        An ``OrderedDict`` mapping each section's ``sctTitle`` attribute to a
        list of entries, in document order.  Each entry is a list holding the
        text of the ``repr`` element of every ``side`` in that entry (an
        empty string when a side has no ``repr`` element).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    url = 'https://pda.leo.org/dictQuery/m-vocab/%s/query.xml' % vocab
    # A timeout keeps the CLI from hanging forever on a stalled connection.
    r = requests.get(url, params={'search': query}, timeout=10)
    # Surface HTTP errors directly instead of as a confusing XML parse error.
    r.raise_for_status()
    # Parse the raw bytes so the XML parser takes the encoding from the
    # document itself rather than from requests' header-based guess.
    root = ET.fromstring(r.content)
    sections = collections.OrderedDict()
    for section in root.iter('section'):
        # setdefault merges sections that share a title instead of letting
        # the later one discard the entries collected for the earlier one.
        entries = sections.setdefault(section.get('sctTitle'), [])
        for entry in section.iter('entry'):
            sides = []
            for side in entry.iter('side'):
                repr_node = side.find('.//repr')
                # A side without a <repr> child yields an empty cell.
                sides.append('' if repr_node is None else get_text(repr_node))
            entries.append(sides)
    return sections
def get_max_len(data):
    """Return the length of the longest side string found in *data*.

    *data* maps section titles to lists of entries, and every entry is an
    iterable of strings.  The result is 0 when there are no strings at all.
    """
    entries = itertools.chain.from_iterable(data.values())
    sides = itertools.chain.from_iterable(entries)
    return max(map(len, sides), default=0)
def wrap(s, width=80, indent=0):
    """Greedily break *s* into a list of lines of at most *width* characters.

    Words are split on whitespace and re-joined with single spaces.  Every
    line after the first is prefixed with *indent* spaces, and that prefix
    counts towards the line's length.  A word that does not fit is never
    split; it is placed on a line of its own.  Empty or whitespace-only
    input yields an empty list.
    """
    wrapped = []
    for token in s.split():
        if not wrapped:
            # The first word always opens the first, unindented line.
            wrapped.append(token)
            continue
        candidate = wrapped[-1] + ' ' + token
        if len(candidate) <= width:
            wrapped[-1] = candidate
        else:
            wrapped.append(' ' * indent + token)
    return wrapped
def iter_column_lines(a_raw, b_raw, width=80):
    """Yield the output lines that show *a_raw* and *b_raw* side by side.

    Both texts are wrapped to *width* characters with a two-space hanging
    indent.  On each output line the left cell is padded to *width*, followed
    by a two-space gutter and the right cell.  The shorter column is filled
    with empty cells, and trailing whitespace is stripped from every line.
    """
    left_lines = wrap(a_raw, width=width, indent=2)
    right_lines = wrap(b_raw, width=width, indent=2)
    rows = itertools.zip_longest(left_lines, right_lines, fillvalue='')
    for left, right in rows:
        # ljust never truncates, so an over-long left cell keeps its full
        # text and is still followed by the two-space gutter.
        padded = left.ljust(width) + ' ' * 2
        yield (padded + right).rstrip()
def print_data(data, width=80):
    """Print every section of *data* as a two-column table on stdout.

    Each column gets half of *width* after subtracting four characters, but
    never more than the longest string in *data* needs.  For every section
    the title is printed, then its entries as prefixed rows, then one empty
    line.
    """
    half_width = int((width - 4) / 2)
    column_width = min(get_max_len(data), half_width)
    for heading, entries in data.items():
        print(heading)
        for sides in entries:
            for row in iter_column_lines(*sides, width=column_width):
                print(' ' + row)
        print('')
def parse_args():
    """Parse the command line and return the resulting namespace.

    The namespace carries ``query`` (positional search term), ``vocab`` (one
    of ``VOCABS``, defaulting to the first) and ``width`` (an int, 80 by
    default).  ``--version`` / ``-V`` prints ``__version__`` and exits.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('--version', '-V', action='version', version=__version__)
    cli.add_argument('query')
    cli.add_argument('--vocab', '-v', choices=VOCABS, default=VOCABS[0])
    cli.add_argument('--width', '-w', type=int, default=80)
    return cli.parse_args()
def main():
    """Command-line entry point: parse arguments, query leo.org, print."""
    options = parse_args()
    translations = get_data(options.vocab, options.query)
    print_data(translations, options.width)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment