Created
January 23, 2025 17:21
-
-
Save jdhoek/4286b7380ccc7dd7fafef80de72f0465 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import sys
from lxml import etree
def show_help():
    """Print a one-line usage summary to standard error.

    The script writes its converted XML to standard output, which the user
    is expected to redirect into a file (``INPUT > OUTPUT``).  Sending the
    usage text to standard error keeps it visible on the terminal and out of
    that redirected output file.
    """
    print(f"{sys.argv[0]} INPUT > OUTPUT", file=sys.stderr)
def parse_command():
    """Run the conversion for the input file named on the command line.

    The first command-line argument is taken as the path of the input file
    and passed to ``convert``.  When it is missing, the usage message is
    shown and the process exits with status 2 (the conventional code for a
    command-line usage error), so that shells and pipelines can tell that
    nothing was converted.

    Raises:
        SystemExit: If no input file was given on the command line.
    """
    if len(sys.argv) < 2:
        show_help()
        sys.exit(2)

    convert(sys.argv[1])
def convert(file_name):
    """Rewrite the website and brand tags of Kruidvat chemists in an XML file.

    Parses *file_name* and looks at every direct child of the root element.
    For each child tagged ``shop=chemist`` and ``brand=Kruidvat`` that also
    has a usable ``ref`` tag, any existing ``website``, ``brand:website`` and
    ``brand:wikipedia`` tags are removed and ``fix_tags`` is called to add
    fresh ones.  The whole document is then written to standard output as
    pretty-printed, UTF-8 encoded XML with an XML declaration.

    Args:
        file_name: Path of the XML file to read.
    """
    # Binary mode: the XML parser then decodes the bytes itself according to
    # the document's own encoding declaration, instead of Python decoding
    # them first with the platform's default text encoding.
    with open(file_name, 'rb') as input_file:
        tree = etree.parse(input_file)

    for element in tree.getroot():
        ref = _kruidvat_ref(element)
        if ref is None:
            continue

        # Iterate over a snapshot of the children so that removing a tag
        # cannot disturb the iteration.
        for tag in list(element):
            if tag.get('k') in ('website', 'brand:website', 'brand:wikipedia'):
                element.remove(tag)
        fix_tags(element, ref)

    xml = etree.tostring(tree,
                         encoding="utf8",
                         xml_declaration=True,
                         pretty_print=True)

    # The serialised document is already UTF-8 bytes and declares that
    # encoding, so emit the bytes unchanged rather than letting the text
    # layer re-encode them with whatever encoding stdout happens to use.
    binary_stdout = getattr(sys.stdout, 'buffer', None)
    if binary_stdout is not None:
        binary_stdout.write(xml)
    else:
        # stdout was replaced by a text-only stream; fall back to text.
        sys.stdout.write(xml.decode('utf-8'))


def _kruidvat_ref(element):
    """Return the store number of a Kruidvat chemist element, else ``None``.

    An element qualifies when its children include ``shop=chemist`` and
    ``brand=Kruidvat`` tags plus a ``ref`` tag.  The returned value is the
    ``ref`` with leading zeros stripped; a ``ref`` that is empty after
    stripping is treated as missing because it cannot form a store URL.
    """
    is_chemist = False
    is_kruidvat = False
    ref = None

    for tag in element:
        key = tag.get('k')
        value = tag.get('v')
        if key == 'shop' and value == 'chemist':
            is_chemist = True
        elif key == 'brand' and value == 'Kruidvat':
            is_kruidvat = True
        elif key == 'ref' and value is not None:
            ref = value.lstrip('0')

    if is_chemist and is_kruidvat and ref:
        return ref
    return None
def fix_tags(element, ref):
    """Append the Kruidvat website and brand tags to *element*.

    Three ``tag`` children are added, in this order: ``website`` (the store
    page URL built from the store number *ref*), ``brand:website`` and
    ``brand:wikipedia``.  Finally the element's ``action`` attribute is set
    to ``modify``.

    Args:
        element: The XML element that receives the new ``tag`` children.
        ref: Store number inserted at the end of the store page URL.
    """
    new_tags = (
        ('website', f'https://www.kruidvat.nl/store/drogisterij-kruidvat-{ref}'),
        ('brand:website', 'https://www.kruidvat.nl'),
        ('brand:wikipedia', 'nl:Kruidvat'),
    )
    for key, value in new_tags:
        element.append(etree.Element("tag", attrib={'k': key, 'v': value}))

    element.set('action', 'modify')
# Run the command-line entry point only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    parse_command()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment