Created
February 19, 2022 12:46
-
-
Save jdhoek/41130c1126a3561cdab1f50540840e55 to your computer and use it in GitHub Desktop.
Kruidvat website-tag fixer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
from lxml import etree | |
def show_help():
    """Print a one-line usage hint built from the script's invocation name."""
    script_name = sys.argv[0]
    print(f"{script_name} INPUT > OUTPUT")
def parse_command():
    """Dispatch on the command line arguments.

    When no INPUT argument is given, the usage hint is printed and the
    process exits with status 0. Otherwise the first argument is handed to
    ``convert`` as the file to process; any further arguments are ignored.
    """
    arguments = sys.argv[1:]
    if not arguments:
        # Nothing to convert: show the usage line and stop.
        show_help()
        sys.exit(0)

    input_path = arguments[0]
    convert(input_path)
def convert(file_name):
    """Refresh the website/brand/operator tags of Kruidvat chemists in an XML file.

    Parses the XML document at *file_name* and walks the direct children of
    its root element. Every child that carries the tags ``shop=chemist`` and
    ``brand=Kruidvat`` plus a ``ref`` tag has its existing ``website``,
    ``brand:website``, ``brand:wikipedia`` and ``operator`` tags removed, after
    which ``fix_tags`` appends replacement tags and marks the element as
    modified. The complete document is then serialised as pretty-printed
    UTF-8 with an XML declaration and printed to standard output.

    Args:
        file_name: Path of the XML file to read (presumably an OSM/JOSM
            export, judging by the ``k``/``v`` tag attributes -- confirm).
    """
    replaced_keys = ('website', 'brand:website', 'brand:wikipedia', 'operator')

    # Read the raw bytes so the XML parser applies the encoding declared in
    # the document itself rather than whatever the locale's text decoding is.
    with open(file_name, 'rb') as input_file:
        tree = etree.parse(input_file)

    for element in tree.getroot():
        is_chemist = False
        is_kruidvat = False
        ref = None

        # First pass: find out whether this element is a Kruidvat chemist
        # and pick up its store reference.
        for tag in element:
            key = tag.get('k')
            value = tag.get('v')
            if key == 'shop' and value == 'chemist':
                is_chemist = True
            elif key == 'brand' and value == 'Kruidvat':
                is_kruidvat = True
            elif key == 'ref':
                ref = value

        if not (is_chemist and is_kruidvat and ref is not None):
            continue

        # Second pass: drop the tags that are about to be replaced. Iterate
        # over a snapshot of the children, because removing children from an
        # element while iterating that same element can skip siblings.
        for tag in list(element):
            if tag.get('k') in replaced_keys:
                element.remove(tag)

        fix_tags(element, ref)

    xml = etree.tostring(tree,
                         encoding="utf8",
                         xml_declaration=True,
                         pretty_print=True)
    print(xml.decode())
def fix_tags(element, ref):
    """Append the standard Kruidvat tags to *element* and flag it as modified.

    Four ``tag`` children are appended, in this order: ``website`` (the store
    page URL ending in *ref*), ``brand:website``, ``brand:wikipedia`` and
    ``operator``. Finally the element's ``action`` attribute is set to
    ``modify``.

    Args:
        element: XML element that receives the new ``tag`` children.
        ref: Store reference inserted verbatim at the end of the website URL.
    """
    store_url = f'https://www.kruidvat.nl/nl/store/DROGISTERIJ%20KRUIDVAT%20{ref}'
    new_tags = (
        ('website', store_url),
        ('brand:website', 'https://www.kruidvat.nl'),
        ('brand:wikipedia', 'nl:Kruidvat'),
        ('operator', 'A.S. Watson'),
    )

    for key, value in new_tags:
        element.append(etree.Element("tag", attrib={'k': key, 'v': value}))

    element.set('action', 'modify')
# Run the command-line handling only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    parse_command()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment