-
-
Save mahmoud/3081857 to your computer and use it in GitHub Desktop.
Pretty printing XML in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
try:
    import xml.etree.cElementTree as ET
except ImportError:
    # cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape
def pretty_xml(xml_str, indent=" "):
    """
    A very simple, hopefully not simplistic, XML pretty printer.

    Concept courtesy Mark Williams.

    ``xml_str`` is a string of XML or a file-like object (anything with a
    ``read`` attribute). ``indent`` is the string repeated once per nesting
    level. Returns the re-indented document as one string.

    Each element is written as its opening tag, then its text (or a newline
    when it has none), then its closing tag followed by a newline. Text and
    attribute values are re-escaped so the output stays well-formed, and
    whitespace-only text is treated as absent so that already-indented
    input is not indented twice.

    Limitation: ``elem.tail`` (text following a child's closing tag) is
    never read, so tail text does not appear in the output.
    """
    if hasattr(xml_str, "read"):  # ElementTree uses file-like objects
        fn = xml_str
    else:
        fn = StringIO(xml_str)  # cStringIO doesn't support UTF-8

    attr_entities = {'"': '&quot;'}  # values are always wrapped in "..."
    cursor = 0
    out_list = []
    pending = None  # element whose opening tag is written but text is not
    had_txt = False

    for event, elem in ET.iterparse(fn, events=('start', 'end')):
        if pending is not None:
            # At 'start' only the attributes are guaranteed; the text is
            # complete once the next event (first child or own end) arrives.
            txt = pending.text
            had_txt = bool(txt) and not txt.isspace()
            out_list.append(escape(txt) if had_txt else '\n')
            pending = None

        if event == 'start':
            attrs = ' '.join([k + '="' + escape(v, attr_entities) + '"'
                              for k, v in elem.items()])
            if attrs:
                cur_tag = '<' + elem.tag + ' ' + attrs + '>'
            else:
                cur_tag = '<' + elem.tag + '>'
            out_list.extend([indent * cursor, cur_tag])
            pending = elem
            cursor += 1
        else:
            cursor -= 1
            # A closing tag directly after text stays on the text's line.
            cur_ind = '' if had_txt else indent * cursor
            out_list.extend([cur_ind, '</' + elem.tag + '>', '\n'])
            had_txt = False

    return ''.join(out_list)
""" | |
The function above is about twice as fast as the one below, with comparable
output formats. The one below might be slightly better as far as looks and
accuracy are concerned.
""" | |
import re | |
from xml.dom.minidom import parseString | |
# Matches text sitting on its own indented line between two tags:
# ">", newline, indentation, text, newline, indentation, "</".
# Group 1 captures the text so it can be pulled back inline.
_xml_re = re.compile(r'>\n\s+([^<>\s].*?)\n\s+</', re.DOTALL)


def pretty_xml_old(xml_str, indent=" "):
    """
    Pretty print ``xml_str`` using minidom's ``toprettyxml``.

    ``indent`` is passed to ``toprettyxml`` as the per-level indentation
    string. Text that ends up on its own line between an opening and a
    closing tag is collapsed back onto one line.

    Input containing 20 or more newlines is returned unchanged.
    """
    # avoid re-prettifying large amounts of xml that is fine
    if xml_str.count("\n") >= 20:
        return xml_str
    pxml = parseString(xml_str).toprettyxml(indent)
    return _xml_re.sub(r'>\g<1></', pxml)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment