ids1024 · March 18, 2018 01:04
diff --git a/susheaders.py b/susheaders.py
 import os
 from urllib.request import urlretrieve
 import tarfile
 import re

 import bs4

 # Name of the SUSv4 TC2 archive, used both as the remote file name and as
 # the local cache file in the current working directory.
 TAR = "susv4tc2.tar.bz2"
 DLURL = "http://pubs.opengroup.org/onlinepubs/9699919799/download/" + TAR

 # Fetch the archive only when no local copy exists.  The download goes to
 # a temporary name and is renamed on success, so an interrupted transfer
 # never leaves a truncated TAR that later runs would mistake for a
 # complete download.
 if not os.path.exists(TAR):
     partial = TAR + ".part"
     urlretrieve(DLURL, partial)
     os.replace(partial, TAR)

 os.makedirs("output", exist_ok=True)

 with tarfile.open(TAR) as tar:
     for member in tar.getmembers():
         # Only archive members named "susv4tc2/basedefs/<something>.h.html"
         # are processed; the captured group ("<something>.h") becomes the
         # output file name.
         m = re.match(r"susv4tc2/basedefs/(.*\.h)\.html", member.name)
         if not m:
             continue
         name = m.group(1)

         html = bs4.BeautifulSoup(tar.extractfile(member), 'lxml')

         # Join the text of every <pre> element line by line, dropping lines
         # that start with '['.
         # NOTE(review): presumably those are bracketed option/margin tags
         # in the spec markup rather than C source -- confirm.
         src = '\n'.join(line
                         for pre in html.find_all('pre')
                         for line in pre.text.splitlines()
                         if not line.startswith('['))

         # Explicit encoding: the extracted text may contain non-ASCII
         # characters, and the locale default encoding is not guaranteed
         # to be able to represent them.
         with open("output/" + name, 'w', encoding='utf-8') as hfile:
             hfile.write(src)
	import os
	from urllib.request import urlretrieve
	import tarfile
	import re

	import bs4

	# Name of the SUSv4 TC2 archive, used both as the remote file name and as
	# the local cache file in the current working directory.
	TAR = "susv4tc2.tar.bz2"
	DLURL = "http://pubs.opengroup.org/onlinepubs/9699919799/download/" + TAR

	# Download once.  Writing to a temporary name and renaming afterwards
	# keeps a half-finished transfer from being treated as a valid archive
	# on the next run.
	if not os.path.exists(TAR):
	    partial = TAR + ".part"
	    urlretrieve(DLURL, partial)
	    os.replace(partial, TAR)

	os.makedirs("output", exist_ok=True)

	with tarfile.open(TAR) as tar:
	    for member in tar.getmembers():
	        # Process only members named
	        # "susv4tc2/basedefs/<something>.h.html"; the captured group
	        # ("<something>.h") is used as the output file name.
	        m = re.match(r"susv4tc2/basedefs/(.*\.h)\.html", member.name)
	        if not m:
	            continue
	        name = m.group(1)

	        html = bs4.BeautifulSoup(tar.extractfile(member), 'lxml')

	        # Collect the lines of all <pre> elements, skipping any line
	        # that begins with '['.
	        # NOTE(review): those look like bracketed annotation tags in the
	        # spec pages, not header source -- verify.
	        src = '\n'.join(line
	                        for pre in html.find_all('pre')
	                        for line in pre.text.splitlines()
	                        if not line.startswith('['))

	        # Write with an explicit encoding so the result does not depend
	        # on the locale's default.
	        with open("output/" + name, 'w', encoding='utf-8') as hfile:
	            hfile.write(src)