waveform80 · April 14, 2025 11:56
diff --git a/README.md b/README.md
diff --git a/discourse_revs.py b/discourse_revs.py
 #!/usr/bin/python3

 import sys
 import json
 from pathlib import Path
 from urllib.request import urlopen
 from html.parser import HTMLParser


 class TableParser(HTMLParser):
    """
    Collect the text of a single column from an HTML table.

    Feed markup with :meth:`feed`; afterwards :attr:`content` holds the
    concatenated text found inside the *column*-th ``<td>`` (counting from 1)
    of every row of the most recently opened top-level table. Tags nested
    inside a cell (e.g. ``<ins>``) are ignored, but their text is kept.

    Only text that is actually inside a cell is collected; whitespace or text
    between cells, between rows, or after the table is dropped.

    NOTE(review): nested tables and rows/cells without explicit closing tags
    are not handled specially -- confirm the input never contains them.
    """

    def __init__(self, column):
        """
        :param column: 1-based index of the ``<td>`` whose text is extracted.
        """
        super().__init__()
        self.extract_column = column
        # Number of <td> tags seen so far in the current row
        self.current_col = 0
        # One of 'top' (outside any table), 'table' or 'tr'
        self.state = 'top'
        self.content = ''
        # True only between a row-level <td> and the end of that cell
        self.in_cell = False

    def handle_starttag(self, tag, attrs):
        if tag == 'table' and self.state == 'top':
            # A new top-level table discards anything collected before it
            self.content = ''
            self.state = 'table'
        elif tag == 'tr' and self.state == 'table':
            self.state = 'tr'
            self.current_col = 0
            self.in_cell = False
        elif tag == 'td' and self.state == 'tr':
            self.current_col += 1
            self.in_cell = True

    def handle_endtag(self, tag):
        if tag == 'td' and self.state == 'tr':
            self.in_cell = False
        elif tag == 'tr' and self.state == 'tr':
            self.state = 'table'
            self.in_cell = False
        elif tag == 'table' and self.state == 'table':
            self.state = 'top'

    def handle_data(self, data):
        # current_col keeps its value after the cell closes, so it must be
        # combined with in_cell to avoid picking up text outside the cell.
        if self.in_cell and self.current_col == self.extract_column:
            self.content += data


 def main(post_id=120902, revisions=range(96, 107)):
    """
    Download revisions of a Discourse post and save one column of each diff.

    For every revision number in *revisions*, the revision JSON is fetched
    from discourse.ubuntu.com, the ``side_by_side_markdown`` table under
    ``body_changes`` is parsed, and the text of its second column is written
    to ``revision<rev>.md`` in the current directory (UTF-8 encoded).

    :param post_id: numeric ID of the post whose revisions are fetched.
    :param revisions: iterable of revision numbers to download.
    :returns: ``None``.
    :raises urllib.error.URLError: if a revision cannot be fetched.
    :raises KeyError: if the JSON lacks the expected ``body_changes`` keys.
    """
    for rev in revisions:
        url = f'https://discourse.ubuntu.com/posts/{post_id}/revisions/{rev}.json'
        # Keep the connection open only for as long as the body is being read
        with urlopen(url) as fp:
            data = json.load(fp)
        changes = data['body_changes']['side_by_side_markdown']
        parser = TableParser(column=2)
        parser.feed(changes)
        # close() makes the parser process any text it is still buffering
        parser.close()
        # Explicit encoding: the default depends on the platform locale
        Path(f'revision{rev}.md').write_text(parser.content, encoding='utf-8')


 # Script entry point: run main() and hand its result to the interpreter as
 # the exit status (does nothing when the module is merely imported).
 if __name__ == '__main__':
    raise SystemExit(main())
	#!/usr/bin/python3

	import sys
	import json
	from pathlib import Path
	from urllib.request import urlopen
	from html.parser import HTMLParser


	class TableParser(HTMLParser):
	    """
	    Collect the text of a single column from an HTML table.

	    Feed markup with :meth:`feed`; afterwards :attr:`content` holds the
	    concatenated text found inside the *column*-th ``<td>`` (counting from
	    1) of every row of the most recently opened top-level table. Tags
	    nested inside a cell (e.g. ``<ins>``) are ignored, but their text is
	    kept.

	    Only text that is actually inside a cell is collected; whitespace or
	    text between cells, between rows, or after the table is dropped.

	    NOTE(review): nested tables and rows/cells without explicit closing
	    tags are not handled specially -- confirm the input never contains
	    them.
	    """

	    def __init__(self, column):
	        """
	        :param column: 1-based index of the ``<td>`` whose text is extracted.
	        """
	        super().__init__()
	        self.extract_column = column
	        # Number of <td> tags seen so far in the current row
	        self.current_col = 0
	        # One of 'top' (outside any table), 'table' or 'tr'
	        self.state = 'top'
	        self.content = ''
	        # True only between a row-level <td> and the end of that cell
	        self.in_cell = False

	    def handle_starttag(self, tag, attrs):
	        if tag == 'table' and self.state == 'top':
	            # A new top-level table discards anything collected before it
	            self.content = ''
	            self.state = 'table'
	        elif tag == 'tr' and self.state == 'table':
	            self.state = 'tr'
	            self.current_col = 0
	            self.in_cell = False
	        elif tag == 'td' and self.state == 'tr':
	            self.current_col += 1
	            self.in_cell = True

	    def handle_endtag(self, tag):
	        if tag == 'td' and self.state == 'tr':
	            self.in_cell = False
	        elif tag == 'tr' and self.state == 'tr':
	            self.state = 'table'
	            self.in_cell = False
	        elif tag == 'table' and self.state == 'table':
	            self.state = 'top'

	    def handle_data(self, data):
	        # current_col keeps its value after the cell closes, so it must be
	        # combined with in_cell to avoid picking up text outside the cell.
	        if self.in_cell and self.current_col == self.extract_column:
	            self.content += data


	def main(post_id=120902, revisions=range(96, 107)):
	    """
	    Download revisions of a Discourse post and save one column of each diff.

	    For every revision number in *revisions*, the revision JSON is fetched
	    from discourse.ubuntu.com, the ``side_by_side_markdown`` table under
	    ``body_changes`` is parsed, and the text of its second column is
	    written to ``revision<rev>.md`` in the current directory (UTF-8
	    encoded).

	    :param post_id: numeric ID of the post whose revisions are fetched.
	    :param revisions: iterable of revision numbers to download.
	    :returns: ``None``.
	    :raises urllib.error.URLError: if a revision cannot be fetched.
	    :raises KeyError: if the JSON lacks the expected ``body_changes`` keys.
	    """
	    for rev in revisions:
	        url = f'https://discourse.ubuntu.com/posts/{post_id}/revisions/{rev}.json'
	        # Keep the connection open only for as long as the body is being read
	        with urlopen(url) as fp:
	            data = json.load(fp)
	        changes = data['body_changes']['side_by_side_markdown']
	        parser = TableParser(column=2)
	        parser.feed(changes)
	        # close() makes the parser process any text it is still buffering
	        parser.close()
	        # Explicit encoding: the default depends on the platform locale
	        Path(f'revision{rev}.md').write_text(parser.content, encoding='utf-8')


	# Script entry point: run main() and hand its result to the interpreter as
	# the exit status (does nothing when the module is merely imported).
	if __name__ == '__main__':
	    raise SystemExit(main())