Created
June 28, 2020 17:12
-
-
Save urbanecm/ec8d74604f584d2272edaf92a3a3711f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import csv
import sys

import mwparserfromhell
import requests
import toolforge
# Query selecting every revision tagged as a mentorship module question.
TAGGED_REVISIONS_SQL = (
    'select ct_rev_id from change_tag where ct_tag_id='
    '(select ctd_id from change_tag_def where ctd_name="mentorship module question")'
)

# Seconds to wait for a single API response; without a timeout one stalled
# request would hang the whole run.
REQUEST_TIMEOUT = 30


def fetch_tagged_rev_ids(conn):
    """Return the revision IDs selected by ``TAGGED_REVISIONS_SQL``.

    *conn* is the database connection returned by ``toolforge.connect``.
    """
    with conn.cursor() as cur:
        cur.execute(TAGGED_REVISIONS_SQL)
        return [row[0] for row in cur.fetchall()]


def extract_revision(payload):
    """Return the first revision dict of the first page in *payload*.

    *payload* is the decoded JSON of an ``action=query&prop=revisions``
    request.  Returns ``None`` when the response carries no ``pages`` (or no
    ``revisions``) entry, so the caller can skip that revision.
    """
    pages = payload.get('query', {}).get('pages')
    if not pages:
        return None
    revisions = next(iter(pages.values())).get('revisions')
    if not revisions:
        return None
    return revisions[0]


def fetch_revision(session, api_url, rev_id):
    """Fetch content and author of *rev_id* from the API at *api_url*.

    Returns the revision dict, or ``None`` if the API returned no page data.
    Raises ``requests.HTTPError`` on a non-2xx status and ``RuntimeError``
    when the response body contains an ``error`` member.
    """
    response = session.get(api_url, params={
        "action": "query",
        "format": "json",
        "prop": "revisions",
        "revids": rev_id,
        "rvprop": "content|user",
    }, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    payload = response.json()
    if 'error' in payload:
        # Fail loudly: skipping here would silently drop rows from the report.
        raise RuntimeError('API error for revision %s: %r' % (rev_id, payload['error']))
    return extract_revision(payload)


def extract_question(text):
    """Return the last section of wikitext *text*, heading excluded, on one line."""
    sections = mwparserfromhell.parse(text).get_sections(include_headings=False)
    return str(sections[-1]).replace('\n', ' ').strip()


def iter_question_rows(api_url, rev_ids):
    """Yield ``(rev_id, username, question)`` for each revision in *rev_ids*.

    Revisions for which the API returns no page data or no content (``'*'``
    key) are skipped.
    """
    # One session for all requests so the HTTP connection is reused.
    with requests.Session() as session:
        for rev_id in rev_ids:
            revision = fetch_revision(session, api_url, rev_id)
            if revision is None:
                continue
            text = revision.get('*')
            if text is None:
                continue
            # 'user' can be absent (presumably when the username is hidden,
            # reported by the API as 'userhidden' -- confirm); emit an empty
            # name rather than aborting the whole run.
            yield rev_id, revision.get('user', ''), extract_question(text)


def write_rows(stream, rows):
    """Write the ``Diff;Username;Question`` header and *rows* to *stream*.

    Rows are written as semicolon-separated CSV.  Fields containing ``;``,
    ``"`` or a line break are quoted by the ``csv`` module, so free-text
    questions cannot break the column layout; all other rows are written
    exactly as ``a;b;c``.
    """
    writer = csv.writer(stream, delimiter=';', lineterminator='\n')
    writer.writerow(('Diff', 'Username', 'Question'))
    writer.writerows(rows)


def main(argv=None):
    """Print all mentorship-module questions of one wiki as CSV on stdout.

    *argv* defaults to ``sys.argv[1:]`` and must hold the database name
    (e.g. ``cswiki``) followed by the API URL
    (e.g. ``https://cs.wikipedia.org/w/api.php``).
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 2:
        sys.exit('usage: %s <dbname> <api_url>' % sys.argv[0])
    dbname, api_url = args[0], args[1]

    conn = toolforge.connect(dbname)
    try:
        rev_ids = fetch_tagged_rev_ids(conn)
    finally:
        # Close before the slow HTTP loop; the connection is not needed again.
        conn.close()

    write_rows(sys.stdout, iter_question_rows(api_url, rev_ids))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment