Last active
January 4, 2019 10:25
-
-
Save yi-jiayu/4f0cf579984ad8a62979adc4ff10066f to your computer and use it in GitHub Desktop.
Automatically guess Quizzarium answers by matching each hint pattern against the text of DuckDuckGo search results.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"from bs4 import BeautifulSoup\n", | |
"\n", | |
"ddg_query_base = 'https://duckduckgo.com/html/?q='\n", | |
"\n", | |
"resp = requests.get(ddg_query_base + 'Who originally sang vocals for ACDC?')\n", | |
"text = resp.text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import re\n", | |
"\n", | |
"whitespace_regexp = re.compile('\\s+')\n", | |
"\n", | |
"def get_search_result(question):\n", | |
" resp = requests.get(ddg_query_base + question)\n", | |
" soup = BeautifulSoup(resp.text)\n", | |
" pageText = soup.findAll(text=True)\n", | |
" non_empty_texts = [t for t in pageText if whitespace_regexp.fullmatch(t) is None]\n", | |
" stripped_texts = [t.strip() for t in non_empty_texts]\n", | |
" return stripped_texts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"re.compile(r'(?:^|[^\\w])(\\w\\w\\w\\w \\w\\wa\\ws)(?:$|[^\\w])', re.UNICODE)" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def build_hint_regexp(hint):\n", | |
" hint = ' '.join([h.replace(' ', '').replace('_', '\\w') for h in hint.split(' ')])\n", | |
" hint = '(?:^|[^\\w])(' + hint + ')(?:$|[^\\w])'\n", | |
" return re.compile(hint)\n", | |
"build_hint_regexp('_ _ _ _ _ _ a _ s')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['html class']\n", | |
"['html class']\n", | |
"['html class']\n", | |
"['Dirt Cheap']\n", | |
"['Dirt Cheap']\n", | |
"['Dirt Cheap']\n", | |
"['that music']\n", | |
"['sold three']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['This video']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['like Brian']\n", | |
"['coal miner']\n", | |
"['Dirt Cheap']\n", | |
"['Dirt Cheap']\n", | |
"['Dave Evans', 'band needs']\n", | |
"['Long sheet']\n", | |
"['with scene']\n", | |
"['from their']\n", | |
"['know which']\n", | |
"['that seems', 'that brian']\n", | |
"['Hell after']\n", | |
"['Dave Evans']\n", | |
"['Mark Evans']\n", | |
"['wave group']\n" | |
] | |
} | |
], | |
"source": [ | |
"hint_regexp = build_hint_regexp('_ _ _ _ _ _ _ _ _')\n", | |
"for t in stripped_texts:\n", | |
" m = hint_regexp.findall(t)\n", | |
" if m:\n", | |
" print(m)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['html class']\n", | |
"['html class']\n", | |
"['html class']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Mark Evans']\n" | |
] | |
} | |
], | |
"source": [ | |
"hint_regexp = build_hint_regexp('_ _ _ _ _ _ a _ s')\n", | |
"for t in stripped_texts:\n", | |
" m = hint_regexp.findall(t)\n", | |
" if m:\n", | |
" print(m)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Dave Evans']\n", | |
"['Mark Evans']\n" | |
] | |
} | |
], | |
"source": [ | |
"hint_regexp = build_hint_regexp(' _ _ _ _ _ v a n s')\n", | |
"for t in stripped_texts:\n", | |
" m = hint_regexp.findall(t)\n", | |
" if m:\n", | |
" print(m)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def find_answer(search_result, hint):\n", | |
" hint_regexp = build_hint_regexp(hint)\n", | |
" for t in search_result:\n", | |
" m = hint_regexp.findall(t)\n", | |
" if m:\n", | |
" print(m)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['David Copperfield']\n", | |
"['Joyce Frankenberg']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n", | |
"['David Copperfield']\n" | |
] | |
} | |
], | |
"source": [ | |
"search_result = get_search_result('What eponymous Dickens character born with a caul over head')\n", | |
"find_answer(search_result, '_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['html class']\n", | |
"['html class']\n", | |
"['html class']\n", | |
"['John Books']\n", | |
"['1975 novel']\n", | |
"['that would']\n", | |
"['Five Movie']\n", | |
"['many 1980s', 'Sure Thing']\n", | |
"['2018 Oscar']\n", | |
"['Know About']\n", | |
"['tall today']\n", | |
"['Were Never', 'Same After']\n", | |
"['from these']\n", | |
"['Feud shows']\n", | |
"['Know About']\n" | |
] | |
} | |
], | |
"source": [ | |
"search_result = get_search_result('John Books was the final role of which actor')\n", | |
"find_answer(search_result, '_ _ _ _ _ _ _ _ _')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['Bluestones', 'Trilithons', 'DuckDuckGo']\n", | |
"['Historical']\n", | |
"['Historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n", | |
"['historical', 'bluestones', 'trilithons']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['guaranteed']\n", | |
"['bluestones', 'trilithons']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['conditions']\n", | |
"['Bluestones']\n", | |
"['guaranteed']\n", | |
"['bluestones', 'trilithons']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['abprivivry']\n", | |
"['collegiate']\n", | |
"['coursehero']\n", | |
"['translator']\n", | |
"['Dictionary']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['deviantART']\n", | |
"['scientific']\n", | |
"['containing']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n", | |
"['Trilithons']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n", | |
"['Trilithons']\n", | |
"['stonehenge']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n", | |
"['biljoissac']\n", | |
"['Wallpapers']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n", | |
"['biljoissac']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n", | |
"['triviaquiz']\n", | |
"['Historical']\n", | |
"['Bluestones']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['seatmerazu']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['cufamosist']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['historical']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge', 'monographs']\n", | |
"['Stonehenge']\n", | |
"['Bluestones']\n", | |
"['everywhere']\n", | |
"['monographs']\n", | |
"['Bluestones']\n", | |
"['appearance']\n", | |
"['Bluestones']\n", | |
"['appearance']\n", | |
"['Fibreglass']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n", | |
"['Bluestones']\n", | |
"['Trilithons']\n", | |
"['Stonehenge']\n" | |
] | |
} | |
], | |
"source": [ | |
"search_result = get_search_result('Where would you find Saracens Bluestones and Trilithons')\n", | |
"find_answer(search_result, '_ _ _ _ _ _ _ _ _ _')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['Stonehenge']\n", | |
"['Stonehenge']\n", | |
"['Stonehenge']\n", | |
"['Stonehenge']\n", | |
"['stonehenge']\n", | |
"['Stonehenge']\n", | |
"['Stonehenge']\n", | |
"['Stonehenge']\n", | |
"['Stonehenge']\n", | |
"['Stonehenge']\n", | |
"['Stonehenge']\n", | |
"['Stonehenge']\n" | |
] | |
} | |
], | |
"source": [ | |
"search_result = get_search_result('Where would you find Saracens Bluestones and Trilithons')\n", | |
"find_answer(search_result, '_ _ _ _ _ _ e n _ _')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.