Created
March 13, 2020 08:48
-
-
Save cessor/37c361416ae07b1c165f777832ca199f to your computer and use it in GitHub Desktop.
Random Wikipedia Articles using Python pool.apply_async
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Mit __iter__ kann man komplexe Iterationen abstrahieren | |
- Iteratoren sind Zustandsautomaten. | |
- Dadurch lässt sich das iterieren vom prozessieren trennen. | |
- So kann man den gleichen Iterator seriell oder parallel umsetzen | |
""" | |
import json | |
import time | |
import requests | |
import multiprocessing | |
def title(json_string):
    """Return the ``'title'`` entry of a JSON object document.

    ``json_string`` is parsed with ``json.loads``; the result is expected to
    be a JSON object. Returns ``None`` when the object has no ``'title'`` key.
    """
    document = json.loads(json_string)
    return document.get('title')
class RandomArticles:
    """Iterable that downloads ``n`` response bodies from ``url``.

    Every item is fetched with its own HTTP GET, so an endpoint that returns
    a different document per request produces ``n`` different documents.
    Iterating yields exactly ``n`` items, either one after another or through
    a process pool, depending on ``parallel``.

    Args:
        n: Number of documents to download per iteration pass.
        url: Address requested for every document.
        parallel: When true (the default), downloads are dispatched with
            ``multiprocessing.Pool.apply_async``; when false they run
            serially in the calling process.
        timeout: Seconds passed to ``requests.get`` as ``timeout``; ``None``
            disables the limit.
    """

    def __init__(self, n, url, *, parallel=True, timeout=10.0):
        self._url = url
        self._n = n
        self._parallel = parallel
        self._timeout = timeout

    def _download(self, *args):
        """Fetch ``self._url`` once and return the raw response body.

        Extra positional arguments are accepted and ignored so the method can
        be used directly as a pool callback.
        """
        return requests.get(self._url, timeout=self._timeout).content

    def _serial(self):
        """Yield ``n`` downloads, one request at a time."""
        for _ in range(self._n):
            yield self._download()

    def _concurrent(self):
        """Yield ``n`` downloads that were all submitted to a process pool."""
        # The bound method is sent to the workers, so the instance itself has
        # to be picklable.
        with multiprocessing.Pool() as pool:
            pending = [
                pool.apply_async(self._download)
                for _ in range(self._n)
            ]
            # Results are collected in submission order; ``get`` blocks until
            # the corresponding download has finished.
            for result in pending:
                yield result.get()

    def __iter__(self):
        """Return an iterator over exactly ``n`` downloaded bodies."""
        # Only one strategy runs per pass, and no worker processes are
        # started when there is nothing to download.
        if self._parallel and self._n > 0:
            return self._concurrent()
        return self._serial()
class Stopwatch:
    """Context manager that prints how long its ``with`` body took.

    On exit the duration is printed as seconds with two decimals (for
    example ``0.42s``) and stored on ``self.elapsed``. Exceptions raised in
    the body are not suppressed.
    """

    def __enter__(self):
        """Start timing and return the stopwatch itself."""
        # perf_counter is monotonic, so system clock adjustments cannot skew
        # the measured interval.
        self._start = time.perf_counter()
        return self

    def __exit__(self, *args):
        """Stop timing, record ``elapsed`` and print it."""
        self.elapsed = time.perf_counter() - self._start
        print(f'{self.elapsed:.2f}s')
if __name__ == '__main__':
    # Script entry point: time the download of articles from the Wikipedia
    # REST endpoint below and print the title of each one as it arrives.
    url = "https://en.wikipedia.org/api/rest_v1/page/random/summary"
    with Stopwatch():
        articles = RandomArticles(10, url)
        for payload in articles:
            print(title(payload))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment