Created
October 10, 2019 19:09
-
-
Save CodeMonkeyKevin/0123bdfccb017ba0f843ca50cba5c37e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import requests | |
import concurrent.futures | |
from concurrent.futures import ThreadPoolExecutor | |
from time import sleep | |
import urllib.request | |
import ssl | |
# NOTE(review): this executor is not used by any code visible in this file
# (the download loop creates its own); kept so the module-level name `pool`
# still exists for anything that may reference it.
pool = ThreadPoolExecutor(10)

# Read the list of URLs, one per line. The file is only read, so open it
# read-only (no write permission needed) and close the handle promptly
# instead of leaving it to the garbage collector.
with open("urls.txt", "r") as url_file:
    urls = url_file.readlines()
# Retrieve a single page and return its contents
def load_url(url, timeout, *, verify=False):
    """Fetch *url* and return the response body as bytes.

    Args:
        url: The URL to open; passed straight to ``urllib.request.urlopen``.
        timeout: Timeout in seconds for the blocking network operations.
        verify: When true, keep the default TLS settings (certificate and
            hostname verification). Defaults to ``False``, which preserves
            this script's historical behaviour of skipping verification.

    Returns:
        The full response body read from the connection.

    Raises:
        Whatever ``urlopen`` or ``read`` raise (e.g. ``urllib.error.URLError``,
        ``ValueError`` for a malformed URL, or a timeout error).
    """
    ctx = ssl.create_default_context()
    if not verify:
        # SECURITY: with verification off, HTTPS connections are open to
        # man-in-the-middle attacks. Pass verify=True for untrusted networks.
        # check_hostname must be cleared before verify_mode is set to
        # CERT_NONE; the ssl module rejects the reverse order.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
    with urllib.request.urlopen(url, timeout=timeout, context=ctx) as conn:
        return conn.read()
# Download every URL concurrently. The with statement waits for the worker
# threads and shuts the pool down when the block exits.
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Start the load operations and mark each future with its URL.
    # Each line is stripped once up front, and blank lines are skipped: an
    # empty string is not a URL and would only fail inside load_url.
    future_to_url = {
        executor.submit(load_url, target, 60): target
        for target in map(str.strip, urls)
        if target
    }
    # Report results in completion order, not submission order.
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result()
        except Exception as exc:
            # Broad on purpose: one failing URL must not abort the others.
            print('%r generated an exception: %s' % (url, exc))
        else:
            print('%r page is %d bytes' % (url, len(data)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment