Created
May 18, 2019 10:27
-
-
Save songouyang/01c3e8db141f7cbb4b5589f9c99e94e4 to your computer and use it in GitHub Desktop.
多线程下载器
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from os import path | |
import requests | |
from threading import Lock | |
from six.moves.urllib.parse import unquote, urlparse | |
from concurrent.futures import ThreadPoolExecutor, wait | |
class Downloader(object):
    """Download a file over HTTP in parallel byte ranges.

    The local file name is taken from the ``Content-Disposition`` response
    header when it carries one and from the URL path otherwise. The file is
    pre-allocated on disk and each worker of the thread pool fills in its
    own byte range through an HTTP ``Range`` request.
    """

    # Number of bytes requested from the response stream per disk write.
    CHUNK_SIZE = 64 * 1024
    # Name used when neither the header nor the URL yields a usable name.
    DEFAULT_FILE_NAME = 'download'

    # Building blocks of the Content-Disposition grammar. They are raw
    # strings so that regex escapes such as \d and \s reach the regex engine
    # untouched, and they are compiled once instead of on every call.
    _TOKEN = r"[-!#-'*+.\dA-Z^-z|~]+"
    _QDTEXT = r'[\t !#-\[\]-~]'
    _QUOTED_PAIR = r'\\[\t !-~]'
    _MIME_CHARSET = r'[-!#-&+\dA-Z^-z]+'
    _LANGUAGE = (
        # primary language, with optional extended-language subtags
        r'(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}(?:-[A-Za-z]{3}){0,2})?|[A-Za-z]{4,8})'
        # optional script
        r'(?:-[A-Za-z]{4})?'
        # optional region (it used to be mandatory, which rejected plain
        # tags such as "en")
        r'(?:-(?:[A-Za-z]{2}|\d{3}))?'
        # variants, extensions, private use
        r'(?:-(?:[\dA-Za-z]{5,8}|\d[\dA-Za-z]{3}))*'
        r'(?:-[\dA-WY-Za-wy-z](?:-[\dA-Za-z]{2,8})+)*'
        r'(?:-[Xx](?:-[\dA-Za-z]{1,8})+)?'
        # private-use-only tag
        r'|[Xx](?:-[\dA-Za-z]{1,8})+'
        # grandfathered tags
        r'|[Ee][Nn]-[Gg][Bb]-[Oo][Ee][Dd]'
        r'|[Ii]-[Aa][Mm][Ii]'
        r'|[Ii]-[Bb][Nn][Nn]'
        r'|[Ii]-[Dd][Ee][Ff][Aa][Uu][Ll][Tt]'
        r'|[Ii]-[Ee][Nn][Oo][Cc][Hh][Ii][Aa][Nn]'
        r'|[Ii]-[Hh][Aa][Kk]'
        r'|[Ii]-[Kk][Ll][Ii][Nn][Gg][Oo][Nn]'
        r'|[Ii]-[Ll][Uu][Xx]'
        r'|[Ii]-[Mm][Ii][Nn][Gg][Oo]'
        r'|[Ii]-[Nn][Aa][Vv][Aa][Jj][Oo]'
        r'|[Ii]-[Pp][Ww][Nn]'
        r'|[Ii]-[Tt][Aa][Oo]'
        r'|[Ii]-[Tt][Aa][Yy]'
        r'|[Ii]-[Tt][Ss][Uu]'
        r'|[Ss][Gg][Nn]-[Bb][Ee]-[Ff][Rr]'
        r'|[Ss][Gg][Nn]-[Bb][Ee]-[Nn][Ll]'
        r'|[Ss][Gg][Nn]-[Cc][Hh]-[Dd][Ee]'
    )
    # Percent-escapes accept lower-case hex digits as well as upper-case.
    _VALUE_CHARS = r'(?:%[\dA-Fa-f][\dA-Fa-f]|[-!#$&+.\dA-Z^-z|~])*'
    # One disposition parameter. Capturing groups, in order:
    #   1: filename as a bare token      2: filename as a quoted string
    #   3: charset of filename*          4: percent-encoded value of filename*
    # Parameters other than filename / filename* are matched but not captured.
    _DISPOSITION_PARM = (
        r'[Ff][Ii][Ll][Ee][Nn][Aa][Mm][Ee]\s*=\s*'
        r'(?:({token})|"((?:{qdtext}|{quoted_pair})*)")'
        r'|[Ff][Ii][Ll][Ee][Nn][Aa][Mm][Ee]\*\s*=\s*'
        r"({charset})'(?:{language})?'({value})"
        r'|{token}\s*=\s*(?:{token}|"(?:{qdtext}|{quoted_pair})*")'
        r"|{token}\*\s*=\s*{charset}'(?:{language})?'{value}"
    ).format(
        token=_TOKEN,
        qdtext=_QDTEXT,
        quoted_pair=_QUOTED_PAIR,
        charset=_MIME_CHARSET,
        language=_LANGUAGE,
        value=_VALUE_CHARS,
    )
    # The parameter pattern appears twice, so the first parameter owns groups
    # 1-4 and the last of the following parameters owns groups 5-8.
    _CONTENT_DISPOSITION_RE = re.compile(
        r'(?:{token}\s*;\s*)?(?:{parm})(?:\s*;\s*(?:{parm}))*|{token}'.format(
            token=_TOKEN, parm=_DISPOSITION_PARM))
    _QUOTED_PAIR_RE = re.compile(r'\\(.)')

    def __init__(self, workers_num=8):
        """Create the HTTP session and the worker pool.

        :param workers_num: number of pool threads, which is also the maximum
            number of byte ranges a download is split into.
        """
        self.session = requests.Session()
        self.pool = ThreadPoolExecutor(max_workers=workers_num)
        self.workers_num = workers_num

    @staticmethod
    def _decode_ext_value(value, charset):
        """Percent-decode a ``filename*`` value using its declared charset."""
        try:
            try:
                return unquote(value, encoding=charset)
            except TypeError:
                # Python 2: unquote() takes no ``encoding`` argument and
                # returns a byte string that still has to be decoded.
                return unquote(value).decode(charset)
        except LookupError:
            # Unknown charset label: use unquote()'s default decoding.
            return unquote(value)

    @classmethod
    def _name_from_content_disposition(cls, header):
        """Return the file name carried by a Content-Disposition value.

        Returns ``None`` when the header is missing, does not match the
        grammar, or names no file.
        """
        if not header:
            return None
        m = cls._CONTENT_DISPOSITION_RE.match(header)
        if not m:
            return None
        # The extended ``filename*`` form wins over plain ``filename``, and
        # later parameters win over the first one.
        for value_group, charset_group in ((8, 7), (4, 3)):
            if m.group(value_group) is not None:
                return cls._decode_ext_value(
                    m.group(value_group), m.group(charset_group))
        if m.group(6) is not None:
            # Quoted string: drop the backslash of every quoted-pair. The
            # replacement has to be the raw backreference r'\1'; a plain
            # '\1' is the control character U+0001.
            return cls._QUOTED_PAIR_RE.sub(r'\1', m.group(6))
        if m.group(5) is not None:
            return m.group(5)
        if m.group(2) is not None:
            return cls._QUOTED_PAIR_RE.sub(r'\1', m.group(2))
        return m.group(1)

    @staticmethod
    def _byte_ranges(file_size, parts):
        """Split ``file_size`` bytes into at most ``parts`` inclusive ranges.

        Returns a list of ``(start, end)`` pairs that cover every byte exactly
        once; the list is empty for an empty file.
        """
        if file_size <= 0:
            return []
        # Never create more ranges than there are bytes to fetch.
        parts = max(1, min(parts, file_size))
        size = file_size // parts
        ranges = []
        for i in range(parts):
            start = i * size
            # The last range also takes the remainder of the division. Range
            # ends are inclusive, so the last byte is file_size - 1.
            end = file_size - 1 if i == parts - 1 else start + size - 1
            ranges.append((start, end))
        return ranges

    def get_file_name(self, url):
        """Return the local file name to use for ``url``.

        The name comes from the ``Content-Disposition`` header of a HEAD
        response if it provides one, otherwise from the last segment of the
        URL path, otherwise ``DEFAULT_FILE_NAME``. Directory components are
        always stripped, so the result is a bare file name.
        """
        # Redirects are followed so that the headers inspected are those of
        # the final resource, as they are in get_file_size().
        rsp = self.session.head(url, allow_redirects=True)
        name = self._name_from_content_disposition(
            rsp.headers.get('Content-Disposition'))
        if name:
            # The header is server-controlled input: keep only the last path
            # component so it cannot point outside the working directory.
            name = path.basename(name)
        if not name or name in ('.', '..'):
            name = path.basename(unquote(urlparse(url).path))
        if not name or name in ('.', '..'):
            name = self.DEFAULT_FILE_NAME
        return name

    def get_file_size(self, url):
        """Return the size in bytes announced for ``url``.

        :raises KeyError: if the response has no ``content-length`` header.
        :raises requests.HTTPError: if the final response is an error status.
        """
        # Let requests follow the redirect chain: it resolves relative
        # ``Location`` values and gives up after its redirect limit instead
        # of looping forever.
        rsp = self.session.head(url, allow_redirects=True)
        # The length of an error page is not the length of the file.
        rsp.raise_for_status()
        return int(rsp.headers["content-length"])

    def handler(self, url, file_name, start, end):
        """Fetch bytes ``start``..``end`` (inclusive) into ``file_name``.

        ``file_name`` must already exist; the data is written at offset
        ``start``.

        :raises IOError: if the server ignores the ``Range`` header for a
            range that does not start at offset 0.
        :raises requests.HTTPError: if the response is an error status.
        """
        headers = {'Range': 'bytes={}-{}'.format(start, end)}
        rsp = self.session.get(url, headers=headers, stream=True)
        try:
            rsp.raise_for_status()
            if rsp.status_code != 206 and start != 0:
                # A non-206 answer carries the body from offset 0; writing
                # it at ``start`` would corrupt the file.
                raise IOError(
                    'server ignored the Range request for {} (status {})'
                    .format(url, rsp.status_code))
            # Never write past the requested range, even if more is sent.
            remaining = end - start + 1
            with open(file_name, 'rb+') as f:
                f.seek(start)
                # Stream to disk chunk by chunk instead of buffering the
                # whole range in memory.
                for chunk in rsp.iter_content(chunk_size=self.CHUNK_SIZE):
                    if remaining <= 0:
                        break
                    if len(chunk) > remaining:
                        chunk = chunk[:remaining]
                    f.write(chunk)
                    remaining -= len(chunk)
        finally:
            rsp.close()

    def run(self, url):
        """Download ``url`` into the current directory.

        The file is created at its final size first, then its byte ranges are
        fetched concurrently by the pool. An exception raised by any worker
        is re-raised here once all workers have finished.
        """
        file_name = self.get_file_name(url)
        file_size = self.get_file_size(url)
        # Pre-allocate the file so every worker can seek to its own offset.
        with open(file_name, 'wb') as f:
            f.truncate(file_size)
        futures = [
            self.pool.submit(self.handler, url, file_name, start, end)
            for start, end in self._byte_ranges(file_size, self.workers_num)
        ]
        # Wait for every worker first so none is still writing when an error
        # surfaces, then re-raise the first failure instead of dropping it.
        wait(futures)
        for future in futures:
            future.result()
if __name__ == "__main__":
    import time

    # Sample downloads, fetched one after another with a single Downloader.
    targets = (
        "http://demo.borland.com/testsite/downloads/downloadfile.php?file=Small.zip&cd=attachment+filename",
        "http://static-aliyun-doc.oss-cn-hangzhou.aliyuncs.com/download/pdf/DNHCS_MGW1842487_zh-CN_cn_181112170048_public_92408e650bbaaab8b146f371082a0ac3.pdf",
        "https://p.pstatp.com/origin/ff670000482866725305",
    )
    downloader = Downloader()
    started = time.time()
    for target in targets:
        downloader.run(target)
    # Print the total wall-clock time of all downloads, in seconds.
    print(time.time() - started)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment