Created
June 25, 2020 08:34
-
-
Save yi-Xu-0100/5222453b55919f659536815a25a8f8cf to your computer and use it in GitHub Desktop.
简易下载《诗词来了》的代码
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
import urllib.request | |
from urllib.parse import unquote | |
import base64 | |
import codecs | |
import time | |
import random | |
def getHtml(url, headers):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address to request.
        headers: Mapping of HTTP header names to values sent with the request.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    req = urllib.request.Request(url=url, headers=headers)
    # Close the response deterministically instead of leaving the connection
    # open until the object happens to be garbage collected.
    with urllib.request.urlopen(req) as response:
        return response.read()
def saveHtml(file_name, file_content):
    """Write ``file_content`` to ``<file_name>.html``, replacing any old copy.

    Args:
        file_name: Base name of the output file, without the extension.
            Forward slashes and backslashes are replaced by ``_`` because
            they are not allowed in Windows file names.
        file_content: Page body as ``bytes``; it is written unchanged.
    """
    target = file_name.replace('/', '_').replace('\\', '_') + ".html"
    # Mode "wb" truncates an existing file, so no separate delete is needed.
    # Deleting ``file_name`` itself would hit a different path (no ".html"
    # suffix, separators not replaced) than the one written here.
    with open(target, "wb") as f:
        f.write(file_content)
def download(name, source=r'.\1\sduview.html', delay_range=(1, 5)):
    """Download every audio file referenced by the saved page to ``name``.

    Each line of the page is searched for ``Audio('/xaud/<payload>');``.
    The payload is URL-unquoted and base64-decoded to obtain the real
    download address, which is then fetched with ``urlretrieve``.

    Args:
        name: Destination file name for the downloaded audio.
        source: Path of the saved HTML page to scan. Defaults to the fixed
            location this function has always read.
        delay_range: Inclusive ``(low, high)`` bounds, in whole seconds, of
            the random pause made after each download.

    Raises:
        ValueError: If a payload is not valid base64 or does not decode to
            UTF-8 text (``binascii.Error`` and ``UnicodeDecodeError`` are
            both subclasses of ``ValueError``).
    """
    # Raw string avoids invalid-escape warnings; the non-greedy group keeps
    # two players on the same line from being merged into one payload.
    pattern = re.compile(r"Audio\('/xaud/(.+?)'\);")
    with open(source, encoding='utf8') as text:
        for line in text:
            for payload in pattern.findall(line):
                # Decode the bytes properly instead of slicing their repr,
                # which would keep escape sequences such as a doubled
                # backslash or \x.. in the URL.
                durl = base64.b64decode(unquote(payload)).decode('utf8')
                if not durl:
                    continue
                print(durl)
                print("正在下载:" + name)
                urllib.request.urlretrieve(durl, name)
                print('歌曲下载完成')
                # Random pause between downloads.
                time.sleep(random.randint(*delay_range))
def getname(source=r'.\1\sduview.html'):
    """Return the text of the first ``<h5>`` heading in the saved page.

    Args:
        source: Path of the saved HTML page to scan. Defaults to the fixed
            location this function has always read.

    Returns:
        The heading text exactly as it appears between ``<h5>`` and
        ``</h5>``, or ``None`` when no line contains such a heading.
    """
    # Non-greedy so that two headings on one line are not merged.
    heading = re.compile(r'<h5>(.+?)</h5>')
    with open(source, encoding='utf8') as text:
        for line in text:
            found = heading.search(line)
            if found:
                # Use the captured group directly; slicing the repr of the
                # match list would escape backslashes and mixed quotes.
                return found.group(1)
    return None
# Script body: for each listing page 100..118, fetch the page, save it, read
# the song title from the saved copy and download the audio it references.

# The request headers are the same for every page, so build them once.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}

# NOTE(review): saveHtml("sduview", ...) writes sduview.html into the current
# directory, while getname() and download() read .\1\sduview.html -- confirm
# that both really refer to the same file in the intended setup.
for count in range(100, 119):
    aurl = "https://ksfan.net/story/shi-ci-yin-yue-ting/?page=" + str(count)
    print("获取网页")
    html = getHtml(aurl, headers)
    print("网页下载完成")
    saveHtml("sduview", html)
    # getname() returns None when the page has no <h5> title; fall back to
    # the bare page number instead of failing on str + None.
    title = getname()
    if title:
        name = str(count) + ' ' + title + '.mp3'
    else:
        name = str(count) + '.mp3'
    # Path separators are not allowed in Windows file names.
    download(name.replace('/', '_').replace('\\', '_'))
    print("成功")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment