Skip to content

Instantly share code, notes, and snippets.

@yi-Xu-0100
Created June 25, 2020 08:34
Show Gist options
  • Save yi-Xu-0100/5222453b55919f659536815a25a8f8cf to your computer and use it in GitHub Desktop.
Save yi-Xu-0100/5222453b55919f659536815a25a8f8cf to your computer and use it in GitHub Desktop.
简易下载《诗词来了》的代码
import re
import os
import urllib.request
from urllib.parse import unquote
import base64
import codecs
import time
import random
def getHtml(url, headers):
    """Fetch *url* with the given request headers and return the raw body.

    Args:
        url: Address to request.
        headers: Mapping of header names to values attached to the request.

    Returns:
        The response body as ``bytes``, exactly as read from the response.

    Raises:
        Any error raised by ``urllib.request.urlopen`` propagates unchanged.
    """
    req = urllib.request.Request(url=url, headers=headers)
    # Use the response as a context manager so it is closed as soon as the
    # body has been read instead of lingering until garbage collection.
    with urllib.request.urlopen(req) as response:
        return response.read()
def saveHtml(file_name, file_content):
    """Write *file_content* to ``<file_name>.html`` in the current directory.

    Forward and backward slashes in *file_name* are replaced with ``_``
    before the ``.html`` suffix is appended, so the result is always a plain
    file name rather than a path.

    Args:
        file_name: Base name of the output file, without the suffix.
        file_content: Bytes to write; the file is opened in binary mode.
    """
    # Mind characters that are forbidden in Windows file names, such as "/".
    target = file_name.replace('/', '_').replace('\\', '_') + ".html"
    # Mode "wb" already truncates an existing output file, so nothing needs
    # to be deleted first. In particular the un-suffixed path `file_name`
    # must not be removed: it is a different file from the one written here.
    with open(target, "wb") as f:
        # The file is written as bytes rather than str, so the caller must
        # pass already-encoded content.
        f.write(file_content)
def download(name):
    """Download the audio referenced by the saved page and store it as *name*.

    Scans ``.\\1\\sduview.html`` line by line for ``Audio('/xaud/<token>');``
    calls. Each token is URL-unquoted and base64-decoded to obtain the real
    download URL, which is then fetched with ``urllib.request.urlretrieve``.
    Every match is saved to the same *name*, so a later match overwrites an
    earlier one.

    Args:
        name: Destination file name for the downloaded audio.
    """
    # Raw string: the regex escapes must reach `re` untouched instead of
    # being parsed as (invalid) string escape sequences.
    audio_pattern = re.compile(r"Audio\('\/xaud\/(.+)'\);")
    with codecs.open(r'.\1\sduview.html', encoding='utf8') as text:
        for line in text:
            for encoded in audio_pattern.findall(line):
                # Decode the captured token itself rather than the repr of
                # the match list, so no stray quoting characters are fed to
                # the base64 decoder and the URL is real text, not a
                # sliced bytes repr.
                durl = base64.b64decode(unquote(encoded)).decode('utf8')
                if not durl:
                    continue
                print(durl)
                print("正在下载:" + name)
                urllib.request.urlretrieve(durl, name)
                print('歌曲下载完成')
                # Wait a random 1-5 seconds after each download
                # (presumably to avoid hammering the server).
                time.sleep(random.randint(1, 5))
def getname():
    """Return the text of the first ``<h5>`` heading in the saved page.

    Reads ``.\\1\\sduview.html`` line by line and returns the text between
    ``<h5>`` and ``</h5>`` from the first line that contains such a pair.

    Returns:
        The heading text as ``str``, or ``None`` when no line matches.
    """
    heading = re.compile('<h5>(.+)</h5>')
    with codecs.open(r'.\1\sduview.html', encoding='utf8') as text:
        for line in text:
            match = heading.search(line)
            if match:
                # Return the captured text itself. Slicing the repr of the
                # match list would escape backslashes and non-printable
                # characters (e.g. U+3000) and corrupt the title.
                return match.group(1)
    return None
# Driver: for each listing page 100..118, fetch the page, save it locally,
# derive an output file name from its <h5> heading and download its audio.

# The request headers are identical for every page, so build them once.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
for count in range(100, 119):
    aurl = "https://ksfan.net/story/shi-ci-yin-yue-ting/?page=" + str(count)
    print("获取网页")
    html = getHtml(aurl, headers)
    print("网页下载完成")
    # NOTE(review): saveHtml writes "sduview.html" into the current working
    # directory, while getname() and download() read r'.\1\sduview.html'.
    # Confirm the script is run from a location where both refer to the
    # same file.
    saveHtml("sduview", html)
    title = getname()
    # getname() returns None when the page has no <h5> heading; fall back to
    # a number-only file name instead of failing on str + None.
    if title is None:
        name = str(count) + '.mp3'
    else:
        name = str(count) + ' ' + title + '.mp3'
    # Path separators in the title would otherwise be treated as directories.
    download(name.replace('/', '_').replace('\\', '_'))
    # NOTE(review): assumes this message is printed once per page — confirm
    # whether it was meant to run once after the whole loop instead.
    print("成功")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment