Skip to content

Instantly share code, notes, and snippets.

@zhangxigithub
Last active August 29, 2015 14:05
Show Gist options
  • Save zhangxigithub/25b3aa95e18d7f6ab608 to your computer and use it in GitHub Desktop.
Save zhangxigithub/25b3aa95e18d7f6ab608 to your computer and use it in GitHub Desktop.
下载豆瓣妹子(http://dbmeizi.com)的图片。使用方法:运行 python db.py,脚本会在当前目录下创建 dbmeizi 文件夹并下载所有图片。
#!/usr/bin/python
# coding: utf-8
"""Download every image listed on http://www.dbmeizi.com into ./dbmeizi.

Usage: ``python db.py``
"""
# NOTE: the shebang has to be the very first line for the OS to honour it;
# the PEP 263 coding declaration is still valid on the second line.
import os
import urllib
import urllib2
import urlparse

from bs4 import BeautifulSoup

# Start-up banner followed by the directory the images are written to.
print("======start======")
print("./dbmeizi")
def downloadImage(imageURL):
    """Download one image into the ``./dbmeizi`` directory.

    The local file name is the last ``/``-separated segment of the URL's
    path component; the query string and fragment are not part of it, so
    ``http://host/a/b.jpg?x=1`` is saved as ``./dbmeizi/b.jpg``.

    Args:
        imageURL: URL of the image to fetch.

    Raises:
        ValueError: if the URL path is empty or ends in ``/``, so that no
            file name can be derived from it.
        IOError: propagated from ``urllib.urlretrieve`` when the download
            or the write to disk fails.
    """
    # rsplit also copes with a path that contains no "/" at all; the former
    # hand-written backwards scan dropped the first character in that case.
    filename = urlparse.urlparse(imageURL).path.rsplit("/", 1)[-1]
    if not filename:
        # Without this check urlretrieve would be asked to write to the
        # directory itself and fail with a much less helpful error.
        raise ValueError("no file name in image URL: %r" % (imageURL,))

    urllib.urlretrieve(imageURL, os.path.join("./dbmeizi", filename))
    print(filename + "..... done")
def findMM():
    """Download every image listed on dbmeizi.com into ``./dbmeizi``.

    Creates ``./dbmeizi`` when it does not exist yet, then fetches the
    listing pages ``http://www.dbmeizi.com/?p=0``, ``?p=1``, ... in order.
    On each page every ``<div class="pic">`` is inspected and the URL held
    in the ``data-bigimg`` attribute of its first ``<img>`` is handed to
    ``downloadImage``.  The walk stops at the first page that contains no
    such ``<div>``.

    Raises:
        urllib2.URLError: if a listing page cannot be fetched.
        Any exception raised by ``downloadImage`` for a failed download.
    """
    # os.makedirs raises OSError when the directory already exists, which
    # used to make every run after the first one fail immediately.
    if not os.path.isdir("./dbmeizi"):
        os.makedirs("./dbmeizi")

    index = 0
    while True:
        response = urllib2.urlopen("http://www.dbmeizi.com/?p=" + str(index))
        try:
            htmlString = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        # Name the parser explicitly so the parse does not depend on which
        # optional parsers (lxml, html5lib) happen to be installed.
        soup = BeautifulSoup(htmlString, "html.parser")
        pics = soup.find_all("div", {"class": "pic"})
        if not pics:
            # An empty listing page marks the end of the gallery.
            return

        for person in pics:
            mz = person.find("img")
            picURL = mz.get("data-bigimg") if mz is not None else None
            if not picURL:
                # Entry without an <img> or without a full-size image URL:
                # nothing to download, move on to the next one.
                continue
            downloadImage(picURL)

        index = index + 1
# Script entry point: crawl and download everything, then print the closing
# banner once findMM has returned.
findMM()
print("======end======")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment