zhangxigithub · August 29, 2015 14:05
diff --git a/db2.py b/db2.py
 #coding:utf-8
 #!/usr/bin/python 
 import Queue
 import threading
 import os
 import time
 import random

 import urlparse,urllib,urllib2,os,time,threading,Queue
 from bs4 import BeautifulSoup

 def downloadImage(imageURL):
 	"""Download one image into the ./dbmeizi/ directory.

 	The local file name is the last segment of the URL path, i.e. the text
 	after the final '/'. The target directory is created on first use.

 	Args:
 		imageURL: URL of the image to fetch.

 	Raises:
 		OSError: if the target directory is missing and cannot be created.
 		Whatever urllib.urlretrieve raises for a failed download or local
 		write is propagated unchanged.
 	"""
 	targetDir = "./dbmeizi/"

 	# basename() keeps everything after the last '/'. Unlike a manual
 	# backwards scan that stops at index 1, it does not drop the first
 	# character of a path that contains no slash at all.
 	filename = os.path.basename(urlparse.urlparse(imageURL).path)

 	# urlretrieve() does not create missing directories. Download worker
 	# threads call this function concurrently, so tolerate another thread
 	# creating the directory between the check and makedirs().
 	if not os.path.isdir(targetDir):
 		try:
 			os.makedirs(targetDir)
 		except OSError:
 			if not os.path.isdir(targetDir):
 				raise

 	urllib.urlretrieve(imageURL, targetDir + filename)

 	print(filename + "..... done")


 def findMM(startIndex,length):
 	"""Collect image URLs from consecutive listing pages of www.dbmeizi.com.

 	Pages startIndex .. startIndex+length-1 are fetched one after another.
 	On each page, every <div class="pic"> contributes the "data-bigimg"
 	attribute of the first <img> element found inside it.

 	Args:
 		startIndex: number of the first page to fetch (sent as the "p"
 			query parameter).
 		length: how many consecutive pages to fetch.

 	Returns:
 		A list with the collected URLs, in the order the pages were read.

 	Raises:
 		Errors raised by urllib2.urlopen or by reading the response are not
 		caught, so a page that cannot be fetched aborts the whole scan.
 	"""
 	picList = []
 	for index in range(startIndex, startIndex + length):
 		response = urllib2.urlopen("http://www.dbmeizi.com/?p="+str(index))
 		try:
 			htmlString = response.read()
 		finally:
 			# Release the connection even when read() fails.
 			response.close()

 		soup = BeautifulSoup(htmlString)

 		for person in soup.findAll("div",{"class":"pic"}):
 			mz = person.find("img")
 			# A block without an image, or an image without the big-image
 			# attribute, is skipped instead of crashing the whole scan.
 			if mz is None:
 				continue
 			picURL = mz.get("data-bigimg")
 			if not picURL:
 				continue
 			picList.append(picURL)

 		print("find page "+str(index))

 	return picList

 class Download(threading.Thread):
 	"""Worker thread that downloads image URLs taken from a shared queue.

 	Each instance loops forever: it takes one URL from the queue, passes it
 	to downloadImage() and then marks the queue item as done. The loop
 	never exits on its own, so main() starts the workers as daemon threads.
 	"""

 	# Class-level default label; main() replaces it per instance with the
 	# worker index. NOTE: this overrides the `name` attribute that
 	# threading.Thread itself provides.
 	name = ""

 	def __init__(self,que):
 		"""Create a worker bound to a queue.

 		Args:
 			que: queue object providing get() and task_done(); every item
 				is expected to be an image URL.
 		"""
 		threading.Thread.__init__(self)
 		self.que=que

 	def run(self):
 		"""Process queue items until the interpreter exits."""
 		while True:
 			host = self.que.get()
 			try:
 				downloadImage(host)
 			except Exception as e:
 				# One bad URL must not kill the worker: report it and
 				# carry on with the next queue item.
 				print("download failed: %r (%r)" % (host, e))
 			finally:
 				# Always acknowledge the item. join() on the queue only
 				# returns once task_done() has been called for every
 				# item, so skipping it after an error would hang main().
 				self.que.task_done()


 def main(startIndex=0, length=900, workerCount=10):
 	"""Scan the listing pages, then download every image found.

 	All image URLs are collected first with findMM(). They are then handed
 	to a pool of Download worker threads through a queue, and the function
 	blocks until every queued URL has been processed.

 	Args:
 		startIndex: first listing page to scan (default 0).
 		length: number of consecutive listing pages to scan (default 900).
 		workerCount: number of download threads to start (default 10).
 	"""
 	theList = findMM(startIndex, length)

 	# maxsize 0 means the queue is unbounded.
 	q = Queue.Queue(0)

 	for x in range(workerCount):
 		d = Download(q)
 		d.name = str(x)
 		# Download.run() never returns; daemon threads do not keep the
 		# interpreter alive once main() is finished.
 		d.setDaemon(True)
 		d.start()

 	for url in theList:
 		q.put(url)

 	# Wait until the workers have acknowledged every queued URL.
 	q.join()


 # Script entry point: the call is unconditional, so the crawl starts as soon
 # as this file is executed (or imported).
 main()
	#coding:utf-8
	#!/usr/bin/python
	import Queue
	import threading
	import os
	import time
	import random

	import urlparse,urllib,urllib2,os,time,threading,Queue
	from bs4 import BeautifulSoup

	def downloadImage(imageURL):
		"""Download one image into the ./dbmeizi/ directory.

		The local file name is the last segment of the URL path, i.e. the
		text after the final '/'. The target directory is created on first
		use.

		Args:
			imageURL: URL of the image to fetch.

		Raises:
			OSError: if the target directory is missing and cannot be
				created.
			Whatever urllib.urlretrieve raises for a failed download or
			local write is propagated unchanged.
		"""
		targetDir = "./dbmeizi/"

		# basename() keeps everything after the last '/'. Unlike a manual
		# backwards scan that stops at index 1, it does not drop the first
		# character of a path that contains no slash at all.
		filename = os.path.basename(urlparse.urlparse(imageURL).path)

		# urlretrieve() does not create missing directories. Download worker
		# threads call this function concurrently, so tolerate another
		# thread creating the directory between the check and makedirs().
		if not os.path.isdir(targetDir):
			try:
				os.makedirs(targetDir)
			except OSError:
				if not os.path.isdir(targetDir):
					raise

		urllib.urlretrieve(imageURL, targetDir + filename)

		print(filename + "..... done")


	def findMM(startIndex,length):
		"""Collect image URLs from consecutive listing pages of www.dbmeizi.com.

		Pages startIndex .. startIndex+length-1 are fetched one after
		another. On each page, every <div class="pic"> contributes the
		"data-bigimg" attribute of the first <img> element found inside it.

		Args:
			startIndex: number of the first page to fetch (sent as the "p"
				query parameter).
			length: how many consecutive pages to fetch.

		Returns:
			A list with the collected URLs, in the order the pages were read.

		Raises:
			Errors raised by urllib2.urlopen or by reading the response are
			not caught, so a page that cannot be fetched aborts the whole
			scan.
		"""
		picList = []
		for index in range(startIndex, startIndex + length):
			response = urllib2.urlopen("http://www.dbmeizi.com/?p="+str(index))
			try:
				htmlString = response.read()
			finally:
				# Release the connection even when read() fails.
				response.close()

			soup = BeautifulSoup(htmlString)

			for person in soup.findAll("div",{"class":"pic"}):
				mz = person.find("img")
				# A block without an image, or an image without the
				# big-image attribute, is skipped instead of crashing the
				# whole scan.
				if mz is None:
					continue
				picURL = mz.get("data-bigimg")
				if not picURL:
					continue
				picList.append(picURL)

			print("find page "+str(index))

		return picList

	class Download(threading.Thread):
		"""Worker thread that downloads image URLs taken from a shared queue.

		Each instance loops forever: it takes one URL from the queue, passes
		it to downloadImage() and then marks the queue item as done. The
		loop never exits on its own, so main() starts the workers as daemon
		threads.
		"""

		# Class-level default label; main() replaces it per instance with
		# the worker index. NOTE: this overrides the `name` attribute that
		# threading.Thread itself provides.
		name = ""

		def __init__(self,que):
			"""Create a worker bound to a queue.

			Args:
				que: queue object providing get() and task_done(); every
					item is expected to be an image URL.
			"""
			threading.Thread.__init__(self)
			self.que=que

		def run(self):
			"""Process queue items until the interpreter exits."""
			while True:
				host = self.que.get()
				try:
					downloadImage(host)
				except Exception as e:
					# One bad URL must not kill the worker: report it and
					# carry on with the next queue item.
					print("download failed: %r (%r)" % (host, e))
				finally:
					# Always acknowledge the item. join() on the queue only
					# returns once task_done() has been called for every
					# item, so skipping it after an error would hang main().
					self.que.task_done()


	def main(startIndex=0, length=900, workerCount=10):
		"""Scan the listing pages, then download every image found.

		All image URLs are collected first with findMM(). They are then
		handed to a pool of Download worker threads through a queue, and the
		function blocks until every queued URL has been processed.

		Args:
			startIndex: first listing page to scan (default 0).
			length: number of consecutive listing pages to scan
				(default 900).
			workerCount: number of download threads to start (default 10).
		"""
		theList = findMM(startIndex, length)

		# maxsize 0 means the queue is unbounded.
		q = Queue.Queue(0)

		for x in range(workerCount):
			d = Download(q)
			d.name = str(x)
			# Download.run() never returns; daemon threads do not keep the
			# interpreter alive once main() is finished.
			d.setDaemon(True)
			d.start()

		for url in theList:
			q.put(url)

		# Wait until the workers have acknowledged every queued URL.
		q.join()


	# Script entry point: the call is unconditional, so the crawl starts as
	# soon as this file is executed (or imported).
	main()