Created
March 3, 2020 10:28
-
-
Save santoshpy/bd801243056f9e6b16ec4e4a39cb7a88 to your computer and use it in GitHub Desktop.
Python queue for multithreading, multi-thread-example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import random | |
import threading | |
from queue import Queue | |
from threading import Thread | |
def crawler(queue, min_delay=2, max_delay=6):
    """Consume URLs from ``queue`` forever, simulating a download for each.

    The loop never returns on its own: it blocks in ``queue.get()`` whenever
    the queue is empty, so it is meant to run in a daemon thread.

    Args:
        queue: A ``queue.Queue`` holding the URLs to process.
        min_delay: Inclusive lower bound, in whole seconds, of the simulated
            download time.
        max_delay: Inclusive upper bound, in whole seconds, of the simulated
            download time.
    """
    # current_thread().name replaces the camelCase currentThread().getName()
    # aliases, which are deprecated since Python 3.10.
    name = threading.current_thread().name
    while True:
        url = queue.get()  # blocks until a URL is available
        try:
            print("Thread: {0} start download {1} at time = {2} \n".format(
                name, url, time.strftime('%H:%M:%S')))
            # Simulated download: sleep for a random number of seconds.
            time.sleep(random.randint(min_delay, max_delay))
            print("Thread: {0} finish download {1} at time = {2} \n".format(
                name, url, time.strftime('%H:%M:%S')))
        finally:
            # Always acknowledge the item, even if processing raised, so that
            # a caller blocked in queue.join() cannot hang forever.
            queue.task_done()
def producer(urls, queue, min_delay=3, max_delay=10):
    """Put each URL from ``urls`` into ``queue``, pausing before every put.

    Args:
        urls: Iterable of URLs to enqueue, in order.
        queue: A ``queue.Queue`` to fill; ``put`` blocks while a bounded
            queue is full.
        min_delay: Inclusive lower bound, in whole seconds, of the pause
            before each put.
        max_delay: Inclusive upper bound, in whole seconds, of the pause
            before each put.
    """
    # current_thread().name replaces the camelCase currentThread().getName()
    # aliases, which are deprecated since Python 3.10.
    name = threading.current_thread().name
    for url in urls:
        # Simulated time needed to discover the next URL.
        time.sleep(random.randint(min_delay, max_delay))
        print("Thread: {0} start put url {1} into url_queue[current size={2}] at time = {3} \n".format(
            name, url, queue.qsize(), time.strftime('%H:%M:%S')))
        queue.put(url)
        print("Thread: {0} finish put url {1} into url_queue[current size={2}] at time = {3} \n".format(
            name, url, queue.qsize(), time.strftime('%H:%M:%S')))
def main():
    """Run the demo: 3 crawler threads consume URLs fed by 2 producer threads.

    The queue is bounded to 5 entries, so producers block in ``put`` while it
    is full. Crawlers are daemon threads because their loops never return.
    """
    max_url_in_queue = 5
    q = Queue(max_url_in_queue)
    thread_pool_size = 3

    print('Main: start crawler threads at {0}'.format(time.strftime('%H:%M:%S')))
    for i in range(thread_pool_size):
        Thread(name='Thread-' + str(i), target=crawler, args=(q,), daemon=True).start()

    print('Main: start producer threads at {0}'.format(time.strftime('%H:%M:%S')))
    urls1 = ['Domain-A-URL-' + str(i) for i in range(3)]
    urls2 = ['Domain-B-URL-' + str(i) for i in range(4)]
    # Two producer threads fill the queue concurrently; each gets its own name
    # so their log lines can be told apart.
    producers = [
        Thread(name='url_producer-1', target=producer, args=(urls1, q)),
        Thread(name='url_producer-2', target=producer, args=(urls2, q)),
    ]
    for t in producers:
        t.start()

    # Wait for the producers first: calling q.join() while the queue is still
    # empty returns immediately, and the daemon crawlers would then be killed
    # at interpreter exit with downloads still in flight.
    for t in producers:
        t.join()
    q.join()  # block until every queued URL has been marked done
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment