Skip to content

Instantly share code, notes, and snippets.

@paveltyavin
Created June 7, 2016 16:37
Show Gist options
  • Save paveltyavin/360c5aa4f438a82c70d2910311657716 to your computer and use it in GitHub Desktop.
Save paveltyavin/360c5aa4f438a82c70d2910311657716 to your computer and use it in GitHub Desktop.
Update a bunch of Django models using a naive implementation, multiprocessing, and multithreading, on PostgreSQL and SQLite
# coding: utf-8
import threading
import time
import random
import string
from django.db import connection
from multiprocessing import Pool
from django.core.management.base import BaseCommand
from app.models import Author
def gen_random_string():
    """Return a string of 10 ASCII letters, each picked with ``random.choice``."""
    alphabet = string.ascii_letters
    picked = []
    for _ in range(10):
        picked.append(random.choice(alphabet))
    return "".join(picked)
def _update_one(author):
author_id, author_name = author
if author_name[0] == 'a':
connection.close()
Author.objects.filter(id=author_id).update(name=gen_random_string())
# Shared by every Thread instance: run() holds it around update(), so only one
# thread performs its update at a time.
threadLock = threading.Lock()


class Thread(threading.Thread):
    """Thread that renames one author if its current name starts with 'a'."""

    def __init__(self, a):
        """Remember the ``(id, name)`` pair *a* for use by ``update()``."""
        threading.Thread.__init__(self)
        self.a_id, self.a_name = a

    def update(self):
        """Assign a new random name to the author when its name starts with 'a'."""
        # startswith() rather than [0]: an empty name is skipped instead of
        # raising IndexError.
        if self.a_name.startswith('a'):
            Author.objects.filter(id=self.a_id).update(name=gen_random_string())

    def run(self):
        """Run ``update()`` while holding the module-level ``threadLock``."""
        # "with" releases the lock even if update() raises. A bare
        # acquire()/release() pair would leave the lock held on error and
        # block every other thread waiting on it.
        with threadLock:
            self.update()
class Command(BaseCommand):
    """Time three ways of updating ``Author`` rows one query at a time.

    ``handle()`` recreates the table contents, then runs a plain loop, a
    process pool and one-thread-per-row over the same ``(id, name)`` snapshot,
    writing the elapsed seconds of each stage.
    """

    # Number of worker processes used by multi_update().
    processes = 8
    # Number of Author rows created by get_values().
    obj_count = 10000

    def get_values(self):
        """Replace all authors with ``obj_count`` random ones.

        Returns:
            A list of ``(id, name)`` tuples for every row now in the table.
        """
        Author.objects.all().delete()
        Author.objects.bulk_create(
            [Author(name=gen_random_string()) for _ in range(self.obj_count)]
        )
        return list(Author.objects.all().values_list('id', 'name'))

    def naive_update(self, values):
        """Sequentially rename every author in *values* whose name starts with 'a'."""
        for a_id, a_name in values:
            # startswith() rather than [0]: an empty name is skipped instead
            # of raising IndexError.
            if a_name.startswith('a'):
                Author.objects.filter(id=a_id).update(name=gen_random_string())

    def multi_update(self, values):
        """Apply ``_update_one`` to *values* through a pool of ``processes`` workers."""
        pool = Pool(processes=self.processes)
        try:
            pool.map(_update_one, values)
        finally:
            # Always shut the pool down, so a failing task does not leave the
            # worker processes running.
            pool.close()
            pool.join()

    def thread_update(self, values):
        """Start one ``Thread`` per entry in *values* and wait for all of them."""
        threads = []
        for a in values:
            t = Thread(a)
            t.start()
            threads.append(t)
        for t in threads:
            t.join()

    def handle(self, *args, **options):
        """Run every stage in order and report how long each one took."""
        # Output goes through self.stdout (rather than print) so it can be
        # redirected or captured by whoever invokes the command.
        t0 = time.time()
        values = self.get_values()
        t1 = time.time()
        self.stdout.write('prepare : {:.3f}'.format(t1 - t0))
        # The same snapshot is handed to every strategy, so each one issues
        # updates for the same set of ids.
        self.naive_update(values)
        t2 = time.time()
        self.stdout.write('naive : {:.3f}'.format(t2 - t1))
        self.multi_update(values)
        t3 = time.time()
        self.stdout.write('multiproc : {:.3f}'.format(t3 - t2))
        self.thread_update(values)
        t4 = time.time()
        self.stdout.write('multithread: {:.3f}'.format(t4 - t3))
from django.db import models
class Author(models.Model):
    """Model with a single ``name`` character field."""

    # Character field limited to 128 characters.
    name = models.CharField(
        max_length=128,
    )

POSTGRES     1 000		10 000		100 000		1 000 000
naive      : 0.024		0.220		2.009		21.704
multiproc  : 0.122		0.536		3.992  		63.968
multithread: 0.242		2.097		21.901     	?

SQLITE 		 1 000		10 000    
naive      : 0.026		0.263
multiproc  : 0.427		18.070
multithread: 0.130		1.450

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment