Created
April 20, 2012 20:06
-
-
Save tkaemming/2431447 to your computer and use it in GitHub Desktop.
self-versioning and argument-hashing cache decorator for python (django/flask)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Self-versioning and argument-hashing cache decorator for deterministic functions.

Designed to be extensible and API-compliant with Django and Flask cache backends.

For examples and caveats, see the bottom of the file.

Ted Kaemming: https://github.com/tkaemming
Mike Tigas: https://github.com/mtigas
"""
import functools | |
import hashlib | |
def generate_function_key(fn):
    """
    Generate a base cache key for a callable by hashing its bytecode.

    The key is the MD5 hex digest of the raw bytecode string (``co_code``)
    of the function's code object. Editing the function body in a way that
    changes the compiled bytecode therefore yields a new key, and two
    functions that compile to the same bytecode share a key.

    This appears to be deterministic on CPython for trivial implementations,
    but is likely implementation- and version-specific. Only ``co_code`` is
    hashed, so edits that typically live outside the bytecode string (for
    example the value of a literal constant) may not change the key.

    :param fn: a Python function (or anything exposing ``__code__`` /
        ``func_code``).
    :returns: a 32-character hexadecimal string.
    :raises AttributeError: if ``fn`` has no code object.
    """
    # ``__code__`` is the spelling on Python 2.6+ and Python 3; ``func_code``
    # only exists on Python 2 and is kept as a fallback for old interpreters.
    code = getattr(fn, '__code__', None)
    if code is None:
        code = fn.func_code
    return hashlib.md5(code.co_code).hexdigest()
def generate_unique_key(*args, **kwargs):
    """
    Generate a cache-key suffix from the hashes of the call arguments.

    Each positional argument and each ``(name, value)`` keyword pair is
    passed through the builtin ``hash()``. The results are joined with ``:``
    and hashed again with MD5 so the key has a fixed, short length.

    Keyword arguments are processed in sorted name order, so ``f(a=1, b=2)``
    and ``f(b=2, a=1)`` produce the same key.

    Caveats:

    * This assumes ``hash()`` is deterministic. On interpreters that
      randomise string hashing per process, keys for string arguments are
      only stable within one process unless the hash seed is pinned
      (e.g. via ``PYTHONHASHSEED``).
    * Values with equal hashes (such as ``1``, ``1.0`` and ``True``) produce
      the same key.

    :returns: a 32-character hexadecimal string.
    :raises TypeError: if any argument or keyword value is unhashable.
    """
    hashed_args = ['%s' % hash(arg) for arg in args]
    # Sorted by name so the key does not depend on keyword order. The
    # trailing space in the format string is kept deliberately so that keys
    # for existing call patterns stay byte-for-byte the same.
    hashed_kwargs = ['%s ' % hash((name, kwargs[name])) for name in sorted(kwargs)]
    # this is md5 hashed again to avoid the key growing too large for memcached
    joined = ':'.join(hashed_args + hashed_kwargs)
    # md5 needs bytes on Python 3; the joined text is pure ASCII.
    return hashlib.md5(joined.encode('ascii')).hexdigest()
def cached(backend, **kwargs):
    """
    Build a caching decorator bound to ``backend``.

    ``backend`` must provide ``get(key)`` and ``set(key, value, **options)``.
    The object returned is a ``functools.partial`` that, applied to a
    function, returns a wrapper which looks the result up in the backend
    before calling the function.

    Supported keyword arguments:

    * key: use a user-defined base cache key (not versioned) instead of
      hashing the function's bytecode
    * key_generator: use a user-defined callable, invoked with the wrapped
      call's ``*args, **kwargs``, to build the per-call part of the key
      instead of hashing the arguments with ``hash()``
    * set_kwargs: keyword arguments passed to the cache backend's ``set``
      method, so you can pass timeouts, etc. when setting cached values

    Note: a ``None`` coming back from ``backend.get`` is treated as a cache
    miss, so a function whose result is ``None`` is re-run on every call.
    """
    def decorator(fn, key=None, key_generator=None, set_kwargs=None):
        # Resolve the defaults once, at decoration time.
        base_key = generate_function_key(fn) if key is None else key
        make_suffix = generate_unique_key if key_generator is None else key_generator
        store_options = {} if set_kwargs is None else set_kwargs

        @functools.wraps(fn)
        def inner(*call_args, **call_kwargs):
            cache_key = '%s:%s' % (base_key, make_suffix(*call_args, **call_kwargs))

            hit = backend.get(cache_key)
            if hit is not None:
                return hit

            # Miss (or a stored None): compute the real value and store it.
            result = fn(*call_args, **call_kwargs)
            backend.set(cache_key, result, **store_options)
            return result

        return inner

    return functools.partial(decorator, **kwargs)
if __name__ == '__main__':
    # Demo / smoke test: runs the decorator against a backend that never
    # stores anything and just prints every call it receives. Every print
    # uses a single pre-formatted argument so the output is the same on
    # Python 2 and Python 3.

    # the underlying cache interface is the same as django and flask, so it
    # should be reasonably portable. otherwise, you can always write a wrapper
    # that supports the same interface
    class CacheBackend(object):
        def get(self, key, fallback=None):
            raise NotImplementedError

        def set(self, key, value):
            raise NotImplementedError

        def __contains__(self, key):
            raise NotImplementedError

    class DummyCacheBackend(CacheBackend):
        def get(self, key, fallback=None):
            print('GET %s %s' % (key, len(key)))
            return fallback

        def set(self, key, value):
            print('SET %s %s %s' % (key, len(key), value))
            return None

        def __contains__(self, key):
            print('HAS %s %s' % (key, len(key)))
            return False

    # examples!
    cache = DummyCacheBackend()

    # autogenerated base key, automagical key generator
    @cached(backend=cache)
    def foo(x):
        return x

    print(foo('bar'))
    print(foo('baz'))

    # let's say we updated the underlying code for foo...
    # the base key will be implicitly versioned since the underlying function
    # code has been modified. there is no need to update arbitrary versioning
    # numbers in the code or jump through other hoops to make sure the cache
    # only returns the result appropriate for the latest logic
    @cached(backend=cache)
    def foo(x):
        return '%s!' % x

    print(foo('bar'))
    print(foo('baz'))

    # allow the definition of the base key
    @cached(backend=cache, key='bar')
    def bar(x):
        return x

    print(bar('baz'))

    # you can use a really awful hash function if you want to break your code
    # notice that the underlying base key is the same as the first example,
    # since the underlying bytecode is actually the same -- this might save you
    # some cache space if you have two functions that actually do the exact
    # same thing (which would be sort of weird but hey, it's your code)
    @cached(backend=cache, key_generator=lambda x: 1)
    def baz(x):
        return x

    print(baz(1))
    print(baz(2))
    print(baz(3))

    # got 99 problems and hashing unhashable types are all of them
    try:
        print(foo([1, 2, 3]))
    except TypeError as e:
        print(e)  # unhashable type: 'list'

    try:
        print(foo({"a": 1, "b": 2}))
    except TypeError as e:
        print(e)  # unhashable type: 'dict'

    # you can also cache values without using the decorator syntax
    def expensive_function(x):
        # assume that this does something that takes forever
        return x + x

    cached_expensive_function = cached(backend=cache)(expensive_function)

    # to call without cache
    print(expensive_function(1))

    # to call with cache
    print(cached_expensive_function(1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment