Created
April 12, 2015 11:35
-
-
Save hideaki-t/0a5b78dab96aa87aaae2 to your computer and use it in GitHub Desktop.
wrapping mmap object with memoryview
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import mmap\n", | |
"import os\n", | |
"import struct" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%load_ext memory_profiler" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def mmap_and(f, fn):\n", | |
"    # Memory-map the file at path `f` read-only, call `fn` with the mmap object\n", | |
"    # and return whatever `fn` returns. Both the mapping and the descriptor are\n", | |
"    # closed before returning, so `fn` should return a plain value rather than\n", | |
"    # something that still references the mapped buffer.\n", | |
"    # Raises OSError (e.g. FileNotFoundError) if `f` cannot be opened.\n", | |
"    #\n", | |
"    # The descriptor is opened before the try block: if os.open fails there is\n", | |
"    # nothing to close, and the caller sees the real OSError.\n", | |
"    fd = os.open(f, os.O_RDONLY)\n", | |
"    try:\n", | |
"        # Length 0 maps the whole file. ACCESS_READ requests a read-only mapping;\n", | |
"        # `prot` expects PROT_* values, so a MAP_* flag must not be passed there.\n", | |
"        with mmap.mmap(fd, 0, access=mmap.ACCESS_READ) as m:\n", | |
"            return fn(m)\n", | |
"    finally:\n", | |
"        os.close(fd)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Variant 1: index `m` directly.\n", | |
"def get_value(m):\n", | |
" # m[8] is packed as an unsigned byte ('B') and unpacked as a signed byte ('b'),\n", | |
" # i.e. the byte at offset 8 is reinterpreted as a signed 8-bit integer.\n", | |
" return struct.unpack('b', struct.pack('B', m[8]))[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Variant 2: slice `m` first, then index the slice.\n", | |
"def slice_(m):\n", | |
" # m[1:][7] addresses the same byte as m[8], but the slice m[1:] is built first.\n", | |
" # The %memit cell in this notebook records a ~3.8 GiB increment for this variant.\n", | |
" # The byte is then reinterpreted as a signed 8-bit integer via struct ('B' -> 'b').\n", | |
" return struct.unpack('b', struct.pack('B', m[1:][7]))[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Variant 3: slice `m` first, then wrap the slice in a memoryview.\n", | |
"def slice_cast(m):\n", | |
" # m[1:] is evaluated before memoryview() sees it, so the view wraps the slice\n", | |
" # result rather than `m` itself. cast('b') exposes the bytes as signed chars,\n", | |
" # so element 7 is the byte at offset 8 of `m` read as a signed 8-bit integer.\n", | |
" return memoryview(m[1:]).cast('b')[7]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def mview_slice(m):\n", | |
"    # Variant 4: wrap `m` in a memoryview first and slice the view, so the slice\n", | |
"    # is taken on the view rather than on `m` itself. cast('b') exposes the bytes\n", | |
"    # as signed chars; element 7 of mv[1:] is the byte at offset 8 of `m`.\n", | |
"    # Returns that byte as a signed int. The `with` block releases the view on\n", | |
"    # exit, so the return must stay nested inside it.\n", | |
"    with memoryview(m) as mv:\n", | |
"        return mv[1:].cast('b')[7]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"size 2086114280\n" | |
] | |
} | |
], | |
"source": [ | |
"# Input file used by every measurement below; its size is printed for reference.\n", | |
"f = '20140728.json.xz'\n", | |
"print('size', os.stat(f).st_size)\n", | |
"# Sanity check: all four accessors must return the same value before they are compared.\n", | |
"assert mmap_and(f, get_value) == mmap_and(f, slice_) == mmap_and(f, slice_cast) == mmap_and(f, mview_slice)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 35.02 MiB, increment: 0.30 MiB\n", | |
"peak memory: 35.04 MiB, increment: 0.01 MiB\n", | |
"peak memory: 3860.70 MiB, increment: 3825.66 MiB\n", | |
"peak memory: 3863.97 MiB, increment: 3828.93 MiB\n", | |
"peak memory: 35.04 MiB, increment: 0.00 MiB\n" | |
] | |
} | |
], | |
"source": [ | |
"# Peak memory per accessor, one output line per %memit in the same order.\n", | |
"# The first call passes a no-op callback and serves as the baseline.\n", | |
"%memit mmap_and(f, lambda x: None)\n", | |
"%memit mmap_and(f, get_value)\n", | |
"%memit mmap_and(f, slice_)\n", | |
"%memit mmap_and(f, slice_cast)\n", | |
"%memit mmap_and(f, mview_slice)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The slowest run took 18.19 times longer than the fastest. This could mean that an intermediate result is being cached \n", | |
"100000 loops, best of 3: 15.4 µs per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"# Baseline timing: mmap_and with a callback that does nothing.\n", | |
"%timeit mmap_and(f, lambda x: None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The slowest run took 10.44 times longer than the fastest. This could mean that an intermediate result is being cached \n", | |
"10000 loops, best of 3: 27.9 µs per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"# Timing for direct indexing (get_value).\n", | |
"%timeit mmap_and(f, get_value)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1 loops, best of 3: 1.36 s per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"# Timing for slice-then-index (slice_).\n", | |
"%timeit mmap_and(f, slice_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1 loops, best of 3: 1.36 s per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"# Timing for slice-then-memoryview (slice_cast).\n", | |
"%timeit mmap_and(f, slice_cast)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The slowest run took 5.40 times longer than the fastest. This could mean that an intermediate result is being cached \n", | |
"10000 loops, best of 3: 29.6 µs per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"# Timing for memoryview-then-slice (mview_slice).\n", | |
"%timeit mmap_and(f, mview_slice)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment