Last active
August 29, 2015 14:24
-
-
Save d2207197/aac0083f7958c3b0979c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 236, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from functools import wraps, lru_cache\n", | |
"\n", | |
"def listify(func):\n", | |
" \"Decorator to convert generator output to a list\"\n", | |
" @wraps(func)\n", | |
" def listify(*args, **kw):\n", | |
" return list(func(*args, **kw))\n", | |
" return listify\n", | |
"\n", | |
"import sys\n", | |
"import time\n", | |
"from contextlib import redirect_stdout\n", | |
"def trace(slowdown = False):\n", | |
" def trace(func):\n", | |
" \"print function input and output\"\n", | |
" @wraps(func)\n", | |
" def trace(*args, indent = [0],**kw):\n", | |
" with redirect_stdout(sys.stderr):\n", | |
" print(' '*indent[0], end='')\n", | |
"\n", | |
" print('{}('.format(func.__name__), end='')\n", | |
" if args:\n", | |
" print(*map(repr, args), sep=', ', end='')\n", | |
" if kw:\n", | |
" print(',', *('{}={}'.format(k,repr(v)) for k, v in kw), sep=', ', end='')\n", | |
" print(')')\n", | |
" if slowdown: time.sleep(0.5)\n", | |
" indent[0] += 1\n", | |
" out = func(*args, **kw)\n", | |
" indent[0] -= 1\n", | |
" with redirect_stdout(sys.stderr):\n", | |
" print(' '*indent[0], end='')\n", | |
"\n", | |
" print('{}('.format(func.__name__), end='')\n", | |
" if args:\n", | |
" print(*map(repr, args), sep=', ', end='')\n", | |
" if kw:\n", | |
" print(',', *('{}={}'.format(k,repr(v)) for k, v in kw), sep=', ', end='')\n", | |
" print(') ->', out)\n", | |
" return out\n", | |
" return trace\n", | |
" return trace\n", | |
"\n", | |
"\n", | |
"@lru_cache()\n", | |
"# @trace\n", | |
"@listify\n", | |
"def allpartition(seq, *, max_length=3):\n", | |
" max_length = min(len(seq), max_length)\n", | |
" for length in range(max_length, 1 - 1, -1):\n", | |
" yield seq[0:length], seq[length:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 232, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('abc', 'd'), ('ab', 'cd'), ('a', 'bcd')]" | |
] | |
}, | |
"execution_count": 232, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"allpartition('abcd')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 179, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('ABCDEFGHIJKLMNOPQRSTUVWXYZ', ''),\n", | |
" ('ABCDEFGHIJKLMNOPQRSTUVWXY', 'Z'),\n", | |
" ('ABCDEFGHIJKLMNOPQRSTUVWX', 'YZ'),\n", | |
" ('ABCDEFGHIJKLMNOPQRSTUVW', 'XYZ'),\n", | |
" ('ABCDEFGHIJKLMNOPQRSTUV', 'WXYZ'),\n", | |
" ('ABCDEFGHIJKLMNOPQRSTU', 'VWXYZ'),\n", | |
" ('ABCDEFGHIJKLMNOPQRST', 'UVWXYZ'),\n", | |
" ('ABCDEFGHIJKLMNOPQRS', 'TUVWXYZ'),\n", | |
" ('ABCDEFGHIJKLMNOPQR', 'STUVWXYZ'),\n", | |
" ('ABCDEFGHIJKLMNOPQ', 'RSTUVWXYZ'),\n", | |
" ('ABCDEFGHIJKLMNOP', 'QRSTUVWXYZ'),\n", | |
" ('ABCDEFGHIJKLMNO', 'PQRSTUVWXYZ'),\n", | |
" ('ABCDEFGHIJKLMN', 'OPQRSTUVWXYZ'),\n", | |
" ('ABCDEFGHIJKLM', 'NOPQRSTUVWXYZ'),\n", | |
" ('ABCDEFGHIJKL', 'MNOPQRSTUVWXYZ'),\n", | |
" ('ABCDEFGHIJK', 'LMNOPQRSTUVWXYZ'),\n", | |
" ('ABCDEFGHIJ', 'KLMNOPQRSTUVWXYZ'),\n", | |
" ('ABCDEFGHI', 'JKLMNOPQRSTUVWXYZ'),\n", | |
" ('ABCDEFGH', 'IJKLMNOPQRSTUVWXYZ'),\n", | |
" ('ABCDEFG', 'HIJKLMNOPQRSTUVWXYZ'),\n", | |
" ('ABCDEF', 'GHIJKLMNOPQRSTUVWXYZ'),\n", | |
" ('ABCDE', 'FGHIJKLMNOPQRSTUVWXYZ'),\n", | |
" ('ABCD', 'EFGHIJKLMNOPQRSTUVWXYZ'),\n", | |
" ('ABC', 'DEFGHIJKLMNOPQRSTUVWXYZ'),\n", | |
" ('AB', 'CDEFGHIJKLMNOPQRSTUVWXYZ'),\n", | |
" ('A', 'BCDEFGHIJKLMNOPQRSTUVWXYZ')]" | |
] | |
}, | |
"execution_count": 179, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from string import ascii_uppercase\n", | |
"allpartition(ascii_uppercase, max_length = 1000)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 180, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from itertools import combinations, chain" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 181, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from collections import namedtuple\n", | |
"\n", | |
"class SeqScore(namedtuple('aa', ['seq', 'score'])):\n", | |
" def __add__(self, other):\n", | |
" return SeqScore(self.seq + ' ' + other.seq, self.score * other.score)\n", | |
"def tm(seq):\n", | |
" return SeqScore(seq, 1/(sum(map(ord, seq)) % 13 + 1))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 233, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"@lru_cache()\n", | |
"@trace\n", | |
"def allsep(seq):\n", | |
" out = []\n", | |
" for part1, part2 in allpartition(seq):\n", | |
" seqscore1 = tm(part1)\n", | |
" if part2:\n", | |
" out.extend(seqscore1 + seqscore2 for seqscore2 in allsep(part2))\n", | |
" else:\n", | |
" out.append(seqscore1)\n", | |
" \n", | |
" return tuple(sorted(out, key = lambda x: -x.score)[:3])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 234, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<function __main__.trace.<locals>.trace.<locals>.trace>" | |
] | |
}, | |
"execution_count": 234, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"allsep('abc')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 196, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 859 µs, sys: 0 ns, total: 859 µs\n", | |
"Wall time: 868 µs\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"allsep('g') -> (SeqScore(seq='g', score=0.07692307692307693),)\n", | |
"allsep('fg') -> (SeqScore(seq='fg', score=0.09090909090909091), SeqScore(seq='f g', score=0.00641025641025641))\n", | |
"allsep('efg') -> (SeqScore(seq='efg', score=0.125), SeqScore(seq='ef g', score=0.008547008547008548), SeqScore(seq='e fg', score=0.008264462809917356))\n", | |
"allsep('cefg') -> (SeqScore(seq='cef g', score=0.019230769230769232), SeqScore(seq='ce fg', score=0.015151515151515152), SeqScore(seq='c efg', score=0.013888888888888888))\n", | |
"allsep('bcefg') -> (SeqScore(seq='bc efg', score=0.041666666666666664), SeqScore(seq='bce fg', score=0.006993006993006994), SeqScore(seq='bc ef g', score=0.002849002849002849))\n", | |
"allsep('abcefg') -> (SeqScore(seq='ab cef g', score=0.019230769230769232), SeqScore(seq='ab ce fg', score=0.015151515151515152), SeqScore(seq='abc efg', score=0.013888888888888888))\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"(SeqScore(seq='ab cef g', score=0.019230769230769232),\n", | |
" SeqScore(seq='ab ce fg', score=0.015151515151515152),\n", | |
" SeqScore(seq='abc efg', score=0.013888888888888888))" | |
] | |
}, | |
"execution_count": 196, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%time allsep('abcefg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 176, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def allsep(seq):\n", | |
" out = []\n", | |
" for part1, part2 in allpartition(seq):\n", | |
" seqscore1 = tm(part1)\n", | |
" if part2:\n", | |
" out.extend(seqscore1 + seqscore2 for seqscore2 in allsep(part2))\n", | |
" else:\n", | |
" out.append(seqscore1)\n", | |
" \n", | |
" return tuple(sorted(out, key = lambda x: -x.score)[:3])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 119, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 411 µs, sys: 0 ns, total: 411 µs\n", | |
"Wall time: 417 µs\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"(SeqScore(seq='ab cef g', score=0.019230769230769232),\n", | |
" SeqScore(seq='ab ce fg', score=0.015151515151515152),\n", | |
" SeqScore(seq='abc efg', score=0.013888888888888888))" | |
] | |
}, | |
"execution_count": 119, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%time allsep('abcefg')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 228, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"https://gist.github.com/aac0083f7958c3b0979c\r\n" | |
] | |
} | |
], | |
"source": [ | |
"!gist --update aac0083f7958c3b0979c allsep.ipynb " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 237, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"allpartition(['1', '2', '3', '4'], 2)\n", | |
" allpartition(['2', '3', '4'], 2)\n", | |
" allpartition(['3', '4'], 2)\n", | |
" allpartition(['4'], 2)\n", | |
" allpartition(['4'], 2) -> [[['4']]]\n", | |
" allpartition(['3', '4'], 2) -> [[['3'], ['4']]]\n", | |
" allpartition(['2', '3', '4'], 2) -> [[['2'], ['3'], ['4']]]\n", | |
"allpartition(['1', '2', '3', '4'], 2) -> [[['1'], ['2'], ['3'], ['4']]]\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[[['1'], ['2'], ['3'], ['4']]]" | |
] | |
}, | |
"execution_count": 237, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from itertools import chain\n", | |
"\n", | |
"@trace(slowdown= True)\n", | |
"@listify\n", | |
"def allpartition(sequence, k):\n", | |
" time.sleep(0.5)\n", | |
" if len(sequence) == 1:\n", | |
" yield [sequence]\n", | |
" else:\n", | |
" for i in range(1, k):\n", | |
" part1 = [sequence[:i]]\n", | |
" for part2 in allpartition(sequence[i:], k):\n", | |
" yield list(chain(part1, part2))\n", | |
"\n", | |
"a = list('1234')\n", | |
"allpartition(a, 2)\n", | |
"\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 211, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import time" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 202, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"time.sleep(1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment