Created
December 26, 2023 12:18
-
-
Save seanwupi/308c7d91f8ad1ed0c86cdf9ed5522976 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[(1172, (15, 1, 0)), (1196, (11, 8, 0)), (2134, (3, 1, 9)), (2174, (14, 1, 0)), (2187, (5, 8, 9)), (2907, (12, 8, 0)), (4592, (10, 0, 6)), (4700, (5, 1, 1)), (4741, (3, 8, 1)), (4796, (2, 7, 8)), (5171, (2, 2, 8)), (5310, (14, 8, 0)), (5340, (13, 1, 0)), (5404, (10, 0, 7)), (5517, (6, 2, 8)), (5559, (6, 7, 8)), (5606, (9, 0, 6)), (5639, (3, 8, 9)), (5737, (13, 8, 0)), (5798, (5, 1, 9)), (6032, (9, 0, 7)), (6360, (10, 4, 0)), (6603, (9, 6, 0)), (6925, (15, 8, 0)), (6933, (12, 1, 0)), (7033, (10, 0, 5)), (7292, (10, 5, 0)), (7403, (10, 3, 0)), (7452, (11, 1, 0)), (7520, (9, 4, 0)), (7779, (10, 0, 2)), (7819, (9, 5, 0)), (7894, (9, 0, 5)), (7914, (8, 3, 0)), (7985, (1, 7, 9)), (8086, (10, 0, 8)), (8185, (9, 0, 8)), (8188, (0, 6, 0)), (8293, (0, 4, 0)), (8302, (9, 7, 0)), (8778, (10, 0, 1)), (8813, (10, 0, 3)), (9000, (10, 2, 0)), (9016, (8, 0, 7)), (9099, (8, 5, 0)), (9453, (9, 0, 3)), (9560, (7, 2, 9)), (9857, (9, 0, 1)), (9878, (2, 7, 2)), (10217, (0, 0, 5)), (10220, (0, 0, 7)), (10375, (1, 8, 8)), (10512, (7, 8, 8)), (10591, (8, 4, 0)), (10674, (6, 2, 2)), (10762, (8, 0, 5)), (10903, (0, 5, 0)), (11056, (0, 0, 8)), (11408, (0, 7, 0)), (11429, (8, 2, 0)), (11477, (0, 0, 6)), (11540, (9, 0, 2)), (11709, (4, 0, 9)), (11782, (7, 1, 8)), (11861, (0, 0, 4)), (12073, (1, 1, 8)), (12076, (10, 7, 0)), (12434, (8, 0, 8)), (12595, (1, 7, 1)), (12764, (10, 0, 9)), (12767, (9, 3, 0)), (12904, (8, 0, 3)), (13189, (7, 7, 9)), (13327, (8, 6, 0)), (13344, (1, 2, 9)), (13816, (7, 2, 1)), (13843, (9, 0, 9)), (13858, (8, 0, 2)), (14585, (0, 0, 3)), (14616, (8, 0, 4)), (14915, (0, 0, 2)), (15157, (0, 8, 0)), (15530, (9, 2, 0)), (15636, (0, 3, 0)), (15738, (9, 0, 4)), (15796, (0, 2, 0)), (15953, (0, 0, 9)), (16113, (10, 0, 4)), (16537, (8, 1, 0)), (16819, (8, 7, 0)), (17036, (8, 0, 9)), (17061, (10, 1, 0)), (17828, (8, 0, 6)), (18887, (10, 8, 0)), (18980, (15, 0, 1)), (19287, (4, 0, 1)), (20086, (9, 8, 0)), (20277, (4, 8, 0)), (20493, (4, 1, 0)), (22185, (8, 0, 1)), (23736, (7, 1, 2)), (24631, (2, 2, 2)), (25057, (9, 1, 0)), (25170, (11, 0, 1)), (26600, (6, 7, 2)), (27449, (0, 0, 1)), (27976, (3, 1, 1)), (28538, (5, 8, 1)), (28587, (1, 2, 1)), (29725, (7, 7, 1)), (33828, (1, 8, 2)), (36080, (10, 6, 0)), (42615, (13, 0, 1)), (44219, (0, 1, 0)), (47559, (14, 0, 1)), (58712, (12, 0, 1)), (59702, (8, 8, 0)), (67089, (1, 1, 2)), (73753, (7, 8, 2))]\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"from collections import defaultdict\n", | |
"\n", | |
"# https://github.com/CGLemon/chinese-chess-PGN\n", | |
"with open(\"WXF-41743games.pgns\", encoding=\"utf8\") as f:\n", | |
" data = f.read()\n", | |
"\n", | |
"cnt = defaultdict(int)\n", | |
"\n", | |
"for d in data.split(\"\\n\\n\"):\n", | |
" bd = [[None for j in range(10)] for i in range(9)]\n", | |
" for i, p in enumerate([(0, 0), (1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0), (7, 0), (8, 0), (1, 2), (7, 2), (0, 3), (2, 3), (4, 3), (6, 3), (8, 3)]):\n", | |
" bd[p[0]][p[1]] = i\n", | |
" for z in re.findall(r\"\\d+\\. (..)-(..) (..)-(..)\", d):\n", | |
" p1, p2, q1, q2 = tuple(map(lambda t: (ord(t[0]) - ord(\"A\"), int(t[1])), z))\n", | |
" assert bd[p1[0]][p1[1]] != None\n", | |
" c = bd[p1[0]][p1[1]]\n", | |
" m = (c, (p2[0] - p1[0] + 9) % 9, (p2[1] - p1[1] + 10) % 10)\n", | |
" bd[p2[0]][p2[1]] = bd[p1[0]][p1[1]]\n", | |
" bd[p1[0]][p1[1]] = None\n", | |
" cnt[m] += 1\n", | |
"\n", | |
"freq = set((cnt[k], k) for k in cnt)\n", | |
"total_moves = sum(cnt.values())\n", | |
"print(sorted(freq))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"6.525916541374137\n" | |
] | |
} | |
], | |
"source": [ | |
"total_bits = 0\n", | |
"while len(freq) > 1:\n", | |
" x = min(freq, key=lambda t: t[0])\n", | |
" freq.remove(x)\n", | |
" y = min(freq, key=lambda t: t[0])\n", | |
" freq.remove(y)\n", | |
" freq.add((x[0] + y[0], (x[1], y[1])))\n", | |
" total_bits += x[0] + y[0]\n", | |
"\n", | |
"print(total_bits / total_moves)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"000000\n", | |
"000001\n", | |
"0000100\n", | |
"0000101\n", | |
"000011\n", | |
"0001000\n", | |
"0001001\n", | |
"0001010\n", | |
"00010110\n", | |
"00010111\n", | |
"00011\n", | |
"0010000\n", | |
"0010001\n", | |
"0010010\n", | |
"00100110\n", | |
"00100111\n", | |
"00101\n", | |
"001100\n", | |
"0011010\n", | |
"00110110\n", | |
"00110111\n", | |
"00111000\n", | |
"00111001\n", | |
"0011101\n", | |
"0011110\n", | |
"0011111\n", | |
"0100000\n", | |
"0100001\n", | |
"0100010\n", | |
"01000110\n", | |
"01000111\n", | |
"01001\n", | |
"010100\n", | |
"0101010\n", | |
"0101011\n", | |
"0101100\n", | |
"0101101\n", | |
"010111\n", | |
"011000\n", | |
"011001\n", | |
"0110100\n", | |
"0110101\n", | |
"0110110\n", | |
"0110111\n", | |
"01110000\n", | |
"01110001\n", | |
"0111001\n", | |
"011101\n", | |
"0111100\n", | |
"0111101\n", | |
"011111\n", | |
"1000000\n", | |
"1000001\n", | |
"1000010\n", | |
"10000110\n", | |
"10000111\n", | |
"100010\n", | |
"100011\n", | |
"100100\n", | |
"10010100\n", | |
"100101010\n", | |
"1001010110\n", | |
"1001010111\n", | |
"1001011\n", | |
"10011\n", | |
"1010000\n", | |
"10100010\n", | |
"10100011\n", | |
"101001\n", | |
"10101\n", | |
"1011000\n", | |
"10110010\n", | |
"10110011\n", | |
"1011010\n", | |
"1011011\n", | |
"10111000\n", | |
"10111001\n", | |
"1011101\n", | |
"1011110\n", | |
"1011111\n", | |
"11000000\n", | |
"11000001\n", | |
"1100001\n", | |
"11000100\n", | |
"11000101\n", | |
"1100011\n", | |
"11001000\n", | |
"11001001\n", | |
"1100101\n", | |
"11001100\n", | |
"11001101\n", | |
"1100111\n", | |
"11010\n", | |
"110110\n", | |
"1101110\n", | |
"1101111\n", | |
"11100000\n", | |
"11100001\n", | |
"1110001\n", | |
"111001\n", | |
"11101\n", | |
"11110000\n", | |
"11110001\n", | |
"11110010\n", | |
"1111001100\n", | |
"11110011010\n", | |
"11110011011\n", | |
"111100111\n", | |
"1111010\n", | |
"111101100\n", | |
"111101101\n", | |
"11110111\n", | |
"1111100\n", | |
"1111101\n", | |
"11111100\n", | |
"11111101\n", | |
"11111110\n", | |
"111111110\n", | |
"111111111\n" | |
] | |
} | |
], | |
"source": [ | |
"g = list(freq)[0][1]\n", | |
"def dump(x, s=''):\n", | |
" if len(x) == 2:\n", | |
" dump(x[0], s+'0')\n", | |
" dump(x[1], s+'1')\n", | |
" else:\n", | |
" print(s)\n", | |
"\n", | |
"dump(g)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment