Last active
April 23, 2017 23:13
-
-
Save SergeyNarozhny/76066f5b0a1a8b90d30b532632336395 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import heapq | |
from collections import Counter, namedtuple | |
class Node(namedtuple("Node", ["left", "right"])): | |
def walk(self, code, acc): | |
self.left.walk(code, acc + "0") | |
self.right.walk(code, acc + "1") | |
class Leaf(namedtuple("Leaf", ["char"])): | |
def walk(self, code, acc): | |
code[self.char] = acc or "0" | |
def huffman_encode(s): | |
h = [] | |
for ch, freq in Counter(s).items(): | |
h.append((freq, len(h), Leaf(ch))) | |
heapq.heapify(h) | |
count = len(h) | |
while len(h) > 1: | |
freq1, _count1, left = heapq.heappop(h) | |
freq2, _count2, right = heapq.heappop(h) | |
heapq.heappush(h, (freq1 + freq2, count, Node(left, right))) | |
count += 1 | |
code = {} | |
if h: | |
[(_freq, _count, root)] = h | |
root.walk(code, "") | |
return code | |
def main(): | |
s = input() | |
code = huffman_encode(s) | |
encoded = "".join(code[ch], for ch in s) | |
print(len(code), len(encoded)) | |
for ch in sorted(code): | |
print("{}: {}".format(ch, code[ch])) | |
print(encoded) | |
if __name__ == "__main__": | |
main() | |
def test(n_iter = 100): | |
import random | |
import string | |
for i in range(n_iter): | |
length = random.randint(0, 32) | |
s = "".join(random.choice(string.ascii_letters) for _ range(length)) | |
code = huffman_encode(s) | |
encoded = "".join(code[ch] for ch in s) | |
assert huffman_dencode(encoded, code) = s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment