Created
September 2, 2019 20:46
-
-
Save andreadipersio/d9e85f0c2e1e90eac2ee54f5930e9697 to your computer and use it in GitHub Desktop.
Huffman Encoder/Decoder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Huffman Encoder/Decoder | |
# | |
# usage: | |
# To encode a string: | |
# > encode_string("foo foo bar") | |
# It returns an encoding table and the encoded text (TODO) | |
# | |
# https://engineering.purdue.edu/ece264/17au/hw/HW13?alt=huffman | |
TreeValue = Struct.new(:character, :frequency) | |
class Tree | |
attr_accessor :value, :left, :right | |
def initialize(value, left, right) | |
@value = value | |
@left = left | |
@right = right | |
end | |
def self.merge(a, b) | |
tree_value = TreeValue.new(nil, a.value.frequency + b.value.frequency) | |
Tree.new(tree_value, a, b) | |
end | |
def leaf? | |
![left, right].all? | |
end | |
def <=>(other) | |
same_frequency = value.frequency == other.value.frequency | |
case | |
when !same_frequency | |
value.frequency <=> other.value.frequency | |
when (self.leaf? and !other.leaf?) | |
-1 | |
when (!self.leaf? and other.leaf?) | |
+1 | |
else | |
value.character <=> other.value.character | |
end | |
end | |
end | |
class HuffmanWalker | |
L = 0 | |
R = 1 | |
def initialize(tree) | |
@stack = [] | |
@visited = {} | |
@prefix = [] | |
@node = tree | |
end | |
def go_up | |
@prefix.pop | |
@node = @stack.pop | |
end | |
def go_left(node) | |
@prefix.push(L) | |
@stack.push(node) | |
@node = node.left | |
end | |
def go_right(node) | |
@prefix.push(R) | |
@stack.push(node) | |
@node = node.right | |
end | |
def mark_as_visited | |
@visited[@node.object_id] = true | |
end | |
def visited?(node) | |
@visited[node.object_id] | |
end | |
def each(&block) | |
while @node do | |
if @node.leaf? | |
yield(@node.value, @prefix) | |
go_up | |
mark_as_visited | |
next | |
end | |
if @node.left and !visited?(@node.left) | |
go_left(@node) | |
elsif @node.right and !visited?(@node.right) | |
go_right(@node) | |
else | |
go_up | |
end | |
mark_as_visited | |
end | |
end | |
end | |
module Huffman | |
def frequency(string) | |
string.each_char.reduce(Hash.new(0)) do |memo, char| | |
memo[char] += 1 | |
memo | |
end | |
end | |
def make_huffman_tree(frequency) | |
forest = frequency.map { |k,v| Tree.new(TreeValue.new(k, v), nil, nil) } | |
forest.length.times do | |
forest.sort! | |
a, b = forest.shift(2) | |
return a if !b | |
forest.push Tree.merge(a, b) | |
end | |
forest.pop | |
end | |
def make_encoding_table(s) | |
tree = make_huffman_tree(frequency(s)) | |
walker = HuffmanWalker.new(tree) | |
encoding_table = {} | |
walker.each do |value, prefix| | |
encoding_table[value.character] = prefix.join("") | |
end | |
encoding_table | |
end | |
def pack(s, encoding_table) | |
encoded_string = s.chars.reduce do |encoded, char| | |
encoded += encoding_table[char] | |
end | |
i = 0 | |
bytes = [] | |
while unpacked_byte = encoded_string[8 * i...8 * (i+1)] | |
bytes.push(unpacked_byte) | |
i += 1 | |
end | |
bytes | |
end | |
def encode_string(s) | |
encoding_table = make_encoding_table(s) | |
bytes = pack(s, encoding_table) | |
uncompressed_size = s.bytes.length | |
compressed_size = bytes.length | |
size_reduction = (uncompressed_size - compressed_size) / uncompressed_size.to_f * 100 | |
{ | |
encoded_string: bytes, | |
encoding_table: encoding_table, | |
stats: { | |
uncompressed_size: uncompressed_size, | |
compressed_size: compressed_size, | |
size_reduction: "#{'%.2f' % size_reduction}%", | |
} | |
} | |
end | |
def demo | |
encode_string(<<~EOS | |
Et aperiam saepe sed dicta delectus sint et. Repellat est minima aut. Ipsa quis doloremque commodi beatae ex magni aut. In sapiente nesciunt beatae. Ut at sint et. | |
Molestiae sint dicta dolorem. Ad dolor est ut aliquam quis dolorem fuga ducimus. Vero voluptas atque veritatis reiciendis veniam consequatur natus perspiciatis. | |
Sed voluptas eligendi non. Nam ut similique corporis rerum beatae aut magnam quidem. Et quaerat commodi nulla cupiditate illum et. Quas est necessitatibus doloremque ducimus illum incidunt cupiditate ea. Cupiditate similique qui aut. | |
Maxime velit et non. Laudantium voluptas id repellendus ea. Ea nulla ad dicta ipsam. | |
Et explicabo veniam quo rem et voluptatem hic. Facilis accusamus sunt ducimus voluptatem laborum eos velit. Optio aut rerum tempore sit autem. | |
EOS | |
) | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment