Created
April 12, 2019 19:15
-
-
Save Raincode/99e464fa469e0951fbdae7cbb5f797ae to your computer and use it in GitHub Desktop.
MATLAB huffman encoding function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% creates an optimal code for probability vector P using huffman coding | |
% The codes are ordered according to the input probabilities. | |
% Alternatively, you could sort them descending, or add another column to the cell | |
% array containing the symbols (like the huffmandict function). This would require an | |
% additional input parameter. | |
% My measurements show this running faster than the huffmandict function. | |
function [c,h,w]=huffman(P) | |
% function [c,h,w]=huffman(P) | |
% | |
% Huffman-Codierung | |
% | |
% P : Auftrittswahrscheinlichkeiten | |
% c : Code als Cell-Array | |
% h : Entropie | |
% w : mittlere Codewortlänge | |
% | |
% Für den Testvektor | |
% P=[0.05 0.03 0.17 0.23 0.01 0.32 0.19] | |
% ergibt sich h=2.3378 und w=2.39. | |
% nützliche Matlab-Befehle: struct, sort | |
assert(abs(sum(P) - 1) < 10e-6, "Probabilities must sum up to 100%"); | |
% compute entropy | |
h = sum(P .* (-log2(P))); | |
% each row corresponds to the probability in P | |
c = cell(length(P), 1); % codes are represent as numerical vectors for bits | |
% A symbol is used to represent dummy "fused" probabilities as well | |
% Using the indices from sort ensures that the code cell array (i) | |
% correspons to the input P(i) | |
[P, indices] = sort(P, 'descend'); | |
symbols = struct('probability', num2cell(P), 'indices', num2cell(indices)); | |
% P = sort(P, 'descend'); | |
% symbols = struct('probability', num2cell(P), 'indices', num2cell(1:length(P))); | |
while length(symbols) > 1 | |
% select the two lowest probabilities and add them | |
% O(n) insert worst case vs log(n) binary search... | |
last = symbols(end); | |
preLast = symbols(end-1); | |
% Build the code words by prepending bits | |
c(last.indices) = cellfun(@(x)[0 x], c(last.indices), 'UniformOutput', false); | |
c(preLast.indices) = cellfun(@(x)[1 x], c(preLast.indices), 'UniformOutput', false); | |
% Insert dummy symbol representing combined probability of the two | |
% lowest probabilities | |
probSum = last.probability + preLast.probability; | |
newSymbol = struct('probability', probSum, 'indices', [last.indices preLast.indices]); | |
pos = find([symbols.probability] < probSum, 1); | |
% insert dummy symbol and remove the two symbols which belong to it | |
symbols = [symbols(1:pos-1) newSymbol symbols(pos:end-2)]; | |
end | |
assert(length(symbols) == 1 && abs(symbols(1).probability - 1) < 10e-6, "Probability of tree root must add up to 100%"); | |
% compute average codeword length | |
w = sum(cellfun('length', c) .* P(:)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment