Last active
December 13, 2020 06:52
-
-
Save mocobeta/e8cc2aa6348465c6dcc9ac99e3e87095 to your computer and use it in GitHub Desktop.
compression algorithms performance comparison
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use rand::prelude::*; | |
fn main() { | |
let mut rng = thread_rng(); | |
let p: f32 = 0.00001; | |
let max_doc: usize = 1_000_000; | |
let mut postings: Vec<usize> = vec![rng.gen_range(1, 1000) as usize]; | |
loop { | |
let next = postings.last().unwrap() + geo_random(p); | |
if next > max_doc { | |
break; | |
} else { | |
postings.push(next); | |
} | |
} | |
//println!("dummy postings: {:?}", postings); | |
let postings_delta = delta_values(&postings); | |
let gamma_coded = encode_gamma(&postings_delta); | |
let delta_coded = encode_delta(&postings_delta); | |
let m_opt: u32 = ((2.0 - (postings.len() as f64 / max_doc as f64)).ln() | |
/ -(1.0 - (postings.len() as f64 / max_doc as f64)).ln()) | |
.ceil() as u32; | |
let golomb_coded = encode_golomb(&postings_delta, m_opt); | |
let m_opt = 2u32.pow((m_opt as f64).log2().ceil() as u32); | |
let rice_coded = encode_rice(&postings_delta, m_opt); | |
let vbyte_coded = encode_vbyte(&postings_delta); | |
println!("Length (gamma) = {} bytes", gamma_coded.to_bytes().len()); | |
println!("Length (delta) = {} bytes", delta_coded.to_bytes().len()); | |
println!("Length (golomb) = {} bytes", golomb_coded.to_bytes().len()); | |
println!("Length (rice) = {} bytes", rice_coded.to_bytes().len()); | |
println!("Length (vbyte) = {} bytes", vbyte_coded.len()); | |
} | |
fn delta_values(li: &[usize]) -> Vec<usize> { | |
if li.len() == 0 { | |
return vec![]; | |
} | |
let mut res = Vec::new(); | |
res.push(li[0]); | |
for i in 1..li.len() { | |
let delta = li[i] - li[i - 1]; | |
assert!(delta > 0); | |
res.push(delta); | |
} | |
res | |
} | |
fn geo_random(p: f32) -> usize { | |
let mut rng = thread_rng(); | |
let rand: f32 = rng.gen(); | |
(rand.ln() / (1.0 - p).ln()).ceil() as usize | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment