Last active
September 29, 2020 05:03
-
-
Save bsergean/891b792f6900078db67145b05ffaca48 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* | |
# each * represents a count of 40. total 5823 | |
zlib [ 2775] ********************************************************************* | |
zlib-ng [ 1808] ********************************************* | |
libdeflate [ 1240] ******************************* | |
* | |
$ clang++ -DHAVE_LIBDEFLATE=1 -O3 --std=c++14 --stdlib=libc++ gzip.cpp /usr/local/lib/libdeflate.a && ./a.out ~/Desktop/example_data.bin | |
median runtime to compress file: 1240 | |
compressing file completed in 1225 us | |
$ clang++ -O3 --std=c++14 --stdlib=libc++ gzip.cpp /usr/local/lib/libz.a && ./a.out ~/Desktop/example_data.bin | |
median runtime to compress file: 1808 | |
compressing file completed in 1848 us | |
$ clang++ -O3 --std=c++14 --stdlib=libc++ gzip.cpp /usr/lib/libz.dylib && ./a.out ~/Desktop/example_data.bin | |
median runtime to compress file: 2775 | |
compressing file completed in 2999 us | |
*/ | |
#include <stdint.h>
#include <string.h>

#include <algorithm>
#include <array>
#include <chrono>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <zlib.h>
#ifdef HAVE_LIBDEFLATE
#include <libdeflate.h>
#endif
/// Small stopwatch used to time a labelled piece of work.
///
/// Construction starts the clock (the constructor calls reset()). Unless a
/// report has already been made or suppressed via setReported(), the
/// destructor calls report(), which prints the elapsed time to std::cerr.
class Bench
{
public:
    /// @param description label printed in front of the timing by report().
    Bench(const std::string& description);
    ~Bench();

    /// Restart the clock and re-arm the report made by the destructor.
    void reset();
    /// Store the microseconds elapsed since the last reset() without printing.
    void record();
    /// Store the elapsed microseconds, print them to std::cerr and mark the
    /// bench as reported.
    void report();
    /// Mark the bench as reported so the destructor stays silent.
    void setReported();
    /// @return the duration, in microseconds, stored by the last record() or
    ///         report(); 0 if neither has been called yet.
    uint64_t getDuration() const;

private:
    std::string _description;
    std::chrono::time_point<std::chrono::high_resolution_clock> _start;
    // Initialised here so getDuration() never reads an indeterminate value
    // when it is called before the first record()/report().
    uint64_t _duration = 0;
    bool _reported = false;
};
// Remember the label and start timing immediately; a freshly built bench
// has not reported anything yet.
Bench::Bench(const std::string& description)
    : _description(description)
    , _start(std::chrono::high_resolution_clock::now())
    , _reported(false)
{
}
// Print the timing on scope exit unless it was already reported/suppressed.
Bench::~Bench()
{
    if (_reported)
    {
        return;
    }
    report();
}
void Bench::reset() | |
{ | |
_start = std::chrono::high_resolution_clock::now(); | |
_reported = false; | |
} | |
void Bench::report() | |
{ | |
auto now = std::chrono::high_resolution_clock::now(); | |
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(now - _start); | |
_duration = microseconds.count(); | |
std::cerr << _description << " completed in " << _duration << " us" << std::endl; | |
setReported(); | |
} | |
void Bench::record() | |
{ | |
auto now = std::chrono::high_resolution_clock::now(); | |
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(now - _start); | |
_duration = microseconds.count(); | |
} | |
void Bench::setReported() | |
{ | |
_reported = true; | |
} | |
uint64_t Bench::getDuration() const | |
{ | |
return _duration; | |
} | |
std::string gzipCompress(const std::string& str) | |
{ | |
#ifdef HAVE_LIBDEFLATE | |
int compressionLevel = 6; | |
struct libdeflate_compressor *compressor; | |
compressor = | |
libdeflate_alloc_compressor(compressionLevel); | |
const void *uncompressed_data = str.data(); | |
size_t uncompressed_size = str.size(); | |
void *compressed_data; | |
size_t actual_compressed_size; | |
size_t max_compressed_size; | |
max_compressed_size = libdeflate_gzip_compress_bound(compressor, | |
uncompressed_size); | |
compressed_data = malloc(max_compressed_size); | |
if (compressed_data == NULL) | |
{ | |
return std::string(); | |
} | |
actual_compressed_size = libdeflate_gzip_compress( | |
compressor, | |
uncompressed_data, | |
uncompressed_size, | |
compressed_data, | |
max_compressed_size); | |
if (actual_compressed_size == 0) | |
{ | |
free(compressed_data); | |
return std::string(); | |
} | |
libdeflate_free_compressor(compressor); | |
std::string out; | |
out.append(reinterpret_cast<char*>(compressed_data), actual_compressed_size); | |
free(compressed_data); | |
return out; | |
#else | |
z_stream zs; // z_stream is zlib's control structure | |
memset(&zs, 0, sizeof(zs)); | |
// deflateInit2 configure the file format: request gzip instead of deflate | |
const int windowBits = 15; | |
const int GZIP_ENCODING = 16; | |
deflateInit2(&zs, | |
Z_DEFAULT_COMPRESSION, | |
Z_DEFLATED, | |
windowBits | GZIP_ENCODING, | |
8, | |
Z_DEFAULT_STRATEGY); | |
zs.next_in = (Bytef*) str.data(); | |
zs.avail_in = (uInt) str.size(); // set the z_stream's input | |
int ret; | |
char outbuffer[32768]; | |
std::string outstring; | |
// retrieve the compressed bytes blockwise | |
do | |
{ | |
zs.next_out = reinterpret_cast<Bytef*>(outbuffer); | |
zs.avail_out = sizeof(outbuffer); | |
ret = deflate(&zs, Z_FINISH); | |
if (outstring.size() < zs.total_out) | |
{ | |
// append the block to the output string | |
outstring.append(outbuffer, zs.total_out - outstring.size()); | |
} | |
} while (ret == Z_OK); | |
deflateEnd(&zs); | |
return outstring; | |
#endif | |
} | |
/**
 * Read a whole file into memory as raw bytes.
 *
 * @param path file to read.
 * @return {true, contents} on success (contents is empty for an empty file);
 *         {false, empty vector} if the file cannot be opened, its size cannot
 *         be determined, or fewer bytes than expected could be read.
 */
std::pair<bool, std::vector<uint8_t>> load(const std::string& path)
{
    std::vector<uint8_t> memblock;

    // Binary mode: the payload is arbitrary bytes and must not go through
    // any newline translation.
    std::ifstream file(path, std::ios::in | std::ios::binary);
    if (!file.is_open()) return std::make_pair(false, memblock);

    file.seekg(0, file.end);
    const std::streamoff size = file.tellg();
    if (size < 0) return std::make_pair(false, memblock); // tellg() failed
    file.seekg(0, file.beg);

    memblock.resize(static_cast<size_t>(size));
    if (!memblock.empty())
    {
        file.read(reinterpret_cast<char*>(memblock.data()),
                  static_cast<std::streamsize>(size));
        if (file.gcount() != static_cast<std::streamsize>(size))
        {
            // Short read: do not hand back a partially filled buffer.
            memblock.clear();
            return std::make_pair(false, memblock);
        }
    }

    return std::make_pair(true, std::move(memblock));
}
std::pair<bool, std::string> readAsString(const std::string& path) | |
{ | |
auto res = load(path); | |
auto vec = res.second; | |
return std::make_pair(res.first, std::string(vec.begin(), vec.end())); | |
} | |
int gzip(const std::string& filename, int runCount) | |
{ | |
auto res = readAsString(filename); | |
bool found = res.first; | |
if (!found) | |
{ | |
return 1; | |
} | |
std::string compressedBytes; | |
std::vector<uint64_t> durations; | |
{ | |
Bench bench("compressing file"); | |
bench.setReported(); | |
for (int i = 0; i < runCount; ++i) | |
{ | |
bench.reset(); | |
compressedBytes = gzipCompress(res.second); | |
bench.record(); | |
durations.push_back(bench.getDuration()); | |
} | |
size_t medianIdx = durations.size() / 2; | |
uint64_t medianRuntime = durations[medianIdx]; | |
std::cout << "median runtime to compress file: " | |
<< medianRuntime << std::endl; | |
} | |
std::string outputFilename(filename); | |
outputFilename += ".gz"; | |
std::ofstream f; | |
f.open(outputFilename); | |
f << compressedBytes; | |
f.close(); | |
return 0; | |
} | |
int main(int argc, char** argv) | |
{ | |
return gzip(argv[1], 100); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment