Last active
August 23, 2024 08:17
-
-
Save jd/b0aa5cbfa42f4eb23eb9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- encoding: utf-8 -*- | |
# | |
# Copyright © 2016 Red Hat, Inc. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); you may | |
# not use this file except in compliance with the License. You may obtain | |
# a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | |
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | |
# License for the specific language governing permissions and limitations | |
# under the License. | |
import struct | |
import bitarray | |
def double_to_int(f): | |
"""Convert a 64 bits float to a 64 bits integer.""" | |
return struct.unpack("!Q", struct.pack("!d", f))[0] | |
def int_to_double(i): | |
"""Convert an integer to a 64 bits float.""" | |
return struct.unpack("!d", struct.pack("!Q", i))[0] | |
def to_binary64(v): | |
"""Convert value to a binary representation on 64 bits as a string.""" | |
return "{:064b}".format(v) | |
def count_lead_and_trail_zeroes(d): | |
"""Count the number of leading and trailing zeroes in an integer.""" | |
as_str = "{:64b}".format(d) | |
try: | |
leading_zeroes = as_str.index("1") | |
trailing_zeroes = 63 - as_str.rindex("1") | |
except ValueError: | |
return 64, 64 | |
return leading_zeroes, trailing_zeroes | |
def encode_xor_floats(floats): | |
"""Encode a list of floats using XOR encoding. | |
Encoding stolen from paper: | |
"Gorilla: A Fast, Scalable, In-Memory Time Series Database" | |
""" | |
if len(floats) == 0: | |
return b"" | |
prev = double_to_int(floats[0]) | |
encoded = bitarray.bitarray(to_binary64(prev)) | |
prev_lz = prev_tz = 0 | |
for v in floats[1:]: | |
v = double_to_int(v) | |
xor = prev ^ v | |
if xor == 0: | |
encoded.append(0) | |
else: | |
encoded.append(1) | |
lz, tz = count_lead_and_trail_zeroes(xor) | |
if lz >= 32: | |
lz = 31 | |
if prev_lz != 0 and lz >= prev_lz and tz >= prev_tz: | |
encoded.append(0) | |
encoded.extend( | |
to_binary64(xor >> prev_tz)[- (64 - prev_lz - prev_tz):]) | |
else: | |
prev_lz = lz | |
prev_tz = tz | |
encoded.append(1) | |
encoded.extend(to_binary64(lz)[-5:]) | |
sigbits = 64 - lz - tz | |
encoded.extend(to_binary64(sigbits)[-6:]) | |
encoded.extend(to_binary64(xor >> tz)[-sigbits:]) | |
prev = v | |
return encoded.tobytes() | |
_ARRAY_3_ZERO = bitarray.bitarray('000') | |
_ARRAY_2_ZERO = bitarray.bitarray('00') | |
def decode_xor_floats(encoded, number_of_values): | |
if number_of_values == 0: | |
return [] | |
array = bitarray.bitarray() | |
array.frombytes(encoded.tobytes()) | |
results = [] | |
# Read first value | |
results.append(struct.unpack("!d", array[:64].tobytes())[0]) | |
index = 64 | |
for _ in range(number_of_values - 1): | |
index += 1 | |
if array[index - 1]: | |
index += 1 | |
if array[index - 1]: | |
bits = _ARRAY_3_ZERO + array[index:index + 5] | |
leading = struct.unpack("!B", bits.tobytes())[0] | |
bits = _ARRAY_2_ZERO + array[index + 5:index + 11] | |
trailing = 64 - leading - struct.unpack( | |
"!B", bits.tobytes())[0] | |
index += 11 | |
sigbits = 64 - leading - trailing | |
bits = bitarray.bitarray( | |
'0' * (64 - sigbits)) + array[index:index + sigbits] | |
num = struct.unpack("!Q", bits.tobytes())[0] | |
index += sigbits | |
vbits = double_to_int(results[-1]) | |
vbits ^= num << trailing | |
results.append(int_to_double(vbits)) | |
else: | |
results.append(results[-1]) | |
return results |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you @BastianHavers!