Created
June 12, 2024 08:47
-
-
Save michaeljclark/72b550e076eebb3754322db137c0a821 to your computer and use it in GitHub Desktop.
SipHash derivative using carryless multiplies instead of rotates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
SlurpHash 0-pre-alpha AVX implementation | |
(SipHash derivative using carryless multiplies instead of rotates) | |
------------------------------------------------------------------ | |
Copyright (c) 2012-2016 Jean-Philippe Aumasson <[email protected]> | |
Copyright (c) 2012-2014 Daniel J. Bernstein <[email protected]> | |
Copyright (c) 2017 Salvatore Sanfilippo <[email protected]> | |
Copyright (c) 2024 Michael Clark <[email protected]> | |
To the extent possible under law, the author(s) have dedicated all copyright | |
and related and neighboring rights to this software to the public domain | |
worldwide. This software is distributed without any warranty. | |
You should have received a copy of the CC0 Public Domain Dedication along | |
with this software. If not, see | |
<http://creativecommons.org/publicdomain/zero/1.0/>. | |
*/ | |
// compile: gcc -O3 -DBENCH_MAIN -march=skylake-avx512 -o bench_slurphash slurphash.c | |
#include <stddef.h> | |
#include <stdint.h> | |
#include <immintrin.h> | |
/* | |
* SMHasher 0x9754755B | |
* | |
* [[[ Avalanche Tests ]]] | |
* | |
* 24-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.658000% | |
* 32-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.691333% | |
* 40-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.652000% | |
* 48-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.720667% | |
* 56-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.757333% | |
* 64-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.740667% | |
* 72-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.688000% | |
* 80-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.617333% | |
* 96-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.674667% | |
* 112-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.665333% | |
* 128-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.720000% | |
* 160-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.648667% | |
* 512-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.769333% | |
* 1024-bit keys -> 64-bit hashes, 300000 reps, worst bias is 0.736667% | |
* | |
* [[[ MomentChi2 Tests ]]] | |
* | |
* linearly increasing numbers of 32-bit, using a step of 2 ... | |
* Target values to approximate : 38918200.000000 - 273633.333333 | |
* | |
* Popcount 1 stats : 38918583.940467 - 273647.079550 | |
* Popcount 0 stats : 38918853.766915 - 273620.670023 | |
* MomentChi2 for bits 1 : 0.269351 | |
* MomentChi2 for bits 0 : 0.781011 | |
* | |
* Derivative stats (transition from 2 consecutive values) : | |
* Popcount 1 stats : 38918796.153440 - 273631.111416 | |
* Popcount 0 stats : 38918666.234741 - 273631.079280 | |
* MomentChi2 for deriv b1 : 0.64941 | |
* MomentChi2 for deriv b0 : 0.397203 | |
*/ | |
/* | |
* p0 = 0b000000000000000011100101110011111 | |
* { 0-4,7,8,9,11,14,15,16 } [6EVE,6ODD] | |
* | |
* p1 = 0b100100000001100100000010100100000 | |
* { 5,8,10,17,20,21,29,32 } [4EVE,4ODD] | |
*/ | |
#define SLURPROUND() do { \ | |
t0 = _mm_clmulepi64_si128(v0, p0, 0); \ | |
t1 = _mm_clmulepi64_si128(v0, p0, 1); \ | |
t2 = _mm_slli_si128(t1, 8); \ | |
v0 = _mm_xor_si128(t0, t2); \ | |
v0 = _mm_shuffle_epi32(v0, 0b00100111); /* 3,1,2,0 */ \ | |
v1 = _mm_add_epi64(v1, v0); \ | |
t0 = _mm_clmulepi64_si128(v1, p1, 0); \ | |
t1 = _mm_clmulepi64_si128(v1, p1, 1); \ | |
t2 = _mm_slli_si128(t1, 8); \ | |
v1 = _mm_xor_si128(t0, t2); \ | |
v1 = _mm_shuffle_epi32(v1, 0b00011011); /* 3,2,1,0 */ \ | |
v0 = _mm_add_epi64(v0, v1); \ | |
} while(0); | |
int slurphash(uint8_t *out, const uint8_t *in, size_t len, const uint8_t *k) | |
{ | |
const uint64_t x[4] = { | |
0x736f6d6570736575ULL, 0x646f72616e646f6dULL, | |
0x6c7967656e657261ULL, 0x7465646279746573ULL | |
}; | |
const uint64_t pf[2] = { | |
0b000000000000000011100101110011111, | |
0b100100000001100100000010100100000 | |
}; | |
__m128i key = _mm_loadu_si128((const __m128i *)k); | |
__m128i iv0 = _mm_xor_si128(key, _mm_load_si128((const __m128i *)&x[0])); | |
__m128i iv1 = _mm_xor_si128(key, _mm_load_si128((const __m128i *)&x[2])); | |
__m128i v0 = _mm_unpacklo_epi64(iv0, iv1); | |
__m128i v1 = _mm_unpackhi_epi64(iv0, iv1); | |
__m128i p0 = _mm_set_epi64x(0, pf[0]); | |
__m128i p1 = _mm_set_epi64x(0, pf[1]); | |
__m128i p, q, t0, t1, t2; | |
const uint8_t *end = in + (len & ~7); | |
const uint64_t *in64 = (const uint64_t*)in; | |
const uint64_t *end64 = (const uint64_t*)end; | |
uint64_t b = ((uint64_t)len) << 56; | |
for(; in64 < end64; in64++) | |
{ | |
p =_mm_loadl_epi64((const __m128i*)in64); | |
v0 = _mm_add_epi64(p, v0); | |
SLURPROUND(); | |
} | |
switch( len & 7 ) | |
{ | |
case 7: b |= ((uint64_t)end[6]) << 48; | |
case 6: b |= ((uint64_t)end[5]) << 40; | |
case 5: b |= ((uint64_t)end[4]) << 32; | |
case 4: b |= ((uint64_t)end[3]) << 24; | |
case 3: b |= ((uint64_t)end[2]) << 16; | |
case 2: b |= ((uint64_t)end[1]) << 8; | |
case 1: b |= ((uint64_t)end[0]); break; | |
case 0: break; | |
} | |
p = _mm_loadl_epi64((const __m128i*)&b); | |
v1 = _mm_xor_si128(v1, _mm_slli_si128(p, 8)); | |
SLURPROUND(); | |
v0 = _mm_xor_si128(v0, p); | |
v0 = _mm_xor_si128(v0, _mm_set_epi64x(0xFF, 0)); | |
SLURPROUND(); | |
SLURPROUND(); | |
p = _mm_xor_si128(v0, v1); | |
q = _mm_unpackhi_epi64(p, p); | |
p = _mm_xor_si128(p, q); | |
*(uint64_t*)out = _mm_cvtsi128_si64(p); | |
return 0; | |
} | |
/* | |
//SMhasher test function | |
void slurphash_test(const void *input, int len, uint32_t seed, void *out) | |
{ | |
unsigned char key[16]; | |
memset(key, 0, sizeof(key)); | |
memcpy(key, &seed, sizeof(seed)); | |
slurphash((uint8_t*)out, (const uint8_t *)input, len, key); | |
} | |
*/ | |
#ifdef BENCH_MAIN | |
#include <stdio.h> | |
#include <string.h> | |
#include <time.h> | |
#define buffer_size 32768 | |
uint64_t bench_siphash(size_t count) | |
{ | |
clock_t start, end; | |
double gbsec, ns; | |
uint8_t key[16], buf[buffer_size]; | |
uint64_t hash, sum = 0; | |
memset(buf, 0, sizeof(buf)); | |
start = clock(); | |
for (size_t s = 0; s < count;) { | |
size_t l = count - s > buffer_size ? buffer_size : count - s; | |
siphash((uint8_t*)&hash, buf, l, key); | |
sum += hash; | |
s += l; | |
} | |
end = clock(); | |
ns = (double)(end - start) / (double)CLOCKS_PER_SEC * 1e9; | |
gbsec = ((double)count / (double)(1<<30)) / ((double)ns / 1e9); | |
printf("|%-12s|%8s|%8s|%8s|\n", "algorithm", "sz_mib", "ns_byte", "gb_sec"); | |
printf("|%-12s|%8s|%8s|%8s|\n", ":--------", "-----:", "------:", "------:"); | |
printf("|%-12s|%8zu|%8.2f|%8.2f|\n", "slurphash", count>>20, ns/count, gbsec); | |
return sum; | |
} | |
int main(int argc, char **argv) | |
{ | |
return (int)bench_siphash(1<<30); | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment