Created
December 1, 2020 13:08
-
-
Save badamczewski/42ec5d3aabd47c32684cdb87851f8a51 to your computer and use it in GitHub Desktop.
BloomFilter Source Code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Text; | |
namespace ProbabilisticDataStructures.DataStructures | |
{ | |
public class BitSet | |
{ | |
private ulong[] bitset; | |
public int Size { get; private set; } | |
public BitSet(int size) | |
{ | |
bitset = new ulong[(size / 64) + 1]; | |
Size = size; | |
} | |
public void Add(int index) | |
{ | |
bitset[index / 64] |= (1u << (index % 64)); | |
} | |
public bool Contains(int index) | |
{ | |
return (bitset[index / 64] & (1u << (index % 64))) != 0; | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections; | |
using System.Collections.Generic; | |
using System.Text; | |
namespace ProbabilisticDataStructures.DataStructures | |
{ | |
public class BloomFilter | |
{ | |
public readonly int _hashFunctionCount; | |
public readonly BitSet _hashBits; | |
public readonly HashFunction _getHashSecondary; | |
public delegate uint HashFunction(int input); | |
public BloomFilter(int capacity) | |
: this(capacity, 0.01f) | |
{ | |
} | |
public BloomFilter(int capacity, float errorRate) | |
: this(BestM(capacity, errorRate), BestK(capacity, errorRate)) | |
{ | |
} | |
public BloomFilter(int m, int k) | |
{ | |
this._getHashSecondary = HashFunctions.HashInt32Shift; | |
this._hashFunctionCount = k; | |
this._hashBits = new BitSet(m); | |
} | |
public void Add(int item) | |
{ | |
uint primaryHash = HashFunctions.HashInt32Jenkins(item); | |
uint secondaryHash = this._getHashSecondary(item); | |
for (int i = 1; i <= this._hashFunctionCount; i++) | |
{ | |
uint hash = this.ComputeHash(primaryHash, secondaryHash, i); | |
this._hashBits.Add((int)hash); | |
} | |
} | |
public bool Contains(int item) | |
{ | |
uint primaryHash = HashFunctions.HashInt32Jenkins(item); | |
uint secondaryHash = this._getHashSecondary(item); | |
for (int i = 1; i <= this._hashFunctionCount; i++) | |
{ | |
uint hash = this.ComputeHash(primaryHash, secondaryHash, i); | |
if (this._hashBits.Contains((int)hash) == false) | |
{ | |
return false; | |
} | |
} | |
return true; | |
} | |
public static int BestK(int capacity, float errorRate) | |
{ | |
return (int)Math.Round(Math.Log(2.0) * BestM(capacity, errorRate) / capacity); | |
} | |
public static int BestM(int capacity, float errorRate) | |
{ | |
return (int)Math.Ceiling(capacity * Math.Log(errorRate, (1.0 / Math.Pow(2, Math.Log(2.0))))); | |
} | |
public static double ErrorRate(int capacity, int m, int k) | |
{ | |
var x = 1 - Math.Pow(Math.E, -k * (double)capacity / (double)m); | |
return Math.Pow(x, k); | |
} | |
public static float BestErrorRate(int capacity, int m) | |
{ | |
float c = (float)(1.0 / capacity); | |
if (c != 0) | |
{ | |
return c; | |
} | |
// default | |
// http://www.cs.princeton.edu/courses/archive/spring02/cs493/lec7.pdf | |
return (float)Math.Pow(0.6185, m / capacity); | |
} | |
/// <summary> | |
/// Performs Dillinger and Manolios double hashing. | |
/// </summary> | |
/// <param name="primaryHash"> The primary hash. </param> | |
/// <param name="secondaryHash"> The secondary hash. </param> | |
/// <param name="i"> The i. </param> | |
/// <returns> The <see cref="int"/>. </returns> | |
private uint ComputeHash(uint primaryHash, uint secondaryHash, int i) | |
{ | |
var c = (primaryHash + ((uint)i * secondaryHash)); | |
uint resultingHash = (uint)(c % (this._hashBits.Size)); | |
return resultingHash; | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Text; | |
namespace ProbabilisticDataStructures.DataStructures | |
{ | |
public static class HashFunctions | |
{ | |
/// <summary> | |
/// Hashes a 32-bit signed int using Thomas Wang's method v3.1 (http://www.concentric.net/~Ttwang/tech/inthash.htm). | |
/// Runtime is suggested to be 11 cycles. | |
/// </summary> | |
/// <param name="input">The integer to hash.</param> | |
/// <returns>The hashed result.</returns> | |
public static uint HashInt32Shift(int input) | |
{ | |
uint x = (uint)input; | |
unchecked | |
{ | |
x = ~x + (x << 15); // x = (x << 15) - x- 1, as (~x) + y is equivalent to y - x - 1 in two's complement representation | |
x = x ^ (x >> 12); | |
x = x + (x << 2); | |
x = x ^ (x >> 4); | |
x = x * 2057; // x = (x + (x << 3)) + (x<< 11); | |
x = x ^ (x >> 16); | |
return x; | |
} | |
} | |
public static uint HashInt32Jenkins(int input) | |
{ | |
uint a = (uint)input; | |
unchecked | |
{ | |
a = (a + 0x7ed55d16) + (a << 12); | |
a = (a ^ 0xc761c23c) ^ (a >> 19); | |
a = (a + 0x165667b1) + (a << 5); | |
a = (a + 0xd3a2646c) ^ (a << 9); | |
a = (a + 0xfd7046c5) + (a << 3); | |
a = (a ^ 0xb55a4f09) ^ (a >> 16); | |
return a; | |
} | |
} | |
public static int HashInt32FNV1a(int input) | |
{ | |
uint x = (uint)input; | |
unchecked | |
{ | |
uint fnvPrime = 16777619; | |
uint hash = 2166136261; | |
hash = (x >> 24) ^ hash; | |
hash = (x >> 24) * fnvPrime; | |
hash = (x >> 16) ^ hash; | |
hash = (x >> 16) * fnvPrime; | |
hash = (x >> 8) ^ hash; | |
hash = (x >> 8) * fnvPrime; | |
hash = (x) ^ hash; | |
hash = (x) * fnvPrime; | |
return (int)hash; | |
} | |
} | |
/// <summary> | |
/// Hashes a string using Bob Jenkin's "One At A Time" method from Dr. Dobbs (http://burtleburtle.net/bob/hash/doobs.html). | |
/// Runtime is suggested to be 9x+9, where x = input.Length. | |
/// </summary> | |
/// <param name="input">The string to hash.</param> | |
/// <returns>The hashed result.</returns> | |
public static int HashString(string input) | |
{ | |
string s = input as string; | |
int hash = 0; | |
for (int i = 0; i < s.Length; i++) | |
{ | |
hash += s[i]; | |
hash += (hash << 10); | |
hash ^= (hash >> 6); | |
} | |
hash += (hash << 3); | |
hash ^= (hash >> 11); | |
hash += (hash << 15); | |
return hash; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://www.xspdf.com/help/52171480.html
https://www.codeproject.com/Tips/5293506/Hash-Code-Generatrix-et-the-Frequency-Domain