How to Implement a Bloom Filter in Go
Use the hash/maphash package to create a Bloom filter that supports probabilistic set membership with configurable false-positive rates.
package main
import (
"fmt"
"hash/maphash"
"math"
"math/bits"
)
// BloomFilter is a probabilistic data structure for set membership.
type BloomFilter[V any] struct {
hasher maphash.Hasher[V]
seeds []maphash.Seed
bytes []byte
}
// NewComparableBloomFilter creates a Bloom filter for comparable types.
func NewComparableBloomFilter[V comparable](n int, fpProb float64) *BloomFilter[V] {
return NewBloomFilter(maphash.ComparableHasher[V]{}, n, fpProb)
}
// NewBloomFilter creates a Bloom filter with a custom hasher.
func NewBloomFilter[V any](hasher maphash.Hasher[V], n int, fpProb float64) *BloomFilter[V] {
nbytes, nseeds := calibrate(n, fpProb)
seeds := make([]maphash.Seed, nseeds)
for i := range nseeds {
seeds[i] = maphash.MakeSeed()
}
return &BloomFilter[V]{
hasher: hasher,
bytes: make([]byte, nbytes),
seeds: seeds,
}
}
// Insert adds an element to the filter.
func (f *BloomFilter[V]) Insert(v V) {
for _, seed := range f.seeds {
index, bit := f.locate(seed, v)
f.bytes[index] |= bit
}
}
// Contains checks if an element is likely in the filter.
func (f *BloomFilter[V]) Contains(v V) bool {
for _, seed := range f.seeds {
index, bit := f.locate(seed, v)
if f.bytes[index]&bit == 0 {
return false
}
}
return true
}
func (f *BloomFilter[V]) locate(seed maphash.Seed, v V) (uint64, byte) {
var h maphash.Hash
h.SetSeed(seed)
f.hasher.Hash(&h, v)
hash := h.Sum64()
index := reduce(hash, uint64(len(f.bytes)))
mask := byte(1 << (hash % 8))
return index, mask
}
// reduce maps hash to a value in the interval [0, n).
func reduce(hash, n uint64) uint64 {
if n > 1<<32-1 {
hi, _ := bits.Mul64(hash, n)
return hi
}
return hash >> 32 * n >> 32
}
func calibrate(n int, fpProb float64) (bytes, seeds int) {
if n == 0 {
return 1, 1
}
logFpProb := math.Log(fpProb)
m := -(float64(n) * logFpProb) / (math.Ln2 * math.Ln2)
bytes = int(m) / 8
if float64(bytes*8) < m {
bytes++
}
k := -logFpProb / math.Ln2
seeds = max(int(math.Round(k)), 1)
return bytes, seeds
}
func main() {
// Create a filter for 1000 items with 1% false positive rate
f := NewComparableBloomFilter[string](1000, 0.01)
// Insert items
f.Insert("apple")
f.Insert("banana")
// Check membership
if f.Contains("apple") {
fmt.Println("apple is present")
}
if !f.Contains("cherry") {
fmt.Println("cherry is absent")
}
}
Note: The hash/maphash package and NewComparableBloomFilter are part of the upcoming Go standard library (as shown in the source context). Ensure you are using a Go version that includes these APIs or implement the logic manually using maphash.Hash and maphash.Seed if unavailable.