How to Implement a Bloom Filter in Go

Use the `hash/maphash` package to create a Bloom filter that supports probabilistic set membership with configurable false-positive rates.

How to Implement a Bloom Filter in Go

Use the hash/maphash package to create a Bloom filter that supports probabilistic set membership with configurable false-positive rates.

package main

import (
	"fmt"
	"hash/maphash"
	"math"
	"math/bits"
)

// BloomFilter is a probabilistic data structure for set membership.
type BloomFilter[V any] struct {
	hasher maphash.Hasher[V]
	seeds  []maphash.Seed
	bytes  []byte
}

// NewComparableBloomFilter creates a Bloom filter for comparable types.
func NewComparableBloomFilter[V comparable](n int, fpProb float64) *BloomFilter[V] {
	return NewBloomFilter(maphash.ComparableHasher[V]{}, n, fpProb)
}

// NewBloomFilter creates a Bloom filter with a custom hasher.
func NewBloomFilter[V any](hasher maphash.Hasher[V], n int, fpProb float64) *BloomFilter[V] {
	nbytes, nseeds := calibrate(n, fpProb)

	seeds := make([]maphash.Seed, nseeds)
	for i := range nseeds {
		seeds[i] = maphash.MakeSeed()
	}

	return &BloomFilter[V]{
		hasher: hasher,
		bytes:  make([]byte, nbytes),
		seeds:  seeds,
	}
}

// Insert adds an element to the filter.
func (f *BloomFilter[V]) Insert(v V) {
	for _, seed := range f.seeds {
		index, bit := f.locate(seed, v)
		f.bytes[index] |= bit
	}
}

// Contains checks if an element is likely in the filter.
func (f *BloomFilter[V]) Contains(v V) bool {
	for _, seed := range f.seeds {
		index, bit := f.locate(seed, v)
		if f.bytes[index]&bit == 0 {
			return false
		}
	}
	return true
}

func (f *BloomFilter[V]) locate(seed maphash.Seed, v V) (uint64, byte) {
	var h maphash.Hash
	h.SetSeed(seed)
	f.hasher.Hash(&h, v)
	hash := h.Sum64()

	index := reduce(hash, uint64(len(f.bytes)))
	mask := byte(1 << (hash % 8))
	return index, mask
}

// reduce maps hash to a value in the interval [0, n).
func reduce(hash, n uint64) uint64 {
	if n > 1<<32-1 {
		hi, _ := bits.Mul64(hash, n)
		return hi
	}
	return hash >> 32 * n >> 32
}

func calibrate(n int, fpProb float64) (bytes, seeds int) {
	if n == 0 {
		return 1, 1
	}

	logFpProb := math.Log(fpProb)
	m := -(float64(n) * logFpProb) / (math.Ln2 * math.Ln2)

	bytes = int(m) / 8
	if float64(bytes*8) < m {
		bytes++
	}

	k := -logFpProb / math.Ln2
	seeds = max(int(math.Round(k)), 1)
	return bytes, seeds
}

func main() {
	// Create a filter for 1000 items with 1% false positive rate
	f := NewComparableBloomFilter[string](1000, 0.01)

	// Insert items
	f.Insert("apple")
	f.Insert("banana")

	// Check membership
	if f.Contains("apple") {
		fmt.Println("apple is present")
	}
	if !f.Contains("cherry") {
		fmt.Println("cherry is absent")
	}
}

Note: The hash/maphash package and NewComparableBloomFilter are part of the upcoming Go standard library (as shown in the source context). Ensure you are using a Go version that includes these APIs or implement the logic manually using maphash.Hash and maphash.Seed if unavailable.