Skip to content

Commit

Permalink
Speed improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
korthaj committed May 16, 2017
1 parent 11abd3e commit afcd687
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 30 deletions.
41 changes: 23 additions & 18 deletions filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,11 @@ type Filter struct {
count int64 // Estimate number of elements
}

// MurmurHash3 function.
var murmur = new(digest)
// MurmurHash3 functions.
var (
murmur = new(digest)
murmurString = new(digestString)
)

// New creates an empty Bloom filter with room for n elements
// at a false-positives rate less than 1/p.
Expand All @@ -73,7 +76,15 @@ func New(n int, p int) *Filter {

// AddByte adds b to the filter and tells if b was already a likely member.
func (f *Filter) AddByte(b []byte) bool {
h1, h2 := murmur.hash(b)
return f.add(murmur.hash(b))
}

// Add inserts the string s into the filter and reports whether s
// already looked like a member before this call.
func (f *Filter) Add(s string) bool {
	// Hash the string directly; the two halves drive the lookups in add.
	h1, h2 := murmurString.hash(s)
	return f.add(h1, h2)
}

func (f *Filter) add(h1, h2 uint64) bool {
trunc := uint64(len(f.data))<<shift - 1
member := true
for i := f.lookups; i > 0; i-- {
Expand All @@ -91,16 +102,17 @@ func (f *Filter) AddByte(b []byte) bool {
return member
}

// Add adds s to the filter and tells if s was already a likely member.
func (f *Filter) Add(s string) bool {
b := make([]byte, len(s))
copy(b, s)
return f.AddByte(b)
}

// TestByte tells if b is a likely member of the filter.
func (f *Filter) TestByte(b []byte) bool {
h1, h2 := murmur.hash(b)
return f.test(murmur.hash(b))
}

// Test reports whether the string s is a likely member of the filter.
func (f *Filter) Test(s string) bool {
	// Hash the string directly; the two halves drive the lookups in test.
	h1, h2 := murmurString.hash(s)
	return f.test(h1, h2)
}

func (f *Filter) test(h1, h2 uint64) bool {
trunc := uint64(len(f.data))<<shift - 1
for i := f.lookups; i > 0; i-- {
h1 += h2
Expand All @@ -113,13 +125,6 @@ func (f *Filter) TestByte(b []byte) bool {
return true
}

// Test tells if s is a likely member of the filter.
func (f *Filter) Test(s string) bool {
b := make([]byte, len(s))
copy(b, s)
return f.TestByte(b)
}

// Count returns an estimate of the number of elements in the filter.
func (f *Filter) Count() int64 {
return f.count
Expand Down
2 changes: 1 addition & 1 deletion filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func BenchmarkTestByte(b *testing.B) {
}
}

func BenchmarkTestUnion(b *testing.B) {
func BenchmarkUnion(b *testing.B) {
n := 1000
b.StopTimer()
f1 := New(n, 200)
Expand Down
22 changes: 11 additions & 11 deletions hash.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
package bloom

import (
"encoding/binary"
)

// MurmurHash3 implementation adapted from Sébastien Paolacci
// github.com/spaolacci/murmur3, released under BSD-3-Clause.

Expand All @@ -20,20 +16,24 @@ const (
)

type digest struct {
clen int // Digested input cumulative length.
buf [16]byte // Expected (but not required) to be 16 large.
tail []byte // 0 to 15 bytes view of buf.
h1 uint64 // Running hash part 1.
h2 uint64 // Running hash part 2.
clen int
tail []byte
h1 uint64
h2 uint64
}

// Uint64 decodes the first eight bytes of b as a little-endian uint64.
// Any bytes after the eighth are ignored. It panics if b holds fewer
// than eight bytes.
func Uint64(b []byte) uint64 {
	_ = b[7] // one up-front bounds check instead of one per indexed byte
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

func (d *digest) bmix(p []byte) (tail []byte) {
h1, h2 := d.h1, d.h2
nblocks := len(p) / 16
for i := 0; i < nblocks; i++ {
j := 16 * i
k1 := binary.LittleEndian.Uint64(p[j : j+8])
k2 := binary.LittleEndian.Uint64(p[j+8 : j+16])
k1 := Uint64(p[j : j+8])
k2 := Uint64(p[j+8 : j+16])
k1 *= c1
k1 = (k1 << 31) | (k1 >> 33)
k1 *= c2
Expand Down
117 changes: 117 additions & 0 deletions hash_string.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package bloom

// MurmurHash3 implementation adapted from Sébastien Paolacci
// github.com/spaolacci/murmur3, released under BSD-3-Clause.

// hash resets the digest, consumes data in one pass and returns the two
// 64-bit hash values produced by sum.
func (d *digestString) hash(data string) (h1 uint64, h2 uint64) {
	d.clen = len(data)
	d.h1, d.h2 = 0, 0
	// bmixString digests every whole 16-byte block and hands back the
	// remainder, which sum folds in during finalization.
	d.tail = d.bmixString(data)
	h1, h2 = d.sum()
	return h1, h2
}

// digestString is the working state of the string-input hash.
type digestString struct {
	clen   int    // length of the input in bytes, recorded by hash
	tail   string // bytes left over after the whole 16-byte blocks
	h1, h2 uint64 // the two running hash values
}

// Uint64String decodes the first eight bytes of b as a little-endian
// uint64. Any bytes after the eighth are ignored. It panics if b holds
// fewer than eight bytes.
func Uint64String(b string) uint64 {
	_ = b[7] // one up-front bounds check instead of one per indexed byte
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// bmixString mixes every complete 16-byte block of p into the running
// hash state and returns the unconsumed remainder (fewer than 16 bytes).
func (d *digestString) bmixString(p string) (tail string) {
	a, b := d.h1, d.h2
	// Consume one block per iteration by advancing the string itself.
	for ; len(p) >= 16; p = p[16:] {
		lo := Uint64String(p[:8])
		hi := Uint64String(p[8:16])

		// Mix the low word into the first hash value.
		lo *= c1
		lo = lo<<31 | lo>>33
		lo *= c2
		a ^= lo
		a = a<<27 | a>>37
		a += b
		a = a*5 + 0x52dce729

		// Mix the high word into the second hash value.
		hi *= c2
		hi = hi<<33 | hi>>31
		hi *= c1
		b ^= hi
		b = b<<31 | b>>33
		b += a
		b = b*5 + 0x38495ab5
	}
	d.h1, d.h2 = a, b
	return p
}

// sum finalizes the hash: it folds the leftover tail bytes into the
// running state, mixes in the input length and applies the final
// avalanche via fmix. The digest's stored state is not modified.
func (d *digestString) sum() (h1, h2 uint64) {
	h1, h2 = d.h1, d.h2
	rest := len(d.tail) & 15

	// Tail bytes 8 and up are packed little-endian into k2 and mixed
	// into the second hash value.
	if rest > 8 {
		var k2 uint64
		for i := 8; i < rest; i++ {
			k2 ^= uint64(d.tail[i]) << (8 * uint(i-8))
		}
		k2 *= c2
		k2 = k2<<33 | k2>>31
		k2 *= c1
		h2 ^= k2
	}

	// Tail bytes 0 through 7 are packed little-endian into k1 and mixed
	// into the first hash value.
	if rest > 0 {
		head := rest
		if head > 8 {
			head = 8
		}
		var k1 uint64
		for i := 0; i < head; i++ {
			k1 ^= uint64(d.tail[i]) << (8 * uint(i))
		}
		k1 *= c1
		k1 = k1<<31 | k1>>33
		k1 *= c2
		h1 ^= k1
	}

	// Fold in the total input length, then cross-mix and avalanche.
	n := uint64(d.clen)
	h1 ^= n
	h2 ^= n
	h1 += h2
	h2 += h1
	h1 = fmix(h1)
	h2 = fmix(h2)
	h1 += h2
	h2 += h1
	return h1, h2
}
23 changes: 23 additions & 0 deletions hash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,26 @@ func TestHash(t *testing.T) {
}
}
}

// TestHashString checks that digestString.hash returns the expected
// (h1, h2) pair for a table of sample strings, reusing one digest
// across all cases.
func TestHashString(t *testing.T) {
	cases := []struct {
		h1, h2 uint64
		s      string
	}{
		{0x0000000000000000, 0x0000000000000000, ""},
		{0xcbd8a7b341bd9b02, 0x5b1e906a48ae1d19, "hello"},
		{0x342fac623a5ebc8e, 0x4cdcbc079642414d, "hello, world"},
		{0xb89e5988b737affc, 0x664fc2950231b2cb, "19 Jan 2038 at 3:14:07 AM"},
		{0xcd99481f9ee902c9, 0x695da1a38987b6e7, "The quick brown fox jumps over the lazy dog."},
	}

	d := new(digestString)
	for i := range cases {
		want := &cases[i]
		gotH1, gotH2 := d.hash(want.s)
		if gotH1 != want.h1 {
			t.Errorf("hash(%q).h1 = %d; want %d\n", want.s, gotH1, want.h1)
		}
		if gotH2 != want.h2 {
			t.Errorf("hash(%q).h2 = %d; want %d\n", want.s, gotH2, want.h2)
		}
	}
}

0 comments on commit afcd687

Please sign in to comment.