Skip to content

Commit

Permalink
Optimize hash functions
Browse files Browse the repository at this point in the history
  • Loading branch information
korthaj committed May 20, 2017
1 parent a343179 commit fae88d1
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 189 deletions.
14 changes: 4 additions & 10 deletions filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ type Filter struct {
count int64 // Estimate number of elements
}

// MurmurHash3 functions.
var (
murmur = new(digest)
murmurString = new(digestString)
)

// New creates an empty Bloom filter with room for n elements
// at a false-positives rate less than 1/p.
func New(n int, p int) *Filter {
Expand All @@ -76,12 +70,12 @@ func New(n int, p int) *Filter {

// AddByte adds b to the filter and tells if b was already a likely member.
func (f *Filter) AddByte(b []byte) bool {
return f.add(murmur.hash(b))
return f.add(hash(b))
}

// Add adds s to the filter and tells if s was already a likely member.
func (f *Filter) Add(s string) bool {
return f.add(murmurString.hash(s))
return f.add(hashString(s))
}

func (f *Filter) add(h1, h2 uint64) bool {
Expand All @@ -104,12 +98,12 @@ func (f *Filter) add(h1, h2 uint64) bool {

// TestByte tells if b is a likely member of the filter.
func (f *Filter) TestByte(b []byte) bool {
return f.test(murmur.hash(b))
return f.test(hash(b))
}

// Test tells if s is a likely member of the filter.
func (f *Filter) Test(s string) bool {
return f.test(murmurString.hash(s))
return f.test(hashString(s))
}

func (f *Filter) test(h1, h2 uint64) bool {
Expand Down
191 changes: 135 additions & 56 deletions hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,129 +3,208 @@ package bloom
// MurmurHash3 implementation adapted from Sébastien Paolacci
// github.com/spaolacci/murmur3, released under BSD-3-Clause.

func (d *digest) hash(data []byte) (h1 uint64, h2 uint64) {
d.h1, d.h2 = 0, 0
d.clen = len(data)
d.tail = d.bmix(data)
return d.sum()
}

const (
c1 = 0x87c37b91114253d5
c2 = 0x4cf5ad432745937f
)

type digest struct {
clen int
tail []byte
h1 uint64
h2 uint64
// fmix scrambles k with three xor-shift steps (right shift by 33)
// separated by two multiplications, and returns the mixed value.
// A zero input yields zero.
func fmix(k uint64) uint64 {
	const (
		mulA uint64 = 0xff51afd7ed558ccd
		mulB uint64 = 0xc4ceb9fe1a85ec53
	)
	// In Go >> binds tighter than ^, so k ^ k>>33 is k ^ (k >> 33).
	k = (k ^ k>>33) * mulA
	k = (k ^ k>>33) * mulB
	return k ^ k>>33
}

// uint64byte decodes the first eight bytes of b as a little-endian
// uint64: b[0] is the least significant byte and b[7] the most.
// b must hold at least eight bytes; a shorter slice panics on the
// first out-of-range index.
func uint64byte(b []byte) uint64 {
	lo := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24
	hi := uint64(b[4]) | uint64(b[5])<<8 | uint64(b[6])<<16 | uint64(b[7])<<24
	return lo | hi<<32
}

func (d *digest) bmix(p []byte) (tail []byte) {
h1, h2 := d.h1, d.h2
nblocks := len(p) / 16
// uint64string decodes the first eight bytes of s as a little-endian
// uint64: s[0] is the least significant byte and s[7] the most.
// s must hold at least eight bytes; a shorter string panics on the
// first out-of-range index.
func uint64string(s string) uint64 {
	lo := uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24
	hi := uint64(s[4]) | uint64(s[5])<<8 | uint64(s[6])<<16 | uint64(s[7])<<24
	return lo | hi<<32
}

func hash(b []byte) (h1, h2 uint64) {
nblocks := len(b) / 16
for i := 0; i < nblocks; i++ {
j := 16 * i
k1 := uint64byte(p[j : j+8])
k2 := uint64byte(p[j+8 : j+16])
k1, k2 := uint64byte(b[j:j+8]), uint64byte(b[j+8:j+16])

k1 *= c1
k1 = (k1 << 31) | (k1 >> 33)
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
k1 *= c2

h1 ^= k1
h1 = (h1 << 27) | (h1 >> 37)
h1 = (h1 << 27) | (h1 >> 37) // rotl64(h1, 27)
h1 += h2
h1 = h1*5 + 0x52dce729

k2 *= c2
k2 = (k2 << 33) | (k2 >> 31)
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
k2 *= c1

h2 ^= k2
h2 = (h2 << 31) | (h2 >> 33)
h2 = (h2 << 31) | (h2 >> 33) // rotl64(h2, 31)
h2 += h1
h2 = h2*5 + 0x38495ab5
}
d.h1, d.h2 = h1, h2
return p[nblocks*16:]
}

func (d *digest) sum() (h1, h2 uint64) {
h1, h2 = d.h1, d.h2
tail := b[nblocks*16:]
var k1, k2 uint64
switch len(d.tail) & 15 {
switch len(tail) {
case 15:
k2 ^= uint64(d.tail[14]) << 48
k2 ^= uint64(tail[14]) << 48
fallthrough
case 14:
k2 ^= uint64(d.tail[13]) << 40
k2 ^= uint64(tail[13]) << 40
fallthrough
case 13:
k2 ^= uint64(d.tail[12]) << 32
k2 ^= uint64(tail[12]) << 32
fallthrough
case 12:
k2 ^= uint64(d.tail[11]) << 24
k2 ^= uint64(tail[11]) << 24
fallthrough
case 11:
k2 ^= uint64(d.tail[10]) << 16
k2 ^= uint64(tail[10]) << 16
fallthrough
case 10:
k2 ^= uint64(d.tail[9]) << 8
k2 ^= uint64(tail[9]) << 8
fallthrough
case 9:
k2 ^= uint64(d.tail[8]) << 0
k2 ^= uint64(tail[8]) << 0
k2 *= c2
k2 = (k2 << 33) | (k2 >> 31)
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
k2 *= c1
h2 ^= k2
fallthrough
case 8:
k1 ^= uint64(d.tail[7]) << 56
k1 ^= uint64(tail[7]) << 56
fallthrough
case 7:
k1 ^= uint64(d.tail[6]) << 48
k1 ^= uint64(tail[6]) << 48
fallthrough
case 6:
k1 ^= uint64(d.tail[5]) << 40
k1 ^= uint64(tail[5]) << 40
fallthrough
case 5:
k1 ^= uint64(d.tail[4]) << 32
k1 ^= uint64(tail[4]) << 32
fallthrough
case 4:
k1 ^= uint64(d.tail[3]) << 24
k1 ^= uint64(tail[3]) << 24
fallthrough
case 3:
k1 ^= uint64(d.tail[2]) << 16
k1 ^= uint64(tail[2]) << 16
fallthrough
case 2:
k1 ^= uint64(d.tail[1]) << 8
k1 ^= uint64(tail[1]) << 8
fallthrough
case 1:
k1 ^= uint64(d.tail[0]) << 0
k1 ^= uint64(tail[0]) << 0
k1 *= c1
k1 = (k1 << 31) | (k1 >> 33)
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
k1 *= c2
h1 ^= k1
}
h1 ^= uint64(d.clen)
h2 ^= uint64(d.clen)
h1 ^= uint64(len(b))
h2 ^= uint64(len(b))
h1 += h2
h2 += h1
h1 = fmix(h1)
h2 = fmix(h2)
h1, h2 = fmix(h1), fmix(h2)
h1 += h2
h2 += h1
return h1, h2
return
}

func fmix(k uint64) uint64 {
k ^= k >> 33
k *= 0xff51afd7ed558ccd
k ^= k >> 33
k *= 0xc4ceb9fe1a85ec53
k ^= k >> 33
return k
// hashString computes a pair of 64-bit hash values (h1, h2) for s.
//
// Both results start at zero. The string is consumed directly, without
// converting it to a []byte: first every full 16-byte block, then the
// remaining 0 to 15 tail bytes, and finally the length of s is folded in
// and both halves are passed through fmix.
func hashString(s string) (h1, h2 uint64) {
// Body: mix each complete 16-byte block into h1 and h2.
nblocks := len(s) / 16
for i := 0; i < nblocks; i++ {
j := 16 * i
// Low and high 8 bytes of the block, each read by uint64string.
k1, k2 := uint64string(s[j:j+8]), uint64string(s[j+8:j+16])

k1 *= c1
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
k1 *= c2

h1 ^= k1
h1 = (h1 << 27) | (h1 >> 37) // rotl64(h1, 27)
h1 += h2
h1 = h1*5 + 0x52dce729

k2 *= c2
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
k2 *= c1

h2 ^= k2
h2 = (h2 << 31) | (h2 >> 33) // rotl64(h2, 31)
h2 += h1
h2 = h2*5 + 0x38495ab5
}

// Tail: the bytes left over after the last full block (fewer than 16).
// Every case falls through, so a tail of length n visits bytes n-1 down
// to 0. Bytes 8..14 are gathered into k2 and bytes 0..7 into k1, each at
// a shift of 8 bits per position. An empty tail matches no case and
// leaves h1 and h2 untouched.
tail := s[nblocks*16:]
var k1, k2 uint64
switch len(tail) {
case 15:
k2 ^= uint64(tail[14]) << 48
fallthrough
case 14:
k2 ^= uint64(tail[13]) << 40
fallthrough
case 13:
k2 ^= uint64(tail[12]) << 32
fallthrough
case 12:
k2 ^= uint64(tail[11]) << 24
fallthrough
case 11:
k2 ^= uint64(tail[10]) << 16
fallthrough
case 10:
k2 ^= uint64(tail[9]) << 8
fallthrough
case 9:
k2 ^= uint64(tail[8]) << 0
// k2 is complete here: mix it into h2 before continuing with k1.
k2 *= c2
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
k2 *= c1
h2 ^= k2
fallthrough
case 8:
k1 ^= uint64(tail[7]) << 56
fallthrough
case 7:
k1 ^= uint64(tail[6]) << 48
fallthrough
case 6:
k1 ^= uint64(tail[5]) << 40
fallthrough
case 5:
k1 ^= uint64(tail[4]) << 32
fallthrough
case 4:
k1 ^= uint64(tail[3]) << 24
fallthrough
case 3:
k1 ^= uint64(tail[2]) << 16
fallthrough
case 2:
k1 ^= uint64(tail[1]) << 8
fallthrough
case 1:
k1 ^= uint64(tail[0]) << 0
// k1 is complete here: mix it into h1.
k1 *= c1
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
k1 *= c2
h1 ^= k1
}
// Finalization: fold in the total length, cross-add the two halves,
// run each through fmix, then cross-add again.
h1 ^= uint64(len(s))
h2 ^= uint64(len(s))
h1 += h2
h2 += h1
h1, h2 = fmix(h1), fmix(h2)
h1 += h2
h2 += h1
// Naked return: yields the named results h1 and h2.
return
}
Loading

0 comments on commit fae88d1

Please sign in to comment.