Skip to content

Commit

Permalink
planner: add canonical hasher to take in primitive type directly for …
Browse files Browse the repository at this point in the history
…hashing. (pingcap#55570)

ref pingcap#51664
  • Loading branch information
AilinKid committed Aug 23, 2024
1 parent b02581a commit ebbe53c
Show file tree
Hide file tree
Showing 3 changed files with 330 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pkg/planner/cascades/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go_library(
name = "cascades",
srcs = [
"enforcer_rules.go",
"hash_equaler.go",
"implementation_rules.go",
"optimize.go",
"stringer.go",
Expand Down Expand Up @@ -41,6 +42,7 @@ go_test(
timeout = "short",
srcs = [
"enforcer_rules_test.go",
"hash_equaler_test.go",
"main_test.go",
"optimize_test.go",
"stringer_test.go",
Expand All @@ -49,7 +51,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":cascades"],
flaky = True,
shard_count = 25,
shard_count = 28,
deps = [
"//pkg/domain",
"//pkg/expression",
Expand Down
178 changes: 178 additions & 0 deletions pkg/planner/cascades/hash_equaler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cascades

import (
"math"
)

// FNV-1a 64-bit parameters, ported from fnv.go in the Go standard library.
// The hasher uses the fnv-1a flavour (XOR the input in first, then multiply
// by the prime), which the original authors preferred over fnv-1 for the
// uniqueness of the resulting hash values.
const (
	// offset64 is the initial hash state (the FNV 64-bit offset basis).
	offset64 = 0xcbf29ce484222325 // 14695981039346656037

	// prime64 is the multiplier applied after every XOR step (the FNV 64-bit prime).
	prime64 = 0x100000001b3 // 1099511628211
)

// Hasher is the interface for computing a single 64-bit hash value out of a
// sequence of primitive values. Values are fed in through the Hash* methods,
// the current hash is read with Sum64, and Reset puts the hasher back into
// its initial state.
type Hasher interface {
	// Fixed-size scalar values.
	HashBool(val bool)
	HashByte(val byte)
	HashRune(val rune)
	HashInt(val int)
	HashInt64(val int64)
	HashUint64(val uint64)
	HashFloat64(val float64)

	// Variable-length values.
	HashString(val string)
	HashBytes(val []byte)

	// State management and result.
	Reset()
	Sum64() uint64
}

// Hash64a is the type for the 64-bit hash value. The hasher keeps its running
// state in a value of this type and exposes it as a plain uint64 via Sum64.
type Hash64a uint64

// hasher is the Hasher implementation returned by NewHashEqualer. It is a
// helper struct used for computing **fnv-1a** style hash values, which help
// tell the equivalence of expressions/operators. To use it, obtain an instance
// from NewHashEqualer (or call Reset on an existing one), then call a series
// of Hash* methods. The running value is stored in the hash64a field and is
// read back through Sum64.
type hasher struct {
// hash64a stores the hash value as it is incrementally computed.
hash64a Hash64a
}

// NewHashEqualer creates a new Hasher whose state starts at offset64, ready
// to accept Hash* calls.
func NewHashEqualer() Hasher {
	h := new(hasher)
	h.hash64a = offset64
	return h
}

// Reset puts the hasher back into its initial state (offset64), discarding
// everything hashed so far, so that the same instance can be reused.
func (h *hasher) Reset() {
	*h = hasher{hash64a: offset64}
}

// Sum64 returns the hash value accumulated so far as a uint64. It does not
// modify the hasher, so more values can be hashed afterwards.
func (h *hasher) Sum64() uint64 {
	current := h.hash64a
	return uint64(current)
}

// ------------------------------ Hash functions ----------------------------------------
// Previously, an expression's hashcode was computed by encoding its meta layer by layer from
// the bottom up. This is inefficient and carries an OOM risk, because every expression caches
// numerous hash bytes of its own.
//
// The new hash function is based on the fnv-1a hash algorithm and outputs a uint64 only.
// To avoid OOM during the hash computation, the Hasher takes in primitive types from the
// targeted expressions/operators and folds them into one running uint64 state. That state is
// re-initialized by Reset, so a Hasher can be reused after each usage.
//
// The standard fnv-1a lib only takes a bytes slice as input, so every expression/operator
// would have to convert each primitive value to a bytes slice inside its Hash function by
// allocating a temporary slice. This is undesirable, so we made the Hasher take in primitive
// types directly.
// ---------------------------------------------------------------------------------------

// HashBool hashes a Boolean value: true contributes 1 and false contributes 0
// to the XOR step, which is then followed by the multiplication by prime64.
func (h *hasher) HashBool(val bool) {
	var bit Hash64a
	if val {
		bit = 1
	}
	h.hash64a = (h.hash64a ^ bit) * prime64
}

// HashInt hashes an int value in a single XOR-then-multiply step. The value
// is converted to Hash64a (an unsigned 64-bit type) before being mixed in.
func (h *hasher) HashInt(val int) {
	h.hash64a = (h.hash64a ^ Hash64a(val)) * prime64
}

// HashInt64 hashes an int64 value in a single XOR-then-multiply step. The
// value is converted to Hash64a (an unsigned 64-bit type) before being mixed in.
func (h *hasher) HashInt64(val int64) {
	h.hash64a = (h.hash64a ^ Hash64a(val)) * prime64
}

// HashUint64 hashes a uint64 value in a single XOR-then-multiply step.
func (h *hasher) HashUint64(val uint64) {
	h.hash64a = (h.hash64a ^ Hash64a(val)) * prime64
}

// HashFloat64 hashes a float64 value by mixing in its IEEE 754 bit pattern
// (math.Float64bits) in a single XOR-then-multiply step. Because the raw bits
// are hashed, values with different bit patterns hash differently even when
// they compare equal numerically (e.g. +0 and -0).
func (h *hasher) HashFloat64(val float64) {
	bits := math.Float64bits(val)
	h.hash64a = (h.hash64a ^ Hash64a(bits)) * prime64
}

// HashRune hashes a rune (Unicode code point) value in a single
// XOR-then-multiply step.
func (h *hasher) HashRune(val rune) {
	h.hash64a = (h.hash64a ^ Hash64a(val)) * prime64
}

// HashString hashes a string value.
//
// The byte length is hashed first, so that adjacent strings feed different
// sequences when bytes move across their boundary (e.g. "abc","def" versus
// "abcdef",""). Then every byte of the string is hashed in order.
//
// The string is walked byte by byte on purpose instead of with a range loop:
// ranging over a string decodes UTF-8 and yields U+FFFD for every invalid
// byte, which would make distinct non-UTF-8 (binary) strings of the same
// length, such as "\xff\xfe" and "\xfe\xff", always produce the same hash.
// Hashing the raw bytes also makes HashString(s) agree with
// HashBytes([]byte(s)).
//
// eg: "我是谁" holds 3 runes of 3 bytes each, so 9 bytes are hashed after the
// length.
func (h *hasher) HashString(val string) {
	h.HashInt(len(val))
	for i := 0; i < len(val); i++ {
		h.HashByte(val[i])
	}
}

// HashByte hashes a byte value in a single XOR-then-multiply step. A byte is
// mixed in exactly like a rune holding the same numeric value.
func (h *hasher) HashByte(val byte) {
	h.hash64a = (h.hash64a ^ Hash64a(val)) * prime64
}

// HashBytes hashes a byte slice value: the slice length is hashed first,
// followed by every byte in order.
func (h *hasher) HashBytes(val []byte) {
	n := len(val)
	h.HashInt(n)
	for i := 0; i < n; i++ {
		h.HashByte(val[i])
	}
}

// ------------------------------ Object Implementation -------------------------------------
// Primitive types can be hashed and compared directly. Based on the primitive interface
// calls listed here, we can easily implement the hash and equal functions for other composed
// and complex user-defined structures or types.
//
// Say we have a structure like this:
//   type MyStruct struct {
//       a int
//       b string
//       c OtherStruct
//       d Pointer
//   }
// so we can implement the hash and equal functions like this:
//   func (val *MyStruct) Hash64(h Hasher) {
//       h.HashInt(val.a)
//       h.HashString(val.b)
//       // for c here, it calls for the hash function of the OtherStruct implementor.
//       val.c.Hash64(h)
//       // for a pointer, how it could be hashed is up to the implementor.
//       h.HashUint64(uint64(val.d))
//   }
//
//   func (val1 *MyStruct) Equal(val2 *MyStruct) bool {
//       return val1.a == val2.a && val1.b == val2.b && val1.c.Equal(val2.c) && val1.d == val2.d
//   }
// ------------------------------------------------------------------------------------------
149 changes: 149 additions & 0 deletions pkg/planner/cascades/hash_equaler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cascades

import (
"testing"

"github.com/stretchr/testify/require"
)

// TmpStr is a test fixture with two adjacent string fields; its Hash64 method
// hashes str1 followed by str2.
type TmpStr struct {
	str1, str2 string
}

// Hash64 feeds both string fields into h, str1 first and str2 second.
func (ts *TmpStr) Hash64(h Hasher) {
	for _, field := range [...]string{ts.str1, ts.str2} {
		h.HashString(field)
	}
}

func TestStringLen(t *testing.T) {
hasher1 := NewHashEqualer()
hasher2 := NewHashEqualer()
a := TmpStr{
str1: "abc",
str2: "def",
}
b := TmpStr{
str1: "abcdef",
str2: "",
}
a.Hash64(hasher1)
b.Hash64(hasher2)
require.NotEqual(t, hasher1.Sum64(), hasher2.Sum64())
}

// SX is the test-side contract for objects that can be both hashed and
// compared: candidates are hashed with Hash64 and checked with Equal.
type SX interface {
	// Equal reports whether the receiver and the given SX are the same object value.
	Equal(SX) bool
	// Hash64 feeds the receiver's content into h.
	Hash64(h Hasher)
}

// SA is a test fixture with an int and a string field. It has the same field
// layout as SB but is a distinct Go type.
type SA struct {
a int
b string
}

// Hash64 feeds SA's fields into h: the int field a first, then the string
// field b. The struct type itself is not hashed.
func (sa *SA) Hash64(h Hasher) {
h.HashInt(sa.a)
h.HashString(sa.b)
}

// Equal reports whether sx is a *SA holding the same field values as sa.
// Any other dynamic type yields false.
func (sa *SA) Equal(sx SX) bool {
	other, isSA := sx.(*SA)
	if !isSA {
		return false
	}
	return sa.a == other.a && sa.b == other.b
}

// SB is a test fixture with an int and a string field. It has the same field
// layout as SA but is a distinct Go type.
type SB struct {
a int
b string
}

// Hash64 feeds SB's fields into h: the int field a first, then the string
// field b. The struct type itself is not hashed.
func (sb *SB) Hash64(h Hasher) {
h.HashInt(sb.a)
h.HashString(sb.b)
}

// Equal reports whether sx is a *SB holding the same field values as sb.
// Any other dynamic type yields false.
func (sb *SB) Equal(sx SX) bool {
	other, isSB := sx.(*SB)
	if !isSB {
		return false
	}
	return sb.a == other.a && sb.b == other.b
}

func TestStructType(t *testing.T) {
hasher1 := NewHashEqualer()
hasher2 := NewHashEqualer()
a := SA{
a: 1,
b: "abc",
}
b := SB{
a: 1,
b: "abc",
}
a.Hash64(hasher1)
b.Hash64(hasher2)
// As you see from the above, the two structs are different types, but they have the same fields.
// For the Hash64 function, it will hash the fields of the struct, so the hash result should be the same.
// From theoretical point of view, the hash result should NOT be the same because of different types.
//
// While the Equal function is used to compare the two structs, so the result should be false. We don't
// have to hash the golang struct type, because the dynamic runtime type pointer from reflecting is not
// that elegant, we resort to Equal function to compare the two structs completely once two obj has the
// same hash.
require.Equal(t, hasher1.Sum64(), hasher2.Sum64())
require.Equal(t, a.Equal(&b), false)
}

// TestHash64a feeds identical sequences of values into two independent
// hashers and checks after every step that both report the same Sum64.
func TestHash64a(t *testing.T) {
	h1, h2 := NewHashEqualer(), NewHashEqualer()

	// feedBoth applies the same feed to h1 and then to h2, and requires the
	// two running sums to match afterwards.
	feedBoth := func(feed func(h Hasher)) {
		feed(h1)
		feed(h2)
		require.Equal(t, h1.Sum64(), h2.Sum64())
	}

	feedBoth(func(h Hasher) { h.HashBool(true) })
	feedBoth(func(h Hasher) { h.HashBool(false) })
	feedBoth(func(h Hasher) { h.HashInt(199) })
	feedBoth(func(h Hasher) { h.HashInt64(13534523462346) })
	feedBoth(func(h Hasher) { h.HashUint64(13534523462346) })
	feedBoth(func(h Hasher) { h.HashString("hello") })
	feedBoth(func(h Hasher) { h.HashBytes([]byte("world")) })
	feedBoth(func(h Hasher) {
		for _, r := range []rune{'我', '是', '谁'} {
			h.HashRune(r)
		}
	})

	// Start over from the initial state and hash one longer string.
	h1.Reset()
	h2.Reset()
	feedBoth(func(h Hasher) {
		h.HashString("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
	})
}

0 comments on commit ebbe53c

Please sign in to comment.