From e36334b8d412a12343cabd827f62b84c5469417b Mon Sep 17 00:00:00 2001 From: Quentin McGaw Date: Thu, 15 Sep 2022 13:36:56 +0000 Subject: [PATCH] fix(lib/trie): prepare trie nodes for mutation only when needed - Only prepare nodes for mutation if mutation is guaranteed - Fixes 'reported deleted hashes' when no mutation is done - Add comment on `deletedKeys` trie struct field --- lib/trie/trie.go | 80 ++++++++++++++++++++++++++++++------------- lib/trie/trie_test.go | 66 ++++++++++++++++++++++++++++++++--- 2 files changed, 119 insertions(+), 27 deletions(-) diff --git a/lib/trie/trie.go b/lib/trie/trie.go index 6c851f76343..443e0aed20d 100644 --- a/lib/trie/trie.go +++ b/lib/trie/trie.go @@ -18,9 +18,11 @@ var EmptyHash, _ = NewEmptyTrie().Hash() // Trie is a base 16 modified Merkle Patricia trie. type Trie struct { - generation uint64 - root *Node - childTries map[common.Hash]*Trie + generation uint64 + root *Node + childTries map[common.Hash]*Trie + // deletedKeys is the database keys (trie node Merkle values) + // deleted since the last trie snapshot. deletedKeys map[common.Hash]struct{} } @@ -319,20 +321,22 @@ func findNextKeyChild(children []*Node, startIndex byte, // key specified in little Endian format. func (t *Trie) Put(keyLE, value []byte) { nibblesKey := codec.KeyLEToNibbles(keyLE) - t.root, _ = t.insert(t.root, nibblesKey, value) + t.root, _, _ = t.insert(t.root, nibblesKey, value) } // insert inserts a value in the trie at the key specified. // It may create one or more new nodes or update an existing node. -func (t *Trie) insert(parent *Node, key, value []byte) (newParent *Node, nodesCreated uint32) { +func (t *Trie) insert(parent *Node, key, value []byte) ( + newParent *Node, mutated bool, nodesCreated uint32) { if parent == nil { const nodesCreated = 1 + const mutated = true return &Node{ Key: key, SubValue: value, Generation: t.generation, Dirty: true, - }, nodesCreated + }, mutated, nodesCreated } // TODO ensure all values have dirty set to true @@ -344,23 +348,26 @@ func (t *Trie) insert(parent *Node, key, value []byte) (newParent *Node, nodesCr } func (t *Trie) insertInLeaf(parentLeaf *Node, key, value []byte) ( - newParent *Node, nodesCreated uint32) { + newParent *Node, mutated bool, nodesCreated uint32) { if bytes.Equal(parentLeaf.Key, key) { nodesCreated = 0 if bytes.Equal(value, parentLeaf.SubValue) { - return parentLeaf, nodesCreated + const mutated = false + return parentLeaf, mutated, nodesCreated } copySettings := node.DefaultCopySettings copySettings.CopyValue = false parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) parentLeaf.SubValue = value - return parentLeaf, nodesCreated + const mutated = true + return parentLeaf, mutated, nodesCreated } commonPrefixLength := lenCommonPrefix(key, parentLeaf.Key) // Convert the current leaf parent into a branch parent + mutated = true newBranchParent := &Node{ Key: key[:commonPrefixLength], Generation: t.generation, @@ -376,15 +383,18 @@ func (t *Trie) insertInLeaf(parentLeaf *Node, key, value []byte) ( if len(key) < len(parentLeafKey) { // Move the current leaf parent as a child to the new branch. copySettings := node.DefaultCopySettings - parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) childIndex := parentLeafKey[commonPrefixLength] - parentLeaf.Key = parentLeaf.Key[commonPrefixLength+1:] + newParentLeafKey := parentLeaf.Key[commonPrefixLength+1:] + if !bytes.Equal(parentLeaf.Key, newParentLeafKey) { + parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) + parentLeaf.Key = newParentLeafKey + } newBranchParent.Children[childIndex] = parentLeaf newBranchParent.Descendants++ nodesCreated++ } - return newBranchParent, nodesCreated + return newBranchParent, mutated, nodesCreated } if len(parentLeaf.Key) == commonPrefixLength { @@ -393,9 +403,12 @@ func (t *Trie) insertInLeaf(parentLeaf *Node, key, value []byte) ( } else { // make the leaf a child of the new branch copySettings := node.DefaultCopySettings - parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) childIndex := parentLeafKey[commonPrefixLength] - parentLeaf.Key = parentLeaf.Key[commonPrefixLength+1:] + newParentLeafKey := parentLeaf.Key[commonPrefixLength+1:] + if !bytes.Equal(parentLeaf.Key, newParentLeafKey) { + parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) + parentLeaf.Key = newParentLeafKey + } newBranchParent.Children[childIndex] = parentLeaf newBranchParent.Descendants++ nodesCreated++ @@ -410,17 +423,22 @@ func (t *Trie) insertInLeaf(parentLeaf *Node, key, value []byte) ( newBranchParent.Descendants++ nodesCreated++ - return newBranchParent, nodesCreated + return newBranchParent, mutated, nodesCreated } func (t *Trie) insertInBranch(parentBranch *Node, key, value []byte) ( - newParent *Node, nodesCreated uint32) { + newParent *Node, mutated bool, nodesCreated uint32) { copySettings := node.DefaultCopySettings - parentBranch = t.prepBranchForMutation(parentBranch, copySettings) if bytes.Equal(key, parentBranch.Key) { + if bytes.Equal(parentBranch.SubValue, value) { + const mutated = false + return parentBranch, mutated, 0 + } + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) parentBranch.SubValue = value - return parentBranch, 0 + const mutated = true + return parentBranch, mutated, 0 } if bytes.HasPrefix(key, parentBranch.Key) { @@ -438,17 +456,27 @@ func (t *Trie) insertInBranch(parentBranch *Node, key, value []byte) ( Dirty: true, } nodesCreated = 1 - } else { - child, nodesCreated = t.insert(child, remainingKey, value) + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) + parentBranch.Children[childIndex] = child + parentBranch.Descendants += nodesCreated + const mutated = true + return parentBranch, mutated, nodesCreated + } + + child, mutated, nodesCreated := t.insert(child, remainingKey, value) + if !mutated { + return parentBranch, mutated, 0 } + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) parentBranch.Children[childIndex] = child parentBranch.Descendants += nodesCreated - return parentBranch, nodesCreated + return parentBranch, mutated, nodesCreated } // we need to branch out at the point where the keys diverge // update partial keys, new branch has key up to matching length + mutated = true nodesCreated = 1 commonPrefixLength := lenCommonPrefix(key, parentBranch.Key) newParentBranch := &Node{ @@ -461,6 +489,8 @@ func (t *Trie) insertInBranch(parentBranch *Node, key, value []byte) ( oldParentIndex := parentBranch.Key[commonPrefixLength] remainingOldParentKey := parentBranch.Key[commonPrefixLength+1:] + // Note: parentBranch.Key != remainingOldParentKey + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) parentBranch.Key = remainingOldParentKey newParentBranch.Children[oldParentIndex] = parentBranch newParentBranch.Descendants += 1 + parentBranch.Descendants @@ -471,12 +501,12 @@ func (t *Trie) insertInBranch(parentBranch *Node, key, value []byte) ( childIndex := key[commonPrefixLength] remainingKey := key[commonPrefixLength+1:] var additionalNodesCreated uint32 - newParentBranch.Children[childIndex], additionalNodesCreated = t.insert(nil, remainingKey, value) + newParentBranch.Children[childIndex], _, additionalNodesCreated = t.insert(nil, remainingKey, value) nodesCreated += additionalNodesCreated newParentBranch.Descendants += additionalNodesCreated } - return newParentBranch, nodesCreated + return newParentBranch, mutated, nodesCreated } // LoadFromMap loads the given data mapping of key to value into the trie. @@ -792,7 +822,11 @@ func (t *Trie) deleteNodesLimit(parent *Node, limit uint32) ( copySettings := node.DefaultCopySettings branch = t.prepBranchForMutation(branch, copySettings) + branch.Children[i], newDeleted, newNodesRemoved = t.deleteNodesLimit(child, limit) + // Note: newDeleted can never be zero here since the limit isn't zero + // and the child is not nil. Therefore it is safe to prepare the branch + // for mutation right before this call. if branch.Children[i] == nil { nilChildren++ } diff --git a/lib/trie/trie_test.go b/lib/trie/trie_test.go index 060ce694518..3e9886c6b83 100644 --- a/lib/trie/trie_test.go +++ b/lib/trie/trie_test.go @@ -1069,6 +1069,7 @@ func Test_Trie_insert(t *testing.T) { key []byte value []byte newNode *Node + mutated bool nodesCreated uint32 }{ "nil parent": { @@ -1083,6 +1084,7 @@ func Test_Trie_insert(t *testing.T) { Generation: 1, Dirty: true, }, + mutated: true, nodesCreated: 1, }, "branch parent": { @@ -1116,6 +1118,7 @@ func Test_Trie_insert(t *testing.T) { {Key: []byte{2}, SubValue: []byte{1}}, }), }, + mutated: true, nodesCreated: 1, }, "override leaf parent": { @@ -1134,6 +1137,7 @@ func Test_Trie_insert(t *testing.T) { Generation: 1, Dirty: true, }, + mutated: true, }, "write same leaf value to leaf parent": { trie: Trie{ @@ -1175,6 +1179,7 @@ func Test_Trie_insert(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, "write leaf as divergent child next to parent leaf": { @@ -1208,9 +1213,10 @@ func Test_Trie_insert(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 2, }, - "write leaf into nil value leaf": { + "override leaf value": { trie: Trie{ generation: 1, }, @@ -1226,8 +1232,9 @@ func Test_Trie_insert(t *testing.T) { Dirty: true, Generation: 1, }, + mutated: true, }, - "write leaf as child to nil value leaf": { + "write leaf as child to leaf": { trie: Trie{ generation: 1, }, @@ -1253,6 +1260,7 @@ func Test_Trie_insert(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, } @@ -1265,9 +1273,10 @@ func Test_Trie_insert(t *testing.T) { trie := testCase.trie expectedTrie := *trie.DeepCopy() - newNode, nodesCreated := trie.insert(testCase.parent, testCase.key, testCase.value) + newNode, mutated, nodesCreated := trie.insert(testCase.parent, testCase.key, testCase.value) assert.Equal(t, testCase.newNode, newNode) + assert.Equal(t, testCase.mutated, mutated) assert.Equal(t, testCase.nodesCreated, nodesCreated) assert.Equal(t, expectedTrie, trie) }) @@ -1282,8 +1291,29 @@ func Test_Trie_insertInBranch(t *testing.T) { key []byte value []byte newNode *Node + mutated bool nodesCreated uint32 }{ + "insert existing value to branch": { + parent: &Node{ + Key: []byte{2}, + SubValue: []byte("same"), + Descendants: 1, + Children: padRightChildren([]*Node{ + {Key: []byte{1}, SubValue: []byte{1}}, + }), + }, + key: []byte{2}, + value: []byte("same"), + newNode: &Node{ + Key: []byte{2}, + SubValue: []byte("same"), + Descendants: 1, + Children: padRightChildren([]*Node{ + {Key: []byte{1}, SubValue: []byte{1}}, + }), + }, + }, "update with branch": { parent: &Node{ Key: []byte{2}, @@ -1304,6 +1334,7 @@ func Test_Trie_insertInBranch(t *testing.T) { {Key: []byte{1}, SubValue: []byte{1}}, }), }, + mutated: true, }, "update with leaf": { parent: &Node{ @@ -1325,6 +1356,7 @@ func Test_Trie_insertInBranch(t *testing.T) { {Key: []byte{1}, SubValue: []byte{1}}, }), }, + mutated: true, }, "add leaf as direct child": { parent: &Node{ @@ -1352,8 +1384,29 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, + "insert same leaf as existing direct child leaf": { + parent: &Node{ + Key: []byte{2}, + SubValue: []byte{5}, + Descendants: 1, + Children: padRightChildren([]*Node{ + {Key: []byte{1}, SubValue: []byte{1}}, + }), + }, + key: []byte{2, 0, 1}, + value: []byte{1}, + newNode: &Node{ + Key: []byte{2}, + SubValue: []byte{5}, + Descendants: 1, + Children: padRightChildren([]*Node{ + {Key: []byte{1}, SubValue: []byte{1}}, + }), + }, + }, "add leaf as nested child": { parent: &Node{ Key: []byte{2}, @@ -1395,6 +1448,7 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, "split branch for longer key": { @@ -1430,6 +1484,7 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 2, }, "split root branch": { @@ -1465,6 +1520,7 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 2, }, "update with leaf at empty key": { @@ -1496,6 +1552,7 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, } @@ -1507,9 +1564,10 @@ func Test_Trie_insertInBranch(t *testing.T) { trie := new(Trie) - newNode, nodesCreated := trie.insertInBranch(testCase.parent, testCase.key, testCase.value) + newNode, mutated, nodesCreated := trie.insertInBranch(testCase.parent, testCase.key, testCase.value) assert.Equal(t, testCase.newNode, newNode) + assert.Equal(t, testCase.mutated, mutated) assert.Equal(t, testCase.nodesCreated, nodesCreated) assert.Equal(t, new(Trie), trie) // check no mutation })