From bd6d8e411b6f474e8299d50ac2aac8934a5e040e Mon Sep 17 00:00:00 2001 From: Quentin McGaw Date: Mon, 28 Mar 2022 16:46:43 +0200 Subject: [PATCH] feat(trie): finer deep copy of nodes (#2384) - Do not copy cached fields when not needed - Do not copy key field when not needed - Do not copy value field when not needed --- internal/trie/node/copy.go | 102 +++++++++++++++++++++++++------- internal/trie/node/copy_test.go | 61 +++++++++++++++---- internal/trie/node/node.go | 2 +- lib/trie/trie.go | 66 ++++++++++++++------- lib/trie/trie_test.go | 11 ++-- 5 files changed, 182 insertions(+), 60 deletions(-) diff --git a/internal/trie/node/copy.go b/internal/trie/node/copy.go index c69161e5f8..163f6423c6 100644 --- a/internal/trie/node/copy.go +++ b/internal/trie/node/copy.go @@ -3,76 +3,134 @@ package node +var ( + // DefaultCopySettings contains the following copy settings: + // - children are NOT deep copied recursively + // - the HashDigest field is left empty on the copy + // - the Encoding field is left empty on the copy + // - the key field is deep copied + // - the value field is deep copied + DefaultCopySettings = CopySettings{ + CopyKey: true, + CopyValue: true, + } + + // DeepCopySettings returns the following copy settings: + // - children are deep copied recursively + // - the HashDigest field is deep copied + // - the Encoding field is deep copied + // - the key field is deep copied + // - the value field is deep copied + DeepCopySettings = CopySettings{ + CopyChildren: true, + CopyCached: true, + CopyKey: true, + CopyValue: true, + } +) + +// CopySettings contains settings to configure the deep copy +// of a node. +type CopySettings struct { + // CopyChildren can be set to true to recursively deep copy the eventual + // children of the node. This is false by default and should only be used + // in tests since it is quite inefficient. + CopyChildren bool + // CopyCached can be set to true to deep copy the cached digest + // and encoding fields on the current node copied. + // This is false by default because in production, a node is copied + // when it is about to be mutated, hence making its cached fields + // no longer valid. + CopyCached bool + // CopyKey can be set to true to deep copy the key field of + // the node. This is useful when false if the key is about to + // be assigned after the Copy operation, to save a memory operation. + CopyKey bool + // CopyValue can be set to true to deep copy the value field of + // the node. This is useful when false if the value is about to + // be assigned after the Copy operation, to save a memory operation. + CopyValue bool +} + // Copy deep copies the branch. // Setting copyChildren to true will deep copy // children as well. -func (b *Branch) Copy(copyChildren bool) Node { +func (b *Branch) Copy(settings CopySettings) Node { cpy := &Branch{ Dirty: b.Dirty, Generation: b.Generation, } - if copyChildren { + if settings.CopyChildren { + // Copy all fields of children if we deep copy children + childSettings := settings + childSettings.CopyKey = true + childSettings.CopyValue = true + childSettings.CopyCached = true for i, child := range b.Children { if child == nil { continue } - cpy.Children[i] = child.Copy(copyChildren) + cpy.Children[i] = child.Copy(childSettings) } } else { cpy.Children = b.Children // copy interface pointers only } - if b.Key != nil { + if settings.CopyKey && b.Key != nil { cpy.Key = make([]byte, len(b.Key)) copy(cpy.Key, b.Key) } // nil and []byte{} are encoded differently, watch out! - if b.Value != nil { + if settings.CopyValue && b.Value != nil { cpy.Value = make([]byte, len(b.Value)) copy(cpy.Value, b.Value) } - if b.HashDigest != nil { - cpy.HashDigest = make([]byte, len(b.HashDigest)) - copy(cpy.HashDigest, b.HashDigest) - } + if settings.CopyCached { + if b.HashDigest != nil { + cpy.HashDigest = make([]byte, len(b.HashDigest)) + copy(cpy.HashDigest, b.HashDigest) + } - if b.Encoding != nil { - cpy.Encoding = make([]byte, len(b.Encoding)) - copy(cpy.Encoding, b.Encoding) + if b.Encoding != nil { + cpy.Encoding = make([]byte, len(b.Encoding)) + copy(cpy.Encoding, b.Encoding) + } } return cpy } // Copy deep copies the leaf. -func (l *Leaf) Copy(_ bool) Node { +func (l *Leaf) Copy(settings CopySettings) Node { cpy := &Leaf{ Dirty: l.Dirty, Generation: l.Generation, } - if l.Key != nil { + if settings.CopyKey && l.Key != nil { cpy.Key = make([]byte, len(l.Key)) copy(cpy.Key, l.Key) } // nil and []byte{} are encoded differently, watch out! - if l.Value != nil { + if settings.CopyValue && l.Value != nil { cpy.Value = make([]byte, len(l.Value)) copy(cpy.Value, l.Value) } - if l.HashDigest != nil { - cpy.HashDigest = make([]byte, len(l.HashDigest)) - copy(cpy.HashDigest, l.HashDigest) - } + if settings.CopyCached { + if l.HashDigest != nil { + cpy.HashDigest = make([]byte, len(l.HashDigest)) + copy(cpy.HashDigest, l.HashDigest) + } - if l.Encoding != nil { - cpy.Encoding = make([]byte, len(l.Encoding)) - copy(cpy.Encoding, l.Encoding) + if l.Encoding != nil { + cpy.Encoding = make([]byte, len(l.Encoding)) + copy(cpy.Encoding, l.Encoding) + } } return cpy diff --git a/internal/trie/node/copy_test.go b/internal/trie/node/copy_test.go index 8210547c61..215a572af8 100644 --- a/internal/trie/node/copy_test.go +++ b/internal/trie/node/copy_test.go @@ -4,6 +4,7 @@ package node import ( + "reflect" "testing" "github.com/stretchr/testify/assert" @@ -12,8 +13,7 @@ import ( func testForSliceModif(t *testing.T, original, copied []byte) { t.Helper() - require.Equal(t, len(original), len(copied)) - if len(copied) == 0 { + if !reflect.DeepEqual(original, copied) || len(copied) == 0 { // cannot test for modification return } @@ -26,7 +26,7 @@ func Test_Branch_Copy(t *testing.T) { testCases := map[string]struct { branch *Branch - copyChildren bool + settings CopySettings expectedBranch *Branch }{ "empty branch": { @@ -44,15 +44,14 @@ func Test_Branch_Copy(t *testing.T) { HashDigest: []byte{5}, Encoding: []byte{6}, }, + settings: DefaultCopySettings, expectedBranch: &Branch{ Key: []byte{1, 2}, Value: []byte{3, 4}, Children: [16]Node{ nil, nil, &Leaf{Key: []byte{9}}, }, - Dirty: true, - HashDigest: []byte{5}, - Encoding: []byte{6}, + Dirty: true, }, }, "branch with children copied": { @@ -61,13 +60,38 @@ func Test_Branch_Copy(t *testing.T) { nil, nil, &Leaf{Key: []byte{9}}, }, }, - copyChildren: true, + settings: CopySettings{ + CopyChildren: true, + }, expectedBranch: &Branch{ Children: [16]Node{ nil, nil, &Leaf{Key: []byte{9}}, }, }, }, + "deep copy": { + branch: &Branch{ + Key: []byte{1, 2}, + Value: []byte{3, 4}, + Children: [16]Node{ + nil, nil, &Leaf{Key: []byte{9}}, + }, + Dirty: true, + HashDigest: []byte{5}, + Encoding: []byte{6}, + }, + settings: DeepCopySettings, + expectedBranch: &Branch{ + Key: []byte{1, 2}, + Value: []byte{3, 4}, + Children: [16]Node{ + nil, nil, &Leaf{Key: []byte{9}}, + }, + Dirty: true, + HashDigest: []byte{5}, + Encoding: []byte{6}, + }, + }, } for name, testCase := range testCases { @@ -75,7 +99,7 @@ func Test_Branch_Copy(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() - nodeCopy := testCase.branch.Copy(testCase.copyChildren) + nodeCopy := testCase.branch.Copy(testCase.settings) branchCopy, ok := nodeCopy.(*Branch) require.True(t, ok) @@ -97,10 +121,12 @@ func Test_Leaf_Copy(t *testing.T) { testCases := map[string]struct { leaf *Leaf + settings CopySettings expectedLeaf *Leaf }{ "empty leaf": { leaf: &Leaf{}, + settings: DefaultCopySettings, expectedLeaf: &Leaf{}, }, "non empty leaf": { @@ -111,6 +137,22 @@ func Test_Leaf_Copy(t *testing.T) { HashDigest: []byte{5}, Encoding: []byte{6}, }, + settings: DefaultCopySettings, + expectedLeaf: &Leaf{ + Key: []byte{1, 2}, + Value: []byte{3, 4}, + Dirty: true, + }, + }, + "deep copy": { + leaf: &Leaf{ + Key: []byte{1, 2}, + Value: []byte{3, 4}, + Dirty: true, + HashDigest: []byte{5}, + Encoding: []byte{6}, + }, + settings: DeepCopySettings, expectedLeaf: &Leaf{ Key: []byte{1, 2}, Value: []byte{3, 4}, @@ -126,8 +168,7 @@ func Test_Leaf_Copy(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() - const copyChildren = false - nodeCopy := testCase.leaf.Copy(copyChildren) + nodeCopy := testCase.leaf.Copy(testCase.settings) leafCopy, ok := nodeCopy.(*Leaf) require.True(t, ok) diff --git a/internal/trie/node/node.go b/internal/trie/node/node.go index cedab37772..eee1dcc96a 100644 --- a/internal/trie/node/node.go +++ b/internal/trie/node/node.go @@ -20,7 +20,7 @@ type Node interface { GetValue() (value []byte) GetGeneration() (generation uint64) SetGeneration(generation uint64) - Copy(copyChildren bool) Node + Copy(settings CopySettings) (cpy Node) Type() Type StringNode() (stringNode *gotree.Node) } diff --git a/lib/trie/trie.go b/lib/trie/trie.go index 0b81cca93c..e87d73d4c2 100644 --- a/lib/trie/trie.go +++ b/lib/trie/trie.go @@ -46,10 +46,12 @@ func NewTrie(root Node) *Trie { // the set of deleted hashes. func (t *Trie) Snapshot() (newTrie *Trie) { childTries := make(map[common.Hash]*Trie, len(t.childTries)) + rootCopySettings := node.DefaultCopySettings + rootCopySettings.CopyCached = true for rootHash, childTrie := range t.childTries { childTries[rootHash] = &Trie{ generation: childTrie.generation + 1, - root: childTrie.root.Copy(false), + root: childTrie.root.Copy(rootCopySettings), deletedKeys: make(map[common.Hash]struct{}), } } @@ -62,26 +64,28 @@ func (t *Trie) Snapshot() (newTrie *Trie) { } } -func (t *Trie) prepLeafForMutation(currentLeaf *node.Leaf) (newLeaf *node.Leaf) { +func (t *Trie) prepLeafForMutation(currentLeaf *node.Leaf, + copySettings node.CopySettings) (newLeaf *node.Leaf) { if currentLeaf.Generation == t.generation { // no need to deep copy and update generation // of current leaf. newLeaf = currentLeaf } else { - newNode := updateGeneration(currentLeaf, t.generation, t.deletedKeys) + newNode := updateGeneration(currentLeaf, t.generation, t.deletedKeys, copySettings) newLeaf = newNode.(*node.Leaf) } newLeaf.SetDirty(true) return newLeaf } -func (t *Trie) prepBranchForMutation(currentBranch *node.Branch) (newBranch *node.Branch) { +func (t *Trie) prepBranchForMutation(currentBranch *node.Branch, + copySettings node.CopySettings) (newBranch *node.Branch) { if currentBranch.Generation == t.generation { // no need to deep copy and update generation // of current branch. newBranch = currentBranch } else { - newNode := updateGeneration(currentBranch, t.generation, t.deletedKeys) + newNode := updateGeneration(currentBranch, t.generation, t.deletedKeys, copySettings) newBranch = newNode.(*node.Branch) } newBranch.SetDirty(true) @@ -92,9 +96,9 @@ func (t *Trie) prepBranchForMutation(currentBranch *node.Branch) (newBranch *nod // an older trie generation (snapshot) so we deep copy the // node and update the generation on the newer copy. func updateGeneration(currentNode Node, trieGeneration uint64, - deletedHashes map[common.Hash]struct{}) (newNode Node) { - const copyChildren = false - newNode = currentNode.Copy(copyChildren) + deletedHashes map[common.Hash]struct{}, copySettings node.CopySettings) ( + newNode Node) { + newNode = currentNode.Copy(copySettings) newNode.SetGeneration(trieGeneration) // The hash of the node from a previous snapshotted trie @@ -137,8 +141,8 @@ func (t *Trie) DeepCopy() (trieCopy *Trie) { } if t.root != nil { - const copyChildren = true - trieCopy.root = t.root.Copy(copyChildren) + copySettings := node.DeepCopySettings + trieCopy.root = t.root.Copy(copySettings) } return trieCopy @@ -146,8 +150,9 @@ func (t *Trie) DeepCopy() (trieCopy *Trie) { // RootNode returns a copy of the root node of the trie. func (t *Trie) RootNode() Node { - const copyChildren = false - return t.root.Copy(copyChildren) + copySettings := node.DefaultCopySettings + copySettings.CopyCached = true + return t.root.Copy(copySettings) } // encodeRoot writes the encoding of the root node to the buffer. @@ -360,7 +365,9 @@ func (t *Trie) insertInLeaf(parentLeaf *node.Leaf, key, return parentLeaf } - parentLeaf = t.prepLeafForMutation(parentLeaf) + copySettings := node.DefaultCopySettings + copySettings.CopyValue = false + parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) parentLeaf.Value = value return parentLeaf } @@ -381,7 +388,8 @@ func (t *Trie) insertInLeaf(parentLeaf *node.Leaf, key, if len(key) < len(parentLeafKey) { // Move the current leaf parent as a child to the new branch. - parentLeaf = t.prepLeafForMutation(parentLeaf) + copySettings := node.DefaultCopySettings + parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) childIndex := parentLeafKey[commonPrefixLength] parentLeaf.Key = parentLeaf.Key[commonPrefixLength+1:] newBranchParent.Children[childIndex] = parentLeaf @@ -395,7 +403,8 @@ func (t *Trie) insertInLeaf(parentLeaf *node.Leaf, key, newBranchParent.Value = parentLeaf.Value } else { // make the leaf a child of the new branch - parentLeaf = t.prepLeafForMutation(parentLeaf) + copySettings := node.DefaultCopySettings + parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) childIndex := parentLeafKey[commonPrefixLength] parentLeaf.Key = parentLeaf.Key[commonPrefixLength+1:] newBranchParent.Children[childIndex] = parentLeaf @@ -412,7 +421,8 @@ func (t *Trie) insertInLeaf(parentLeaf *node.Leaf, key, } func (t *Trie) insertInBranch(parentBranch *node.Branch, key, value []byte) (newParent Node) { - parentBranch = t.prepBranchForMutation(parentBranch) + copySettings := node.DefaultCopySettings + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) if bytes.Equal(key, parentBranch.Key) { parentBranch.Value = value @@ -715,7 +725,8 @@ func (t *Trie) clearPrefixLimitBranch(branch *node.Branch, prefix []byte, limit return branch, valuesDeleted, allDeleted } - branch = t.prepBranchForMutation(branch) + copySettings := node.DefaultCopySettings + branch = t.prepBranchForMutation(branch, copySettings) branch.Children[childIndex] = child newParent = handleDeletion(branch, prefix) return newParent, valuesDeleted, allDeleted @@ -741,7 +752,8 @@ func (t *Trie) clearPrefixLimitChild(branch *node.Branch, prefix []byte, limit u return branch, valuesDeleted, allDeleted } - branch = t.prepBranchForMutation(branch) + copySettings := node.DefaultCopySettings + branch = t.prepBranchForMutation(branch, copySettings) branch.Children[childIndex] = child newParent = handleDeletion(branch, prefix) @@ -776,7 +788,8 @@ func (t *Trie) deleteNodesLimit(parent Node, prefix []byte, limit uint32) ( continue } - branch = t.prepBranchForMutation(branch) + copySettings := node.DefaultCopySettings + branch = t.prepBranchForMutation(branch, copySettings) branch.Children[i], newDeleted = t.deleteNodesLimit(child, fullKey, limit) if branch.Children[i] == nil { nilChildren++ @@ -842,7 +855,8 @@ func (t *Trie) clearPrefix(parent Node, prefix []byte) ( return parent, false } - branch = t.prepBranchForMutation(branch) + copySettings := node.DefaultCopySettings + branch = t.prepBranchForMutation(branch, copySettings) branch.Children[childIndex] = nil newParent = handleDeletion(branch, prefix) return newParent, true @@ -863,7 +877,8 @@ func (t *Trie) clearPrefix(parent Node, prefix []byte) ( return parent, false } - branch = t.prepBranchForMutation(branch) + copySettings := node.DefaultCopySettings + branch = t.prepBranchForMutation(branch, copySettings) branch.Children[childIndex] = child newParent = handleDeletion(branch, prefix) return newParent, true @@ -902,7 +917,11 @@ func deleteLeaf(parent Node, key []byte) (newParent Node) { func (t *Trie) deleteBranch(branch *node.Branch, key []byte) (newParent Node, deleted bool) { if len(key) == 0 || bytes.Equal(branch.Key, key) { - branch = t.prepBranchForMutation(branch) + copySettings := node.DefaultCopySettings + copySettings.CopyValue = false + branch = t.prepBranchForMutation(branch, copySettings) + // we need to set to nil if the branch has the same generation + // as the current trie. branch.Value = nil return handleDeletion(branch, key), true } @@ -917,7 +936,8 @@ func (t *Trie) deleteBranch(branch *node.Branch, key []byte) (newParent Node, de return branch, false } - branch = t.prepBranchForMutation(branch) + copySettings := node.DefaultCopySettings + branch = t.prepBranchForMutation(branch, copySettings) branch.Children[childIndex] = newChild newParent = handleDeletion(branch, key) return newParent, true diff --git a/lib/trie/trie_test.go b/lib/trie/trie_test.go index 7ee8858c51..1b9a435263 100644 --- a/lib/trie/trie_test.go +++ b/lib/trie/trie_test.go @@ -99,6 +99,7 @@ func Test_Trie_updateGeneration(t *testing.T) { testCases := map[string]struct { trieGeneration uint64 node Node + copySettings node.CopySettings newNode Node copied bool expectedDeletedHashes map[common.Hash]struct{} @@ -109,6 +110,7 @@ func Test_Trie_updateGeneration(t *testing.T) { Generation: 1, Key: []byte{1}, }, + copySettings: node.DefaultCopySettings, newNode: &node.Leaf{ Generation: 2, Key: []byte{1}, @@ -123,10 +125,10 @@ func Test_Trie_updateGeneration(t *testing.T) { Key: []byte{1}, HashDigest: []byte{1, 2, 3}, }, + copySettings: node.DefaultCopySettings, newNode: &node.Leaf{ Generation: 2, Key: []byte{1}, - HashDigest: []byte{1, 2, 3}, }, copied: true, expectedDeletedHashes: map[common.Hash]struct{}{ @@ -147,7 +149,8 @@ func Test_Trie_updateGeneration(t *testing.T) { deletedHashes := make(map[common.Hash]struct{}) - newNode := updateGeneration(testCase.node, testCase.trieGeneration, deletedHashes) + newNode := updateGeneration(testCase.node, testCase.trieGeneration, + deletedHashes, testCase.copySettings) assert.Equal(t, testCase.newNode, newNode) assert.Equal(t, testCase.expectedDeletedHashes, deletedHashes) @@ -2042,10 +2045,10 @@ func Test_retrieve(t *testing.T) { t.Parallel() // Check no mutation was done - const copyChildren = true + copySettings := node.DeepCopySettings var expectedParent Node if testCase.parent != nil { - expectedParent = testCase.parent.Copy(copyChildren) + expectedParent = testCase.parent.Copy(copySettings) } value := retrieve(testCase.parent, testCase.key)