From 6aee6187430db59151a88a77ed2ab65839c14669 Mon Sep 17 00:00:00 2001 From: Quentin McGaw Date: Mon, 10 Jan 2022 21:38:51 +0100 Subject: [PATCH] chore(lib/trie): use map for `trie.getInsertedNodeHashes` to reduce memory pressure (#2115) --- dot/state/pruner/pruner.go | 39 ++++++++++++++++---------------- dot/state/storage.go | 6 ++--- lib/runtime/storage/trie.go | 12 +++++----- lib/trie/database.go | 45 ++++++++++++++++++++++--------------- lib/trie/trie.go | 10 ++++----- lib/trie/trie_test.go | 2 +- 6 files changed, 63 insertions(+), 51 deletions(-) diff --git a/dot/state/pruner/pruner.go b/dot/state/pruner/pruner.go index 93cea85691..dae614fb5f 100644 --- a/dot/state/pruner/pruner.go +++ b/dot/state/pruner/pruner.go @@ -50,14 +50,16 @@ type Config struct { // Pruner is implemented by FullNode and ArchiveNode. type Pruner interface { - StoreJournalRecord(deleted, inserted []common.Hash, blockHash common.Hash, blockNum int64) error + StoreJournalRecord(deletedHashesSet, insertedHashesSet map[common.Hash]struct{}, + blockHash common.Hash, blockNum int64) error } // ArchiveNode is a no-op since we don't prune nodes in archive mode. type ArchiveNode struct{} // StoreJournalRecord for archive node doesn't do anything. -func (a *ArchiveNode) StoreJournalRecord(deleted, inserted []common.Hash, blockHash common.Hash, blockNum int64) error { +func (a *ArchiveNode) StoreJournalRecord(_, _ map[common.Hash]struct{}, + _ common.Hash, _ int64) error { return nil } @@ -86,9 +88,9 @@ type journalRecord struct { // blockHash of the block corresponding to journal record blockHash common.Hash // Hash of keys that are inserted into state trie of the block - insertedKeys []common.Hash + insertedHashesSet map[common.Hash]struct{} // Hash of keys that are deleted from state trie of the block - deletedKeys []common.Hash + deletedHashesSet map[common.Hash]struct{} } type journalKey struct { @@ -96,11 +98,12 @@ type journalKey struct { blockHash common.Hash } -func newJournalRecord(hash common.Hash, insertedKeys, deletedKeys []common.Hash) *journalRecord { +func newJournalRecord(hash common.Hash, insertedHashesSet, + deletedHashesSet map[common.Hash]struct{}) *journalRecord { return &journalRecord{ - blockHash: hash, - insertedKeys: insertedKeys, - deletedKeys: deletedKeys, + blockHash: hash, + insertedHashesSet: insertedHashesSet, + deletedHashesSet: deletedHashesSet, } } @@ -136,8 +139,9 @@ func NewFullNode(db, storageDB chaindb.Database, retainBlocks int64, l log.Level } // StoreJournalRecord stores journal record into DB and add deathRow into deathList -func (p *FullNode) StoreJournalRecord(deleted, inserted []common.Hash, blockHash common.Hash, blockNum int64) error { - jr := newJournalRecord(blockHash, inserted, deleted) +func (p *FullNode) StoreJournalRecord(deletedHashesSet, insertedHashesSet map[common.Hash]struct{}, + blockHash common.Hash, blockNum int64) error { + jr := newJournalRecord(blockHash, insertedHashesSet, deletedHashesSet) key := &journalKey{blockNum, blockHash} err := p.storeJournal(key, jr) @@ -163,16 +167,13 @@ func (p *FullNode) addDeathRow(jr *journalRecord, blockNum int64) { return } - p.processInsertedKeys(jr.insertedKeys, jr.blockHash) + p.processInsertedKeys(jr.insertedHashesSet, jr.blockHash) // add deleted keys from journal to death index - for _, k := range jr.deletedKeys { + deletedKeys := make(map[common.Hash]int64, len(jr.deletedHashesSet)) + for k := range jr.deletedHashesSet { p.deathIndex[k] = blockNum - } - - deletedKeys := make(map[common.Hash]int64) - for _, data := range jr.deletedKeys { - deletedKeys[data] = blockNum + deletedKeys[k] = blockNum } blockIndex := blockNum - p.pendingNumber @@ -190,8 +191,8 @@ func (p *FullNode) addDeathRow(jr *journalRecord, blockNum int64) { } // Remove re-inserted keys -func (p *FullNode) processInsertedKeys(insKeys []common.Hash, blockHash common.Hash) { - for _, k := range insKeys { +func (p *FullNode) processInsertedKeys(insertedHashesSet map[common.Hash]struct{}, blockHash common.Hash) { + for k := range insertedHashesSet { num, ok := p.deathIndex[k] if !ok { continue diff --git a/dot/state/storage.go b/dot/state/storage.go index 93ced1ba13..5b71fa4592 100644 --- a/dot/state/storage.go +++ b/dot/state/storage.go @@ -107,13 +107,13 @@ func (s *StorageState) StoreTrie(ts *rtstorage.TrieState, header *types.Header) } if header != nil { - insKeys, err := ts.GetInsertedNodeHashes() + insertedNodeHashes, err := ts.GetInsertedNodeHashes() if err != nil { return fmt.Errorf("failed to get state trie inserted keys: block %s %w", header.Hash(), err) } - delKeys := ts.GetDeletedNodeHashes() - err = s.pruner.StoreJournalRecord(delKeys, insKeys, header.Hash(), header.Number.Int64()) + deletedNodeHashes := ts.GetDeletedNodeHashes() + err = s.pruner.StoreJournalRecord(deletedNodeHashes, insertedNodeHashes, header.Hash(), header.Number.Int64()) if err != nil { return err } diff --git a/lib/runtime/storage/trie.go b/lib/runtime/storage/trie.go index bd57efd12c..13dd3024f3 100644 --- a/lib/runtime/storage/trie.go +++ b/lib/runtime/storage/trie.go @@ -274,16 +274,18 @@ func (s *TrieState) LoadCodeHash() (common.Hash, error) { return common.Blake2bHash(code) } -// GetInsertedNodeHashes returns the hash of nodes inserted into state trie since last block produced -func (s *TrieState) GetInsertedNodeHashes() ([]common.Hash, error) { +// GetInsertedNodeHashes returns a set of hashes of all nodes +// that were inserted into state trie since the last block produced. +func (s *TrieState) GetInsertedNodeHashes() (hashesSet map[common.Hash]struct{}, err error) { s.lock.RLock() defer s.lock.RUnlock() return s.t.GetInsertedNodeHashes() } -// GetDeletedNodeHashes returns the hash of nodes that are deleted from state trie since last block produced -func (s *TrieState) GetDeletedNodeHashes() []common.Hash { +// GetDeletedNodeHashes returns the hash of nodes that were deleted +// from the state trie since the last block produced. +func (s *TrieState) GetDeletedNodeHashes() (hashesSet map[common.Hash]struct{}) { s.lock.RLock() defer s.lock.RUnlock() - return s.t.GetDeletedNodeHash() + return s.t.GetDeletedNodeHashes() } diff --git a/lib/trie/database.go b/lib/trie/database.go index 979c346cba..52db68a112 100644 --- a/lib/trie/database.go +++ b/lib/trie/database.go @@ -400,23 +400,29 @@ func (t *Trie) writeDirty(db chaindb.Batch, n Node) error { return nil } -// GetInsertedNodeHashes returns the hashes of all nodes that were -// inserted in the state trie since the last snapshot. +// GetInsertedNodeHashes returns a set of hashes with all +// the hashes of all nodes that were inserted in the state trie +// since the last snapshot. // We need to compute the hash values of each newly inserted node. -func (t *Trie) GetInsertedNodeHashes() (hashes []common.Hash, err error) { - return t.getInsertedNodeHashes(t.root) +func (t *Trie) GetInsertedNodeHashes() (hashesSet map[common.Hash]struct{}, err error) { + hashesSet = make(map[common.Hash]struct{}) + err = t.getInsertedNodeHashes(t.root, hashesSet) + if err != nil { + return nil, err + } + return hashesSet, nil } -func (t *Trie) getInsertedNodeHashes(n Node) (hashes []common.Hash, err error) { +func (t *Trie) getInsertedNodeHashes(n Node, hashes map[common.Hash]struct{}) (err error) { // TODO pass map of hashes or slice as argument to avoid copying // and using more memory. if n == nil || !n.IsDirty() { - return nil, nil + return nil } encoding, hash, err := n.EncodeAndHash() if err != nil { - return nil, fmt.Errorf( + return fmt.Errorf( "cannot encode and hash node with hash 0x%x: %w", n.GetHash(), err) } @@ -425,18 +431,18 @@ func (t *Trie) getInsertedNodeHashes(n Node) (hashes []common.Hash, err error) { // hash root node even if its encoding is under 32 bytes encodingDigest, err := common.Blake2bHash(encoding) if err != nil { - return nil, fmt.Errorf("cannot hash root node encoding: %w", err) + return fmt.Errorf("cannot hash root node encoding: %w", err) } hash = encodingDigest[:] } - hashes = append(hashes, common.BytesToHash(hash)) + hashes[common.BytesToHash(hash)] = struct{}{} switch n.Type() { case node.BranchType, node.BranchWithValueType: default: // not a branch - return hashes, nil + return nil } branch := n.(*node.Branch) @@ -446,20 +452,23 @@ func (t *Trie) getInsertedNodeHashes(n Node) (hashes []common.Hash, err error) { continue } - deeperHashes, err := t.getInsertedNodeHashes(child) + err := t.getInsertedNodeHashes(child, hashes) if err != nil { // Note: do not wrap error since this is called recursively. - return nil, err + return err } - - hashes = append(hashes, deeperHashes...) } - return hashes, nil + return nil } -// GetDeletedNodeHash returns the hash of nodes that were +// GetDeletedNodeHashes returns a set of all the hashes of nodes that were // deleted from the trie since the last snapshot was made. -func (t *Trie) GetDeletedNodeHash() []common.Hash { - return t.deletedKeys +// The returned set is a copy of the internal set to prevent data races. +func (t *Trie) GetDeletedNodeHashes() (hashesSet map[common.Hash]struct{}) { + hashesSet = make(map[common.Hash]struct{}, len(t.deletedKeys)) + for k := range t.deletedKeys { + hashesSet[k] = struct{}{} + } + return hashesSet } diff --git a/lib/trie/trie.go b/lib/trie/trie.go index 65d1bddf53..35cadfe92a 100644 --- a/lib/trie/trie.go +++ b/lib/trie/trie.go @@ -23,7 +23,7 @@ type Trie struct { generation uint64 root Node childTries map[common.Hash]*Trie // Used to store the child tries. - deletedKeys []common.Hash + deletedKeys map[common.Hash]struct{} } // NewEmptyTrie creates a trie with a nil root @@ -37,7 +37,7 @@ func NewTrie(root Node) *Trie { root: root, childTries: make(map[common.Hash]*Trie), generation: 0, // Initially zero but increases after every snapshot. - deletedKeys: make([]common.Hash, 0), + deletedKeys: make(map[common.Hash]struct{}), } } @@ -48,7 +48,7 @@ func (t *Trie) Snapshot() *Trie { children[h] = &Trie{ generation: c.generation + 1, root: c.root, - deletedKeys: make([]common.Hash, 0), + deletedKeys: make(map[common.Hash]struct{}), } } @@ -56,7 +56,7 @@ func (t *Trie) Snapshot() *Trie { generation: t.generation + 1, root: t.root, childTries: children, - deletedKeys: make([]common.Hash, 0), + deletedKeys: make(map[common.Hash]struct{}), } return newTrie @@ -77,7 +77,7 @@ func (t *Trie) maybeUpdateGeneration(n Node) Node { oldNodeHash := n.GetHash() if len(oldNodeHash) > 0 { hash := common.BytesToHash(oldNodeHash) - t.deletedKeys = append(t.deletedKeys, hash) + t.deletedKeys[hash] = struct{}{} } return newNode } diff --git a/lib/trie/trie_test.go b/lib/trie/trie_test.go index cc19116a50..db2c20d37a 100644 --- a/lib/trie/trie_test.go +++ b/lib/trie/trie_test.go @@ -511,7 +511,7 @@ func TestTrieDiff(t *testing.T) { err = newTrie.WriteDirty(storageDB) require.NoError(t, err) - for _, key := range deletedKeys { + for key := range deletedKeys { err = storageDB.Del(key.ToBytes()) require.NoError(t, err) }