diff --git a/internal/primitives/core/hash/hash.go b/internal/primitives/core/hash/hash.go
index a7dc1e7f0d..6cd56042b0 100644
--- a/internal/primitives/core/hash/hash.go
+++ b/internal/primitives/core/hash/hash.go
@@ -22,7 +22,7 @@ func (h256 H256) Bytes() []byte {
 
 // String returns string representation of H256
 func (h256 H256) String() string {
-	return fmt.Sprintf("%v", h256.Bytes())
+	return fmt.Sprintf("%x", h256.Bytes())
 }
 
 // Length returns the byte length of H256
diff --git a/pkg/trie/triedb/cache.go b/pkg/trie/triedb/cache.go
new file mode 100644
index 0000000000..1289a83c96
--- /dev/null
+++ b/pkg/trie/triedb/cache.go
@@ -0,0 +1,99 @@
+package triedb
+
+import "github.com/ChainSafe/gossamer/pkg/trie/triedb/hash"
+
+type CachedValues[H any] interface {
+	NonExistingCachedValue[H] | ExistingHashCachedValue[H] | ExistingCachedValue[H]
+	CachedValue[H]
+}
+
+type CachedValue[H any] interface {
+	data() []byte
+	hash() *H
+}
+
+func NewCachedValue[H any, CV CachedValues[H]](cv CV) CachedValue[H] {
+	return cv
+}
+
+// The value doesn't exist in the trie.
+type NonExistingCachedValue[H any] struct{}
+
+func (NonExistingCachedValue[H]) data() []byte { return nil }
+func (NonExistingCachedValue[H]) hash() *H     { return nil }
+
+// We cached the hash because we have not yet accessed the data.
+type ExistingHashCachedValue[H any] struct {
+	Hash H
+}
+
+func (ExistingHashCachedValue[H]) data() []byte  { return nil }
+func (ehcv ExistingHashCachedValue[H]) hash() *H { return &ehcv.Hash }
+
+// The value exists in the trie.
+type ExistingCachedValue[H any] struct {
+	/// The hash of the value.
+	Hash H
+	/// The actual data of the value stored as [`BytesWeak`].
+	///
+	/// The original data [`Bytes`] is stored in the trie node
+	/// that is also cached by the [`TrieCache`]. If this node is dropped,
+	/// this data will also not be "upgradeable" anymore.
+	Data []byte
+}
+
+func (ecv ExistingCachedValue[H]) data() []byte { return ecv.Data }
+func (ecv ExistingCachedValue[H]) hash() *H     { return &ecv.Hash }
+
+// A cache that can be used to speed-up certain operations when accessing the trie.
+//
+// The [`TrieDB`]/[`TrieDBMut`] by default are working with the internal hash-db in a non-owning
+// mode. This means that for every lookup in the trie, every node is always fetched and decoded on
+// the fly. Fetching and decoding a node always takes some time and can kill the performance of any
+// application that is doing quite a lot of trie lookups. To circumvent this performance
+// degradation, a cache can be used when looking up something in the trie. Any cache that should be
+// used with the [`TrieDB`]/[`TrieDBMut`] needs to implement this interface.
+//
+// The interface lays out a two-level cache: first the trie nodes cache and then the value cache.
+// The trie nodes cache, as the name indicates, is for caching trie nodes as [`NodeOwned`]. These
+// trie nodes are referenced by their hash. The value cache is caching [`CachedValue`]'s and these
+// are referenced by the key to look them up in the trie. As multiple different tries can have
+// different values under the same key, it is up to the cache implementation to ensure that the
+// correct value is returned. As each trie has a different root, this root can be used to
+// differentiate values under the same key.
+type TrieCache[H hash.Hash] interface {
+	/// Lookup value for the given `key`.
+	///
+	/// Returns nil if the `key` is unknown, or otherwise the
+	/// associated cached value.
+ /// + /// [`Self::cache_data_for_key`] is used to make the cache aware of data that is associated + /// to a `key`. + /// + /// # Attention + /// + /// The cache can be used for different tries, aka with different roots. This means + /// that the cache implementation needs to take care of always returning the correct value + /// for the current trie root. + GetValue(key []byte) CachedValue[H] + /// Cache the given `value` for the given `key`. + /// + /// # Attention + /// + /// The cache can be used for different tries, aka with different roots. This means + /// that the cache implementation needs to take care of caching `value` for the current + /// trie root. + SetValue(key []byte, value CachedValue[H]) + + /// Get or insert a [`NodeOwned`]. + /// + /// The cache implementation should look up based on the given `hash` if the node is already + /// known. If the node is not yet known, the given `fetch_node` function can be used to fetch + /// the particular node. + /// + /// Returns the [`NodeOwned`] or an error that happened on fetching the node. + GetOrInsertNode(hash H, fetchNode func() (NodeOwned[H], error)) (NodeOwned[H], error) + + /// Get the [`NodeOwned`] that corresponds to the given `hash`. + GetNode(hash H) NodeOwned[H] +} diff --git a/pkg/trie/triedb/lookup.go b/pkg/trie/triedb/lookup.go index 262a38a113..816e673598 100644 --- a/pkg/trie/triedb/lookup.go +++ b/pkg/trie/triedb/lookup.go @@ -17,6 +17,7 @@ import ( // Description of what kind of query will be made to the trie. type Query[Item any] func(data []byte) Item +// / Trie lookup helper object. type TrieLookup[H hash.Hash, Hasher hash.Hasher[H], QueryItem any] struct { // db to query from db db.DBGetter @@ -32,138 +33,49 @@ type TrieLookup[H hash.Hash, Hasher hash.Hasher[H], QueryItem any] struct { query Query[QueryItem] } +// NewTrieLookup is constructor for [TrieLookup] func NewTrieLookup[H hash.Hash, Hasher hash.Hasher[H], QueryItem any]( db db.DBGetter, hash H, cache TrieCache[H], recorder TrieRecorder, + query Query[QueryItem], ) TrieLookup[H, Hasher, QueryItem] { return TrieLookup[H, Hasher, QueryItem]{ db: db, hash: hash, cache: cache, recorder: recorder, - // TODO: add new TrieCache, add query + query: query, } } -func (l *TrieLookup[H, Hasher, QueryItem]) lookupNode( - nibbleKey nibbles.Nibbles, fullKey []byte, -) (codec.EncodedNode, error) { - // Start from root node and going downwards - partialKey := nibbleKey.Clone() - hash := l.hash - var keyNibbles uint - - // Iterates through non inlined nodes - for { - // Get node from DB - prefixedKey := append(nibbleKey.Mid(keyNibbles).Left().JoinedBytes(), hash.Bytes()...) 
- nodeData, err := l.db.Get(prefixedKey) - if err != nil { - return nil, ErrIncompleteDB - } - - l.recordAccess(EncodedNodeAccess[H]{Hash: hash, EncodedNode: nodeData}) - - InlinedChildrenIterator: - for { - // Decode node - reader := bytes.NewReader(nodeData) - decodedNode, err := codec.Decode[H](reader) - if err != nil { - return nil, err - } - - var nextNode codec.MerkleValue - - switch n := decodedNode.(type) { - case codec.Empty: - return nil, nil //nolint:nilnil - case codec.Leaf: - // We are in the node we were looking for - if partialKey.Equal(n.PartialKey) { - return n, nil - } - - l.recordAccess(NonExistingNodeAccess{FullKey: fullKey}) - - return nil, nil //nolint:nilnil - case codec.Branch: - nodePartialKey := n.PartialKey - - // This is unusual but could happen if for some reason one - // branch has a hashed child node that points to a node that - // doesn't share the prefix we are expecting - if !partialKey.StartsWith(nodePartialKey) { - l.recordAccess(NonExistingNodeAccess{FullKey: fullKey}) - return nil, nil //nolint:nilnil - } - - // We are in the node we were looking for - if partialKey.Equal(n.PartialKey) { - if n.Value != nil { - return n, nil - } - - l.recordAccess(NonExistingNodeAccess{FullKey: fullKey}) - return nil, nil //nolint:nilnil - } - - // This is not the node we were looking for but it might be in - // one of its children - childIdx := int(partialKey.At(nodePartialKey.Len())) - nextNode = n.Children[childIdx] - if nextNode == nil { - l.recordAccess(NonExistingNodeAccess{FullKey: fullKey}) - return nil, nil //nolint:nilnil - } - - // Advance the partial key consuming the part we already checked - partialKey = partialKey.Mid(nodePartialKey.Len() + 1) - keyNibbles += nodePartialKey.Len() + 1 - } - - // Next node could be inlined or hashed (pointer to a node) - // https://spec.polkadot.network/chap-state#defn-merkle-value - switch merkleValue := nextNode.(type) { - case codec.HashedNode[H]: - // If it's hashed we set the hash to look for it in next loop - hash = merkleValue.Hash - break InlinedChildrenIterator - case codec.InlineNode: - // If it is inlined we just need to decode it in the next loop - nodeData = merkleValue - } - } +func (l *TrieLookup[H, Hasher, QueryItem]) recordAccess(access TrieAccess) { + if l.recorder != nil { + l.recorder.Record(access) } } -func (l *TrieLookup[H, Hasher, QueryItem]) lookupValue( - fullKey []byte, keyNibbles nibbles.Nibbles, -) (value []byte, err error) { - node, err := l.lookupNode(keyNibbles, fullKey) - if err != nil { - return nil, err - } - - // node not found so we return nil - if node == nil { - return nil, nil - } - - if nodeValue := node.GetValue(); nodeValue != nil { - value, err = l.fetchValue(keyNibbles.OriginalDataPrefix(), fullKey, nodeValue) - if err != nil { - return nil, err - } - return value, nil +// / Look up the given `nibble_key`. +// / +// / If the value is found, it will be passed to the given function to decode or copy. +// / +// / The given `full_key` should be the full key to the data that is requested. This will +// / be used when there is a cache to potentially speed up the lookup. 
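For orientation, the reworked lookup entry point is driven as in the following sketch (illustrative only; lookupRawValue is a hypothetical helper, and the hash.H256/runtime.BlakeTwo256 type parameters and identity Query mirror the tests further down):

// lookupRawValue builds a TrieLookup whose Query simply returns the raw value
// bytes, then looks up a single key by its nibble view.
func lookupRawValue(dbGetter db.DBGetter, root hash.H256, fullKey []byte) ([]byte, error) {
	lookup := NewTrieLookup[hash.H256, runtime.BlakeTwo256, []byte](
		dbGetter,
		root,
		nil, // no TrieCache: Lookup falls back to lookupWithoutCache
		nil, // no TrieRecorder: accesses are not recorded
		func(data []byte) []byte { return data }, // Query: identity copy
	)
	value, err := lookup.Lookup(fullKey, nibbles.NewNibbles(fullKey))
	if err != nil {
		return nil, err
	}
	if value == nil {
		return nil, nil // key is not present in the trie
	}
	return *value, nil
}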
+func (l *TrieLookup[H, Hasher, QueryItem]) Lookup(fullKey []byte, nibbleKey nibbles.Nibbles) (*QueryItem, error) { + if l.cache != nil { + return l.lookupWithCache(fullKey, nibbleKey) } - - return nil, nil + return lookupWithoutCache(l, nibbleKey, fullKey, loadValue[H, QueryItem]) } -func (l *TrieLookup[H, Hasher, QueryItem]) lookupValueWithCache(fullKey []byte, keyNibbles nibbles.Nibbles, cache TrieCache[H]) (*QueryItem, error) { +// / Look up the given key. If the value is found, it will be passed to the given +// / function to decode or copy. +// / +// / It uses the given cache to speed-up lookups. +func (l *TrieLookup[H, Hasher, QueryItem]) lookupWithCache( + fullKey []byte, nibbleKey nibbles.Nibbles, +) (*QueryItem, error) { var trieNodesRecorded *RecordedForKey if l.recorder != nil { recorded := l.recorder.TrieNodesRecordedForKey(fullKey) @@ -195,36 +107,39 @@ func (l *TrieLookup[H, Hasher, QueryItem]) lookupValueWithCache(fullKey []byte, } var lookupData = func() ([]byte, error) { - data, err := lookupValueWithCacheInternal[H, Hasher](l, fullKey, keyNibbles, cache, loadValueOwned[H]) + data, err := lookupWithCacheInternal[H, Hasher](l, fullKey, nibbleKey, l.cache, loadValueOwned[H]) if err != nil { return nil, err } - cache.SetValue(fullKey, data.CachedValue()) - return data.Value, nil + l.cache.SetValue(fullKey, data.CachedValue()) + if data != nil { + return data.Value, nil + } + return nil, nil } var res []byte if valueCacheAllowed { - cachedVal := cache.GetValue(fullKey) + cachedVal := l.cache.GetValue(fullKey) switch cachedVal := cachedVal.(type) { - case NonExistingCachedValue: + case NonExistingCachedValue[H]: res = nil case ExistingHashCachedValue[H]: data, err := loadValueOwned[H]( // If we only have the hash cached, this can only be a value node. // For inline nodes we cache them directly as `CachedValue::Existing`. - ValueOwned(ValueOwnedNode[H]{Hash: cachedVal.Hash}), - keyNibbles, // nibble_key.original_data_as_prefix(), + ValueOwned[H](ValueOwnedNode[H]{Hash: cachedVal.Hash}), + nibbleKey.OriginalDataPrefix(), // nibble_key.original_data_as_prefix(), fullKey, - cache, + l.cache, l.db, l.recorder, ) if err != nil { break } - cache.SetValue(fullKey, data.CachedValue()) + l.cache.SetValue(fullKey, data.CachedValue()) res = data.Value case ExistingCachedValue[H]: data := cachedVal.Data @@ -265,39 +180,38 @@ func (l *TrieLookup[H, Hasher, QueryItem]) lookupValueWithCache(fullKey []byte, return nil, nil } -type loadValueFunc[H hash.Hash, R any] func( - v ValueOwned, - prefix nibbles.Nibbles, +type loadValueOwnedFunc[H hash.Hash, R any] func( + v ValueOwned[H], + prefix nibbles.Prefix, fullKey []byte, cache TrieCache[H], db db.DBGetter, recorder TrieRecorder, ) (R, error) -func lookupValueWithCacheInternal[H hash.Hash, Hasher hash.Hasher[H], R, QueryItem any]( +// / When modifying any logic inside this function, you also need to do the same in +// / lookupWithoutCache. 
+func lookupWithCacheInternal[H hash.Hash, Hasher hash.Hasher[H], R, QueryItem any]( l *TrieLookup[H, Hasher, QueryItem], fullKey []byte, nibbleKey nibbles.Nibbles, cache TrieCache[H], - loadValue loadValueFunc[H, R], + loadValue loadValueOwnedFunc[H, R], ) (*R, error) { partial := nibbleKey hash := l.hash var keyNibbles uint - // TODO: remove this - _ = partial - var depth uint for { - node, err := cache.GetOrInsertNode(hash, func() (NodeOwned, error) { + node, err := cache.GetOrInsertNode(hash, func() (NodeOwned[H], error) { prefixedKey := append(nibbleKey.Mid(keyNibbles).Left().JoinedBytes(), hash.Bytes()...) nodeData, err := l.db.Get(prefixedKey) if err != nil { if depth == 0 { - return nil, fmt.Errorf("invalid state root") + return nil, ErrInvalidStateRoot } else { - return nil, fmt.Errorf("incomplete database") + return nil, ErrIncompleteDB } } reader := bytes.NewReader(nodeData) @@ -321,9 +235,9 @@ func lookupValueWithCacheInternal[H hash.Hash, Hasher hash.Hasher[H], R, QueryIt var nextNode NodeHandleOwned switch node := node.(type) { case NodeOwnedLeaf[H]: - if partial.Equal(node.PartialKey) { + if partial.EqualNibbleSlice(node.PartialKey) { value := node.Value - r, err := loadValue(value, nibbleKey, fullKey, cache, l.db, l.recorder) + r, err := loadValue(value, nibbleKey.OriginalDataPrefix(), fullKey, cache, l.db, l.recorder) if err != nil { return nil, err } @@ -333,26 +247,36 @@ func lookupValueWithCacheInternal[H hash.Hash, Hasher hash.Hasher[H], R, QueryIt return nil, nil } case NodeOwnedBranch[H]: - if partial.Len() == 0 { + if !partial.StartsWithNibbleSlice(node.PartialKey) { + l.recordAccess(NonExistingNodeAccess{fullKey}) + return nil, nil + } + + if partial.Len() == node.PartialKey.Len() { + if node.Value == nil { + l.recordAccess(NonExistingNodeAccess{fullKey}) + return nil, nil + } value := node.Value - r, err := loadValue(value, nibbleKey, fullKey, cache, l.db, l.recorder) + r, err := loadValue(value, nibbleKey.OriginalDataPrefix(), fullKey, cache, l.db, l.recorder) if err != nil { return nil, err } return &r, nil + } + + child := node.Children[partial.At(node.PartialKey.Len())] + if child != nil { + partial = partial.Mid(node.PartialKey.Len() + 1) + keyNibbles += node.PartialKey.Len() + 1 + nextNode = child } else { - child := node.Children[partial.At(0)] - if child != nil { - partial = partial.Mid(1) - keyNibbles += 1 - nextNode = child - } else { - l.recordAccess(NonExistingNodeAccess{fullKey}) - return nil, nil - } + l.recordAccess(NonExistingNodeAccess{fullKey}) + return nil, nil } - case NodeOwnedEmpty: + case NodeOwnedEmpty[H]: l.recordAccess(NonExistingNodeAccess{FullKey: fullKey}) + return nil, nil default: panic("unreachable") } @@ -372,15 +296,139 @@ func lookupValueWithCacheInternal[H hash.Hash, Hasher hash.Hasher[H], R, QueryIt } } +type loadValueFunc[H hash.Hash, QueryItem, R any] func( + v codec.EncodedValue, + prefix nibbles.Prefix, + fullKey []byte, + db db.DBGetter, + recorder TrieRecorder, + query Query[QueryItem], +) (R, error) + +// / Look up the given key. If the value is found, it will be passed to the given +// / function to decode or copy. +// / +// / When modifying any logic inside this function, you also need to do the same in +// / lookupWithCacheInternal. 
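Both the cached and uncached walks fetch missing nodes from the backing database under the same addressing scheme: the nibble prefix consumed so far, joined with the node hash. A hypothetical helper (not part of the patch) makes that pattern explicit:

// nodeStorageKey is how the lookups here key trie nodes in the backing DB:
// the on-disk prefix of the nibbles already walked, followed by the node hash.
func nodeStorageKey[H hash.Hash](prefix nibbles.Prefix, nodeHash H) []byte {
	return append(prefix.JoinedBytes(), nodeHash.Bytes()...)
}

This is the same expression as append(nibbleKey.Mid(keyNibbles).Left().JoinedBytes(), hash.Bytes()...) used in both traversal loops.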
+func lookupWithoutCache[H hash.Hash, Hasher hash.Hasher[H], QueryItem, R any]( + l *TrieLookup[H, Hasher, QueryItem], + nibbleKey nibbles.Nibbles, + fullKey []byte, + loadValue loadValueFunc[H, QueryItem, R], +) (*R, error) { + partial := nibbleKey + hash := l.hash + var keyNibbles uint + + var depth uint + for { + prefixedKey := append(nibbleKey.Mid(keyNibbles).Left().JoinedBytes(), hash.Bytes()...) + nodeData, err := l.db.Get(prefixedKey) + if err != nil { + if depth == 0 { + return nil, ErrInvalidStateRoot + } else { + return nil, fmt.Errorf("incomplete database") + } + } + + l.recordAccess(EncodedNodeAccess[H]{Hash: hash, EncodedNode: nodeData}) + + inlineLoop: + // this loop iterates through all inline children (usually max 1) + // without incrementing the depth. + for { + reader := bytes.NewReader(nodeData) + decoded, err := codec.Decode[H](reader) + if err != nil { + return nil, err + } + + var nextNode codec.MerkleValue + switch decoded := decoded.(type) { + case codec.Leaf: + leaf := decoded + if partial.Equal(leaf.PartialKey) { + r, err := loadValue( + leaf.Value, + nibbleKey.OriginalDataPrefix(), + fullKey, + l.db, + l.recorder, + l.query, + ) + if err != nil { + return nil, err + } + return &r, nil + } + l.recordAccess(NonExistingNodeAccess{FullKey: fullKey}) + return nil, nil + case codec.Branch: + branch := decoded + if !partial.StartsWith(branch.PartialKey) { + l.recordAccess(NonExistingNodeAccess{fullKey}) + return nil, nil + } + + if partial.Len() == branch.PartialKey.Len() { + if branch.Value != nil { + r, err := loadValue( + branch.Value, + nibbleKey.OriginalDataPrefix(), + fullKey, + l.db, + l.recorder, + l.query, + ) + if err != nil { + return nil, err + } + return &r, nil + } + l.recordAccess(NonExistingNodeAccess{fullKey}) + return nil, nil + } + + child := branch.Children[partial.At(branch.PartialKey.Len())] + if child != nil { + partial = partial.Mid(branch.PartialKey.Len() + 1) + keyNibbles += branch.PartialKey.Len() + 1 + nextNode = child + } else { + l.recordAccess(NonExistingNodeAccess{fullKey}) + return nil, nil + } + case codec.Empty: + l.recordAccess(NonExistingNodeAccess{FullKey: fullKey}) + default: + panic("unreachable") + } + + // check if new node data is inline or hash. + switch nextNode := nextNode.(type) { + case codec.HashedNode[H]: + hash = nextNode.Hash + break inlineLoop + case codec.InlineNode: + nodeData = nextNode + default: + panic("unreachable") + } + } + depth++ + } +} + type valueHash[H any] struct { Value []byte Hash H } -func (vh *valueHash[H]) CachedValue() CachedValue { +func (vh *valueHash[H]) CachedValue() CachedValue[H] { // valid case since this is supposed to be optional if vh == nil { - return NonExistingCachedValue{} + return NonExistingCachedValue[H]{} } return ExistingCachedValue[H]{ Hash: vh.Hash, @@ -395,13 +443,13 @@ func (vh *valueHash[H]) CachedValue() CachedValue { // // Returns the bytes representing the value and its hash. func loadValueOwned[H hash.Hash]( - v ValueOwned, - prefix nibbles.Nibbles, + v ValueOwned[H], + prefix nibbles.Prefix, fullKey []byte, cache TrieCache[H], db db.DBGetter, - recorder TrieRecorder) (valueHash[H], error) { - + recorder TrieRecorder, +) (valueHash[H], error) { switch v := v.(type) { case ValueOwnedInline[H]: if recorder != nil { @@ -412,8 +460,8 @@ func loadValueOwned[H hash.Hash]( Hash: v.Hash, }, nil case ValueOwnedNode[H]: - node, err := cache.GetOrInsertNode(v.Hash, func() (NodeOwned, error) { - prefixedKey := append(prefix.Left().JoinedBytes(), v.Hash.Bytes()...) 
+ node, err := cache.GetOrInsertNode(v.Hash, func() (NodeOwned[H], error) { + prefixedKey := append(prefix.JoinedBytes(), v.Hash.Bytes()...) val, err := db.Get(prefixedKey) if err != nil { return nil, err @@ -429,7 +477,8 @@ func loadValueOwned[H hash.Hash]( case NodeOwnedValue[H]: value = node.Value default: - panic("we are caching a `NodeOwnedValue` for a value node hash and this cached node has always data attached") + panic("we are caching a `NodeOwnedValue` for a value node hash and this " + + "cached node has always data attached") } if recorder != nil { @@ -450,33 +499,163 @@ func loadValueOwned[H hash.Hash]( } } -// fetchValue gets the value from the node, if it is inlined we can return it -// directly. But if it is hashed (V1) we have to look up for its value in the DB -func (l *TrieLookup[H, Hasher, QueryItem]) fetchValue( - prefix nibbles.Prefix, fullKey []byte, value codec.EncodedValue, -) ([]byte, error) { - switch v := value.(type) { +// / Load the given value. +// / +// / This will access the `db` if the value is not already in memory, but then it will put it +// / into the given `cache` as `NodeOwned::Value`. +// / +// / Returns the bytes representing the value. +func loadValue[H hash.Hash, QueryItem any]( + v codec.EncodedValue, + prefix nibbles.Prefix, + fullKey []byte, + db db.DBGetter, + recorder TrieRecorder, + query Query[QueryItem], +) (qi QueryItem, err error) { + switch v := v.(type) { case codec.InlineValue: - l.recordAccess(InlineValueAccess{FullKey: fullKey}) - return v, nil + if recorder != nil { + recorder.Record(InlineValueAccess{FullKey: fullKey}) + } + return query(v), nil case codec.HashedValue[H]: - prefixedKey := bytes.Join([][]byte{prefix.JoinedBytes(), v.Hash.Bytes()}, nil) - - nodeData, err := l.db.Get(prefixedKey) + prefixedKey := append(prefix.JoinedBytes(), v.Hash.Bytes()...) + val, err := db.Get(prefixedKey) if err != nil { - return nil, ErrIncompleteDB + return qi, err + } + if val == nil { + return qi, fmt.Errorf("incomplete database for key: %s", prefixedKey) } - l.recordAccess(ValueAccess[H]{Hash: v.Hash, FullKey: fullKey, Value: nodeData}) - - return nodeData, nil + if recorder != nil { + recorder.Record(ValueAccess[H]{ + Hash: v.Hash, + Value: val, + FullKey: fullKey, + }) + } + return query(val), nil default: panic("unreachable") } } -func (l *TrieLookup[H, Hasher, QueryItem]) recordAccess(access TrieAccess) { +// / Look up the value hash for the given `nibble_key`. +// / +// / The given `full_key` should be the full key to the data that is requested. This will +// / be used when there is a cache to potentially speed up the lookup. +func (l *TrieLookup[H, Hasher, QueryItem]) LookupHash(fullKey []byte, nibbleKey nibbles.Nibbles) (*H, error) { + if l.cache != nil { + return l.lookupHashWithCache(fullKey, nibbleKey) + } + return lookupWithoutCache( + l, nibbleKey, fullKey, + func( + v codec.EncodedValue, + _ nibbles.Prefix, + fullKey []byte, + _ db.DBGetter, + recorder TrieRecorder, + _ Query[QueryItem], + ) (H, error) { + switch v := v.(type) { + case codec.InlineValue: + if recorder != nil { + // We can record this as `InlineValue`, even we are just returning + // the `hash`. This is done to prevent requiring to re-record this + // key. + recorder.Record(InlineValueAccess{FullKey: fullKey}) + } + return (*new(Hasher)).Hash(v), nil + case codec.HashedValue[H]: + if recorder != nil { + // We can record this as `InlineValue`, even we are just returning + // the `hash`. This is done to prevent requiring to re-record this + // key. 
+ recorder.Record(InlineValueAccess{FullKey: fullKey}) + } + return v.Hash, nil + default: + panic("unreachable") + } + }, + ) + +} + +// / Look up the value hash for the given key. +// / +// / It uses the given cache to speed-up lookups. +func (l *TrieLookup[H, Hasher, QueryItem]) lookupHashWithCache( + fullKey []byte, + nibbleKey nibbles.Nibbles, +) (*H, error) { + // If there is no recorder, we can always use the value cache. + var valueCacheAllowed bool = true if l.recorder != nil { - l.recorder.Record(access) + // Check if the recorder has the trie nodes already recorded for this key. + valueCacheAllowed = l.recorder.TrieNodesRecordedForKey(fullKey) != RecordedNone + } + + var res *H + if valueCacheAllowed { + val := l.cache.GetValue(fullKey) + if val != nil { + switch val := val.(type) { + case ExistingHashCachedValue[H]: + res = &val.Hash + case ExistingCachedValue[H]: + res = &val.Hash + } + } + } else { + vh, err := lookupWithCacheInternal(l, fullKey, nibbleKey, l.cache, func( + value ValueOwned[H], + _ nibbles.Prefix, + fullKey []byte, + _ TrieCache[H], + _ db.DBGetter, + recorder TrieRecorder, + ) (valueHash[H], error) { + switch value := value.(type) { + case ValueOwnedInline[H]: + if recorder != nil { + // We can record this as `InlineValue`, even we are just returning + // the `hash`. This is done to prevent requiring to re-record this + // key. + recorder.Record(InlineValueAccess{FullKey: fullKey}) + } + return valueHash[H]{ + Value: value.Value, + Hash: value.Hash, + }, nil + case ValueOwnedNode[H]: + if recorder != nil { + recorder.Record(HashAccess{FullKey: fullKey}) + } + return valueHash[H]{ + Hash: value.Hash, + }, nil + default: + panic("unreachable") + } + }) + if err != nil { + return nil, err + } + + if vh != nil { + if vh.Value != nil { + l.cache.SetValue(fullKey, vh.CachedValue()) + } else { + l.cache.SetValue(fullKey, ExistingHashCachedValue[H]{Hash: vh.Hash}) + } + res = &vh.Hash + } else { + l.cache.SetValue(fullKey, NonExistingCachedValue[H]{}) + } } + return res, nil } diff --git a/pkg/trie/triedb/lookup_test.go b/pkg/trie/triedb/lookup_test.go index 181bbb5ad5..200d33d810 100644 --- a/pkg/trie/triedb/lookup_test.go +++ b/pkg/trie/triedb/lookup_test.go @@ -8,22 +8,77 @@ import ( "github.com/ChainSafe/gossamer/internal/primitives/core/hash" "github.com/ChainSafe/gossamer/internal/primitives/runtime" + "github.com/ChainSafe/gossamer/pkg/trie" "github.com/ChainSafe/gossamer/pkg/trie/triedb/nibbles" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestTrieDB_Lookup(t *testing.T) { t.Run("root_not_exists_in_db", func(t *testing.T) { db := newTestDB(t) empty := runtime.BlakeTwo256{}.Hash([]byte{0}) - lookup := NewTrieLookup[hash.H256, runtime.BlakeTwo256, []byte](db, empty, nil, nil) + lookup := NewTrieLookup[hash.H256, runtime.BlakeTwo256, []byte](db, empty, nil, nil, nil) - value, err := lookup.lookupValue([]byte("test"), nibbles.NewNibbles([]byte("test"))) + value, err := lookup.Lookup([]byte("test"), nibbles.NewNibbles([]byte("test"))) assert.Nil(t, value) - assert.ErrorIs(t, err, ErrIncompleteDB) + assert.ErrorIs(t, err, ErrInvalidStateRoot) }) } +type trieCacheImpl struct{} + +func (trieCacheImpl) GetValue(key []byte) CachedValue[hash.H256] { return nil } +func (*trieCacheImpl) SetValue(key []byte, value CachedValue[hash.H256]) {} +func (*trieCacheImpl) GetOrInsertNode(hash hash.H256, fetchNode func() (NodeOwned[hash.H256], error)) (NodeOwned[hash.H256], error) { + return fetchNode() +} +func (*trieCacheImpl) GetNode(hash 
hash.H256) NodeOwned[hash.H256] { return nil } + +func Test_TrieLookup_lookupValueWithCache(t *testing.T) { + cache := &trieCacheImpl{} + inmemoryDB := NewMemoryDB[hash.H256, runtime.BlakeTwo256](EmptyNode) + trieDB := NewEmptyTrieDB[hash.H256, runtime.BlakeTwo256]( + inmemoryDB, + WithCache[hash.H256, runtime.BlakeTwo256](cache), + ) + trieDB.SetVersion(trie.V1) + + entries := map[string][]byte{ + "no": make([]byte, 1), + "noot": make([]byte, 2), + "not": make([]byte, 3), + "notable": make([]byte, 4), + "notification": make([]byte, 33), + "test": make([]byte, 6), + "dimartiro": make([]byte, 7), + } + + for k, v := range entries { + require.NoError(t, trieDB.Put([]byte(k), v)) + } + + err := trieDB.commit() + require.NoError(t, err) + + lookup := NewTrieLookup[hash.H256, runtime.BlakeTwo256]( + inmemoryDB, + trieDB.rootHash, + cache, + nil, + func(data []byte) []byte { + return data + }, + ) + + for k, v := range entries { + bytes, err := lookup.lookupWithCache([]byte(k), nibbles.NewNibbles([]byte(k))) + require.NoError(t, err) + require.NotNil(t, bytes) + require.Equal(t, []byte(v), *bytes) + } +} + // TODO: restore after implementing node level caching // func Test_valueHash_CachedValue(t *testing.T) { // var vh *valueHash[hash.H256] diff --git a/pkg/trie/triedb/mocks_generate_test.go b/pkg/trie/triedb/mocks_generate_test.go new file mode 100644 index 0000000000..d010418444 --- /dev/null +++ b/pkg/trie/triedb/mocks_generate_test.go @@ -0,0 +1,3 @@ +package triedb + +//go:generate mockgen -destination=mock_trie_cache.go -package $GOPACKAGE . TrieCache[hash.H256] diff --git a/pkg/trie/triedb/nibbles/nibbles.go b/pkg/trie/triedb/nibbles/nibbles.go index bf4a9c90e9..3b740663dc 100644 --- a/pkg/trie/triedb/nibbles/nibbles.go +++ b/pkg/trie/triedb/nibbles/nibbles.go @@ -74,8 +74,7 @@ func PadRight(b uint8) uint8 { return b & PaddingBitmask } -// A trie node prefix, it is the nibble path from the trie root -// to the trie node. +// A trie node prefix, it is the nibble path from the trie root to the trie node. // For a node containing no partial key value it is the full key. // For a value node or node containing a partial key, it is the full key minus its node partial // nibbles (the node key can be split into prefix and node partial). @@ -113,7 +112,8 @@ func (n Nibbles) Len() uint { // Advance the view on the slice by i nibbles. func (n *Nibbles) Advance(i uint) { - if !(n.Len() >= i) { + len := n.Len() + if !(len >= i) { panic("not enough nibbles to advance") } n.offset += i @@ -202,8 +202,7 @@ func (n Nibbles) NodeKey() NodeKey { } // Helper function to create a [NodeKey] for a given number of nibbles. -// Warning this method can be slow (number of nibble does not align the -// original padding). +// Warning this method can be slow (number of nibble does not align the original padding). func (n Nibbles) NodeKeyRange(nb uint) NodeKey { if nb >= n.Len() { return n.NodeKey() @@ -239,8 +238,7 @@ func NumberPadding(i uint) uint { // Representation of a nible slice (right aligned). // It contains a right aligned padded first byte (first pair element is the number of nibbles -// (0 to max nb nibble - 1), second pair element is the padded nibble), and a slice over -// the remaining bytes. +// (0 to max nb nibble - 1), second pair element is the padded nibble), and a slice over the remaining bytes. 
type Partial struct { First uint8 PaddedNibble uint8 @@ -331,6 +329,36 @@ func (nb Nibbles) Compare(other Nibbles) int { } } +func (n Nibbles) EqualNibbleSlice(other NibbleSlice) bool { + if n.Len() != other.Len() { + return false + } + + for i := uint(0); i < n.Len(); i++ { + if n.At(i) != other.At(i) { + return false + } + } + return true +} + +func (n Nibbles) StartsWithNibbleSlice(other NibbleSlice) bool { + if n.Len() < other.Len() { + return false + } + + if other := other.asNibbles(); other != nil { + return n.StartsWith(*other) + } + + for i := uint(0); i < other.Len(); i++ { + if n.At(i) != other.At(i) { + return false + } + } + return true +} + // Partial node key type: offset and value. // Offset is applied on first byte of array (bytes are right aligned). type NodeKey struct { diff --git a/pkg/trie/triedb/nibbles/nibbleslice.go b/pkg/trie/triedb/nibbles/nibbleslice.go index 1ce034186b..d9811d8260 100644 --- a/pkg/trie/triedb/nibbles/nibbleslice.go +++ b/pkg/trie/triedb/nibbles/nibbleslice.go @@ -52,7 +52,7 @@ func (n *NibbleSlice) Push(nibble uint8) { n.len++ } -// Try to pop a nibble off the NibbleVec. Fails if len == 0. +// Try to pop a nibble off the [NibbleSlice]. Fails if len == 0. func (n *NibbleSlice) Pop() *uint8 { if n.IsEmpty() { return nil @@ -68,6 +68,32 @@ func (n *NibbleSlice) Pop() *uint8 { return &popped } +// / Append another [NibbleSlice]. Can be slow (alignment of second slice). +func (n *NibbleSlice) Append(v NibbleSlice) { + if v.len == 0 { + return + } + + finalLen := n.len + v.len + offset := n.len % NibblesPerByte + finalOffset := finalLen % NibblesPerByte + lastIndex := n.len / NibblesPerByte + if offset > 0 { + n.inner[lastIndex] = PadLeft(n.inner[lastIndex]) | v.inner[0]>>4 + for i := uint(0); i < uint(len(v.inner))-1; i++ { + n.inner = append(n.inner, v.inner[i]<<4|v.inner[i+1]>>4) + } + if finalOffset > 0 { + n.inner = append(n.inner, v.inner[len(v.inner)-1]<<4) + } + } else { + for i := uint(0); i < uint(len(v.inner)); i++ { + n.inner = append(n.inner, v.inner[i]) + } + } + n.len += v.len +} + // Append a [Partial]. Can be slow (alignement of partial). 
func (n *NibbleSlice) AppendPartial(p Partial) { if p.First == 1 { @@ -159,3 +185,53 @@ func (n NibbleSlice) Clone() NibbleSlice { len: n.len, } } + +func (n NibbleSlice) Len() uint { + return n.len +} + +func (n NibbleSlice) asNibbles() *Nibbles { + if n.len%NibblesPerByte == 0 { + nibbles := NewNibbles(n.inner) + return &nibbles + } + return nil +} + +func (n NibbleSlice) Right() []byte { + requirePadding := n.Len()%NibblesPerByte != 0 + var ix uint + + b := make([]byte, 0) + for { + if requirePadding && ix < uint(len(n.inner)) { + if ix == 0 { + ix++ + b = append(b, n.inner[ix-1]>>4) + } else { + ix++ + b = append(b, n.inner[ix-2]<<4|n.inner[ix-1]>>4) + } + } else if ix < uint(len(n.inner)) { + ix++ + b = append(b, n.inner[ix-1]) + } else { + break + } + } + return b +} + +func (n NibbleSlice) NodeKey() NodeKey { + if nibbles := n.asNibbles(); nibbles != nil { + return nibbles.NodeKey() + } + return NodeKey{ + Offset: 1, + Data: n.Right(), + } +} + +func (n NibbleSlice) Inner() []byte { + return n.inner +} diff --git a/pkg/trie/triedb/node.go b/pkg/trie/triedb/node.go index 75fee86332..2d709c14ad 100644 --- a/pkg/trie/triedb/node.go +++ b/pkg/trie/triedb/node.go @@ -15,13 +15,13 @@ import ( "github.com/ChainSafe/gossamer/pkg/trie/triedb/nibbles" ) -type nodeValue[H hash.Hash] interface { - equal(other nodeValue[H]) bool +type nodeValue interface { + equal(other nodeValue) bool } type ( // inline is an inlined value representation - inline[H hash.Hash] []byte + inline []byte // valueRef is a reference to a value stored in the db valueRef[H hash.Hash] struct { @@ -37,10 +37,10 @@ type ( // newEncodedValue creates an EncodedValue from a nodeValue func newEncodedValue[H hash.Hash]( - value nodeValue[H], partial *nibbles.Nibbles, childF onChildStoreFn, + value nodeValue, partial *nibbles.Nibbles, childF onChildStoreFn, ) (codec.EncodedValue, error) { switch v := value.(type) { - case inline[H]: + case inline: return codec.InlineValue(v), nil case valueRef[H]: return codec.HashedValue[H]{Hash: v.hash}, nil @@ -54,7 +54,6 @@ func newEncodedValue[H hash.Hash]( // Check and get new new value hash switch cr := childRef.(type) { case HashChildReference[H]: - // if bytes.Equal(cr, common.EmptyHash.ToBytes()) { empty := *new(H) if cr.Hash == empty { panic("new external value are always added before encoding a node") @@ -77,9 +76,9 @@ func newEncodedValue[H hash.Hash]( } } -func (n inline[H]) equal(other nodeValue[H]) bool { +func (n inline) equal(other nodeValue) bool { switch otherValue := other.(type) { - case inline[H]: + case inline: return bytes.Equal(n, otherValue) default: return false @@ -87,7 +86,7 @@ func (n inline[H]) equal(other nodeValue[H]) bool { } func (vr valueRef[H]) getHash() H { return vr.hash } -func (vr valueRef[H]) equal(other nodeValue[H]) bool { +func (vr valueRef[H]) equal(other nodeValue) bool { switch otherValue := other.(type) { case valueRef[H]: return vr.hash == otherValue.hash @@ -99,7 +98,7 @@ func (vr valueRef[H]) equal(other nodeValue[H]) bool { func (vr newValueRef[H]) getHash() H { return vr.hash } -func (vr newValueRef[H]) equal(other nodeValue[H]) bool { +func (vr newValueRef[H]) equal(other nodeValue) bool { switch otherValue := other.(type) { case newValueRef[H]: return vr.hash == otherValue.hash @@ -108,7 +107,7 @@ func (vr newValueRef[H]) equal(other nodeValue[H]) bool { } } -func NewValue[H hash.Hash](data []byte, threshold int) nodeValue[H] { +func NewValue[H hash.Hash](data []byte, threshold int) nodeValue { if len(data) >= threshold { return 
newValueRef[H]{ hash: *new(H), @@ -116,13 +115,13 @@ func NewValue[H hash.Hash](data []byte, threshold int) nodeValue[H] { } } - return inline[H](data) + return inline(data) } -func NewValueFromEncoded[H hash.Hash](encodedValue codec.EncodedValue) nodeValue[H] { +func NewValueFromEncoded[H hash.Hash](encodedValue codec.EncodedValue) nodeValue { switch v := encodedValue.(type) { case codec.InlineValue: - return inline[H](v) + return inline(v) case codec.HashedValue[H]: return valueRef[H]{v.Hash} } @@ -130,9 +129,20 @@ func NewValueFromEncoded[H hash.Hash](encodedValue codec.EncodedValue) nodeValue return nil } -func inMemoryFetchedValue[H hash.Hash](value nodeValue[H], prefix []byte, db db.DBGetter) ([]byte, error) { +func newValueFromValueOwned[H hash.Hash](val ValueOwned[H]) nodeValue { + switch val := val.(type) { + case ValueOwnedInline[H]: + return inline(val.Value) + case ValueOwnedNode[H]: + return valueRef[H]{val.Hash} + default: + panic("unreachable") + } +} + +func inMemoryFetchedValue[H hash.Hash](value nodeValue, prefix []byte, db db.DBGetter) ([]byte, error) { switch v := value.(type) { - case inline[H]: + case inline: return v, nil case newValueRef[H]: return v.data, nil @@ -165,12 +175,12 @@ type ( Empty struct{} Leaf[H hash.Hash] struct { partialKey nodeKey - value nodeValue[H] + value nodeValue } Branch[H hash.Hash] struct { partialKey nodeKey children [codec.ChildrenCapacity]NodeHandle - value nodeValue[H] + value nodeValue } ) @@ -180,7 +190,7 @@ func (n Branch[H]) getPartialKey() *nodeKey { return &n.partialKey } // Create a new node from the encoded data, decoding this data into a codec.Node // and mapping that with this node type -func newNodeFromEncoded[H hash.Hash](nodeHash H, data []byte, storage nodeStorage[H]) (Node, error) { +func newNodeFromEncoded[H hash.Hash](nodeHash H, data []byte, storage *nodeStorage[H]) (Node, error) { reader := bytes.NewReader(data) encodedNode, err := codec.Decode[H](reader) if err != nil { @@ -202,7 +212,7 @@ func newNodeFromEncoded[H hash.Hash](nodeHash H, data []byte, storage nodeStorag child := func(i int) (NodeHandle, error) { if encodedChildren[i] != nil { - newChild, err := newFromEncodedMerkleValue[H](nodeHash, encodedChildren[i], storage) + newChild, err := newNodeHandleFromMerkleValue[H](nodeHash, encodedChildren[i], storage) if err != nil { return nil, err } @@ -226,6 +236,47 @@ func newNodeFromEncoded[H hash.Hash](nodeHash H, data []byte, storage nodeStorag } } +func newNodeFromNodeOwned[H hash.Hash]( + nodeOwned NodeOwned[H], storage *nodeStorage[H], +) Node { + switch nodeOwned := nodeOwned.(type) { + case NodeOwnedEmpty[H]: + return Empty{} + case NodeOwnedLeaf[H]: + leaf := nodeOwned + return Leaf[H]{ + partialKey: leaf.PartialKey.NodeKey(), + value: newValueFromValueOwned[H](leaf.Value), + } + case NodeOwnedBranch[H]: + k := nodeOwned.PartialKey + encodedChildren := nodeOwned.Children + val := nodeOwned.Value + + child := func(i uint) NodeHandle { + if encodedChildren[i] != nil { + newChild := newNodeHandleFromNodeHandleOwned(encodedChildren[i], storage) + return newChild + } + return nil + } + + children := [codec.ChildrenCapacity]NodeHandle{} + for i := uint(0); i < codec.ChildrenCapacity; i++ { + children[i] = child(i) + } + return Branch[H]{ + partialKey: k.NodeKey(), + children: children, + value: newValueFromValueOwned[H](val), + } + case NodeOwnedValue[H]: + panic("NodeOwnedValue can only be returned for the hash of a value") + default: + panic("unreachable") + } +} + type nodeToEncode interface { 
isNodeToEncode() } diff --git a/pkg/trie/triedb/node_owned.go b/pkg/trie/triedb/node_owned.go index 5f29f3c3a3..4eb847b902 100644 --- a/pkg/trie/triedb/node_owned.go +++ b/pkg/trie/triedb/node_owned.go @@ -8,35 +8,44 @@ import ( "github.com/ChainSafe/gossamer/pkg/trie/triedb/nibbles" ) -type ValueOwnedTypes[H any] interface { +type ValueOwnedTypes[H hash.Hash] interface { ValueOwnedInline[H] | ValueOwnedNode[H] - ValueOwned + ValueOwned[H] } -type ValueOwned interface { - isValueOwned() +type ValueOwned[H any] interface { + // isValueOwned() + data() []byte // nil means there is no data + dataHash() *H + EncodedValue() codec.EncodedValue } type ( // Value bytes as stored in a trie node and its hash. - ValueOwnedInline[H any] struct { + ValueOwnedInline[H hash.Hash] struct { Value []byte Hash H } // Hash byte slice as stored in a trie node. - ValueOwnedNode[H any] struct { + ValueOwnedNode[H hash.Hash] struct { Hash H } ) -func (ValueOwnedInline[H]) isValueOwned() {} -func (ValueOwnedNode[H]) isValueOwned() {} +func (vo ValueOwnedInline[H]) data() []byte { return vo.Value } +func (vo ValueOwnedNode[H]) data() []byte { return nil } +func (vo ValueOwnedInline[H]) dataHash() *H { return &vo.Hash } +func (vo ValueOwnedNode[H]) dataHash() *H { return &vo.Hash } +func (vo ValueOwnedInline[H]) EncodedValue() codec.EncodedValue { return codec.InlineValue(vo.Value) } +func (vo ValueOwnedNode[H]) EncodedValue() codec.EncodedValue { + return codec.HashedValue[H]{Hash: vo.Hash} +} -var ( - _ ValueOwned = ValueOwnedInline[string]{} - _ ValueOwned = ValueOwnedNode[string]{} -) +// var ( +// _ ValueOwned[string] = ValueOwnedInline[string]{} +// _ ValueOwned[string] = ValueOwnedNode[string]{} +// ) -func ValueOwnedFromEncodedValue[H hash.Hash, Hasher hash.Hasher[H]](encVal codec.EncodedValue) ValueOwned { +func ValueOwnedFromEncodedValue[H hash.Hash, Hasher hash.Hasher[H]](encVal codec.EncodedValue) ValueOwned[H] { switch encVal := encVal.(type) { case codec.InlineValue: return ValueOwnedInline[H]{ @@ -45,35 +54,49 @@ func ValueOwnedFromEncodedValue[H hash.Hash, Hasher hash.Hasher[H]](encVal codec } case codec.HashedValue[H]: return ValueOwnedNode[H](encVal) + case nil: + return nil default: panic("unreachable") } } -type NodeHandleOwnedTypes[H any] interface { +type NodeHandleOwnedTypes[H hash.Hash] interface { NodeHandleOwnedHash[H] | NodeHandleOwnedInline[H] } type NodeHandleOwned interface { + ChildReference() ChildReference isNodeHandleOwned() } type ( - NodeHandleOwnedHash[H any] struct { + NodeHandleOwnedHash[H hash.Hash] struct { Hash H } - NodeHandleOwnedInline[H any] struct { - NodeOwned + NodeHandleOwnedInline[H hash.Hash] struct { + NodeOwned[H] } ) func (NodeHandleOwnedHash[H]) isNodeHandleOwned() {} func (NodeHandleOwnedInline[H]) isNodeHandleOwned() {} +func (nho NodeHandleOwnedHash[H]) ChildReference() ChildReference { + return HashChildReference[H]{Hash: nho.Hash} +} +func (nho NodeHandleOwnedInline[H]) ChildReference() ChildReference { + encoded := nho.NodeOwned.encoded() + store := (*new(H)) + if store.Length() > len(encoded) { + panic("Invalid inline node handle") + } + return InlineChildReference(encoded) +} -var ( - _ NodeHandleOwned = NodeHandleOwnedHash[string]{} - _ NodeHandleOwned = NodeHandleOwnedInline[string]{} -) +// var ( +// _ NodeHandleOwned = NodeHandleOwnedHash[string]{} +// _ NodeHandleOwned = NodeHandleOwnedInline[string]{} +// ) func NodeHandleOwnedFromMerkleValue[H hash.Hash, Hasher hash.Hasher[H]](mv codec.MerkleValue) (NodeHandleOwned, error) { switch mv := 
mv.(type) { @@ -95,28 +118,38 @@ func NodeHandleOwnedFromMerkleValue[H hash.Hash, Hasher hash.Hasher[H]](mv codec } } -type NodeOwnedTypes[H any] interface { - NodeOwnedEmpty | NodeOwnedLeaf[H] | NodeOwnedBranch[H] | NodeOwnedValue[H] - NodeOwned +type NodeOwnedTypes[H hash.Hash] interface { + NodeOwnedEmpty[H] | NodeOwnedLeaf[H] | NodeOwnedBranch[H] | NodeOwnedValue[H] + NodeOwned[H] } -type NodeOwned interface { - isNodeOwned() + +type child[H any] struct { + nibble *uint8 + NodeHandleOwned +} +type NodeOwned[H any] interface { + // isNodeOwned() + data() []byte // nil means there is no data + dataHash() *H + children() []child[H] + partialKey() *nibbles.NibbleSlice + encoded() []byte } type ( // Null trie node; could be an empty root or an empty branch entry. - NodeOwnedEmpty struct{} + NodeOwnedEmpty[H hash.Hash] struct{} // Leaf node; has key slice and value. Value may not be empty. NodeOwnedLeaf[H any] struct { - PartialKey nibbles.Nibbles - Value ValueOwned + PartialKey nibbles.NibbleSlice + Value ValueOwned[H] } // Branch node; has slice of child nodes (each possibly null) // and an optional immediate node data. NodeOwnedBranch[H any] struct { - PartialKey nibbles.Nibbles + PartialKey nibbles.NibbleSlice Children [codec.ChildrenCapacity]NodeHandleOwned // can be nil to represent no child - Value ValueOwned + Value ValueOwned[H] } // Node that represents a value. // @@ -128,25 +161,88 @@ type ( } ) -func (NodeOwnedEmpty) isNodeOwned() {} -func (NodeOwnedLeaf[H]) isNodeOwned() {} -func (NodeOwnedBranch[H]) isNodeOwned() {} -func (NodeOwnedValue[H]) isNodeOwned() {} +func (NodeOwnedEmpty[H]) data() []byte { return nil } +func (no NodeOwnedLeaf[H]) data() []byte { return no.Value.data() } +func (no NodeOwnedBranch[H]) data() []byte { + if no.Value != nil { + return no.Value.data() + } + return nil +} +func (no NodeOwnedValue[H]) data() []byte { return no.Value } -var ( - _ NodeOwned = NodeOwnedEmpty{} - _ NodeOwned = NodeOwnedLeaf[string]{} - _ NodeOwned = NodeOwnedBranch[string]{} - _ NodeOwned = NodeOwnedValue[string]{} -) +func (NodeOwnedEmpty[H]) dataHash() *H { return nil } +func (no NodeOwnedLeaf[H]) dataHash() *H { return no.Value.dataHash() } +func (no NodeOwnedBranch[H]) dataHash() *H { + if no.Value != nil { + return no.Value.dataHash() + } + return nil +} +func (no NodeOwnedValue[H]) dataHash() *H { return &no.Hash } + +func (NodeOwnedEmpty[H]) children() []child[H] { return nil } +func (no NodeOwnedLeaf[H]) children() []child[H] { return nil } +func (no NodeOwnedBranch[H]) children() []child[H] { + r := []child[H]{} + for i, ch := range no.Children { + nibble := uint8(i) + r = append(r, child[H]{ + nibble: &nibble, + NodeHandleOwned: ch, + }) + } + return r +} +func (no NodeOwnedValue[H]) children() []child[H] { return nil } + +func (NodeOwnedEmpty[H]) partialKey() *nibbles.NibbleSlice { return nil } +func (no NodeOwnedLeaf[H]) partialKey() *nibbles.NibbleSlice { return &no.PartialKey } +func (no NodeOwnedBranch[H]) partialKey() *nibbles.NibbleSlice { return &no.PartialKey } +func (no NodeOwnedValue[H]) partialKey() *nibbles.NibbleSlice { return nil } + +func (NodeOwnedEmpty[H]) encoded() []byte { + return []byte{EmptyTrieBytes} +} +func (no NodeOwnedLeaf[H]) encoded() []byte { + encodingBuffer := bytes.NewBuffer(nil) + err := NewEncodedLeaf(no.PartialKey.Right(), no.PartialKey.Len(), no.Value.EncodedValue(), encodingBuffer) + if err != nil { + panic(err) + } + return encodingBuffer.Bytes() +} +func (no NodeOwnedBranch[H]) encoded() []byte { + encodingBuffer := 
bytes.NewBuffer(nil) + children := [16]ChildReference{} + for i, ch := range no.Children { + if ch == nil { + continue + } + children[i] = ch.ChildReference() + } + err := NewEncodedBranch(no.PartialKey.Right(), no.PartialKey.Len(), children, no.Value.EncodedValue(), encodingBuffer) + if err != nil { + panic(err) + } + return encodingBuffer.Bytes() +} +func (no NodeOwnedValue[H]) encoded() []byte { return no.Value } + +// var ( +// _ NodeOwned[string] = NodeOwnedEmpty[string]{} +// _ NodeOwned[string] = NodeOwnedLeaf[string]{} +// _ NodeOwned[string] = NodeOwnedBranch[string]{} +// _ NodeOwned[string] = NodeOwnedValue[string]{} +// ) -func NodeOwnedFromNode[H hash.Hash, Hasher hash.Hasher[H]](n codec.EncodedNode) (NodeOwned, error) { +func NodeOwnedFromNode[H hash.Hash, Hasher hash.Hasher[H]](n codec.EncodedNode) (NodeOwned[H], error) { switch n := n.(type) { case codec.Empty: - return NodeOwnedEmpty{}, nil + return NodeOwnedEmpty[H]{}, nil case codec.Leaf: return NodeOwnedLeaf[H]{ - PartialKey: n.PartialKey, + PartialKey: nibbles.NewNibbleSliceFromNibbles(n.PartialKey), Value: ValueOwnedFromEncodedValue[H, Hasher](n.Value), }, nil case codec.Branch: @@ -162,7 +258,7 @@ func NodeOwnedFromNode[H hash.Hash, Hasher hash.Hasher[H]](n codec.EncodedNode) } } return NodeOwnedBranch[H]{ - PartialKey: n.PartialKey, + PartialKey: nibbles.NewNibbleSliceFromNibbles(n.PartialKey), Children: childrenOwned, Value: ValueOwnedFromEncodedValue[H, Hasher](n.Value), }, nil diff --git a/pkg/trie/triedb/node_storage.go b/pkg/trie/triedb/node_storage.go index f3faced47d..c82e07d4cf 100644 --- a/pkg/trie/triedb/node_storage.go +++ b/pkg/trie/triedb/node_storage.go @@ -30,10 +30,10 @@ type ( func (inMemory) isNodeHandle() {} func (persisted[H]) isNodeHandle() {} -func newFromEncodedMerkleValue[H hash.Hash]( +func newNodeHandleFromMerkleValue[H hash.Hash]( parentHash H, encodedNodeHandle codec.MerkleValue, - storage nodeStorage[H], + storage *nodeStorage[H], ) (NodeHandle, error) { switch encoded := encodedNodeHandle.(type) { case codec.HashedNode[H]: @@ -49,6 +49,21 @@ func newFromEncodedMerkleValue[H hash.Hash]( } } +func newNodeHandleFromNodeHandleOwned[H hash.Hash]( + child NodeHandleOwned, + storage *nodeStorage[H], +) NodeHandle { + switch child := child.(type) { + case NodeHandleOwnedHash[H]: + return persisted[H]{child.Hash} + case NodeHandleOwnedInline[H]: + ch := newNodeFromNodeOwned(child.NodeOwned, storage) + return inMemory(storage.alloc(NewStoredNode{node: ch})) + default: + panic("unreachable") + } +} + // StoredNode is an enum for temporal nodes stored in the trieDB // these nodes could be either new nodes or cached nodes // New nodes are used to know that we need to add them in our backed db diff --git a/pkg/trie/triedb/recorder.go b/pkg/trie/triedb/recorder.go index c691fadf3c..aa0dbb49cc 100644 --- a/pkg/trie/triedb/recorder.go +++ b/pkg/trie/triedb/recorder.go @@ -14,7 +14,7 @@ type TrieAccess interface { type ( NodeOwnedAccess[H any] struct { Hash H - Node NodeOwned + Node NodeOwned[H] } EncodedNodeAccess[H any] struct { Hash H @@ -134,6 +134,8 @@ func (r *Recorder[H]) Record(access TrieAccess) { switch a := access.(type) { case EncodedNodeAccess[H]: r.nodes = append(r.nodes, Record[H]{Hash: a.Hash, Data: a.EncodedNode}) + case NodeOwnedAccess[H]: + r.nodes = append(r.nodes, Record[H]{Hash: a.Hash, Data: a.Node.encoded()}) case ValueAccess[H]: r.nodes = append(r.nodes, Record[H]{Hash: a.Hash, Data: a.Value}) r.recordedKeys.Set(string(a.FullKey), RecordedValue) @@ -146,6 +148,8 @@ func (r 
*Recorder[H]) Record(access TrieAccess) { case NonExistingNodeAccess: // We handle the non existing value/hash like having recorded the value r.recordedKeys.Set(string(a.FullKey), RecordedValue) + default: + panic("unreachable") } } @@ -157,7 +161,11 @@ func (r *Recorder[H]) Drain() []Record[H] { } func (r *Recorder[H]) TrieNodesRecordedForKey(key []byte) RecordedForKey { - panic("unimpl") + rfk, ok := r.recordedKeys.Get(string(key)) + if !ok { + return RecordedNone + } + return rfk } var _ TrieRecorder = &Recorder[string]{} diff --git a/pkg/trie/triedb/triedb.go b/pkg/trie/triedb/triedb.go index eae89b2e37..4a34a5950e 100644 --- a/pkg/trie/triedb/triedb.go +++ b/pkg/trie/triedb/triedb.go @@ -19,7 +19,10 @@ import ( "github.com/ChainSafe/gossamer/pkg/trie/triedb/nibbles" ) -var ErrIncompleteDB = errors.New("incomplete database") +var ( + ErrIncompleteDB = errors.New("incomplete database") + ErrInvalidStateRoot = errors.New("invalid state root") +) var ( logger = log.NewFromGlobal(log.AddContext("pkg", "triedb")) @@ -63,7 +66,7 @@ func NewEmptyTrieDB[H hash.Hash, Hasher hash.Hasher[H]]( db db.RWDatabase, opts ...TrieDBOpts[H, Hasher]) *TrieDB[H, Hasher] { hasher := *new(Hasher) root := hasher.Hash([]byte{0}) - return NewTrieDB[H, Hasher](root, db) + return NewTrieDB[H, Hasher](root, db, opts...) } // NewTrieDB creates a new TrieDB using the given root and db @@ -137,26 +140,32 @@ func (t *TrieDB[H, Hasher]) lookup(fullKey []byte, partialKey nibbles.Nibbles, h var partialIdx uint switch node := handle.(type) { case persisted[H]: - lookup := NewTrieLookup[H, Hasher, []byte](t.db, node.hash, t.cache, t.recorder) - val, err := lookup.lookupValue(fullKey, partialKey) + lookup := NewTrieLookup[H, Hasher, []byte]( + t.db, node.hash, t.cache, t.recorder, func(data []byte) []byte { + return data + }) + qi, err := lookup.Lookup(fullKey, partialKey) if err != nil { return nil, err } - return val, nil + if qi == nil { + return nil, nil + } + return *qi, nil case inMemory: switch n := t.storage.get(storageHandle(node)).(type) { case Empty: return nil, nil case Leaf[H]: if nibbles.NewNibblesFromNodeKey(n.partialKey).Equal(partialKey) { - return inMemoryFetchedValue(n.value, prefix, t.db) + return inMemoryFetchedValue[H](n.value, prefix, t.db) } else { return nil, nil } case Branch[H]: slice := nibbles.NewNibblesFromNodeKey(n.partialKey) if slice.Equal(partialKey) { - return inMemoryFetchedValue(n.value, prefix, t.db) + return inMemoryFetchedValue[H](n.value, prefix, t.db) } else if partialKey.StartsWith(slice) { idx := partialKey.At(slice.Len()) child := n.children[idx] @@ -227,7 +236,7 @@ func (t *TrieDB[H, Hasher]) fetchValue(hash H, prefix nibbles.Prefix) ([]byte, e // Remove removes the given key from the trie func (t *TrieDB[H, Hasher]) remove(keyNibbles nibbles.Nibbles) error { - var oldValue nodeValue[H] + var oldValue nodeValue rootHandle := t.rootHandle removeResult, err := t.removeAt(rootHandle, &keyNibbles, &oldValue) @@ -252,7 +261,7 @@ func (t *TrieDB[H, Hasher]) Delete(key []byte) error { // insert inserts the node and update the rootHandle func (t *TrieDB[H, Hasher]) insert(keyNibbles nibbles.Nibbles, value []byte) error { - var oldValue nodeValue[H] + var oldValue nodeValue rootHandle := t.rootHandle newHandle, _, err := t.insertAt(rootHandle, &keyNibbles, value, &oldValue) if err != nil { @@ -274,7 +283,7 @@ func (t *TrieDB[H, Hasher]) insertAt( handle NodeHandle, keyNibbles *nibbles.Nibbles, value []byte, - oldValue *nodeValue[H], + oldValue *nodeValue, ) (strgHandle storageHandle, 
changed bool, err error) { switch h := handle.(type) { case inMemory: @@ -309,7 +318,7 @@ type RemoveAtResult struct { func (t *TrieDB[H, Hasher]) removeAt( handle NodeHandle, keyNibbles *nibbles.Nibbles, - oldValue *nodeValue[H], + oldValue *nodeValue, ) (*RemoveAtResult, error) { var stored StoredNode switch h := handle.(type) { @@ -396,7 +405,7 @@ func (t *TrieDB[H, Hasher]) inspect( // fix is a helper function to reorganise the nodes after deleting a branch. // For example, if the node we are deleting is the only child for a branch node, we can transform that branch in a leaf -func (t *TrieDB[H, Hasher]) fix(branch Branch[H], key *nibbles.Nibbles) (Node, error) { +func (t *TrieDB[H, Hasher]) fix(branch Branch[H], key nibbles.Nibbles) (Node, error) { usedIndex := make([]byte, 0) for i := 0; i < codec.ChildrenCapacity; i++ { @@ -523,7 +532,7 @@ func combineKey(start nodeKey, end nodeKey) nodeKey { // removeInspector removes the key node from the given node `stored` func (t *TrieDB[H, Hasher]) removeInspector( - stored Node, keyNibbles *nibbles.Nibbles, oldValue *nodeValue[H], + stored Node, keyNibbles *nibbles.Nibbles, oldValue *nodeValue, ) (action, error) { partial := keyNibbles.Clone() @@ -549,7 +558,7 @@ func (t *TrieDB[H, Hasher]) removeInspector( } // The branch contains the value so we delete it t.replaceOldValue(oldValue, n.value, keyNibbles.Left()) - newNode, err := t.fix(Branch[H]{n.partialKey, n.children, nil}, keyNibbles) + newNode, err := t.fix(Branch[H]{n.partialKey, n.children, nil}, *keyNibbles) if err != nil { return nil, err } @@ -567,7 +576,7 @@ func (t *TrieDB[H, Hasher]) removeInspector( keyVal := keyNibbles.Clone() keyVal.Advance(existingLength) t.replaceOldValue(oldValue, n.value, keyVal.Left()) - newNode, err := t.fix(Branch[H]{n.partialKey, n.children, nil}, keyNibbles) + newNode, err := t.fix(Branch[H]{n.partialKey, n.children, nil}, *keyNibbles) return replaceNode{newNode}, err } return restoreNode{Branch[H]{n.partialKey, n.children, nil}}, nil @@ -583,7 +592,7 @@ func (t *TrieDB[H, Hasher]) removeInspector( if child == nil { return restoreNode{n}, nil } - prefix := keyNibbles + prefix := *keyNibbles keyNibbles.Advance(common + 1) removeAtResult, err := t.removeAt(child, keyNibbles, oldValue) @@ -611,7 +620,7 @@ func (t *TrieDB[H, Hasher]) removeInspector( // insertInspector inserts the new key / value pair into the given node `stored` func (t *TrieDB[H, Hasher]) insertInspector( - stored Node, keyNibbles *nibbles.Nibbles, value []byte, oldValue *nodeValue[H], + stored Node, keyNibbles *nibbles.Nibbles, value []byte, oldValue *nodeValue, ) (action, error) { partial := keyNibbles.Clone() @@ -785,8 +794,8 @@ func (t *TrieDB[H, Hasher]) insertInspector( } func (t *TrieDB[H, Hasher]) replaceOldValue( - oldValue *nodeValue[H], - storedValue nodeValue[H], + oldValue *nodeValue, + storedValue nodeValue, prefix nibbles.Prefix, ) { switch oldv := storedValue.(type) { @@ -807,19 +816,41 @@ func (t *TrieDB[H, Hasher]) replaceOldValue( } // lookup node in DB and add it in storage, return storage handle -// TODO: implement cache to improve performance func (t *TrieDB[H, Hasher]) lookupNode(hash H, key nibbles.Prefix) (storageHandle, error) { - prefixedKey := append(key.JoinedBytes(), hash.Bytes()...) - encodedNode, err := t.db.Get(prefixedKey) - if err != nil { - return -1, ErrIncompleteDB - } + var newNode = func() (Node, error) { + prefixedKey := append(key.JoinedBytes(), hash.Bytes()...) 
+ encodedNode, err := t.db.Get(prefixedKey) + if err != nil { + return nil, ErrIncompleteDB + } - t.recordAccess(EncodedNodeAccess[H]{Hash: t.rootHash, EncodedNode: encodedNode}) + t.recordAccess(EncodedNodeAccess[H]{Hash: t.rootHash, EncodedNode: encodedNode}) - node, err := newNodeFromEncoded[H](hash, encodedNode, t.storage) - if err != nil { - return -1, err + return newNodeFromEncoded[H](hash, encodedNode, &t.storage) + } + // We only check the `cache` for a node with `get_node` and don't insert + // the node if it wasn't there, because in substrate we only access the node while computing + // a new trie (aka some branch). We assume that this node isn't that important + // to have it being cached. + var node Node + if t.cache != nil { + nodeOwned := t.cache.GetNode(hash) + if nodeOwned == nil { + var err error + node, err = newNode() + if err != nil { + return -1, err + } + } else { + t.recordAccess(NodeOwnedAccess[H]{Hash: hash, Node: nodeOwned}) + node = newNodeFromNodeOwned(nodeOwned, &t.storage) + } + } else { + var err error + node, err = newNode() + if err != nil { + return -1, err + } } return t.storage.alloc(CachedStoredNode[H]{ @@ -882,6 +913,7 @@ func (t *TrieDB[H, Hasher]) commit() error { if err != nil { return nil, err } + t.cacheValue(k.Inner(), n.value, hash) k.DropLasts(mov) return HashChildReference[H]{hash}, nil case trieNodeToEncode: @@ -909,11 +941,9 @@ func (t *TrieDB[H, Hasher]) commit() error { } t.rootHash = hash + t.cacheNode(hash, encodedNode, fullKey) t.rootHandle = persisted[H]{t.rootHash} - // TODO: use fullKey when caching these nodes - _ = fullKey - // Flush all db changes return dbBatch.Flush() case CachedStoredNode[H]: @@ -943,6 +973,7 @@ func (t *TrieDB[H, Hasher]) commitChild( case CachedStoredNode[H]: return HashChildReference[H]{storedNode.hash}, nil case NewStoredNode: + // Reconstructs the full key var fullKey *nibbles.NibbleSlice prefix := prefixKey.Clone() if partial := stored.getNode().getPartialKey(); partial != nil { @@ -950,8 +981,6 @@ func (t *TrieDB[H, Hasher]) commitChild( prefix.AppendPartial(fk.RightPartial()) } fullKey = &prefix - // TODO: caching uses fullKey - _ = fullKey // We have to store the node in the DB commitChildFunc := func(node nodeToEncode, partialKey *nibbles.Nibbles, childIndex *byte) (ChildReference, error) { @@ -965,6 +994,7 @@ func (t *TrieDB[H, Hasher]) commitChild( panic("inserting in db") } + t.cacheValue(prefixKey.Inner(), n.value, hash) prefixKey.DropLasts(mov) return HashChildReference[H]{hash}, nil case trieNodeToEncode: @@ -994,6 +1024,8 @@ func (t *TrieDB[H, Hasher]) commitChild( return nil, err } + t.cacheNode(hash, encoded, fullKey) + return HashChildReference[H]{hash}, nil } else { return InlineChildReference(encoded), nil @@ -1006,101 +1038,127 @@ func (t *TrieDB[H, Hasher]) commitChild( } } -func (t *TrieDB[H, Hasher]) recordAccess(access TrieAccess) { - if t.recorder != nil { - t.recorder.Record(access) +// / Cache the given `encoded` node. 
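The caching hooks in this file only do anything when the TrieDB was constructed with a cache attached; the wiring is roughly as follows (a sketch mirroring the test setup, with newCachedTrieDB as a hypothetical helper and any TrieCache[hash.H256] implementation plugged in):

// newCachedTrieDB opens an empty TrieDB with a node/value cache attached, so
// that lookupNode, commit and the lookups can populate and reuse it.
func newCachedTrieDB(dbBackend db.RWDatabase, cache TrieCache[hash.H256]) *TrieDB[hash.H256, runtime.BlakeTwo256] {
	return NewEmptyTrieDB[hash.H256, runtime.BlakeTwo256](
		dbBackend,
		WithCache[hash.H256, runtime.BlakeTwo256](cache),
	)
}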
+func (t *TrieDB[H, Hasher]) cacheNode(hash H, encoded []byte, fullKey *nibbles.NibbleSlice) { + if t.cache == nil { + return + } + node, err := t.cache.GetOrInsertNode(hash, func() (NodeOwned[H], error) { + buf := bytes.NewBuffer(encoded) + decoded, err := codec.Decode[H](buf) + if err != nil { + return nil, err + } + return NodeOwnedFromNode[H, Hasher](decoded) + }) + if err != nil { + panic("Just encoded the node, so it should decode without any errors; qed") } -} -func (t *TrieDB[H, Hasher]) GetHash(key []byte) (*H, error) { - panic("unimpl") -} + type valueToCache struct { + KeyBytes []byte + CachedValue[H] + } + valuesToCache := []valueToCache{} + + // If the given node has data attached, the `full_key` is the full key to this node. + if fullKey != nil { + if v := node.data(); v != nil { + if h := node.dataHash(); h != nil { + valuesToCache = append(valuesToCache, valueToCache{ + KeyBytes: fullKey.Inner(), + CachedValue: NewCachedValue[H]( + ExistingCachedValue[H]{ + Hash: *h, + Data: v, + }, + ), + }) + } + } -type CachedValues[H any] interface { - NonExistingCachedValue | ExistingHashCachedValue[H] | ExistingCachedValue[H] - CachedValue -} + var cacheChildValues = func( + node NodeOwned[H], + valuesToCache []valueToCache, + fullKey nibbles.NibbleSlice, + ) []valueToCache { + for _, child := range node.children() { + switch nho := child.NodeHandleOwned.(type) { + case NodeHandleOwnedInline[H]: + n := child.nibble + c := nho.NodeOwned + key := fullKey.Clone() + if n != nil { + key.Push(*n) + } + if pk := c.partialKey(); pk != nil { + key.Append(*pk) + } -type CachedValue interface { - isCachedValue() -} + if d := c.data(); d != nil { + if h := c.dataHash(); h != nil { + valuesToCache = append(valuesToCache, valueToCache{ + KeyBytes: key.Inner(), + CachedValue: ExistingCachedValue[H]{ + Hash: *h, + Data: d, + }, + }) + } + } + } + } + return valuesToCache + } -func NewCachedValue[H any, CV CachedValues[H]](cv CV) CachedValue { - return cv -} + // Also cache values of inline nodes. + valuesToCache = cacheChildValues(node, valuesToCache, *fullKey) + } -// The value doesn't exist in the trie. -type NonExistingCachedValue struct{} + for _, valueToCache := range valuesToCache { + k := valueToCache.KeyBytes + v := valueToCache.CachedValue + t.cache.SetValue(k, v) + } +} -func (NonExistingCachedValue) isCachedValue() {} +// / Cache the given `value`. +// / +// / `hash` is the hash of `value`. +func (t *TrieDB[H, Hasher]) cacheValue(fullKey []byte, value []byte, hash H) { + if t.cache == nil { + return + } + var val []byte + node, err := t.cache.GetOrInsertNode(hash, func() (NodeOwned[H], error) { + return NodeOwnedValue[H]{ + Value: value, + Hash: hash, + }, nil + }) + if err != nil { + panic("this should never happend") + } + if node != nil { + val = node.data() + } -// We cached the hash, because we did not yet accessed the data. -type ExistingHashCachedValue[H any] struct { - Hash H + if val != nil { + t.cache.SetValue(fullKey, ExistingCachedValue[H]{ + Hash: hash, + Data: val, + }) + } } -func (ExistingHashCachedValue[H]) isCachedValue() {} - -// The value exists in the trie. -type ExistingCachedValue[H any] struct { - /// The hash of the value. - Hash H - /// The actual data of the value stored as [`BytesWeak`]. - /// - /// The original data [`Bytes`] is stored in the trie node - /// that is also cached by the [`TrieCache`]. If this node is dropped, - /// this data will also not be "upgradeable" anymore. 
- Data []byte +func (t *TrieDB[H, Hasher]) recordAccess(access TrieAccess) { + if t.recorder != nil { + t.recorder.Record(access) + } } -func (ExistingCachedValue[H]) isCachedValue() {} - -// A cache that can be used to speed-up certain operations when accessing the trie. -// -// The [`TrieDB`]/[`TrieDBMut`] by default are working with the internal hash-db in a non-owning -// mode. This means that for every lookup in the trie, every node is always fetched and decoded on -// the fly. Fetching and decoding a node always takes some time and can kill the performance of any -// application that is doing quite a lot of trie lookups. To circumvent this performance -// degradation, a cache can be used when looking up something in the trie. Any cache that should be -// used with the [`TrieDB`]/[`TrieDBMut`] needs to implement this trait. -// -// The trait is laying out a two level cache, first the trie nodes cache and then the value cache. -// The trie nodes cache, as the name indicates, is for caching trie nodes as [`NodeOwned`]. These -// trie nodes are referenced by their hash. The value cache is caching [`CachedValue`]'s and these -// are referenced by the key to look them up in the trie. As multiple different tries can have -// different values under the same key, it up to the cache implementation to ensure that the -// correct value is returned. As each trie has a different root, this root can be used to -// differentiate values under the same key. -type TrieCache[H hash.Hash] interface { - /// Lookup value for the given `key`. - /// - /// Returns the `None` if the `key` is unknown or otherwise `Some(_)` with the associated - /// value. - /// - /// [`Self::cache_data_for_key`] is used to make the cache aware of data that is associated - /// to a `key`. - /// - /// # Attention - /// - /// The cache can be used for different tries, aka with different roots. This means - /// that the cache implementation needs to take care of always returning the correct value - /// for the current trie root. - GetValue(key []byte) CachedValue - /// Cache the given `value` for the given `key`. - /// - /// # Attention - /// - /// The cache can be used for different tries, aka with different roots. This means - /// that the cache implementation needs to take care of caching `value` for the current - /// trie root. - SetValue(key []byte, value CachedValue) - - /// Get or insert a [`NodeOwned`]. - /// - /// The cache implementation should look up based on the given `hash` if the node is already - /// known. If the node is not yet known, the given `fetch_node` function can be used to fetch - /// the particular node. - /// - /// Returns the [`NodeOwned`] or an error that happened on fetching the node. 
- GetOrInsertNode(hash H, fetchNode func() (NodeOwned, error)) (NodeOwned, error) +func (t *TrieDB[H, Hasher]) GetHash(key []byte) (*H, error) { + panic("unimpl") + // TODO: work on this + // TODO: recreate test at https://github.com/paritytech/trie/blob/2edd0a18959e046c8e75559fa4678f7b8877cf91/trie-db/test/src/triedb.rs#L1141 } diff --git a/pkg/trie/triedb/triedb_test.go b/pkg/trie/triedb/triedb_test.go index 6d47106770..e166f5ddb3 100644 --- a/pkg/trie/triedb/triedb_test.go +++ b/pkg/trie/triedb/triedb_test.go @@ -4,6 +4,8 @@ package triedb import ( + "bytes" + "fmt" "testing" "github.com/ChainSafe/gossamer/internal/primitives/core/hash" @@ -34,7 +36,7 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0x01}, Offset: 0}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, }, @@ -54,13 +56,13 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0x01}, Offset: 1}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{0x01}}, - value: inline[hash.H256]([]byte("branch")), + value: inline([]byte("branch")), children: [codec.ChildrenCapacity]NodeHandle{ inMemory(0), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, @@ -88,7 +90,7 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{0}, Offset: 1}, - value: inline[hash.H256]([]byte("in between branch")), + value: inline([]byte("in between branch")), children: [codec.ChildrenCapacity]NodeHandle{ inMemory(1), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, @@ -98,13 +100,13 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0x01}, Offset: 1}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{1}, Offset: 0}, - value: inline[hash.H256]([]byte("branch")), + value: inline([]byte("branch")), children: [codec.ChildrenCapacity]NodeHandle{ inMemory(0), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, @@ -132,13 +134,13 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{1}, Offset: 1}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{0}, Offset: 1}, - value: inline[hash.H256]([]byte("branch")), + value: inline([]byte("branch")), children: [codec.ChildrenCapacity]NodeHandle{ inMemory(0), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, @@ -148,7 +150,7 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("top branch")), + value: inline([]byte("top branch")), children: [codec.ChildrenCapacity]NodeHandle{ inMemory(1), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, @@ -176,13 +178,13 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0}, Offset: 1}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("new branch")), + value: inline([]byte("new branch")), children: 
[codec.ChildrenCapacity]NodeHandle{ inMemory(0), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, @@ -211,13 +213,13 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0}, Offset: 1}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("branch")), + value: inline([]byte("branch")), children: [codec.ChildrenCapacity]NodeHandle{ inMemory(0), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, @@ -245,7 +247,7 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("branch")), + value: inline([]byte("branch")), children: [codec.ChildrenCapacity]NodeHandle{ inMemory(1), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, @@ -255,7 +257,7 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0}, Offset: 1}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, }, @@ -275,7 +277,7 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("new leaf")), + value: inline([]byte("new leaf")), }, }, }, @@ -296,7 +298,7 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("same")), + value: inline([]byte("same")), }, }, }, @@ -316,13 +318,13 @@ func TestInsertions(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0x02}}, - value: inline[hash.H256]([]byte("original leaf")), + value: inline([]byte("original leaf")), }, }, NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0x03}}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, NewStoredNode{ @@ -394,7 +396,7 @@ func TestDeletes(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, }, @@ -436,7 +438,7 @@ func TestDeletes(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{1, 0}}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, }, @@ -459,13 +461,13 @@ func TestDeletes(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: make([]byte, 0)}, - value: inline[hash.H256]([]byte("leaf1")), + value: inline([]byte("leaf1")), }, }, NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: make([]byte, 0)}, - value: inline[hash.H256]([]byte("leaf2")), + value: inline([]byte("leaf2")), }, }, NewStoredNode{ @@ -527,7 +529,7 @@ func TestInsertAfterDelete(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("new leaf")), + value: inline([]byte("new leaf")), }, }, }, @@ -551,13 +553,13 @@ func TestInsertAfterDelete(t *testing.T) { NewStoredNode{ Leaf[hash.H256]{ partialKey: nodeKey{Data: []byte{0}, Offset: 1}, - value: inline[hash.H256]([]byte("leaf")), + value: inline([]byte("leaf")), }, }, NewStoredNode{ Branch[hash.H256]{ partialKey: nodeKey{Data: []byte{1}}, - value: inline[hash.H256]([]byte("new branch")), + value: inline([]byte("new branch")), children: [codec.ChildrenCapacity]NodeHandle{ inMemory(0), }, @@ -800,3 +802,211 @@ func 
TestDBCommits(t *testing.T) { assert.Nil(t, v) }) } + +func Test_TrieDB(t *testing.T) { + for _, version := range []trie.TrieLayout{trie.V0, trie.V1} { + t.Run(fmt.Sprintf("recorder_%v", version), func(t *testing.T) { + keyValues := []struct { + key []byte + value []byte + }{ + {[]byte("A"), bytes.Repeat([]byte{1}, 64)}, + {[]byte("AA"), bytes.Repeat([]byte{2}, 64)}, + {[]byte("AB"), bytes.Repeat([]byte{3}, 64)}, + {[]byte("B"), bytes.Repeat([]byte{4}, 64)}, + } + + // Add some initial data to the trie + db := NewMemoryDB[hash.H256, runtime.BlakeTwo256](EmptyNode) + trie := NewEmptyTrieDB[hash.H256, runtime.BlakeTwo256](db) + trie.SetVersion(version) + + for _, entry := range keyValues[:1] { + require.NoError(t, trie.Put(entry.key, entry.value)) + } + err := trie.commit() + require.NoError(t, err) + require.NotEmpty(t, trie.rootHash) + root := trie.rootHash + + // Add more data, but this time only to the overlay. + // While doing that we record all trie accesses to replay this operation. + recorder := NewRecorder[hash.H256]() + overlay := db.Clone() + newRoot := root + { + trie := NewTrieDB(newRoot, overlay, + WithRecorder[hash.H256, runtime.BlakeTwo256](recorder), + ) + trie.SetVersion(version) + for _, entry := range keyValues[1:] { + require.NoError(t, trie.Put(entry.key, entry.value)) + } + err := trie.commit() + require.NoError(t, err) + require.NotEmpty(t, trie.rootHash) + newRoot = trie.rootHash + } + + partialDB := NewMemoryDB[hash.H256, runtime.BlakeTwo256](EmptyNode) + for _, record := range recorder.Drain() { + key := runtime.BlakeTwo256{}.Hash(record.Data).Bytes() + require.NoError(t, partialDB.Put(key, record.Data)) + } + + // Replay the it, but this time we use the proof. + var validatedRoot hash.H256 + { + trie := NewTrieDB[hash.H256, runtime.BlakeTwo256](root, partialDB) + trie.SetVersion(version) + for _, entry := range keyValues[1:] { + require.NoError(t, trie.Put(entry.key, entry.value)) + } + err := trie.commit() + require.NoError(t, err) + require.NotEmpty(t, trie.rootHash) + validatedRoot = trie.rootHash + } + assert.Equal(t, validatedRoot, newRoot) + }) + + t.Run(fmt.Sprintf("recorder_with_cache_%v", version), func(t *testing.T) { + keyValues := []struct { + key []byte + value []byte + }{ + {[]byte("A"), bytes.Repeat([]byte{1}, 64)}, + {[]byte("AA"), bytes.Repeat([]byte{2}, 64)}, + {[]byte("AB"), bytes.Repeat([]byte{3}, 64)}, + {[]byte("B"), bytes.Repeat([]byte{4}, 64)}, + } + + // Add some initial data to the trie + db := NewMemoryDB[hash.H256, runtime.BlakeTwo256](EmptyNode) + trie := NewEmptyTrieDB[hash.H256, runtime.BlakeTwo256](db) + trie.SetVersion(version) + + for _, entry := range keyValues[:1] { + require.NoError(t, trie.Put(entry.key, entry.value)) + } + err := trie.commit() + require.NoError(t, err) + require.NotEmpty(t, trie.rootHash) + root := trie.rootHash + + cache := NewTestTrieCache[hash.H256]() + + { + trie := NewTrieDB(trie.rootHash, db, WithCache[hash.H256, runtime.BlakeTwo256](cache)) + trie.SetVersion(version) + // Only read one entry. + assert.Equal(t, keyValues[0].value, trie.Get(keyValues[0].key)) + } + + // Root should now be cached. + require.NotNil(t, cache.GetNode(trie.rootHash)) + + // Add more data, but this time only to the overlay. + // While doing that we record all trie accesses to replay this operation. 
+ recorder := NewRecorder[hash.H256]() + overlay := db.Clone() + var newRoot hash.H256 + { + trie := NewTrieDB(trie.rootHash, overlay, + WithCache[hash.H256, runtime.BlakeTwo256](cache), + WithRecorder[hash.H256, runtime.BlakeTwo256](recorder), + ) + trie.SetVersion(version) + for _, entry := range keyValues[1:] { + require.NoError(t, trie.Put(entry.key, entry.value)) + } + err := trie.commit() + require.NoError(t, err) + require.NotEmpty(t, trie.rootHash) + newRoot = trie.rootHash + } + + for i, entry := range keyValues[1:] { + cachedValue := cache.GetValue(entry.key) + require.Equal(t, ExistingCachedValue[hash.H256]{ + Hash: runtime.BlakeTwo256{}.Hash(keyValues[i+1].value), + Data: keyValues[i+1].value, + }, cachedValue) + } + + partialDB := NewMemoryDB[hash.H256, runtime.BlakeTwo256](EmptyNode) + for _, record := range recorder.Drain() { + key := runtime.BlakeTwo256{}.Hash(record.Data).Bytes() + require.NoError(t, partialDB.Put(key, record.Data)) + } + + // Replay the it, but this time we use the proof. + var validatedRoot hash.H256 + { + trie := NewTrieDB[hash.H256, runtime.BlakeTwo256](root, partialDB) + trie.SetVersion(version) + for _, entry := range keyValues[1:] { + require.NoError(t, trie.Put(entry.key, entry.value)) + } + err := trie.commit() + require.NoError(t, err) + require.NotEmpty(t, trie.rootHash) + validatedRoot = trie.rootHash + } + assert.Equal(t, validatedRoot, newRoot) + }) + + t.Run(fmt.Sprintf("insert_remove_with_cache_%v", version), func(t *testing.T) { + keyValues := []struct { + key []byte + value []byte + }{ + {[]byte("A"), bytes.Repeat([]byte{1}, 64)}, + {[]byte("AA"), bytes.Repeat([]byte{2}, 64)}, + // Should be inlined + {[]byte("AC"), bytes.Repeat([]byte{7}, 4)}, + {[]byte("AB"), bytes.Repeat([]byte{3}, 64)}, + {[]byte("B"), bytes.Repeat([]byte{4}, 64)}, + } + + cache := NewTestTrieCache[hash.H256]() + recorder := NewRecorder[hash.H256]() + db := NewMemoryDB[hash.H256, runtime.BlakeTwo256](EmptyNode) + { + trie := NewEmptyTrieDB[hash.H256, runtime.BlakeTwo256](db, + WithCache[hash.H256, runtime.BlakeTwo256](cache), + WithRecorder[hash.H256, runtime.BlakeTwo256](recorder), + ) + trie.SetVersion(version) + + // Add all values + for _, entry := range keyValues { + require.NoError(t, trie.Put(entry.key, entry.value)) + } + + // Remove only the last 2 elements + for _, entry := range keyValues[3:] { + require.NoError(t, trie.Delete(entry.key)) + } + + err := trie.commit() + require.NoError(t, err) + require.NotEmpty(t, trie.rootHash) + } + + // Then only the first 3 elements should be in the cache and the last + // two ones should not be there. 
+ for _, entry := range keyValues[:3] { + cachedValue := cache.GetValue(entry.key) + require.NotNil(t, cachedValue) + + require.Equal(t, entry.value, cachedValue.data()) + require.Equal(t, runtime.BlakeTwo256{}.Hash(entry.value), *cachedValue.hash()) + } + + for _, entry := range keyValues[3:] { + require.Nil(t, cache.GetValue(entry.key)) + } + }) + } +} diff --git a/pkg/trie/triedb/util_test.go b/pkg/trie/triedb/util_test.go index 4a16848319..e8da9df684 100644 --- a/pkg/trie/triedb/util_test.go +++ b/pkg/trie/triedb/util_test.go @@ -8,9 +8,11 @@ import ( "strings" "github.com/ChainSafe/gossamer/internal/database" + chash "github.com/ChainSafe/gossamer/internal/primitives/core/hash" "github.com/ChainSafe/gossamer/pkg/trie/db" "github.com/ChainSafe/gossamer/pkg/trie/triedb/hash" "github.com/stretchr/testify/assert" + "golang.org/x/exp/maps" ) // MemoryDB is an in-memory implementation of the Database interface backed by a @@ -68,6 +70,14 @@ func (db *MemoryDB) NewBatch() database.Batch { return &MemoryBatch{db} } +func (db *MemoryDB) Clone() *MemoryDB { + return &MemoryDB{ + data: maps.Clone(db.data), + hashedNullNode: db.hashedNullNode, + nullNodeData: db.nullNodeData, + } +} + var _ db.RWDatabase = &MemoryDB{} type MemoryBatch struct { @@ -91,3 +101,50 @@ func newTestDB(t assert.TestingT) database.Table { assert.NoError(t, err) return database.NewTable(db, "trie") } + +type TestTrieCache[H hash.Hash] struct { + valueCache map[string]CachedValue[H] + nodeCache map[H]NodeOwned[H] +} + +func NewTestTrieCache[H hash.Hash]() *TestTrieCache[H] { + return &TestTrieCache[H]{ + valueCache: make(map[string]CachedValue[H]), + nodeCache: make(map[H]NodeOwned[H]), + } +} + +func (ttc *TestTrieCache[H]) GetValue(key []byte) CachedValue[H] { + cv, ok := ttc.valueCache[string(key)] + if !ok { + return nil + } + return cv +} + +func (ttc *TestTrieCache[H]) SetValue(key []byte, value CachedValue[H]) { + ttc.valueCache[string(key)] = value +} + +func (ttc *TestTrieCache[H]) GetOrInsertNode(hash H, fetchNode func() (NodeOwned[H], error)) (NodeOwned[H], error) { + node, ok := ttc.nodeCache[hash] + if !ok { + var err error + node, err = fetchNode() + if err != nil { + return nil, err + } + ttc.nodeCache[hash] = node + } + return node, nil +} + +func (ttc *TestTrieCache[H]) GetNode(hash H) NodeOwned[H] { + node, ok := ttc.nodeCache[hash] + if !ok { + return nil + } + return node +} + +var _ TrieCache[chash.H256] = &TestTrieCache[chash.H256]{}
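
With the pieces above in place, a cache passed through WithCache is filled from two directions: commit populates the value cache through cacheValue and cacheNode, and lookups populate the node cache through GetOrInsertNode (the recorder_with_cache test checks that the root is cached after a single Get). Below is a minimal sketch of that read-through behaviour, written against the test helpers in util_test.go; the runtime import path and the default trie layout (SetVersion is omitted) are assumptions, not something this diff pins down.

package triedb

import (
	"bytes"
	"testing"

	"github.com/ChainSafe/gossamer/internal/primitives/core/hash"
	"github.com/ChainSafe/gossamer/internal/primitives/runtime"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Sketch: after one read through a cache-equipped TrieDB, the root node is
// held as a NodeOwned, so later reads of the same key can skip fetching and
// decoding it from the backing database.
func Test_Sketch_nodeCacheReadThrough(t *testing.T) {
	db := NewMemoryDB[hash.H256, runtime.BlakeTwo256](EmptyNode)
	tr := NewEmptyTrieDB[hash.H256, runtime.BlakeTwo256](db)

	key := []byte("A")
	value := bytes.Repeat([]byte{1}, 64)
	require.NoError(t, tr.Put(key, value))
	require.NoError(t, tr.commit())
	root := tr.rootHash

	cache := NewTestTrieCache[hash.H256]()
	cachedTrie := NewTrieDB(root, db, WithCache[hash.H256, runtime.BlakeTwo256](cache))

	// The first read fetches nodes from the database and inserts them into
	// the node cache via GetOrInsertNode.
	assert.Equal(t, value, cachedTrie.Get(key))

	// The root is now cached, so a second read is answered from NodeOwned
	// nodes instead of re-fetching and decoding from the database.
	require.NotNil(t, cache.GetNode(root))
	assert.Equal(t, value, cachedTrie.Get(key))
}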
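
On the read side, cache.GetValue hands back one of three CachedValue states (the non-generic definitions removed above are reintroduced as generic types in the new cache.go), and a consumer has to branch on all of them before it can skip the trie walk. The sketch below shows that branching with a type switch; it assumes it lives inside the triedb package, and loadValue is a hypothetical fetch-by-hash helper rather than an API introduced by this change.

package triedb

import "github.com/ChainSafe/gossamer/pkg/trie/triedb/hash"

// resolveCached interprets a value-cache lookup. It returns the value bytes
// when the cache can answer on its own, and needLookup=true when the caller
// still has to fall back to a full trie walk.
func resolveCached[H hash.Hash](
	cv CachedValue[H],
	loadValue func(valueHash H) ([]byte, error), // hypothetical fetch-by-hash helper
) (value []byte, needLookup bool, err error) {
	switch v := cv.(type) {
	case nil:
		// Nothing cached for this key: do a normal lookup.
		return nil, true, nil
	case NonExistingCachedValue[H]:
		// Cached absence: the key is known not to exist in this trie.
		return nil, false, nil
	case ExistingHashCachedValue[H]:
		// Only the value hash is cached; the data still has to be loaded.
		data, lerr := loadValue(v.Hash)
		return data, false, lerr
	case ExistingCachedValue[H]:
		// Hash and data are both cached.
		return v.Data, false, nil
	default:
		return nil, true, nil
	}
}

A lookup that comes back with needLookup=true would fall through to the existing TrieLookup path and, on success, call SetValue so the next read for that key can be served from the cache.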
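
The TrieCache contract removed here (and reintroduced in cache.go) leaves the "same key under different trie roots" problem to the implementation; TestTrieCache can ignore it because each test uses a single root per cache. A hypothetical root-scoped variant is sketched below to show one way a longer-lived cache could honour that contract: values are namespaced by the current root, while nodes stay global because they are addressed by hash. RootScopedCache and SetRoot are illustrative names, not part of this diff.

package triedb

import (
	chash "github.com/ChainSafe/gossamer/internal/primitives/core/hash"
	"github.com/ChainSafe/gossamer/pkg/trie/triedb/hash"
)

// RootScopedCache keeps one value cache per trie root, so the same key can
// resolve to different values in different tries, while the node cache is
// shared because nodes are content-addressed by their hash.
type RootScopedCache[H hash.Hash] struct {
	root       H
	valueCache map[H]map[string]CachedValue[H]
	nodeCache  map[H]NodeOwned[H]
}

func NewRootScopedCache[H hash.Hash](root H) *RootScopedCache[H] {
	return &RootScopedCache[H]{
		root:       root,
		valueCache: map[H]map[string]CachedValue[H]{},
		nodeCache:  map[H]NodeOwned[H]{},
	}
}

// SetRoot switches the cache to another trie, keeping values cached for
// other roots intact.
func (c *RootScopedCache[H]) SetRoot(root H) { c.root = root }

func (c *RootScopedCache[H]) GetValue(key []byte) CachedValue[H] {
	return c.valueCache[c.root][string(key)]
}

func (c *RootScopedCache[H]) SetValue(key []byte, value CachedValue[H]) {
	if c.valueCache[c.root] == nil {
		c.valueCache[c.root] = map[string]CachedValue[H]{}
	}
	c.valueCache[c.root][string(key)] = value
}

func (c *RootScopedCache[H]) GetOrInsertNode(hash H, fetchNode func() (NodeOwned[H], error)) (NodeOwned[H], error) {
	if node, ok := c.nodeCache[hash]; ok {
		return node, nil
	}
	node, err := fetchNode()
	if err != nil {
		return nil, err
	}
	c.nodeCache[hash] = node
	return node, nil
}

func (c *RootScopedCache[H]) GetNode(hash H) NodeOwned[H] {
	return c.nodeCache[hash]
}

var _ TrieCache[chash.H256] = &RootScopedCache[chash.H256]{}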