Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(trie): refactor existing header encoding #2530

Merged
merged 9 commits into from
Jul 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions internal/trie/node/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Trie node

Package node defines the `Node` structure with methods to be used in the modified Merkle-Patricia Radix-16 trie.

## Codec

The following sub-sections specify the encoding of a node.
This encoding is formally described in [the Polkadot specification](https://spec.polkadot.network/#sect-state-storage).

### Header

Each node encoding has a header of one or more bytes.
The first byte contains the node variant and some or all of the partial key length of the node.
If the partial key length cannot fit in the first byte, additional bytes are added to the header to represent the total partial key length.

### Partial key

The header is then concatenated with the partial key of the node, encoded as Little Endian bytes.

### Remaining bytes

The remaining bytes appended depend on the node variant.

- For leaves, the SCALE-encoded leaf value is appended.
- For branches, the following elements are concatenated in this order and appended to the previous header+partial key:
  - Children bitmap (2 bytes)
  - SCALE-encoded node value
  - Hash(Encoding(Child[0]))
  - Hash(Encoding(Child[1]))
  - ...
  - Hash(Encoding(Child[15]))
92 changes: 47 additions & 45 deletions internal/trie/node/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,63 +9,68 @@ import (
"fmt"
"io"

"github.com/ChainSafe/gossamer/internal/trie/pools"
"github.com/ChainSafe/gossamer/pkg/scale"
)

var (
ErrReadHeaderByte = errors.New("cannot read header byte")
ErrUnknownNodeType = errors.New("unknown node type")
// ErrDecodeValue is defined since no sentinel error is defined
// in the scale package.
// TODO remove once the following issue is done:
// https://github.com/ChainSafe/gossamer/issues/2631 .
ErrDecodeValue = errors.New("cannot decode value")
ErrReadChildrenBitmap = errors.New("cannot read children bitmap")
ErrDecodeChildHash = errors.New("cannot decode child hash")
// ErrDecodeChildHash is defined since no sentinel error is defined
// in the scale package.
// TODO remove once the following issue is done:
// https://github.com/ChainSafe/gossamer/issues/2631 .
ErrDecodeChildHash = errors.New("cannot decode child hash")
)

// Decode decodes a node from a reader.
// The encoding format is documented in the README.md
// of this package, and specified in the Polkadot spec at
// https://spec.polkadot.network/#sect-state-storage
// For branch decoding, see the comments on decodeBranch.
// For leaf decoding, see the comments on decodeLeaf.
func Decode(reader io.Reader) (n *Node, err error) {
buffer := pools.SingleByteBuffers.Get().(*bytes.Buffer)
defer pools.SingleByteBuffers.Put(buffer)
oneByteBuf := buffer.Bytes()
_, err = reader.Read(oneByteBuf)
variant, partialKeyLength, err := decodeHeader(reader)
if err != nil {
return nil, fmt.Errorf("%w: %s", ErrReadHeaderByte, err)
return nil, fmt.Errorf("decoding header: %w", err)
}
header := oneByteBuf[0]

nodeTypeHeaderByte := header >> 6
switch nodeTypeHeaderByte {
case leafHeader:
n, err = decodeLeaf(reader, header)
switch variant {
case leafVariant.bits:
qdm12 marked this conversation as resolved.
Show resolved Hide resolved
n, err = decodeLeaf(reader, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("cannot decode leaf: %w", err)
}
return n, nil
case branchHeader, branchWithValueHeader:
n, err = decodeBranch(reader, header)
case branchVariant.bits, branchWithValueVariant.bits:
qdm12 marked this conversation as resolved.
Show resolved Hide resolved
n, err = decodeBranch(reader, variant, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("cannot decode branch: %w", err)
}
return n, nil
default:
return nil, fmt.Errorf("%w: %d", ErrUnknownNodeType, nodeTypeHeaderByte)
// this is a programming error, an unknown node variant
// should be caught by decodeHeader.
panic(fmt.Sprintf("not implemented for node variant %08b", variant))
qdm12 marked this conversation as resolved.
Show resolved Hide resolved
}
}

// decodeBranch reads and decodes from a reader with the encoding specified in internal/trie/node/encode_doc.go.
// decodeBranch reads from a reader and decodes to a node branch.
// Note that since the encoded branch stores the hash of the children nodes, we are not
// reconstructing the child nodes from the encoding. This function instead stubs where the
// children are known to be with an empty leaf. The children nodes hashes are then used to
// find other values using the persistent database.
func decodeBranch(reader io.Reader, header byte) (node *Node, err error) {
func decodeBranch(reader io.Reader, variant byte, partialKeyLength uint16) (
node *Node, err error) {
node = &Node{
Dirty: true,
Children: make([]*Node, ChildrenCapacity),
}

keyLen := header & keyLenOffset
node.Key, err = decodeKey(reader, keyLen)
node.Key, err = decodeKey(reader, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("cannot decode key: %w", err)
}
Expand All @@ -78,18 +83,14 @@ func decodeBranch(reader io.Reader, header byte) (node *Node, err error) {

sd := scale.NewDecoder(reader)

nodeType := header >> 6
if nodeType == branchWithValueHeader {
var value []byte
// branch w/ value
err := sd.Decode(&value)
if variant == branchWithValueVariant.bits {
err := sd.Decode(&node.Value)
if err != nil {
return nil, fmt.Errorf("%w: %s", ErrDecodeValue, err)
}
node.Value = value
}

for i := 0; i < 16; i++ {
for i := 0; i < ChildrenCapacity; i++ {
qdm12 marked this conversation as resolved.
Show resolved Hide resolved
if (childrenBitmap[i/8]>>(i%8))&1 != 1 {
continue
}
Expand All @@ -101,37 +102,38 @@ func decodeBranch(reader io.Reader, header byte) (node *Node, err error) {
ErrDecodeChildHash, i, err)
}

// Handle inlined leaf nodes.
const hashLength = 32
nodeTypeHeaderByte := hash[0] >> 6
if nodeTypeHeaderByte == leafHeader && len(hash) < hashLength {
leaf, err := decodeLeaf(bytes.NewReader(hash[1:]), hash[0])
if err != nil {
return nil, fmt.Errorf("%w: at index %d: %s",
ErrDecodeValue, i, err)
childNode := &Node{
HashDigest: hash,
qdm12 marked this conversation as resolved.
Show resolved Hide resolved
Dirty: true,
}
if len(hash) < hashLength {
// Handle inlined nodes
reader = bytes.NewReader(hash)
variant, partialKeyLength, err := decodeHeader(reader)
if err == nil && variant == leafVariant.bits {
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved
childNode, err = decodeLeaf(reader, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("%w: at index %d: %s",
ErrDecodeValue, i, err)
}
}
node.Descendants++
node.Children[i] = leaf
continue
}

node.Descendants++
node.Children[i] = &Node{
HashDigest: hash,
}
node.Children[i] = childNode
}

return node, nil
}

// decodeLeaf reads and decodes from a reader with the encoding specified in lib/trie/node/encode_doc.go.
func decodeLeaf(reader io.Reader, header byte) (node *Node, err error) {
// decodeLeaf reads from a reader and decodes to a leaf node.
func decodeLeaf(reader io.Reader, partialKeyLength uint16) (node *Node, err error) {
node = &Node{
Dirty: true,
}

keyLen := header & keyLenOffset
node.Key, err = decodeKey(reader, keyLen)
node.Key, err = decodeKey(reader, partialKeyLength)
if err != nil {
return nil, fmt.Errorf("cannot decode key: %w", err)
}
Expand Down
Loading