Skip to content

Commit

Permalink
feat(erasure_coding): introduce erasure coding for PoV Distributor (#…
Browse files Browse the repository at this point in the history
…3281)

Co-authored-by: Eclésio Junior <eclesiomelo.1@gmail.com>
Co-authored-by: Edward Mack <emack@pop-os.localdomain>
  • Loading branch information
3 people authored and timwu20 committed Jun 17, 2024
1 parent 88a475b commit 4921493
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 0 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ require (
github.com/jpillora/ipfilter v1.2.9
github.com/karlseguin/ccache/v3 v3.0.5
github.com/klauspost/compress v1.17.8
github.com/klauspost/reedsolomon v1.11.8
github.com/libp2p/go-libp2p v0.33.2
github.com/libp2p/go-libp2p-kad-dht v0.25.2
github.com/minio/sha256-simd v1.0.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0N
github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/klauspost/reedsolomon v1.11.8 h1:s8RpUW5TK4hjr+djiOpbZJB4ksx+TdYbRH7vHQpwPOY=
github.com/klauspost/reedsolomon v1.11.8/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ5MGv0Qd8a47h6A=
github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0=
github.com/koron/go-ssdp v0.0.4/go.mod h1:oDXq+E5IL5q0U8uSBcoAXzTzInwy5lEgC91HoKtbmZk=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
Expand Down
70 changes: 70 additions & 0 deletions lib/erasure/erasure.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2021 ChainSafe Systems (ON)
// SPDX-License-Identifier: LGPL-3.0-only

package erasure

import (
"bytes"
"errors"
"fmt"

"github.com/klauspost/reedsolomon"
)

// ErrNotEnoughValidators is returned when the requested validator count is
// one or less; erasure coding here requires at least 2 validators.
var ErrNotEnoughValidators = errors.New("expected at least 2 validators")

// ObtainChunks erasure-codes data for the given number of validators.
//
// The data is split into validatorsQty data chunks, and recoveryThreshold(validatorsQty)
// parity chunks are computed on top of them, so the returned slice holds
// validatorsQty + recoveryThreshold(validatorsQty) chunks in total.
//
// It returns ErrNotEnoughValidators when validatorsQty is one or less, and otherwise
// propagates any error reported by the reedsolomon encoder (creation errors are
// wrapped with context, split/encode errors are returned as-is).
//
// NOTE(review): the reedsolomon Split documentation should be checked for whether the
// returned chunks alias the memory of data; callers may need to avoid mutating data afterwards.
func ObtainChunks(validatorsQty int, data []byte) ([][]byte, error) {
	parityQty, err := recoveryThreshold(validatorsQty)
	if err != nil {
		return nil, err
	}

	codec, err := reedsolomon.New(validatorsQty, parityQty)
	if err != nil {
		return nil, fmt.Errorf("creating new reed solomon failed: %w", err)
	}

	// Split yields the data chunks plus placeholders for the parity chunks.
	chunks, err := codec.Split(data)
	if err != nil {
		return nil, err
	}

	// Encode fills in the parity chunks in place.
	if err := codec.Encode(chunks); err != nil {
		return nil, err
	}

	return chunks, nil
}

// Reconstruct rebuilds the original data from a set of erasure-coded chunks.
//
// validatorsQty is the number of data chunks the data was originally split into,
// originalDataLen is the byte length of the original data (used to drop the
// padding added when the data was split), and chunks is the full list of data and
// parity chunks in their original order, with missing chunks left empty.
//
// Missing entries of chunks are filled in place, so the caller's slice is
// modified on success.
//
// It returns ErrNotEnoughValidators when validatorsQty is one or less, and
// otherwise propagates errors from the reedsolomon encoder. The returned data is
// always nil when the error is non-nil.
func Reconstruct(validatorsQty, originalDataLen int, chunks [][]byte) ([]byte, error) {
	recoveryThres, err := recoveryThreshold(validatorsQty)
	if err != nil {
		return nil, err
	}

	enc, err := reedsolomon.New(validatorsQty, recoveryThres)
	if err != nil {
		// Same context as ObtainChunks so both entry points report encoder
		// creation failures consistently.
		return nil, fmt.Errorf("creating new reed solomon failed: %w", err)
	}

	if err := enc.Reconstruct(chunks); err != nil {
		return nil, err
	}

	var buf bytes.Buffer
	if err := enc.Join(&buf, chunks, originalDataLen); err != nil {
		// Never hand back partially written data together with an error.
		return nil, err
	}

	return buf.Bytes(), nil
}

// recoveryThreshold returns the maximum number of shards/chunks that can be lost
// while the full initial data can still be reconstructed. It is computed as
// (validators-1)/3 + 1 using integer division, and the total number of chunks
// is validators + recoveryThreshold.
//
// It returns ErrNotEnoughValidators when validators is one or less.
func recoveryThreshold(validators int) (int, error) {
	if validators < 2 {
		return 0, ErrNotEnoughValidators
	}

	return (validators-1)/3 + 1, nil
}
132 changes: 132 additions & 0 deletions lib/erasure/erasure_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright 2021 ChainSafe Systems (ON)
// SPDX-License-Identifier: LGPL-3.0-only

package erasure

import (
"testing"

"github.com/klauspost/reedsolomon"
"github.com/stretchr/testify/assert"
)

// testData is the 36-byte payload that the fixtures below were derived from.
var testData = []byte("this is a test of the erasure coding")

// expectedChunks is the expected encoding of testData for 10 validators:
// 14 chunks of 4 bytes each. The first 9 chunks hold the bytes of testData,
// the 10th is all zeros, and the last 4 are the parity chunks.
var expectedChunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {97, 32, 116, 101}, {115, 116, 32, 111},
{102, 32, 116, 104}, {101, 32, 101, 114}, {97, 115, 117, 114}, {101, 32, 99, 111}, {100, 105, 110, 103},
{0, 0, 0, 0}, {133, 189, 154, 178}, {88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}

// erasure data missing chunks

// missing2Chunks is expectedChunks with the chunks at indexes 2 and 6 emptied.
var missing2Chunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {}, {115, 116, 32, 111},
{102, 32, 116, 104}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103},
{0, 0, 0, 0}, {133, 189, 154, 178}, {88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}

// missing3Chunks is expectedChunks with the chunks at indexes 2, 4 and 6 emptied.
var missing3Chunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {}, {115, 116, 32, 111},
{}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103}, {0, 0, 0, 0}, {133, 189, 154, 178},
{88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}

// missing5Chunks is expectedChunks with the chunks at indexes 0, 1, 2, 4 and 6
// emptied; TestReconstruct expects reconstruction from it to fail.
var missing5Chunks = [][]byte{{}, {}, {}, {115, 116, 32, 111},
{}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103}, {0, 0, 0, 0}, {133, 189, 154, 178},
{88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}

// TestObtainChunks checks ObtainChunks against a known-good encoding of testData
// and against its two failure modes: no data to split and too few validators.
func TestObtainChunks(t *testing.T) {
	type input struct {
		validatorsQty int
		data          []byte
	}
	type testCase struct {
		in         input
		wantChunks [][]byte
		wantErr    error
	}

	cases := map[string]testCase{
		"happy_path": {
			in:         input{validatorsQty: 10, data: testData},
			wantChunks: expectedChunks,
		},
		"nil_data": {
			in:      input{validatorsQty: 10, data: nil},
			wantErr: reedsolomon.ErrShortData,
		},
		"not_enough_validators": {
			in:      input{validatorsQty: 1, data: testData},
			wantErr: ErrNotEnoughValidators,
		},
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			chunks, err := ObtainChunks(tc.in.validatorsQty, tc.in.data)

			if tc.wantErr == nil {
				// The threshold error is ignored on purpose: success cases
				// always use a valid validator count.
				parityQty, _ := recoveryThreshold(tc.in.validatorsQty)
				assert.NoError(t, err)
				assert.Equal(t, tc.in.validatorsQty+parityQty, len(chunks))
			} else {
				assert.EqualError(t, err, tc.wantErr.Error())
			}

			assert.Equal(t, tc.wantChunks, chunks)
		})
	}
}

// TestReconstruct checks that Reconstruct rebuilds testData (and the missing
// chunks, in place) when enough chunks are available, and that it fails with
// reedsolomon.ErrTooFewShards otherwise.
//
// Reconstruct fills missing chunks in place, and the fixtures are package-level
// variables iterated in random (map) order. Each subtest therefore works on its
// own deep copy of the fixture so that no subtest depends on another one having
// run (and mutated the shared fixture) before it.
func TestReconstruct(t *testing.T) {
	type args struct {
		validatorsQty int
		chunks        [][]byte
	}
	tests := map[string]struct {
		args
		expectedData   []byte
		expectedChunks [][]byte
		expectedError  error
	}{
		"missing_2_chunks": {
			args: args{
				validatorsQty: 10,
				chunks:        missing2Chunks,
			},
			expectedData:   testData,
			expectedChunks: expectedChunks,
		},
		"missing_2_chunks,_validator_qty_3": {
			args: args{
				validatorsQty: 3,
				chunks:        missing2Chunks,
			},
			expectedError: reedsolomon.ErrTooFewShards,
			// The chunk count does not match this configuration, so the call
			// is rejected and the input is expected to be left untouched.
			expectedChunks: missing2Chunks,
		},
		"missing_3_chunks": {
			args: args{
				validatorsQty: 10,
				chunks:        missing3Chunks,
			},
			expectedData:   testData,
			expectedChunks: expectedChunks,
		},
		"missing_5_chunks": {
			args: args{
				validatorsQty: 10,
				chunks:        missing5Chunks,
			},
			expectedChunks: missing5Chunks,
			expectedError:  reedsolomon.ErrTooFewShards,
		},
	}
	for name, tt := range tests {
		t.Run(name, func(t *testing.T) {
			// Deep-copy the fixture. make (rather than append to nil) keeps
			// empty chunks non-nil so they still compare equal to the
			// []byte{} literals used in the fixtures.
			chunks := make([][]byte, len(tt.args.chunks))
			for i, chunk := range tt.args.chunks {
				chunks[i] = make([]byte, len(chunk))
				copy(chunks[i], chunk)
			}

			data, err := Reconstruct(tt.args.validatorsQty, len(testData), chunks)
			if tt.expectedError != nil {
				assert.EqualError(t, err, tt.expectedError.Error())
			} else {
				assert.NoError(t, err)
			}
			assert.Equal(t, tt.expectedData, data)
			assert.Equal(t, tt.expectedChunks, chunks)
		})
	}
}

0 comments on commit 4921493

Please sign in to comment.