From f16f9b2129a29c3c6df2ef598ab3d41306ac3d18 Mon Sep 17 00:00:00 2001 From: Edward Mack Date: Thu, 8 Jun 2023 10:30:49 -0400 Subject: [PATCH] feat(erasure_coding): introduce erasure coding for PoV Distributor (#3281) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Eclésio Junior Co-authored-by: Edward Mack --- go.mod | 1 + go.sum | 2 + lib/erasure/erasure.go | 70 +++++++++++++++++++ lib/erasure/erasure_test.go | 132 ++++++++++++++++++++++++++++++++++++ 4 files changed, 205 insertions(+) create mode 100644 lib/erasure/erasure.go create mode 100644 lib/erasure/erasure_test.go diff --git a/go.mod b/go.mod index 3b897c9eb32..1f918c952e0 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,7 @@ require ( github.com/ipfs/go-ds-badger2 v0.1.3 github.com/jpillora/ipfilter v1.2.9 github.com/klauspost/compress v1.16.5 + github.com/klauspost/reedsolomon v1.11.7 github.com/libp2p/go-libp2p v0.27.7 github.com/libp2p/go-libp2p-kad-dht v0.24.2 github.com/multiformats/go-multiaddr v0.9.0 diff --git a/go.sum b/go.sum index 42f140031e3..ee72418292c 100644 --- a/go.sum +++ b/go.sum @@ -429,6 +429,8 @@ github.com/klauspost/compress v1.16.5 h1:IFV2oUNUzZaz+XyusxpLzpzS8Pt5rh0Z16For/d github.com/klauspost/compress v1.16.5/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/reedsolomon v1.11.7 h1:9uaHU0slncktTEEg4+7Vl7q7XUNMBUOK4R9gnKhMjAU= +github.com/klauspost/reedsolomon v1.11.7/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ5MGv0Qd8a47h6A= github.com/koron/go-ssdp v0.0.0-20191105050749-2e1c40ed0b5d/go.mod h1:5Ky9EC2xfoUKUor0Hjgi2BJhCSXJfMOFlmyYrVKGQMk= github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0= github.com/koron/go-ssdp v0.0.4/go.mod h1:oDXq+E5IL5q0U8uSBcoAXzTzInwy5lEgC91HoKtbmZk= diff --git 
a/lib/erasure/erasure.go b/lib/erasure/erasure.go
new file mode 100644
index 00000000000..7a238412595
--- /dev/null
+++ b/lib/erasure/erasure.go
@@ -0,0 +1,85 @@
+// Copyright 2021 ChainSafe Systems (ON)
+// SPDX-License-Identifier: LGPL-3.0-only
+
+package erasure
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+
+	"github.com/klauspost/reedsolomon"
+)
+
+// ErrNotEnoughValidators cannot encode something for zero or one validator
+var ErrNotEnoughValidators = errors.New("expected at least 2 validators")
+
+// ObtainChunks obtains erasure-coded chunks: it divides data into validatorsQty data chunks and
+// appends recoveryThreshold(validatorsQty) parity chunks that are used for reconstruction.
+// It returns ErrNotEnoughValidators when validatorsQty is less than 2. Errors from splitting or
+// encoding the data (e.g. reedsolomon.ErrShortData for empty data) are returned unwrapped.
+func ObtainChunks(validatorsQty int, data []byte) ([][]byte, error) {
+	recoveryThres, err := recoveryThreshold(validatorsQty)
+	if err != nil {
+		return nil, err
+	}
+
+	enc, err := reedsolomon.New(validatorsQty, recoveryThres)
+	if err != nil {
+		return nil, fmt.Errorf("creating new reed solomon failed: %w", err)
+	}
+
+	shards, err := enc.Split(data)
+	if err != nil {
+		return nil, err
+	}
+
+	err = enc.Encode(shards)
+	if err != nil {
+		return nil, err
+	}
+
+	return shards, nil
+}
+
+// Reconstruct the missing data from a set of chunks. chunks is expected to be the slice
+// produced by ObtainChunks for the same validatorsQty with the missing chunks left empty;
+// the missing chunks are rebuilt in place. It returns the first originalDataLen bytes of the
+// joined data chunks. When an error is returned the data is always nil.
+func Reconstruct(validatorsQty, originalDataLen int, chunks [][]byte) ([]byte, error) {
+	recoveryThres, err := recoveryThreshold(validatorsQty)
+	if err != nil {
+		return nil, err
+	}
+
+	enc, err := reedsolomon.New(validatorsQty, recoveryThres)
+	if err != nil {
+		return nil, fmt.Errorf("creating new reed solomon failed: %w", err)
+	}
+
+	err = enc.Reconstruct(chunks)
+	if err != nil {
+		return nil, err
+	}
+
+	var buf bytes.Buffer
+	err = enc.Join(&buf, chunks, originalDataLen)
+	if err != nil {
+		// never hand back a partially written buffer alongside an error
+		return nil, fmt.Errorf("joining chunks failed: %w", err)
+	}
+
+	return buf.Bytes(), nil
+}
+
+// recoveryThreshold gives the max number of shards/chunks that we can afford to lose and still construct
+// the full initial data.
+// Total number of chunks will be validatorsQty + recoveryThreshold.
+// It returns ErrNotEnoughValidators when validators is less than 2.
+func recoveryThreshold(validators int) (int, error) {
+	if validators < 2 {
+		return 0, ErrNotEnoughValidators
+	}
+
+	return (validators-1)/3 + 1, nil
+}
diff --git a/lib/erasure/erasure_test.go b/lib/erasure/erasure_test.go
new file mode 100644
index 00000000000..0cc08c41110
--- /dev/null
+++ b/lib/erasure/erasure_test.go
@@ -0,0 +1,148 @@
+// Copyright 2021 ChainSafe Systems (ON)
+// SPDX-License-Identifier: LGPL-3.0-only
+
+package erasure
+
+import (
+	"testing"
+
+	"github.com/klauspost/reedsolomon"
+	"github.com/stretchr/testify/assert"
+)
+
+// testData is the payload used by the tests, expectedChunks is its erasure coding for 10 validators
+var testData = []byte("this is a test of the erasure coding")
+var expectedChunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {97, 32, 116, 101}, {115, 116, 32, 111},
+	{102, 32, 116, 104}, {101, 32, 101, 114}, {97, 115, 117, 114}, {101, 32, 99, 111}, {100, 105, 110, 103},
+	{0, 0, 0, 0}, {133, 189, 154, 178}, {88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}
+
+// erasure data missing chunks; these fixtures are shared between test cases and must not be modified
+var missing2Chunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {}, {115, 116, 32, 111},
+	{102, 32, 116, 104}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103},
+	{0, 0, 0, 0}, {133, 189, 154, 178}, {88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}
+var missing3Chunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {}, {115, 116, 32, 111},
+	{}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103}, {0, 0, 0, 0}, {133, 189, 154, 178},
+	{88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}
+var missing5Chunks = [][]byte{{}, {}, {}, {115, 116, 32, 111},
+	{}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103}, {0, 0, 0, 0}, {133, 189, 154, 178},
+	{88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}
+
+// cloneChunks returns a deep copy of chunks. Reconstruct fills missing chunks in place, so every
+// test case works on its own copy to keep the shared fixtures intact whatever the execution order.
+func cloneChunks(chunks [][]byte) [][]byte {
+	cloned := make([][]byte, len(chunks))
+	for i, chunk := range chunks {
+		// appending to a non-nil empty slice keeps empty chunks empty rather than nil
+		cloned[i] = append([]byte{}, chunk...)
+	}
+	return cloned
+}
+
+func TestObtainChunks(t *testing.T) {
+	type args struct {
+		validatorsQty int
+		data          []byte
+	}
+	tests := map[string]struct {
+		args          args
+		expectedValue [][]byte
+		expectedError error
+	}{
+		"happy_path": {
+			args: args{
+				validatorsQty: 10,
+				data:          testData,
+			},
+			expectedValue: expectedChunks,
+		},
+		"nil_data": {
+			args: args{
+				validatorsQty: 10,
+				data:          nil,
+			},
+			expectedError: reedsolomon.ErrShortData,
+		},
+		"not_enough_validators": {
+			args: args{
+				validatorsQty: 1,
+				data:          testData,
+			},
+			expectedError: ErrNotEnoughValidators,
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			got, err := ObtainChunks(tt.args.validatorsQty, tt.args.data)
+			if tt.expectedError != nil {
+				assert.ErrorIs(t, err, tt.expectedError)
+			} else {
+				assert.NoError(t, err)
+				expectedThreshold, thresholdErr := recoveryThreshold(tt.args.validatorsQty)
+				assert.NoError(t, thresholdErr)
+				assert.Len(t, got, tt.args.validatorsQty+expectedThreshold)
+			}
+			assert.Equal(t, tt.expectedValue, got)
+		})
+	}
+}
+
+func TestReconstruct(t *testing.T) {
+	type args struct {
+		validatorsQty int
+		chunks        [][]byte
+	}
+	tests := map[string]struct {
+		args
+		expectedData   []byte
+		expectedChunks [][]byte
+		expectedError  error
+	}{
+		"missing_2_chunks": {
+			args: args{
+				validatorsQty: 10,
+				chunks:        missing2Chunks,
+			},
+			expectedData:   testData,
+			expectedChunks: expectedChunks,
+		},
+		"missing_2_chunks,_validator_qty_3": {
+			args: args{
+				validatorsQty: 3,
+				chunks:        missing2Chunks,
+			},
+			// a failed reconstruction is expected to leave the chunks as they were provided
+			expectedError:  reedsolomon.ErrTooFewShards,
+			expectedChunks: missing2Chunks,
+		},
+		"missing_3_chunks": {
+			args: args{
+				validatorsQty: 10,
+				chunks:        missing3Chunks,
+			},
+			expectedData:   testData,
+			expectedChunks: expectedChunks,
+		},
+		"missing_5_chunks": {
+			args: args{
+				validatorsQty: 10,
+				chunks:        missing5Chunks,
+			},
+			expectedChunks: missing5Chunks,
+			expectedError:  reedsolomon.ErrTooFewShards,
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			// work on a copy: Reconstruct mutates the chunks it is given
+			chunks := cloneChunks(tt.args.chunks)
+			data, err := Reconstruct(tt.args.validatorsQty, len(testData), chunks)
+			if tt.expectedError != nil {
+				assert.ErrorIs(t, err, tt.expectedError)
+			} else {
+				assert.NoError(t, err)
+			}
+			assert.Equal(t, tt.expectedData, data)
+			assert.Equal(t, tt.expectedChunks, chunks)
+		})
+	}
+}