diff --git a/share/availability/full/availability_test.go b/share/availability/full/availability_test.go index a769c981c4..f3ce4d5e6f 100644 --- a/share/availability/full/availability_test.go +++ b/share/availability/full/availability_test.go @@ -21,7 +21,7 @@ func TestShareAvailableOverMocknet_Full(t *testing.T) { defer cancel() net := availability_test.NewTestDAGNet(ctx, t) - _, root := RandNode(net, 32) + _, root := RandNode(net, 32, FullyRecoverable) nd := Node(net) net.ConnectAll() diff --git a/share/availability/full/reconstruction_test.go b/share/availability/full/reconstruction_test.go index f3b6ce91bd..933b25040f 100644 --- a/share/availability/full/reconstruction_test.go +++ b/share/availability/full/reconstruction_test.go @@ -4,6 +4,7 @@ package full import ( "context" + "errors" "sync" "testing" "time" @@ -26,49 +27,99 @@ func init() { // light nodes only. func TestShareAvailable_OneFullNode(t *testing.T) { // NOTE: Numbers are taken from the original 'Fraud and Data Availability Proofs' paper - light.DefaultSampleAmount = 20 // s - const ( - origSquareSize = 16 // k - lightNodes = 69 // c - ) - - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) - defer cancel() - - net := availability_test.NewTestDAGNet(ctx, t) - source, root := RandNode(net, origSquareSize) // make a source node, a.k.a bridge - full := Node(net) // make a full availability service which reconstructs data - - // ensure there is no connection between source and full nodes - // so that full reconstructs from the light nodes only - net.Disconnect(source.ID(), full.ID()) - - errg, errCtx := errgroup.WithContext(ctx) - errg.Go(func() error { - return full.SharesAvailable(errCtx, root) - }) - - lights := make([]*availability_test.TestNode, lightNodes) - for i := 0; i < len(lights); i++ { - lights[i] = light.Node(net) - go func(i int) { - err := lights[i].SharesAvailable(ctx, root) - if err != nil { - t.Log("light errors:", err) - } - }(i) + tc := []struct { + name 
string + origSquareSize int // k + lightNodes int // c + sampleAmount uint // s + recoverability Recoverability + expectedFailure bool + }{ + { + name: "fully recoverable", + origSquareSize: 16, + lightNodes: 24, // ~99% chance of recoverability + sampleAmount: 20, + recoverability: FullyRecoverable, + expectedFailure: false, + }, + { + name: "fully recoverable, not enough LNs", + origSquareSize: 16, + lightNodes: 19, // ~0.7% chance of recoverability + sampleAmount: 20, + recoverability: FullyRecoverable, + expectedFailure: true, + }, + { + name: "barely recoverable", + origSquareSize: 16, + lightNodes: 230, // 99% chance of recoverability + sampleAmount: 20, + recoverability: BarelyRecoverable, + expectedFailure: false, + }, + { + name: "barely recoverable, not enough LNs", + origSquareSize: 16, + lightNodes: 22, // ~0.3% chance of recoverability + sampleAmount: 20, + recoverability: BarelyRecoverable, + expectedFailure: true, + }, + { + name: "unrecoverable", + origSquareSize: 16, + lightNodes: 230, + sampleAmount: 20, + recoverability: Unrecoverable, + expectedFailure: true, + }, } - for i := 0; i < len(lights); i++ { - net.Connect(lights[i].ID(), source.ID()) - } + for _, tt := range tc { + tt := tt + t.Run(tt.name, func(t *testing.T) { + light.DefaultSampleAmount = tt.sampleAmount // s + origSquareSize := tt.origSquareSize // k + lightNodes := tt.lightNodes // c + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + net := availability_test.NewTestDAGNet(ctx, t) + // make a source node, a.k.a bridge + source, root := RandNode(net, origSquareSize, tt.recoverability) + // make a full availability service which reconstructs data + full := Node(net) + + // ensure there is no connection between source and full nodes + // so that full reconstructs from the light nodes only + net.Disconnect(source.ID(), full.ID()) + + lights := make([]*availability_test.TestNode, lightNodes) + for i := 0; i < len(lights); i++ { + lights[i] = 
light.Node(net) + // Form topology + net.Connect(lights[i].ID(), source.ID()) + net.Connect(lights[i].ID(), full.ID()) + // Start sampling + go func(i int) { + err := lights[i].SharesAvailable(ctx, root) + if err != nil { + t.Log("light errors:", err) + } + }(i) + } - for i := 0; i < len(lights); i++ { - net.Connect(lights[i].ID(), full.ID()) + err := full.SharesAvailable(ctx, root) + if tt.expectedFailure { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) } - - err := errg.Wait() - require.NoError(t, err) } // TestShareAvailable_ConnectedFullNodes asserts that two connected full nodes @@ -79,79 +130,133 @@ func TestShareAvailable_OneFullNode(t *testing.T) { // source, full node must be able to finish the availability process started in // the beginning. func TestShareAvailable_ConnectedFullNodes(t *testing.T) { - // NOTE: Numbers are taken from the original 'Fraud and Data Availability Proofs' paper - light.DefaultSampleAmount = 20 // s - const ( - origSquareSize = 16 // k - lightNodes = 60 // c - ) - - ctx, cancel := context.WithTimeout(context.Background(), time.Second*30) - defer cancel() - - net := availability_test.NewTestDAGNet(ctx, t) - source, root := RandNode(net, origSquareSize) - - // create two full nodes and ensure they are disconnected - full1 := Node(net) - full2 := Node(net) - - // pre-connect fulls - net.Connect(full1.ID(), full2.ID()) - // ensure fulls and source are not connected - // so that fulls take data from light nodes only - net.Disconnect(full1.ID(), source.ID()) - net.Disconnect(full2.ID(), source.ID()) - - // start reconstruction for fulls - errg, errCtx := errgroup.WithContext(ctx) - errg.Go(func() error { - return full1.SharesAvailable(errCtx, root) - }) - errg.Go(func() error { - return full2.SharesAvailable(errCtx, root) - }) - - // create light nodes and start sampling for them immediately - lights1, lights2 := make( - []*availability_test.TestNode, lightNodes/2), - make([]*availability_test.TestNode, 
lightNodes/2) - for i := 0; i < len(lights1); i++ { - lights1[i] = light.Node(net) - go func(i int) { - err := lights1[i].SharesAvailable(ctx, root) - if err != nil { - t.Log("light1 errors:", err) + tc := []struct { + name string + origSquareSize int // k + lightNodes int // c + sampleAmount uint // s + recoverability Recoverability + expectedFailure bool + }{ + { + name: "fully recoverable", + origSquareSize: 16, + lightNodes: 24, + sampleAmount: 20, + recoverability: FullyRecoverable, + expectedFailure: false, + }, + { + name: "fully recoverable, not enough LNs", + origSquareSize: 16, + lightNodes: 19, // ~0.7% chance of recoverability + sampleAmount: 20, + recoverability: FullyRecoverable, + expectedFailure: true, + }, + // NOTE: This test contains cases for barely recoverable but + // DisconnectedFullNodes does not. The reasoning for this is that + // DisconnectedFullNodes has the additional constraint that the data + // should not be reconstructable from a single subnetwork, while this + // test only tests that the data is reconstructable once the subnetworks + // are connected. 
+ { + name: "barely recoverable", + origSquareSize: 16, + lightNodes: 230, // 99% chance of recoverability + sampleAmount: 20, + recoverability: BarelyRecoverable, + expectedFailure: false, + }, + { + name: "barely recoverable, not enough LNs", + origSquareSize: 16, + lightNodes: 22, // ~0.3% chance of recoverability + sampleAmount: 20, + recoverability: BarelyRecoverable, + expectedFailure: true, + }, + } + + for _, tt := range tc { + tt := tt + t.Run(tt.name, func(t *testing.T) { + light.DefaultSampleAmount = tt.sampleAmount // s + origSquareSize := tt.origSquareSize // k + lightNodes := tt.lightNodes // c + + ctx, cancel := context.WithTimeout(context.Background(), time.Second*60) + defer cancel() + + net := availability_test.NewTestDAGNet(ctx, t) + source, root := RandNode(net, origSquareSize, tt.recoverability) + + // create two full nodes and ensure they are disconnected + full1 := Node(net) + full2 := Node(net) + + // pre-connect fulls + net.Connect(full1.ID(), full2.ID()) + // ensure fulls and source are not connected + // so that fulls take data from light nodes only + net.Disconnect(full1.ID(), source.ID()) + net.Disconnect(full2.ID(), source.ID()) + + // start reconstruction for fulls + errg, errCtx := errgroup.WithContext(ctx) + errg.Go(func() error { + return full1.SharesAvailable(errCtx, root) + }) + errg.Go(func() error { + return full2.SharesAvailable(errCtx, root) + }) + + // create light nodes and start sampling for them immediately + lights1, lights2 := make( + []*availability_test.TestNode, lightNodes/2), + make([]*availability_test.TestNode, lightNodes/2) + for i := 0; i < len(lights1); i++ { + lights1[i] = light.Node(net) + go func(i int) { + err := lights1[i].SharesAvailable(ctx, root) + if err != nil { + t.Log("light1 errors:", err) + } + }(i) + + lights2[i] = light.Node(net) + go func(i int) { + err := lights2[i].SharesAvailable(ctx, root) + if err != nil { + t.Log("light2 errors:", err) + } + }(i) } - }(i) - lights2[i] = light.Node(net) - 
go func(i int) { - err := lights2[i].SharesAvailable(ctx, root) - if err != nil { - t.Log("light2 errors:", err) + // shape topology + for i := 0; i < len(lights1); i++ { + // ensure lights1 are only connected to full1 + net.Connect(lights1[i].ID(), full1.ID()) + net.Disconnect(lights1[i].ID(), full2.ID()) + // ensure lights2 are only connected to full2 + net.Connect(lights2[i].ID(), full2.ID()) + net.Disconnect(lights2[i].ID(), full1.ID()) } - }(i) - } - // shape topology - for i := 0; i < len(lights1); i++ { - // ensure lights1 are only connected to full1 - net.Connect(lights1[i].ID(), full1.ID()) - net.Disconnect(lights1[i].ID(), full2.ID()) - // ensure lights2 are only connected to full2 - net.Connect(lights2[i].ID(), full2.ID()) - net.Disconnect(lights2[i].ID(), full1.ID()) - } + // start connection lights with sources + for i := 0; i < len(lights1); i++ { + net.Connect(lights1[i].ID(), source.ID()) + net.Connect(lights2[i].ID(), source.ID()) + } - // start connection lights with sources - for i := 0; i < len(lights1); i++ { - net.Connect(lights1[i].ID(), source.ID()) - net.Connect(lights2[i].ID(), source.ID()) + err := errg.Wait() + if tt.expectedFailure { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) } - - err := errg.Wait() - require.NoError(t, err) } // TestShareAvailable_DisconnectedFullNodes asserts that two disconnected full @@ -176,100 +281,160 @@ func TestShareAvailable_DisconnectedFullNodes(t *testing.T) { // └─┴─┤ ├─┴─┘ // F└───┘F // - - // NOTE: Numbers are taken from the original 'Fraud and Data Availability Proofs' paper - light.DefaultSampleAmount = 20 // s - const ( - origSquareSize = 16 // k - lightNodes = 32 // c - total number of nodes on two subnetworks - ) - - ctx, cancel := context.WithTimeout(context.Background(), time.Second*60) - defer cancel() - - net := availability_test.NewTestDAGNet(ctx, t) - source, root := RandNode(net, origSquareSize) - - // create light nodes and start sampling for them immediately - 
lights1, lights2 := make( - []*availability_test.TestNode, lightNodes/2), - make([]*availability_test.TestNode, lightNodes/2) - - var wg sync.WaitGroup - wg.Add(lightNodes) - for i := 0; i < len(lights1); i++ { - lights1[i] = light.Node(net) - go func(i int) { - defer wg.Done() - err := lights1[i].SharesAvailable(ctx, root) - if err != nil { - t.Log("light1 errors:", err) + tc := []struct { + name string + origSquareSize int // k + lightNodes int // c + sampleAmount uint // s + recoverability Recoverability + expectedFailure bool + }{ + // NOTE: The number of LNs must be even, otherwise the WaitGroup will hang. + { + name: "fully recoverable", + origSquareSize: 16, + lightNodes: 24, // ~99% chance of recoverability + sampleAmount: 20, + recoverability: FullyRecoverable, + expectedFailure: false, + }, + { + name: "fully recoverable, not enough LNs", + origSquareSize: 16, + lightNodes: 18, // ~0.03% chance of recoverability + sampleAmount: 20, + recoverability: FullyRecoverable, + expectedFailure: true, + }, + { + name: "unrecoverable", + origSquareSize: 16, + lightNodes: 230, + sampleAmount: 20, + recoverability: Unrecoverable, + expectedFailure: true, + }, + } + for _, tt := range tc { + tt := tt + t.Run(tt.name, func(t *testing.T) { + light.DefaultSampleAmount = tt.sampleAmount // s + origSquareSize := tt.origSquareSize // k + lightNodes := tt.lightNodes // c + + ctx, cancel := context.WithTimeout(context.Background(), time.Second*60) + defer cancel() + + net := availability_test.NewTestDAGNet(ctx, t) + source, root := RandNode(net, origSquareSize, tt.recoverability) + + // create light nodes and start sampling for them immediately + lights1, lights2 := make( + []*availability_test.TestNode, lightNodes/2), + make([]*availability_test.TestNode, lightNodes/2) + + var wg sync.WaitGroup + wg.Add(lightNodes) + for i := 0; i < len(lights1); i++ { + lights1[i] = light.Node(net) + go func(i int) { + defer wg.Done() + err := lights1[i].SharesAvailable(ctx, root) + if err 
!= nil { + t.Log("light1 errors:", err) + } + }(i) + + lights2[i] = light.Node(net) + go func(i int) { + defer wg.Done() + err := lights2[i].SharesAvailable(ctx, root) + if err != nil { + t.Log("light2 errors:", err) + } + }(i) } - }(i) - - lights2[i] = light.Node(net) - go func(i int) { - defer wg.Done() - err := lights2[i].SharesAvailable(ctx, root) - if err != nil { - t.Log("light2 errors:", err) + + // create two full nodes and ensure they are disconnected + full1 := Node(net) + full2 := Node(net) + net.Disconnect(full1.ID(), full2.ID()) + + // ensure fulls and source are not connected + // so that fulls take data from light nodes only + net.Disconnect(full1.ID(), source.ID()) + net.Disconnect(full2.ID(), source.ID()) + + // shape topology + for i := 0; i < len(lights1); i++ { + // ensure lights1 are only connected to source and full1 + net.Connect(lights1[i].ID(), source.ID()) + net.Connect(lights1[i].ID(), full1.ID()) + net.Disconnect(lights1[i].ID(), full2.ID()) + // ensure lights2 are only connected to source and full2 + net.Connect(lights2[i].ID(), source.ID()) + net.Connect(lights2[i].ID(), full2.ID()) + net.Disconnect(lights2[i].ID(), full1.ID()) } - }(i) - } - // create two full nodes and ensure they are disconnected - full1 := Node(net) - full2 := Node(net) - net.Disconnect(full1.ID(), full2.ID()) - - // ensure fulls and source are not connected - // so that fulls take data from light nodes only - net.Disconnect(full1.ID(), source.ID()) - net.Disconnect(full2.ID(), source.ID()) - - // shape topology - for i := 0; i < len(lights1); i++ { - // ensure lights1 are only connected to source and full1 - net.Connect(lights1[i].ID(), source.ID()) - net.Connect(lights1[i].ID(), full1.ID()) - net.Disconnect(lights1[i].ID(), full2.ID()) - // ensure lights2 are only connected to source and full2 - net.Connect(lights2[i].ID(), source.ID()) - net.Connect(lights2[i].ID(), full2.ID()) - net.Disconnect(lights2[i].ID(), full1.ID()) + // start reconstruction for fulls 
that should fail + ctxErr, cancelErr := context.WithTimeout(ctx, time.Second*5) + errg, errCtx := errgroup.WithContext(ctxErr) + errg.Go(func() error { + err := full1.SharesAvailable(errCtx, root) + if err == nil { + return errors.New("full1 should not be able to reconstruct") + } + // this is a trick to ensure that BOTH fulls fail with this error using a single errgroup. + if err != share.ErrNotAvailable { + return err + } + return nil + }) + errg.Go(func() error { + err := full2.SharesAvailable(errCtx, root) + if err == nil { + return errors.New("full2 should not be able to reconstruct") + } + // this is a trick to ensure that BOTH fulls fail with this error using a single errgroup. + if err != share.ErrNotAvailable { + return err + } + return nil + }) + + // check that any of the fulls cannot reconstruct on their own + err := errg.Wait() + require.NoError(t, err) + cancelErr() + + // but after they connect + net.Connect(full1.ID(), full2.ID()) + + // we clear the blockservices not because we need to, but just to + // show it's possible to reconstruct without any previously saved + // data from previous attempts. 
+ full1.ClearStorage() + full2.ClearStorage() + + // they both should be able to reconstruct the block + errg, bctx := errgroup.WithContext(ctx) + errg.Go(func() error { + return full1.SharesAvailable(bctx, root) + }) + errg.Go(func() error { + return full2.SharesAvailable(bctx, root) + }) + + err = errg.Wait() + if tt.expectedFailure { + require.Error(t, err) + } else { + require.NoError(t, err) + } + // wait for all routines to finish before exit, in case there are any errors to log + wg.Wait() + }) } - // start reconstruction for fulls that should fail - ctxErr, cancelErr := context.WithTimeout(ctx, time.Second*5) - errg, errCtx := errgroup.WithContext(ctxErr) - errg.Go(func() error { - return full1.SharesAvailable(errCtx, root) - }) - errg.Go(func() error { - return full2.SharesAvailable(errCtx, root) - }) - - // check that any of the fulls cannot reconstruct on their own - err := errg.Wait() - require.ErrorIs(t, err, share.ErrNotAvailable) - cancelErr() - - // but after they connect - net.Connect(full1.ID(), full2.ID()) - - // with clean caches from the previous try - full1.ClearStorage() - full2.ClearStorage() - - // they both should be able to reconstruct the block - errg, bctx := errgroup.WithContext(ctx) - errg.Go(func() error { - return full1.SharesAvailable(bctx, root) - }) - errg.Go(func() error { - return full2.SharesAvailable(bctx, root) - }) - require.NoError(t, errg.Wait()) - // wait for all routines to finish before exit, in case there are any errors to log - wg.Wait() } diff --git a/share/availability/full/testing.go b/share/availability/full/testing.go index a636b26ea6..35b9377ad8 100644 --- a/share/availability/full/testing.go +++ b/share/availability/full/testing.go @@ -26,10 +26,39 @@ func GetterWithRandSquare(t *testing.T, n int) (share.Getter, *share.Root) { return getter, availability_test.RandFillBS(t, n, bServ) } +type Recoverability int64 + +const ( + // FullyRecoverable makes all EDS shares available when filling the + // blockservice. 
+ FullyRecoverable Recoverability = iota + // BarelyRecoverable withholds the (k + 1)^2 subsquare of the 2k x 2k EDS, + // minus the node at (k + 1, k + 1) which allows for complete + // recoverability. + BarelyRecoverable + // Unrecoverable withholds the (k + 1)^2 subsquare of the 2k x 2k EDS. + Unrecoverable +) + // RandNode creates a Full Node filled with a random block of the given size. -func RandNode(dn *availability_test.TestDagNet, squareSize int) (*availability_test.TestNode, *share.Root) { +func RandNode( + dn *availability_test.TestDagNet, + squareSize int, + recoverability Recoverability, +) (*availability_test.TestNode, *share.Root) { nd := Node(dn) - return nd, availability_test.RandFillBS(dn.T, squareSize, nd.BlockService) + var root *share.Root + switch recoverability { + case FullyRecoverable: + root = availability_test.RandFillBS(dn.T, squareSize, nd.BlockService) + case BarelyRecoverable: + root = availability_test.FillWithheldBS(dn.T, squareSize, nd.BlockService, true) + case Unrecoverable: + root = availability_test.FillWithheldBS(dn.T, squareSize, nd.BlockService, false) + default: + panic("invalid recoverability given") + } + return nd, root } // Node creates a new empty Full Node. diff --git a/share/availability/test/testing.go b/share/availability/test/testing.go index 64e8d23bb7..595e391f1a 100644 --- a/share/availability/test/testing.go +++ b/share/availability/test/testing.go @@ -22,6 +22,17 @@ import ( "github.com/celestiaorg/celestia-node/share/sharetest" ) +// FillWithheldBS makes k + 1 columns of k + 1 rows unavailable if recoverable = +// false, otherwise it acts the same but keeps the node at index (k + 1, k + 1). 
+func FillWithheldBS(t *testing.T, n int, bServ blockservice.BlockService, recoverable bool) *share.Root { + shares := sharetest.RandShares(t, n*n) + eds, err := AddAndWithholdShares(context.TODO(), shares, bServ, recoverable) + require.NoError(t, err) + dah, err := share.NewRoot(eds) + require.NoError(t, err) + return dah +} + // RandFillBS fills the given BlockService with a random block of a given size. func RandFillBS(t *testing.T, n int, bServ blockservice.BlockService) *share.Root { shares := sharetest.RandShares(t, n*n) diff --git a/share/availability/test/withholding_adder.go b/share/availability/test/withholding_adder.go new file mode 100644 index 0000000000..1905867d42 --- /dev/null +++ b/share/availability/test/withholding_adder.go @@ -0,0 +1,79 @@ +package availability_test + +import ( + "context" + "fmt" + + "github.com/ipfs/boxo/blockservice" + + "github.com/celestiaorg/celestia-app/pkg/da" + "github.com/celestiaorg/celestia-app/pkg/wrapper" + "github.com/celestiaorg/nmt" + "github.com/celestiaorg/rsmt2d" + + "github.com/celestiaorg/celestia-node/libs/utils" + "github.com/celestiaorg/celestia-node/share" + "github.com/celestiaorg/celestia-node/share/ipld" +) + +// AddAndWithholdShares extends shares into an EDS, adds it to the blockservice.BlockService, then +// deletes the (k + 1) x (k + 1) leaves starting at index (0, 0), keeping the last one if recoverable. 
+func AddAndWithholdShares( + ctx context.Context, + shares []share.Share, + adder blockservice.BlockService, + recoverable bool, +) (*rsmt2d.ExtendedDataSquare, error) { + if len(shares) == 0 { + return nil, fmt.Errorf("empty data") // empty block is not an empty Data + } + squareSize := int(utils.SquareSize(len(shares))) + withheldSize := squareSize + 1 + // create nmt adder wrapping batch adder with calculated size + batchAdder := ipld.NewNmtNodeAdder(ctx, adder, ipld.MaxSizeBatchOption(squareSize*2)) + // create the nmt wrapper to generate row and col commitments + // recompute the eds + eds, err := rsmt2d.ComputeExtendedDataSquare( + shares, + share.DefaultRSMT2DCodec(), + wrapper.NewConstructor(uint64(squareSize), + nmt.NodeVisitor(batchAdder.Visit)), + ) + if err != nil { + return nil, fmt.Errorf("failure to recompute the extended data square: %w", err) + } + // compute roots + _, err = eds.RowRoots() + if err != nil { + return nil, err + } + // commit the batch to ipfs + err = batchAdder.Commit() + if err != nil { + return nil, fmt.Errorf("failure to commit the batch: %w", err) + } + dah, err := da.NewDataAvailabilityHeader(eds) + if err != nil { + return nil, fmt.Errorf("failure to create data availability header: %w", err) + } + + // remove blocks from blockservice + for i := 0; i < withheldSize; i++ { + for j := 0; j < withheldSize; j++ { + // leave the last block in the last row and column if data should be left recoverable + if recoverable && i == withheldSize-1 && j == withheldSize-1 { + continue + } + root, idx := ipld.Translate(&dah, i, j) + block, err := ipld.GetLeaf(ctx, adder, root, idx, len(dah.RowRoots)) + if err != nil { + return nil, fmt.Errorf("failure to get leaf: %w", err) + } + err = adder.DeleteBlock(ctx, block.Cid()) + if err != nil { + return nil, fmt.Errorf("failure to delete block: %w", err) + } + } + } + return eds, nil +}