From 6b4cd0c25354d14fcf548ffdf3b0ed14b5a1490c Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Fri, 19 May 2023 16:14:55 +0100 Subject: [PATCH] Return content not found error when HTTP publisher responds with 404 The entry chunks for which content is not found are skipped by the indexer. In HTTP publisher, non-existing content returns 404. However, the error returned by the syncer does not allow the "content not found" error to bubble up to ingest pipeline. Therefore, the indexer cannot skip such CIDs from HTTP publishers. In HTTP Syncer check the response status code and if 404 return relevant errors for upstream error handling. Relates to: - https://github.com/ipni/storetheindex/issues/1771 --- dagsync/httpsync/sync.go | 14 +++++++++--- dagsync/httpsync/sync_test.go | 41 +++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/dagsync/httpsync/sync.go b/dagsync/httpsync/sync.go index 4e81465..b04173a 100644 --- a/dagsync/httpsync/sync.go +++ b/dagsync/httpsync/sync.go @@ -249,11 +249,19 @@ nextURL: } defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { + switch resp.StatusCode { + case http.StatusNotFound: + log.Errorw("Block not found from HTTP publisher", "resource", rsrc) + // Include the string "content not found" so that indexers that have not + // upgraded gracefully handle the error case. Because, this string is + // being checked already. + return fmt.Errorf("content not found: %w", ipld.ErrNotExists{}) + case http.StatusOK: + log.Debugw("Found block from HTTP publisher", "resource", rsrc) + return cb(resp.Body) + default: return fmt.Errorf("non success http fetch response at %s: %d", localURL.String(), resp.StatusCode) } - - return cb(resp.Body) } // fetchBlock fetches an item into the datastore at c if not locally available. diff --git a/dagsync/httpsync/sync_test.go b/dagsync/httpsync/sync_test.go index a119fb7..58b451d 100644 --- a/dagsync/httpsync/sync_test.go +++ b/dagsync/httpsync/sync_test.go @@ -3,6 +3,7 @@ package httpsync_test import ( "context" "crypto/rand" + "io" "net/http" "net/http/httptest" "net/url" @@ -13,7 +14,9 @@ import ( "github.com/ipld/go-ipld-prime" _ "github.com/ipld/go-ipld-prime/codec/dagjson" _ "github.com/ipld/go-ipld-prime/codec/raw" + "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/fluent" + "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" basicnode "github.com/ipld/go-ipld-prime/node/basic" "github.com/ipld/go-ipld-prime/storage/memstore" @@ -24,6 +27,7 @@ import ( "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multiaddr" "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-multihash" "github.com/stretchr/testify/require" ) @@ -176,3 +180,40 @@ func TestHttpsync_AcceptsSpecCompliantDagJson(t *testing.T) { require.NoError(t, err) require.Equal(t, gotLink, wantLink, "computed %s but got %s", gotLink.String(), wantLink.String()) } + +func TestHttpsync_NotFoundReturnsContentNotFoundErr(t *testing.T) { + ctx := context.Background() + + pubPrK, _, err := crypto.GenerateKeyPairWithReader(crypto.RSA, 2048, rand.Reader) + require.NoError(t, err) + pubID, err := peer.IDFromPrivateKey(pubPrK) + require.NoError(t, err) + + // Instantiate a dagsync publisher. + publs := cidlink.DefaultLinkSystem() + + publs.StorageReadOpener = func(lnkCtx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { + return nil, ipld.ErrNotExists{} + } + + pub, err := httpsync.NewPublisher("0.0.0.0:0", publs, pubPrK) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, pub.Close()) }) + + ls := cidlink.DefaultLinkSystem() + store := &memstore.Store{} + ls.SetWriteStorage(store) + ls.SetReadStorage(store) + + sync := httpsync.NewSync(ls, http.DefaultClient, nil) + syncer, err := sync.NewSyncer(pubID, pub.Addrs(), nil) + require.NoError(t, err) + + mh, err := multihash.Sum([]byte("fish"), multihash.SHA2_256, -1) + require.NoError(t, err) + nonExistingCid := cid.NewCidV1(cid.Raw, mh) + + err = syncer.Sync(ctx, nonExistingCid, selectorparse.CommonSelector_MatchPoint) + require.NotNil(t, err) + require.Contains(t, err.Error(), "content not found") +}