diff --git a/directory/basicdir.go b/directory/basicdir.go
index e7c8e84..2026541 100644
--- a/directory/basicdir.go
+++ b/directory/basicdir.go
@@ -70,7 +70,7 @@ func (n UnixFSBasicDir) MapIterator() ipld.MapIterator {
 // can be expected that itr.Next will be called node.Length times
 // before itr.Done becomes true.
 func (n UnixFSBasicDir) ListIterator() ipld.ListIterator {
-	return nil
+	return iter.NewUnixFSDirListIterator(&_UnixFSBasicDir__ListItr{n._substrate.Links.Iterator()}, nil)
 }
 
 // Length returns the length of a list, or the number of entries in a map,
diff --git a/hamt/shardeddir.go b/hamt/shardeddir.go
index a273b29..31f0adc 100644
--- a/hamt/shardeddir.go
+++ b/hamt/shardeddir.go
@@ -256,7 +256,14 @@ func (itr *_UnixFSShardedDir__ListItr) Done() bool {
 // can be expected that itr.Next will be called node.Length times
 // before itr.Done becomes true.
 func (n UnixFSHAMTShard) ListIterator() ipld.ListIterator {
-	return nil
+	maxPadLen := maxPadLength(n.data)
+	listItr := &_UnixFSShardedDir__ListItr{
+		_substrate: n.FieldLinks().Iterator(),
+		maxPadLen:  maxPadLen,
+		nd:         n,
+	}
+	st := stringTransformer{maxPadLen: maxPadLen}
+	return iter.NewUnixFSDirListIterator(listItr, st.transformNameNode)
 }
 
 // Length returns the length of a list, or the number of entries in a map,
diff --git a/hamt/shardeddir_test.go b/hamt/shardeddir_test.go
index fcc7601..a6e1b10 100644
--- a/hamt/shardeddir_test.go
+++ b/hamt/shardeddir_test.go
@@ -289,3 +289,83 @@ func TestIncompleteShardedIteration(t *testing.T) {
 	req.Contains(blockNotFound, "/wiki/ICloud_Drive")
 	req.Contains(blockNotFound, "/wiki/Édouard_Bamberger")
 }
+
+func TestIncompleteShardedListIteration(t *testing.T) {
+	ctx := context.Background()
+	req := require.New(t)
+
+	fixture := "./fixtures/wikipedia-cryptographic-hash-function.car"
+	f, err := os.Open(fixture)
+	req.NoError(err)
+	defer f.Close()
+	carstore, err := storage.OpenReadable(f)
+	req.NoError(err)
+	lsys := cidlink.DefaultLinkSystem()
+	lsys.TrustedStorage = true
+	lsys.SetReadStorage(carstore)
+
+	// classic recursive go-ipld-prime list iteration, being forgiving about
+	// NotFound block loads to see what we end up with
+
+	kvs := make(map[string]string)
+	var iterNotFound int
+	blockNotFound := make(map[string]struct{})
+
+	var iter func(string, ipld.Link)
+	iter = func(dir string, lnk ipld.Link) {
+		nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any)
+		if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
+			// got a named link that we can't load
+			blockNotFound[dir] = struct{}{}
+			return
+		}
+		req.NoError(err)
+		if nd.Kind() == ipld.Kind_Bytes {
+			bv, err := nd.AsBytes()
+			req.NoError(err)
+			kvs[dir] = string(bv)
+			return
+		}
+
+		nb := dagpb.Type.PBNode.NewBuilder()
+		req.NoError(nb.AssignNode(nd))
+		pbn := nb.Build()
+		hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, pbn, &lsys)
+		req.NoError(err)
+
+		li := hamtShard.ListIterator()
+		for !li.Done() {
+			_, v, err := li.Next()
+			if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
+				// internal shard link that won't load, we don't know what it might
+				// point to
+				iterNotFound++
+				continue
+			}
+			req.NoError(err)
+			pbLink, ok := v.(dagpb.PBLink)
+			req.True(ok)
+			req.True(pbLink.FieldName().Exists())
+			ks := pbLink.FieldName().Must().String()
+			lv := pbLink.FieldHash().Link()
+			iter(dir+"/"+ks, lv)
+		}
+	}
+	// walk the tree
+	iter("", cidlink.Link{Cid: carstore.Roots()[0]})
+
+	req.Len(kvs, 1)
+	req.Contains(kvs, "/wiki/Cryptographic_hash_function")
+	req.Contains(kvs["/wiki/Cryptographic_hash_function"], "Cryptographic hash function\n")
+	req.Equal(570, iterNotFound) // tried to load 570 blocks that were not in the CAR
+	req.Len(blockNotFound, 110)  // 110 blocks, for named links, were not found in the CAR
+	// some of the root block links
+	req.Contains(blockNotFound, "/favicon.ico")
+	req.Contains(blockNotFound, "/index.html")
+	req.Contains(blockNotFound, "/zimdump_version")
+	// some of the shard links
+	req.Contains(blockNotFound, "/wiki/UK_railway_Signal")
+	req.Contains(blockNotFound, "/wiki/Australian_House")
+	req.Contains(blockNotFound, "/wiki/ICloud_Drive")
+	req.Contains(blockNotFound, "/wiki/Édouard_Bamberger")
+}
diff --git a/iter/iter.go b/iter/iter.go
index 3be099d..da704a9 100644
--- a/iter/iter.go
+++ b/iter/iter.go
@@ -3,6 +3,8 @@ package iter
 import (
 	dagpb "github.com/ipld/go-codec-dagpb"
 	"github.com/ipld/go-ipld-prime"
+	"github.com/ipld/go-ipld-prime/datamodel"
+	"github.com/ipld/go-ipld-prime/fluent/qp"
 )
 
 // pbLinkItr behaves like an list of links iterator, even thought the HAMT behavior is more complicated
@@ -52,6 +54,56 @@ func (itr *UnixFSDir__MapItr) Done() bool {
 	return itr._substrate.Done()
 }
 
+// NewUnixFSDirListIterator wraps itr in an ipld.ListIterator over directory
+// links. When transformName is non-nil it is applied to the name of every
+// named link before that link is returned; pass nil to keep names unchanged.
+func NewUnixFSDirListIterator(itr pbLinkItr, transformName TransformNameFunc) ipld.ListIterator {
+	return &UnixFSDir__ListItr{itr, transformName}
+}
+
+// Next returns the index and link node of the next directory entry. A named
+// link is rebuilt as a PBLink carrying the (optionally transformed) name plus
+// the original Tsize, if present, and Hash; an unnamed link is returned as-is.
+// It returns ipld.ErrIteratorOverread when the substrate yields a nil link.
+func (itr *UnixFSDir__ListItr) Next() (i int64, v ipld.Node, err error) {
+	i, next, err := itr._substrate.Next()
+	if err != nil {
+		return i, nil, err
+	}
+	if next == nil {
+		return i, nil, ipld.ErrIteratorOverread{}
+	}
+	if next.FieldName().Exists() {
+		name := next.FieldName().Must()
+		if itr.transformName != nil {
+			name = itr.transformName(name)
+		}
+		v, err = qp.BuildMap(dagpb.Type.PBLink, 3, func(ma datamodel.MapAssembler) {
+			qp.MapEntry(ma, "Name", qp.Node(name))
+			if next.FieldTsize().Exists() {
+				qp.MapEntry(ma, "Tsize", qp.Node(next.FieldTsize().Must()))
+			}
+			qp.MapEntry(ma, "Hash", qp.Node(next.FieldHash()))
+		})
+		if err != nil {
+			return i, nil, err
+		}
+		return i, v, nil
+	}
+	return i, next, nil
+}
+
+// Done reports whether the underlying link iterator is exhausted.
+func (itr *UnixFSDir__ListItr) Done() bool {
+	return itr._substrate.Done()
+}
+
+// UnixFSDir__ListItr adapts a pbLinkItr to the ipld.ListIterator interface.
+type UnixFSDir__ListItr struct {
+	_substrate    pbLinkItr
+	transformName TransformNameFunc
+}
+
 type UnixFSDir__Itr struct {
 	_substrate    pbLinkItr
 	transformName TransformNameFunc