Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide list iterator interface #51

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion directory/basicdir.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (n UnixFSBasicDir) MapIterator() ipld.MapIterator {
// can be expected that itr.Next will be called node.Length times
// before itr.Done becomes true.
func (n UnixFSBasicDir) ListIterator() ipld.ListIterator {
// NOTE(review): two consecutive return statements appear here. As written,
// the first one wins and the iterator construction below is unreachable.
// This looks like a diff rendering (removed line followed by its
// replacement) — confirm which return is intended.
return nil
// Wraps an iterator over the substrate's Links in a directory list
// iterator; the nil second argument means no name transform is supplied.
return iter.NewUnixFSDirListIterator(&_UnixFSBasicDir__ListItr{n._substrate.Links.Iterator()}, nil)
}

// Length returns the length of a list, or the number of entries in a map,
Expand Down
9 changes: 8 additions & 1 deletion hamt/shardeddir.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,14 @@ func (itr *_UnixFSShardedDir__ListItr) Done() bool {
// can be expected that itr.Next will be called node.Length times
// before itr.Done becomes true.
func (n UnixFSHAMTShard) ListIterator() ipld.ListIterator {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we want to do this as a list -

The IPLD node interface for a directory will be of kind Map, so by the IPLD contract I would expect that asking for a list iterator returns `nil`.

A non-ipld interface + method might be clearer for accessing this functionality.

return nil
maxPadLen := maxPadLength(n.data)
listItr := &_UnixFSShardedDir__ListItr{
_substrate: n.FieldLinks().Iterator(),
maxPadLen: maxPadLen,
nd: n,
}
st := stringTransformer{maxPadLen: maxPadLen}
return iter.NewUnixFSDirListIterator(listItr, st.transformNameNode)
}

// Length returns the length of a list, or the number of entries in a map,
Expand Down
80 changes: 80 additions & 0 deletions hamt/shardeddir_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,83 @@ func TestIncompleteShardedIteration(t *testing.T) {
req.Contains(blockNotFound, "/wiki/ICloud_Drive")
req.Contains(blockNotFound, "/wiki/Édouard_Bamberger")
}

// TestIncompleteShardedListIteration walks a CAR fixture that is missing some
// of its blocks, using the HAMT shard's ListIterator at every directory level.
// It records which leaves could be read, which named links could not be
// loaded, and how many iterator steps failed with a NotFound error, then
// checks those against the expected counts and names.
func TestIncompleteShardedListIteration(t *testing.T) {
	ctx := context.Background()
	req := require.New(t)

	fixture := "./fixtures/wikipedia-cryptographic-hash-function.car"
	f, err := os.Open(fixture)
	req.NoError(err)
	defer f.Close()
	carstore, err := storage.OpenReadable(f)
	req.NoError(err)
	lsys := cidlink.DefaultLinkSystem()
	lsys.TrustedStorage = true
	lsys.SetReadStorage(carstore)

	// recursive go-ipld-prime list iteration, being forgiving about
	// NotFound block loads to see what we end up with

	kvs := make(map[string]string)
	var iterNotFound int
	blockNotFound := make(map[string]struct{})

	var walk func(string, ipld.Link)
	walk = func(dir string, lnk ipld.Link) {
		nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any)
		if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
			// got a named link that we can't load
			blockNotFound[dir] = struct{}{}
			return
		}
		req.NoError(err)
		if nd.Kind() == ipld.Kind_Bytes {
			// a leaf: remember its content under the path that led here
			bv, err := nd.AsBytes()
			req.NoError(err)
			kvs[dir] = string(bv)
			return
		}

		nb := dagpb.Type.PBNode.NewBuilder()
		req.NoError(nb.AssignNode(nd))
		pbn := nb.Build()
		hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, pbn, &lsys)
		req.NoError(err)

		li := hamtShard.ListIterator()
		for !li.Done() {
			_, v, err := li.Next()
			if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
				// internal shard link that won't load, we don't know what it might
				// point to
				iterNotFound++
				continue
			}
			req.NoError(err)
			// each list entry is expected to be a named PBLink
			pbLink, ok := v.(dagpb.PBLink)
			req.True(ok)
			req.True(pbLink.FieldName().Exists())
			ks := pbLink.FieldName().Must().String()
			lv := pbLink.FieldHash().Link()
			walk(dir+"/"+ks, lv)
		}
	}
	// walk the tree
	walk("", cidlink.Link{Cid: carstore.Roots()[0]})

	req.Len(kvs, 1)
	req.Contains(kvs, "/wiki/Cryptographic_hash_function")
	req.Contains(kvs["/wiki/Cryptographic_hash_function"], "<title>Cryptographic hash function</title>\n")
	// testify's Equal takes (expected, actual); keeping that order makes
	// failure output report the right value as "expected".
	req.Equal(570, iterNotFound) // tried to load 570 blocks that were not in the CAR
	req.Len(blockNotFound, 110)  // 110 blocks, for named links, were not found in the CAR
	// some of the root block links
	req.Contains(blockNotFound, "/favicon.ico")
	req.Contains(blockNotFound, "/index.html")
	req.Contains(blockNotFound, "/zimdump_version")
	// some of the shard links
	req.Contains(blockNotFound, "/wiki/UK_railway_Signal")
	req.Contains(blockNotFound, "/wiki/Australian_House")
	req.Contains(blockNotFound, "/wiki/ICloud_Drive")
	req.Contains(blockNotFound, "/wiki/Édouard_Bamberger")
}
43 changes: 43 additions & 0 deletions iter/iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package iter
import (
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/datamodel"
"github.com/ipld/go-ipld-prime/fluent/qp"
)

// pbLinkItr behaves like a list-of-links iterator, even though the HAMT behavior is more complicated
Expand Down Expand Up @@ -52,6 +54,47 @@ func (itr *UnixFSDir__MapItr) Done() bool {
return itr._substrate.Done()
}

// NewUnixFSDirListIterator wraps a link iterator in an ipld.ListIterator.
// transformName may be nil; when it is non-nil, Next applies it to the name
// of every named link before returning it.
func NewUnixFSDirListIterator(itr pbLinkItr, transformName TransformNameFunc) ipld.ListIterator {
	return &UnixFSDir__ListItr{
		_substrate:    itr,
		transformName: transformName,
	}
}

// Next advances the underlying link iterator and returns the index it reports
// together with the link as a node.
//
// An error from the underlying iterator is passed through unchanged. If the
// underlying iterator yields a nil link, ipld.ErrIteratorOverread is returned.
// A link without a name is returned as-is. A link with a name is rebuilt as a
// PBLink carrying the (optionally transformed) Name, the Tsize when present,
// and the Hash.
func (itr *UnixFSDir__ListItr) Next() (i int64, v ipld.Node, err error) {
	idx, lnk, err := itr._substrate.Next()
	switch {
	case err != nil:
		return idx, nil, err
	case lnk == nil:
		return idx, nil, ipld.ErrIteratorOverread{}
	}

	// Unnamed links need no rewriting; hand back the original link.
	if !lnk.FieldName().Exists() {
		return idx, lnk, nil
	}

	linkName := lnk.FieldName().Must()
	if itr.transformName != nil {
		linkName = itr.transformName(linkName)
	}

	// Assemble a fresh PBLink so the returned node carries the resulting name.
	rebuilt, buildErr := qp.BuildMap(dagpb.Type.PBLink, 3, func(ma datamodel.MapAssembler) {
		qp.MapEntry(ma, "Name", qp.Node(linkName))
		if lnk.FieldTsize().Exists() {
			qp.MapEntry(ma, "Tsize", qp.Node(lnk.FieldTsize().Must()))
		}
		qp.MapEntry(ma, "Hash", qp.Node(lnk.FieldHash()))
	})
	if buildErr != nil {
		return idx, nil, buildErr
	}
	return idx, rebuilt, nil
}

// Done reports whether the underlying link iterator is finished.
func (itr *UnixFSDir__ListItr) Done() bool {
	finished := itr._substrate.Done()
	return finished
}

// UnixFSDir__ListItr is the iterator that NewUnixFSDirListIterator returns as
// an ipld.ListIterator. Field order matters: the constructor builds this
// struct with a positional literal.
type UnixFSDir__ListItr struct {
// _substrate is the underlying link iterator that Next and Done delegate to.
_substrate pbLinkItr
// transformName, when non-nil, is applied by Next to the name of each named link.
transformName TransformNameFunc
}

type UnixFSDir__Itr struct {
_substrate pbLinkItr
transformName TransformNameFunc
Expand Down