From 85fcf249d8fed3eda6c6ec7c6966a4ec98bd5344 Mon Sep 17 00:00:00 2001
From: Eric Myhre
Date: Sun, 10 Oct 2021 12:09:47 +0200
Subject: [PATCH 1/8] storage: start new API based on feature detection.

This will eventually rival and replace linking.BlockWriteOpener and linking.BlockReadOpener, being more primitive than them, and easier to implement without importing any go-ipld-prime types. (The eventual migration story should be smooth, though; we'll probably support both styles at once for a while.)

It also explicitly does not acknowledge the interfaces from the github.com/ipfs/go-datastore module. We're seeking several key improvements over those, including:

- slimmer core; more feature-detection
- only standard library types needed to implement the API
- context everywhere
- clear support for streaming operation
- separation between read and write directions of operation
- a consistent strategy for feature-detection and optional fastpaths
- key type should be much simpler (and not do strange internal manipulations that cause useless allocations!)

We'll offer wrappers that bridge back to the go-datastore style API, because there are lots of things implemented today using it. However, these objectives for a new clean API are not reasonably feasible to reach by incrementally evolving that older interface, so we have to start anew.

In this commit: we have a first draft of the most essential interfaces, as well as always-available functions at package scope which will "do the right thing" in the most efficient way possible, by using feature detection internally on your behalf, for ease-of-use.
---
 storage/api.go | 95 +++++++++++++++++++++++++++++++++++++++++
 storage/doc.go | 45 ++++++++++++++++----
 storage/funcs.go | 105 ++++++++++++++++++++++++++++++++++++++++++++++
 storage/memory.go | 2 +
 4 files changed, 239 insertions(+), 8 deletions(-)
 create mode 100644 storage/api.go
 create mode 100644 storage/funcs.go

diff --git a/storage/api.go b/storage/api.go
new file mode 100644
index 00000000..ff0c7461
--- /dev/null
+++ b/storage/api.go
@@ -0,0 +1,95 @@
+package storage
+
+import (
+	"context"
+	"io"
+)
+
+// --- basics --->
+
+type ReadableStorage interface {
+	Get(ctx context.Context, key string) ([]byte, error)
+}
+
+type WritableStorage interface {
+	Put(ctx context.Context, key string, content []byte) error
+}
+
+// --- streaming --->
+
+type StreamingReadableStorage interface {
+	// Note that the returned io.Reader may also be an io.ReadCloser -- check for this.
+	GetStream(ctx context.Context, key string) (io.Reader, error)
+}
+
+// StreamingWritableStorage is a feature-detection interface that advertises support for streaming writes.
+// It is normal for APIs to use WritableStorage in their exported API surface,
+// and then internally check if that value implements StreamingWritableStorage if they wish to use streaming operations.
+//
+// Streaming writes can be preferable to the all-in-one style of writing of WritableStorage.Put,
+// because with streaming writes, the high water mark for memory usage can be kept lower.
+// On the other hand, streaming writes can incur slightly higher allocation counts,
+// which may cause some performance overhead when handling many small writes in sequence.
+//
+// The PutStream function returns three values: an io.Writer (as you'd expect), another function, and an error.
+// The function returned is called a "WriteCommitter".
+// The final error value is as usual: it will contain an error value if the write could not be begun.
+// ("WriteCommitter" will be refered to as such throughout the docs, but we don't give it a named type -- +// unfortunately, this is important, because we don't want to force implementers of storage systems to import this package just for a type name.) +// +// The WriteCommitter function should be called when you're done writing, +// at which time you give it the key you want to commit the data as. +// It will close and flush any streams, and commit the data to its final location under this key. +// (If the io.Writer is also an io.WriteCloser, it is not necessary to call Close on it, +// because using the WriteCommiter will do this for you.) +// +// Because these storage APIs are meant to work well for content-addressed systems, +// the key argument is not provided at the start of the write -- it's provided at the end. +// (This gives the opportunity to be computing a hash of the contents as they're written to the stream.) +// +// As a special case, giving a key of the zero string to the WriteCommiter will +// instead close and remove any temp files, and store nothing. +// An error may still be returned from the WriteCommitter if there is an error cleaning up +// any temporary storage buffers that were created. +// +// Continuing to write to the io.Writer after calling the WriteCommitter function will result in errors. +// Calling the WriteCommitter function more than once will result in errors. +type StreamingWritableStorage interface { + PutStream(ctx context.Context) (io.Writer, func(key string) error, error) +} + +// --- other specializations ---> + +// VectorWritableStorage is an API for writing several slices of bytes at once into storage. +// It's meant a feature-detection interface; not all storage implementations need to provide this feature. +// This kind of API can be useful for maximizing performance in scenarios where +// data is already loaded completely into memory, but scattered across several non-contiguous regions. +type VectorWritableStorage interface { + PutVec(ctx context.Context, key string, blobVec [][]byte) error +} + +// PeekableStorage is a feature-detection interface which a storage implementation can use to advertise +// the ability to look at a piece of data, and return it in shared memory. +// The PeekableStorage.Peek method is essentially the same as ReadableStorage.Get -- +// but by contrast, ReadableStorage is expected to return a safe copy. +// PeekableStorage can be used when the caller knows they will not mutate the returned slice. +type PeekableStorage interface { + Peek(ctx context.Context, key string) ([]byte, error) +} + +// the following are all hypothetical additional future interfaces (in varying degress of speculativeness): + +// FUTURE: an EnumerableStorage API, that lets you list all keys present? + +// FUTURE: a cleanup API (for getting rid of tmp files that might've been left behind on rough shutdown)? + +// FUTURE: a sync-forcing API? + +// FUTURE: a delete API? sure. (just document carefully what its consistency model is -- i.e. basically none.) +// (hunch: if you do want some sort of consistency model -- consider offering a whole family of methods that have some sort of generation or sequencing number on them.) + +// FUTURE: a force-overwrite API? (not useful for a content-address system. but maybe a gesture towards wider reusability is acceptable to have on offer.) + +// FUTURE: a size estimation API? (unclear if we need to standardize this, but we could. an offer, anyway.) + +// FUTURE: a GC API? 
+// (dubious -- doing it well probably crosses logical domains, and should not be tied down here.)
diff --git a/storage/doc.go b/storage/doc.go
index eaaebbb2..4a9a85ad 100644
--- a/storage/doc.go
+++ b/storage/doc.go
@@ -1,9 +1,38 @@
-// Storage contains some simple implementations for the
-// ipld.BlockReadOpener and ipld.BlockWriteOpener interfaces,
-// which are typically used by composition in a LinkSystem.
-//
-// These are provided as simple "batteries included" storage systems.
-// They are aimed at being quickly usable to build simple demonstrations.
-// For heavy usage (large datasets, with caching, etc) you'll probably
-// want to start looking for other libraries which go deeper on this subject.
+// The storage package contains interfaces for storage systems, and functions for using them.
+//
+// These are very low-level storage primitives.
+// The interfaces here deal only with raw keys and raw binary blob values.
+//
+// In IPLD, you can often avoid dealing with storage directly yourself,
+// and instead use linking.LinkSystem to handle serialization, hashing, and storage all at once.
+// You'll hand some values that match interfaces from this package to LinkSystem when configuring it.
+//
+// The most basic APIs are ReadableStorage and WritableStorage.
+// APIs should usually be designed around accepting ReadableStorage or WritableStorage as parameters
+// (depending on which direction of data flow the API concerns),
+// and use the other interfaces (e.g. StreamingReadableStorage) thereafter internally for feature detection.
+// Similarly, implementers of storage systems should implement ReadableStorage or WritableStorage
+// before any other features.
+//
+// Storage systems as described by this package are allowed to make some interesting trades.
+// Generally, write operations are allowed to be first-write-wins.
+// Furthermore, there is no requirement that the system return an error if a subsequent write to the same key has different content.
+// These rules are reasonable for a content-addressed storage system, and allow great optimizations to be made.
+//
+// If implementing a storage system, you should implement interfaces from this package.
+// Beyond the basic two (described above), all the other interfaces are optional:
+// you can implement them if you want to advertise additional features,
+// or advertise fastpaths that your storage system supports;
+// but you don't have to implement any of the additional interfaces if you don't want to.
+//
+// Note that all of the interfaces in this package only use types that are present in the golang standard library.
+// This is intentional, and was done very carefully.
+// If implementing a storage system, you should find it possible to do so *without* importing this package.
+// Because only standard library types are present in the interface contracts,
+// it's possible to implement types that align with the interfaces without referring to them.
 package storage
+
+// also note:
+// LinkContext stays *out* of this package. It's a chooser-related thing.
+// LinkSystem can think about it (and your callbacks over there can think about it), and that's the end of its road.
+// (Future: probably LinkSystem should have SetStorage and SetupStorageChooser methods for helping you set things up -- where the former doesn't discuss LinkContext at all.)
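As an illustration of the doc.go contract above (not part of this patch): because the interface contracts use only standard-library types, a third-party package can satisfy ReadableStorage and WritableStorage without importing go-ipld-prime at all. A minimal sketch, with hypothetical names:

	package mapstore

	import (
		"context"
		"fmt"
	)

	// MapStore satisfies the ReadableStorage and WritableStorage contracts
	// purely by matching their method signatures; no go-ipld-prime import is needed.
	type MapStore struct {
		Bag map[string][]byte
	}

	func (s *MapStore) Get(ctx context.Context, key string) ([]byte, error) {
		content, ok := s.Bag[key]
		if !ok {
			return nil, fmt.Errorf("key not found")
		}
		// Return a defensive copy, since Get is expected to return a safe copy.
		cpy := make([]byte, len(content))
		copy(cpy, content)
		return cpy, nil
	}

	func (s *MapStore) Put(ctx context.Context, key string, content []byte) error {
		if s.Bag == nil {
			s.Bag = make(map[string][]byte)
		}
		if _, exists := s.Bag[key]; exists {
			return nil // first-write-wins is explicitly allowed by the contract.
		}
		cpy := make([]byte, len(content))
		copy(cpy, content)
		s.Bag[key] = cpy
		return nil
	}

A value like this can then be handed to anything declared in terms of these interfaces -- including, later in this series, LinkSystem.SetReadStorage and LinkSystem.SetWriteStorage.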
diff --git a/storage/funcs.go b/storage/funcs.go new file mode 100644 index 00000000..136e5b56 --- /dev/null +++ b/storage/funcs.go @@ -0,0 +1,105 @@ +package storage + +import ( + "bytes" + "context" + "fmt" + "io" +) + +/* + This file contains equivalents of every method that can be feature-detected on a storage system. + You can always call these functions, and give them the most basic storage interface, + and they'll attempt to feature-detect their way to the best possible implementation of the behavior, + or they'll fall back to synthesizing the same behavior from more basic interfaces. + + Long story short: you can always use these functions as an end user, and get the behavior you want -- + regardless of how much explicit support the storage implementation has for the exact behavior you requested. +*/ + +func Get(ctx context.Context, store ReadableStorage, key string) ([]byte, error) { + // Okay, not much going on here -- this function is only here for consistency of style. + return store.Get(ctx, key) +} + +func Put(ctx context.Context, store WritableStorage, key string, content []byte) error { + // Okay, not much going on here -- this function is only here for consistency of style. + return store.Put(ctx, key, content) +} + +// GetStream returns a streaming reader. +// This function will feature-detect the StreamingReadableStorage interface, and use that if possible; +// otherwise it will fall back to using basic ReadableStorage methods transparently +// (at the cost of loading all the data into memory at once and up front). +func GetStream(ctx context.Context, store ReadableStorage, key string) (io.Reader, error) { + // Prefer the feature itself, first. + if streamable, ok := store.(StreamingReadableStorage); ok { + return streamable.GetStream(ctx, key) + } + // Fallback to basic. + blob, err := store.Get(ctx, key) + return bytes.NewReader(blob), err +} + +// PutStream returns an io.Writer and a WriteCommitter callback. +// (See the docs on StreamingWritableStorage.PutStream for details on what that means.) +// This function will feature-detect the StreamingWritableStorage interface, and use that if possible; +// otherwise it will fall back to using basic WritableStorage methods transparently +// (at the cost of needing to buffer all of the content in memory while the write is in progress). +func PutStream(ctx context.Context, store WritableStorage) (io.Writer, func(key string) error, error) { + // Prefer the feature itself, first. + if streamable, ok := store.(StreamingWritableStorage); ok { + return streamable.PutStream(ctx) + } + // Fallback to basic. + var buf bytes.Buffer + var written bool + return &buf, func(key string) error { + if written { + return fmt.Errorf("WriteCommitter already used") + } + written = true + return store.Put(ctx, key, buf.Bytes()) + }, nil +} + +// PutVec is an API for writing several slices of bytes at once into storage. +// This kind of API can be useful for maximizing performance in scenarios where +// data is already loaded completely into memory, but scattered across several non-contiguous regions. +// This function will feature-detect the VectorWritableStorage interface, and use that if possible; +// otherwise it will fall back to using StreamingWritableStorage, +// or failing that, fall further back to basic WritableStorage methods, transparently. +func PutVec(ctx context.Context, store WritableStorage, key string, blobVec [][]byte) error { + // Prefer the feature itself, first. 
+	if putvable, ok := store.(VectorWritableStorage); ok {
+		return putvable.PutVec(ctx, key, blobVec)
+	}
+	// Fallback to streaming mode.
+	// ... or, fallback to basic, and use emulated streaming. Still presumably preferable to doing a big giant memcopy.
+	// Conveniently, the PutStream function makes that transparent for our implementation, too.
+	wr, wrcommit, err := PutStream(ctx, store)
+	if err != nil {
+		return err
+	}
+	for _, blob := range blobVec {
+		_, err := wr.Write(blob)
+		if err != nil {
+			return err
+		}
+	}
+	return wrcommit(key)
+}
+
+// Peek accesses the same data as Get, but indicates that the caller promises not to mutate the returned byte slice.
+// (By contrast, Get is expected to return a safe copy.)
+// This function will feature-detect the PeekableStorage interface, and use that if possible;
+// otherwise it will fall back to using basic ReadableStorage methods transparently
+// (meaning that a no-copy fastpath simply wasn't available).
+func Peek(ctx context.Context, store ReadableStorage, key string) ([]byte, error) {
+	// Prefer the feature itself, first.
+	if peekable, ok := store.(PeekableStorage); ok {
+		return peekable.Peek(ctx, key)
+	}
+	// Fallback to basic.
+	return store.Get(ctx, key)
+}
diff --git a/storage/memory.go b/storage/memory.go
index fd6bfd8c..5a1ff975 100644
--- a/storage/memory.go
+++ b/storage/memory.go
@@ -9,6 +9,8 @@ import (
 	"github.com/ipld/go-ipld-prime/linking"
 )
 
+// TODO: move me
+
 // Memory is a simple in-memory storage for data indexed by datamodel.Link.
 // (It's little more than a map -- in fact, the map is exported,
 // and you can poke it directly.)

From 3b4df1e9c368c9bf50fb48cb2bbd33bf8a581f13 Mon Sep 17 00:00:00 2001
From: Eric Myhre
Date: Sun, 10 Oct 2021 14:46:49 +0200
Subject: [PATCH 2/8] add binary access to the interface for Link.

This is a pretty big change if anyone else out there is implementing new link types. If you're using the linking/cid package, the change is already done.

I'm going to consider this a minimally breaking change because I don't think we've got a lot of diverse Link implementations out in the wild. (And even if so: this should be a pretty easy addition to make.)

I'm anticipating using this as part of a good layering of APIs regarding storage. (We should be able to use binary, dense things in any storage substrate that allows it. Some common storage systems, like e.g. flatfs, don't work with binary. But any escaping having to do with that should be associated with the storage system -- not pushed to any other layer. Having binary access here helps do that.)
---
 datamodel/link.go | 13 +++++++++++++
 linking/cid/cidLink.go | 3 +++
 2 files changed, 16 insertions(+)

diff --git a/datamodel/link.go b/datamodel/link.go
index e1c193a4..f52354ad 100644
--- a/datamodel/link.go
+++ b/datamodel/link.go
@@ -34,6 +34,19 @@ type Link interface {
 	// There is no contract that requires that the string be able to be parsed back into a Link value,
 	// but the string should be unique (e.g. not elide any parts of the hash).
 	String() string
+
+	// Binary should return the densest possible encoding of the Link.
+	// The value need not be printable or human-readable;
+	// the golang string type is used for immutability and for ease of use as a map key.
+	// As with the String method, the returned value must not elide any parts of the hash.
+	//
+	// Note that there is still no contract that the returned value be able to be parsed back into a Link value;
+	// not even in the case of `lnk.Prototype().BuildLink(lnk.Binary()[:])`.
+ // This is because the value returned by this method may contain data that the LinkPrototype would also restate. + // (For a concrete example: if using CIDs, this method will return a binary string that includes + // the cid version indicator, the multicodec and multihash indicators, etc, in addition to the hash itself -- + // whereas the LinkPrototype.BuildLink function still expects to receive only the hash itself alone.) + Binary() string } // LinkPrototype encapsulates any implementation details and parameters diff --git a/linking/cid/cidLink.go b/linking/cid/cidLink.go index 02e8e4da..835e950a 100644 --- a/linking/cid/cidLink.go +++ b/linking/cid/cidLink.go @@ -30,6 +30,9 @@ func (lnk Link) Prototype() datamodel.LinkPrototype { func (lnk Link) String() string { return lnk.Cid.String() } +func (lnk Link) Binary() string { + return lnk.Cid.KeyString() +} type LinkPrototype struct { cid.Prefix From 0fdf939a23d644437a181ae106c777cd154dd8ce Mon Sep 17 00:00:00 2001 From: Eric Myhre Date: Sun, 10 Oct 2021 15:03:21 +0200 Subject: [PATCH 3/8] storage: move memstore. Fixes import cycles that I would otherwise be about to experience starting in the next commit. --- adl/rot13adl/example_test.go | 4 ++-- linking/linkingExamples_test.go | 4 ++-- node/tests/schemaLinks.go | 4 ++-- storage/{memory.go => memstore/memstore.go} | 16 ++++++++-------- traversal/focus_test.go | 10 +++++----- 5 files changed, 19 insertions(+), 19 deletions(-) rename storage/{memory.go => memstore/memstore.go} (67%) diff --git a/adl/rot13adl/example_test.go b/adl/rot13adl/example_test.go index 70cc3f90..434cd8f8 100644 --- a/adl/rot13adl/example_test.go +++ b/adl/rot13adl/example_test.go @@ -14,7 +14,7 @@ import ( "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/must" - "github.com/ipld/go-ipld-prime/storage" + "github.com/ipld/go-ipld-prime/storage/memstore" ) func ExampleReify_unmarshallingToADL() { @@ -64,7 +64,7 @@ func ExampleReify_loadingToADL() { MhLength: 4, }} linkSystem := cidlink.DefaultLinkSystem() - storage := &storage.Memory{} + storage := &memstore.Store{} linkSystem.StorageReadOpener = storage.OpenRead linkSystem.StorageWriteOpener = storage.OpenWrite linkSystem.NodeReifier = func(_ linking.LinkContext, nd datamodel.Node, _ *linking.LinkSystem) (datamodel.Node, error) { diff --git a/linking/linkingExamples_test.go b/linking/linkingExamples_test.go index f77883f8..5404a6c5 100644 --- a/linking/linkingExamples_test.go +++ b/linking/linkingExamples_test.go @@ -10,7 +10,7 @@ import ( "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/node/basicnode" - "github.com/ipld/go-ipld-prime/storage" + "github.com/ipld/go-ipld-prime/storage/memstore" ) // storage is a map where we'll store serialized IPLD data. @@ -20,7 +20,7 @@ import ( // // In a real program, you'll probably make functions to load and store from disk, // or some network storage, or... whatever you want, really :) -var store = storage.Memory{} +var store = memstore.Store{} // TODO: These examples are really heavy on CIDs and the multicodec and multihash magic tables. // It would be good to have examples that create and use less magical LinkSystem constructions, too. 
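An aside on the escaping remark in the patch 2 message above: keys produced by Link.Binary() are dense binary, so a storage backend that cannot accept raw binary keys (flatfs-style filesystems, for example) is expected to apply its own escaping rather than pushing that concern up a layer. A hypothetical sketch of such an escaping function, illustrative only (later in this series, dsadapter exposes a similar EscapingFunc hook):

	package main

	import (
		"encoding/base32"
		"fmt"
	)

	// escapeKey maps an arbitrary binary key (such as a Link.Binary() value)
	// into plain ASCII that a filesystem-backed store can tolerate.
	// The mapping must be injective so that distinct keys never collide.
	func escapeKey(key string) string {
		return base32.StdEncoding.WithPadding(base32.NoPadding).EncodeToString([]byte(key))
	}

	func main() {
		binaryKey := "\x01\x71\x12\x20\xde\xad\xbe\xef" // a fabricated dense key, for illustration only
		fmt.Println(escapeKey(binaryKey))               // prints an ASCII-safe form of the same key
	}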
diff --git a/node/tests/schemaLinks.go b/node/tests/schemaLinks.go index 1e719b07..fddd3e6c 100644 --- a/node/tests/schemaLinks.go +++ b/node/tests/schemaLinks.go @@ -15,13 +15,13 @@ import ( cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/node/basicnode" "github.com/ipld/go-ipld-prime/schema" - "github.com/ipld/go-ipld-prime/storage" + "github.com/ipld/go-ipld-prime/storage/memstore" "github.com/ipld/go-ipld-prime/traversal" "github.com/ipld/go-ipld-prime/traversal/selector" "github.com/ipld/go-ipld-prime/traversal/selector/builder" ) -var store = storage.Memory{} +var store = memstore.Store{} func encode(n datamodel.Node) (datamodel.Node, datamodel.Link) { lp := cidlink.LinkPrototype{cid.Prefix{ diff --git a/storage/memory.go b/storage/memstore/memstore.go similarity index 67% rename from storage/memory.go rename to storage/memstore/memstore.go index 5a1ff975..b2a9d8e2 100644 --- a/storage/memory.go +++ b/storage/memstore/memstore.go @@ -1,4 +1,4 @@ -package storage +package memstore import ( "bytes" @@ -9,9 +9,9 @@ import ( "github.com/ipld/go-ipld-prime/linking" ) -// TODO: move me +// TODO: this is implementing the linking APIs, and it should be updated to just implement the storage APIs, and let LinkSystem construction figure out the rest of the connections. -// Memory is a simple in-memory storage for data indexed by datamodel.Link. +// Store is a simple in-memory storage. // (It's little more than a map -- in fact, the map is exported, // and you can poke it directly.) // @@ -19,24 +19,24 @@ import ( // and the OpenWrite method conforms to linking.BlockWriteOpener. // Therefore it's easy to use in a LinkSystem like this: // -// store := storage.Memory{} +// store := memstore.Store{} // lsys.StorageReadOpener = (&store).OpenRead // lsys.StorageWriteOpener = (&store).OpenWrite // // This storage is mostly expected to be used for testing and demos, // and as an example of how you can implement and integrate your own storage systems. -type Memory struct { +type Store struct { Bag map[datamodel.Link][]byte } -func (store *Memory) beInitialized() { +func (store *Store) beInitialized() { if store.Bag != nil { return } store.Bag = make(map[datamodel.Link][]byte) } -func (store *Memory) OpenRead(lnkCtx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { +func (store *Store) OpenRead(lnkCtx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { store.beInitialized() data, exists := store.Bag[lnk] if !exists { @@ -45,7 +45,7 @@ func (store *Memory) OpenRead(lnkCtx linking.LinkContext, lnk datamodel.Link) (i return bytes.NewReader(data), nil } -func (store *Memory) OpenWrite(lnkCtx linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { +func (store *Store) OpenWrite(lnkCtx linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { store.beInitialized() buf := bytes.Buffer{} return &buf, func(lnk datamodel.Link) error { diff --git a/traversal/focus_test.go b/traversal/focus_test.go index e623f2de..90d10d4a 100644 --- a/traversal/focus_test.go +++ b/traversal/focus_test.go @@ -14,14 +14,14 @@ import ( cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/must" "github.com/ipld/go-ipld-prime/node/basicnode" - "github.com/ipld/go-ipld-prime/storage" + "github.com/ipld/go-ipld-prime/storage/memstore" "github.com/ipld/go-ipld-prime/traversal" ) // Do some fixture fabrication. // We assume all the builders and serialization must Just Work here. 
-var store = storage.Memory{} +var store = memstore.Store{} var ( // baguqeeyexkjwnfy leafAlpha, leafAlphaLnk = encode(basicnode.NewString("alpha")) @@ -314,7 +314,7 @@ func TestFocusedTransform(t *testing.T) { } func TestFocusedTransformWithLinks(t *testing.T) { - var store2 = storage.Memory{} + var store2 = memstore.Store{} lsys := cidlink.DefaultLinkSystem() lsys.StorageReadOpener = (&store).OpenRead lsys.StorageWriteOpener = (&store2).OpenWrite @@ -339,7 +339,7 @@ func TestFocusedTransformWithLinks(t *testing.T) { // there should be a new object in our new storage! Wish(t, len(store2.Bag), ShouldEqual, 1) // cleanup for next test - store2 = storage.Memory{} + store2 = memstore.Store{} }) t.Run("UpdateNotBeyondLink", func(t *testing.T) { // This is replacing a link with a non-link. Doing so shouldn't hit storage. @@ -356,7 +356,7 @@ func TestFocusedTransformWithLinks(t *testing.T) { // there should be no new objects in our new storage! Wish(t, len(store2.Bag), ShouldEqual, 0) // cleanup for next test - store2 = storage.Memory{} + store2 = memstore.Store{} }) // link traverse to scalar // this is unspecifiable using the current path syntax! you'll just end up replacing the link with the scalar! From 57117677280e97016156843833ed3decba039909 Mon Sep 17 00:00:00 2001 From: Eric Myhre Date: Sun, 10 Oct 2021 15:10:29 +0200 Subject: [PATCH 4/8] linking: new method to help configure LinkSystem to use interfaces defined by the storage package. This means we now have a one-liner call that can: - rig up a storage system... - which can have been written while using zero types from this repo... - and it'll DTRT. - All while it's also become much easier to write storage implementations, as well as extend them gracefully, because of the new storage package APIs. This seems good. --- linking/setup.go | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 linking/setup.go diff --git a/linking/setup.go b/linking/setup.go new file mode 100644 index 00000000..6d3175c7 --- /dev/null +++ b/linking/setup.go @@ -0,0 +1,41 @@ +package linking + +import ( + "io" + + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/storage" +) + +// SetReadStorage configures how the LinkSystem will look for information to load, +// setting it to look at the given storage.ReadableStorage. +// +// This will overwrite the LinkSystem.StorageReadOpener field. +// +// This mechanism only supports setting exactly one ReadableStorage. +// If you would like to make a more complex configuration +// (for example, perhaps using information from a LinkContext to decide which storage area to use?) +// then you should set LinkSystem.StorageReadOpener to a custom callback of your own creation instead. +func (lsys *LinkSystem) SetReadStorage(store storage.ReadableStorage) { + lsys.StorageReadOpener = func(lctx LinkContext, lnk datamodel.Link) (io.Reader, error) { + return storage.GetStream(lctx.Ctx, store, lnk.Binary()) + } +} + +// SetWriteStorage configures how the LinkSystem will store information, +// setting it to write into the given storage.WritableStorage. +// +// This will overwrite the LinkSystem.StorageWriteOpener field. +// +// This mechanism only supports setting exactly one WritableStorage. +// If you would like to make a more complex configuration +// (for example, perhaps using information from a LinkContext to decide which storage area to use?) +// then you should set LinkSystem.StorageWriteOpener to a custom callback of your own creation instead. 
+func (lsys *LinkSystem) SetWriteStorage(store storage.WritableStorage) {
+	lsys.StorageWriteOpener = func(lctx LinkContext) (io.Writer, BlockWriteCommitter, error) {
+		wr, wrcommit, err := storage.PutStream(lctx.Ctx, store)
+		return wr, func(lnk datamodel.Link) error {
+			return wrcommit(lnk.Binary())
+		}, err
+	}
+}

From 522500cfab8beca432df02baf811814fdd0e9031 Mon Sep 17 00:00:00 2001
From: Eric Myhre
Date: Sun, 10 Oct 2021 15:57:05 +0200
Subject: [PATCH 5/8] storage/dsadapter: a small module which bridges go-datastore forward into the new storage API.

---
 storage/dsadapter/README.md | 33 ++++++++++++++
 storage/dsadapter/dsadapter.go | 83 ++++++++++++++++++++++++++++++++++
 storage/dsadapter/go.mod | 5 ++
 storage/dsadapter/go.sum | 49 ++++++++++++++++++++
 4 files changed, 170 insertions(+)
 create mode 100644 storage/dsadapter/README.md
 create mode 100644 storage/dsadapter/dsadapter.go
 create mode 100644 storage/dsadapter/go.mod
 create mode 100644 storage/dsadapter/go.sum

diff --git a/storage/dsadapter/README.md b/storage/dsadapter/README.md
new file mode 100644
index 00000000..67570959
--- /dev/null
+++ b/storage/dsadapter/README.md
@@ -0,0 +1,33 @@
+dsadapter
+=========
+
+The `dsadapter` package/module is a small piece of glue code to connect
+the `github.com/ipfs/go-datastore` package, and packages implementing its interfaces,
+forward into the `go-ipld-prime/storage` interfaces.
+
+For example, this can be used to use "flatfs" and other datastore plugins
+with go-ipld-prime storage APIs.
+
+Why structured like this?
+-------------------------
+
+Why are there layers of interface code?
+The `go-ipld-prime/storage` interfaces are a newer generation,
+and improve on several things vs `go-datastore`. (See other docs for that.)
+
+Why is this code in a shared place?
+The glue code to connect `go-datastore` to the new `go-ipld-prime/storage` APIs
+is fairly minimal, but there's also no reason for anyone to write it twice,
+so we want to put it somewhere easy to share.
+
+Why does this code have its own go module?
+A separate module is used because it's important that go-ipld-prime can be used
+without forming a dependency on `go-datastore`.
+(We want this so that there's a reasonable deprecation pathway -- it must be
+possible to write new code that doesn't take on transitive dependencies to old code.)
+
+Why does this code exist here, in this git repo?
+We put this separate module in the same git repo as `go-ipld-prime`... because we can.
+Technically, neither this module nor the go-ipld-prime module depends on the other --
+they just have interfaces that are aligned with each other -- so it's very easy to
+hold them as separate go modules in the same repo, even though that can otherwise sometimes be tricky.
diff --git a/storage/dsadapter/dsadapter.go b/storage/dsadapter/dsadapter.go
new file mode 100644
index 00000000..08fe5f50
--- /dev/null
+++ b/storage/dsadapter/dsadapter.go
@@ -0,0 +1,83 @@
+package dsadapter
+
+import (
+	"context"
+
+	"github.com/ipfs/go-datastore"
+)
+
+// Adapter implements go-ipld-prime/storage.ReadableStorage
+// and go-ipld-prime/storage.WritableStorage
+// backed by a go-datastore.Datastore.
+//
+// Optionally, an EscapingFunc may also be set,
+// which transforms the (possibly binary) keys considered acceptable
+// by the go-ipld-prime/storage APIs into a subset that
+// the go-datastore can accept.
+// (Be careful to use any escaping consistently, +// and be wary of potential unexpected behavior if the escaping function might +// collapse two distinct keys into the same "escaped" key.) +// +// The go-datastore.Datastore may internally have other configuration, +// such as key sharding functions, etc, and we don't interfere with that here; +// such configuration should be handled when creating the go-datastore value. +type Adapter struct { + Wrapped datastore.Datastore + EscapingFunc func(string) string +} + +// Get implements go-ipld-prime/storage.ReadableStorage.Get. +func (a *Adapter) Get(ctx context.Context, key string) ([]byte, error) { + // Return early if the context is already closed. + // This is also the last time we'll check the context, + // since go-datastore doesn't take them. + if ctx.Err() != nil { + return nil, ctx.Err() + } + + // If we have an EscapingFunc, apply it. + if a.EscapingFunc != nil { + key = a.EscapingFunc(key) + } + + // Wrap the key into go-datastore's concrete type that it requires. + // Note that this does a bunch of actual work, which may be surprising. + // The key may be transformed (as per path.Clean). + // There will also be an allocation, if the key doesn't start with "/". + // (Avoiding these performance drags is part of why we started + // new interfaces in go-ipld-prime/storage.) + k := datastore.NewKey(key) + + // Delegate the get call. + // Note that for some datastore implementations, this will do *yet more* + // validation on the key, and may return errors from that. + return a.Wrapped.Get(k) +} + +// Put implements go-ipld-prime/storage.WritableStorage.Put. +func (a *Adapter) Put(ctx context.Context, key string, content []byte) error { + // Return early if the context is already closed. + // This is also the last time we'll check the context, + // since go-datastore doesn't take them. + if ctx.Err() != nil { + return ctx.Err() + } + + // If we have an EscapingFunc, apply it. + if a.EscapingFunc != nil { + key = a.EscapingFunc(key) + } + + // Wrap the key into go-datastore's concrete type that it requires. + // Note that this does a bunch of actual work, which may be surprising. + // The key may be transformed (as per path.Clean). + // There will also be an allocation, if the key doesn't start with "/". + // (Avoiding these performance drags is part of why we started + // new interfaces in go-ipld-prime/storage.) + k := datastore.NewKey(key) + + // Delegate the put call. + // Note that for some datastore implementations, this will do *yet more* + // validation on the key, and may return errors from that. 
+ return a.Wrapped.Put(k, content) +} diff --git a/storage/dsadapter/go.mod b/storage/dsadapter/go.mod new file mode 100644 index 00000000..3a222901 --- /dev/null +++ b/storage/dsadapter/go.mod @@ -0,0 +1,5 @@ +module github.com/ipld/go-ipld-prime/storage/dsadapter + +go 1.16 + +require github.com/ipfs/go-datastore v0.4.6 diff --git a/storage/dsadapter/go.sum b/storage/dsadapter/go.sum new file mode 100644 index 00000000..a9b0d6c0 --- /dev/null +++ b/storage/dsadapter/go.sum @@ -0,0 +1,49 @@ +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/ipfs/go-datastore v0.4.6 h1:zU2cmweykxJ+ziXnA2cPtsLe8rdR/vrthOipLPuf6kc= +github.com/ipfs/go-datastore v0.4.6/go.mod h1:XSipLSc64rFKSFRFGo1ecQl+WhYce3K7frtpHkyPFUc= +github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk= +github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps= +github.com/ipfs/go-ipfs-delay v0.0.0-20181109222059-70721b86a9a8/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= +github.com/jbenet/go-cienv v0.1.0/go.mod h1:TqNnHUmJgXau0nCzC7kXWeotg3J9W34CUv5Djy1+FlA= +github.com/jbenet/goprocess v0.1.4 h1:DRGOFReOMqqDNXwW70QkacFW0YN9QnwLV0Vqk+3oU0o= +github.com/jbenet/goprocess v0.1.4/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZlqdZVfqY4= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= +go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= +go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net 
v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= From c56158d0c299a76e8efb8d7bf5528feea0168afa Mon Sep 17 00:00:00 2001 From: Eric Myhre Date: Thu, 14 Oct 2021 16:39:38 +0200 Subject: [PATCH 6/8] storage: Peek API talks about close explicitly. Opens the way to a storage implementation that makes reuse of buffers. --- storage/api.go | 10 +++++++++- storage/funcs.go | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/storage/api.go b/storage/api.go index ff0c7461..76d2e208 100644 --- a/storage/api.go +++ b/storage/api.go @@ -73,8 +73,16 @@ type VectorWritableStorage interface { // The PeekableStorage.Peek method is essentially the same as ReadableStorage.Get -- // but by contrast, ReadableStorage is expected to return a safe copy. // PeekableStorage can be used when the caller knows they will not mutate the returned slice. +// +// An io.Closer is returned along with the byte slice. +// The Close method on the Closer must be called when the caller is done with the byte slice; +// otherwise, memory leaks may result. +// (Implementers of this interface may be expecting to reuse the byte slice after Close is called.) +// +// Note that Peek does not imply that the caller can use the byte slice freely; +// doing so may result in storage corruption or other undefined behavior. 
 type PeekableStorage interface {
-	Peek(ctx context.Context, key string) ([]byte, error)
+	Peek(ctx context.Context, key string) ([]byte, io.Closer, error)
 }
 
 // the following are all hypothetical additional future interfaces (in varying degrees of speculativeness):
diff --git a/storage/funcs.go b/storage/funcs.go
index 136e5b56..1a3daebc 100644
--- a/storage/funcs.go
+++ b/storage/funcs.go
@@ -95,11 +95,21 @@ func PutVec(ctx context.Context, store WritableStorage, key string, blobVec [][]
 // This function will feature-detect the PeekableStorage interface, and use that if possible;
 // otherwise it will fall back to using basic ReadableStorage methods transparently
 // (meaning that a no-copy fastpath simply wasn't available).
-func Peek(ctx context.Context, store ReadableStorage, key string) ([]byte, error) {
+//
+// An io.Closer is returned along with the byte slice.
+// The Close method on the Closer must be called when the caller is done with the byte slice;
+// otherwise, memory leaks may result.
+// (Implementers of PeekableStorage may be expecting to reuse the byte slice after Close is called.)
+func Peek(ctx context.Context, store ReadableStorage, key string) ([]byte, io.Closer, error) {
 	// Prefer the feature itself, first.
 	if peekable, ok := store.(PeekableStorage); ok {
 		return peekable.Peek(ctx, key)
 	}
 	// Fallback to basic.
-	return store.Get(ctx, key)
+	bs, err := store.Get(ctx, key)
+	return bs, noopCloser{}, err
 }
+
+type noopCloser struct{}
+
+func (noopCloser) Close() error { return nil }

From 55a4896b41e16a0c9ee54748dbd490b0c60a5220 Mon Sep 17 00:00:00 2001
From: Eric Myhre
Date: Thu, 14 Oct 2021 17:51:49 +0200
Subject: [PATCH 7/8] storage: clarification about key constraints, and recommendations on handling it.

---
 storage/doc.go | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/storage/doc.go b/storage/doc.go
index 4a9a85ad..c698c823 100644
--- a/storage/doc.go
+++ b/storage/doc.go
@@ -30,6 +30,17 @@
 // If implementing a storage system, you should find it possible to do so *without* importing this package.
 // Because only standard library types are present in the interface contracts,
 // it's possible to implement types that align with the interfaces without referring to them.
+//
+// Note that where keys are discussed in this package, they use the golang string type --
+// however, they may be binary. (The golang string type allows arbitrary bytes in general,
+// and here, we both use that, and explicitly disavow the usual "norm" that the string type implies UTF-8.
+// This is roughly the same as the practical truth that appears when using e.g. os.OpenFile and other similar functions.)
+// If you are creating a storage implementation where the underlying medium does not support arbitrary binary keys,
+// then it is strongly recommended that your storage implementation support being configured with
+// an "escaping function", which should typically simply be of the form `func(string) string`.
+// Additionally, your storage implementation's documentation should also clearly describe its internal limitations,
+// so that users have enough information to write an escaping function which
+// maps their domain into the domain your storage implementation can handle.
 package storage
 
 // also note:

From 273362c379705cbdd0aca4cd5151f52940adde55 Mon Sep 17 00:00:00 2001
From: Eric Myhre
Date: Thu, 14 Oct 2021 18:22:17 +0200
Subject: [PATCH 8/8] storage/memstore: update to match the new interfaces.

Most usages change from a one-liner to another one-liner.
Some tests get a bit more involved, because they were actually using the callback structure to hook introspections up. --- adl/rot13adl/example_test.go | 4 +- linking/linkingExamples_test.go | 8 ++-- node/tests/schemaLinks.go | 4 +- storage/memstore/memstore.go | 79 +++++++++++++++++++++++--------- traversal/focus_test.go | 10 ++-- traversal/walk_test.go | 27 +++++++---- traversal/walk_with_stop_test.go | 4 +- 7 files changed, 91 insertions(+), 45 deletions(-) diff --git a/adl/rot13adl/example_test.go b/adl/rot13adl/example_test.go index 434cd8f8..f3a68435 100644 --- a/adl/rot13adl/example_test.go +++ b/adl/rot13adl/example_test.go @@ -65,8 +65,8 @@ func ExampleReify_loadingToADL() { }} linkSystem := cidlink.DefaultLinkSystem() storage := &memstore.Store{} - linkSystem.StorageReadOpener = storage.OpenRead - linkSystem.StorageWriteOpener = storage.OpenWrite + linkSystem.SetReadStorage(storage) + linkSystem.SetWriteStorage(storage) linkSystem.NodeReifier = func(_ linking.LinkContext, nd datamodel.Node, _ *linking.LinkSystem) (datamodel.Node, error) { return rot13adl.Reify(nd) } diff --git a/linking/linkingExamples_test.go b/linking/linkingExamples_test.go index 5404a6c5..73984737 100644 --- a/linking/linkingExamples_test.go +++ b/linking/linkingExamples_test.go @@ -36,8 +36,8 @@ func ExampleLinkSystem_Store() { // We want to store the serialized data somewhere. // We'll use an in-memory store for this. (It's a package scoped variable.) // You can use any kind of storage system here; - // you just need a function that conforms to the datamodel.BlockWriteOpener interface. - lsys.StorageWriteOpener = (&store).OpenWrite + // or if you need even more control, you could also write a function that conforms to the linking.BlockWriteOpener interface. + lsys.SetWriteStorage(&store) // To create any links, first we need a LinkPrototype. // This gathers together any parameters that might be needed when making a link. @@ -103,8 +103,8 @@ func ExampleLinkSystem_Load() { // We'll use an in-memory store for this. (It's a package scoped variable.) // (This particular memory store was filled with the data we'll load earlier, during ExampleLinkSystem_Store.) // You can use any kind of storage system here; - // you just need a function that conforms to the datamodel.BlockReadOpener interface. - lsys.StorageReadOpener = (&store).OpenRead + // or if you need even more control, you could also write a function that conforms to the linking.BlockReadOpener interface. + lsys.SetReadStorage(&store) // We'll need to decide what in-memory implementation of datamodel.Node we want to use. // Here, we'll use the "basicnode" implementation. This is a good getting-started choice. 
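To tie the series together, here is roughly what end-to-end usage looks like once the pieces above land: a memstore.Store wired into a LinkSystem via SetReadStorage and SetWriteStorage, then used through Store and Load. (A sketch only -- the codec registration import and the prefix values are assumptions of this example, not dictated by the patches.)

	package main

	import (
		"fmt"

		"github.com/ipfs/go-cid"
		_ "github.com/ipld/go-ipld-prime/codec/dagcbor" // assumed: registers the dag-cbor codec used by the prefix below
		"github.com/ipld/go-ipld-prime/linking"
		cidlink "github.com/ipld/go-ipld-prime/linking/cid"
		"github.com/ipld/go-ipld-prime/node/basicnode"
		"github.com/ipld/go-ipld-prime/storage/memstore"
	)

	func main() {
		// One in-memory store, wired into a LinkSystem for both directions.
		store := &memstore.Store{}
		lsys := cidlink.DefaultLinkSystem()
		lsys.SetWriteStorage(store)
		lsys.SetReadStorage(store)

		lp := cidlink.LinkPrototype{Prefix: cid.Prefix{
			Version:  1,
			Codec:    0x71, // dag-cbor
			MhType:   0x12, // sha2-256
			MhLength: 32,
		}}

		// Store a node; the key used against the storage is lnk.Binary().
		lnk, err := lsys.Store(linking.LinkContext{}, lp, basicnode.NewString("hello, storage"))
		if err != nil {
			panic(err)
		}

		// Load it back through the same storage.
		n, err := lsys.Load(linking.LinkContext{}, lnk, basicnode.Prototype.Any)
		if err != nil {
			panic(err)
		}
		s, _ := n.AsString()
		fmt.Println(lnk, s)
	}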
diff --git a/node/tests/schemaLinks.go b/node/tests/schemaLinks.go index fddd3e6c..151d47cf 100644 --- a/node/tests/schemaLinks.go +++ b/node/tests/schemaLinks.go @@ -31,7 +31,7 @@ func encode(n datamodel.Node) (datamodel.Node, datamodel.Link) { MhLength: 4, }} lsys := cidlink.DefaultLinkSystem() - lsys.StorageWriteOpener = (&store).OpenWrite + lsys.SetWriteStorage(&store) lnk, err := lsys.Store(linking.LinkContext{}, lp, n) if err != nil { @@ -96,7 +96,7 @@ func SchemaTestLinks(t *testing.T, engine Engine) { var order int lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) err = traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys, diff --git a/storage/memstore/memstore.go b/storage/memstore/memstore.go index b2a9d8e2..f6689427 100644 --- a/storage/memstore/memstore.go +++ b/storage/memstore/memstore.go @@ -2,54 +2,91 @@ package memstore import ( "bytes" + "context" "fmt" "io" - - "github.com/ipld/go-ipld-prime/datamodel" - "github.com/ipld/go-ipld-prime/linking" ) -// TODO: this is implementing the linking APIs, and it should be updated to just implement the storage APIs, and let LinkSystem construction figure out the rest of the connections. - // Store is a simple in-memory storage. // (It's little more than a map -- in fact, the map is exported, // and you can poke it directly.) // -// The OpenRead method conforms to linking.BlockReadOpener, -// and the OpenWrite method conforms to linking.BlockWriteOpener. -// Therefore it's easy to use in a LinkSystem like this: +// Store conforms to the storage.ReadableStorage and storage.WritableStorage APIs. +// Additionally, it supports storage.PeekableStorage and storage.StreamingReadableStorage, +// because it can do so while provoking fewer copies. +// +// If you want to use this store with streaming APIs, +// you can still do so by using the functions in the storage package, +// such as storage.GetStream and storage.PutStream, which will synthesize the correct behavior. // -// store := memstore.Store{} -// lsys.StorageReadOpener = (&store).OpenRead -// lsys.StorageWriteOpener = (&store).OpenWrite +// You can use this storage with a linking.LinkSystem easily, +// by using the LinkSystem.SetReadStorage and/or LinkSystem.SetWriteStorage methods. +// +// There are no construction parameters for sharding functions nor escaping functions. +// Any keys are acceptable. // // This storage is mostly expected to be used for testing and demos, // and as an example of how you can implement and integrate your own storage systems. +// It does not provide persistence beyond memory. type Store struct { - Bag map[datamodel.Link][]byte + Bag map[string][]byte } func (store *Store) beInitialized() { if store.Bag != nil { return } - store.Bag = make(map[datamodel.Link][]byte) + store.Bag = make(map[string][]byte) } -func (store *Store) OpenRead(lnkCtx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { +// Get implements go-ipld-prime/storage.ReadableStorage.Get. +// +// Note that this internally performs a defensive copy; +// use Peek for higher performance if you are certain you won't mutate the returned slice. 
+func (store *Store) Get(ctx context.Context, key string) ([]byte, error) { store.beInitialized() - data, exists := store.Bag[lnk] + content, exists := store.Bag[key] if !exists { return nil, fmt.Errorf("404") // FIXME this needs a standard error type } - return bytes.NewReader(data), nil + cpy := make([]byte, len(content)) + copy(cpy, content) + return cpy, nil } -func (store *Store) OpenWrite(lnkCtx linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { +// Put implements go-ipld-prime/storage.WritableStorage.Put. +func (store *Store) Put(ctx context.Context, key string, content []byte) error { store.beInitialized() - buf := bytes.Buffer{} - return &buf, func(lnk datamodel.Link) error { - store.Bag[lnk] = buf.Bytes() + if _, exists := store.Bag[key]; exists { return nil - }, nil + } + cpy := make([]byte, len(content)) + copy(cpy, content) + store.Bag[key] = cpy + return nil +} + +// GetStream implements go-ipld-prime/storage.StreamingReadableStorage.GetStream. +// +// It's useful for this storage implementation to explicitly support this, +// because returning a reader gives us room to avoid needing a defensive copy. +func (store *Store) GetStream(ctx context.Context, key string) (io.Reader, error) { + content, exists := store.Bag[key] + if !exists { + return nil, fmt.Errorf("404") // FIXME this needs a standard error type + } + return bytes.NewReader(content), nil } + +// Peek implements go-ipld-prime/storage.PeekableStorage.Peek. +func (store *Store) Peek(ctx context.Context, key string) ([]byte, io.Closer, error) { + content, exists := store.Bag[key] + if !exists { + return nil, nil, fmt.Errorf("404") // FIXME this needs a standard error type + } + return content, noopCloser{}, nil +} + +type noopCloser struct{} + +func (noopCloser) Close() error { return nil } diff --git a/traversal/focus_test.go b/traversal/focus_test.go index 90d10d4a..a74b251f 100644 --- a/traversal/focus_test.go +++ b/traversal/focus_test.go @@ -68,7 +68,7 @@ func encode(n datamodel.Node) (datamodel.Node, datamodel.Link) { MhLength: 4, }} lsys := cidlink.DefaultLinkSystem() - lsys.StorageWriteOpener = (&store).OpenWrite + lsys.SetWriteStorage(&store) lnk, err := lsys.Store(linking.LinkContext{}, lp, n) if err != nil { @@ -144,7 +144,7 @@ func TestFocusWithLinkLoading(t *testing.T) { }) t.Run("link traversal with loader should work", func(t *testing.T) { lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) err := traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys, @@ -174,7 +174,7 @@ func TestGetWithLinkLoading(t *testing.T) { }) t.Run("link traversal with loader should work", func(t *testing.T) { lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) n, err := traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys, @@ -316,8 +316,8 @@ func TestFocusedTransform(t *testing.T) { func TestFocusedTransformWithLinks(t *testing.T) { var store2 = memstore.Store{} lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead - lsys.StorageWriteOpener = (&store2).OpenWrite + lsys.SetReadStorage(&store) + lsys.SetWriteStorage(&store2) cfg := traversal.Config{ LinkSystem: lsys, LinkTargetNodePrototypeChooser: basicnode.Chooser, diff --git a/traversal/walk_test.go b/traversal/walk_test.go index b0da82de..9f7697c5 100644 --- a/traversal/walk_test.go +++ b/traversal/walk_test.go @@ -16,6 +16,7 @@ import ( "github.com/ipld/go-ipld-prime/linking" cidlink 
"github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/storage" "github.com/ipld/go-ipld-prime/traversal" "github.com/ipld/go-ipld-prime/traversal/selector" "github.com/ipld/go-ipld-prime/traversal/selector/builder" @@ -130,7 +131,7 @@ func TestWalkMatching(t *testing.T) { s, err := ss.Selector() var order int lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) err = traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys, @@ -174,7 +175,7 @@ func TestWalkMatching(t *testing.T) { s, err := ss.Selector() var order int lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) err = traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys, @@ -217,7 +218,7 @@ func TestWalkMatching(t *testing.T) { s, err := ss.Selector() var order int lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) err = traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys, @@ -302,7 +303,7 @@ func TestWalkBudgets(t *testing.T) { qt.Assert(t, err, qt.Equals, nil) var order int lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) err = traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys, @@ -416,12 +417,16 @@ func TestWalkBlockLoadOrder(t *testing.T) { t.Run("CommonSelector_MatchAllRecursively", func(t *testing.T) { s := selectorparse.CommonSelector_MatchAllRecursively - verifySelectorLoads(t, expectedAllBlocks, s, false, (&store).OpenRead) + verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { + return storage.GetStream(lctx.Ctx, &store, lnk.Binary()) + }) }) t.Run("CommonSelector_ExploreAllRecursively", func(t *testing.T) { s := selectorparse.CommonSelector_ExploreAllRecursively - verifySelectorLoads(t, expectedAllBlocks, s, false, (&store).OpenRead) + verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { + return storage.GetStream(lctx.Ctx, &store, lnk.Binary()) + }) }) t.Run("constructed explore-all selector", func(t *testing.T) { @@ -430,7 +435,9 @@ func TestWalkBlockLoadOrder(t *testing.T) { s := ssb.ExploreRecursive(selector.RecursionLimitNone(), ssb.ExploreAll(ssb.ExploreRecursiveEdge())). 
Node() - verifySelectorLoads(t, expectedAllBlocks, s, false, (&store).OpenRead) + verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { + return storage.GetStream(lctx.Ctx, &store, lnk.Binary()) + }) }) t.Run("explore-all with duplicate load skips via SkipMe", func(t *testing.T) { @@ -463,7 +470,7 @@ func TestWalkBlockLoadOrder(t *testing.T) { return nil, traversal.SkipMe{} } visited[l] = true - return (&store).OpenRead(lc, l) + return storage.GetStream(lc.Ctx, &store, l.Binary()) }) }) @@ -479,6 +486,8 @@ func TestWalkBlockLoadOrder(t *testing.T) { middleMapNodeLnk, } s := selectorparse.CommonSelector_ExploreAllRecursively - verifySelectorLoads(t, expectedLinkRevisitBlocks, s, true, (&store).OpenRead) + verifySelectorLoads(t, expectedLinkRevisitBlocks, s, true, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) { + return storage.GetStream(lctx.Ctx, &store, lnk.Binary()) + }) }) } diff --git a/traversal/walk_with_stop_test.go b/traversal/walk_with_stop_test.go index a67fdb55..7755cf70 100644 --- a/traversal/walk_with_stop_test.go +++ b/traversal/walk_with_stop_test.go @@ -93,7 +93,7 @@ func TestStopAtLink(t *testing.T) { } var order int lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) err = traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys, @@ -219,7 +219,7 @@ func stopAtInChainTest(t *testing.T, chainNode datamodel.Node, stopLnk datamodel var order int lsys := cidlink.DefaultLinkSystem() - lsys.StorageReadOpener = (&store).OpenRead + lsys.SetReadStorage(&store) err = traversal.Progress{ Cfg: &traversal.Config{ LinkSystem: lsys,