Skip to content

Commit

Permalink
[dbnode][m3ninx] Add support for reading document containers from an …
Browse files Browse the repository at this point in the history
…index (#3050)

Reading raw document metadata from an index can end up
being rather expensive for metadata with a lot of tags.
This commit introduces the concept of encoded metadata,
which wraps the relevant section of bytes and provides
an efficient reader to retrieve the non-encoded metadata
without ballooning memory usage. Additionally, introduce a
concept of a document that wraps either raw metadata
or encoded metadata, which can be used regardless of
whether an index segment is backed by metadata (from memory)
or encoded metadata (read from disk).
  • Loading branch information
nbroyles authored Jan 8, 2021
1 parent 3fb3218 commit 7917646
Show file tree
Hide file tree
Showing 33 changed files with 963 additions and 245 deletions.
48 changes: 24 additions & 24 deletions src/dbnode/storage/index/block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,8 @@ func TestBlockQueryAddResultsSegmentsError(t *testing.T) {

b.mutableSegments.foregroundSegments = []*readableSeg{newReadableSeg(seg1, testOpts)}
b.shardRangesSegmentsByVolumeType = map[idxpersist.IndexVolumeType][]blockShardRangesSegments{
idxpersist.DefaultIndexVolumeType: []blockShardRangesSegments{
blockShardRangesSegments{segments: []segment.Segment{seg2, seg3}},
idxpersist.DefaultIndexVolumeType: {
{segments: []segment.Segment{seg2, seg3}},
},
}

Expand Down Expand Up @@ -518,7 +518,7 @@ func TestBlockMockQueryExecutorExecIterErr(t *testing.T) {
return exec, nil
}

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(true),
Expand Down Expand Up @@ -559,7 +559,7 @@ func TestBlockMockQueryExecutorExecLimit(t *testing.T) {
return exec, nil
}

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(true),
Expand Down Expand Up @@ -610,7 +610,7 @@ func TestBlockMockQueryExecutorExecIterCloseErr(t *testing.T) {
return exec, nil
}

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(false),
Expand Down Expand Up @@ -649,7 +649,7 @@ func TestBlockMockQuerySeriesLimitNonExhaustive(t *testing.T) {
return exec, nil
}

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(true),
Expand Down Expand Up @@ -699,7 +699,7 @@ func TestBlockMockQuerySeriesLimitExhaustive(t *testing.T) {
return exec, nil
}

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(true),
Expand Down Expand Up @@ -751,7 +751,7 @@ func TestBlockMockQueryDocsLimitNonExhaustive(t *testing.T) {
return exec, nil
}

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(true),
Expand Down Expand Up @@ -801,7 +801,7 @@ func TestBlockMockQueryDocsLimitExhaustive(t *testing.T) {
return exec, nil
}

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(true),
Expand Down Expand Up @@ -860,7 +860,7 @@ func TestBlockMockQueryMergeResultsMapLimit(t *testing.T) {
_, _, err = results.AddDocuments([]doc.Metadata{testDoc1()})
require.NoError(t, err)

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(true),
Expand Down Expand Up @@ -911,7 +911,7 @@ func TestBlockMockQueryMergeResultsDupeID(t *testing.T) {
_, _, err = results.AddDocuments([]doc.Metadata{testDoc1()})
require.NoError(t, err)

dIter := doc.NewMockIterator(ctrl)
dIter := doc.NewMockMetadataIterator(ctrl)
gomock.InOrder(
exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil),
dIter.EXPECT().Next().Return(true),
Expand Down Expand Up @@ -1900,8 +1900,8 @@ func TestBlockAggregate(t *testing.T) {
require.True(t, exhaustive)

assertAggregateResultsMapEquals(t, map[string][]string{
"f1": []string{"t1", "t2", "t3"},
"f2": []string{"t1"},
"f1": {"t1", "t2", "t3"},
"f2": {"t1"},
}, results)

sp.Finish()
Expand Down Expand Up @@ -1976,7 +1976,7 @@ func TestBlockAggregateNotExhaustive(t *testing.T) {
require.False(t, exhaustive)

assertAggregateResultsMapEquals(t, map[string][]string{
"f1": []string{"t1"},
"f1": {"t1"},
}, results)

sp.Finish()
Expand Down Expand Up @@ -2067,8 +2067,8 @@ func TestBlockE2EInsertAggregate(t *testing.T) {
require.NoError(t, err)
require.True(t, exhaustive)
assertAggregateResultsMapEquals(t, map[string][]string{
"bar": []string{"baz", "qux"},
"some": []string{"more", "other"},
"bar": {"baz", "qux"},
"some": {"more", "other"},
}, results)

results = NewAggregateResults(ident.StringID("ns"), AggregateResultsOptions{
Expand All @@ -2085,7 +2085,7 @@ func TestBlockE2EInsertAggregate(t *testing.T) {
require.NoError(t, err)
require.True(t, exhaustive)
assertAggregateResultsMapEquals(t, map[string][]string{
"bar": []string{"baz", "qux"},
"bar": {"baz", "qux"},
}, results)

results = NewAggregateResults(ident.StringID("ns"), AggregateResultsOptions{
Expand Down Expand Up @@ -2162,7 +2162,7 @@ func testDoc1() doc.Metadata {
return doc.Metadata{
ID: []byte("foo"),
Fields: []doc.Field{
doc.Field{
{
Name: []byte("bar"),
Value: []byte("baz"),
},
Expand All @@ -2174,11 +2174,11 @@ func testDoc1DupeID() doc.Metadata {
return doc.Metadata{
ID: []byte("foo"),
Fields: []doc.Field{
doc.Field{
{
Name: []byte("why"),
Value: []byte("not"),
},
doc.Field{
{
Name: []byte("some"),
Value: []byte("more"),
},
Expand All @@ -2190,11 +2190,11 @@ func testDoc2() doc.Metadata {
return doc.Metadata{
ID: []byte("something"),
Fields: []doc.Field{
doc.Field{
{
Name: []byte("bar"),
Value: []byte("baz"),
},
doc.Field{
{
Name: []byte("some"),
Value: []byte("more"),
},
Expand All @@ -2206,11 +2206,11 @@ func testDoc3() doc.Metadata {
return doc.Metadata{
ID: []byte("bar"),
Fields: []doc.Field{
doc.Field{
{
Name: []byte("bar"),
Value: []byte("qux"),
},
doc.Field{
{
Name: []byte("some"),
Value: []byte("other"),
},
Expand Down
12 changes: 11 additions & 1 deletion src/dbnode/storage/index/read_through_segment.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,18 @@ func (s *readThroughSegmentReader) AllDocs() (index.IDDocIterator, error) {
return s.reader.AllDocs()
}

// Metadata forwards the lookup for id directly to the wrapped reader and returns
// whatever it produces; this wrapper adds no caching of its own for this call.
func (s *readThroughSegmentReader) Metadata(id postings.ID) (doc.Metadata, error) {
	metadata, err := s.reader.Metadata(id)
	return metadata, err
}

// MetadataIterator hands the postings list straight to the wrapped reader and
// returns its iterator and error unchanged; this wrapper adds no caching of its
// own for this call.
func (s *readThroughSegmentReader) MetadataIterator(pl postings.List) (doc.MetadataIterator, error) {
	iter, err := s.reader.MetadataIterator(pl)
	return iter, err
}

// Doc is a pass through call, since there's no postings list to cache.
func (s *readThroughSegmentReader) Doc(id postings.ID) (doc.Metadata, error) {
func (s *readThroughSegmentReader) Doc(id postings.ID) (doc.Document, error) {
return s.reader.Doc(id)
}

Expand Down
85 changes: 82 additions & 3 deletions src/m3ninx/doc/doc_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions src/m3ninx/doc/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,47 @@ func (ds Documents) Less(i, j int) bool {
func (ds Documents) Swap(i, j int) {
ds[i], ds[j] = ds[j], ds[i]
}

// Encoded holds document metadata in its serialized form.
type Encoded struct {
	// Bytes is the serialized representation of the metadata.
	Bytes []byte
}

// Document is a container holding exactly one representation of a document's
// metadata: either the raw Metadata or its Encoded form, never both at once.
type Document struct {
	// metadata is the raw representation; meaningful only when hasMetadata is true.
	metadata Metadata
	// encoded is the serialized representation; meaningful only when hasMetadata is false.
	encoded Encoded

	// hasMetadata records which of the two representations this Document carries.
	hasMetadata bool
}

// NewDocumentFromMetadata wraps the raw metadata m in a Document and marks the
// Document as carrying metadata; its encoded field is left at the zero value.
func NewDocumentFromMetadata(m Metadata) Document {
	var d Document
	d.metadata = m
	d.hasMetadata = true
	return d
}

// NewDocumentFromEncoded wraps the serialized metadata e in a Document. The
// hasMetadata flag is left unset, so the Document carries only the encoded form.
func NewDocumentFromEncoded(e Encoded) Document {
	var d Document
	d.encoded = e
	return d
}

// Metadata reports the raw metadata held by the document. The boolean is true
// only when the document carries metadata; otherwise an empty Metadata and false
// are returned.
func (d *Document) Metadata() (Metadata, bool) {
	if !d.hasMetadata {
		return Metadata{}, false
	}

	return d.metadata, true
}

// Encoded reports the serialized metadata held by the document. When the document
// carries raw metadata instead, an empty Encoded and false are returned. Any
// document whose hasMetadata flag is unset, including the zero value, yields its
// encoded field together with true.
func (d *Document) Encoded() (Encoded, bool) {
	if d.hasMetadata {
		return Encoded{}, false
	}

	return d.encoded, true
}
Loading

0 comments on commit 7917646

Please sign in to comment.