Skip to content

Commit

Permalink
Add sstable index
Browse files Browse the repository at this point in the history
Reads an index file which stores each key and the offset in the data
file at which the records can be found. Backed by btree.BTree.

Still not sure what degree to use when creating the BTree, but 2 is
probably too small.
  • Loading branch information
antw committed Dec 20, 2021
1 parent 4083e16 commit abf76f8
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 0 deletions.
109 changes: 109 additions & 0 deletions internal/sstable/index.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package sstable

import (
"bufio"
"encoding/binary"
"io"
"os"

"github.com/google/btree"
)

// index stores the offset of each key in the SSTable file.
//
// Entries are held in memory as indexEntry items inside a B-tree ordered by key; newIndex
// populates the tree and get performs the lookups.
type index struct {
// tree holds one indexEntry per distinct key.
tree *btree.BTree
}

// newIndex builds an in-memory index by reading every record from f, starting at the file's
// current offset and continuing until EOF.
//
// Each record is encoded as a uvarint key length, the key bytes, and a uvarint position, which
// is the layout produced by indexWriter.Write. When a key occurs more than once, the last
// occurrence replaces the earlier ones.
//
// Reaching EOF on a record boundary ends the load successfully. EOF in the middle of a record
// is reported as io.ErrUnexpectedEOF; any other read error is returned unchanged.
func newIndex(f *os.File) (*index, error) {
	// NOTE(review): the tree degree is still a placeholder; 2 is probably too small for a
	// large index and should be tuned.
	tree := btree.New(2)
	r := bufio.NewReader(f)

	for {
		// Peek tells a clean end of input (EOF exactly on a record boundary) apart from a
		// truncated record, which the reads below report as an error.
		if _, err := r.Peek(1); err != nil {
			if err == io.EOF {
				break
			}
			return nil, err
		}

		keyLen, err := binary.ReadUvarint(r)
		if err != nil {
			return nil, unexpectedEOF(err)
		}

		// NOTE(review): keyLen comes straight from the file, so a corrupt index can request
		// an arbitrarily large allocation here. Consider enforcing a maximum key length.
		key := make([]byte, keyLen)

		// bufio.Reader.Read may return fewer bytes than requested (for example when the key
		// crosses the reader's internal buffer boundary), which would silently truncate the
		// key and desynchronise the rest of the stream. io.ReadFull reads exactly keyLen
		// bytes or fails.
		if _, err := io.ReadFull(r, key); err != nil {
			return nil, unexpectedEOF(err)
		}

		pos, err := binary.ReadUvarint(r)
		if err != nil {
			return nil, unexpectedEOF(err)
		}

		tree.ReplaceOrInsert(indexEntry{key: string(key), pos: pos})
	}

	return &index{tree: tree}, nil
}

// unexpectedEOF converts io.EOF into io.ErrUnexpectedEOF and returns every other error as is.
// It is used for reads that happen after a record has already started, where running out of
// input means the file is truncated rather than finished.
func unexpectedEOF(err error) error {
	if err == io.EOF {
		return io.ErrUnexpectedEOF
	}
	return err
}

// get looks up key in the index. On a hit it returns the position recorded for the key and
// true; on a miss it returns 0 and false.
func (i *index) get(key string) (uint64, bool) {
	// Only the key takes part in the ordering, so the probe's pos value is irrelevant.
	if found := i.tree.Get(indexEntry{key: key}); found != nil {
		return found.(indexEntry).pos, true
	}

	return 0, false
}

// -------------------------------------------------------------------------------------------------

// indexEntry is the item stored in the index tree: a key paired with the position recorded
// for it in the index file. Entries are ordered by key alone (see Less).
type indexEntry struct {
// key is the record key; it is the only field used for ordering.
key string
// pos is the position value read from, or written to, the index file for this key.
pos uint64
}

// Less implements btree.Item. Entries are ordered by key only, so two entries with the same
// key are treated as equal by the tree regardless of their positions. It panics if than is
// not an indexEntry.
func (ie indexEntry) Less(than btree.Item) bool {
	other := than.(indexEntry)

	return ie.key < other.key
}

// -------------------------------------------------------------------------------------------------

// indexWriter serialises index records to a buffered writer. Records written with Write stay
// in the buffer until it fills or Flush is called.
type indexWriter struct {
// buf is the buffered destination for encoded records.
buf *bufio.Writer
}

// Write appends one index record to the buffered output. The record is encoded as a uvarint
// key length, the raw key bytes, and pos as a uvarint, which is the layout newIndex reads back.
//
// The data is buffered; call Flush to push it to the underlying writer. The first error
// reported by the buffered writer is returned.
func (i *indexWriter) Write(key string, pos uint64) error {
	if err := writeUvarint(i.buf, uint64(len(key))); err != nil {
		return err
	}

	// WriteString copies the key straight into the buffer, avoiding the temporary []byte(key)
	// allocation on every record.
	if _, err := i.buf.WriteString(key); err != nil {
		return err
	}

	return writeUvarint(i.buf, pos)
}

// Flush writes any buffered records to the underlying writer and returns the error reported
// by the buffered writer, if any.
func (i *indexWriter) Flush() error {
return i.buf.Flush()
}

// writeUvarint encodes x as an unsigned varint and writes the encoded bytes to w in a single
// Write call. It returns the error from that call, if any.
func writeUvarint(w io.Writer, x uint64) error {
	// MaxVarintLen64 bytes are enough for any uint64 value.
	var scratch [binary.MaxVarintLen64]byte
	encoded := scratch[:binary.PutUvarint(scratch[:], x)]

	if _, err := w.Write(encoded); err != nil {
		return err
	}

	return nil
}
62 changes: 62 additions & 0 deletions internal/sstable/index_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package sstable

import (
"bufio"
"io/ioutil"
"os"
"testing"

"github.com/stretchr/testify/require"
)

// TestIndex writes a handful of key/position records with indexWriter, reloads them with
// newIndex, and checks that every key resolves to the position it was written with.
func TestIndex(t *testing.T) {
	f, err := ioutil.TempFile("", "index_test")
	require.NoError(t, err)
	// Close the handle before removing the file: leaving it open leaks a descriptor and makes
	// the removal fail on platforms that refuse to delete open files.
	defer func() {
		_ = f.Close()
		_ = os.Remove(f.Name())
	}()

	writer := indexWriter{bufio.NewWriter(f)}

	// Keys are deliberately not in sorted order.
	pairs := []struct {
		key string
		pos uint64
	}{
		{"foo", 0},
		{"bar", 10},
		{"baz", 512},
		{"qux", 1024},
	}

	for _, pair := range pairs {
		err = writer.Write(pair.key, pair.pos)
		require.NoError(t, err)
	}

	err = writer.Flush()
	require.NoError(t, err)

	// Rewind so newIndex reads the records that were just written.
	_, err = f.Seek(0, 0)
	require.NoError(t, err)

	index, err := newIndex(f)
	require.NoError(t, err)

	for _, pair := range pairs {
		pos, ok := index.get(pair.key)

		require.True(t, ok)
		require.Equal(t, pair.pos, pos)
	}
}

// TestIndexNoKey loads an index from an empty file and checks that looking up a key reports
// a miss with a zero position.
func TestIndexNoKey(t *testing.T) {
	f, err := ioutil.TempFile("", "index_no_key_test")
	require.NoError(t, err)
	// Close the handle before removing the file: leaving it open leaks a descriptor and makes
	// the removal fail on platforms that refuse to delete open files.
	defer func() {
		_ = f.Close()
		_ = os.Remove(f.Name())
	}()

	index, err := newIndex(f)
	require.NoError(t, err)

	pos, ok := index.get("not-exist")
	require.False(t, ok)
	require.Equal(t, uint64(0), pos)
}

0 comments on commit abf76f8

Please sign in to comment.