Skip to content

Commit

Permalink
perf(raft): Use raft storage in managed mode (#6457)
Browse files Browse the repository at this point in the history
This PR changes how we use badger in the wal store. Currently we run
badger in normal mode for the `w` and `zw` stores. We've seen up to 900K
entries for the same hard state (hs) key in the `zw` store. These duplicate
keys cause spikes in read latencies in `zw`. The `w` store undergoes more
compactions than the `zw` store, so it has less stale data and
thus fewer spikes in read latencies.

The fix here is to open the `w` and `zw` directories in managed mode and
perform all writes at the same timestamp (the max version in the DB). This
leads to close to 0 duplicates in the store.

This PR also fixes the raft leader election issue which is a result of high
read latencies.
  • Loading branch information
Ibrahim Jarif authored Sep 21, 2020
1 parent 7e31cb9 commit 6882e37
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 27 deletions.
2 changes: 1 addition & 1 deletion conn/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func TestProposal(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := badger.Open(badger.DefaultOptions(dir))
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
store := raftwal.Init(db, 0, 0)
defer store.Closer.SignalAndWait()
Expand Down
2 changes: 1 addition & 1 deletion dgraph/cmd/zero/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ func run() {
}
glog.Infof("Opening zero BadgerDB with options: %+v\n", kvOpt)

kv, err := badger.Open(kvOpt)
kv, err := badger.OpenManaged(kvOpt)
x.Checkf(err, "Error while opening WAL store")
defer kv.Close()

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ require (
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f // indirect
github.com/blevesearch/snowballstem v0.0.0-20180110192139-26b06a2c243d // indirect
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd
github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200915175413-c1fe0ecee5f3
github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200921173231-cde0cedc431e
github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6
github.com/dgraph-io/graphql-transport-ws v0.0.0-20200916064635-48589439591b
github.com/dgraph-io/ristretto v0.0.4-0.20200915135229-0f2ad8c2c06a
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgraph-io/badger v1.6.0 h1:DshxFxZWXUcO0xX476VJC07Xsr6ZCBVRHKZ93Oh7Evo=
github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200915175413-c1fe0ecee5f3 h1:nKYvaB+Guf+2OgWXiN/xXUasFzi/YutD8S2jgWmE0gA=
github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200915175413-c1fe0ecee5f3/go.mod h1:2uGEvGm+JSDLd5UAaKIFSbXDcYyeH0fWJP4N2HMMYMI=
github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200921173231-cde0cedc431e h1:hl0v/svPfPrA1kMG7pQa9MrcDsrYlXhhaXyD8n5BbQE=
github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200921173231-cde0cedc431e/go.mod h1:2uGEvGm+JSDLd5UAaKIFSbXDcYyeH0fWJP4N2HMMYMI=
github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6 h1:toHzMCdCUgYsjM0cW9+wafnKFXfp1HizIJUyzihN+vk=
github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6/go.mod h1:rHa+h3kI4M8ASOirxyIyNeXBfHFgeskVUum2OrDMN3U=
github.com/dgraph-io/graphql-transport-ws v0.0.0-20200916064635-48589439591b h1:PDEhlwHpkEQ5WBfOOKZCNZTXFDGyCEWTYDhxGQbyIpk=
Expand Down
65 changes: 55 additions & 10 deletions raftwal/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package raftwal
import (
"bytes"
"encoding/binary"
"fmt"
"math"
"sync"

Expand All @@ -34,12 +35,16 @@ import (
"golang.org/x/net/trace"
)

// versionKey is the numeric suffix formatted into the special "maxVersion-%d" key.
// fetchMaxVersion reads that key's version to obtain the timestamp used for raftwal writes.
const versionKey = 1

// DiskStorage handles disk access and writing for the RAFT write-ahead log.
type DiskStorage struct {
db *badger.DB
id uint64
gid uint32
elog trace.EventLog
db *badger.DB
commitTs uint64
id uint64
gid uint32
elog trace.EventLog

cache *sync.Map
Closer *z.Closer
Expand All @@ -60,6 +65,8 @@ func Init(db *badger.DB, id uint64, gid uint32) *DiskStorage {
Closer: z.NewCloser(1),
indexRangeChan: make(chan indexRange, 16),
}

w.fetchMaxVersion()
if prev, err := RaftId(db); err != nil || prev != id {
x.Check(w.StoreRaftId(id))
}
Expand Down Expand Up @@ -88,14 +95,43 @@ func Init(db *badger.DB, id uint64, gid uint32) *DiskStorage {
return w
}

// fetchMaxVersion initializes w.commitTs, the timestamp the raftwal commits its writes at.
// When the marker key "maxVersion-<versionKey>" is present, the version of that item is
// used. When the key is missing, commitTs becomes db.MaxVersion()+1 and the marker key
// is written at that timestamp for reuse. Any other lookup error is handed to x.Check.
func (w *DiskStorage) fetchMaxVersion() {
	// Marker key whose item version records the commit timestamp.
	markerKey := []byte(fmt.Sprintf("maxVersion-%d", versionKey))

	tx := w.db.NewTransactionAt(math.MaxUint64, true)
	defer tx.Discard()

	item, getErr := tx.Get(markerKey)
	switch getErr {
	case nil:
		// Marker already stored: take the timestamp from its version.
		w.commitTs = item.Version()
	case badger.ErrKeyNotFound:
		// No marker yet: derive the timestamp from the MaxVersion API.
		maxVersion, err := w.db.MaxVersion()
		x.Check(err)

		w.commitTs = maxVersion + 1
		// Persist the marker at the chosen timestamp so it can be read back later.
		x.Check(tx.Set(markerKey, nil))
		x.Check(tx.CommitAt(w.commitTs, nil))
	default:
		x.Check(getErr)
	}
}

func (w *DiskStorage) processIndexRange() {
defer w.Closer.Done()

processSingleRange := func(r indexRange) {
if r.from == r.until {
return
}
batch := w.db.NewWriteBatch()
batch := w.db.NewWriteBatchAt(w.commitTs)
if err := w.deleteRange(batch, r.from, r.until); err != nil {
glog.Errorf("deleteRange failed with error: %v, from: %d, until: %d\n",
err, r.from, r.until)
Expand Down Expand Up @@ -192,9 +228,18 @@ func (w *DiskStorage) entryPrefix() []byte {
return b
}

// update runs cb inside a read-write transaction opened at math.MaxUint64 and, if cb
// succeeds, commits that transaction at w.commitTs. An error from cb is returned as-is
// without committing; otherwise the result of the commit is returned.
func (w *DiskStorage) update(cb func(txn *badger.Txn) error) error {
	tx := w.db.NewTransactionAt(math.MaxUint64, true)
	defer tx.Discard()

	err := cb(tx)
	if err == nil {
		// Commit only when the callback reported no error.
		err = tx.CommitAt(w.commitTs, nil)
	}
	return err
}

// StoreRaftId stores the given RAFT ID in disk.
func (w *DiskStorage) StoreRaftId(id uint64) error {
return w.db.Update(func(txn *badger.Txn) error {
return w.update(func(txn *badger.Txn) error {
var b [8]byte
binary.BigEndian.PutUint64(b[:], id)
return txn.Set(idKey, b[:])
Expand All @@ -203,7 +248,7 @@ func (w *DiskStorage) StoreRaftId(id uint64) error {

// UpdateCheckpoint writes the given snapshot to disk.
func (w *DiskStorage) UpdateCheckpoint(snap *pb.Snapshot) error {
return w.db.Update(func(txn *badger.Txn) error {
return w.update(func(txn *badger.Txn) error {
data, err := snap.Marshal()
if err != nil {
return err
Expand Down Expand Up @@ -453,7 +498,7 @@ func (w *DiskStorage) reset(es []raftpb.Entry) error {
w.cache = new(sync.Map) // reset cache.

// Clean out the state.
batch := w.db.NewWriteBatch()
batch := w.db.NewWriteBatchAt(w.commitTs)
defer batch.Cancel()

if err := w.deleteFrom(batch, 0); err != nil {
Expand Down Expand Up @@ -679,7 +724,7 @@ func (w *DiskStorage) CreateSnapshot(i uint64, cs *raftpb.ConfState, data []byte
snap.Metadata.ConfState = *cs
snap.Data = data

batch := w.db.NewWriteBatch()
batch := w.db.NewWriteBatchAt(w.commitTs)
defer batch.Cancel()
if err := w.setSnapshot(batch, &snap); err != nil {
return err
Expand All @@ -701,7 +746,7 @@ func (w *DiskStorage) CreateSnapshot(i uint64, cs *raftpb.ConfState, data []byte
// writes then all of them can be written together. Note that when writing an Entry with Index i,
// any previously-persisted entries with Index >= i must be discarded.
func (w *DiskStorage) Save(h *raftpb.HardState, es []raftpb.Entry, snap *raftpb.Snapshot) error {
batch := w.db.NewWriteBatch()
batch := w.db.NewWriteBatchAt(w.commitTs)
defer batch.Cancel()

if err := w.addEntries(batch, es); err != nil {
Expand Down
20 changes: 10 additions & 10 deletions raftwal/storage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func TestStorageTerm(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := badger.Open(badger.DefaultOptions(dir))
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
ds := Init(db, 0, 0)
defer ds.Closer.SignalAndWait()
Expand Down Expand Up @@ -101,7 +101,7 @@ func TestStorageEntries(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := badger.Open(badger.DefaultOptions(dir))
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
ds := Init(db, 0, 0)
defer ds.Closer.SignalAndWait()
Expand Down Expand Up @@ -147,7 +147,7 @@ func TestStorageLastIndex(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := badger.Open(badger.DefaultOptions(dir))
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
ds := Init(db, 0, 0)
defer ds.Closer.SignalAndWait()
Expand Down Expand Up @@ -178,7 +178,7 @@ func TestStorageFirstIndex(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := badger.Open(badger.DefaultOptions(dir))
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
ds := Init(db, 0, 0)
defer ds.Closer.SignalAndWait()
Expand All @@ -194,7 +194,7 @@ func TestStorageFirstIndex(t *testing.T) {
t.Errorf("first = %d, want %d", first, 4)
}

batch := db.NewWriteBatch()
batch := db.NewWriteBatchAt(ds.commitTs)
require.NoError(t, ds.deleteRange(batch, 0, 4))
require.NoError(t, batch.Flush())
ds.cache.Store(firstKey, 0)
Expand All @@ -212,7 +212,7 @@ func TestStorageCompact(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := badger.Open(badger.DefaultOptions(dir))
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
ds := Init(db, 0, 0)
defer ds.Closer.SignalAndWait()
Expand All @@ -237,7 +237,7 @@ func TestStorageCompact(t *testing.T) {
for i, tt := range tests {
first, err := ds.FirstIndex()
require.NoError(t, err)
batch := db.NewWriteBatch()
batch := db.NewWriteBatchAt(ds.commitTs)
err = ds.deleteRange(batch, first-1, tt.i)
require.NoError(t, batch.Flush())
if err != tt.werr {
Expand All @@ -264,7 +264,7 @@ func TestStorageCreateSnapshot(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := badger.Open(badger.DefaultOptions(dir))
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
ds := Init(db, 0, 0)
defer ds.Closer.SignalAndWait()
Expand Down Expand Up @@ -302,7 +302,7 @@ func TestStorageAppend(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := badger.Open(badger.DefaultOptions(dir))
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
ds := Init(db, 0, 0)
defer ds.Closer.SignalAndWait()
Expand Down Expand Up @@ -351,7 +351,7 @@ func TestStorageAppend(t *testing.T) {

for i, tt := range tests {
require.NoError(t, ds.reset(ents))
batch := db.NewWriteBatch()
batch := db.NewWriteBatchAt(ds.commitTs)
err := ds.addEntries(batch, tt.entries)
if err != tt.werr {
t.Errorf("#%d: err = %v, want %v", i, err, tt.werr)
Expand Down
2 changes: 1 addition & 1 deletion worker/draft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func TestCalculateSnapshot(t *testing.T) {
require.NoError(t, err)
defer os.RemoveAll(dir)

db, err := openBadger(dir)
db, err := badger.OpenManaged(badger.DefaultOptions(dir))
require.NoError(t, err)
ds := raftwal.Init(db, 0, 0)
defer ds.Closer.SignalAndWait()
Expand Down
2 changes: 1 addition & 1 deletion worker/server_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func (s *ServerState) initStorage() {
glog.Infof("Opening write-ahead log BadgerDB with options: %+v\n", opt)
opt.EncryptionKey = key

s.WALstore, err = badger.Open(opt)
s.WALstore, err = badger.OpenManaged(opt)
x.Checkf(err, "Error while creating badger KV WAL store")
}
{
Expand Down

0 comments on commit 6882e37

Please sign in to comment.