Fix discard stats moved by GC bug (#929) (#1098)
The discard stats are stored like normal keys in badger, which means that if
the size of the value of the discard stats key is greater than the value
threshold, it will be stored in the value log. When value log GC happens, we
insert the same key into badger with a !badger!move prefix, which points to
the new value log file. So when searching for the value of a key, if the value
points to a value log file that doesn't exist, we search for the same key with
the !badger!move prefix.

The above logic was missing from the populateDiscardStats function. The
earlier implementation never searched for the key with the !badger!move
prefix; it returned an error on the first attempt to find the key. This
commit ensures we search for the key with the !badger!move prefix if the
value for the existing key points to a value log file that no longer exists.
Ibrahim Jarif committed Oct 24, 2019
1 parent 64eb1df commit efb9d9d
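
To make the fallback concrete, here is a minimal, self-contained Go sketch of the lookup pattern the commit message describes. This is not Badger's actual API: the store and valuePtr types, their fields, and the example keys are illustrative stand-ins; only the !badger!move prefix and the retry-with-prefix idea come from the commit.

package main

import (
	"bytes"
	"fmt"
)

// The special prefix named in the commit message.
var badgerMove = []byte("!badger!move")

// valuePtr and store are simplified stand-ins for Badger's value pointers,
// LSM tree, and value log files; they exist only to demonstrate the fallback.
type valuePtr struct{ fid uint32 }

type store struct {
	keys      map[string]valuePtr // key -> pointer into a value log file
	vlogFiles map[uint32][]byte   // value log file id -> stored value
}

// get resolves key to its value. If the pointed-to value log file no longer
// exists (it was rewritten by GC), it retries once with the !badger!move
// prefix prepended, which is where GC re-inserted the entry.
func (s *store) get(key []byte) ([]byte, error) {
	for {
		vp, ok := s.keys[string(key)]
		if !ok {
			return nil, fmt.Errorf("key not found: %q", key)
		}
		if val, ok := s.vlogFiles[vp.fid]; ok {
			return val, nil
		}
		// Value log file is gone. If we already tried the move-prefixed key,
		// give up; otherwise prepend the prefix and search again.
		if bytes.HasPrefix(key, badgerMove) {
			return nil, fmt.Errorf("value log file %d missing even for moved key", vp.fid)
		}
		key = append(append([]byte{}, badgerMove...), key...)
	}
}

func main() {
	s := &store{
		keys: map[string]valuePtr{
			"discard-stats":                      {fid: 1}, // original entry; file 1 was deleted by GC
			string(badgerMove) + "discard-stats": {fid: 2}, // entry moved by GC into file 2
		},
		vlogFiles: map[uint32][]byte{2: []byte(`{"0":42}`)},
	}
	val, err := s.get([]byte("discard-stats"))
	fmt.Println(string(val), err) // prints: {"0":42} <nil>
}

The loop mirrors the structure of the fixed populateDiscardStats in the diff below: try the plain key first, and if its value pointer references a value log file that GC has already deleted, prepend the move prefix and try exactly once more.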
Showing 2 changed files with 98 additions and 22 deletions.
64 changes: 42 additions & 22 deletions value.go
@@ -1420,34 +1420,54 @@ func (vlog *valueLog) encodedDiscardStats() []byte {
 // populateDiscardStats populates vlog.lfDiscardStats
 // This function will be called while initializing valueLog
 func (vlog *valueLog) populateDiscardStats() error {
-    discardStatsKey := y.KeyWithTs(lfDiscardStatsKey, math.MaxUint64)
-    vs, err := vlog.db.get(discardStatsKey)
-    if err != nil {
-        return err
-    }
-
-    // check if value is Empty
-    if vs.Value == nil || len(vs.Value) == 0 {
-        return nil
-    }
-
+    key := y.KeyWithTs(lfDiscardStatsKey, math.MaxUint64)
     var statsMap map[uint32]int64
-    // discard map is stored in the vlog file.
-    if vs.Meta&bitValuePointer > 0 {
-        var vp valuePointer
+    var val []byte
+    var vp valuePointer
+    for {
+        vs, err := vlog.db.get(key)
+        if err != nil {
+            return err
+        }
+        // Value doesn't exist.
+        if vs.Meta == 0 && len(vs.Value) == 0 {
+            vlog.opt.Debugf("Value log discard stats empty")
+            return nil
+        }
         vp.Decode(vs.Value)
+        // Entry stored in LSM tree.
+        if vs.Meta&bitValuePointer == 0 {
+            val = y.SafeCopy(val, vs.Value)
+            break
+        }
+        // Read entry from value log.
         result, cb, err := vlog.Read(vp, new(y.Slice))
-        if err != nil {
-            return errors.Wrapf(err, "failed to read value pointer from vlog file: %+v", vp)
+        runCallback(cb)
+        val = y.SafeCopy(val, result)
+        // The result is stored in val. We can break the loop from here.
+        if err == nil {
+            break
         }
-        defer runCallback(cb)
-        if err := json.Unmarshal(result, &statsMap); err != nil {
-            return errors.Wrapf(err, "failed to unmarshal discard stats")
+        if err != ErrRetry {
+            return err
         }
-    } else {
-        if err := json.Unmarshal(vs.Value, &statsMap); err != nil {
-            return errors.Wrapf(err, "failed to unmarshal discard stats")
+        // If we're at this point it means we haven't found the value yet and if the current key has
+        // badger move prefix, we should break from here since we've already tried the original key
+        // and the key with move prefix. "val" would be empty since we haven't found the value yet.
+        if bytes.HasPrefix(key, badgerMove) {
+            break
         }
+        // If we're at this point it means the discard stats key was moved by the GC and the actual
+        // entry is the one prefixed by badger move key.
+        // Prepend existing key with badger move and search for the key.
+        key = append(badgerMove, key...)
     }
+
+    if len(val) == 0 {
+        return nil
+    }
+    if err := json.Unmarshal(val, &statsMap); err != nil {
+        return errors.Wrapf(err, "failed to unmarshal discard stats")
+    }
     vlog.opt.Debugf("Value Log Discard stats: %v", statsMap)
     vlog.lfDiscardStats = &lfDiscardStats{m: statsMap}
56 changes: 56 additions & 0 deletions value_test.go
@@ -974,3 +974,59 @@ func TestValueLogTruncate(t *testing.T) {
     require.Equal(t, 2, int(db.vlog.maxFid))
     require.NoError(t, db.Close())
 }
+
+// Regression test for https://github.com/dgraph-io/badger/issues/926
+func TestDiscardStatsMove(t *testing.T) {
+    dir, err := ioutil.TempDir("", "badger-test")
+    require.NoError(t, err)
+    ops := getTestOptions(dir)
+    ops.ValueLogMaxEntries = 1
+    db, err := Open(ops)
+    require.NoError(t, err)
+
+    stat := make(map[uint32]int64, ops.ValueThreshold+10)
+    for i := uint32(0); i < uint32(ops.ValueThreshold+10); i++ {
+        stat[i] = 0
+    }
+
+    // Set discard stats.
+    db.vlog.lfDiscardStats = &lfDiscardStats{
+        m: stat,
+    }
+    entries := []*Entry{{
+        Key: y.KeyWithTs(lfDiscardStatsKey, 1),
+        // The discard stat value is more than value threshold.
+        Value: db.vlog.encodedDiscardStats(),
+    }}
+    // Push discard stats entry to the write channel.
+    req, err := db.sendToWriteCh(entries)
+    require.NoError(t, err)
+    req.Wait()
+
+    // Unset discard stats. We've already pushed the stats. If we don't unset it then it will be
+    // pushed again on DB close. Also, the first insertion was in vlog file 1, this insertion would
+    // be in value log file 3.
+    db.vlog.lfDiscardStats.m = nil
+
+    // Push more entries so that we get more than 1 value log files.
+    require.NoError(t, db.Update(func(txn *Txn) error {
+        e := NewEntry([]byte("f"), []byte("1"))
+        return txn.SetEntry(e)
+    }))
+    require.NoError(t, db.Update(func(txn *Txn) error {
+        e := NewEntry([]byte("ff"), []byte("1"))
+        return txn.SetEntry(e)
+
+    }))
+
+    tr := trace.New("Badger.ValueLog", "GC")
+    // Use first value log file for GC. This value log file contains the discard stats.
+    lf := db.vlog.filesMap[0]
+    require.NoError(t, db.vlog.rewrite(lf, tr))
+    require.NoError(t, db.Close())
+
+    db, err = Open(ops)
+    require.NoError(t, err)
+    require.Equal(t, stat, db.vlog.lfDiscardStats.m)
+    require.NoError(t, db.Close())
+}
