Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve tsdb-index and tsdb-index-health. #1503

Merged
merged 4 commits into from
Mar 17, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 64 additions & 4 deletions tools/tsdb-index-health/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package main

import (
"encoding/json"
"flag"
"fmt"
"math"
"os"
Expand All @@ -26,7 +27,10 @@ import (
var logger = log.NewLogfmtLogger(os.Stderr)

func main() {
if len(os.Args) < 2 {
verifyChunks := flag.Bool("check-chunks", false, "Verify chunks in segment files.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: when I use the flag, I get an error saying "Failed to read meta from block dir --check-chunks error: open --check-chunks/meta.json: no such file or directory". This is because on lines 38-42 we don't check whether the CLI arg is a directory or a block.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch, thanks! Updated the PR to use only the remaining (non-flag) CLI arguments as block directories.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A problem here is that the --help output doesn't mention the required argument:

$ ./tools/tsdb-index-health/tsdb-index-health --help
Usage of ./tools/tsdb-index-health/tsdb-index-health:
  -check-chunks
    	Verify chunks in segment files.

flag.Parse()

if flag.NArg() == 0 {
fmt.Println("Usage:", os.Args[0], "<block-dir> [<block-dir> ...]")
pstibrany marked this conversation as resolved.
Show resolved Hide resolved
return
}
Expand All @@ -38,7 +42,7 @@ func main() {
continue
}

stats, err := GatherIndexHealthStats(logger, filepath.Join(b, block.IndexFilename), meta.MinTime, meta.MaxTime)
stats, err := GatherIndexHealthStats(logger, b, meta.MinTime, meta.MaxTime, *verifyChunks)
if err != nil {
fmt.Fprintln(os.Stderr, "Failed to gather health stats from block dir", b, "error:", err)
continue
Expand Down Expand Up @@ -137,8 +141,17 @@ func (n *minMaxSumInt64) Avg() int64 {
return n.sum / n.cnt
}

func GatherIndexHealthStats(logger log.Logger, fn string, minTime, maxTime int64) (stats HealthStats, err error) {
r, err := index.NewFileReader(fn)
func GatherIndexHealthStats(logger log.Logger, blockDir string, minTime, maxTime int64, checkChunks bool) (stats HealthStats, err error) {
var cr *chunks.Reader
if checkChunks {
cr, err = chunks.NewDirReader(filepath.Join(blockDir, block.ChunksDirname), nil)
if err != nil {
return stats, errors.Wrap(err, "open chunks dir")
}
defer runutil.CloseWithErrCapture(&err, cr, "closing chunks reader")
}

r, err := index.NewFileReader(filepath.Join(blockDir, block.IndexFilename))
if err != nil {
return stats, errors.Wrap(err, "open index file")
}
Expand Down Expand Up @@ -274,6 +287,10 @@ func GatherIndexHealthStats(logger log.Logger, fn string, minTime, maxTime int64
} else {
seriesLifeDurationWithoutSingleSampleSeries.Add(seriesLifeTimeMs)
}

if checkChunks {
verifyChunks(logger, cr, lset, chks)
}
}
if p.Err() != nil {
return stats, errors.Wrap(err, "walk postings")
Expand All @@ -300,3 +317,46 @@ func GatherIndexHealthStats(logger log.Logger, fn string, minTime, maxTime int64
stats.ChunkMinDuration = model.Duration(time.Duration(chunkDuration.min) * time.Millisecond)
return stats, nil
}

// verifyChunks reads every chunk referenced by chks from cr and logs any
// inconsistency found between the chunk's samples and its metadata.
//
// For each chunk it checks that:
//   - the chunk can be read from cr (logged at error level otherwise),
//   - iterating over the samples does not fail,
//   - the chunk contains at least one sample,
//   - the first sample's timestamp equals the chunk's MinTime,
//   - sample timestamps are strictly increasing,
//   - the last sample's timestamp equals the chunk's MaxTime.
//
// Problems are reported only through l; nothing is returned to the caller.
// lset is accepted but is not currently used by the checks.
func verifyChunks(l log.Logger, cr *chunks.Reader, lset labels.Labels, chks []chunks.Meta) {
	for _, cm := range chks {
		ch, err := cr.Chunk(cm.Ref)
		if err != nil {
			level.Error(l).Log("msg", "failed to read chunk", "ref", cm.Ref, "err", err)
			continue
		}

		samples := 0
		firstSample := true
		prevTs := int64(-1)

		it := ch.Iterator(nil)
		for it.Err() == nil && it.Next() {
			samples++
			ts, _ := it.At()

			if firstSample {
				firstSample = false
				if ts != cm.MinTime {
					level.Warn(l).Log("ref", cm.Ref, "msg", "timestamp of the first sample doesn't match chunk MinTime", "sampleTimestamp", formatTimestamp(ts), "chunkMinTime", formatTimestamp(cm.MinTime))
				}
			} else if ts <= prevTs {
				level.Warn(l).Log("ref", cm.Ref, "msg", "found sample with timestamp not strictly higher than previous timestamp", "previous", formatTimestamp(prevTs), "sampleTimestamp", formatTimestamp(ts))
			}

			prevTs = ts
		}

		if e := it.Err(); e != nil {
			// Report the iterator's own error. The err variable from cr.Chunk is
			// always nil once we get here, so logging it would hide the failure.
			level.Warn(l).Log("ref", cm.Ref, "msg", "failed to iterate over chunk samples", "err", e)
		} else if samples == 0 {
			level.Warn(l).Log("ref", cm.Ref, "msg", "no samples found in the chunk")
		} else if prevTs != cm.MaxTime {
			// prevTs holds the timestamp of the last sample seen by the loop above.
			level.Warn(l).Log("ref", cm.Ref, "msg", "timestamp of the last sample doesn't match chunk MaxTime", "sampleTimestamp", formatTimestamp(prevTs), "chunkMaxTime", formatTimestamp(cm.MaxTime))
		}
	}
}

// formatTimestamp renders ts as its raw integer value followed, in
// parentheses, by the time obtained from timestamp.Time(ts), converted to UTC
// and formatted as RFC 3339 with nanosecond precision.
func formatTimestamp(ts int64) string {
	utc := timestamp.Time(ts).UTC()
	human := utc.Format(time.RFC3339Nano)
	return fmt.Sprintf("%d (%s)", ts, human)
}
6 changes: 5 additions & 1 deletion tools/tsdb-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ import (
"flag"
"fmt"
"os"
"time"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/chunks"
Expand Down Expand Up @@ -99,7 +101,9 @@ func printBlockIndex(blockDir string, printChunks bool, matchers []*labels.Match
fmt.Println("series", lbls.String())
if printChunks {
for _, c := range chks {
fmt.Println("chunk", c.Ref, "min time:", c.MinTime, "max time:", c.MaxTime)
fmt.Println("chunk", c.Ref,
"min time:", c.MinTime, timestamp.Time(c.MinTime).UTC().Format(time.RFC3339Nano),
"max time:", c.MaxTime, timestamp.Time(c.MaxTime).UTC().Format(time.RFC3339Nano))
}
}
}
Expand Down