From 2bc501a3182e4d33c58a63447fb24ea98f7368ef Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 6 Sep 2024 14:33:55 +1000 Subject: [PATCH] Attach consistency check debug logs to trace spans (#9213) * Attach consistency check debug logs to trace spans * Add changelog entry --- CHANGELOG.md | 1 + pkg/querier/blocks_consistency_checker.go | 10 ++++------ pkg/querier/blocks_consistency_checker_test.go | 4 ++-- pkg/querier/blocks_store_queryable.go | 3 +-- pkg/querier/blocks_store_queryable_test.go | 16 ++++++++-------- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a3691f33db..e0a31844e32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -82,6 +82,7 @@ * [ENHANCEMENT] Ingester: add `cortex_lifecycler_read_only` metric which is set to 1 when ingester's lifecycler is set to read-only mode. #9095 * [ENHANCEMENT] Add a new field, `encode_time_seconds` to query stats log messages, to record the amount of time it takes the query-frontend to encode a response. This does not include any serialization time for downstream components. #9062 * [ENHANCEMENT] OTLP: If the flag `-distributor.otel-created-timestamp-zero-ingestion-enabled` is true, OTel start timestamps are converted to Prometheus zero samples to mark series start. #9131 +* [ENHANCEMENT] Querier: attach logs emitted during query consistency check to trace span for query. #9213 * [BUGFIX] Ruler: add support for draining any outstanding alert notifications before shutting down. This can be enabled with the `-ruler.drain-notification-queue-on-shutdown=true` CLI flag. #8346 * [BUGFIX] Query-frontend: fix `-querier.max-query-lookback` enforcement when `-compactor.blocks-retention-period` is not set, and viceversa. #8388 * [BUGFIX] Ingester: fix sporadic `not found` error causing an internal server error if label names are queried with matchers during head compaction. #8391 diff --git a/pkg/querier/blocks_consistency_checker.go b/pkg/querier/blocks_consistency_checker.go index dec25121fb6..a74b9100dc9 100644 --- a/pkg/querier/blocks_consistency_checker.go +++ b/pkg/querier/blocks_consistency_checker.go @@ -20,17 +20,15 @@ import ( type BlocksConsistency struct { uploadGracePeriod time.Duration deletionGracePeriod time.Duration - logger log.Logger checksTotal prometheus.Counter checksFailed prometheus.Counter } -func NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod time.Duration, logger log.Logger, reg prometheus.Registerer) *BlocksConsistency { +func NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod time.Duration, reg prometheus.Registerer) *BlocksConsistency { return &BlocksConsistency{ uploadGracePeriod: uploadGracePeriod, deletionGracePeriod: deletionGracePeriod, - logger: logger, checksTotal: promauto.With(reg).NewCounter(prometheus.CounterOpts{ Name: "cortex_querier_blocks_consistency_checks_total", Help: "Total number of queries that needed to run with consistency checks. A consistency check is required when querying blocks from store-gateways to make sure that all blocks are queried.", @@ -44,7 +42,7 @@ func NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod time.Duration, // NewTracker creates a consistency tracker from the known blocks. It filters out any block uploaded within uploadGracePeriod // and with a deletion mark within deletionGracePeriod. -func (c *BlocksConsistency) NewTracker(knownBlocks bucketindex.Blocks, knownDeletionMarks map[ulid.ULID]*bucketindex.BlockDeletionMark) BlocksConsistencyTracker { +func (c *BlocksConsistency) NewTracker(knownBlocks bucketindex.Blocks, knownDeletionMarks map[ulid.ULID]*bucketindex.BlockDeletionMark, logger log.Logger) BlocksConsistencyTracker { blocksToTrack := make(map[ulid.ULID]struct{}, len(knownBlocks)) for _, block := range knownBlocks { // Some recently uploaded blocks, already discovered by the querier, may not have been discovered @@ -55,7 +53,7 @@ func (c *BlocksConsistency) NewTracker(knownBlocks bucketindex.Blocks, knownDele // - Blocks uploaded by compactor: the source blocks are marked for deletion but will continue to be // queried by queriers for a while (depends on the configured deletion marks delay). if c.uploadGracePeriod > 0 && time.Since(block.GetUploadedAt()) < c.uploadGracePeriod { - level.Debug(c.logger).Log("msg", "block skipped from consistency check because it was uploaded recently", "block", block.ID.String(), "uploadedAt", block.GetUploadedAt().String()) + level.Debug(logger).Log("msg", "block skipped from consistency check because it was uploaded recently", "block", block.ID.String(), "uploadedAt", block.GetUploadedAt().String()) continue } @@ -67,7 +65,7 @@ func (c *BlocksConsistency) NewTracker(knownBlocks bucketindex.Blocks, knownDele deletionTime := time.Unix(mark.DeletionTime, 0) if c.deletionGracePeriod > 0 && time.Since(deletionTime) > c.deletionGracePeriod { - level.Debug(c.logger).Log("msg", "block skipped from consistency check because it is marked for deletion", "block", block.ID.String(), "deletionTime", deletionTime.String()) + level.Debug(logger).Log("msg", "block skipped from consistency check because it is marked for deletion", "block", block.ID.String(), "deletionTime", deletionTime.String()) continue } } diff --git a/pkg/querier/blocks_consistency_checker_test.go b/pkg/querier/blocks_consistency_checker_test.go index 4e89cfd07f0..2e6f29ca27f 100644 --- a/pkg/querier/blocks_consistency_checker_test.go +++ b/pkg/querier/blocks_consistency_checker_test.go @@ -126,8 +126,8 @@ func TestBlocksConsistencyTracker_Check(t *testing.T) { for testName, testData := range tests { t.Run(testName, func(t *testing.T) { reg := prometheus.NewPedanticRegistry() - c := NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod, log.NewNopLogger(), reg) - tracker := c.NewTracker(testData.knownBlocks, testData.knownDeletionMarks) + c := NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod, reg) + tracker := c.NewTracker(testData.knownBlocks, testData.knownDeletionMarks, log.NewNopLogger()) var missingBlocks []ulid.ULID for _, queriedBlocksAttempt := range testData.queriedBlocks { missingBlocks = tracker.Check(queriedBlocksAttempt) diff --git a/pkg/querier/blocks_store_queryable.go b/pkg/querier/blocks_store_queryable.go index 723aec5a8b7..461017a4308 100644 --- a/pkg/querier/blocks_store_queryable.go +++ b/pkg/querier/blocks_store_queryable.go @@ -258,7 +258,6 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa // recently marked for deletion, until the "ignore delay / 2". This means the consistency checker // exclude such blocks about 50% of the time before querier and store-gateway stops querying them. storageCfg.BucketStore.IgnoreDeletionMarksDelay/2, - logger, reg, ) @@ -548,7 +547,7 @@ func (q *blocksStoreQuerier) queryWithConsistencyCheck( touchedStores = map[string]struct{}{} ) - consistencyTracker := q.consistency.NewTracker(knownBlocks, knownDeletionMarks) + consistencyTracker := q.consistency.NewTracker(knownBlocks, knownDeletionMarks, spanLog) defer func() { // Do not track consistency check metrics if query failed with an error unrelated to consistency check (e.g. context canceled), // because it means we interrupted the requests, and we don't know whether consistency check would have succeeded diff --git a/pkg/querier/blocks_store_queryable_test.go b/pkg/querier/blocks_store_queryable_test.go index 271b17e1513..3ff72d1f677 100644 --- a/pkg/querier/blocks_store_queryable_test.go +++ b/pkg/querier/blocks_store_queryable_test.go @@ -1563,7 +1563,7 @@ func TestBlocksStoreQuerier_Select(t *testing.T) { maxT: maxT, finder: finder, stores: stores, - consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), reg), + consistency: NewBlocksConsistency(0, 0, reg), logger: log.NewNopLogger(), metrics: newBlocksStoreQueryableMetrics(reg), limits: testData.limits, @@ -1704,7 +1704,7 @@ func TestBlocksStoreQuerier_ShouldReturnContextCanceledIfContextWasCanceledWhile maxT: maxT, finder: finder, stores: stores, - consistency: NewBlocksConsistency(0, 0, logger, reg), + consistency: NewBlocksConsistency(0, 0, reg), logger: logger, metrics: newBlocksStoreQueryableMetrics(reg), limits: &blocksStoreLimitsMock{}, @@ -1932,7 +1932,7 @@ func TestBlocksStoreQuerier_Select_cancelledContext(t *testing.T) { maxT: maxT, finder: finder, stores: stores, - consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil), + consistency: NewBlocksConsistency(0, 0, nil), logger: log.NewNopLogger(), metrics: newBlocksStoreQueryableMetrics(reg), limits: &blocksStoreLimitsMock{}, @@ -2435,7 +2435,7 @@ func TestBlocksStoreQuerier_Labels(t *testing.T) { maxT: maxT, finder: finder, stores: stores, - consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil), + consistency: NewBlocksConsistency(0, 0, nil), logger: log.NewNopLogger(), metrics: newBlocksStoreQueryableMetrics(reg), limits: &blocksStoreLimitsMock{}, @@ -2506,7 +2506,7 @@ func TestBlocksStoreQuerier_Labels(t *testing.T) { maxT: maxT, finder: finder, stores: stores, - consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil), + consistency: NewBlocksConsistency(0, 0, nil), logger: log.NewNopLogger(), metrics: newBlocksStoreQueryableMetrics(reg), limits: &blocksStoreLimitsMock{}, @@ -2580,7 +2580,7 @@ func TestBlocksStoreQuerier_SelectSortedShouldHonorQueryStoreAfter(t *testing.T) maxT: testData.queryMaxT, finder: finder, stores: &blocksStoreSetMock{}, - consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil), + consistency: NewBlocksConsistency(0, 0, nil), logger: log.NewNopLogger(), metrics: newBlocksStoreQueryableMetrics(nil), limits: &blocksStoreLimitsMock{}, @@ -2683,7 +2683,7 @@ func TestBlocksStoreQuerier_MaxLabelsQueryRange(t *testing.T) { maxT: testData.queryMaxT, finder: finder, stores: &blocksStoreSetMock{}, - consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil), + consistency: NewBlocksConsistency(0, 0, nil), logger: log.NewNopLogger(), metrics: newBlocksStoreQueryableMetrics(nil), limits: &blocksStoreLimitsMock{ @@ -2824,7 +2824,7 @@ func TestBlocksStoreQuerier_PromQLExecution(t *testing.T) { // Instantiate the querier that will be executed to run the query. logger := log.NewNopLogger() - queryable, err := NewBlocksStoreQueryable(stores, finder, NewBlocksConsistency(0, 0, logger, nil), &blocksStoreLimitsMock{}, 0, 0, logger, nil) + queryable, err := NewBlocksStoreQueryable(stores, finder, NewBlocksConsistency(0, 0, nil), &blocksStoreLimitsMock{}, 0, 0, logger, nil) require.NoError(t, err) require.NoError(t, services.StartAndAwaitRunning(context.Background(), queryable)) defer services.StopAndAwaitTerminated(context.Background(), queryable) // nolint:errcheck