query-tee: add support for comparing native histograms in results (#8724

) * Make float comparison error message unambiguous * Add support for comparing native histograms in instant vector results * Add support for comparing native histograms in range vector results * Improve sample comparison error message * Remove use of `errors.Wrap` in favour of `fmt.Errorf` * Remove unnecessary conversions in test * Add tests for `compareSampleHistogramPair` * Add changelog entry * Log information about failing series when range vector result comparison fails * Address PR feedback: rename function parameters
grafana · Jul 16, 2024 · b8fe2a8 · b8fe2a8
1 parent c02f761
commit b8fe2a8
Show file tree

Hide file tree

Showing 3 changed files with 1,132 additions and 54 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -93,6 +93,7 @@
 * [ENHANCEMENT] Log user agent when processing a request. #8419
 * [ENHANCEMENT] Add `time` parameter to proxied instant queries if it is not included in the incoming request. This is optional but enabled by default, and can be disabled with `-proxy.add-missing-time-parameter-to-instant-queries=false`. #8419
 * [ENHANCEMENT] Add support for sending only a proportion of requests to all backends, with the remainder only sent to the preferred backend. The default behaviour is to send all requests to all backends. This can be configured with `-proxy.secondary-backends-request-proportion`. #8532
+* [ENHANCEMENT] Compare native histograms in query results when comparing results between two backends. #8724
 * [BUGFIX] Ensure any errors encountered while forwarding a request to a backend (eg. DNS resolution failures) are logged. #8419
 
 ### Documentation

diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go
@@ -9,10 +9,11 @@ import (
 	"encoding/json"
 	"fmt"
 	"math"
+	"slices"
 	"time"
 
+	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
-	"github.com/pkg/errors"
 	"github.com/prometheus/common/model"
 
 	util_log "github.com/grafana/mimir/pkg/util/log"
@@ -63,12 +64,12 @@ func (s *SamplesComparator) Compare(expectedResponse, actualResponse []byte) (Co
 
 	err := json.Unmarshal(expectedResponse, &expected)
 	if err != nil {
-		return ComparisonFailed, errors.Wrap(err, "unable to unmarshal expected response")
+		return ComparisonFailed, fmt.Errorf("unable to unmarshal expected response: %w", err)
 	}
 
 	err = json.Unmarshal(actualResponse, &actual)
 	if err != nil {
-		return ComparisonFailed, errors.Wrap(err, "unable to unmarshal actual response")
+		return ComparisonFailed, fmt.Errorf("unable to unmarshal actual response: %w", err)
 	}
 
 	if expected.Status != actual.Status {
@@ -132,25 +133,63 @@ func compareMatrix(expectedRaw, actualRaw json.RawMessage, opts SampleComparison
 		}
 
 		actualMetric := actual[actualMetricIndex]
-		expectedMetricLen := len(expectedMetric.Values)
-		actualMetricLen := len(actualMetric.Values)
-
-		if expectedMetricLen != actualMetricLen {
-			err := fmt.Errorf("expected %d samples for metric %s but got %d", expectedMetricLen,
-				expectedMetric.Metric, actualMetricLen)
-			if expectedMetricLen > 0 && actualMetricLen > 0 {
-				level.Error(util_log.Logger).Log("msg", err.Error(), "oldest-expected-ts", expectedMetric.Values[0].Timestamp,
-					"newest-expected-ts", expectedMetric.Values[expectedMetricLen-1].Timestamp,
-					"oldest-actual-ts", actualMetric.Values[0].Timestamp, "newest-actual-ts", actualMetric.Values[actualMetricLen-1].Timestamp)
+		expectedFloatSamplesCount := len(expectedMetric.Values)
+		actualFloatSamplesCount := len(actualMetric.Values)
+		expectedHistogramSamplesCount := len(expectedMetric.Histograms)
+		actualHistogramSamplesCount := len(actualMetric.Histograms)
+
+		if expectedFloatSamplesCount != actualFloatSamplesCount || expectedHistogramSamplesCount != actualHistogramSamplesCount {
+			err := fmt.Errorf(
+				"expected %d float sample(s) and %d histogram sample(s) for metric %s but got %d float sample(s) and %d histogram sample(s)",
+				expectedFloatSamplesCount,
+				expectedHistogramSamplesCount,
+				expectedMetric.Metric,
+				actualFloatSamplesCount,
+				actualHistogramSamplesCount,
+			)
+
+			shouldLog := false
+			logger := util_log.Logger
+
+			if expectedFloatSamplesCount > 0 && actualFloatSamplesCount > 0 {
+				logger = log.With(logger,
+					"oldest-expected-float-ts", expectedMetric.Values[0].Timestamp,
+					"newest-expected-float-ts", expectedMetric.Values[expectedFloatSamplesCount-1].Timestamp,
+					"oldest-actual-float-ts", actualMetric.Values[0].Timestamp,
+					"newest-actual-float-ts", actualMetric.Values[actualFloatSamplesCount-1].Timestamp,
+				)
+				shouldLog = true
 			}
+
+			if expectedHistogramSamplesCount > 0 && actualHistogramSamplesCount > 0 {
+				logger = log.With(logger,
+					"oldest-expected-histogram-ts", expectedMetric.Histograms[0].Timestamp,
+					"newest-expected-histogram-ts", expectedMetric.Histograms[expectedHistogramSamplesCount-1].Timestamp,
+					"oldest-actual-histogram-ts", actualMetric.Histograms[0].Timestamp,
+					"newest-actual-histogram-ts", actualMetric.Histograms[actualHistogramSamplesCount-1].Timestamp,
+				)
+				shouldLog = true
+			}
+
+			if shouldLog {
+				level.Error(logger).Log("msg", err.Error())
+			}
+
 			return err
 		}
 
 		for i, expectedSamplePair := range expectedMetric.Values {
 			actualSamplePair := actualMetric.Values[i]
 			err := compareSamplePair(expectedSamplePair, actualSamplePair, opts)
 			if err != nil {
-				return errors.Wrapf(err, "sample pair not matching for metric %s", expectedMetric.Metric)
+				return fmt.Errorf("float sample pair does not match for metric %s: %w", expectedMetric.Metric, err)
+			}
+		}
+
+		for i, expectedHistogramPair := range expectedMetric.Histograms {
+			actualHistogramPair := actualMetric.Histograms[i]
+			if err := compareSampleHistogramPair(expectedHistogramPair, actualHistogramPair, opts); err != nil {
+				return fmt.Errorf("histogram sample pair does not match for metric %s: %w", expectedMetric.Metric, err)
 			}
 		}
 	}
@@ -188,15 +227,33 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, opts SampleComparison
 		}
 
 		actualMetric := actual[actualMetricIndex]
-		err := compareSamplePair(model.SamplePair{
-			Timestamp: expectedMetric.Timestamp,
-			Value:     expectedMetric.Value,
-		}, model.SamplePair{
-			Timestamp: actualMetric.Timestamp,
-			Value:     actualMetric.Value,
-		}, opts)
-		if err != nil {
-			return errors.Wrapf(err, "sample pair not matching for metric %s", expectedMetric.Metric)
+
+		if expectedMetric.Histogram == nil && actualMetric.Histogram == nil {
+			err := compareSamplePair(model.SamplePair{
+				Timestamp: expectedMetric.Timestamp,
+				Value:     expectedMetric.Value,
+			}, model.SamplePair{
+				Timestamp: actualMetric.Timestamp,
+				Value:     actualMetric.Value,
+			}, opts)
+			if err != nil {
+				return fmt.Errorf("float sample pair does not match for metric %s: %w", expectedMetric.Metric, err)
+			}
+		} else if expectedMetric.Histogram != nil && actualMetric.Histogram == nil {
+			return fmt.Errorf("sample pair does not match for metric %s: expected histogram but got float value", expectedMetric.Metric)
+		} else if expectedMetric.Histogram == nil && actualMetric.Histogram != nil {
+			return fmt.Errorf("sample pair does not match for metric %s: expected float value but got histogram", expectedMetric.Metric)
+		} else { // Expected value is a histogram and the actual value is a histogram.
+			err := compareSampleHistogramPair(model.SampleHistogramPair{
+				Timestamp: expectedMetric.Timestamp,
+				Histogram: expectedMetric.Histogram,
+			}, model.SampleHistogramPair{
+				Timestamp: actualMetric.Timestamp,
+				Histogram: actualMetric.Histogram,
+			}, opts)
+			if err != nil {
+				return fmt.Errorf("histogram sample pair does not match for metric %s: %w", expectedMetric.Metric, err)
+			}
 		}
 	}
 
@@ -231,26 +288,65 @@ func compareSamplePair(expected, actual model.SamplePair, opts SampleComparisonO
 	if opts.SkipRecentSamples > 0 && time.Since(expected.Timestamp.Time()) < opts.SkipRecentSamples {
 		return nil
 	}
-	if !compareSampleValue(expected.Value, actual.Value, opts) {
+	if !compareSampleValue(float64(expected.Value), float64(actual.Value), opts) {
 		return fmt.Errorf("expected value %s for timestamp %v but got %s", expected.Value, expected.Timestamp, actual.Value)
 	}
 
 	return nil
 }
 
-func compareSampleValue(first, second model.SampleValue, opts SampleComparisonOptions) bool {
-	f := float64(first)
-	s := float64(second)
-
-	if (math.IsNaN(f) && math.IsNaN(s)) ||
-		(math.IsInf(f, 1) && math.IsInf(s, 1)) ||
-		(math.IsInf(f, -1) && math.IsInf(s, -1)) {
+func compareSampleValue(first, second float64, opts SampleComparisonOptions) bool {
+	if (math.IsNaN(first) && math.IsNaN(second)) ||
+		(math.IsInf(first, 1) && math.IsInf(second, 1)) ||
+		(math.IsInf(first, -1) && math.IsInf(second, -1)) {
 		return true
 	} else if opts.Tolerance <= 0 {
-		return math.Float64bits(f) == math.Float64bits(s)
+		return math.Float64bits(first) == math.Float64bits(second)
+	}
+	if opts.UseRelativeError && second != 0 {
+		return math.Abs(first-second)/math.Abs(second) <= opts.Tolerance
+	}
+	return math.Abs(first-second) <= opts.Tolerance
+}
+
+func compareSampleHistogramPair(expected, actual model.SampleHistogramPair, opts SampleComparisonOptions) error {
+	if expected.Timestamp != actual.Timestamp {
+		return fmt.Errorf("expected timestamp %v but got %v", expected.Timestamp, actual.Timestamp)
 	}
-	if opts.UseRelativeError && s != 0 {
-		return math.Abs(f-s)/math.Abs(s) <= opts.Tolerance
+
+	if opts.SkipRecentSamples > 0 && time.Since(expected.Timestamp.Time()) < opts.SkipRecentSamples {
+		return nil
+	}
+
+	if !compareSampleValue(float64(expected.Histogram.Sum), float64(actual.Histogram.Sum), opts) {
+		return fmt.Errorf("expected sum %s for timestamp %v but got %s", expected.Histogram.Sum, expected.Timestamp, actual.Histogram.Sum)
+	}
+
+	if !compareSampleValue(float64(expected.Histogram.Count), float64(actual.Histogram.Count), opts) {
+		return fmt.Errorf("expected count %s for timestamp %v but got %s", expected.Histogram.Count, expected.Timestamp, actual.Histogram.Count)
+	}
+
+	if !slices.EqualFunc(expected.Histogram.Buckets, actual.Histogram.Buckets, compareSampleHistogramBuckets(opts)) {
+		return fmt.Errorf("expected buckets %s for timestamp %v but got %s", expected.Histogram.Buckets, expected.Timestamp, actual.Histogram.Buckets)
+	}
+
+	return nil
+}
+
+func compareSampleHistogramBuckets(opts SampleComparisonOptions) func(expected, actual *model.HistogramBucket) bool {
+	return func(first, second *model.HistogramBucket) bool {
+		if !compareSampleValue(float64(first.Lower), float64(second.Lower), opts) {
+			return false
+		}
+
+		if !compareSampleValue(float64(first.Upper), float64(second.Upper), opts) {
+			return false
+		}
+
+		if !compareSampleValue(float64(first.Count), float64(second.Count), opts) {
+			return false
+		}
+
+		return first.Boundaries == second.Boundaries
 	}
-	return math.Abs(f-s) <= opts.Tolerance
 }