Skip to content

Commit

Permalink
Fix race condition in querytee test (#8013)
Browse files Browse the repository at this point in the history
* Fix race condition in querytee test

This fixes race conditions such as the one seen in
https://github.com/grafana/mimir/actions/runs/8892796420/job/24417658697

It was caused by a latencyPair request consuming a response index before
a prior request had a chance to consume it.

* Update changelog
  • Loading branch information
jhesketh authored Apr 30, 2024
1 parent c55ed05 commit 2460a3f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 11 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
### Query-tee

* [ENHANCEMENT] Log queries that take longer than `proxy.log-slow-query-response-threshold` when compared to other backends. #7346
* [ENHANCEMENT] Add two new metrics for measuring the relative duration between backends: #7782
* [ENHANCEMENT] Add two new metrics for measuring the relative duration between backends: #7782 #8013
* `cortex_querytee_backend_response_relative_duration_seconds`
* `cortex_querytee_backend_response_relative_duration_proportional`

Expand Down
17 changes: 7 additions & 10 deletions tools/querytee/proxy_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,6 @@ func Test_ProxyEndpoint_LogSlowQueries(t *testing.T) {
func Test_ProxyEndpoint_RelativeDurationMetric(t *testing.T) {
scenarios := map[string]struct {
latencyPairs []latencyPair
expectedSampleCount uint64
expectedDurationSampleSum float64
expectedProportionalSampleSum float64
}{
Expand All @@ -463,7 +462,6 @@ func Test_ProxyEndpoint_RelativeDurationMetric(t *testing.T) {
secondaryResponseLatency: 2 * time.Second,
},
},
expectedSampleCount: 2,
expectedDurationSampleSum: 5,
expectedProportionalSampleSum: (3.0/1 + 5.0/2),
},
Expand All @@ -473,7 +471,6 @@ func Test_ProxyEndpoint_RelativeDurationMetric(t *testing.T) {
secondaryResponseLatency: 7 * time.Second,
},
},
expectedSampleCount: 1,
expectedDurationSampleSum: -5,
expectedProportionalSampleSum: (2.0 / 7),
},
Expand All @@ -498,27 +495,27 @@ func Test_ProxyEndpoint_RelativeDurationMetric(t *testing.T) {
resp := httptest.NewRecorder()
req, err := http.NewRequest("GET", "http://test/api/v1/test", nil)
require.NoError(t, err)
for range scenario.latencyPairs {
for i := range scenario.latencyPairs {
// This is just in serial to keep the tests simple
endpoint.ServeHTTP(resp, req)
// The HTTP request above will return as soon as the primary response is received, but this doesn't guarantee that the response comparison has been completed.
// Wait for the response comparison to complete the number of requests we have.
// We do this for each latencyPair to avoid a race where the second request would consume a result expected for the first request.
waitForResponseComparisonMetric(t, reg, ComparisonSuccess, uint64(i+1))
}

// The HTTP request above will return as soon as the primary response is received, but this doesn't guarantee that the response comparison has been completed.
// Wait for the response comparison to complete the number of requests we have
waitForResponseComparisonMetric(t, reg, ComparisonSuccess, scenario.expectedSampleCount)

got, done, err := prometheus.ToTransactionalGatherer(reg).Gather()
defer done()
require.NoError(t, err, "Failed to gather metrics from registry")

gotDuration := filterMetrics(got, []string{"cortex_querytee_backend_response_relative_duration_seconds"})
require.Equal(t, 1, len(gotDuration), "Expect only one metric after filtering")
require.Equal(t, scenario.expectedSampleCount, gotDuration[0].Metric[0].Histogram.GetSampleCount())
require.Equal(t, uint64(len(scenario.latencyPairs)), gotDuration[0].Metric[0].Histogram.GetSampleCount())
require.Equal(t, scenario.expectedDurationSampleSum, gotDuration[0].Metric[0].Histogram.GetSampleSum())

gotProportional := filterMetrics(got, []string{"cortex_querytee_backend_response_relative_duration_proportional"})
require.Equal(t, 1, len(gotProportional), "Expect only one metric after filtering")
require.Equal(t, scenario.expectedSampleCount, gotProportional[0].Metric[0].Histogram.GetSampleCount())
require.Equal(t, uint64(len(scenario.latencyPairs)), gotProportional[0].Metric[0].Histogram.GetSampleCount())
require.Equal(t, scenario.expectedProportionalSampleSum, gotProportional[0].Metric[0].Histogram.GetSampleSum())
})
}
Expand Down

0 comments on commit 2460a3f

Please sign in to comment.