Skip to content

Commit

Permalink
Switch Prometheus API responsiveness to use 1m windows.
Browse files Browse the repository at this point in the history
Default so far was 5m, as in the SLI definition. We're changing this to 1m so
that we calculate the SLI over more data points. In the past, we suffered from
not having enough of them, which caused the 99th percentile to effectively be
the max over the series.

Minor: remove recording rules with 30s window.
  • Loading branch information
oxddr committed Oct 7, 2019
1 parent 9570718 commit c9438e2
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ const (
// latencyQuery matches the description of the API call latency SLI and measures the 99th percentile over 5m windows
//
// latencyQuery: %v should be replaced with (1) filters and (2) query window size.
latencyQuery = "quantile_over_time(0.99, apiserver:apiserver_request_latency:histogram_quantile{ %v}[%v])"
latencyQuery = "quantile_over_time(0.99, apiserver:apiserver_request_latency_1m:histogram_quantile{%v}[%v])"

// simpleLatencyQuery measures 99th percentile of API call latency over given period of time
// it doesn't match the SLI, but is useful in shorter tests, where we don't have enough windows to use latencyQuery meaningfully.
Expand Down
17 changes: 0 additions & 17 deletions clusterloader2/pkg/prometheus/manifests/prometheus-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,6 @@ spec:
record: kubeproxy:kubeproxy_network_programming_duration:histogram_quantile
labels:
quantile: "0.50"
- name: apiserver.30s.rules
rules:
- expr: |
histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket[30s])) by (resource, subresource, verb, scope, le))
record: apiserver:apiserver_request_latency_30s:histogram_quantile
labels:
quantile: "0.99"
- expr: |
histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket[30s])) by (resource, subresource, verb, scope, le))
record: apiserver:apiserver_request_latency_30s:histogram_quantile
labels:
quantile: "0.90"
- expr: |
histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket[30s])) by (resource, subresource, verb, scope, le))
record: apiserver:apiserver_request_latency_30s:histogram_quantile
labels:
quantile: "0.50"
- name: apiserver.1m.rules
rules:
- expr: |
Expand Down

0 comments on commit c9438e2

Please sign in to comment.