From bba01ede0d62b80047df700c5c61f26ddbad45f3 Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Mon, 26 Aug 2019 14:29:28 +0200 Subject: [PATCH 1/5] Remove unncessary Query API metrics Signed-off-by: Kemal Akkoyun --- pkg/query/api/v1.go | 27 --------------------------- pkg/query/api/v1_test.go | 4 ---- 2 files changed, 31 deletions(-) diff --git a/pkg/query/api/v1.go b/pkg/query/api/v1.go index ad897a7333..153bf7b49b 100644 --- a/pkg/query/api/v1.go +++ b/pkg/query/api/v1.go @@ -100,8 +100,6 @@ type API struct { queryableCreate query.QueryableCreator queryEngine *promql.Engine - instantQueryDuration prometheus.Histogram - rangeQueryDuration prometheus.Histogram enableAutodownsampling bool enablePartialResponse bool now func() time.Time @@ -116,31 +114,10 @@ func NewAPI( enableAutodownsampling bool, enablePartialResponse bool, ) *API { - instantQueryDuration := prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "thanos_query_api_instant_query_duration_seconds", - Help: "Time it takes to perform instant query on promEngine backed up with thanos querier.", - Buckets: []float64{ - 0.05, 0.1, 0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 20, - }, - }) - rangeQueryDuration := prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "thanos_query_api_range_query_duration_seconds", - Help: "Time it takes to perform range query on promEngine backed up with thanos querier.", - Buckets: []float64{ - 0.05, 0.1, 0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 20, - }, - }) - - reg.MustRegister( - instantQueryDuration, - rangeQueryDuration, - ) return &API{ logger: logger, queryEngine: qe, queryableCreate: c, - instantQueryDuration: instantQueryDuration, - rangeQueryDuration: rangeQueryDuration, enableAutodownsampling: enableAutodownsampling, enablePartialResponse: enablePartialResponse, @@ -281,7 +258,6 @@ func (api *API) query(r *http.Request) (interface{}, []error, *ApiError) { span, ctx := tracing.StartSpan(ctx, "promql_instant_query") defer span.Finish() - begin := api.now() qry, err := api.queryEngine.NewInstantQuery(api.queryableCreate(enableDedup, 0, enablePartialResponse), r.FormValue("query"), ts) if err != nil { return nil, nil, &ApiError{errorBadData, err} @@ -299,7 +275,6 @@ func (api *API) query(r *http.Request) (interface{}, []error, *ApiError) { } return nil, nil, &ApiError{errorExec, res.Err} } - api.instantQueryDuration.Observe(time.Since(begin).Seconds()) return &queryData{ ResultType: res.Value.Type(), @@ -369,7 +344,6 @@ func (api *API) queryRange(r *http.Request) (interface{}, []error, *ApiError) { span, ctx := tracing.StartSpan(ctx, "promql_range_query") defer span.Finish() - begin := api.now() qry, err := api.queryEngine.NewRangeQuery( api.queryableCreate(enableDedup, maxSourceResolution, enablePartialResponse), r.FormValue("query"), @@ -391,7 +365,6 @@ func (api *API) queryRange(r *http.Request) (interface{}, []error, *ApiError) { } return nil, nil, &ApiError{errorExec, res.Err} } - api.rangeQueryDuration.Observe(time.Since(begin).Seconds()) return &queryData{ ResultType: res.Value.Type(), diff --git a/pkg/query/api/v1_test.go b/pkg/query/api/v1_test.go index f017a6ab0b..e9433bc15c 100644 --- a/pkg/query/api/v1_test.go +++ b/pkg/query/api/v1_test.go @@ -31,7 +31,6 @@ import ( "github.com/go-kit/kit/log" opentracing "github.com/opentracing/opentracing-go" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/route" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/timestamp" @@ -71,9 +70,6 @@ func TestEndpoints(t *testing.T) { queryableCreate: testQueryableCreator(suite.Storage()), queryEngine: suite.QueryEngine(), - instantQueryDuration: prometheus.NewHistogram(prometheus.HistogramOpts{}), - rangeQueryDuration: prometheus.NewHistogram(prometheus.HistogramOpts{}), - now: func() time.Time { return now }, } From 525d77d181ae97280a14b7fe5f0c666e84b98ef4 Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Mon, 26 Aug 2019 14:31:42 +0200 Subject: [PATCH 2/5] Refactor receive to use common instrumentation meiddleware Signed-off-by: Kemal Akkoyun --- pkg/extprom/http/instrument_server.go | 2 +- pkg/receive/handler.go | 61 +++++---------------------- 2 files changed, 12 insertions(+), 51 deletions(-) diff --git a/pkg/extprom/http/instrument_server.go b/pkg/extprom/http/instrument_server.go index 87ffbb8e19..5a49383b0a 100644 --- a/pkg/extprom/http/instrument_server.go +++ b/pkg/extprom/http/instrument_server.go @@ -35,7 +35,7 @@ type defaultInstrumentationMiddleware struct { } // NewInstrumentationMiddleware provides default InstrumentationMiddleware. -func NewInstrumentationMiddleware(reg *prometheus.Registry) InstrumentationMiddleware { +func NewInstrumentationMiddleware(reg prometheus.Registerer) InstrumentationMiddleware { ins := defaultInstrumentationMiddleware{ requestDuration: prometheus.NewHistogramVec( prometheus.HistogramOpts{ diff --git a/pkg/receive/handler.go b/pkg/receive/handler.go index 82385f8855..398ee4e023 100644 --- a/pkg/receive/handler.go +++ b/pkg/receive/handler.go @@ -21,10 +21,10 @@ import ( opentracing "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/route" promtsdb "github.com/prometheus/prometheus/storage/tsdb" terrors "github.com/prometheus/prometheus/tsdb/errors" + extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http" "github.com/thanos-io/thanos/pkg/runutil" "github.com/thanos-io/thanos/pkg/store/prompb" ) @@ -54,9 +54,6 @@ type Handler struct { hashring Hashring // Metrics - requestDuration *prometheus.HistogramVec - requestsTotal *prometheus.CounterVec - responseSize *prometheus.HistogramVec forwardRequestsTotal *prometheus.CounterVec // These fields are uint32 rather than boolean to be able to use atomic functions. @@ -72,29 +69,8 @@ func NewHandler(logger log.Logger, o *Options) *Handler { logger: logger, readyStorage: o.ReadyStorage, receiver: o.Receiver, + router: route.New(), options: o, - requestDuration: prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: "thanos_http_request_duration_seconds", - Help: "Histogram of latencies for HTTP requests.", - Buckets: []float64{.1, .2, .4, 1, 3, 8, 20, 60, 120}, - }, - []string{"handler"}, - ), - requestsTotal: prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "thanos_http_requests_total", - Help: "Tracks the number of HTTP requests.", - }, []string{"code", "handler", "method"}, - ), - responseSize: prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: "thanos_http_response_size_bytes", - Help: "Histogram of response size for HTTP requests.", - Buckets: prometheus.ExponentialBuckets(100, 10, 8), - }, - []string{"handler"}, - ), forwardRequestsTotal: prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "thanos_receive_forward_requests_total", @@ -103,35 +79,20 @@ func NewHandler(logger log.Logger, o *Options) *Handler { ), } - router := route.New().WithInstrumentation(h.instrumentHandler) - h.router = router + ins := extpromhttp.NewNopInstrumentationMiddleware() + if o.Registry != nil { + ins = extpromhttp.NewInstrumentationMiddleware(o.Registry) + o.Registry.MustRegister(h.forwardRequestsTotal) + } readyf := h.testReady - router.Post("/api/v1/receive", readyf(h.receive)) - - if o.Registry != nil { - o.Registry.MustRegister( - h.requestDuration, - h.requestsTotal, - h.responseSize, - h.forwardRequestsTotal, - ) + instrf := func(name string, next func(w http.ResponseWriter, r *http.Request)) http.HandlerFunc { + return ins.NewHandler(name, http.HandlerFunc(next)) } - return h -} + h.router.Post("/api/v1/receive", instrf("receive", readyf(h.receive))) -func (h *Handler) instrumentHandler(handlerName string, handler http.HandlerFunc) http.HandlerFunc { - return promhttp.InstrumentHandlerDuration( - h.requestDuration.MustCurryWith(prometheus.Labels{"handler": handlerName}), - promhttp.InstrumentHandlerResponseSize( - h.responseSize.MustCurryWith(prometheus.Labels{"handler": handlerName}), - promhttp.InstrumentHandlerCounter( - h.requestsTotal.MustCurryWith(prometheus.Labels{"handler": handlerName}), - handler, - ), - ), - ) + return h } // StorageReady marks the storage as ready. From 2e4587d37061ae4fe3a782e3bd7712166116f064 Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Mon, 26 Aug 2019 14:32:06 +0200 Subject: [PATCH 3/5] Add changes to changelog Signed-off-by: Kemal Akkoyun --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0959ed4a25..aa945d2ec9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel ### Added +- [#1378](https://github.com/thanos-io/thanos/pull/1378) Thanos Receive now exposes `thanos_receive_config_hash`, `thanos_receive_config_last_reload_successful` and `thanos_receive_config_last_reload_success_timestamp_seconds` metrics to track latest configuration change - [#1358](https://github.com/thanos-io/thanos/pull/1358) Added `part_size` configuration option for HTTP multipart requests minimum part size for S3 storage type - [#1363](https://github.com/thanos-io/thanos/pull/1363) Thanos Receive now exposes `thanos_receive_hashring_nodes` and `thanos_receive_hashring_tenants` metrics to monitor status of hash-rings @@ -32,6 +33,10 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#1327](https://github.com/thanos-io/thanos/pull/1327) `/series` API end-point now properly returns an empty array just like Prometheus if there are no results - [#1302](https://github.com/thanos-io/thanos/pull/1302) Thanos now efficiently reuses HTTP keep-alive connections +### Deprecated + +- [#xxx](https://github.com/thanos-io/thanos/pull/xxx) Thanos Query and Receive now uses common instrumentation middleware. As as result, for sake of `http_requests_total` and `http_request_duration_seconds_bucket`; Thanos Query no longer exposes `thanos_query_api_instant_query_duration_seconds`, `thanos_query_api_range_query_duration_second` metrics and Thanos Receive no longer exposes `thanos_http_request_duration_seconds`, `thanos_http_requests_total`, `thanos_http_response_size_bytes`. + ## [v0.6.0](https://github.com/thanos-io/thanos/releases/tag/v0.6.0) - 2019.07.18 ### Added From 850c71693a36961bc43c0a5871250abc3145b845 Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Mon, 26 Aug 2019 14:45:21 +0200 Subject: [PATCH 4/5] Update changelog Signed-off-by: Kemal Akkoyun --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa945d2ec9..6255347c92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel ### Deprecated -- [#xxx](https://github.com/thanos-io/thanos/pull/xxx) Thanos Query and Receive now uses common instrumentation middleware. As as result, for sake of `http_requests_total` and `http_request_duration_seconds_bucket`; Thanos Query no longer exposes `thanos_query_api_instant_query_duration_seconds`, `thanos_query_api_range_query_duration_second` metrics and Thanos Receive no longer exposes `thanos_http_request_duration_seconds`, `thanos_http_requests_total`, `thanos_http_response_size_bytes`. +- [#1458](https://github.com/thanos-io/thanos/pull/1458) Thanos Query and Receive now use common instrumentation middleware. As as result, for sake of `http_requests_total` and `http_request_duration_seconds_bucket`; Thanos Query no longer exposes `thanos_query_api_instant_query_duration_seconds`, `thanos_query_api_range_query_duration_second` metrics and Thanos Receive no longer exposes `thanos_http_request_duration_seconds`, `thanos_http_requests_total`, `thanos_http_response_size_bytes`. ## [v0.6.0](https://github.com/thanos-io/thanos/releases/tag/v0.6.0) - 2019.07.18 From 72226599434f07e3b990ea7e63f239dedd8549e4 Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Mon, 26 Aug 2019 15:48:08 +0200 Subject: [PATCH 5/5] Update query metrics used in dashboards Signed-off-by: Kemal Akkoyun --- examples/grafana/thanos-query.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/grafana/thanos-query.json b/examples/grafana/thanos-query.json index 85601c8243..424437e554 100644 --- a/examples/grafana/thanos-query.json +++ b/examples/grafana/thanos-query.json @@ -306,14 +306,14 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(thanos_query_api_instant_query_duration_seconds_bucket{$labelselector=\"$labelvalue\",kubernetes_pod_name=~\"$pod\"}[$interval])) by (kubernetes_pod_name, le))", + "expr": "histogram_quantile(0.9999, sum(rate(http_request_duration_seconds_bucket{$labelselector=\"$labelvalue\",kubernetes_pod_name=~\"$pod\",handler=\"query\"}[$interval])) by (kubernetes_pod_name, le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "instant_query {{kubernetes_pod_name}}", "refId": "A" }, { - "expr": "histogram_quantile(0.9999, sum(rate(thanos_query_api_range_query_duration_seconds_bucket{$labelselector=\"$labelvalue\",kubernetes_pod_name=~\"$pod\"}[$interval])) by (kubernetes_pod_name, le))", + "expr": "histogram_quantile(0.9999, sum(rate(http_request_duration_seconds_bucket{$labelselector=\"$labelvalue\",kubernetes_pod_name=~\"$pod\",handler=\"query_range\"}[$interval])) by (kubernetes_pod_name, le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "range_query {{kubernetes_pod_name}}", @@ -1035,4 +1035,4 @@ "title": "Thanos Query", "uid": "opwl5gSiz", "version": 2 -} \ No newline at end of file +}