diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b9001860c..bf8c6dbbfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#1358](https://github.com/thanos-io/thanos/pull/1358) Added `part_size` configuration option for HTTP multipart requests minimum part size for S3 storage type +- [#1363](https://github.com/thanos-io/thanos/pull/1363) Thanos Receive now exposes `thanos_receive_hashring_nodes` and `thanos_receive_hashring_tenants` metrics to monitor the status of hash-rings + ### Changed - [#1338](https://github.com/thanos-io/thanos/pull/1338) Querier still warns on store API duplicate, but allows a single one from duplicated set. This is gracefully warn about the problematic logic and not disrupt immediately. @@ -66,7 +68,7 @@ The other `type` you can use is `JAEGER` now. The `config` keys and values are J ### Changed -- [#1284](https://github.com/thanos-io/thanos/pull/1284) Add support for multiple label-sets in Info gRPC service. +- [#1284](https://github.com/thanos-io/thanos/pull/1284) Add support for multiple label-sets in Info gRPC service. This deprecates the single `Labels` slice of the `InfoResponse`; in a future release, backward-compatible handling for the single set of `Labels` will be removed. Upgrading to v0.6.0 or higher is advised. *breaking* If you run have duplicate queries in your Querier configuration with hierarchical federation of multiple Queries this PR makes Thanos Querier to detect this case and block all duplicates. Refer to 0.6.1 which at least allows for single replica to work. 
diff --git a/pkg/receive/config.go b/pkg/receive/config.go index 7fcd2c7c5b..61f9f0d05b 100644 --- a/pkg/receive/config.go +++ b/pkg/receive/config.go @@ -33,9 +33,11 @@ type ConfigWatcher struct { logger log.Logger watcher *fsnotify.Watcher - changesCounter prometheus.Counter - errorCounter prometheus.Counter - refreshCounter prometheus.Counter + changesCounter prometheus.Counter + errorCounter prometheus.Counter + refreshCounter prometheus.Counter + hashringNodesGauge *prometheus.GaugeVec + hashringTenantsGauge *prometheus.GaugeVec // last is the last known configuration. last []HashringConfig @@ -75,6 +77,18 @@ func NewConfigWatcher(logger log.Logger, r prometheus.Registerer, path string, i Name: "thanos_receive_hashrings_file_refreshes_total", Help: "The number of refreshes of the hashrings configuration file.", }), + hashringNodesGauge: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "thanos_receive_hashring_nodes", + Help: "The number of nodes per hashring.", + }, + []string{"name"}), + hashringTenantsGauge: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "thanos_receive_hashring_tenants", + Help: "The number of tenants per hashring.", + }, + []string{"name"}), } if r != nil { @@ -82,6 +96,8 @@ func NewConfigWatcher(logger log.Logger, r prometheus.Registerer, path string, i c.changesCounter, c.errorCounter, c.refreshCounter, + c.hashringNodesGauge, + c.hashringTenantsGauge, ) } @@ -172,6 +188,11 @@ func (cw *ConfigWatcher) refresh(ctx context.Context) { // Save the last known configuration. cw.last = config + for _, c := range config { + cw.hashringNodesGauge.WithLabelValues(c.Hashring).Set(float64(len(c.Endpoints))) + cw.hashringTenantsGauge.WithLabelValues(c.Hashring).Set(float64(len(c.Tenants))) + } + select { case <-ctx.Done(): return