OTLP: Add metrics to track otlp request samples per batch #8265

Merged · 5 commits · Jun 6, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -26,6 +26,7 @@
 * [FEATURE] Alertmanager: Added `-alertmanager.max-silences-count` and `-alertmanager.max-silence-size-bytes` to set limits on per tenant silences. Disabled by default. #6898
 * [FEATURE] Ingester: add experimental support for the server-side circuit breakers when writing to ingesters. This can be enabled using `-ingester.circuit-breaker.enabled` option. Further `-ingester.circuit-breaker.*` options for configuring circuit-breaker are available. Added metrics `cortex_ingester_circuit_breaker_results_total`, `cortex_ingester_circuit_breaker_transitions_total` and `cortex_ingester_circuit_breaker_current_state`. #8180
 * [FEATURE] Distributor, ingester: add new setting `-validation.past-grace-period` to limit how old (based on the wall clock minus OOO window) the ingested samples can be. The default 0 value disables this limit. #8262
+* [ENHANCEMENT] Distributor: add metrics `cortex_distributor_samples_per_request` and `cortex_distributor_exemplars_per_request` to track samples/exemplars per request. #8265
 * [ENHANCEMENT] Reduced memory allocations in functions used to propagate contextual information between gRPC calls. #7529
 * [ENHANCEMENT] Distributor: add experimental limit for exemplars per series per request, enabled with `-distributor.max-exemplars-per-series-per-request`, the number of discarded exemplars are tracked with `cortex_discarded_exemplars_total{reason="too_many_exemplars_per_series_per_request"}` #7989 #8010
 * [ENHANCEMENT] Store-gateway: merge series from different blocks concurrently. #7456
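
Both new metrics are registered as native (sparse) histograms rather than classic fixed-bucket ones. The following is a rough standalone sketch of what those registration options do, assuming the standard prometheus/client_golang API; the `demo_samples_per_request` metric name and the program around it are hypothetical, not part of this PR:

```go
package main

import (
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	reg := prometheus.NewRegistry()

	// Hypothetical metric mirroring the options used by this PR:
	// consecutive bucket boundaries differ by at most a factor of 2,
	// the histogram holds at most 100 buckets, and the bucket layout
	// is not reset more often than once per hour.
	perRequest := promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
		Name:                            "demo_samples_per_request",
		Help:                            "Number of samples per request (demo).",
		NativeHistogramBucketFactor:     2,
		NativeHistogramMinResetDuration: 1 * time.Hour,
		NativeHistogramMaxBucketNumber:  100,
	}, []string{"user"})

	perRequest.WithLabelValues("tenant-1").Observe(250)

	// Native histogram buckets only travel over the protobuf exposition
	// format; the plain-text /metrics output carries just a degraded view.
	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	_ = http.ListenAndServe(":8080", nil)
}
```

Once scraped by a Prometheus server running with `--enable-feature=native-histograms`, such a metric can be queried directly, for example with `histogram_quantile(0.99, rate(demo_samples_per_request[5m]))`.
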
26 changes: 26 additions & 0 deletions pkg/distributor/distributor.go
@@ -136,6 +136,8 @@ type Distributor struct {
     dedupedSamples                   *prometheus.CounterVec
     labelsHistogram                  prometheus.Histogram
     sampleDelayHistogram             prometheus.Histogram
+    incomingSamplesPerRequest        *prometheus.HistogramVec
+    incomingExemplarsPerRequest      *prometheus.HistogramVec
     latestSeenSampleTimestampPerUser *prometheus.GaugeVec
     hashCollisionCount               prometheus.Counter

@@ -410,6 +412,20 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove
                 60 * 60 * 24, // 24h
             },
         }),
+        incomingSamplesPerRequest: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
+            Name:                            "cortex_distributor_samples_per_request",
+            Help:                            "Number of samples per request before deduplication and validation.",
+            NativeHistogramBucketFactor:     2,
+            NativeHistogramMinResetDuration: 1 * time.Hour,
+            NativeHistogramMaxBucketNumber:  100,
+        }, []string{"user"}),
+        incomingExemplarsPerRequest: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
+            Name:                            "cortex_distributor_exemplars_per_request",
+            Help:                            "Number of exemplars per request before deduplication and validation.",
+            NativeHistogramBucketFactor:     2,
+            NativeHistogramMinResetDuration: 1 * time.Hour,
+            NativeHistogramMaxBucketNumber:  100,
+        }, []string{"user"}),
         latestSeenSampleTimestampPerUser: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
             Name: "cortex_distributor_latest_seen_sample_timestamp_seconds",
             Help: "Unix timestamp of latest received sample per user.",
@@ -649,6 +665,8 @@ func (d *Distributor) cleanupInactiveUser(userID string) {
     d.incomingSamples.DeleteLabelValues(userID)
     d.incomingExemplars.DeleteLabelValues(userID)
     d.incomingMetadata.DeleteLabelValues(userID)
+    d.incomingSamplesPerRequest.DeleteLabelValues(userID)
+    d.incomingExemplarsPerRequest.DeleteLabelValues(userID)
     d.nonHASamples.DeleteLabelValues(userID)
     d.latestSeenSampleTimestampPerUser.DeleteLabelValues(userID)

@@ -1031,7 +1049,11 @@ func (d *Distributor) prePushValidationMiddleware(next PushFunc) PushFunc {

         var firstPartialErr error
         var removeIndexes []int
+        totalSamples, totalExemplars := 0, 0
+
         for tsIdx, ts := range req.Timeseries {
+            totalSamples += len(ts.Samples)
+            totalExemplars += len(ts.Exemplars)
             if len(ts.Labels) == 0 {
                 removeIndexes = append(removeIndexes, tsIdx)
                 continue
@@ -1057,6 +1079,10 @@
             validatedSamples += len(ts.Samples) + len(ts.Histograms)
             validatedExemplars += len(ts.Exemplars)
         }
+
+        d.incomingSamplesPerRequest.WithLabelValues(userID).Observe(float64(totalSamples))
+        d.incomingExemplarsPerRequest.WithLabelValues(userID).Observe(float64(totalExemplars))
+
         if len(removeIndexes) > 0 {
             for _, removeIndex := range removeIndexes {
                 mimirpb.ReusePreallocTimeseries(&req.Timeseries[removeIndex])
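
For orientation, here is a self-contained sketch of the counting pattern the middleware change above follows; the trimmed-down `timeSeries` type is a hypothetical stand-in for `mimirpb.PreallocTimeseries`. The totals are accumulated across the whole request and observed exactly once, before invalid series are dropped, which is what the help text means by "before deduplication and validation":

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// timeSeries is a hypothetical stand-in for mimirpb.PreallocTimeseries,
// reduced to the fields the counting loop reads.
type timeSeries struct {
	Labels    []string
	Samples   []float64
	Exemplars []float64
}

func main() {
	reg := prometheus.NewRegistry()
	samplesPerRequest := promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
		Name:                        "cortex_distributor_samples_per_request",
		Help:                        "Number of samples per request before deduplication and validation.",
		NativeHistogramBucketFactor: 2,
	}, []string{"user"})

	req := []timeSeries{
		{Labels: []string{"job"}, Samples: []float64{1, 2, 3}},
		{Samples: []float64{4}}, // no labels: dropped by validation later, but still counted
	}

	totalSamples := 0
	for _, ts := range req {
		totalSamples += len(ts.Samples)
	}

	// One observation per request, taken before any series are removed.
	samplesPerRequest.WithLabelValues("tenant-1").Observe(float64(totalSamples))
	fmt.Println("observed samples:", totalSamples) // observed samples: 4

	// When a tenant goes inactive, its child series is deleted so the user
	// label value does not accumulate forever (mirrors cleanupInactiveUser).
	samplesPerRequest.DeleteLabelValues("tenant-1")
}
```
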