Skip to content

Commit

Permalink
Upgrade Prometheus and add evaluation_delay in rule group config (#1474)
Browse files Browse the repository at this point in the history
* Upgrade Prometheus and add evaluation_delay in rule group config

Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>

* Add CHANGELOG entry

Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>

* Fix lint

Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>

* Fix comments

Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>
  • Loading branch information
codesome authored Mar 15, 2022
1 parent 242e2f7 commit 20af422
Show file tree
Hide file tree
Showing 11 changed files with 76 additions and 113 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Grafana Mimir - main / unreleased

* [CHANGE] Compactor: No longer upload debug meta files to object storage. #1257
* [FEATURE] Ruler: Allow setting `evaluation_delay` for each rule group via rules group configuration file. #1474
* [FEATURE] Distributor: Added the ability to forward specifics metrics to alternative remote_write API endpoints. #1052
* [ENHANCEMENT] Ruler: Add more detailed query information to ruler query stats logging. #1411
* [ENHANCEMENT] Admin: Admin API now has some styling. #1482
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ replace git.apache.org/thrift.git => github.com/apache/thrift v0.0.0-20180902110
replace github.com/bradfitz/gomemcache => github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab

// Using a fork of Prometheus while we work on querysharding to avoid a dependency on the upstream.
replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20220210151959-f8e3195f7500
replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20220314132007-23ce9ad9f0ff

// Pin hashicorp depencencies since the Prometheus fork, go mod tries to update them.
replace github.com/hashicorp/go-immutable-radix => github.com/hashicorp/go-immutable-radix v1.2.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1000,8 +1000,8 @@ github.com/grafana/dskit v0.0.0-20220314143558-7b6c9c059728/go.mod h1:q51XdMLLHN
github.com/grafana/e2e v0.1.0 h1:nThd0U0TjUqyOOupSb+qDd4BOdhqwhR/oYbjoqiMlZk=
github.com/grafana/e2e v0.1.0/go.mod h1:+26VJWpczg2OU3D0537acnHSHzhJORpxOs6F+M27tZo=
github.com/grafana/memberlist v0.2.5-0.20211201083710-c7bc8e9df94b/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/grafana/mimir-prometheus v0.0.0-20220210151959-f8e3195f7500 h1:1l+T/VWSSC3Uz+9Pkgxv7pUngRLpoeRWagX3DPsTn6I=
github.com/grafana/mimir-prometheus v0.0.0-20220210151959-f8e3195f7500/go.mod h1:6K+MGuCdYASOcOEKusiGUeYeRoobrW/26smN9OCXb0M=
github.com/grafana/mimir-prometheus v0.0.0-20220314132007-23ce9ad9f0ff h1:8cfiJnBz2zYCBvMNK1zAmVc7lu27mQBtuHxoCdxpqPw=
github.com/grafana/mimir-prometheus v0.0.0-20220314132007-23ce9ad9f0ff/go.mod h1:6K+MGuCdYASOcOEKusiGUeYeRoobrW/26smN9OCXb0M=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
Expand Down
57 changes: 16 additions & 41 deletions pkg/ruler/compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/rules"
Expand All @@ -39,29 +38,15 @@ type PusherAppender struct {
failedWrites prometheus.Counter
totalWrites prometheus.Counter

ctx context.Context
pusher Pusher
labels []labels.Labels
samples []mimirpb.Sample
userID string
evaluationDelay time.Duration
ctx context.Context
pusher Pusher
labels []labels.Labels
samples []mimirpb.Sample
userID string
}

func (a *PusherAppender) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) {
a.labels = append(a.labels, l)

// Adapt staleness markers for ruler evaluation delay. As the upstream code
// is using the actual time, when there is a no longer available series.
// This then causes 'out of order' append failures once the series is
// becoming available again.
// see https://github.com/prometheus/prometheus/blob/6c56a1faaaad07317ff585bda75b99bdba0517ad/rules/manager.go#L647-L660
// Similar to staleness markers, the rule manager also appends actual time to the ALERTS and ALERTS_FOR_STATE series.
// See: https://github.com/prometheus/prometheus/blob/ae086c73cb4d6db9e8b67d5038d3704fea6aec4a/rules/alerting.go#L414-L417
metricName := l.Get(labels.MetricName)
if a.evaluationDelay > 0 && (value.IsStaleNaN(v) || metricName == "ALERTS" || metricName == "ALERTS_FOR_STATE") {
t -= a.evaluationDelay.Milliseconds()
}

a.samples = append(a.samples, mimirpb.Sample{
TimestampMs: t,
Value: v,
Expand Down Expand Up @@ -100,9 +85,8 @@ func (a *PusherAppender) Rollback() error {

// PusherAppendable fulfills the storage.Appendable interface for prometheus manager
type PusherAppendable struct {
pusher Pusher
userID string
rulesLimits RulesLimits
pusher Pusher
userID string

totalWrites prometheus.Counter
failedWrites prometheus.Counter
Expand All @@ -112,7 +96,6 @@ func NewPusherAppendable(pusher Pusher, userID string, limits RulesLimits, total
return &PusherAppendable{
pusher: pusher,
userID: userID,
rulesLimits: limits,
totalWrites: totalWrites,
failedWrites: failedWrites,
}
Expand All @@ -124,10 +107,9 @@ func (t *PusherAppendable) Appender(ctx context.Context) storage.Appender {
failedWrites: t.failedWrites,
totalWrites: t.totalWrites,

ctx: ctx,
pusher: t.pusher,
userID: t.userID,
evaluationDelay: t.rulesLimits.EvaluationDelay(t.userID),
ctx: ctx,
pusher: t.pusher,
userID: t.userID,
}
}

Expand All @@ -139,18 +121,6 @@ type RulesLimits interface {
RulerMaxRulesPerRuleGroup(userID string) int
}

// EngineQueryFunc returns a new query function using the rules.EngineQueryFunc function
// and passing an altered timestamp.
func EngineQueryFunc(engine *promql.Engine, q storage.Queryable, overrides RulesLimits, userID string) rules.QueryFunc {
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
orig := rules.EngineQueryFunc(engine, q)
// Delay the evaluation of all rules by a set interval to give a buffer
// to metric that haven't been forwarded to cortex yet.
evaluationDelay := overrides.EvaluationDelay(userID)
return orig(ctx, qs, t.Add(-evaluationDelay))
}
}

func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Counter) rules.QueryFunc {
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
queries.Inc()
Expand Down Expand Up @@ -282,7 +252,7 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, queryable, federatedQuery
// Errors from PromQL are always "user" errors.
q = querier.NewErrorTranslateQueryableWithFn(q, WrapQueryableErrors)

queryFunc = EngineQueryFunc(engine, q, overrides, userID)
queryFunc = rules.EngineQueryFunc(engine, q)
queryFunc = MetricsQueryFunc(queryFunc, totalQueries, failedQueries)
queryFunc = RecordAndReportRuleQueryMetrics(queryFunc, queryTime, logger)
return queryFunc
Expand All @@ -304,6 +274,11 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, queryable, federatedQuery
OutageTolerance: cfg.OutageTolerance,
ForGracePeriod: cfg.ForGracePeriod,
ResendDelay: cfg.ResendDelay,
DefaultEvaluationDelay: func() time.Duration {
// Delay the evaluation of all rules by a set interval to give a buffer
// to metric that haven't been forwarded to Mimir yet.
return overrides.EvaluationDelay(userID)
},
})
}
}
Expand Down
35 changes: 2 additions & 33 deletions pkg/ruler/compat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ func TestPusherAppendable(t *testing.T) {
for _, tc := range []struct {
name string
series string
evalDelay time.Duration
value float64
expectedTS int64
}{
Expand All @@ -65,50 +64,20 @@ func TestPusherAppendable(t *testing.T) {
expectedTS: 120_000,
},
{
name: "tenant with delay, normal value",
series: "foo_bar",
value: 1.234,
expectedTS: 120_000,
evalDelay: time.Minute,
},
{
name: "tenant with delay, stale nan value",
value: math.Float64frombits(value.StaleNaN),
expectedTS: 60_000,
evalDelay: time.Minute,
},
{
name: "ALERTS without delay, normal value",
name: "ALERTS, normal value",
series: `ALERTS{alertname="boop"}`,
value: 1.234,
expectedTS: 120_000,
},
{
name: "ALERTS without delay, stale nan value",
name: "ALERTS, stale nan value",
series: `ALERTS{alertname="boop"}`,
value: math.Float64frombits(value.StaleNaN),
expectedTS: 120_000,
},
{
name: "ALERTS with delay, normal value",
series: `ALERTS{alertname="boop"}`,
value: 1.234,
expectedTS: 60_000,
evalDelay: time.Minute,
},
{
name: "ALERTS with delay, stale nan value",
series: `ALERTS_FOR_STATE{alertname="boop"}`,
value: math.Float64frombits(value.StaleNaN),
expectedTS: 60_000,
evalDelay: time.Minute,
},
} {
t.Run(tc.name, func(t *testing.T) {
ctx := context.Background()
pa.rulesLimits = &ruleLimits{
evalDelay: tc.evalDelay,
}

lbls, err := parser.ParseMetric(tc.series)
require.NoError(t, err)
Expand Down
11 changes: 6 additions & 5 deletions vendor/github.com/prometheus/prometheus/model/rulefmt/rulefmt.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions vendor/github.com/prometheus/prometheus/rules/alerting.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 40 additions & 22 deletions vendor/github.com/prometheus/prometheus/rules/manager.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions vendor/github.com/prometheus/prometheus/rules/recording.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 20af422

Please sign in to comment.