Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade Prometheus and add evaluation_delay in rule group config #1474

Merged
merged 4 commits into from
Mar 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Grafana Mimir - main / unreleased

* [CHANGE] Compactor: No longer upload debug meta files to object storage. #1257
* [FEATURE] Ruler: Allow setting `evaluation_delay` for each rule group via rules group configuration file. #1474
* [FEATURE] Distributor: Added the ability to forward specifics metrics to alternative remote_write API endpoints. #1052
* [ENHANCEMENT] Ruler: Add more detailed query information to ruler query stats logging. #1411

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ replace git.apache.org/thrift.git => github.com/apache/thrift v0.0.0-20180902110
replace github.com/bradfitz/gomemcache => github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab

// Using a fork of Prometheus while we work on querysharding to avoid a dependency on the upstream.
replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20220210151959-f8e3195f7500
replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20220314132007-23ce9ad9f0ff

// Pin hashicorp depencencies since the Prometheus fork, go mod tries to update them.
replace github.com/hashicorp/go-immutable-radix => github.com/hashicorp/go-immutable-radix v1.2.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1000,8 +1000,8 @@ github.com/grafana/dskit v0.0.0-20220211095946-19921f863583/go.mod h1:q51XdMLLHN
github.com/grafana/e2e v0.1.0 h1:nThd0U0TjUqyOOupSb+qDd4BOdhqwhR/oYbjoqiMlZk=
github.com/grafana/e2e v0.1.0/go.mod h1:+26VJWpczg2OU3D0537acnHSHzhJORpxOs6F+M27tZo=
github.com/grafana/memberlist v0.2.5-0.20211201083710-c7bc8e9df94b/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/grafana/mimir-prometheus v0.0.0-20220210151959-f8e3195f7500 h1:1l+T/VWSSC3Uz+9Pkgxv7pUngRLpoeRWagX3DPsTn6I=
github.com/grafana/mimir-prometheus v0.0.0-20220210151959-f8e3195f7500/go.mod h1:6K+MGuCdYASOcOEKusiGUeYeRoobrW/26smN9OCXb0M=
github.com/grafana/mimir-prometheus v0.0.0-20220314132007-23ce9ad9f0ff h1:8cfiJnBz2zYCBvMNK1zAmVc7lu27mQBtuHxoCdxpqPw=
github.com/grafana/mimir-prometheus v0.0.0-20220314132007-23ce9ad9f0ff/go.mod h1:6K+MGuCdYASOcOEKusiGUeYeRoobrW/26smN9OCXb0M=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
Expand Down
57 changes: 16 additions & 41 deletions pkg/ruler/compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/rules"
Expand All @@ -39,29 +38,15 @@ type PusherAppender struct {
failedWrites prometheus.Counter
totalWrites prometheus.Counter

ctx context.Context
pusher Pusher
labels []labels.Labels
samples []mimirpb.Sample
userID string
evaluationDelay time.Duration
ctx context.Context
pusher Pusher
labels []labels.Labels
samples []mimirpb.Sample
userID string
}

func (a *PusherAppender) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) {
a.labels = append(a.labels, l)

// Adapt staleness markers for ruler evaluation delay. As the upstream code
// is using the actual time, when there is a no longer available series.
// This then causes 'out of order' append failures once the series is
// becoming available again.
// see https://github.com/prometheus/prometheus/blob/6c56a1faaaad07317ff585bda75b99bdba0517ad/rules/manager.go#L647-L660
// Similar to staleness markers, the rule manager also appends actual time to the ALERTS and ALERTS_FOR_STATE series.
// See: https://github.com/prometheus/prometheus/blob/ae086c73cb4d6db9e8b67d5038d3704fea6aec4a/rules/alerting.go#L414-L417
metricName := l.Get(labels.MetricName)
if a.evaluationDelay > 0 && (value.IsStaleNaN(v) || metricName == "ALERTS" || metricName == "ALERTS_FOR_STATE") {
t -= a.evaluationDelay.Milliseconds()
}

a.samples = append(a.samples, mimirpb.Sample{
TimestampMs: t,
Value: v,
Expand Down Expand Up @@ -100,9 +85,8 @@ func (a *PusherAppender) Rollback() error {

// PusherAppendable fulfills the storage.Appendable interface for prometheus manager
type PusherAppendable struct {
pusher Pusher
userID string
rulesLimits RulesLimits
pusher Pusher
userID string

totalWrites prometheus.Counter
failedWrites prometheus.Counter
Expand All @@ -112,7 +96,6 @@ func NewPusherAppendable(pusher Pusher, userID string, limits RulesLimits, total
return &PusherAppendable{
pusher: pusher,
userID: userID,
rulesLimits: limits,
totalWrites: totalWrites,
failedWrites: failedWrites,
}
Expand All @@ -124,10 +107,9 @@ func (t *PusherAppendable) Appender(ctx context.Context) storage.Appender {
failedWrites: t.failedWrites,
totalWrites: t.totalWrites,

ctx: ctx,
pusher: t.pusher,
userID: t.userID,
evaluationDelay: t.rulesLimits.EvaluationDelay(t.userID),
ctx: ctx,
pusher: t.pusher,
userID: t.userID,
}
}

Expand All @@ -139,18 +121,6 @@ type RulesLimits interface {
RulerMaxRulesPerRuleGroup(userID string) int
}

// EngineQueryFunc returns a new query function using the rules.EngineQueryFunc function
// and passing an altered timestamp.
func EngineQueryFunc(engine *promql.Engine, q storage.Queryable, overrides RulesLimits, userID string) rules.QueryFunc {
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
orig := rules.EngineQueryFunc(engine, q)
// Delay the evaluation of all rules by a set interval to give a buffer
// to metric that haven't been forwarded to cortex yet.
evaluationDelay := overrides.EvaluationDelay(userID)
return orig(ctx, qs, t.Add(-evaluationDelay))
}
}

func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Counter) rules.QueryFunc {
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
queries.Inc()
Expand Down Expand Up @@ -282,7 +252,7 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, queryable, federatedQuery
// Errors from PromQL are always "user" errors.
q = querier.NewErrorTranslateQueryableWithFn(q, WrapQueryableErrors)

queryFunc = EngineQueryFunc(engine, q, overrides, userID)
queryFunc = rules.EngineQueryFunc(engine, q)
queryFunc = MetricsQueryFunc(queryFunc, totalQueries, failedQueries)
queryFunc = RecordAndReportRuleQueryMetrics(queryFunc, queryTime, logger)
return queryFunc
Expand All @@ -304,6 +274,11 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, queryable, federatedQuery
OutageTolerance: cfg.OutageTolerance,
ForGracePeriod: cfg.ForGracePeriod,
ResendDelay: cfg.ResendDelay,
DefaultEvaluationDelay: func() time.Duration {
// Delay the evaluation of all rules by a set interval to give a buffer
// to metric that haven't been forwarded to Mimir yet.
return overrides.EvaluationDelay(userID)
},
})
}
}
Expand Down
35 changes: 2 additions & 33 deletions pkg/ruler/compat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ func TestPusherAppendable(t *testing.T) {
for _, tc := range []struct {
name string
series string
evalDelay time.Duration
value float64
expectedTS int64
}{
Expand All @@ -65,50 +64,20 @@ func TestPusherAppendable(t *testing.T) {
expectedTS: 120_000,
},
{
name: "tenant with delay, normal value",
series: "foo_bar",
value: 1.234,
expectedTS: 120_000,
evalDelay: time.Minute,
},
{
name: "tenant with delay, stale nan value",
value: math.Float64frombits(value.StaleNaN),
expectedTS: 60_000,
evalDelay: time.Minute,
},
{
name: "ALERTS without delay, normal value",
name: "ALERTS, normal value",
series: `ALERTS{alertname="boop"}`,
value: 1.234,
expectedTS: 120_000,
},
{
name: "ALERTS without delay, stale nan value",
name: "ALERTS, stale nan value",
series: `ALERTS{alertname="boop"}`,
value: math.Float64frombits(value.StaleNaN),
expectedTS: 120_000,
},
{
name: "ALERTS with delay, normal value",
series: `ALERTS{alertname="boop"}`,
value: 1.234,
expectedTS: 60_000,
evalDelay: time.Minute,
},
{
name: "ALERTS with delay, stale nan value",
series: `ALERTS_FOR_STATE{alertname="boop"}`,
value: math.Float64frombits(value.StaleNaN),
expectedTS: 60_000,
evalDelay: time.Minute,
},
} {
t.Run(tc.name, func(t *testing.T) {
ctx := context.Background()
pa.rulesLimits = &ruleLimits{
evalDelay: tc.evalDelay,
}

lbls, err := parser.ParseMetric(tc.series)
require.NoError(t, err)
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions vendor/github.com/prometheus/prometheus/rules/alerting.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 40 additions & 22 deletions vendor/github.com/prometheus/prometheus/rules/manager.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions vendor/github.com/prometheus/prometheus/rules/recording.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading