Skip to content

Commit

Permalink
add bucket prefix
Browse files Browse the repository at this point in the history
Signed-off-by: Xiang Dai <764524258@qq.com>
  • Loading branch information
daixiang0 committed Feb 25, 2020
1 parent ffece8a commit dcf97d1
Show file tree
Hide file tree
Showing 14 changed files with 589 additions and 74 deletions.
20 changes: 10 additions & 10 deletions examples/alerts/alerts.md
Original file line number Diff line number Diff line change
Expand Up @@ -430,40 +430,40 @@ rules:
## Replicate
[embedmd]:# (../tmp/thanos-replicate.rules.yaml yaml)
[embedmd]:# (../tmp/thanos-bucket-replicate.rules.yaml yaml)
```yaml
name: thanos-replicate.rules
name: thanos-bucket-replicate.rules
rules:
- alert: ThanosReplicateIsDown
- alert: ThanosBucketReplicateIsDown
annotations:
message: Thanos Replicate has disappeared from Prometheus target discovery.
expr: |
absent(up{job=~"thanos-replicate.*"})
absent(up{job=~"thanos-bucket-replicate.*"})
for: 5m
labels:
severity: critical
- alert: ThanosReplicateErrorRate
- alert: ThanosBucketReplicateErrorRate
annotations:
message: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts
failed.
expr: |
(
sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-replicate.*"}[5m]))
sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m]))
/ on (namespace) group_left
sum(rate(thanos_replicate_replication_runs_total{job=~"thanos-replicate.*"}[5m]))
sum(rate(thanos_replicate_replication_runs_total{job=~"thanos-bucket-replicate.*"}[5m]))
) * 100 >= 10
for: 5m
labels:
severity: critical
- alert: ThanosReplicateRunLatency
- alert: ThanosBucketReplicateRunLatency
annotations:
message: Thanos Replicate {{$labels.job}} has a 90th percentile latency of {{
$value }} seconds for the replicate operations.
expr: |
(
histogram_quantile(0.9, sum by (job, le) (thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-replicate.*"})) > 120
histogram_quantile(0.9, sum by (job, le) (thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"})) > 120
and
sum by (job) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-replicate.*"}[5m])) > 0
sum by (job) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m])) > 0
)
for: 5m
labels:
Expand Down
18 changes: 9 additions & 9 deletions examples/alerts/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -439,38 +439,38 @@ groups:
for: 5m
labels:
severity: critical
- name: thanos-replicate.rules
- name: thanos-bucket-replicate.rules
rules:
- alert: ThanosReplicateIsDown
- alert: ThanosBucketReplicateIsDown
annotations:
message: Thanos Replicate has disappeared from Prometheus target discovery.
expr: |
absent(up{job=~"thanos-replicate.*"})
absent(up{job=~"thanos-bucket-replicate.*"})
for: 5m
labels:
severity: critical
- alert: ThanosReplicateErrorRate
- alert: ThanosBucketReplicateErrorRate
annotations:
message: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts
failed.
expr: |
(
sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-replicate.*"}[5m]))
sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m]))
/ on (namespace) group_left
sum(rate(thanos_replicate_replication_runs_total{job=~"thanos-replicate.*"}[5m]))
sum(rate(thanos_replicate_replication_runs_total{job=~"thanos-bucket-replicate.*"}[5m]))
) * 100 >= 10
for: 5m
labels:
severity: critical
- alert: ThanosReplicateRunLatency
- alert: ThanosBucketReplicateRunLatency
annotations:
message: Thanos Replicate {{$labels.job}} has a 90th percentile latency of {{
$value }} seconds for the replicate operations.
expr: |
(
histogram_quantile(0.9, sum by (job, le) (thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-replicate.*"})) > 120
histogram_quantile(0.9, sum by (job, le) (thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"})) > 120
and
sum by (job) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-replicate.*"}[5m])) > 0
sum by (job) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m])) > 0
)
for: 5m
labels:
Expand Down
2 changes: 1 addition & 1 deletion examples/alerts/rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,5 @@ groups:
labels:
quantile: "0.99"
record: :thanos_objstore_bucket_operation_duration_seconds:histogram_quantile
- name: thanos-replicate.rules
- name: thanos-bucket-replicate.rules
rules: []
Loading

0 comments on commit dcf97d1

Please sign in to comment.