Skip to content

Commit

Permalink
Add "etcd_operation" and "etcd_type" variables to Grafana dashboard
Browse files Browse the repository at this point in the history
This should ease querying.
  • Loading branch information
oxddr committed Feb 12, 2020
1 parent 5c47df1 commit f944286
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 259 deletions.
28 changes: 20 additions & 8 deletions clusterloader2/pkg/prometheus/manifests/dashboards/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import attr
from grafanalib import core as g

# Tooltip configured with descending sort order (g.SORT_DESC).
DECREASING_ORDER_TOOLTIP = g.Tooltip(sort=g.SORT_DESC)
# Shared panel height: 300 pixels.
PANEL_HEIGHT = g.Pixels(300)
# Quantile values, listed from highest to lowest.
QUANTILES = [0.99, 0.9, 0.5]

# Datasource-typed template variable named "source" (query "prometheus");
# defined once at module level so it can be reused wherever a templating
# list is built.
SOURCE_TEMPLATE = g.Template(name="source", type="datasource", query="prometheus")


@attr.s
class Dashboard(g.Dashboard):
    """g.Dashboard with this module's default time range and templating.

    Overrides two attributes of the base class:
      * time: defaults to the range "now-30d" .. "now".
      * templating: defaults to a list holding only SOURCE_TEMPLATE.
    """

    time = attr.ib(default=g.Time("now-30d", "now"))
    # Make it possible to use $source as a source.
    templating = attr.ib(default=g.Templating(list=[SOURCE_TEMPLATE]))


# Graph is a g.Graph with reasonable defaults applied.
Expand Down Expand Up @@ -96,3 +93,18 @@ def min_max_avg(base, by, legend=""):

def any_of(*choices):
    """Join the given strings with "|" into a single alternation string."""
    separator = "|"
    return separator.join(choices)


def one_line(text):
    """Collapse a multi-line PromQL string into a single line.

    Useful to keep sane diffs for generated (*.json) dashboards.

    The text is split on double quotes and only the pieces outside quotes
    are reformatted, so quoted label values keep their exact content.
    Escaped quotes inside a quoted value are not recognised.

    Args:
        text: PromQL expression, possibly spanning several lines.

    Returns:
        The expression on one line: whitespace outside quotes is dropped,
        except that a single space is kept between two word-like tokens
        (e.g. "a and b", "x offset 5m"); every comma is followed by one
        space, and ")by(" / ")without(" are spaced out as ") by (" /
        ") without (".
    """
    tokens = text.split('"')
    # Even-indexed tokens sit outside double quotes; odd-indexed tokens are
    # the contents of quoted strings and must stay untouched.
    for i in range(0, len(tokens), 2):
        item = tokens[i]
        # Whitespace separating two word characters is significant
        # ("a and b" must not become "aandb"): keep exactly one space there.
        item = re.sub(r"(?<=\w)\s+(?=\w)", " ", item)
        # Any other whitespace only separates punctuation and can go.
        item = re.sub(r"(?<!\w)\s+|\s+(?!\w)", "", item)
        item = item.replace(",", ", ")
        item = re.sub(r"\)(by|without)\(", r") \1 (", item)
        tokens[i] = item
    return '"'.join(tokens)
Original file line number Diff line number Diff line change
Expand Up @@ -91,35 +91,67 @@ def api_call_latency(title, verb, scope, threshold):
]

ETCD_PANELS = [
d.simple_graph("etcd leader", "etcd_server_is_leader"),
d.simple_graph("etcd leader", "etcd_server_is_leader", legend="{{instance}}"),
d.simple_graph(
"etcd bytes sent",
"irate(etcd_network_client_grpc_sent_bytes_total[1m])",
yAxes=g.single_y_axis(format=g.BYTES_PER_SEC_FORMAT),
),
d.simple_graph(
"etcd lists rate",
'sum(rate(etcd_request_duration_seconds_count{operation="list"}[1m])) by (type)',
yAxes=g.single_y_axis(format=g.OPS_FORMAT),
legend="{{instance}}",
),
d.simple_graph(
"etcd operations rate",
"sum(rate(etcd_request_duration_seconds_count[1m])) by (operation, type)",
d.one_line(
"""
sum(
rate(
etcd_request_duration_seconds_count{
operation=~"${etcd_operation:regex}",
type=~".*(${etcd_type:pipe})"
}[1m]
)
) by (operation, type)
"""
),
yAxes=g.single_y_axis(format=g.OPS_FORMAT),
),
d.simple_graph(
"etcd get lease latency by instance (99th percentile)",
'histogram_quantile(0.99, sum(rate(etcd_request_duration_seconds_bucket{operation="get", type="*coordination.Lease"}[1m])) by (le, type, instance))',
yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
legend="{{operation}} {{type}}",
),
d.simple_graph(
"etcd get latency by type (99th percentile)",
'histogram_quantile(0.99, sum(rate(etcd_request_duration_seconds_bucket{operation="get"}[1m])) by (le, type))',
d.one_line(
"""
histogram_quantile(
0.99,
sum(
rate(
etcd_request_duration_seconds_bucket{
operation=~"${etcd_operation:regex}",
type=~".*(${etcd_type:pipe})"
}[1m]
)
) by (le, operation, type, instance)
)
"""
),
yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
legend="{{operation}} {{type}} on {{instance}}",
),
d.simple_graph(
"etcd get latency by type (50th percentile)",
'histogram_quantile(0.50, sum(rate(etcd_request_duration_seconds_bucket{operation="get"}[1m])) by (le, type))',
d.one_line(
"""
histogram_quantile(
0.50,
sum(
rate(
etcd_request_duration_seconds_bucket{
operation=~"${etcd_operation:regex}",
type=~".*(${etcd_type:pipe})"
}[1m]
)
) by (le, operation, type, instance)
)
"""
),
yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
),
d.simple_graph("etcd instance id", "sum(etcd_server_id) by (instance, server_id)"),
Expand Down Expand Up @@ -392,4 +424,26 @@ def api_call_latency(title, verb, scope, threshold):
),
d.Row(title="Master VM", panels=VM_PANELS, collapse=True),
],
templating=g.Templating(
list=[
d.SOURCE_TEMPLATE,
g.Template(
name="etcd_type",
type="query",
dataSource="$source",
regex=r"\*\[+\]+(.*)",
query="label_values(etcd_request_duration_seconds_count, type)",
multi=True,
includeAll=True,
),
g.Template(
name="etcd_operation",
type="query",
dataSource="$source",
query="label_values(etcd_request_duration_seconds_count, operation)",
multi=True,
includeAll=True,
),
]
),
).auto_panel_ids()
Loading

0 comments on commit f944286

Please sign in to comment.