Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix inaccurate per-block metrics #6491

Merged
merged 1 commit into from
Dec 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/6491.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix inaccurate per-block Prometheus metrics.
60 changes: 18 additions & 42 deletions synapse/util/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,72 +80,48 @@ class Measure(object):
__slots__ = [
"clock",
"name",
"start_context",
"_logging_context",
"start",
"created_context",
"start_usage",
]

def __init__(self, clock, name):
self.clock = clock
self.name = name
self.start_context = None
self._logging_context = None
self.start = None
self.created_context = False

def __enter__(self):
self.start = self.clock.time()
self.start_context = LoggingContext.current_context()
if not self.start_context:
self.start_context = LoggingContext("Measure")
self.start_context.__enter__()
self.created_context = True

self.start_usage = self.start_context.get_resource_usage()
if self._logging_context:
raise RuntimeError("Measure() objects cannot be re-used")

self.start = self.clock.time()
parent_context = LoggingContext.current_context()
self._logging_context = LoggingContext(
"Measure[%s]" % (self.name,), parent_context
)
self._logging_context.__enter__()
in_flight.register((self.name,), self._update_in_flight)

def __exit__(self, exc_type, exc_val, exc_tb):
if isinstance(exc_type, Exception) or not self.start_context:
return

in_flight.unregister((self.name,), self._update_in_flight)
if not self._logging_context:
raise RuntimeError("Measure() block exited without being entered")

duration = self.clock.time() - self.start
usage = self._logging_context.get_resource_usage()

block_counter.labels(self.name).inc()
block_timer.labels(self.name).inc(duration)

context = LoggingContext.current_context()

if context != self.start_context:
logger.warning(
"Context has unexpectedly changed from '%s' to '%s'. (%r)",
self.start_context,
context,
self.name,
)
return

if not context:
logger.warning("Expected context. (%r)", self.name)
return
in_flight.unregister((self.name,), self._update_in_flight)
self._logging_context.__exit__(exc_type, exc_val, exc_tb)

current = context.get_resource_usage()
usage = current - self.start_usage
try:
block_counter.labels(self.name).inc()
block_timer.labels(self.name).inc(duration)
block_ru_utime.labels(self.name).inc(usage.ru_utime)
block_ru_stime.labels(self.name).inc(usage.ru_stime)
block_db_txn_count.labels(self.name).inc(usage.db_txn_count)
block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec)
block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec)
except ValueError:
logger.warning(
"Failed to save metrics! OLD: %r, NEW: %r", self.start_usage, current
)

if self.created_context:
self.start_context.__exit__(exc_type, exc_val, exc_tb)
logger.warning("Failed to save metrics! Usage: %s", usage)

def _update_in_flight(self, metrics):
"""Gets called when processing in flight metrics
Expand Down