Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add a prometheus metric for active cache lookups. #5750

Merged
merged 2 commits into from
Jul 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/5750.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a prometheus metric for pending cache lookups.
17 changes: 16 additions & 1 deletion synapse/util/caches/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -51,7 +52,19 @@ def get_cache_factor_for(cache_name):
response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"])


def register_cache(cache_type, cache_name, cache):
def register_cache(cache_type, cache_name, cache, collect_callback=None):
"""Register a cache object for metric collection.

Args:
cache_type (str):
cache_name (str): name of the cache
cache (object): cache itself
collect_callback (callable|None): if not None, a function which is called during
metric collection to update additional metrics.

Returns:
CacheMetric: an object which provides inc_{hits,misses,evictions} methods
"""

# Check if the metric is already registered. Unregister it, if so.
# This usually happens during tests, as at runtime these caches are
Expand Down Expand Up @@ -90,6 +103,8 @@ def collect(self):
cache_hits.labels(cache_name).set(self.hits)
cache_evicted.labels(cache_name).set(self.evicted_size)
cache_total.labels(cache_name).set(self.hits + self.misses)
if collect_callback:
collect_callback()
except Exception as e:
logger.warn("Error calculating metrics for %s: %s", cache_name, e)
raise
Expand Down
18 changes: 17 additions & 1 deletion synapse/util/caches/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import six
from six import itervalues, string_types

from prometheus_client import Gauge

from twisted.internet import defer

from synapse.logging.context import make_deferred_yieldable, preserve_fn
Expand All @@ -37,6 +39,12 @@
logger = logging.getLogger(__name__)


cache_pending_metric = Gauge(
"synapse_util_caches_cache_pending",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, a question: the existing cache metrics (

cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
etc.) have a colon in their names, but the Prometheus exposition code duplicates them, replacing colons with underscores, and even goes so far as to call the colons weird:
# Get rid of the weird colon things while we're at it

I'm assuming this means the colon names are deprecated and underscores are the new black, but I'm not aware of any authority or conscious decision behind that.

"Number of lookups currently pending for this cache",
["name"],
)

_CacheSentinel = object()


Expand Down Expand Up @@ -82,11 +90,19 @@ def __init__(self, name, max_entries=1000, keylen=1, tree=False, iterable=False)
self.name = name
self.keylen = keylen
self.thread = None
self.metrics = register_cache("cache", name, self.cache)
self.metrics = register_cache(
"cache",
name,
self.cache,
collect_callback=self._metrics_collection_callback,
)

def _on_evicted(self, evicted_count):
self.metrics.inc_evictions(evicted_count)

def _metrics_collection_callback(self):
    """Refresh the pending-lookups gauge for this cache.

    Sets the ``cache_pending_metric`` sample labelled with ``self.name`` to
    the current length of ``self._pending_deferred_cache``.
    """
    # Resolve the labelled child first, then read the length, matching the
    # evaluation order of the single-expression form.
    pending_gauge = cache_pending_metric.labels(self.name)
    pending_gauge.set(len(self._pending_deferred_cache))

def check_thread(self):
expected_thread = self.thread
if expected_thread is None:
Expand Down