Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Quick & dirty metric for background update status #15740

Merged
merged 4 commits into from
Jun 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/15740.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Expose a metric reporting the database background update status.
2 changes: 2 additions & 0 deletions synapse/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def collect() -> Iterable[Metric]:

@attr.s(slots=True, hash=True, auto_attribs=True)
class LaterGauge(Collector):
"""A Gauge which periodically calls a user-provided callback to produce metrics."""

name: str
desc: str
labels: Optional[Sequence[str]] = attr.ib(hash=False)
Expand Down
30 changes: 30 additions & 0 deletions synapse/storage/background_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from enum import IntEnum
from types import TracebackType
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -136,6 +137,15 @@ def total_items_per_ms(self) -> Optional[float]:
return float(self.total_item_count) / float(self.total_duration_ms)


class UpdaterStatus(IntEnum):
    """Integer summary of the background updater's state.

    The numeric value is what gets reported when the status is exposed as a
    metric, so existing values must not be renumbered.
    """

    # Use negative values for error conditions.
    # The updater halted all background updates after repeated failures.
    ABORTED = -1
    # Background updates are switched off (the updater's `enabled` flag is False).
    DISABLED = 0
    # Enabled, but neither currently running nor finished.
    NOT_STARTED = 1
    # The updater is currently running updates.
    RUNNING_UPDATE = 2
    # The updater has marked all updates as done.
    COMPLETE = 3


class BackgroundUpdater:
"""Background updates are updates to the database that run in the
background. Each update processes a batch of data at once. We attempt to
Expand All @@ -158,11 +168,16 @@ def __init__(self, hs: "HomeServer", database: "DatabasePool"):

self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {}
self._background_update_handlers: Dict[str, _BackgroundUpdateHandler] = {}
# TODO: all these bool flags make me feel icky---can we combine into a status
MatMaul marked this conversation as resolved.
Show resolved Hide resolved
# enum?
self._all_done = False

# Whether we're currently running updates
self._running = False

# Marker to be set if we abort and halt all background updates.
self._aborted = False
MatMaul marked this conversation as resolved.
Show resolved Hide resolved

# Whether background updates are enabled. This allows us to
# enable/disable background updates via the admin API.
self.enabled = True
Expand All @@ -175,6 +190,20 @@ def __init__(self, hs: "HomeServer", database: "DatabasePool"):
self.sleep_duration_ms = hs.config.background_updates.sleep_duration_ms
self.sleep_enabled = hs.config.background_updates.sleep_enabled

def get_status(self) -> UpdaterStatus:
    """An integer summarising the updater status. Used as a metric.

    The flags are checked in order of precedence, and the first match wins:
    aborted, then disabled, then complete, then running. If none of them
    apply, the updater is reported as not yet started.

    Returns:
        The `UpdaterStatus` member describing the current state.
    """
    # An abort is an error condition, so it takes priority over everything else.
    if self._aborted:
        return UpdaterStatus.ABORTED
    # TODO: a status for "have seen at least one failure, but haven't aborted yet".
    if not self.enabled:
        return UpdaterStatus.DISABLED

    if self._all_done:
        return UpdaterStatus.COMPLETE
    if self._running:
        return UpdaterStatus.RUNNING_UPDATE
    return UpdaterStatus.NOT_STARTED

def register_update_controller_callbacks(
self,
on_update: ON_UPDATE_CALLBACK,
Expand Down Expand Up @@ -296,6 +325,7 @@ async def run_background_updates(self, sleep: bool) -> None:
except Exception:
back_to_back_failures += 1
if back_to_back_failures >= 5:
self._aborted = True
raise RuntimeError(
"5 back-to-back background update failures; aborting."
)
Expand Down
8 changes: 7 additions & 1 deletion synapse/storage/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
current_context,
make_deferred_yieldable,
)
from synapse.metrics import register_threadpool
from synapse.metrics import LaterGauge, register_threadpool
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage.background_updates import BackgroundUpdater
from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
Expand Down Expand Up @@ -547,6 +547,12 @@ def __init__(
self._db_pool = make_pool(hs.get_reactor(), database_config, engine)

self.updates = BackgroundUpdater(hs, self)
LaterGauge(
"synapse_background_update_status",
"Background update status",
[],
self.updates.get_status,
)

self._previous_txn_total_time = 0.0
self._current_txn_total_time = 0.0
Expand Down