From 5aaadfaa2ef0b39c679bc6f63444bab9acd061c3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 11:16:38 +0100 Subject: [PATCH 1/6] Don't hold onto full state in state cache --- synapse/state/__init__.py | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index e3faa52cd64d..50b37c4e9f17 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -14,7 +14,7 @@ # limitations under the License. import heapq import logging -from collections import defaultdict +from collections import ChainMap, defaultdict from typing import ( TYPE_CHECKING, Any, @@ -92,8 +92,11 @@ def __init__( prev_group: Optional[int] = None, delta_ids: Optional[StateMap[str]] = None, ): - if state is None and state_group is None: - raise Exception("Either state or state group must be not None") + if state is None and state_group is None and prev_group is None: + raise Exception("One of state, state_group or prev_group must be not None") + + if prev_group is not None and delta_ids is None: + raise Exception("If prev_group is set so must delta_ids") # A map from (type, state_key) to event_id. # @@ -120,12 +123,32 @@ async def get_state( if self._state is not None: return self._state - assert self.state_group is not None + if self.state_group is not None: + return await state_storage.get_state_ids_for_group( + self.state_group, state_filter + ) + + assert self.prev_group is not None and self.delta_ids is not None - return await state_storage.get_state_ids_for_group( - self.state_group, state_filter + prev_state = await state_storage.get_state_ids_for_group( + self.prev_group, state_filter ) + # ChainMap expects MutableMapping, but since we're using it immutably + # its safe to give it immutable maps. + return ChainMap(self.delta_ids, prev_state) # type: ignore[arg-type] + + def set_state_group(self, state_group: int) -> None: + """Update the state group assigned to this state (e.g. after we've + persisted it) + """ + + self.state_group = state_group + + # We clear out the state as we know longer need to explicitly keep it in + # the `state_cache` (as the store state group cache will do that.) + self._state = None + def __len__(self) -> int: # The len should is used to estimate how large this cache entry is, for # cache eviction purposes. This is why if `self.state` is None it's fine @@ -320,7 +343,7 @@ async def compute_event_context( current_state_ids=state_ids_before_event, ) ) - entry.state_group = state_group_before_event + entry.set_state_group(state_group_before_event) else: state_group_before_event = entry.state_group @@ -769,9 +792,14 @@ def _make_state_cache_entry( prev_group = old_group delta_ids = n_delta_ids - return _StateCacheEntry( - state=new_state, state_group=None, prev_group=prev_group, delta_ids=delta_ids - ) + if prev_group is not None: + # If we have a prev group and deltas then we can drop the new state from + # the cache (to reduce memory usage). + return _StateCacheEntry( + state=None, state_group=None, prev_group=prev_group, delta_ids=delta_ids + ) + else: + return _StateCacheEntry(state=new_state, state_group=None) @attr.s(slots=True, auto_attribs=True) From 2e71dde0fff2b80b495a95a658b485ff94777888 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 14:43:07 +0100 Subject: [PATCH 2/6] Fix up cache entry __len__ --- synapse/state/__init__.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 50b37c4e9f17..f42278964563 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -151,10 +151,18 @@ def set_state_group(self, state_group: int) -> None: def __len__(self) -> int: # The len should is used to estimate how large this cache entry is, for - # cache eviction purposes. This is why if `self.state` is None it's fine - # to return 1. + # cache eviction purposes. This is why it's fine to return 1 if we're + # not storing any state. - return len(self._state) if self._state else 1 + length = 0 + + if self._state: + length += len(self._state) + + if self.delta_ids: + length += len(self.delta_ids) + + return length or 1 # Make sure its not 0. class StateHandler: From e58c171bf0141a5c147cef1c8f3aba94519441a6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 15:01:35 +0100 Subject: [PATCH 3/6] Newsfile --- changelog.d/13324.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13324.misc diff --git a/changelog.d/13324.misc b/changelog.d/13324.misc new file mode 100644 index 000000000000..30670cf56c1f --- /dev/null +++ b/changelog.d/13324.misc @@ -0,0 +1 @@ +Reduce the amount of state we store in the `state_cache`. From c99cbd165c2e8f82ec2a4af827974bcfa2299e88 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 15:26:42 +0100 Subject: [PATCH 4/6] Comments Co-authored-by: Brendan Abolivier --- synapse/state/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index f42278964563..6fbb27eb407a 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -146,11 +146,11 @@ def set_state_group(self, state_group: int) -> None: self.state_group = state_group # We clear out the state as we know longer need to explicitly keep it in - # the `state_cache` (as the store state group cache will do that.) + # the `state_cache` (as the store state group cache will do that). self._state = None def __len__(self) -> int: - # The len should is used to estimate how large this cache entry is, for + # The len should be used to estimate how large this cache entry is, for # cache eviction purposes. This is why it's fine to return 1 if we're # not storing any state. From 1acc6128efc5e2563ce8bc8a60da888f436c7a9d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 15:27:32 +0100 Subject: [PATCH 5/6] Add a comment --- synapse/state/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 6fbb27eb407a..d9e2511e8fb5 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -140,7 +140,9 @@ async def get_state( def set_state_group(self, state_group: int) -> None: """Update the state group assigned to this state (e.g. after we've - persisted it) + persisted it). + + Note: this will cause the cache entry to drop any stored state. """ self.state_group = state_group From 895b5c107d2416abbfd19637f0eaa3e6b4c6178b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 15:29:30 +0100 Subject: [PATCH 6/6] Don't cache the state if it has a state group --- synapse/state/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index d9e2511e8fb5..87ccd52f0ab7 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -780,7 +780,7 @@ def _make_state_cache_entry( old_state_event_ids = set(state.values()) if new_state_event_ids == old_state_event_ids: # got an exact match. - return _StateCacheEntry(state=new_state, state_group=sg) + return _StateCacheEntry(state=None, state_group=sg) # TODO: We want to create a state group for this set of events, to # increase cache hits, but we need to make sure that it doesn't