Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Update get_pdu to return the original, pristine EventBase #13320

Merged
merged 22 commits into from
Jul 20, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ee236ca
Update get_pdu to return original, pristine EventBase
MadLittleMods Jul 18, 2022
79a1b72
Add changelog
MadLittleMods Jul 18, 2022
bfd35fd
Internal change, no specific bugfix
MadLittleMods Jul 18, 2022
e0e20a5
Explain why not
MadLittleMods Jul 18, 2022
22410f2
Add tests
MadLittleMods Jul 19, 2022
09c411b
Some more clarity
MadLittleMods Jul 19, 2022
6029b42
Re-use room ID
MadLittleMods Jul 19, 2022
09167b1
Better still actionable no-fluff assertion message
MadLittleMods Jul 19, 2022
eb6a291
Describe why we use a cache here
MadLittleMods Jul 19, 2022
1c4e57c
Remove direct access to internal property
MadLittleMods Jul 19, 2022
488f5ed
Make it obvious that we're pulling and using a different cache
MadLittleMods Jul 19, 2022
29a5269
Remove assumption/speculation
MadLittleMods Jul 19, 2022
2688e44
Default is already no metadata
MadLittleMods Jul 19, 2022
24913e7
Refactor structure to avoid duplicating the event copy logic
MadLittleMods Jul 19, 2022
0e6dd5a
Pluralization typo
MadLittleMods Jul 19, 2022
5bc75ed
Explain that we return a copy that is safe to modify
MadLittleMods Jul 19, 2022
dea7669
Fix lints
MadLittleMods Jul 19, 2022
72e65a5
Fix description typo
MadLittleMods Jul 19, 2022
86fe0dc
Share event throughout
MadLittleMods Jul 20, 2022
fd879bb
Different comment
MadLittleMods Jul 20, 2022
354678f
Merge branch 'madlittlemods/pristine-get_pdu' of github.com:matrix-or…
MadLittleMods Jul 20, 2022
233077c
Merge branch 'develop' into madlittlemods/pristine-get_pdu
MadLittleMods Jul 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13320.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix `FederationClient.get_pdu()` returning events from the cache as `outliers` instead of original events we saw over federation.
43 changes: 37 additions & 6 deletions synapse/federation/federation_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
RoomVersion,
RoomVersions,
)
from synapse.events import EventBase, builder
from synapse.events import EventBase, builder, make_event_from_dict
from synapse.federation.federation_base import (
FederationBase,
InvalidEventSignatureError,
Expand Down Expand Up @@ -309,7 +309,7 @@ async def get_pdu_from_destination_raw(
)

logger.debug(
"retrieved event id %s from %s: %r",
"get_pdu_from_destination_raw: retrieved event id %s from %s: %r",
event_id,
destination,
transaction_data,
Expand Down Expand Up @@ -360,9 +360,25 @@ async def get_pdu(

# TODO: Rate limit the number of times we try and get the same event.

ev = self._get_pdu_cache.get(event_id)
if ev:
return ev
event_from_cache = self._get_pdu_cache.get(event_id)
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
if event_from_cache:
assert not event_from_cache.internal_metadata.outlier, (
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
"Event from cache unexpectedly an `outlier` when it should be pristine and untouched without metadata set. "
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
"We are probably not be returning a copy of the event because downstream callers are modifying the event reference we have in the cache."
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
)

# Make sure to return a copy because downstream callers will use
# this event reference directly and change our original, pristine,
# untouched PDU. For example when people mark the event as an
# `outlier` (`event.internal_metadata.outlier = true`), we don't
# want that to propagate back into the cache.
event_copy = make_event_from_dict(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we're going to build a new EventBase on each call, why not just cache the raw json?

Copy link
Contributor Author

@MadLittleMods MadLittleMods Jul 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would work. But we would have to convert the EventBase back to JSON from get_pdu_from_destination_raw since it needs one for _check_sigs_and_hash(room_version, pdu) anyway (whether that be in get_pdu_from_destination_raw or in get_pdu). I'm inclined to leave it as-is. Feel free to push again

event_from_cache.get_pdu_json(),
event_from_cache.room_version,
internal_metadata_dict=None,
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
)

return event_copy

pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {})

Expand Down Expand Up @@ -405,7 +421,22 @@ async def get_pdu(
if signed_pdu:
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
self._get_pdu_cache[event_id] = signed_pdu

return signed_pdu
# Make sure to return a copy because downstream callers will use this
# event reference directly and change our original, pristine, untouched
# PDU. For example when people mark the event as an `outlier`
# (`event.internal_metadata.outlier = true`), we don't want that to
# propagate back into the cache.
#
# We could get away with only making a new copy of the event when
# pulling from cache but it's probably better to have good hygiene and
# not dirty the cache in the first place as well.
event_copy = make_event_from_dict(
signed_pdu.get_pdu_json(),
signed_pdu.room_version,
internal_metadata_dict=None,
)
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved

return event_copy

async def get_room_state_ids(
self, destination: str, room_id: str, event_id: str
Expand Down
19 changes: 15 additions & 4 deletions synapse/handlers/federation_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,10 +766,21 @@ async def _process_pulled_event(
"""
logger.info("Processing pulled event %s", event)

# these should not be outliers.
assert (
not event.internal_metadata.is_outlier()
), "pulled event unexpectedly flagged as outlier"
# This function should not be used to persist outliers. If you happen to
# run into a situation where the event you're trying to process/backfill
# is marked as an `outlier`, then you should update that spot to return
# an `EventBase` copy that doesn't have `outlier` flag set.
#
# `EventBase` is used to represent both an event we have not yet
# persisted, and one that we have persisted and now keep in the cache.
# In an ideal world this method would only be called with the first type
# of event, but it turns out that's not actually the case and for
# example, you could get an event from cache that is marked as an
# `outlier` (fix up that spot though).
assert not event.internal_metadata.is_outlier(), (
"This is a safe-guard to make sure we're not trying to persist an outlier using this function (use something else). "
"If you're trying to process/backfill an event, this is the right method but you need pass in an event copy that doesn't have `event.internal_metada.outlier = true`."
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
)

event_id = event.event_id

Expand Down
23 changes: 20 additions & 3 deletions synapse/storage/databases/main/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,9 +1346,24 @@ def _update_outliers_txn(
event_id: outlier for event_id, outlier in txn
}

logger.debug(
"_update_outliers_txn: events=%s have_persisted=%s",
[ev.event_id for ev, _ in events_and_contexts],
have_persisted,
)

to_remove = set()
for event, context in events_and_contexts:
if event.event_id not in have_persisted:
outlier_persisted = have_persisted.get(event.event_id)
logger.debug(
"_update_outliers_txn: event=%s outlier=%s outlier_persisted=%s",
event.event_id,
event.internal_metadata.is_outlier(),
outlier_persisted,
)

# Ignore events which we haven't persisted at all
if outlier_persisted is None:
continue

to_remove.add(event)
Expand All @@ -1358,7 +1373,6 @@ def _update_outliers_txn(
# was an outlier or not - what we have is at least as good.
continue

outlier_persisted = have_persisted[event.event_id]
if not event.internal_metadata.is_outlier() and outlier_persisted:
# We received a copy of an event that we had already stored as
# an outlier in the database. We now have some state at that event
Expand All @@ -1369,7 +1383,10 @@ def _update_outliers_txn(
# events down /sync. In general they will be historical events, so that
# doesn't matter too much, but that is not always the case.

logger.info("Updating state for ex-outlier event %s", event.event_id)
logger.info(
"_update_outliers_txn: Updating state for ex-outlier event %s",
event.event_id,
)

# insert into event_to_state_groups.
try:
Expand Down