From 71b625d80806886794c5e72f7ff11432e99b736c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Feb 2019 16:54:35 +0000 Subject: [PATCH 1/8] Stop backpaginating when events not visible --- synapse/handlers/federation.py | 31 +++++++++++++++++++++++++++++ synapse/storage/event_federation.py | 22 ++++++++++++++++++++ synapse/visibility.py | 30 +++++++++++++++++++++++----- 3 files changed, 78 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 083f2e0ac..7b3834a91 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -830,6 +830,37 @@ class FederationHandler(BaseHandler): logger.debug("Not backfilling as no extremeties found.") return + # We only want to paginate if we can actually see the events we'll get, + # as otherwise we'll just spend a lot of resources to get redacted + # events. + # + # We do this by filtering all the extremities and seeing if any remain. + # Given we don't have the extremity events themselves, we need to + # actually check the events that references them. + # + # TODO: Filter the list of extremities if we do do a backfill + # TODO: Correctly handle the case where we are allowed to see the + # forward event but not the extremity, e.g. in the case of initial + # join of the server. + + forward_events = yield self.store.get_forward_events( + list(extremities), + ) + + extremities_events = yield self.store.get_events( + forward_events, + check_redacted=False, + get_prev_content=False, + ) + + filtered_extremities = yield filter_events_for_server( + self.store, self.server_name, list(extremities_events.values()), + redact=False, + ) + + if not filtered_extremities: + defer.returnValue(False) + # Check if we reached a point where we should start backfilling. sorted_extremeties_tuple = sorted( extremities.items(), diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 38809ed0f..830b171ca 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -442,6 +442,28 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, event_results.reverse() return event_results + @defer.inlineCallbacks + def get_forward_events(self, event_ids): + """Fetch all events that have the given events as a prev event + + Args: + event_ids (iterable[str]) + + Returns: + Deferred[list[str]] + """ + rows = yield self._simple_select_many_batch( + table="event_edges", + column="prev_event_id", + iterable=event_ids, + retcols=("event_id",), + desc="get_forward_events" + ) + + defer.returnValue([ + row["event_id"] for row in rows + ]) + class EventFederationStore(EventFederationWorkerStore): """ Responsible for storing and serving up the various graphs associated diff --git a/synapse/visibility.py b/synapse/visibility.py index 0281a7c91..f6dcc9663 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,20 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events): +def filter_events_for_server(store, server_name, events, redact=True): + """Filter a list of events based on whether given server is allowed to + see them. + + Args: + store (DataStore) + server_name (str) + events (iterable[FrozenEvent]) + redact (bool): Whether to return a redacted version of the event, or + to filter them out entirely. + + Returns + Deferred[list[FrozenEvent]] + """ # Whatever else we do, we need to check for senders which have requested # erasure of their data. erased_senders = yield store.are_users_erased( @@ -231,7 +244,10 @@ def filter_events_for_server(store, server_name, events): "Sender of %s has been erased, redacting", event.event_id, ) - return prune_event(event) + if redact: + return prune_event(event) + else: + return None # state will be None if we decided we didn't need to filter by # room membership. @@ -265,7 +281,10 @@ def filter_events_for_server(store, server_name, events): return event else: # server has no users in the room: redact - return prune_event(event) + if redact: + return prune_event(event) + else: + return None return event @@ -361,7 +380,8 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in iteritems(event_to_state_ids) } - defer.returnValue([ + to_return = ( redact_disallowed(e, event_to_state[e.event_id]) for e in events - ]) + ) + defer.returnValue([e for e in to_return if e is not None]) From 56f4ece778cddf081ea559ed3571bd91a8fee22b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Feb 2019 18:15:13 +0000 Subject: [PATCH 2/8] Newsfile --- changelog.d/4699.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/4699.bugfix diff --git a/changelog.d/4699.bugfix b/changelog.d/4699.bugfix new file mode 100644 index 000000000..991121b5b --- /dev/null +++ b/changelog.d/4699.bugfix @@ -0,0 +1 @@ +Fix attempting to paginate in rooms where server cannot see any events From b183fef9ac8075aaab892d1042596e0bba824167 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Feb 2019 13:06:10 +0000 Subject: [PATCH 3/8] Update comments --- synapse/handlers/federation.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 7b3834a91..de839ca52 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -836,12 +836,22 @@ class FederationHandler(BaseHandler): # # We do this by filtering all the extremities and seeing if any remain. # Given we don't have the extremity events themselves, we need to - # actually check the events that references them. + # actually check the events that reference them. + # + # *Note*: the spec wants us to keep backfilling until we reach the start + # of the room in case we are allowed to see some of the history. However + # in practice that causes more issues than its worth, as a) its + # relatively rare for there to be any visible history and b) even when + # there is its often sufficiently long ago that clients would stop + # attempting to paginate before backfill reached the visible history. + # + # TODO: If we do do a backfill the we should filter the extremities to + # only include those that point to visible portions of history. # - # TODO: Filter the list of extremities if we do do a backfill # TODO: Correctly handle the case where we are allowed to see the # forward event but not the extremity, e.g. in the case of initial - # join of the server. + # join of the server where we are allowed to see the join event but + # not anything before it. forward_events = yield self.store.get_forward_events( list(extremities), From 71ef5fc41164665a59f7ff3690ef54256f2a5cea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Feb 2019 13:22:23 +0000 Subject: [PATCH 4/8] Update newsfile to have a full stop --- changelog.d/4699.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/4699.bugfix b/changelog.d/4699.bugfix index 991121b5b..8cd8340cc 100644 --- a/changelog.d/4699.bugfix +++ b/changelog.d/4699.bugfix @@ -1 +1 @@ -Fix attempting to paginate in rooms where server cannot see any events +Fix attempting to paginate in rooms where server cannot see any events. From 8b63fe4c261a4451024dba7506b5872686c22282 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 11:56:03 +0000 Subject: [PATCH 5/8] s/get_forward_events/get_successor_events/ --- synapse/handlers/federation.py | 2 +- synapse/storage/event_federation.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0425380e5..32d7ba6cf 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -881,7 +881,7 @@ class FederationHandler(BaseHandler): # join of the server where we are allowed to see the join event but # not anything before it. - forward_events = yield self.store.get_forward_events( + forward_events = yield self.store.get_successor_events( list(extremities), ) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 830b171ca..a8d90456e 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -443,7 +443,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, return event_results @defer.inlineCallbacks - def get_forward_events(self, event_ids): + def get_successor_events(self, event_ids): """Fetch all events that have the given events as a prev event Args: @@ -457,7 +457,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, column="prev_event_id", iterable=event_ids, retcols=("event_id",), - desc="get_forward_events" + desc="get_successor_events" ) defer.returnValue([ From d1523aed6bebf00a4643d72eea611b029db65f08 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 14:34:34 +0000 Subject: [PATCH 6/8] Only check history visibility when filtering When filtering events to send to server we check more than just history visibility. However when deciding whether to backfill or not we only care about the history visibility. --- synapse/handlers/federation.py | 4 +- synapse/visibility.py | 77 +++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 32d7ba6cf..bf2989aef 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -891,9 +891,11 @@ class FederationHandler(BaseHandler): get_prev_content=False, ) + # We set `check_history_visibility_only` as we might otherwise get false + # positives from users having been erased. filtered_extremities = yield filter_events_for_server( self.store, self.server_name, list(extremities_events.values()), - redact=False, + redact=False, check_history_visibility_only=True, ) if not filtered_extremities: diff --git a/synapse/visibility.py b/synapse/visibility.py index f6dcc9663..8b9c7180b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,8 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events, redact=True): +def filter_events_for_server(store, server_name, events, redact=True, + check_history_visibility_only=False): """Filter a list of events based on whether given server is allowed to see them. @@ -226,34 +227,25 @@ def filter_events_for_server(store, server_name, events, redact=True): events (iterable[FrozenEvent]) redact (bool): Whether to return a redacted version of the event, or to filter them out entirely. + check_history_visibility_only (bool): Whether to only check the + history visibility, rather than things like if the sender has been + erased. This is used e.g. during pagination to decide whether to + backfill or not. Returns Deferred[list[FrozenEvent]] """ - # Whatever else we do, we need to check for senders which have requested - # erasure of their data. - erased_senders = yield store.are_users_erased( - (e.sender for e in events), - ) - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. + def is_sender_erased(event, erased_senders): if erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, ) - if redact: - return prune_event(event) - else: - return None - - # state will be None if we decided we didn't need to filter by - # room membership. - if not state: - return event + return True + return False + def check_event_is_visible(event, state): history = state.get((EventTypes.RoomHistoryVisibility, ''), None) if history: visibility = history.content.get("history_visibility", "shared") @@ -275,18 +267,15 @@ def filter_events_for_server(store, server_name, events, redact=True): memtype = ev.membership if memtype == Membership.JOIN: - return event + return True elif memtype == Membership.INVITE: if visibility == "invited": - return event + return True else: # server has no users in the room: redact - if redact: - return prune_event(event) - else: - return None + return False - return event + return True # Next lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't @@ -315,16 +304,31 @@ def filter_events_for_server(store, server_name, events, redact=True): for e in itervalues(event_map) ) + if not check_history_visibility_only: + erased_senders = yield store.are_users_erased( + (e.sender for e in events), + ) + else: + # We don't want to check whether users are erased, which is equivalent + # to no users having been erased. + erased_senders = {} + if all_open: # all the history_visibility state affecting these events is open, so # we don't need to filter by membership state. We *do* need to check # for user erasure, though. if erased_senders: - events = [ - redact_disallowed(e, None) - for e in events - ] + to_return = [] + for e in events: + if not is_sender_erased(e, erased_senders): + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + defer.returnValue(to_return) + + # If there are no erased users then we can just return the given list + # of events without having to copy it. defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -380,8 +384,13 @@ def filter_events_for_server(store, server_name, events, redact=True): for e_id, key_to_eid in iteritems(event_to_state_ids) } - to_return = ( - redact_disallowed(e, event_to_state[e.event_id]) - for e in events - ) - defer.returnValue([e for e in to_return if e is not None]) + to_return = [] + for e in events: + erased = is_sender_erased(e, erased_senders) + visible = check_event_is_visible(e, event_to_state[e.event_id]) + if visible and not erased: + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) From 0d2d046709270797f46a65672a5702b194dabef9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 16:04:04 +0000 Subject: [PATCH 7/8] Fix missing null guard --- synapse/visibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 8b9c7180b..e9dc73c25 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -237,7 +237,7 @@ def filter_events_for_server(store, server_name, events, redact=True, """ def is_sender_erased(event, erased_senders): - if erased_senders[event.sender]: + if erased_senders and erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, From aa06d26ae05585ddfb5e33a2dd521c9aa27b6cfa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Mar 2019 09:16:35 +0000 Subject: [PATCH 8/8] clarify comments --- changelog.d/4699.bugfix | 2 +- synapse/handlers/federation.py | 19 +++++++++++-------- synapse/visibility.py | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/changelog.d/4699.bugfix b/changelog.d/4699.bugfix index 8cd8340cc..1d7f3174e 100644 --- a/changelog.d/4699.bugfix +++ b/changelog.d/4699.bugfix @@ -1 +1 @@ -Fix attempting to paginate in rooms where server cannot see any events. +Fix attempting to paginate in rooms where server cannot see any events, to avoid unnecessarily pulling in lots of redacted events. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index bf2989aef..72b63d64d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -862,9 +862,9 @@ class FederationHandler(BaseHandler): # as otherwise we'll just spend a lot of resources to get redacted # events. # - # We do this by filtering all the extremities and seeing if any remain. - # Given we don't have the extremity events themselves, we need to - # actually check the events that reference them. + # We do this by filtering all the backwards extremities and seeing if + # any remain. Given we don't have the extremity events themselves, we + # need to actually check the events that reference them. # # *Note*: the spec wants us to keep backfilling until we reach the start # of the room in case we are allowed to see some of the history. However @@ -873,13 +873,16 @@ class FederationHandler(BaseHandler): # there is its often sufficiently long ago that clients would stop # attempting to paginate before backfill reached the visible history. # - # TODO: If we do do a backfill the we should filter the extremities to - # only include those that point to visible portions of history. + # TODO: If we do do a backfill then we should filter the backwards + # extremities to only include those that point to visible portions of + # history. # # TODO: Correctly handle the case where we are allowed to see the - # forward event but not the extremity, e.g. in the case of initial - # join of the server where we are allowed to see the join event but - # not anything before it. + # forward event but not the backward extremity, e.g. in the case of + # initial join of the server where we are allowed to see the join + # event but not anything before it. This would require looking at the + # state *before* the event, ignoring the special casing certain event + # types have. forward_events = yield self.store.get_successor_events( list(extremities), diff --git a/synapse/visibility.py b/synapse/visibility.py index e9dc73c25..efec21673 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -277,7 +277,7 @@ def filter_events_for_server(store, server_name, events, redact=True, return True - # Next lets check to see if all the events have a history visibility + # Lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events(