From 7c6b2204d143550d81e5bf9612c4e69fe0866b4c Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 9 Jun 2022 11:13:03 +0100
Subject: [PATCH] Faster joins: add issue links to the TODOs (#13004)

... to help us keep track of these things
---
 changelog.d/13004.misc                        |  1 +
 synapse/handlers/federation.py                | 13 ++++++++++++-
 synapse/handlers/federation_event.py          |  2 ++
 synapse/handlers/message.py                   |  1 +
 synapse/storage/controllers/persist_events.py |  5 ++++-
 synapse/storage/controllers/state.py          |  3 +++
 synapse/storage/databases/main/room.py        |  2 ++
 synapse/storage/databases/main/state.py       |  1 +
 synapse/storage/state.py                      |  1 +
 9 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13004.misc

diff --git a/changelog.d/13004.misc b/changelog.d/13004.misc
new file mode 100644
index 000000000..d8e93d87a
--- /dev/null
+++ b/changelog.d/13004.misc
@@ -0,0 +1 @@
+Faster joins: add issue links to the TODO comments in the code.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 6a143440d..5e1613962 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -545,6 +545,7 @@ class FederationHandler:
         if ret.partial_state:
             # TODO(faster_joins): roll this back if we don't manage to start the
             #   background resync (eg process_remote_join fails)
+            #   https://github.com/matrix-org/synapse/issues/12998
             await self.store.store_partial_state_room(room_id, ret.servers_in_room)
 
         max_stream_id = await self._federation_event_handler.process_remote_join(
@@ -1506,14 +1507,17 @@ class FederationHandler:
         # TODO(faster_joins): do we need to lock to avoid races? What happens if other
         #   worker processes kick off a resync in parallel? Perhaps we should just elect
         #   a single worker to do the resync.
+        #   https://github.com/matrix-org/synapse/issues/12994
         #
         # TODO(faster_joins): what happens if we leave the room during a resync? if we
         #   really leave, that might mean we have difficulty getting the room state over
         #   federation.
+        #   https://github.com/matrix-org/synapse/issues/12802
         #
         # TODO(faster_joins): we need some way of prioritising which homeservers in
         #   `other_destinations` to try first, otherwise we'll spend ages trying dead
         #   homeservers for large rooms.
+        #   https://github.com/matrix-org/synapse/issues/12999
 
         if initial_destination is None and len(other_destinations) == 0:
             raise ValueError(
@@ -1543,9 +1547,11 @@ class FederationHandler:
                 # all the events are updated, so we can update current state and
                 # clear the lazy-loading flag.
                 logger.info("Updating current state for %s", room_id)
+                # TODO(faster_joins): support workers
+                #   https://github.com/matrix-org/synapse/issues/12994
                 assert (
                     self._storage_controllers.persistence is not None
-                ), "TODO(faster_joins): support for workers"
+                ), "worker-mode deployments not currently supported here"
                 await self._storage_controllers.persistence.update_current_state(
                     room_id
                 )
@@ -1559,6 +1565,8 @@ class FederationHandler:
                     )
 
                     # TODO(faster_joins) update room stats and user directory?
+                    #   https://github.com/matrix-org/synapse/issues/12814
+                    #   https://github.com/matrix-org/synapse/issues/12815
                     return
 
                 # we raced against more events arriving with partial state. Go round
@@ -1566,6 +1574,8 @@ class FederationHandler:
                 # TODO(faster_joins): there is still a race here, whereby incoming events which raced
                 #   with us will fail to be persisted after the call to `clear_partial_state_room` due to
                 #   having partial state.
+                #   https://github.com/matrix-org/synapse/issues/12988
+                #
                 continue
 
             events = await self.store.get_events_as_list(
@@ -1588,6 +1598,7 @@ class FederationHandler:
                             #   indefinitely is also not the right thing to do if we can
                             #   reach all homeservers and they all claim they don't have
                             #   the state we want.
+                            #   https://github.com/matrix-org/synapse/issues/13000
                             logger.error(
                                 "Failed to get state for %s at %s from %s because %s, "
                                 "giving up!",
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 87a060835..9889d1cb4 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -532,6 +532,7 @@ class FederationEventHandler:
             #
             # TODO(faster_joins): we probably need to be more intelligent, and
             #   exclude partial-state prev_events from consideration
+            #   https://github.com/matrix-org/synapse/issues/13001
             logger.warning(
                 "%s still has partial state: can't de-partial-state it yet",
                 event.event_id,
@@ -777,6 +778,7 @@ class FederationEventHandler:
             state_ids = await self._resolve_state_at_missing_prevs(origin, event)
             # TODO(faster_joins): make sure that _resolve_state_at_missing_prevs does
             #   not return partial state
+            #   https://github.com/matrix-org/synapse/issues/13002
 
             await self._process_received_pdu(
                 origin, event, state_ids=state_ids, backfilled=backfilled
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index f455158a2..294217cc2 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1102,6 +1102,7 @@ class EventCreationHandler:
             #
             # TODO(faster_joins): figure out how this works, and make sure that the
             #   old state is complete.
+            #   https://github.com/matrix-org/synapse/issues/13003
             metadata = await self.store.get_metadata_for_events(state_event_ids)
 
             state_map_for_event: MutableStateMap[str] = {}
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 4caaa8180..4bcb99d06 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -388,10 +388,13 @@ class EventsPersistenceStorageController:
 
         # TODO(faster_joins): get a real stream ordering, to make this work correctly
         #   across workers.
+        #   https://github.com/matrix-org/synapse/issues/12994
         #
         # TODO(faster_joins): this can race against event persistence, in which case we
         #   will end up with incorrect state. Perhaps we should make this a job we
-        #   farm out to the event persister, somehow.
+        #   farm out to the event persister thread, somehow.
+        #   https://github.com/matrix-org/synapse/issues/13007
+        #
         stream_id = self.main_store.get_room_max_stream_ordering()
         await self.persist_events_store.update_current_state(room_id, delta, stream_id)
 
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 3b4cdb67e..d3a44bc87 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -452,6 +452,9 @@ class StateStorageController:
             up to date.
         """
         # FIXME(faster_joins): what do we do here?
+        #   https://github.com/matrix-org/synapse/issues/12814
+        #   https://github.com/matrix-org/synapse/issues/12815
+        #   https://github.com/matrix-org/synapse/issues/13008
 
         return await self.stores.main.get_partial_current_state_deltas(
             prev_stream_id, max_stream_id
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 68d4fc2e6..5760d3428 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1112,6 +1112,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
         # this can race with incoming events, so we watch out for FK errors.
         # TODO(faster_joins): this still doesn't completely fix the race, since the persist process
         #   is not atomic. I fear we need an application-level lock.
+        #   https://github.com/matrix-org/synapse/issues/12988
         try:
             await self.db_pool.runInteraction(
                 "clear_partial_state_room", self._clear_partial_state_room_txn, room_id
@@ -1119,6 +1120,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             return True
         except self.db_pool.engine.module.DatabaseError as e:
             # TODO(faster_joins): how do we distinguish between FK errors and other errors?
+            #   https://github.com/matrix-org/synapse/issues/12988
             logger.warning(
                 "Exception while clearing lazy partial-state-room %s, retrying: %s",
                 room_id,
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
index 5e6efbd0f..9674c4a75 100644
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@@ -435,6 +435,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         )
 
         # TODO(faster_joins): need to do something about workers here
+        #   https://github.com/matrix-org/synapse/issues/12994
         txn.call_after(self.is_partial_state_event.invalidate, (event.event_id,))
         txn.call_after(
             self._get_state_group_for_event.prefill,
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 96aaffb53..af3bab2c1 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -546,6 +546,7 @@ class StateFilter:
         #   the sender of a piece of state wasn't actually in the room, then clearly that
         #   state shouldn't have been returned.
         #   We should at least add some tests around this to see what happens.
+        #   https://github.com/matrix-org/synapse/issues/13006
 
         # if we haven't requested membership events, then it depends on the value of
         # 'include_others'