Add ability to wait for replication streams (#7542)

The idea here is that if an instance persists an event via the replication HTTP API, the request can return before that instance has received the event over replication. This can lead to races where code assumes that persisting an event immediately updates various caches (e.g. the current state of the room).

Most of Synapse doesn't hit such races, so to avoid unnecessary delays we don't do the waiting automagically; instead, we wait explicitly only where it is necessary. We may decide to change our minds here if it turns out there are a lot of subtle races going on.

People probably want to look at this commit by commit.
This commit is contained in:
Erik Johnston 2020-05-22 14:21:54 +01:00 committed by GitHub
parent 06a02bc1ce
commit 1531b214fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 304 additions and 112 deletions

View file

@ -126,6 +126,7 @@ class FederationHandler(BaseHandler):
self._server_notices_mxid = hs.config.server_notices_mxid
self.config = hs.config
self.http_client = hs.get_simple_http_client()
self._replication = hs.get_replication_data_handler()
self._send_events_to_master = ReplicationFederationSendEventsRestServlet.make_client(
hs
@ -1221,7 +1222,7 @@ class FederationHandler(BaseHandler):
async def do_invite_join(
self, target_hosts: Iterable[str], room_id: str, joinee: str, content: JsonDict
) -> None:
) -> Tuple[str, int]:
""" Attempts to join the `joinee` to the room `room_id` via the
servers contained in `target_hosts`.
@ -1304,15 +1305,23 @@ class FederationHandler(BaseHandler):
room_id=room_id, room_version=room_version_obj,
)
await self._persist_auth_tree(
max_stream_id = await self._persist_auth_tree(
origin, auth_chain, state, event, room_version_obj
)
# We wait here until this instance has seen the events come down
# replication (if we're using replication) as the below uses caches.
#
# TODO: Currently the events stream is written to from master
await self._replication.wait_for_stream_position(
"master", "events", max_stream_id
)
# Check whether this room is the result of an upgrade of a room we already know
# about. If so, migrate over user information
predecessor = await self.store.get_room_predecessor(room_id)
if not predecessor or not isinstance(predecessor.get("room_id"), str):
return
return event.event_id, max_stream_id
old_room_id = predecessor["room_id"]
logger.debug(
"Found predecessor for %s during remote join: %s", room_id, old_room_id
@ -1325,6 +1334,7 @@ class FederationHandler(BaseHandler):
)
logger.debug("Finished joining %s to %s", joinee, room_id)
return event.event_id, max_stream_id
finally:
room_queue = self.room_queues[room_id]
del self.room_queues[room_id]
@ -1554,7 +1564,7 @@ class FederationHandler(BaseHandler):
async def do_remotely_reject_invite(
self, target_hosts: Iterable[str], room_id: str, user_id: str, content: JsonDict
) -> EventBase:
) -> Tuple[EventBase, int]:
origin, event, room_version = await self._make_and_verify_event(
target_hosts, room_id, user_id, "leave", content=content
)
@ -1574,9 +1584,9 @@ class FederationHandler(BaseHandler):
await self.federation_client.send_leave(target_hosts, event)
context = await self.state_handler.compute_event_context(event)
await self.persist_events_and_notify([(event, context)])
stream_id = await self.persist_events_and_notify([(event, context)])
return event
return event, stream_id
async def _make_and_verify_event(
self,
@ -1888,7 +1898,7 @@ class FederationHandler(BaseHandler):
state: List[EventBase],
event: EventBase,
room_version: RoomVersion,
) -> None:
) -> int:
"""Checks the auth chain is valid (and passes auth checks) for the
state and event. Then persists the auth chain and state atomically.
Persists the event separately. Notifies about the persisted events
@ -1982,7 +1992,7 @@ class FederationHandler(BaseHandler):
event, old_state=state
)
await self.persist_events_and_notify([(event, new_event_context)])
return await self.persist_events_and_notify([(event, new_event_context)])
async def _prep_event(
self,
@ -2835,7 +2845,7 @@ class FederationHandler(BaseHandler):
self,
event_and_contexts: Sequence[Tuple[EventBase, EventContext]],
backfilled: bool = False,
) -> None:
) -> int:
"""Persists events and tells the notifier/pushers about them, if
necessary.
@ -2845,11 +2855,12 @@ class FederationHandler(BaseHandler):
backfilling or not
"""
if self.config.worker_app:
await self._send_events_to_master(
result = await self._send_events_to_master(
store=self.store,
event_and_contexts=event_and_contexts,
backfilled=backfilled,
)
return result["max_stream_id"]
else:
max_stream_id = await self.storage.persistence.persist_events(
event_and_contexts, backfilled=backfilled
@ -2864,6 +2875,8 @@ class FederationHandler(BaseHandler):
for event, _ in event_and_contexts:
await self._notify_persisted_event(event, max_stream_id)
return max_stream_id
async def _notify_persisted_event(
self, event: EventBase, max_stream_id: int
) -> None: