When restarting a partial join resync, prioritise the server which actioned a partial join (#14126)

This commit is contained in:
David Robertson 2022-10-18 12:33:18 +01:00 committed by GitHub
parent 4af93bd7f6
commit c3a4780080
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 95 additions and 31 deletions

View file

@ -632,6 +632,7 @@ class FederationHandler:
room_id=room_id,
servers=ret.servers_in_room,
device_lists_stream_id=self.store.get_device_stream_token(),
joined_via=origin,
)
try:
@ -1615,13 +1616,13 @@ class FederationHandler:
"""Resumes resyncing of all partial-state rooms after a restart."""
assert not self.config.worker.worker_app
partial_state_rooms = await self.store.get_partial_state_rooms_and_servers()
for room_id, servers_in_room in partial_state_rooms.items():
partial_state_rooms = await self.store.get_partial_state_room_resync_info()
for room_id, resync_info in partial_state_rooms.items():
run_as_background_process(
desc="sync_partial_state_room",
func=self._sync_partial_state_room,
initial_destination=None,
other_destinations=servers_in_room,
initial_destination=resync_info.joined_via,
other_destinations=resync_info.servers_in_room,
room_id=room_id,
)
@ -1650,28 +1651,12 @@ class FederationHandler:
# really leave, that might mean we have difficulty getting the room state over
# federation.
# https://github.com/matrix-org/synapse/issues/12802
#
# TODO(faster_joins): we need some way of prioritising which homeservers in
# `other_destinations` to try first, otherwise we'll spend ages trying dead
# homeservers for large rooms.
# https://github.com/matrix-org/synapse/issues/12999
if initial_destination is None and len(other_destinations) == 0:
raise ValueError(
f"Cannot resync state of {room_id}: no destinations provided"
)
# Make an infinite iterator of destinations to try. Once we find a working
# destination, we'll stick with it until it flakes.
destinations: Collection[str]
if initial_destination is not None:
# Move `initial_destination` to the front of the list.
destinations = list(other_destinations)
if initial_destination in destinations:
destinations.remove(initial_destination)
destinations = [initial_destination] + destinations
else:
destinations = other_destinations
destinations = _prioritise_destinations_for_partial_state_resync(
initial_destination, other_destinations, room_id
)
destination_iter = itertools.cycle(destinations)
# `destination` is the current remote homeserver we're pulling from.
@ -1769,3 +1754,29 @@ class FederationHandler:
room_id,
destination,
)
def _prioritise_destinations_for_partial_state_resync(
initial_destination: Optional[str],
other_destinations: Collection[str],
room_id: str,
) -> Collection[str]:
"""Work out the order in which we should ask servers to resync events.
If an `initial_destination` is given, it takes top priority. Otherwise
all servers are treated equally.
:raises ValueError: if no destination is provided at all.
"""
if initial_destination is None and len(other_destinations) == 0:
raise ValueError(f"Cannot resync state of {room_id}: no destinations provided")
if initial_destination is None:
return other_destinations
# Move `initial_destination` to the front of the list.
destinations = list(other_destinations)
if initial_destination in destinations:
destinations.remove(initial_destination)
destinations = [initial_destination] + destinations
return destinations