Merge remote-tracking branch 'upstream/release-v1.70'

This commit is contained in:
Tulir Asokan 2022-10-19 17:24:11 +03:00
commit c3b3895da4
142 changed files with 4896 additions and 2015 deletions

View file

@ -45,6 +45,7 @@ from synapse.api.errors import (
Codes,
FederationDeniedError,
FederationError,
FederationPullAttemptBackoffError,
HttpResponseException,
LimitExceededError,
NotFoundError,
@ -631,6 +632,7 @@ class FederationHandler:
room_id=room_id,
servers=ret.servers_in_room,
device_lists_stream_id=self.store.get_device_stream_token(),
joined_via=origin,
)
try:
@ -781,15 +783,27 @@ class FederationHandler:
# Send the signed event back to the room, and potentially receive some
# further information about the room in the form of partial state events
stripped_room_state = await self.federation_client.send_knock(
target_hosts, event
)
knock_response = await self.federation_client.send_knock(target_hosts, event)
# Store any stripped room state events in the "unsigned" key of the event.
# This is a bit of a hack and is cribbing off of invites. Basically we
# store the room state here and retrieve it again when this event appears
# in the invitee's sync stream. It is stripped out for all other local users.
event.unsigned["knock_room_state"] = stripped_room_state["knock_state_events"]
stripped_room_state = (
knock_response.get("knock_room_state")
# Since v1.37, Synapse incorrectly used "knock_state_events" for this field.
# Thus, we also check for a 'knock_state_events' to support old instances.
# See https://github.com/matrix-org/synapse/issues/14088.
or knock_response.get("knock_state_events")
)
if stripped_room_state is None:
raise KeyError(
"Missing 'knock_room_state' (or legacy 'knock_state_events') field in "
"send_knock response"
)
event.unsigned["knock_room_state"] = stripped_room_state
context = EventContext.for_outlier(self._storage_controllers)
stream_id = await self._federation_event_handler.persist_events_and_notify(
@ -928,7 +942,7 @@ class FederationHandler:
# The remote hasn't signed it yet, obviously. We'll do the full checks
# when we get the event back in `on_send_join_request`
await self._event_auth_handler.check_auth_rules_from_context(event, context)
await self._event_auth_handler.check_auth_rules_from_context(event)
return event
async def on_invite_request(
@ -1109,7 +1123,7 @@ class FederationHandler:
try:
# The remote hasn't signed it yet, obviously. We'll do the full checks
# when we get the event back in `on_send_leave_request`
await self._event_auth_handler.check_auth_rules_from_context(event, context)
await self._event_auth_handler.check_auth_rules_from_context(event)
except AuthError as e:
logger.warning("Failed to create new leave %r because %s", event, e)
raise e
@ -1168,7 +1182,7 @@ class FederationHandler:
try:
# The remote hasn't signed it yet, obviously. We'll do the full checks
# when we get the event back in `on_send_knock_request`
await self._event_auth_handler.check_auth_rules_from_context(event, context)
await self._event_auth_handler.check_auth_rules_from_context(event)
except AuthError as e:
logger.warning("Failed to create new knock %r because %s", event, e)
raise e
@ -1334,9 +1348,7 @@ class FederationHandler:
try:
validate_event_for_room_version(event)
await self._event_auth_handler.check_auth_rules_from_context(
event, context
)
await self._event_auth_handler.check_auth_rules_from_context(event)
except AuthError as e:
logger.warning("Denying new third party invite %r because %s", event, e)
raise e
@ -1386,7 +1398,7 @@ class FederationHandler:
try:
validate_event_for_room_version(event)
await self._event_auth_handler.check_auth_rules_from_context(event, context)
await self._event_auth_handler.check_auth_rules_from_context(event)
except AuthError as e:
logger.warning("Denying third party invite %r because %s", event, e)
raise e
@ -1602,13 +1614,13 @@ class FederationHandler:
"""Resumes resyncing of all partial-state rooms after a restart."""
assert not self.config.worker.worker_app
partial_state_rooms = await self.store.get_partial_state_rooms_and_servers()
for room_id, servers_in_room in partial_state_rooms.items():
partial_state_rooms = await self.store.get_partial_state_room_resync_info()
for room_id, resync_info in partial_state_rooms.items():
run_as_background_process(
desc="sync_partial_state_room",
func=self._sync_partial_state_room,
initial_destination=None,
other_destinations=servers_in_room,
initial_destination=resync_info.joined_via,
other_destinations=resync_info.servers_in_room,
room_id=room_id,
)
@ -1637,28 +1649,12 @@ class FederationHandler:
# really leave, that might mean we have difficulty getting the room state over
# federation.
# https://github.com/matrix-org/synapse/issues/12802
#
# TODO(faster_joins): we need some way of prioritising which homeservers in
# `other_destinations` to try first, otherwise we'll spend ages trying dead
# homeservers for large rooms.
# https://github.com/matrix-org/synapse/issues/12999
if initial_destination is None and len(other_destinations) == 0:
raise ValueError(
f"Cannot resync state of {room_id}: no destinations provided"
)
# Make an infinite iterator of destinations to try. Once we find a working
# destination, we'll stick with it until it flakes.
destinations: Collection[str]
if initial_destination is not None:
# Move `initial_destination` to the front of the list.
destinations = list(other_destinations)
if initial_destination in destinations:
destinations.remove(initial_destination)
destinations = [initial_destination] + destinations
else:
destinations = other_destinations
destinations = _prioritise_destinations_for_partial_state_resync(
initial_destination, other_destinations, room_id
)
destination_iter = itertools.cycle(destinations)
# `destination` is the current remote homeserver we're pulling from.
@ -1708,7 +1704,22 @@ class FederationHandler:
destination, event
)
break
except FederationPullAttemptBackoffError as exc:
# Log a warning about why we failed to process the event (the error message
# for `FederationPullAttemptBackoffError` is pretty good)
logger.warning("_sync_partial_state_room: %s", exc)
# We do not record a failed pull attempt when we backoff fetching a missing
# `prev_event` because not being able to fetch the `prev_events` just means
# we won't be able to de-outlier the pulled event. But we can still use an
# `outlier` in the state/auth chain for another event. So we shouldn't stop
# a downstream event from trying to pull it.
#
# This avoids a cascade of backoff for all events in the DAG downstream from
# one event backoff upstream.
except FederationError as e:
# TODO: We should `record_event_failed_pull_attempt` here,
# see https://github.com/matrix-org/synapse/issues/13700
if attempt == len(destinations) - 1:
# We have tried every remote server for this event. Give up.
# TODO(faster_joins) giving up isn't the right thing to do
@ -1741,3 +1752,29 @@ class FederationHandler:
room_id,
destination,
)
def _prioritise_destinations_for_partial_state_resync(
initial_destination: Optional[str],
other_destinations: Collection[str],
room_id: str,
) -> Collection[str]:
"""Work out the order in which we should ask servers to resync events.
If an `initial_destination` is given, it takes top priority. Otherwise
all servers are treated equally.
:raises ValueError: if no destination is provided at all.
"""
if initial_destination is None and len(other_destinations) == 0:
raise ValueError(f"Cannot resync state of {room_id}: no destinations provided")
if initial_destination is None:
return other_destinations
# Move `initial_destination` to the front of the list.
destinations = list(other_destinations)
if initial_destination in destinations:
destinations.remove(initial_destination)
destinations = [initial_destination] + destinations
return destinations