Improve failover logic for MSC3083 restricted rooms. (#10447)

If the federation client receives an M_UNABLE_TO_AUTHORISE_JOIN or
M_UNABLE_TO_GRANT_JOIN response it will attempt another server
before giving up completely.
This commit is contained in:
Patrick Cloke 2021-07-29 07:50:14 -04:00 committed by GitHub
parent f8c87c65eb
commit 3a541a7daa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 4 deletions

View File

@ -0,0 +1 @@
Update support for [MSC3083](https://github.com/matrix-org/matrix-doc/pull/3083) to consider changes in the MSC around which servers can issue join events.

View File

@ -22,6 +22,7 @@ from typing import (
Awaitable,
Callable,
Collection,
Container,
Dict,
Iterable,
List,
@ -513,6 +514,7 @@ class FederationClient(FederationBase):
description: str,
destinations: Iterable[str],
callback: Callable[[str], Awaitable[T]],
failover_errcodes: Optional[Container[str]] = None,
failover_on_unknown_endpoint: bool = False,
) -> T:
"""Try an operation on a series of servers, until it succeeds
@ -533,6 +535,9 @@ class FederationClient(FederationBase):
next server tried. Normally the stacktrace is logged but this is
suppressed if the exception is an InvalidResponseError.
failover_errcodes: Error codes (specific to this endpoint) which should
cause a failover when received as part of an HTTP 400 error.
failover_on_unknown_endpoint: if True, we will try other servers if it looks
like a server doesn't support the endpoint. This is typically useful
if the endpoint in question is new or experimental.
@ -544,6 +549,9 @@ class FederationClient(FederationBase):
SynapseError if the chosen remote server returns a 300/400 code, or
no servers were reachable.
"""
if failover_errcodes is None:
failover_errcodes = ()
for destination in destinations:
if destination == self.server_name:
continue
@ -558,11 +566,17 @@ class FederationClient(FederationBase):
synapse_error = e.to_synapse_error()
failover = False
# Failover on an internal server error, or if the destination
# doesn't implemented the endpoint for some reason.
# Failover should occur:
#
# * On internal server errors.
# * If the destination responds that it cannot complete the request.
# * If the destination doesn't implemented the endpoint for some reason.
if 500 <= e.code < 600:
failover = True
elif e.code == 400 and synapse_error.errcode in failover_errcodes:
failover = True
elif failover_on_unknown_endpoint and self._is_unknown_endpoint(
e, synapse_error
):
@ -678,8 +692,20 @@ class FederationClient(FederationBase):
return destination, ev, room_version
# MSC3083 defines additional error codes for room joins. Unfortunately
# we do not yet know the room version, assume these will only be returned
# by valid room versions.
failover_errcodes = (
(Codes.UNABLE_AUTHORISE_JOIN, Codes.UNABLE_TO_GRANT_JOIN)
if membership == Membership.JOIN
else None
)
return await self._try_destination_list(
"make_" + membership, destinations, send_request
"make_" + membership,
destinations,
send_request,
failover_errcodes=failover_errcodes,
)
async def send_join(
@ -818,7 +844,14 @@ class FederationClient(FederationBase):
origin=destination,
)
# MSC3083 defines additional error codes for room joins.
failover_errcodes = None
if room_version.msc3083_join_rules:
failover_errcodes = (
Codes.UNABLE_AUTHORISE_JOIN,
Codes.UNABLE_TO_GRANT_JOIN,
)
# If the join is being authorised via allow rules, we need to send
# the /send_join back to the same server that was originally used
# with /make_join.
@ -827,7 +860,9 @@ class FederationClient(FederationBase):
get_domain_from_id(pdu.content["join_authorised_via_users_server"])
]
return await self._try_destination_list("send_join", destinations, send_request)
return await self._try_destination_list(
"send_join", destinations, send_request, failover_errcodes=failover_errcodes
)
async def _do_send_join(
self, room_version: RoomVersion, destination: str, pdu: EventBase