Make federation catchup send last event from any server. (#9640)

Currently federation catchup will send the last *local* event that we
failed to send to the remote. This can cause issues for large rooms
where lots of servers have sent events while the remote server was down,
as when it comes back up again it'll be flooded with events from various
points in the DAG.

Instead, let's make it so that all the servers send the most recent
events, even if its not theirs. The remote should deduplicate the
events, so there shouldn't be much overhead in doing this.
Alternatively, the servers could only send local events if they were
also extremities and hope that the other server will send the event
over, but that is a bit risky.
This commit is contained in:
Erik Johnston 2021-03-18 15:52:26 +00:00 committed by GitHub
parent 7b06f85c0e
commit dd71eb0f8a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 141 additions and 38 deletions

View file

@ -35,7 +35,7 @@ from twisted.internet import defer
from twisted.internet.abstract import isIPAddress
from twisted.python import failure
from synapse.api.constants import EduTypes, EventTypes, Membership
from synapse.api.constants import EduTypes, EventTypes
from synapse.api.errors import (
AuthError,
Codes,
@ -63,7 +63,7 @@ from synapse.replication.http.federation import (
ReplicationFederationSendEduRestServlet,
ReplicationGetQueryRestServlet,
)
from synapse.types import JsonDict, get_domain_from_id
from synapse.types import JsonDict
from synapse.util import glob_to_regex, json_decoder, unwrapFirstError
from synapse.util.async_helpers import Linearizer, concurrently_execute
from synapse.util.caches.response_cache import ResponseCache
@ -727,27 +727,6 @@ class FederationServer(FederationBase):
if the event was unacceptable for any other reason (eg, too large,
too many prev_events, couldn't find the prev_events)
"""
# check that it's actually being sent from a valid destination to
# workaround bug #1753 in 0.18.5 and 0.18.6
if origin != get_domain_from_id(pdu.sender):
# We continue to accept join events from any server; this is
# necessary for the federation join dance to work correctly.
# (When we join over federation, the "helper" server is
# responsible for sending out the join event, rather than the
# origin. See bug #1893. This is also true for some third party
# invites).
if not (
pdu.type == "m.room.member"
and pdu.content
and pdu.content.get("membership", None)
in (Membership.JOIN, Membership.INVITE)
):
logger.info(
"Discarding PDU %s from invalid origin %s", pdu.event_id, origin
)
return
else:
logger.info("Accepting join PDU %s from %s", pdu.event_id, origin)
# We've already checked that we know the room version by this point
room_version = await self.store.get_room_version(pdu.room_id)