Refactor MSC3030 /timestamp_to_event to move away from our snowflake pull from destination pattern (#14096)

1. `federation_client.timestamp_to_event(...)` now handles all `destination` looping and uses our generic `_try_destination_list(...)` helper.
 2. Consistently handling `NotRetryingDestination` and `FederationDeniedError` across `get_pdu` , backfill, and the generic `_try_destination_list` which is used for many places we use this pattern.
 3. `get_pdu(...)` now returns `PulledPduInfo` so we know which `destination` we ended up pulling the PDU from
This commit is contained in:
Eric Eastwood 2022-10-26 16:10:55 -05:00 committed by GitHub
parent 0d59ae706a
commit 40fa8294e3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 192 additions and 127 deletions

View file

@ -49,7 +49,6 @@ from synapse.api.constants import (
from synapse.api.errors import (
AuthError,
Codes,
HttpResponseException,
LimitExceededError,
NotFoundError,
StoreError,
@ -60,7 +59,6 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
from synapse.event_auth import validate_event_for_room_version
from synapse.events import EventBase
from synapse.events.utils import copy_and_fixup_power_levels_contents
from synapse.federation.federation_client import InvalidResponseError
from synapse.handlers.relations import BundledAggregations
from synapse.module_api import NOT_SPAM
from synapse.rest.admin._base import assert_user_is_admin
@ -1472,7 +1470,12 @@ class TimestampLookupHandler:
Raises:
SynapseError if unable to find any event locally in the given direction
"""
logger.debug(
"get_event_for_timestamp(room_id=%s, timestamp=%s, direction=%s) Finding closest event...",
room_id,
timestamp,
direction,
)
local_event_id = await self.store.get_event_id_for_timestamp(
room_id, timestamp, direction
)
@ -1524,85 +1527,54 @@ class TimestampLookupHandler:
)
)
# Loop through each homeserver candidate until we get a succesful response
for domain in likely_domains:
# We don't want to ask our own server for information we don't have
if domain == self.server_name:
continue
remote_response = await self.federation_client.timestamp_to_event(
destinations=likely_domains,
room_id=room_id,
timestamp=timestamp,
direction=direction,
)
if remote_response is not None:
logger.debug(
"get_event_for_timestamp: remote_response=%s",
remote_response,
)
try:
remote_response = await self.federation_client.timestamp_to_event(
domain, room_id, timestamp, direction
)
logger.debug(
"get_event_for_timestamp: response from domain(%s)=%s",
domain,
remote_response,
remote_event_id = remote_response.event_id
remote_origin_server_ts = remote_response.origin_server_ts
# Backfill this event so we can get a pagination token for
# it with `/context` and paginate `/messages` from this
# point.
pulled_pdu_info = await self.federation_event_handler.backfill_event_id(
likely_domains, room_id, remote_event_id
)
remote_event = pulled_pdu_info.pdu
# XXX: When we see that the remote server is not trustworthy,
# maybe we should not ask them first in the future.
if remote_origin_server_ts != remote_event.origin_server_ts:
logger.info(
"get_event_for_timestamp: Remote server (%s) claimed that remote_event_id=%s occured at remote_origin_server_ts=%s but that isn't true (actually occured at %s). Their claims are dubious and we should consider not trusting them.",
pulled_pdu_info.pull_origin,
remote_event_id,
remote_origin_server_ts,
remote_event.origin_server_ts,
)
remote_event_id = remote_response.event_id
remote_origin_server_ts = remote_response.origin_server_ts
# Backfill this event so we can get a pagination token for
# it with `/context` and paginate `/messages` from this
# point.
#
# TODO: The requested timestamp may lie in a part of the
# event graph that the remote server *also* didn't have,
# in which case they will have returned another event
# which may be nowhere near the requested timestamp. In
# the future, we may need to reconcile that gap and ask
# other homeservers, and/or extend `/timestamp_to_event`
# to return events on *both* sides of the timestamp to
# help reconcile the gap faster.
remote_event = (
await self.federation_event_handler.backfill_event_id(
domain, room_id, remote_event_id
)
)
# XXX: When we see that the remote server is not trustworthy,
# maybe we should not ask them first in the future.
if remote_origin_server_ts != remote_event.origin_server_ts:
logger.info(
"get_event_for_timestamp: Remote server (%s) claimed that remote_event_id=%s occured at remote_origin_server_ts=%s but that isn't true (actually occured at %s). Their claims are dubious and we should consider not trusting them.",
domain,
remote_event_id,
remote_origin_server_ts,
remote_event.origin_server_ts,
)
# Only return the remote event if it's closer than the local event
if not local_event or (
abs(remote_event.origin_server_ts - timestamp)
< abs(local_event.origin_server_ts - timestamp)
):
logger.info(
"get_event_for_timestamp: returning remote_event_id=%s (%s) since it's closer to timestamp=%s than local_event=%s (%s)",
remote_event_id,
remote_event.origin_server_ts,
timestamp,
local_event.event_id if local_event else None,
local_event.origin_server_ts if local_event else None,
)
return remote_event_id, remote_origin_server_ts
except (HttpResponseException, InvalidResponseError) as ex:
# Let's not put a high priority on some other homeserver
# failing to respond or giving a random response
logger.debug(
"get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s",
domain,
type(ex).__name__,
ex,
ex.args,
)
except Exception:
# But we do want to see some exceptions in our code
logger.warning(
"get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception",
domain,
exc_info=True,
# Only return the remote event if it's closer than the local event
if not local_event or (
abs(remote_event.origin_server_ts - timestamp)
< abs(local_event.origin_server_ts - timestamp)
):
logger.info(
"get_event_for_timestamp: returning remote_event_id=%s (%s) since it's closer to timestamp=%s than local_event=%s (%s)",
remote_event_id,
remote_event.origin_server_ts,
timestamp,
local_event.event_id if local_event else None,
local_event.origin_server_ts if local_event else None,
)
return remote_event_id, remote_origin_server_ts
# To appease mypy, we have to add both of these conditions to check for
# `None`. We only expect `local_event` to be `None` when