mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-05-02 16:34:49 -04:00
Keep track when we try and fail to process a pulled event (#13589)
We can follow-up this PR with: 1. Only try to backfill from an event if we haven't tried recently -> https://github.com/matrix-org/synapse/issues/13622 1. When we decide to backfill that event again, process it in the background so it doesn't block and make `/messages` slow when we know it will probably fail again -> https://github.com/matrix-org/synapse/issues/13623 1. Generally track failures everywhere we try and fail to pull an event over federation -> https://github.com/matrix-org/synapse/issues/13700 Fix https://github.com/matrix-org/synapse/issues/13621 Part of https://github.com/matrix-org/synapse/issues/13356 Mentioned in [internal doc](https://docs.google.com/document/d/1lvUoVfYUiy6UaHB6Rb4HicjaJAU40-APue9Q4vzuW3c/edit#bookmark=id.qv7cj51sv9i5)
This commit is contained in:
parent
666ae87729
commit
957e3d74fc
7 changed files with 329 additions and 9 deletions
|
@ -1294,6 +1294,51 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
|
|||
|
||||
return event_id_results
|
||||
|
||||
@trace
|
||||
async def record_event_failed_pull_attempt(
|
||||
self, room_id: str, event_id: str, cause: str
|
||||
) -> None:
|
||||
"""
|
||||
Record when we fail to pull an event over federation.
|
||||
|
||||
This information allows us to be more intelligent when we decide to
|
||||
retry (we don't need to fail over and over) and we can process that
|
||||
event in the background so we don't block on it each time.
|
||||
|
||||
Args:
|
||||
room_id: The room where the event failed to pull from
|
||||
event_id: The event that failed to be fetched or processed
|
||||
cause: The error message or reason that we failed to pull the event
|
||||
"""
|
||||
await self.db_pool.runInteraction(
|
||||
"record_event_failed_pull_attempt",
|
||||
self._record_event_failed_pull_attempt_upsert_txn,
|
||||
room_id,
|
||||
event_id,
|
||||
cause,
|
||||
db_autocommit=True, # Safe as it's a single upsert
|
||||
)
|
||||
|
||||
def _record_event_failed_pull_attempt_upsert_txn(
|
||||
self,
|
||||
txn: LoggingTransaction,
|
||||
room_id: str,
|
||||
event_id: str,
|
||||
cause: str,
|
||||
) -> None:
|
||||
sql = """
|
||||
INSERT INTO event_failed_pull_attempts (
|
||||
room_id, event_id, num_attempts, last_attempt_ts, last_cause
|
||||
)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
ON CONFLICT (room_id, event_id) DO UPDATE SET
|
||||
num_attempts=event_failed_pull_attempts.num_attempts + 1,
|
||||
last_attempt_ts=EXCLUDED.last_attempt_ts,
|
||||
last_cause=EXCLUDED.last_cause;
|
||||
"""
|
||||
|
||||
txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause))
|
||||
|
||||
async def get_missing_events(
|
||||
self,
|
||||
room_id: str,
|
||||
|
|
|
@ -2435,17 +2435,31 @@ class PersistEventsStore:
|
|||
"DELETE FROM event_backward_extremities"
|
||||
" WHERE event_id = ? AND room_id = ?"
|
||||
)
|
||||
backward_extremity_tuples_to_remove = [
|
||||
(ev.event_id, ev.room_id)
|
||||
for ev in events
|
||||
if not ev.internal_metadata.is_outlier()
|
||||
# If we encountered an event with no prev_events, then we might
|
||||
# as well remove it now because it won't ever have anything else
|
||||
# to backfill from.
|
||||
or len(ev.prev_event_ids()) == 0
|
||||
]
|
||||
txn.execute_batch(
|
||||
query,
|
||||
[
|
||||
(ev.event_id, ev.room_id)
|
||||
for ev in events
|
||||
if not ev.internal_metadata.is_outlier()
|
||||
# If we encountered an event with no prev_events, then we might
|
||||
# as well remove it now because it won't ever have anything else
|
||||
# to backfill from.
|
||||
or len(ev.prev_event_ids()) == 0
|
||||
],
|
||||
backward_extremity_tuples_to_remove,
|
||||
)
|
||||
|
||||
# Clear out the failed backfill attempts after we successfully pulled
|
||||
# the event. Since we no longer need these events as backward
|
||||
# extremities, it also means that they won't be backfilled from again so
|
||||
# we no longer need to store the backfill attempts around it.
|
||||
query = """
|
||||
DELETE FROM event_failed_pull_attempts
|
||||
WHERE event_id = ? and room_id = ?
|
||||
"""
|
||||
txn.execute_batch(
|
||||
query,
|
||||
backward_extremity_tuples_to_remove,
|
||||
)
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue