Catch-up after Federation Outage (bonus): Catch-up on Synapse Startup (#8322)

Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Fix _set_destination_retry_timings

This came about because the code assumed that retry_interval
could not be NULL — which has been challenged by catch-up.
This commit is contained in:
reivilibre 2020-09-18 14:59:13 +01:00 committed by GitHub
parent 8a4a4186de
commit 36efbcaf51
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 218 additions and 5 deletions

View file

@ -218,6 +218,7 @@ class TransactionStore(SQLBaseStore):
retry_interval = EXCLUDED.retry_interval
WHERE
EXCLUDED.retry_interval = 0
OR destinations.retry_interval IS NULL
OR destinations.retry_interval < EXCLUDED.retry_interval
"""
@ -249,7 +250,11 @@ class TransactionStore(SQLBaseStore):
"retry_interval": retry_interval,
},
)
elif retry_interval == 0 or prev_row["retry_interval"] < retry_interval:
elif (
retry_interval == 0
or prev_row["retry_interval"] is None
or prev_row["retry_interval"] < retry_interval
):
self.db_pool.simple_update_one_txn(
txn,
"destinations",
@ -397,7 +402,7 @@ class TransactionStore(SQLBaseStore):
@staticmethod
def _get_catch_up_room_event_ids_txn(
txn, destination: str, last_successful_stream_ordering: int,
txn: LoggingTransaction, destination: str, last_successful_stream_ordering: int,
) -> List[str]:
q = """
SELECT event_id FROM destination_rooms
@ -412,3 +417,60 @@ class TransactionStore(SQLBaseStore):
)
event_ids = [row[0] for row in txn]
return event_ids
async def get_catch_up_outstanding_destinations(
self, after_destination: Optional[str]
) -> List[str]:
"""
Gets at most 25 destinations which have outstanding PDUs to be caught up,
and are not being backed off from
Args:
after_destination:
If provided, all destinations must be lexicographically greater
than this one.
Returns:
list of up to 25 destinations with outstanding catch-up.
These are the lexicographically first destinations which are
lexicographically greater than after_destination (if provided).
"""
time = self.hs.get_clock().time_msec()
return await self.db_pool.runInteraction(
"get_catch_up_outstanding_destinations",
self._get_catch_up_outstanding_destinations_txn,
time,
after_destination,
)
@staticmethod
def _get_catch_up_outstanding_destinations_txn(
txn: LoggingTransaction, now_time_ms: int, after_destination: Optional[str]
) -> List[str]:
q = """
SELECT destination FROM destinations
WHERE destination IN (
SELECT destination FROM destination_rooms
WHERE destination_rooms.stream_ordering >
destinations.last_successful_stream_ordering
)
AND destination > ?
AND (
retry_last_ts IS NULL OR
retry_last_ts + retry_interval < ?
)
ORDER BY destination
LIMIT 25
"""
txn.execute(
q,
(
# everything is lexicographically greater than "" so this gives
# us the first batch of up to 25.
after_destination or "",
now_time_ms,
),
)
destinations = [row[0] for row in txn]
return destinations