Fix catchup-on-reconnect for the Federation Stream (#7374)

looks like we managed to break this during the refactorathon.
2025-11-22 20:12:26 -05:00 · 2020-05-05 14:15:57 +01:00 · 2020-05-05 14:15:57 +01:00 · d5aa7d93ed
commit d5aa7d93ed
parent 8123b2f909
11 changed files with 158 additions and 48 deletions
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@ -31,6 +31,7 @@ Events are replicated via a separate events stream.

 import logging
 from collections import namedtuple
+from typing import List, Tuple

 from six import iteritems

@ -69,7 +70,11 @@ class FederationRemoteSendQueue(object):

        self.edus = SortedDict()  # stream position -> Edu

+        # stream ID for the next entry into presence_changed/keyed_edu_changed/edus.
        self.pos = 1
+
+        # map from stream ID to the time that stream entry was generated, so that we
+        # can clear out entries after a while
        self.pos_time = SortedDict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
@ -250,19 +255,23 @@ class FederationRemoteSendQueue(object):
        self._clear_queue_before_pos(token)

    async def get_replication_rows(
-        self, from_token, to_token, limit, federation_ack=None
-    ):
+        self, instance_name: str, from_token: int, to_token: int, target_row_count: int
+    ) -> Tuple[List[Tuple[int, Tuple]], int, bool]:
        """Get rows to be sent over federation between the two tokens

        Args:
-            from_token (int)
-            to_token(int)
-            limit (int)
-            federation_ack (int): Optional. The position where the worker is
-                explicitly acknowledged it has handled. Allows us to drop
-                data from before that point
+            instance_name: the name of the current process
+            from_token: the previous stream token: the starting point for fetching the
+                updates
+            to_token: the new stream token: the point to get updates up to
+            target_row_count: a target for the number of rows to be returned.
+
+        Returns: a triplet `(updates, new_last_token, limited)`, where:
+           * `updates` is a list of `(token, row)` entries.
+           * `new_last_token` is the new position in stream.
+           * `limited` is whether there are more updates to fetch.
        """
-        # TODO: Handle limit.
+        # TODO: Handle target_row_count.

        # To handle restarts where we wrap around
        if from_token > self.pos:
@ -270,12 +279,7 @@ class FederationRemoteSendQueue(object):

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
-        rows = []
-
-        # There should be only one reader, so lets delete everything its
-        # acknowledged its seen.
-        if federation_ack:
-            self._clear_queue_before_pos(federation_ack)
+        rows = []  # type: List[Tuple[int, BaseFederationRow]]

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
@ -332,7 +336,11 @@ class FederationRemoteSendQueue(object):
        # Sort rows based on pos
        rows.sort()

-        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
+        return (
+            [(pos, (row.TypeId, row.to_data())) for pos, row in rows],
+            to_token,
+            False,
+        )


 class BaseFederationRow(object):