Improve get auth chain difference algorithm. (#7095)

It was originally implemented by pulling the full auth chain of all state sets out of the database and doing set comparison. However, that can take a lot work if the state and auth chains are large. Instead, lets try and fetch the auth chains at the same time and calculate the difference on the fly, allowing us to bail early if all the auth chains converge. Assuming that the auth chains do converge more often than not, this should improve performance. Hopefully.
2025-12-15 19:08:43 -05:00 · 2020-03-18 16:46:41 +00:00 · 2020-03-18 16:46:41 +00:00 · 4a17a647a9
commit 4a17a647a9
parent 88b41986db
6 changed files with 310 additions and 71 deletions
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@ -13,19 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from twisted.internet import defer
-
 import tests.unittest
 import tests.utils


-class EventFederationWorkerStoreTestCase(tests.unittest.TestCase):
-    @defer.inlineCallbacks
-    def setUp(self):
-        hs = yield tests.utils.setup_test_homeserver(self.addCleanup)
+class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
+    def prepare(self, reactor, clock, hs):
        self.store = hs.get_datastore()

-    @defer.inlineCallbacks
    def test_get_prev_events_for_room(self):
        room_id = "@ROOM:local"

@ -61,15 +56,14 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase):
            )

        for i in range(0, 20):
-            yield self.store.db.runInteraction("insert", insert_event, i)
+            self.get_success(self.store.db.runInteraction("insert", insert_event, i))

        # this should get the last ten
-        r = yield self.store.get_prev_events_for_room(room_id)
+        r = self.get_success(self.store.get_prev_events_for_room(room_id))
        self.assertEqual(10, len(r))
        for i in range(0, 10):
            self.assertEqual("$event_%i:local" % (19 - i), r[i])

-    @defer.inlineCallbacks
    def test_get_rooms_with_many_extremities(self):
        room1 = "#room1"
        room2 = "#room2"
@ -86,25 +80,154 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase):
            )

        for i in range(0, 20):
-            yield self.store.db.runInteraction("insert", insert_event, i, room1)
-            yield self.store.db.runInteraction("insert", insert_event, i, room2)
-            yield self.store.db.runInteraction("insert", insert_event, i, room3)
+            self.get_success(
+                self.store.db.runInteraction("insert", insert_event, i, room1)
+            )
+            self.get_success(
+                self.store.db.runInteraction("insert", insert_event, i, room2)
+            )
+            self.get_success(
+                self.store.db.runInteraction("insert", insert_event, i, room3)
+            )

        # Test simple case
-        r = yield self.store.get_rooms_with_many_extremities(5, 5, [])
+        r = self.get_success(self.store.get_rooms_with_many_extremities(5, 5, []))
        self.assertEqual(len(r), 3)

        # Does filter work?

-        r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1])
+        r = self.get_success(self.store.get_rooms_with_many_extremities(5, 5, [room1]))
        self.assertTrue(room2 in r)
        self.assertTrue(room3 in r)
        self.assertEqual(len(r), 2)

-        r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1, room2])
+        r = self.get_success(
+            self.store.get_rooms_with_many_extremities(5, 5, [room1, room2])
+        )
        self.assertEqual(r, [room3])

        # Does filter and limit work?

-        r = yield self.store.get_rooms_with_many_extremities(5, 1, [room1])
+        r = self.get_success(self.store.get_rooms_with_many_extremities(5, 1, [room1]))
        self.assertTrue(r == [room2] or r == [room3])
+
+    def test_auth_difference(self):
+        room_id = "@ROOM:local"
+
+        # The silly auth graph we use to test the auth difference algorithm,
+        # where the top are the most recent events.
+        #
+        #   A   B
+        #    \ /
+        #  D  E
+        #  \  |
+        #   ` F   C
+        #     |  /|
+        #     G ´ |
+        #     | \ |
+        #     H   I
+        #     |   |
+        #     K   J
+
+        auth_graph = {
+            "a": ["e"],
+            "b": ["e"],
+            "c": ["g", "i"],
+            "d": ["f"],
+            "e": ["f"],
+            "f": ["g"],
+            "g": ["h", "i"],
+            "h": ["k"],
+            "i": ["j"],
+            "k": [],
+            "j": [],
+        }
+
+        depth_map = {
+            "a": 7,
+            "b": 7,
+            "c": 4,
+            "d": 6,
+            "e": 6,
+            "f": 5,
+            "g": 3,
+            "h": 2,
+            "i": 2,
+            "k": 1,
+            "j": 1,
+        }
+
+        # We rudely fiddle with the appropriate tables directly, as that's much
+        # easier than constructing events properly.
+
+        def insert_event(txn, event_id, stream_ordering):
+
+            depth = depth_map[event_id]
+
+            self.store.db.simple_insert_txn(
+                txn,
+                table="events",
+                values={
+                    "event_id": event_id,
+                    "room_id": room_id,
+                    "depth": depth,
+                    "topological_ordering": depth,
+                    "type": "m.test",
+                    "processed": True,
+                    "outlier": False,
+                    "stream_ordering": stream_ordering,
+                },
+            )
+
+            self.store.db.simple_insert_many_txn(
+                txn,
+                table="event_auth",
+                values=[
+                    {"event_id": event_id, "room_id": room_id, "auth_id": a}
+                    for a in auth_graph[event_id]
+                ],
+            )
+
+        next_stream_ordering = 0
+        for event_id in auth_graph:
+            next_stream_ordering += 1
+            self.get_success(
+                self.store.db.runInteraction(
+                    "insert", insert_event, event_id, next_stream_ordering
+                )
+            )
+
+        # Now actually test that various combinations give the right result:
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference([{"a"}, {"b"}])
+        )
+        self.assertSetEqual(difference, {"a", "b"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference([{"a"}, {"b"}, {"c"}])
+        )
+        self.assertSetEqual(difference, {"a", "b", "c", "e", "f"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference([{"a", "c"}, {"b"}])
+        )
+        self.assertSetEqual(difference, {"a", "b", "c"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference([{"a"}, {"b"}, {"d"}])
+        )
+        self.assertSetEqual(difference, {"a", "b", "d", "e"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference([{"a"}, {"b"}, {"c"}, {"d"}])
+        )
+        self.assertSetEqual(difference, {"a", "b", "c", "d", "e", "f"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference([{"a"}, {"b"}, {"e"}])
+        )
+        self.assertSetEqual(difference, {"a", "b"})
+
+        difference = self.get_success(self.store.get_auth_chain_difference([{"a"}]))
+        self.assertSetEqual(difference, set())