Improve get auth chain difference algorithm. (#7095)

It was originally implemented by pulling the full auth chain of all
state sets out of the database and doing set comparison. However, that
can take a lot work if the state and auth chains are large.

Instead, lets try and fetch the auth chains at the same time and
calculate the difference on the fly, allowing us to bail early if all
the auth chains converge. Assuming that the auth chains do converge more
often than not, this should improve performance. Hopefully.
This commit is contained in:
Erik Johnston 2020-03-18 16:46:41 +00:00 committed by GitHub
parent 88b41986db
commit 4a17a647a9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 310 additions and 71 deletions

View file

@ -13,19 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.internet import defer
import tests.unittest
import tests.utils
class EventFederationWorkerStoreTestCase(tests.unittest.TestCase):
@defer.inlineCallbacks
def setUp(self):
hs = yield tests.utils.setup_test_homeserver(self.addCleanup)
class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
def prepare(self, reactor, clock, hs):
self.store = hs.get_datastore()
@defer.inlineCallbacks
def test_get_prev_events_for_room(self):
room_id = "@ROOM:local"
@ -61,15 +56,14 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase):
)
for i in range(0, 20):
yield self.store.db.runInteraction("insert", insert_event, i)
self.get_success(self.store.db.runInteraction("insert", insert_event, i))
# this should get the last ten
r = yield self.store.get_prev_events_for_room(room_id)
r = self.get_success(self.store.get_prev_events_for_room(room_id))
self.assertEqual(10, len(r))
for i in range(0, 10):
self.assertEqual("$event_%i:local" % (19 - i), r[i])
@defer.inlineCallbacks
def test_get_rooms_with_many_extremities(self):
room1 = "#room1"
room2 = "#room2"
@ -86,25 +80,154 @@ class EventFederationWorkerStoreTestCase(tests.unittest.TestCase):
)
for i in range(0, 20):
yield self.store.db.runInteraction("insert", insert_event, i, room1)
yield self.store.db.runInteraction("insert", insert_event, i, room2)
yield self.store.db.runInteraction("insert", insert_event, i, room3)
self.get_success(
self.store.db.runInteraction("insert", insert_event, i, room1)
)
self.get_success(
self.store.db.runInteraction("insert", insert_event, i, room2)
)
self.get_success(
self.store.db.runInteraction("insert", insert_event, i, room3)
)
# Test simple case
r = yield self.store.get_rooms_with_many_extremities(5, 5, [])
r = self.get_success(self.store.get_rooms_with_many_extremities(5, 5, []))
self.assertEqual(len(r), 3)
# Does filter work?
r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1])
r = self.get_success(self.store.get_rooms_with_many_extremities(5, 5, [room1]))
self.assertTrue(room2 in r)
self.assertTrue(room3 in r)
self.assertEqual(len(r), 2)
r = yield self.store.get_rooms_with_many_extremities(5, 5, [room1, room2])
r = self.get_success(
self.store.get_rooms_with_many_extremities(5, 5, [room1, room2])
)
self.assertEqual(r, [room3])
# Does filter and limit work?
r = yield self.store.get_rooms_with_many_extremities(5, 1, [room1])
r = self.get_success(self.store.get_rooms_with_many_extremities(5, 1, [room1]))
self.assertTrue(r == [room2] or r == [room3])
def test_auth_difference(self):
room_id = "@ROOM:local"
# The silly auth graph we use to test the auth difference algorithm,
# where the top are the most recent events.
#
# A B
# \ /
# D E
# \ |
# ` F C
# | /|
# G ´ |
# | \ |
# H I
# | |
# K J
auth_graph = {
"a": ["e"],
"b": ["e"],
"c": ["g", "i"],
"d": ["f"],
"e": ["f"],
"f": ["g"],
"g": ["h", "i"],
"h": ["k"],
"i": ["j"],
"k": [],
"j": [],
}
depth_map = {
"a": 7,
"b": 7,
"c": 4,
"d": 6,
"e": 6,
"f": 5,
"g": 3,
"h": 2,
"i": 2,
"k": 1,
"j": 1,
}
# We rudely fiddle with the appropriate tables directly, as that's much
# easier than constructing events properly.
def insert_event(txn, event_id, stream_ordering):
depth = depth_map[event_id]
self.store.db.simple_insert_txn(
txn,
table="events",
values={
"event_id": event_id,
"room_id": room_id,
"depth": depth,
"topological_ordering": depth,
"type": "m.test",
"processed": True,
"outlier": False,
"stream_ordering": stream_ordering,
},
)
self.store.db.simple_insert_many_txn(
txn,
table="event_auth",
values=[
{"event_id": event_id, "room_id": room_id, "auth_id": a}
for a in auth_graph[event_id]
],
)
next_stream_ordering = 0
for event_id in auth_graph:
next_stream_ordering += 1
self.get_success(
self.store.db.runInteraction(
"insert", insert_event, event_id, next_stream_ordering
)
)
# Now actually test that various combinations give the right result:
difference = self.get_success(
self.store.get_auth_chain_difference([{"a"}, {"b"}])
)
self.assertSetEqual(difference, {"a", "b"})
difference = self.get_success(
self.store.get_auth_chain_difference([{"a"}, {"b"}, {"c"}])
)
self.assertSetEqual(difference, {"a", "b", "c", "e", "f"})
difference = self.get_success(
self.store.get_auth_chain_difference([{"a", "c"}, {"b"}])
)
self.assertSetEqual(difference, {"a", "b", "c"})
difference = self.get_success(
self.store.get_auth_chain_difference([{"a"}, {"b"}, {"d"}])
)
self.assertSetEqual(difference, {"a", "b", "d", "e"})
difference = self.get_success(
self.store.get_auth_chain_difference([{"a"}, {"b"}, {"c"}, {"d"}])
)
self.assertSetEqual(difference, {"a", "b", "c", "d", "e", "f"})
difference = self.get_success(
self.store.get_auth_chain_difference([{"a"}, {"b"}, {"e"}])
)
self.assertSetEqual(difference, {"a", "b"})
difference = self.get_success(self.store.get_auth_chain_difference([{"a"}]))
self.assertSetEqual(difference, set())