Fix overflows in /messages backfill calculation (#13936)

* Reproduce bug
* Compute `least_function` first
* Substitute `least_function` with an f-string
* Bugfix: avoid overflow

Co-authored-by: Eric Eastwood <erice@element.io>
This commit is contained in:
David Robertson 2022-09-30 11:54:53 +01:00 committed by GitHub
parent 1cc2ca81ba
commit e8f30a76ca
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 103 additions and 41 deletions

View file

@ -766,9 +766,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
self.store.get_backfill_points_in_room(room_id, depth_map["B"], limit=100)
)
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
self.assertListEqual(
backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"]
)
self.assertEqual(backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"])
# Try at "A"
backfill_points = self.get_success(
@ -814,7 +812,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
)
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
# Only the backfill points that we didn't record earlier exist here.
self.assertListEqual(backfill_event_ids, ["b6", "2", "b1"])
self.assertEqual(backfill_event_ids, ["b6", "2", "b1"])
def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duration(
self,
@ -860,7 +858,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
)
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
self.assertListEqual(backfill_event_ids, ["b3", "b2"])
self.assertEqual(backfill_event_ids, ["b3", "b2"])
# Now advance time by 20 hours (above 2^4 because we made 4 attemps) and
# see if we can now backfill it
@ -871,7 +869,48 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
)
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"])
self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"])
def test_get_backfill_points_in_room_works_after_many_failed_pull_attempts_that_could_naively_overflow(
self,
) -> None:
"""
A test that reproduces #13929 (Postgres only).
Test to make sure we can still get backfill points after many failed pull
attempts that cause us to backoff to the limit. Even if the backoff formula
would tell us to wait for more seconds than can be expressed in a 32 bit
signed int.
"""
setup_info = self._setup_room_for_backfill_tests()
room_id = setup_info.room_id
depth_map = setup_info.depth_map
# Pretend that we have tried and failed 10 times to backfill event b1.
for _ in range(10):
self.get_success(
self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
)
# If the backoff periods grow without limit:
# After the first failed attempt, we would have backed off for 1 << 1 = 2 hours.
# After the second failed attempt we would have backed off for 1 << 2 = 4 hours,
# so after the 10th failed attempt we should backoff for 1 << 10 == 1024 hours.
# Wait 1100 hours just so we have a nice round number.
self.reactor.advance(datetime.timedelta(hours=1100).total_seconds())
# 1024 hours in milliseconds is 1024 * 3600000, which exceeds the largest 32 bit
# signed integer. The bug we're reproducing is that this overflow causes an
# error in postgres preventing us from fetching a set of backwards extremities
# to retry fetching.
backfill_points = self.get_success(
self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
)
# We should aim to fetch all backoff points: b1's latest backoff period has
# expired, and we haven't tried the rest.
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"])
def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo:
"""
@ -965,9 +1004,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
)
)
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
self.assertListEqual(
backfill_event_ids, ["insertion_eventB", "insertion_eventA"]
)
self.assertEqual(backfill_event_ids, ["insertion_eventB", "insertion_eventA"])
# Try at "insertion_eventA"
backfill_points = self.get_success(
@ -1011,7 +1048,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
)
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
# Only the backfill points that we didn't record earlier exist here.
self.assertListEqual(backfill_event_ids, ["insertion_eventB"])
self.assertEqual(backfill_event_ids, ["insertion_eventB"])
def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_after_backoff_duration(
self,
@ -1069,7 +1106,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
)
)
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
self.assertListEqual(backfill_event_ids, [])
self.assertEqual(backfill_event_ids, [])
# Now advance time by 20 hours (above 2^4 because we made 4 attemps) and
# see if we can now backfill it
@ -1083,7 +1120,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
)
)
backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
self.assertListEqual(backfill_event_ids, ["insertion_eventA"])
self.assertEqual(backfill_event_ids, ["insertion_eventA"])
@attr.s