Prevent join->join membership transitions changing member count (#7977)

`StatsHandler` handles updates to the `current_state_delta_stream`, and updates room stats such as the number of state events, joined users, etc.

However, it counts every new join membership event as a new user entering the room (and as that user being in one more room), whereas it's possible for a user's membership status to go from join -> join, for instance when they change their per-room profile information.

This PR adds a check for join->join membership transitions, and bails out early, as none of the further checks are necessary at that point.

Due to this bug, membership stats in many rooms have ended up being wildly larger than their true values. I am not sure if we also want to include a migration step which recalculates these statistics (possibly using the `_populate_stats_process_rooms` bg update).

The bug was introduced in the initial implementation: https://github.com/matrix-org/synapse/pull/4338.
This commit is contained in:
Andrew Morgan 2020-08-03 13:54:24 -07:00 committed by GitHub
parent 6812509807
commit 5d92a1428c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 126 additions and 13 deletions

View file

@ -72,6 +72,9 @@ class StatsStore(StateDeltasStore):
self.db.updates.register_background_update_handler(
"populate_stats_process_rooms", self._populate_stats_process_rooms
)
self.db.updates.register_background_update_handler(
"populate_stats_process_rooms_2", self._populate_stats_process_rooms_2
)
self.db.updates.register_background_update_handler(
"populate_stats_process_users", self._populate_stats_process_users
)
@ -140,11 +143,30 @@ class StatsStore(StateDeltasStore):
return len(users_to_work_on)
async def _populate_stats_process_rooms(self, progress, batch_size):
"""
This was a background update which regenerated statistics for rooms.

It has been replaced by StatsStore._populate_stats_process_rooms_2. This background
job was scheduled to run as part of Synapse v1.0.0, and has now been scheduled again
under the new name. So that someone upgrading from <v1.0.0 does not run the
potentially expensive task twice, this background task has been turned into a no-op.

Neither `progress` nor `batch_size` is used.

Further context: https://github.com/matrix-org/synapse/pull/7977
"""
# No work is done here: the update is immediately ended under its old name.
await self.db.updates._end_background_update("populate_stats_process_rooms")
# NOTE(review): presumably the number of items processed, as other handlers
# return a count -- confirm against the background updater's contract.
return 1
async def _populate_stats_process_rooms_2(self, progress, batch_size):
"""
This is a background update which regenerates statistics for rooms.
It replaces StatsStore._populate_stats_process_rooms. See its docstring for the
reasoning.
"""
if not self.stats_enabled:
await self.db.updates._end_background_update("populate_stats_process_rooms")
await self.db.updates._end_background_update(
"populate_stats_process_rooms_2"
)
return 1
last_room_id = progress.get("last_room_id", "")
@ -160,12 +182,14 @@ class StatsStore(StateDeltasStore):
return [r for r, in txn]
rooms_to_work_on = await self.db.runInteraction(
"populate_stats_rooms_get_batch", _get_next_batch
"populate_stats_rooms_2_get_batch", _get_next_batch
)
# No more rooms -- complete the transaction.
if not rooms_to_work_on:
await self.db.updates._end_background_update("populate_stats_process_rooms")
await self.db.updates._end_background_update(
"populate_stats_process_rooms_2"
)
return 1
for room_id in rooms_to_work_on:
@ -173,9 +197,9 @@ class StatsStore(StateDeltasStore):
progress["last_room_id"] = room_id
await self.db.runInteraction(
"_populate_stats_process_rooms",
"_populate_stats_process_rooms_2",
self.db.updates._background_update_progress_txn,
"populate_stats_process_rooms",
"populate_stats_process_rooms_2",
progress,
)