add a cache to have_seen_event (#9953)
Empirically, this helped my server considerably when handling gaps in Matrix HQ. The problem was that we would repeatedly call have_seen_events for the same set of (50K or so) auth_events, and each of those calls would take many minutes to complete, even though it only needs an index scan.
parent 10e6d2abce
commit b4b2fd2ece
10 changed files with 206 additions and 21 deletions
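
Only the purge_events.py changes are reproduced below; the cache itself is added in events_worker.py, one of the other changed files. As a rough sketch of the approach, assuming Synapse's @cached/@cachedList descriptors — the decorator arguments and the class and helper names here are illustrative, not quoted from the PR:

# Sketch only: a per-(room_id, event_id) cache over have_seen_event, with a
# batched lookup to fill cache misses.
from typing import Dict, Iterable, Tuple

from synapse.util.caches.descriptors import cached, cachedList


class HaveSeenEventCacheSketch:
    @cached(max_entries=100000, tree=True)
    async def have_seen_event(self, room_id: str, event_id: str) -> bool:
        # Not called directly: exists so that the @cachedList descriptor
        # below has a per-key cache to read from and populate.
        raise NotImplementedError()

    @cachedList(cached_method_name="have_seen_event", list_name="keys")
    async def _have_seen_events_dict(
        self, keys: Iterable[Tuple[str, str]]
    ) -> Dict[Tuple[str, str], bool]:
        # One batched, index-only database query for whichever
        # (room_id, event_id) keys are not already cached, so repeated calls
        # for the same ~50K auth_events mostly hit the in-memory cache.
        ...

The cache is a tree cache (tree=True) so that entries can also be invalidated by key prefix, which the room-purge hunk at the end relies on.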
synapse/storage/databases/main/purge_events.py

@@ -16,14 +16,14 @@ import logging
 from typing import Any, List, Set, Tuple
 
 from synapse.api.errors import SynapseError
-from synapse.storage._base import SQLBaseStore
+from synapse.storage.databases.main import CacheInvalidationWorkerStore
 from synapse.storage.databases.main.state import StateGroupWorkerStore
 from synapse.types import RoomStreamToken
 
 logger = logging.getLogger(__name__)
 
 
-class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
+class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
     async def purge_history(
         self, room_id: str, token: str, delete_local_events: bool
     ) -> Set[int]:
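
Why the base class changes: _invalidate_cache_and_stream, used in the hunks below, is provided by CacheInvalidationWorkerStore (itself an SQLBaseStore subclass), so PurgeEventsStore trades its direct SQLBaseStore base for the invalidation-aware one. Unlike a plain txn.call_after(cache.invalidate, key), that helper also writes a row to the cache-invalidation replication stream, so worker processes evict their cached entries as well.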

@@ -203,8 +203,6 @@ class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
             "DELETE FROM event_to_state_groups "
             "WHERE event_id IN (SELECT event_id from events_to_purge)"
         )
-        for event_id, _ in event_rows:
-            txn.call_after(self._get_state_group_for_event.invalidate, (event_id,))
 
         # Delete all remote non-state events
         for table in (
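
The per-event _get_state_group_for_event invalidation removed here is not lost: the next hunk re-adds it after the purge completes, upgraded from a local-only txn.call_after to _invalidate_cache_and_stream.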

@@ -283,6 +281,20 @@ class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
         # so make sure to keep this actually last.
         txn.execute("DROP TABLE events_to_purge")
 
+        for event_id, should_delete in event_rows:
+            self._invalidate_cache_and_stream(
+                txn, self._get_state_group_for_event, (event_id,)
+            )
+
+            # XXX: This is racy, since have_seen_events could be called between the
+            #   transaction completing and the invalidation running. On the other hand,
+            #   that's no different to calling `have_seen_events` just before the
+            #   event is deleted from the database.
+            if should_delete:
+                self._invalidate_cache_and_stream(
+                    txn, self.have_seen_event, (room_id, event_id)
+                )
+
         logger.info("[purge] done")
 
         return referenced_state_groups
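
The invalidations now run once per purged event, after the events_to_purge temporary table is dropped, and have_seen_event is only invalidated for rows that were actually deleted (should_delete is set), using the full (room_id, event_id) cache key. The XXX comment concedes a small window between the transaction committing and the invalidation running; as it notes, a caller racing that window sees the same stale answer it would get by calling have_seen_events just before the delete.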

@@ -422,7 +434,11 @@ class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
         # index on them. In any case we should be clearing out 'stream' tables
         # periodically anyway (#5888)
 
-        # TODO: we could probably usefully do a bunch of cache invalidation here
+        # TODO: we could probably usefully do a bunch more cache invalidation here
+
+        # XXX: as with purge_history, this is racy, but no worse than other races
+        #   that already exist.
+        self._invalidate_cache_and_stream(txn, self.have_seen_event, (room_id,))
 
         logger.info("[purge] done")
 
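
This final hunk is in the room-purge path: the whole room is going away, so instead of enumerating events, the cache is invalidated with just (room_id,), a prefix of the (room_id, event_id) cache key. That only works because have_seen_event uses a tree cache. A minimal, hypothetical model of prefix invalidation (PrefixCache is an illustrative stand-in, not Synapse's actual TreeCache API):

# Toy model of tree-cache prefix invalidation; all names are hypothetical.
from typing import Dict, Tuple


class PrefixCache:
    def __init__(self) -> None:
        self._data: Dict[Tuple[str, ...], bool] = {}

    def set(self, key: Tuple[str, ...], value: bool) -> None:
        self._data[key] = value

    def invalidate(self, prefix: Tuple[str, ...]) -> None:
        # Drop every entry whose key starts with `prefix`: invalidating with
        # (room_id,) evicts all (room_id, event_id) entries in one go.
        for key in [k for k in self._data if k[: len(prefix)] == prefix]:
            del self._data[key]


cache = PrefixCache()
cache.set(("!room:example.org", "$event1"), True)
cache.set(("!room:example.org", "$event2"), True)
cache.invalidate(("!room:example.org",))  # room-purge style invalidation
assert not cache._data  # both entries evicted by the shared prefix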