mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-06-10 01:22:39 -04:00
Fix have_seen_event
cache not being invalidated (#13863)
Fix https://github.com/matrix-org/synapse/issues/13856 Fix https://github.com/matrix-org/synapse/issues/13865 > Discovered while trying to make Synapse fast enough for [this MSC2716 test for importing many batches](https://github.com/matrix-org/complement/pull/214#discussion_r741678240). As an example, disabling the `have_seen_event` cache saves 10 seconds for each `/messages` request in that MSC2716 Complement test because we're not making as many federation requests for `/state` (speeding up `have_seen_event` itself is related to https://github.com/matrix-org/synapse/issues/13625) > > But this will also make `/messages` faster in general so we can include it in the [faster `/messages` milestone](https://github.com/matrix-org/synapse/milestone/11). > > *-- https://github.com/matrix-org/synapse/issues/13856* ### The problem `_invalidate_caches_for_event` doesn't run in monolith mode, which means we never even tried to clear the `have_seen_event` and other caches. And even in worker mode, it only runs on the workers, not the master (AFAICT). Additionally, there was a bug with the key being wrong, so `_invalidate_caches_for_event` never invalidated the `have_seen_event` cache even when it did run. Because we were using `@cachedList` incorrectly, it was putting items in the cache under keys like `((room_id, event_id),)`, i.e. a tuple in a tuple (ex. `(('!TnCIJPKzdQdUlIyXdQ:test', '$Iu0eqEBN7qcyF1S9B3oNB3I91v2o5YOgRNPwi_78s-k'),)`), and we were trying to invalidate with just `(room_id, event_id)`, which did nothing.
This commit is contained in:
parent
35e9d6a616
commit
29269d9d3f
5 changed files with 165 additions and 67 deletions
|
@ -1474,32 +1474,38 @@ class EventsWorkerStore(SQLBaseStore):
|
|||
# the batches as big as possible.
|
||||
|
||||
results: Set[str] = set()
|
||||
for chunk in batch_iter(event_ids, 500):
|
||||
r = await self._have_seen_events_dict(
|
||||
[(room_id, event_id) for event_id in chunk]
|
||||
for event_ids_chunk in batch_iter(event_ids, 500):
|
||||
events_seen_dict = await self._have_seen_events_dict(
|
||||
room_id, event_ids_chunk
|
||||
)
|
||||
results.update(
|
||||
eid for (eid, have_event) in events_seen_dict.items() if have_event
|
||||
)
|
||||
results.update(eid for ((_rid, eid), have_event) in r.items() if have_event)
|
||||
|
||||
return results
|
||||
|
||||
@cachedList(cached_method_name="have_seen_event", list_name="keys")
|
||||
@cachedList(cached_method_name="have_seen_event", list_name="event_ids")
|
||||
async def _have_seen_events_dict(
|
||||
self, keys: Collection[Tuple[str, str]]
|
||||
) -> Dict[Tuple[str, str], bool]:
|
||||
self,
|
||||
room_id: str,
|
||||
event_ids: Collection[str],
|
||||
) -> Dict[str, bool]:
|
||||
"""Helper for have_seen_events
|
||||
|
||||
Returns:
|
||||
a dict {(room_id, event_id)-> bool}
|
||||
a dict {event_id -> bool}
|
||||
"""
|
||||
# if the event cache contains the event, obviously we've seen it.
|
||||
|
||||
cache_results = {
|
||||
(rid, eid)
|
||||
for (rid, eid) in keys
|
||||
if await self._get_event_cache.contains((eid,))
|
||||
event_id
|
||||
for event_id in event_ids
|
||||
if await self._get_event_cache.contains((event_id,))
|
||||
}
|
||||
results = dict.fromkeys(cache_results, True)
|
||||
remaining = [k for k in keys if k not in cache_results]
|
||||
remaining = [
|
||||
event_id for event_id in event_ids if event_id not in cache_results
|
||||
]
|
||||
if not remaining:
|
||||
return results
|
||||
|
||||
|
@ -1511,23 +1517,21 @@ class EventsWorkerStore(SQLBaseStore):
|
|||
|
||||
sql = "SELECT event_id FROM events AS e WHERE "
|
||||
clause, args = make_in_list_sql_clause(
|
||||
txn.database_engine, "e.event_id", [eid for (_rid, eid) in remaining]
|
||||
txn.database_engine, "e.event_id", remaining
|
||||
)
|
||||
txn.execute(sql + clause, args)
|
||||
found_events = {eid for eid, in txn}
|
||||
|
||||
# ... and then we can update the results for each key
|
||||
results.update(
|
||||
{(rid, eid): (eid in found_events) for (rid, eid) in remaining}
|
||||
)
|
||||
results.update({eid: (eid in found_events) for eid in remaining})
|
||||
|
||||
await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn)
|
||||
return results
|
||||
|
||||
@cached(max_entries=100000, tree=True)
|
||||
async def have_seen_event(self, room_id: str, event_id: str) -> bool:
|
||||
res = await self._have_seen_events_dict(((room_id, event_id),))
|
||||
return res[(room_id, event_id)]
|
||||
res = await self._have_seen_events_dict(room_id, [event_id])
|
||||
return res[event_id]
|
||||
|
||||
def _get_current_state_event_counts_txn(
|
||||
self, txn: LoggingTransaction, room_id: str
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue