From fa5c2bc082185580e7d65a8759a8b5213933a137 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 12 Jul 2018 11:37:44 +0100 Subject: [PATCH] Reduce set building in get_entities_changed This line shows up as about 5% of cpu time on a synchrotron: not_known_entities = set(entities) - set(self._entity_to_key) Presumably the problem here is that _entity_to_key can be largeish, and building a set for its keys every time this function is called is slow. Here we rewrite the logic to avoid building so many sets. --- synapse/util/caches/stream_change_cache.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 8637867c6..a1f8ff8f1 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -74,14 +74,18 @@ class StreamChangeCache(object): assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - not_known_entities = set(entities) - set(self._entity_to_key) + changed_entities = { + self._cache[k] for k in self._cache.islice( + start=self._cache.bisect_right(stream_pos), + ) + } - result = ( - {self._cache[k] for k in self._cache.islice( - start=self._cache.bisect_right(stream_pos))} - .intersection(entities) - .union(not_known_entities) - ) + # we need to include entities which we don't know about, as well as + # those which are known to have changed since the stream pos. + result = { + e for e in entities + if e in changed_entities or e not in self._entity_to_key + } self.metrics.inc_hits() else: