Reduce set building in get_entities_changed

This line accounts for about 5% of CPU time on a synchrotron:

    not_known_entities = set(entities) - set(self._entity_to_key)

Presumably the problem here is that _entity_to_key can be fairly large, and
building a set from its keys every time this function is called is slow.

Here we rewrite the logic to avoid building so many sets.
This commit is contained in:
Richard van der Hoff 2018-07-12 11:37:44 +01:00
parent 0456e05977
commit fa5c2bc082

View File

@@ -74,14 +74,18 @@ class StreamChangeCache(object):
assert type(stream_pos) is int assert type(stream_pos) is int
if stream_pos >= self._earliest_known_stream_pos: if stream_pos >= self._earliest_known_stream_pos:
not_known_entities = set(entities) - set(self._entity_to_key) changed_entities = {
self._cache[k] for k in self._cache.islice(
start=self._cache.bisect_right(stream_pos),
)
}
result = ( # we need to include entities which we don't know about, as well as
{self._cache[k] for k in self._cache.islice( # those which are known to have changed since the stream pos.
start=self._cache.bisect_right(stream_pos))} result = {
.intersection(entities) e for e in entities
.union(not_known_entities) if e in changed_entities or e not in self._entity_to_key
) }
self.metrics.inc_hits() self.metrics.inc_hits()
else: else: