Make get_state_groups_from_groups faster.

Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy.
2025-06-25 01:10:27 -04:00 · 2017-05-17 14:31:23 +01:00 · 2017-05-17 14:31:23 +01:00 · bbfe4e996c
commit bbfe4e996c
parent 9f430fa07f
3 changed files with 58 additions and 41 deletions
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@ -563,20 +563,22 @@ class StateStore(SQLBaseStore):
                where a `state_key` of `None` matches all state_keys for the
                `type`.
        """
-        is_all, state_dict_ids = self._state_group_cache.get(group)
+        is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)
        type_to_key = {}
        missing_types = set()
        for typ, state_key in types:
            key = (typ, state_key)
            if state_key is None:
                type_to_key[typ] = None
-                missing_types.add((typ, state_key))
+                missing_types.add(key)
            else:
                if type_to_key.get(typ, object()) is not None:
                    type_to_key.setdefault(typ, set()).add(state_key)
-                if (typ, state_key) not in state_dict_ids:
+                if key not in state_dict_ids and key not in known_absent:
-                    missing_types.add((typ, state_key))
+                    missing_types.add(key)
        sentinel = object()
@ -590,7 +592,7 @@ class StateStore(SQLBaseStore):
                return True
            return False
-        got_all = not (missing_types or types is None)
+        got_all = is_all or not missing_types
        return {
            k: v for k, v in state_dict_ids.iteritems()
@ -607,7 +609,7 @@ class StateStore(SQLBaseStore):
        Args:
            group: The state group to lookup
        """
-        is_all, state_dict_ids = self._state_group_cache.get(group)
+        is_all, _, state_dict_ids = self._state_group_cache.get(group)
        return state_dict_ids, is_all
@ -624,7 +626,7 @@ class StateStore(SQLBaseStore):
        missing_groups = []
        if types is not None:
            for group in set(groups):
-                state_dict_ids, missing_types, got_all = self._get_some_state_from_cache(
+                state_dict_ids, _, got_all = self._get_some_state_from_cache(
                    group, types
                )
                results[group] = state_dict_ids
@ -653,18 +655,6 @@ class StateStore(SQLBaseStore):
            # Now we want to update the cache with all the things we fetched
            # from the database.
            for group, group_state_dict in group_to_state_dict.iteritems():
                if types:
                    # We delibrately put key -> None mappings into the cache to
                    # cache absence of the key, on the assumption that if we've
                    # explicitly asked for some types then we will probably ask
                    # for them again.
                    state_dict = {
                        (intern_string(etype), intern_string(state_key)): None
                        for (etype, state_key) in types
                    }
                    state_dict.update(results[group])
                    results[group] = state_dict
                else:
                state_dict = results[group]
                state_dict.update(
@ -677,17 +667,9 @@ class StateStore(SQLBaseStore):
                    key=group,
                    value=state_dict,
                    full=(types is None),
                    known_absent=types,
                )
        # Remove all the entries with None values. The None values were just
        # used for bookkeeping in the cache.
        for group, state_dict in results.iteritems():
            results[group] = {
                key: event_id
                for key, event_id in state_dict.iteritems()
                if event_id
            }
        defer.returnValue(results)
    def get_next_state_group(self):
--- a/synapse/util/caches/dictionary_cache.py
+++ b/synapse/util/caches/dictionary_cache.py
@ -23,7 +23,17 @@ import logging
 logger = logging.getLogger(__name__)
-class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "value"))):
+class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "value"))):
    """Returned when getting an entry from the cache
    Attributes:
        full (bool): Whether the cache has the full or dict or just some keys.
            If not full then not all requested keys will necessarily be present
            in `value`
        known_absent (set): Keys that were looked up in the dict and were not
            there.
        value (dict): The full or partial dict value
    """
    def __len__(self):
        return len(self.value)
@ -58,21 +68,31 @@ class DictionaryCache(object):
                )
    def get(self, key, dict_keys=None):
        """Fetch an entry out of the cache
        Args:
            key
            dict_key(list): If given a set of keys then return only those keys
                that exist in the cache.
        Returns:
            DictionaryEntry
        """
        entry = self.cache.get(key, self.sentinel)
        if entry is not self.sentinel:
            self.metrics.inc_hits()
            if dict_keys is None:
-                return DictionaryEntry(entry.full, dict(entry.value))
+                return DictionaryEntry(entry.full, entry.known_absent, dict(entry.value))
            else:
-                return DictionaryEntry(entry.full, {
+                return DictionaryEntry(entry.full, entry.known_absent, {
                    k: entry.value[k]
                    for k in dict_keys
                    if k in entry.value
                })
        self.metrics.inc_misses()
-        return DictionaryEntry(False, {})
+        return DictionaryEntry(False, set(), {})
    def invalidate(self, key):
        self.check_thread()
@ -87,19 +107,34 @@ class DictionaryCache(object):
        self.sequence += 1
        self.cache.clear()
-    def update(self, sequence, key, value, full=False):
+    def update(self, sequence, key, value, full=False, known_absent=None):
        """Updates the entry in the cache
        Args:
            sequence
            key
            value (dict): The value to update the cache with.
            full (bool): Whether the given value is the full dict, or just a
                partial subset there of. If not full then any existing entries
                for the key will be updated.
            known_absent (set): Set of keys that we know don't exist in the full
                dict.
        """
        self.check_thread()
        if self.sequence == sequence:
            # Only update the cache if the caches sequence number matches the
            # number that the cache had before the SELECT was started (SYN-369)
            if known_absent is None:
                known_absent = set()
            if full:
-                self._insert(key, value)
+                self._insert(key, value, known_absent)
            else:
-                self._update_or_insert(key, value)
+                self._update_or_insert(key, value, known_absent)
-    def _update_or_insert(self, key, value):
+    def _update_or_insert(self, key, value, known_absent):
-        entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
+        entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {}))
        entry.value.update(value)
        entry.known_absent.update(known_absent)
-    def _insert(self, key, value):
+    def _insert(self, key, value, known_absent):
-        self.cache[key] = DictionaryEntry(True, value)
+        self.cache[key] = DictionaryEntry(True, known_absent, value)
--- a/tests/util/test_dict_cache.py
+++ b/tests/util/test_dict_cache.py
@ -28,7 +28,7 @@ class DictCacheTestCase(unittest.TestCase):
        key = "test_simple_cache_hit_full"
        v = self.cache.get(key)
-        self.assertEqual((False, {}), v)
+        self.assertEqual((False, set(), {}), v)
        seq = self.cache.sequence
        test_value = {"test": "test_simple_cache_hit_full"}