Make get_state_groups_from_groups faster.

Most of the time was spent copying a dict to filter out sentinel values
that indicated that keys did not exist in the dict. The sentinel values
were added to ensure that we cached the non-existence of keys.

By having DictionaryCache itself keep track of which keys are known not
to exist, we can remove that dictionary copy.
This commit is contained in:
Erik Johnston 2017-05-17 14:31:23 +01:00
parent 9f430fa07f
commit bbfe4e996c
3 changed files with 58 additions and 41 deletions

View File

@ -563,20 +563,22 @@ class StateStore(SQLBaseStore):
where a `state_key` of `None` matches all state_keys for the where a `state_key` of `None` matches all state_keys for the
`type`. `type`.
""" """
is_all, state_dict_ids = self._state_group_cache.get(group) is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)
type_to_key = {} type_to_key = {}
missing_types = set() missing_types = set()
for typ, state_key in types: for typ, state_key in types:
key = (typ, state_key)
if state_key is None: if state_key is None:
type_to_key[typ] = None type_to_key[typ] = None
missing_types.add((typ, state_key)) missing_types.add(key)
else: else:
if type_to_key.get(typ, object()) is not None: if type_to_key.get(typ, object()) is not None:
type_to_key.setdefault(typ, set()).add(state_key) type_to_key.setdefault(typ, set()).add(state_key)
if (typ, state_key) not in state_dict_ids: if key not in state_dict_ids and key not in known_absent:
missing_types.add((typ, state_key)) missing_types.add(key)
sentinel = object() sentinel = object()
@ -590,7 +592,7 @@ class StateStore(SQLBaseStore):
return True return True
return False return False
got_all = not (missing_types or types is None) got_all = is_all or not missing_types
return { return {
k: v for k, v in state_dict_ids.iteritems() k: v for k, v in state_dict_ids.iteritems()
@ -607,7 +609,7 @@ class StateStore(SQLBaseStore):
Args: Args:
group: The state group to lookup group: The state group to lookup
""" """
is_all, state_dict_ids = self._state_group_cache.get(group) is_all, _, state_dict_ids = self._state_group_cache.get(group)
return state_dict_ids, is_all return state_dict_ids, is_all
@ -624,7 +626,7 @@ class StateStore(SQLBaseStore):
missing_groups = [] missing_groups = []
if types is not None: if types is not None:
for group in set(groups): for group in set(groups):
state_dict_ids, missing_types, got_all = self._get_some_state_from_cache( state_dict_ids, _, got_all = self._get_some_state_from_cache(
group, types group, types
) )
results[group] = state_dict_ids results[group] = state_dict_ids
@ -653,18 +655,6 @@ class StateStore(SQLBaseStore):
# Now we want to update the cache with all the things we fetched # Now we want to update the cache with all the things we fetched
# from the database. # from the database.
for group, group_state_dict in group_to_state_dict.iteritems(): for group, group_state_dict in group_to_state_dict.iteritems():
if types:
# We delibrately put key -> None mappings into the cache to
# cache absence of the key, on the assumption that if we've
# explicitly asked for some types then we will probably ask
# for them again.
state_dict = {
(intern_string(etype), intern_string(state_key)): None
for (etype, state_key) in types
}
state_dict.update(results[group])
results[group] = state_dict
else:
state_dict = results[group] state_dict = results[group]
state_dict.update( state_dict.update(
@ -677,17 +667,9 @@ class StateStore(SQLBaseStore):
key=group, key=group,
value=state_dict, value=state_dict,
full=(types is None), full=(types is None),
known_absent=types,
) )
# Remove all the entries with None values. The None values were just
# used for bookkeeping in the cache.
for group, state_dict in results.iteritems():
results[group] = {
key: event_id
for key, event_id in state_dict.iteritems()
if event_id
}
defer.returnValue(results) defer.returnValue(results)
def get_next_state_group(self): def get_next_state_group(self):

View File

@ -23,7 +23,17 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "value"))): class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "value"))):
"""Returned when getting an entry from the cache
Attributes:
full (bool): Whether the cache has the full dict or just some keys.
If not full then not all requested keys will necessarily be present
in `value`
known_absent (set): Keys that were looked up in the dict and were not
there.
value (dict): The full or partial dict value
"""
def __len__(self): def __len__(self):
return len(self.value) return len(self.value)
@ -58,21 +68,31 @@ class DictionaryCache(object):
) )
def get(self, key, dict_keys=None): def get(self, key, dict_keys=None):
"""Fetch an entry out of the cache
Args:
key
dict_keys (list): If given, return only those of the given keys
that exist in the cache.
Returns:
DictionaryEntry
"""
entry = self.cache.get(key, self.sentinel) entry = self.cache.get(key, self.sentinel)
if entry is not self.sentinel: if entry is not self.sentinel:
self.metrics.inc_hits() self.metrics.inc_hits()
if dict_keys is None: if dict_keys is None:
return DictionaryEntry(entry.full, dict(entry.value)) return DictionaryEntry(entry.full, entry.known_absent, dict(entry.value))
else: else:
return DictionaryEntry(entry.full, { return DictionaryEntry(entry.full, entry.known_absent, {
k: entry.value[k] k: entry.value[k]
for k in dict_keys for k in dict_keys
if k in entry.value if k in entry.value
}) })
self.metrics.inc_misses() self.metrics.inc_misses()
return DictionaryEntry(False, {}) return DictionaryEntry(False, set(), {})
def invalidate(self, key): def invalidate(self, key):
self.check_thread() self.check_thread()
@ -87,19 +107,34 @@ class DictionaryCache(object):
self.sequence += 1 self.sequence += 1
self.cache.clear() self.cache.clear()
def update(self, sequence, key, value, full=False): def update(self, sequence, key, value, full=False, known_absent=None):
"""Updates the entry in the cache
Args:
sequence
key
value (dict): The value to update the cache with.
full (bool): Whether the given value is the full dict, or just a
partial subset thereof. If not full then any existing entries
for the key will be updated.
known_absent (set): Set of keys that we know don't exist in the full
dict.
"""
self.check_thread() self.check_thread()
if self.sequence == sequence: if self.sequence == sequence:
# Only update the cache if the caches sequence number matches the # Only update the cache if the caches sequence number matches the
# number that the cache had before the SELECT was started (SYN-369) # number that the cache had before the SELECT was started (SYN-369)
if known_absent is None:
known_absent = set()
if full: if full:
self._insert(key, value) self._insert(key, value, known_absent)
else: else:
self._update_or_insert(key, value) self._update_or_insert(key, value, known_absent)
def _update_or_insert(self, key, value): def _update_or_insert(self, key, value, known_absent):
entry = self.cache.setdefault(key, DictionaryEntry(False, {})) entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {}))
entry.value.update(value) entry.value.update(value)
entry.known_absent.update(known_absent)
def _insert(self, key, value): def _insert(self, key, value, known_absent):
self.cache[key] = DictionaryEntry(True, value) self.cache[key] = DictionaryEntry(True, known_absent, value)

View File

@ -28,7 +28,7 @@ class DictCacheTestCase(unittest.TestCase):
key = "test_simple_cache_hit_full" key = "test_simple_cache_hit_full"
v = self.cache.get(key) v = self.cache.get(key)
self.assertEqual((False, {}), v) self.assertEqual((False, set(), {}), v)
seq = self.cache.sequence seq = self.cache.sequence
test_value = {"test": "test_simple_cache_hit_full"} test_value = {"test": "test_simple_cache_hit_full"}