Make get_state_groups_from_groups faster.

Most of the time was spent copying a dict to filter out sentinel values
that indicated that keys did not exist in the dict. The sentinel values
were added to ensure that we cached the non-existence of keys.

By having DictionaryCache itself keep track of which keys are known not
to exist, we can remove that dictionary copy.
This commit is contained in:
Erik Johnston 2017-05-17 14:31:23 +01:00
parent 9f430fa07f
commit bbfe4e996c
3 changed files with 58 additions and 41 deletions

View file

@ -23,7 +23,17 @@ import logging
logger = logging.getLogger(__name__)
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "value"))):
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "value"))):
"""Returned when getting an entry from the cache
Attributes:
full (bool): Whether the cache has the full dict or just some keys.
If not full then not all requested keys will necessarily be present
in `value`
known_absent (set): Keys that were looked up in the dict and were not
there.
value (dict): The full or partial dict value
"""
def __len__(self):
return len(self.value)
@ -58,21 +68,31 @@ class DictionaryCache(object):
)
def get(self, key, dict_keys=None):
"""Fetch an entry out of the cache
Args:
key
dict_keys (list): If given a set of keys then return only those keys
that exist in the cache.
Returns:
DictionaryEntry
"""
entry = self.cache.get(key, self.sentinel)
if entry is not self.sentinel:
self.metrics.inc_hits()
if dict_keys is None:
return DictionaryEntry(entry.full, dict(entry.value))
return DictionaryEntry(entry.full, entry.known_absent, dict(entry.value))
else:
return DictionaryEntry(entry.full, {
return DictionaryEntry(entry.full, entry.known_absent, {
k: entry.value[k]
for k in dict_keys
if k in entry.value
})
self.metrics.inc_misses()
return DictionaryEntry(False, {})
return DictionaryEntry(False, set(), {})
def invalidate(self, key):
self.check_thread()
@ -87,19 +107,34 @@ class DictionaryCache(object):
self.sequence += 1
self.cache.clear()
def update(self, sequence, key, value, full=False):
def update(self, sequence, key, value, full=False, known_absent=None):
"""Updates the entry in the cache
Args:
sequence
key
value (dict): The value to update the cache with.
full (bool): Whether the given value is the full dict, or just a
partial subset thereof. If not full then any existing entries
for the key will be updated.
known_absent (set): Set of keys that we know don't exist in the full
dict.
"""
self.check_thread()
if self.sequence == sequence:
# Only update the cache if the cache's sequence number matches the
# number that the cache had before the SELECT was started (SYN-369)
if known_absent is None:
known_absent = set()
if full:
self._insert(key, value)
self._insert(key, value, known_absent)
else:
self._update_or_insert(key, value)
self._update_or_insert(key, value, known_absent)
def _update_or_insert(self, key, value):
entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
def _update_or_insert(self, key, value, known_absent):
entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {}))
entry.value.update(value)
entry.known_absent.update(known_absent)
def _insert(self, key, value):
self.cache[key] = DictionaryEntry(True, value)
def _insert(self, key, value, known_absent):
self.cache[key] = DictionaryEntry(True, known_absent, value)