Remove keylen from LruCache. (#9993)

`keylen` seems to be a thing that is frequently incorrectly set, and we don't really need it.

The only time it was used was to figure out if we had removed a subtree in `del_multi`, which we can do better by changing `TreeCache.pop` to return a different type (`TreeCacheNode`).

Commits should be independently reviewable.
This commit is contained in:
Richard van der Hoff 2021-05-24 14:02:01 +01:00 committed by GitHub
parent 316f89e87f
commit c0df6bae06
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 80 additions and 55 deletions

1
changelog.d/9993.misc Normal file
View File

@ -0,0 +1 @@
Remove `keylen` param on `LruCache`.

View File

@ -24,7 +24,7 @@ class SlavedClientIpStore(BaseSlavedStore):
super().__init__(database, db_conn, hs) super().__init__(database, db_conn, hs)
self.client_ip_last_seen = LruCache( self.client_ip_last_seen = LruCache(
cache_name="client_ip_last_seen", keylen=4, max_size=50000 cache_name="client_ip_last_seen", max_size=50000
) # type: LruCache[tuple, int] ) # type: LruCache[tuple, int]
async def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id): async def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id):

View File

@ -436,7 +436,7 @@ class ClientIpStore(ClientIpWorkerStore):
def __init__(self, database: DatabasePool, db_conn, hs): def __init__(self, database: DatabasePool, db_conn, hs):
self.client_ip_last_seen = LruCache( self.client_ip_last_seen = LruCache(
cache_name="client_ip_last_seen", keylen=4, max_size=50000 cache_name="client_ip_last_seen", max_size=50000
) )
super().__init__(database, db_conn, hs) super().__init__(database, db_conn, hs)

View File

@ -1053,7 +1053,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
# Map of (user_id, device_id) -> bool. If there is an entry that implies # Map of (user_id, device_id) -> bool. If there is an entry that implies
# the device exists. # the device exists.
self.device_id_exists_cache = LruCache( self.device_id_exists_cache = LruCache(
cache_name="device_id_exists", keylen=2, max_size=10000 cache_name="device_id_exists", max_size=10000
) )
async def store_device( async def store_device(

View File

@ -157,7 +157,6 @@ class EventsWorkerStore(SQLBaseStore):
self._get_event_cache = LruCache( self._get_event_cache = LruCache(
cache_name="*getEvent*", cache_name="*getEvent*",
keylen=3,
max_size=hs.config.caches.event_cache_size, max_size=hs.config.caches.event_cache_size,
) )

View File

@ -70,7 +70,6 @@ class DeferredCache(Generic[KT, VT]):
self, self,
name: str, name: str,
max_entries: int = 1000, max_entries: int = 1000,
keylen: int = 1,
tree: bool = False, tree: bool = False,
iterable: bool = False, iterable: bool = False,
apply_cache_factor_from_config: bool = True, apply_cache_factor_from_config: bool = True,
@ -101,7 +100,6 @@ class DeferredCache(Generic[KT, VT]):
# a Deferred. # a Deferred.
self.cache = LruCache( self.cache = LruCache(
max_size=max_entries, max_size=max_entries,
keylen=keylen,
cache_name=name, cache_name=name,
cache_type=cache_type, cache_type=cache_type,
size_callback=(lambda d: len(d) or 1) if iterable else None, size_callback=(lambda d: len(d) or 1) if iterable else None,

View File

@ -270,7 +270,6 @@ class DeferredCacheDescriptor(_CacheDescriptorBase):
cache = DeferredCache( cache = DeferredCache(
name=self.orig.__name__, name=self.orig.__name__,
max_entries=self.max_entries, max_entries=self.max_entries,
keylen=self.num_args,
tree=self.tree, tree=self.tree,
iterable=self.iterable, iterable=self.iterable,
) # type: DeferredCache[CacheKey, Any] ) # type: DeferredCache[CacheKey, Any]

View File

@ -34,7 +34,7 @@ from typing_extensions import Literal
from synapse.config import cache as cache_config from synapse.config import cache as cache_config
from synapse.util import caches from synapse.util import caches
from synapse.util.caches import CacheMetric, register_cache from synapse.util.caches import CacheMetric, register_cache
from synapse.util.caches.treecache import TreeCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
try: try:
from pympler.asizeof import Asizer from pympler.asizeof import Asizer
@ -160,7 +160,6 @@ class LruCache(Generic[KT, VT]):
self, self,
max_size: int, max_size: int,
cache_name: Optional[str] = None, cache_name: Optional[str] = None,
keylen: int = 1,
cache_type: Type[Union[dict, TreeCache]] = dict, cache_type: Type[Union[dict, TreeCache]] = dict,
size_callback: Optional[Callable] = None, size_callback: Optional[Callable] = None,
metrics_collection_callback: Optional[Callable[[], None]] = None, metrics_collection_callback: Optional[Callable[[], None]] = None,
@ -173,9 +172,6 @@ class LruCache(Generic[KT, VT]):
cache_name: The name of this cache, for the prometheus metrics. If unset, cache_name: The name of this cache, for the prometheus metrics. If unset,
no metrics will be reported on this cache. no metrics will be reported on this cache.
keylen: The length of the tuple used as the cache key. Ignored unless
cache_type is `TreeCache`.
cache_type (type): cache_type (type):
type of underlying cache to be used. Typically one of dict type of underlying cache to be used. Typically one of dict
or TreeCache. or TreeCache.
@ -403,7 +399,9 @@ class LruCache(Generic[KT, VT]):
popped = cache.pop(key) popped = cache.pop(key)
if popped is None: if popped is None:
return return
for leaf in enumerate_leaves(popped, keylen - len(cast(tuple, key))): # for each deleted node, we now need to remove it from the linked list
# and run its callbacks.
for leaf in iterate_tree_cache_entry(popped):
delete_node(leaf) delete_node(leaf)
@synchronized @synchronized

View File

@ -1,18 +1,43 @@
from typing import Dict # Copyright 2016-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
SENTINEL = object() SENTINEL = object()
class TreeCacheNode(dict):
"""The type of nodes in our tree.
Has its own type so we can distinguish it from real dicts that are stored at the
leaves.
"""
pass
class TreeCache: class TreeCache:
""" """
Tree-based backing store for LruCache. Allows subtrees of data to be deleted Tree-based backing store for LruCache. Allows subtrees of data to be deleted
efficiently. efficiently.
Keys must be tuples. Keys must be tuples.
The data structure is a chain of TreeCacheNodes:
root = {key_1: {key_2: _value}}
""" """
def __init__(self): def __init__(self):
self.size = 0 self.size = 0
self.root = {} # type: Dict self.root = TreeCacheNode()
def __setitem__(self, key, value): def __setitem__(self, key, value):
return self.set(key, value) return self.set(key, value)
@ -21,10 +46,23 @@ class TreeCache:
return self.get(key, SENTINEL) is not SENTINEL return self.get(key, SENTINEL) is not SENTINEL
def set(self, key, value): def set(self, key, value):
if isinstance(value, TreeCacheNode):
# this would mean we couldn't tell where our tree ended and the value
# started.
raise ValueError("Cannot store TreeCacheNodes in a TreeCache")
node = self.root node = self.root
for k in key[:-1]: for k in key[:-1]:
node = node.setdefault(k, {}) next_node = node.get(k, SENTINEL)
node[key[-1]] = _Entry(value) if next_node is SENTINEL:
next_node = node[k] = TreeCacheNode()
elif not isinstance(next_node, TreeCacheNode):
# this suggests that the caller is not being consistent with its key
# length.
raise ValueError("value conflicts with an existing subtree")
node = next_node
node[key[-1]] = value
self.size += 1 self.size += 1
def get(self, key, default=None): def get(self, key, default=None):
@ -33,25 +71,41 @@ class TreeCache:
node = node.get(k, None) node = node.get(k, None)
if node is None: if node is None:
return default return default
return node.get(key[-1], _Entry(default)).value return node.get(key[-1], default)
def clear(self): def clear(self):
self.size = 0 self.size = 0
self.root = {} self.root = TreeCacheNode()
def pop(self, key, default=None): def pop(self, key, default=None):
"""Remove the given key, or subkey, from the cache
Args:
key: key or subkey to remove.
default: value to return if key is not found
Returns:
If the key is not found, 'default'. If the key is complete, the removed
value. If the key is partial, the TreeCacheNode corresponding to the part
of the tree that was removed.
"""
# a list of the nodes we have touched on the way down the tree
nodes = [] nodes = []
node = self.root node = self.root
for k in key[:-1]: for k in key[:-1]:
node = node.get(k, None) node = node.get(k, None)
nodes.append(node) # don't add the root node
if node is None: if node is None:
return default return default
if not isinstance(node, TreeCacheNode):
# we've gone off the end of the tree
raise ValueError("pop() key too long")
nodes.append(node) # don't add the root node
popped = node.pop(key[-1], SENTINEL) popped = node.pop(key[-1], SENTINEL)
if popped is SENTINEL: if popped is SENTINEL:
return default return default
# working back up the tree, clear out any nodes that are now empty
node_and_keys = list(zip(nodes, key)) node_and_keys = list(zip(nodes, key))
node_and_keys.reverse() node_and_keys.reverse()
node_and_keys.append((self.root, None)) node_and_keys.append((self.root, None))
@ -61,14 +115,15 @@ class TreeCache:
if n: if n:
break break
# found an empty node: remove it from its parent, and loop.
node_and_keys[i + 1][0].pop(k) node_and_keys[i + 1][0].pop(k)
popped, cnt = _strip_and_count_entires(popped) cnt = sum(1 for _ in iterate_tree_cache_entry(popped))
self.size -= cnt self.size -= cnt
return popped return popped
def values(self): def values(self):
return list(iterate_tree_cache_entry(self.root)) return iterate_tree_cache_entry(self.root)
def __len__(self): def __len__(self):
return self.size return self.size
@ -78,36 +133,9 @@ def iterate_tree_cache_entry(d):
"""Helper function to iterate over the leaves of a tree, i.e. a dict of that """Helper function to iterate over the leaves of a tree, i.e. a dict of that
can contain dicts. can contain dicts.
""" """
if isinstance(d, dict): if isinstance(d, TreeCacheNode):
for value_d in d.values(): for value_d in d.values():
for value in iterate_tree_cache_entry(value_d): for value in iterate_tree_cache_entry(value_d):
yield value yield value
else: else:
if isinstance(d, _Entry): yield d
yield d.value
else:
yield d
class _Entry:
__slots__ = ["value"]
def __init__(self, value):
self.value = value
def _strip_and_count_entires(d):
"""Takes an _Entry or dict with leaves of _Entry's, and either returns the
value or a dictionary with _Entry's replaced by their values.
Also returns the count of _Entry's
"""
if isinstance(d, dict):
cnt = 0
for key, value in d.items():
v, n = _strip_and_count_entires(value)
d[key] = v
cnt += n
return d, cnt
else:
return d.value, 1

View File

@ -59,7 +59,7 @@ class LruCacheTestCase(unittest.HomeserverTestCase):
self.assertEquals(cache.pop("key"), None) self.assertEquals(cache.pop("key"), None)
def test_del_multi(self): def test_del_multi(self):
cache = LruCache(4, keylen=2, cache_type=TreeCache) cache = LruCache(4, cache_type=TreeCache)
cache[("animal", "cat")] = "mew" cache[("animal", "cat")] = "mew"
cache[("animal", "dog")] = "woof" cache[("animal", "dog")] = "woof"
cache[("vehicles", "car")] = "vroom" cache[("vehicles", "car")] = "vroom"
@ -165,7 +165,7 @@ class LruCacheCallbacksTestCase(unittest.HomeserverTestCase):
m2 = Mock() m2 = Mock()
m3 = Mock() m3 = Mock()
m4 = Mock() m4 = Mock()
cache = LruCache(4, keylen=2, cache_type=TreeCache) cache = LruCache(4, cache_type=TreeCache)
cache.set(("a", "1"), "value", callbacks=[m1]) cache.set(("a", "1"), "value", callbacks=[m1])
cache.set(("a", "2"), "value", callbacks=[m2]) cache.set(("a", "2"), "value", callbacks=[m2])

View File

@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
from synapse.util.caches.treecache import TreeCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
from .. import unittest from .. import unittest
@ -64,12 +64,14 @@ class TreeCacheTestCase(unittest.TestCase):
cache[("a", "b")] = "AB" cache[("a", "b")] = "AB"
cache[("b", "a")] = "BA" cache[("b", "a")] = "BA"
self.assertEquals(cache.get(("a", "a")), "AA") self.assertEquals(cache.get(("a", "a")), "AA")
cache.pop(("a",)) popped = cache.pop(("a",))
self.assertEquals(cache.get(("a", "a")), None) self.assertEquals(cache.get(("a", "a")), None)
self.assertEquals(cache.get(("a", "b")), None) self.assertEquals(cache.get(("a", "b")), None)
self.assertEquals(cache.get(("b", "a")), "BA") self.assertEquals(cache.get(("b", "a")), "BA")
self.assertEquals(len(cache), 1) self.assertEquals(len(cache), 1)
self.assertEquals({"AA", "AB"}, set(iterate_tree_cache_entry(popped)))
def test_clear(self): def test_clear(self):
cache = TreeCache() cache = TreeCache()
cache[("a",)] = "A" cache[("a",)] = "A"