move DeferredCache into its own module

2024-10-01 01:36:05 -04:00 · 2020-10-14 23:25:23 +01:00 · 2020-10-14 23:25:23 +01:00 · 4182bb812f
commit 4182bb812f
parent 9f87da0a84
10 changed files with 367 additions and 332 deletions
--- a/synapse/replication/slave/storage/client_ips.py
+++ b/synapse/replication/slave/storage/client_ips.py
@ -15,7 +15,7 @@
 from synapse.storage.database import DatabasePool
 from synapse.storage.databases.main.client_ips import LAST_SEEN_GRANULARITY
-from synapse.util.caches.descriptors import DeferredCache
+from synapse.util.caches.deferred_cache import DeferredCache
 from ._base import BaseSlavedStore
--- a/synapse/storage/databases/main/client_ips.py
+++ b/synapse/storage/databases/main/client_ips.py
@ -19,7 +19,7 @@ from typing import Dict, Optional, Tuple
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import DatabasePool, make_tuple_comparison_clause
-from synapse.util.caches.descriptors import DeferredCache
+from synapse.util.caches.deferred_cache import DeferredCache
 logger = logging.getLogger(__name__)
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@ -34,7 +34,8 @@ from synapse.storage.database import (
 )
 from synapse.types import Collection, JsonDict, get_verify_key_from_cross_signing_key
 from synapse.util import json_decoder, json_encoder
-from synapse.util.caches.descriptors import DeferredCache, cached, cachedList
+from synapse.util.caches.deferred_cache import DeferredCache
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.iterutils import batch_iter
 from synapse.util.stringutils import shortstr
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@ -42,7 +42,8 @@ from synapse.storage.database import DatabasePool
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator
 from synapse.types import Collection, get_domain_from_id
-from synapse.util.caches.descriptors import DeferredCache, cached
+from synapse.util.caches.deferred_cache import DeferredCache
 from synapse.util.caches.descriptors import cached
 from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import Measure
--- a/synapse/util/caches/deferred_cache.py
+++ b/synapse/util/caches/deferred_cache.py
@ -0,0 +1,292 @@
 # -*- coding: utf-8 -*-
 # Copyright 2015, 2016 OpenMarket Ltd
 # Copyright 2018 New Vector Ltd
 # Copyright 2020 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import enum
 import threading
 from typing import Callable, Generic, Iterable, MutableMapping, Optional, TypeVar, cast
 from prometheus_client import Gauge
 from twisted.internet import defer
 from synapse.util.async_helpers import ObservableDeferred
 from synapse.util.caches import register_cache
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
 cache_pending_metric = Gauge(
    "synapse_util_caches_cache_pending",
    "Number of lookups currently pending for this cache",
    ["name"],
 )
 KT = TypeVar("KT")
 VT = TypeVar("VT")
 class _Sentinel(enum.Enum):
    # defining a sentinel in this way allows mypy to correctly handle the
    # type of a dictionary lookup.
    sentinel = object()
 class DeferredCache(Generic[KT, VT]):
    """Wraps an LruCache, adding support for Deferred results.
    It expects that each entry added with set() will be a Deferred; likewise get()
    may return an ObservableDeferred.
    """
    __slots__ = (
        "cache",
        "name",
        "keylen",
        "thread",
        "metrics",
        "_pending_deferred_cache",
    )
    def __init__(
        self,
        name: str,
        max_entries: int = 1000,
        keylen: int = 1,
        tree: bool = False,
        iterable: bool = False,
        apply_cache_factor_from_config: bool = True,
    ):
        """
        Args:
            name: The name of the cache
            max_entries: Maximum amount of entries that the cache will hold
            keylen: The length of the tuple used as the cache key. Ignored unless
               `tree` is True.
            tree: Use a TreeCache instead of a dict as the underlying cache type
            iterable: If True, count each item in the cached object as an entry,
                rather than each cached object
            apply_cache_factor_from_config: Whether cache factors specified in the
                config file affect `max_entries`
        """
        cache_type = TreeCache if tree else dict
        # _pending_deferred_cache maps from the key value to a `CacheEntry` object.
        self._pending_deferred_cache = (
            cache_type()
        )  # type: MutableMapping[KT, CacheEntry]
        # cache is used for completed results and maps to the result itself, rather than
        # a Deferred.
        self.cache = LruCache(
            max_size=max_entries,
            keylen=keylen,
            cache_type=cache_type,
            size_callback=(lambda d: len(d)) if iterable else None,
            evicted_callback=self._on_evicted,
            apply_cache_factor_from_config=apply_cache_factor_from_config,
        )
        self.name = name
        self.keylen = keylen
        self.thread = None  # type: Optional[threading.Thread]
        self.metrics = register_cache(
            "cache",
            name,
            self.cache,
            collect_callback=self._metrics_collection_callback,
        )
    @property
    def max_entries(self):
        return self.cache.max_size
    def _on_evicted(self, evicted_count):
        self.metrics.inc_evictions(evicted_count)
    def _metrics_collection_callback(self):
        cache_pending_metric.labels(self.name).set(len(self._pending_deferred_cache))
    def check_thread(self):
        expected_thread = self.thread
        if expected_thread is None:
            self.thread = threading.current_thread()
        else:
            if expected_thread is not threading.current_thread():
                raise ValueError(
                    "Cache objects can only be accessed from the main thread"
                )
    def get(
        self,
        key: KT,
        default=_Sentinel.sentinel,
        callback: Optional[Callable[[], None]] = None,
        update_metrics: bool = True,
    ):
        """Looks the key up in the caches.
        Args:
            key(tuple)
            default: What is returned if key is not in the caches. If not
                specified then function throws KeyError instead
            callback(fn): Gets called when the entry in the cache is invalidated
            update_metrics (bool): whether to update the cache hit rate metrics
        Returns:
            Either an ObservableDeferred or the result itself
        """
        callbacks = [callback] if callback else []
        val = self._pending_deferred_cache.get(key, _Sentinel.sentinel)
        if val is not _Sentinel.sentinel:
            val.callbacks.update(callbacks)
            if update_metrics:
                self.metrics.inc_hits()
            return val.deferred
        val = self.cache.get(key, _Sentinel.sentinel, callbacks=callbacks)
        if val is not _Sentinel.sentinel:
            self.metrics.inc_hits()
            return val
        if update_metrics:
            self.metrics.inc_misses()
        if default is _Sentinel.sentinel:
            raise KeyError()
        else:
            return default
    def set(
        self,
        key: KT,
        value: defer.Deferred,
        callback: Optional[Callable[[], None]] = None,
    ) -> ObservableDeferred:
        if not isinstance(value, defer.Deferred):
            raise TypeError("not a Deferred")
        callbacks = [callback] if callback else []
        self.check_thread()
        observable = ObservableDeferred(value, consumeErrors=True)
        observer = observable.observe()
        entry = CacheEntry(deferred=observable, callbacks=callbacks)
        existing_entry = self._pending_deferred_cache.pop(key, None)
        if existing_entry:
            existing_entry.invalidate()
        self._pending_deferred_cache[key] = entry
        def compare_and_pop():
            """Check if our entry is still the one in _pending_deferred_cache, and
            if so, pop it.
            Returns true if the entries matched.
            """
            existing_entry = self._pending_deferred_cache.pop(key, None)
            if existing_entry is entry:
                return True
            # oops, the _pending_deferred_cache has been updated since
            # we started our query, so we are out of date.
            #
            # Better put back whatever we took out. (We do it this way
            # round, rather than peeking into the _pending_deferred_cache
            # and then removing on a match, to make the common case faster)
            if existing_entry is not None:
                self._pending_deferred_cache[key] = existing_entry
            return False
        def cb(result):
            if compare_and_pop():
                self.cache.set(key, result, entry.callbacks)
            else:
                # we're not going to put this entry into the cache, so need
                # to make sure that the invalidation callbacks are called.
                # That was probably done when _pending_deferred_cache was
                # updated, but it's possible that `set` was called without
                # `invalidate` being previously called, in which case it may
                # not have been. Either way, let's double-check now.
                entry.invalidate()
        def eb(_fail):
            compare_and_pop()
            entry.invalidate()
        # once the deferred completes, we can move the entry from the
        # _pending_deferred_cache to the real cache.
        #
        observer.addCallbacks(cb, eb)
        return observable
    def prefill(self, key: KT, value: VT, callback: Callable[[], None] = None):
        callbacks = [callback] if callback else []
        self.cache.set(key, value, callbacks=callbacks)
    def invalidate(self, key):
        self.check_thread()
        self.cache.pop(key, None)
        # if we have a pending lookup for this key, remove it from the
        # _pending_deferred_cache, which will (a) stop it being returned
        # for future queries and (b) stop it being persisted as a proper entry
        # in self.cache.
        entry = self._pending_deferred_cache.pop(key, None)
        # run the invalidation callbacks now, rather than waiting for the
        # deferred to resolve.
        if entry:
            entry.invalidate()
    def invalidate_many(self, key: KT):
        self.check_thread()
        if not isinstance(key, tuple):
            raise TypeError("The cache key must be a tuple not %r" % (type(key),))
        self.cache.del_multi(key)
        # if we have a pending lookup for this key, remove it from the
        # _pending_deferred_cache, as above
        entry_dict = self._pending_deferred_cache.pop(cast(KT, key), None)
        if entry_dict is not None:
            for entry in iterate_tree_cache_entry(entry_dict):
                entry.invalidate()
    def invalidate_all(self):
        self.check_thread()
        self.cache.clear()
        for entry in self._pending_deferred_cache.values():
            entry.invalidate()
        self._pending_deferred_cache.clear()
 class CacheEntry:
    __slots__ = ["deferred", "callbacks", "invalidated"]
    def __init__(
        self, deferred: ObservableDeferred, callbacks: Iterable[Callable[[], None]]
    ):
        self.deferred = deferred
        self.callbacks = set(callbacks)
        self.invalidated = False
    def invalidate(self):
        if not self.invalidated:
            self.invalidated = True
            for callback in self.callbacks:
                callback()
            self.callbacks.clear()
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@ -13,44 +13,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import enum
 import functools
 import inspect
 import logging
-import threading
+from typing import Any, Callable, Generic, Optional, Tuple, TypeVar, Union, cast
 from typing import (
    Any,
    Callable,
    Generic,
    Iterable,
    MutableMapping,
    Optional,
    Tuple,
    TypeVar,
    Union,
    cast,
 )
 from weakref import WeakValueDictionary
 from prometheus_client import Gauge
 from twisted.internet import defer
 from synapse.logging.context import make_deferred_yieldable, preserve_fn
 from synapse.util import unwrapFirstError
 from synapse.util.async_helpers import ObservableDeferred
-from synapse.util.caches.lrucache import LruCache
+from synapse.util.caches.deferred_cache import DeferredCache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
 from . import register_cache
 logger = logging.getLogger(__name__)
 CacheKey = Union[Tuple, Any]
 F = TypeVar("F", bound=Callable[..., Any])
 KT = TypeVar("KT")
 VT = TypeVar("VT")
 class _CachedFunction(Generic[F]):
@ -68,266 +48,6 @@ class _CachedFunction(Generic[F]):
    __call__ = None  # type: F
 cache_pending_metric = Gauge(
    "synapse_util_caches_cache_pending",
    "Number of lookups currently pending for this cache",
    ["name"],
 )
 class _Sentinel(enum.Enum):
    # defining a sentinel in this way allows mypy to correctly handle the
    # type of a dictionary lookup.
    sentinel = object()
 class CacheEntry:
    __slots__ = ["deferred", "callbacks", "invalidated"]
    def __init__(
        self, deferred: ObservableDeferred, callbacks: Iterable[Callable[[], None]]
    ):
        self.deferred = deferred
        self.callbacks = set(callbacks)
        self.invalidated = False
    def invalidate(self):
        if not self.invalidated:
            self.invalidated = True
            for callback in self.callbacks:
                callback()
            self.callbacks.clear()
 class DeferredCache(Generic[KT, VT]):
    """Wraps an LruCache, adding support for Deferred results.
    It expects that each entry added with set() will be a Deferred; likewise get()
    may return an ObservableDeferred.
    """
    __slots__ = (
        "cache",
        "name",
        "keylen",
        "thread",
        "metrics",
        "_pending_deferred_cache",
    )
    def __init__(
        self,
        name: str,
        max_entries: int = 1000,
        keylen: int = 1,
        tree: bool = False,
        iterable: bool = False,
        apply_cache_factor_from_config: bool = True,
    ):
        """
        Args:
            name: The name of the cache
            max_entries: Maximum amount of entries that the cache will hold
            keylen: The length of the tuple used as the cache key. Ignored unless
               `tree` is True.
            tree: Use a TreeCache instead of a dict as the underlying cache type
            iterable: If True, count each item in the cached object as an entry,
                rather than each cached object
            apply_cache_factor_from_config: Whether cache factors specified in the
                config file affect `max_entries`
        """
        cache_type = TreeCache if tree else dict
        # _pending_deferred_cache maps from the key value to a `CacheEntry` object.
        self._pending_deferred_cache = (
            cache_type()
        )  # type: MutableMapping[KT, CacheEntry]
        # cache is used for completed results and maps to the result itself, rather than
        # a Deferred.
        self.cache = LruCache(
            max_size=max_entries,
            keylen=keylen,
            cache_type=cache_type,
            size_callback=(lambda d: len(d)) if iterable else None,
            evicted_callback=self._on_evicted,
            apply_cache_factor_from_config=apply_cache_factor_from_config,
        )
        self.name = name
        self.keylen = keylen
        self.thread = None  # type: Optional[threading.Thread]
        self.metrics = register_cache(
            "cache",
            name,
            self.cache,
            collect_callback=self._metrics_collection_callback,
        )
    @property
    def max_entries(self):
        return self.cache.max_size
    def _on_evicted(self, evicted_count):
        self.metrics.inc_evictions(evicted_count)
    def _metrics_collection_callback(self):
        cache_pending_metric.labels(self.name).set(len(self._pending_deferred_cache))
    def check_thread(self):
        expected_thread = self.thread
        if expected_thread is None:
            self.thread = threading.current_thread()
        else:
            if expected_thread is not threading.current_thread():
                raise ValueError(
                    "Cache objects can only be accessed from the main thread"
                )
    def get(
        self,
        key: KT,
        default=_Sentinel.sentinel,
        callback: Optional[Callable[[], None]] = None,
        update_metrics: bool = True,
    ):
        """Looks the key up in the caches.
        Args:
            key(tuple)
            default: What is returned if key is not in the caches. If not
                specified then function throws KeyError instead
            callback(fn): Gets called when the entry in the cache is invalidated
            update_metrics (bool): whether to update the cache hit rate metrics
        Returns:
            Either an ObservableDeferred or the result itself
        """
        callbacks = [callback] if callback else []
        val = self._pending_deferred_cache.get(key, _Sentinel.sentinel)
        if val is not _Sentinel.sentinel:
            val.callbacks.update(callbacks)
            if update_metrics:
                self.metrics.inc_hits()
            return val.deferred
        val = self.cache.get(key, _Sentinel.sentinel, callbacks=callbacks)
        if val is not _Sentinel.sentinel:
            self.metrics.inc_hits()
            return val
        if update_metrics:
            self.metrics.inc_misses()
        if default is _Sentinel.sentinel:
            raise KeyError()
        else:
            return default
    def set(
        self,
        key: KT,
        value: defer.Deferred,
        callback: Optional[Callable[[], None]] = None,
    ) -> ObservableDeferred:
        if not isinstance(value, defer.Deferred):
            raise TypeError("not a Deferred")
        callbacks = [callback] if callback else []
        self.check_thread()
        observable = ObservableDeferred(value, consumeErrors=True)
        observer = observable.observe()
        entry = CacheEntry(deferred=observable, callbacks=callbacks)
        existing_entry = self._pending_deferred_cache.pop(key, None)
        if existing_entry:
            existing_entry.invalidate()
        self._pending_deferred_cache[key] = entry
        def compare_and_pop():
            """Check if our entry is still the one in _pending_deferred_cache, and
            if so, pop it.
            Returns true if the entries matched.
            """
            existing_entry = self._pending_deferred_cache.pop(key, None)
            if existing_entry is entry:
                return True
            # oops, the _pending_deferred_cache has been updated since
            # we started our query, so we are out of date.
            #
            # Better put back whatever we took out. (We do it this way
            # round, rather than peeking into the _pending_deferred_cache
            # and then removing on a match, to make the common case faster)
            if existing_entry is not None:
                self._pending_deferred_cache[key] = existing_entry
            return False
        def cb(result):
            if compare_and_pop():
                self.cache.set(key, result, entry.callbacks)
            else:
                # we're not going to put this entry into the cache, so need
                # to make sure that the invalidation callbacks are called.
                # That was probably done when _pending_deferred_cache was
                # updated, but it's possible that `set` was called without
                # `invalidate` being previously called, in which case it may
                # not have been. Either way, let's double-check now.
                entry.invalidate()
        def eb(_fail):
            compare_and_pop()
            entry.invalidate()
        # once the deferred completes, we can move the entry from the
        # _pending_deferred_cache to the real cache.
        #
        observer.addCallbacks(cb, eb)
        return observable
    def prefill(self, key: KT, value: VT, callback: Callable[[], None] = None):
        callbacks = [callback] if callback else []
        self.cache.set(key, value, callbacks=callbacks)
    def invalidate(self, key):
        self.check_thread()
        self.cache.pop(key, None)
        # if we have a pending lookup for this key, remove it from the
        # _pending_deferred_cache, which will (a) stop it being returned
        # for future queries and (b) stop it being persisted as a proper entry
        # in self.cache.
        entry = self._pending_deferred_cache.pop(key, None)
        # run the invalidation callbacks now, rather than waiting for the
        # deferred to resolve.
        if entry:
            entry.invalidate()
    def invalidate_many(self, key: KT):
        self.check_thread()
        if not isinstance(key, tuple):
            raise TypeError("The cache key must be a tuple not %r" % (type(key),))
        self.cache.del_multi(key)
        # if we have a pending lookup for this key, remove it from the
        # _pending_deferred_cache, as above
        entry_dict = self._pending_deferred_cache.pop(cast(KT, key), None)
        if entry_dict is not None:
            for entry in iterate_tree_cache_entry(entry_dict):
                entry.invalidate()
    def invalidate_all(self):
        self.check_thread()
        self.cache.clear()
        for entry in self._pending_deferred_cache.values():
            entry.invalidate()
        self._pending_deferred_cache.clear()
 class _CacheDescriptorBase:
    def __init__(self, orig: _CachedFunction, num_args, cache_context=False):
        self.orig = orig
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@ -20,7 +20,8 @@ from mock import Mock
 from twisted.internet import defer
 from synapse.util.async_helpers import ObservableDeferred
-from synapse.util.caches.descriptors import DeferredCache, cached
+from synapse.util.caches.deferred_cache import DeferredCache
 from synapse.util.caches.descriptors import cached
 from tests import unittest
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@ -15,7 +15,7 @@
 # limitations under the License.
 from synapse.metrics import REGISTRY, InFlightGauge, generate_latest
-from synapse.util.caches.descriptors import DeferredCache
+from synapse.util.caches.deferred_cache import DeferredCache
 from tests import unittest
--- a/tests/util/caches/test_deferred_cache.py
+++ b/tests/util/caches/test_deferred_cache.py
@ -0,0 +1,64 @@
 # -*- coding: utf-8 -*-
 # Copyright 2020 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
 from functools import partial
 from twisted.internet import defer
 import synapse.util.caches.deferred_cache
 class DeferredCacheTestCase(unittest.TestCase):
    def test_invalidate_all(self):
        cache = synapse.util.caches.deferred_cache.DeferredCache("testcache")
        callback_record = [False, False]
        def record_callback(idx):
            callback_record[idx] = True
        # add a couple of pending entries
        d1 = defer.Deferred()
        cache.set("key1", d1, partial(record_callback, 0))
        d2 = defer.Deferred()
        cache.set("key2", d2, partial(record_callback, 1))
        # lookup should return observable deferreds
        self.assertFalse(cache.get("key1").has_called())
        self.assertFalse(cache.get("key2").has_called())
        # let one of the lookups complete
        d2.callback("result2")
        # for now at least, the cache will return real results rather than an
        # observabledeferred
        self.assertEqual(cache.get("key2"), "result2")
        # now do the invalidation
        cache.invalidate_all()
        # lookup should return none
        self.assertIsNone(cache.get("key1", None))
        self.assertIsNone(cache.get("key2", None))
        # both callbacks should have been callbacked
        self.assertTrue(callback_record[0], "Invalidation callback for key1 not called")
        self.assertTrue(callback_record[1], "Invalidation callback for key2 not called")
        # letting the other lookup complete should do nothing
        d1.callback("result1")
        self.assertIsNone(cache.get("key1", None))
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
 from functools import partial
 import mock
@ -42,49 +41,6 @@ def run_on_reactor():
    return make_deferred_yieldable(d)
 class DeferredCacheTestCase(unittest.TestCase):
    def test_invalidate_all(self):
        cache = descriptors.DeferredCache("testcache")
        callback_record = [False, False]
        def record_callback(idx):
            callback_record[idx] = True
        # add a couple of pending entries
        d1 = defer.Deferred()
        cache.set("key1", d1, partial(record_callback, 0))
        d2 = defer.Deferred()
        cache.set("key2", d2, partial(record_callback, 1))
        # lookup should return observable deferreds
        self.assertFalse(cache.get("key1").has_called())
        self.assertFalse(cache.get("key2").has_called())
        # let one of the lookups complete
        d2.callback("result2")
        # for now at least, the cache will return real results rather than an
        # observabledeferred
        self.assertEqual(cache.get("key2"), "result2")
        # now do the invalidation
        cache.invalidate_all()
        # lookup should return none
        self.assertIsNone(cache.get("key1", None))
        self.assertIsNone(cache.get("key2", None))
        # both callbacks should have been callbacked
        self.assertTrue(callback_record[0], "Invalidation callback for key1 not called")
        self.assertTrue(callback_record[1], "Invalidation callback for key2 not called")
        # letting the other lookup complete should do nothing
        d1.callback("result1")
        self.assertIsNone(cache.get("key1", None))
 class DescriptorTestCase(unittest.TestCase):
    @defer.inlineCallbacks
    def test_cache(self):