Allow configuration of Synapse's cache without using synctl or environment variables (#6391)

This commit is contained in:
Amber Brown 2020-05-12 03:45:23 +10:00 committed by GitHub
parent a8580c5f19
commit 7cb8b4bc67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 620 additions and 146 deletions

1
changelog.d/6391.feature Normal file
View File

@ -0,0 +1 @@
Synapse's cache factor can now be configured in `homeserver.yaml` by the `caches.global_factor` setting. Additionally, `caches.per_cache_factors` controls the cache factors for individual caches.

View File

@ -603,6 +603,45 @@ acme:
## Caching ##
# Caching can be configured through the following options.
#
# A cache 'factor' is a multiplier that can be applied to each of
# Synapse's caches in order to increase or decrease the maximum
# number of entries that can be stored.
# The number of events to cache in memory. Not affected by
# caches.global_factor.
#
#event_cache_size: 10K
caches:
# Controls the global cache factor, which is the default cache factor
# for all caches if a specific factor for that cache is not otherwise
# set.
#
# This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
# variable. Setting by environment variable takes priority over
# setting through the config file.
#
# Defaults to 0.5, which will half the size of all caches.
#
#global_factor: 1.0
# A dictionary of cache name to cache factor for that individual
# cache. Overrides the global cache factor for a given cache.
#
# These can also be set through environment variables comprised
# of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
# letters and underscores. Setting by environment variable
# takes priority over setting through the config file.
# Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
#
per_cache_factors:
#get_users_who_share_room_with_user: 2.0
## Database ##
# The 'database' setting defines the database that synapse uses to store all of
@ -646,10 +685,6 @@ database:
args:
database: DATADIR/homeserver.db
# Number of events to cache in memory.
#
#event_cache_size: 10K
## Logging ##

View File

@ -37,7 +37,7 @@ from synapse.api.errors import (
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
from synapse.events import EventBase
from synapse.types import StateMap, UserID
from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
from synapse.util.caches import register_cache
from synapse.util.caches.lrucache import LruCache
from synapse.util.metrics import Measure
@ -73,7 +73,7 @@ class Auth(object):
self.store = hs.get_datastore()
self.state = hs.get_state_handler()
self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000)
self.token_cache = LruCache(10000)
register_cache("cache", "token_cache", self.token_cache)
self._auth_blocking = AuthBlocking(self.hs)

View File

@ -69,7 +69,6 @@ from synapse.server import HomeServer
from synapse.storage import DataStore
from synapse.storage.engines import IncorrectDatabaseSetup
from synapse.storage.prepare_database import UpgradeDatabaseException
from synapse.util.caches import CACHE_SIZE_FACTOR
from synapse.util.httpresourcetree import create_resource_tree
from synapse.util.manhole import manhole
from synapse.util.module_loader import load_module
@ -516,8 +515,8 @@ def phone_stats_home(hs, stats, stats_process=_stats_process):
daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages()
stats["daily_sent_messages"] = daily_sent_messages
stats["cache_factor"] = CACHE_SIZE_FACTOR
stats["event_cache_size"] = hs.config.event_cache_size
stats["cache_factor"] = hs.config.caches.global_factor
stats["event_cache_size"] = hs.config.caches.event_cache_size
#
# Performance statistics

164
synapse/config/cache.py Normal file
View File

@ -0,0 +1,164 @@
# -*- coding: utf-8 -*-
# Copyright 2019 Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Callable, Dict
from ._base import Config, ConfigError
# Registry of resizable caches: lower-cased cache name -> resize callback
# (populated by add_resizable_cache)
_CACHES = {}
# The prefix for all cache factor-related environment variables
_CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR"
# Global cache factor used when the SYNAPSE_CACHE_FACTOR environment variable
# is not set
_DEFAULT_FACTOR_SIZE = 0.5
# Default for the `event_cache_size` option; passed through Config.parse_size
_DEFAULT_EVENT_CACHE_SIZE = "10K"
class CacheProperties(object):
    """Holder for the cache settings shared between the config and the caches."""

    def __init__(self):
        # Seed the default factor from the environment, falling back to the
        # built-in default when the variable is absent
        env_factor = os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)
        self.default_factor_size = float(env_factor)

        # Populated by CacheConfig.read_config once a config has been loaded
        self.resize_all_caches_func = None
properties = CacheProperties()
def add_resizable_cache(cache_name: str, cache_resize_callback: Callable):
    """Record a cache whose size may be adjusted at runtime.

    Args:
        cache_name: The name identifying the cache; it is stored lower-cased
        cache_resize_callback: A callback that is invoked with a cache factor
            each time the cache has to be resized
    """
    canonical_name = cache_name.lower()
    _CACHES[canonical_name] = cache_resize_callback

    # The resize hook is only installed once the config has been read (it
    # depends on values loaded from it), so before then this is a no-op.
    # Afterwards, re-run it so every registered cache is sized appropriately.
    resize_all = properties.resize_all_caches_func
    if resize_all:
        resize_all()
class CacheConfig(Config):
section = "caches"
_environ = os.environ
@staticmethod
def reset():
    """Put the shared cache state back to its defaults. Used for tests."""
    # Re-derive the default factor exactly as CacheProperties does at start-up
    env_factor = float(os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE))
    properties.default_factor_size = env_factor

    # Forget any loaded config and every registered cache
    properties.resize_all_caches_func = None
    _CACHES.clear()
def generate_config_section(self, **kwargs):
    """Return the commented sample YAML for the caching options.

    The text covers `event_cache_size` and the `caches` section
    (`global_factor` and `per_cache_factors`). `kwargs` is accepted but
    not used.
    """
    return """\
## Caching ##
# Caching can be configured through the following options.
#
# A cache 'factor' is a multiplier that can be applied to each of
# Synapse's caches in order to increase or decrease the maximum
# number of entries that can be stored.
# The number of events to cache in memory. Not affected by
# caches.global_factor.
#
#event_cache_size: 10K
caches:
# Controls the global cache factor, which is the default cache factor
# for all caches if a specific factor for that cache is not otherwise
# set.
#
# This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
# variable. Setting by environment variable takes priority over
# setting through the config file.
#
# Defaults to 0.5, which will half the size of all caches.
#
#global_factor: 1.0
# A dictionary of cache name to cache factor for that individual
# cache. Overrides the global cache factor for a given cache.
#
# These can also be set through environment variables comprised
# of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
# letters and underscores. Setting by environment variable
# takes priority over setting through the config file.
# Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
#
per_cache_factors:
#get_users_who_share_room_with_user: 2.0
"""
def read_config(self, config, **kwargs):
    """Load the cache settings and resize any caches that already exist.

    Args:
        config: The parsed config dict. Reads `event_cache_size` and the
            `caches` section (`global_factor`, `per_cache_factors`). It is
            not modified.
        **kwargs: Ignored.

    Raises:
        ConfigError: if `caches.global_factor` or any per-cache factor
            (from the config file or the environment) is not a number, or
            if `caches.per_cache_factors` is not a dictionary.
    """
    self.event_cache_size = self.parse_size(
        config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)
    )
    self.cache_factors = {}  # type: Dict[str, float]

    cache_config = config.get("caches") or {}

    # NOTE(review): a `global_factor` from the config file takes precedence
    # here; the SYNAPSE_CACHE_FACTOR environment variable only seeds
    # properties.default_factor_size, which is used as the fallback. The
    # generated sample config states the opposite priority -- confirm which
    # is intended.
    self.global_factor = cache_config.get(
        "global_factor", properties.default_factor_size
    )
    # bool is a subclass of int, so it has to be rejected explicitly
    if isinstance(self.global_factor, bool) or not isinstance(
        self.global_factor, (int, float)
    ):
        raise ConfigError("caches.global_factor must be a number.")

    # Set the global one so that it's reflected in new caches
    properties.default_factor_size = self.global_factor

    # Load cache factors from the config
    individual_factors = cache_config.get("per_cache_factors") or {}
    if not isinstance(individual_factors, dict):
        raise ConfigError("caches.per_cache_factors must be a dictionary")

    # Canonicalise the names into a fresh dict *before* applying the
    # environment overrides: this leaves the caller's config untouched and
    # guarantees that an environment variable replaces a config entry that
    # differs only by case, regardless of dict ordering.
    merged_factors = {
        name.lower(): factor for name, factor in individual_factors.items()
    }

    # Override factors from environment if necessary
    prefix = _CACHE_PREFIX + "_"
    for key, val in self._environ.items():
        if not key.startswith(prefix):
            continue
        try:
            merged_factors[key[len(prefix) :].lower()] = float(val)
        except (TypeError, ValueError):
            # Report a bad environment value as a configuration problem
            # rather than letting a bare ValueError escape
            raise ConfigError("Environment variable %s must be a number" % (key,))

    for cache, factor in merged_factors.items():
        if isinstance(factor, bool) or not isinstance(factor, (int, float)):
            raise ConfigError(
                "caches.per_cache_factors.%s must be a number" % (cache,)
            )
        self.cache_factors[cache] = factor

    # Resize all caches (if necessary) with the new factors we've loaded
    self.resize_all_caches()

    # Store this function so that it can be called from other classes without
    # needing an instance of Config
    properties.resize_all_caches_func = self.resize_all_caches
def resize_all_caches(self):
    """Bring every registered cache in line with the loaded factors.

    Each registered resize callback is called with the factor configured
    for that cache name, or with the global factor when it has none.
    """
    for name, resize in _CACHES.items():
        if name in self.cache_factors:
            factor = self.cache_factors[name]
        else:
            factor = self.global_factor
        resize(factor)

View File

@ -68,10 +68,6 @@ database:
name: sqlite3
args:
database: %(database_path)s
# Number of events to cache in memory.
#
#event_cache_size: 10K
"""
@ -116,8 +112,6 @@ class DatabaseConfig(Config):
self.databases = []
def read_config(self, config, **kwargs):
self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K"))
# We *experimentally* support specifying multiple databases via the
# `databases` key. This is a map from a label to database config in the
# same format as the `database` config option, plus an extra

View File

@ -17,6 +17,7 @@
from ._base import RootConfig
from .api import ApiConfig
from .appservice import AppServiceConfig
from .cache import CacheConfig
from .captcha import CaptchaConfig
from .cas import CasConfig
from .consent_config import ConsentConfig
@ -55,6 +56,7 @@ class HomeServerConfig(RootConfig):
config_classes = [
ServerConfig,
TlsConfig,
CacheConfig,
DatabaseConfig,
LoggingConfig,
RatelimitConfig,

View File

@ -49,7 +49,6 @@ from synapse.http.proxyagent import ProxyAgent
from synapse.logging.context import make_deferred_yieldable
from synapse.logging.opentracing import set_tag, start_active_span, tags
from synapse.util.async_helpers import timeout_deferred
from synapse.util.caches import CACHE_SIZE_FACTOR
logger = logging.getLogger(__name__)
@ -241,7 +240,10 @@ class SimpleHttpClient(object):
# tends to do so in batches, so we need to allow the pool to keep
# lots of idle connections around.
pool = HTTPConnectionPool(self.reactor)
pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5))
# XXX: The justification for using the cache factor here is that larger instances
# will need both more cache and more connections.
# Still, this should probably be a separate dial
pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5))
pool.cachedConnectionTimeout = 2 * 60
self.agent = ProxyAgent(

View File

@ -33,6 +33,8 @@ from prometheus_client import REGISTRY
from twisted.web.resource import Resource
from synapse.util import caches
try:
from prometheus_client.samples import Sample
except ImportError:
@ -103,13 +105,15 @@ def nameify_sample(sample):
def generate_latest(registry, emit_help=False):
# Trigger the cache metrics to be rescraped, which updates the common
# metrics but do not produce metrics themselves
for collector in caches.collectors_by_name.values():
collector.collect()
output = []
for metric in registry.collect():
if metric.name.startswith("__unused"):
continue
if not metric.samples:
# No samples, don't bother.
continue

View File

@ -51,6 +51,7 @@ push_rules_delta_state_cache_metric = register_cache(
"cache",
"push_rules_delta_state_cache_metric",
cache=[], # Meaningless size, as this isn't a cache that stores values
resizable=False,
)
@ -67,7 +68,8 @@ class BulkPushRuleEvaluator(object):
self.room_push_rule_cache_metrics = register_cache(
"cache",
"room_push_rule_cache",
cache=[], # Meaningless size, as this isn't a cache that stores values
cache=[], # Meaningless size, as this isn't a cache that stores values,
resizable=False,
)
@defer.inlineCallbacks

View File

@ -22,7 +22,7 @@ from six import string_types
from synapse.events import EventBase
from synapse.types import UserID
from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
from synapse.util.caches import register_cache
from synapse.util.caches.lrucache import LruCache
logger = logging.getLogger(__name__)
@ -165,7 +165,7 @@ class PushRuleEvaluatorForEvent(object):
# Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches
regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
regex_cache = LruCache(50000)
register_cache("cache", "regex_push_cache", regex_cache)

View File

@ -15,7 +15,6 @@
from synapse.storage.data_stores.main.client_ips import LAST_SEEN_GRANULARITY
from synapse.storage.database import Database
from synapse.util.caches import CACHE_SIZE_FACTOR
from synapse.util.caches.descriptors import Cache
from ._base import BaseSlavedStore
@ -26,7 +25,7 @@ class SlavedClientIpStore(BaseSlavedStore):
super(SlavedClientIpStore, self).__init__(database, db_conn, hs)
self.client_ip_last_seen = Cache(
name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
name="client_ip_last_seen", keylen=4, max_entries=50000
)
def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id):

View File

@ -35,7 +35,6 @@ from synapse.state import v1, v2
from synapse.storage.data_stores.main.events_worker import EventRedactBehaviour
from synapse.types import StateMap
from synapse.util.async_helpers import Linearizer
from synapse.util.caches import get_cache_factor_for
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.metrics import Measure, measure_func
@ -53,7 +52,6 @@ state_groups_histogram = Histogram(
KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key"))
SIZE_OF_CACHE = 100000 * get_cache_factor_for("state_cache")
EVICTION_TIMEOUT_SECONDS = 60 * 60
@ -447,7 +445,7 @@ class StateResolutionHandler(object):
self._state_cache = ExpiringCache(
cache_name="state_cache",
clock=self.clock,
max_len=SIZE_OF_CACHE,
max_len=100000,
expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000,
iterable=True,
reset_expiry_on_get=True,

View File

@ -22,7 +22,6 @@ from twisted.internet import defer
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import Database, make_tuple_comparison_clause
from synapse.util.caches import CACHE_SIZE_FACTOR
from synapse.util.caches.descriptors import Cache
logger = logging.getLogger(__name__)
@ -361,7 +360,7 @@ class ClientIpStore(ClientIpBackgroundUpdateStore):
def __init__(self, database: Database, db_conn, hs):
self.client_ip_last_seen = Cache(
name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR
name="client_ip_last_seen", keylen=4, max_entries=50000
)
super(ClientIpStore, self).__init__(database, db_conn, hs)

View File

@ -75,7 +75,10 @@ class EventsWorkerStore(SQLBaseStore):
super(EventsWorkerStore, self).__init__(database, db_conn, hs)
self._get_event_cache = Cache(
"*getEvent*", keylen=3, max_entries=hs.config.event_cache_size
"*getEvent*",
keylen=3,
max_entries=hs.config.caches.event_cache_size,
apply_cache_factor_from_config=False,
)
self._event_fetch_lock = threading.Condition()

View File

@ -28,7 +28,6 @@ from synapse.storage.data_stores.state.bg_updates import StateBackgroundUpdateSt
from synapse.storage.database import Database
from synapse.storage.state import StateFilter
from synapse.types import StateMap
from synapse.util.caches import get_cache_factor_for
from synapse.util.caches.descriptors import cached
from synapse.util.caches.dictionary_cache import DictionaryCache
@ -90,11 +89,10 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
self._state_group_cache = DictionaryCache(
"*stateGroupCache*",
# TODO: this hasn't been tuned yet
50000 * get_cache_factor_for("stateGroupCache"),
50000,
)
self._state_group_members_cache = DictionaryCache(
"*stateGroupMembersCache*",
500000 * get_cache_factor_for("stateGroupMembersCache"),
"*stateGroupMembersCache*", 500000,
)
@cached(max_entries=10000, iterable=True)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
# Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -15,28 +15,18 @@
# limitations under the License.
import logging
import os
from typing import Dict
from typing import Callable, Dict, Optional
import six
from six.moves import intern
from prometheus_client.core import REGISTRY, Gauge, GaugeMetricFamily
import attr
from prometheus_client.core import Gauge
from synapse.config.cache import add_resizable_cache
logger = logging.getLogger(__name__)
CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5))
def get_cache_factor_for(cache_name):
env_var = "SYNAPSE_CACHE_FACTOR_" + cache_name.upper()
factor = os.environ.get(env_var)
if factor:
return float(factor)
return CACHE_SIZE_FACTOR
caches_by_name = {}
collectors_by_name = {} # type: Dict
@ -44,6 +34,7 @@ cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
@ -53,32 +44,17 @@ response_cache_evicted = Gauge(
response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"])
def register_cache(cache_type, cache_name, cache, collect_callback=None):
"""Register a cache object for metric collection.
Args:
cache_type (str):
cache_name (str): name of the cache
cache (object): cache itself
collect_callback (callable|None): if not None, a function which is called during
metric collection to update additional metrics.
Returns:
CacheMetric: an object which provides inc_{hits,misses,evictions} methods
"""
# Check if the metric is already registered. Unregister it, if so.
# This usually happens during tests, as at runtime these caches are
# effectively singletons.
metric_name = "cache_%s_%s" % (cache_type, cache_name)
if metric_name in collectors_by_name.keys():
REGISTRY.unregister(collectors_by_name[metric_name])
@attr.s
class CacheMetric(object):
hits = 0
misses = 0
evicted_size = 0
_cache = attr.ib()
_cache_type = attr.ib(type=str)
_cache_name = attr.ib(type=str)
_collect_callback = attr.ib(type=Optional[Callable])
hits = attr.ib(default=0)
misses = attr.ib(default=0)
evicted_size = attr.ib(default=0)
def inc_hits(self):
self.hits += 1
@ -94,26 +70,56 @@ def register_cache(cache_type, cache_name, cache, collect_callback=None):
def collect(self):
try:
if cache_type == "response_cache":
response_cache_size.labels(cache_name).set(len(cache))
response_cache_hits.labels(cache_name).set(self.hits)
response_cache_evicted.labels(cache_name).set(self.evicted_size)
response_cache_total.labels(cache_name).set(self.hits + self.misses)
if self._cache_type == "response_cache":
response_cache_size.labels(self._cache_name).set(len(self._cache))
response_cache_hits.labels(self._cache_name).set(self.hits)
response_cache_evicted.labels(self._cache_name).set(self.evicted_size)
response_cache_total.labels(self._cache_name).set(
self.hits + self.misses
)
else:
cache_size.labels(cache_name).set(len(cache))
cache_hits.labels(cache_name).set(self.hits)
cache_evicted.labels(cache_name).set(self.evicted_size)
cache_total.labels(cache_name).set(self.hits + self.misses)
if collect_callback:
collect_callback()
cache_size.labels(self._cache_name).set(len(self._cache))
cache_hits.labels(self._cache_name).set(self.hits)
cache_evicted.labels(self._cache_name).set(self.evicted_size)
cache_total.labels(self._cache_name).set(self.hits + self.misses)
if getattr(self._cache, "max_size", None):
cache_max_size.labels(self._cache_name).set(self._cache.max_size)
if self._collect_callback:
self._collect_callback()
except Exception as e:
logger.warning("Error calculating metrics for %s: %s", cache_name, e)
logger.warning("Error calculating metrics for %s: %s", self._cache_name, e)
raise
yield GaugeMetricFamily("__unused", "")
metric = CacheMetric()
REGISTRY.register(metric)
def register_cache(
    cache_type: str,
    cache_name: str,
    cache,
    collect_callback: Optional[Callable] = None,
    resizable: bool = True,
    resize_callback: Optional[Callable] = None,
) -> CacheMetric:
    """Hook a cache up for metric collection and, optionally, resizing.

    Args:
        cache_type: the kind of cache; used to pick the metric family and as
            part of the collector's name
        cache_name: name of the cache
        cache: the cache object itself
        collect_callback: If given, a function which is called during metric
            collection to update additional metrics.
        resizable: Whether this cache supports being resized.
        resize_callback: A function which can be called to resize the cache.
            When omitted for a resizable cache, `cache.set_cache_factor` is
            used.

    Returns:
        CacheMetric: an object which provides inc_{hits,misses,evictions} methods
    """
    if resizable:
        # Fall back to the cache's own resize method when none was supplied
        callback = resize_callback or getattr(cache, "set_cache_factor")
        add_resizable_cache(cache_name, callback)

    collector = CacheMetric(cache, cache_type, cache_name, collect_callback)
    caches_by_name[cache_name] = cache
    collectors_by_name["cache_%s_%s" % (cache_type, cache_name)] = collector
    return collector

View File

@ -13,6 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import inspect
import logging
@ -30,7 +31,6 @@ from twisted.internet import defer
from synapse.logging.context import make_deferred_yieldable, preserve_fn
from synapse.util import unwrapFirstError
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.caches import get_cache_factor_for
from synapse.util.caches.lrucache import LruCache
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
@ -81,7 +81,6 @@ class CacheEntry(object):
class Cache(object):
__slots__ = (
"cache",
"max_entries",
"name",
"keylen",
"thread",
@ -89,7 +88,29 @@ class Cache(object):
"_pending_deferred_cache",
)
def __init__(self, name, max_entries=1000, keylen=1, tree=False, iterable=False):
def __init__(
self,
name: str,
max_entries: int = 1000,
keylen: int = 1,
tree: bool = False,
iterable: bool = False,
apply_cache_factor_from_config: bool = True,
):
"""
Args:
name: The name of the cache
max_entries: Maximum amount of entries that the cache will hold
keylen: The length of the tuple used as the cache key
tree: Use a TreeCache instead of a dict as the underlying cache type
iterable: If True, count each item in the cached object as an entry,
rather than each cached object
apply_cache_factor_from_config: Whether cache factors specified in the
config file affect `max_entries`
Returns:
Cache
"""
cache_type = TreeCache if tree else dict
self._pending_deferred_cache = cache_type()
@ -99,6 +120,7 @@ class Cache(object):
cache_type=cache_type,
size_callback=(lambda d: len(d)) if iterable else None,
evicted_callback=self._on_evicted,
apply_cache_factor_from_config=apply_cache_factor_from_config,
)
self.name = name
@ -111,6 +133,10 @@ class Cache(object):
collect_callback=self._metrics_collection_callback,
)
@property
def max_entries(self):
    """The current maximum size, read from the wrapped `self.cache.max_size`
    rather than stored separately on this object."""
    return self.cache.max_size
def _on_evicted(self, evicted_count):
self.metrics.inc_evictions(evicted_count)
@ -370,13 +396,11 @@ class CacheDescriptor(_CacheDescriptorBase):
cache_context=cache_context,
)
max_entries = int(max_entries * get_cache_factor_for(orig.__name__))
self.max_entries = max_entries
self.tree = tree
self.iterable = iterable
def __get__(self, obj, objtype=None):
def __get__(self, obj, owner):
cache = Cache(
name=self.orig.__name__,
max_entries=self.max_entries,

View File

@ -18,6 +18,7 @@ from collections import OrderedDict
from six import iteritems, itervalues
from synapse.config import cache as cache_config
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.util.caches import register_cache
@ -51,15 +52,16 @@ class ExpiringCache(object):
an item on access. Defaults to False.
iterable (bool): If true, the size is calculated by summing the
sizes of all entries, rather than the number of entries.
"""
self._cache_name = cache_name
self._original_max_size = max_len
self._max_size = int(max_len * cache_config.properties.default_factor_size)
self._clock = clock
self._max_len = max_len
self._expiry_ms = expiry_ms
self._reset_expiry_on_get = reset_expiry_on_get
self._cache = OrderedDict()
@ -82,9 +84,11 @@ class ExpiringCache(object):
def __setitem__(self, key, value):
now = self._clock.time_msec()
self._cache[key] = _CacheEntry(now, value)
self.evict()
def evict(self):
# Evict if there are now too many items
while self._max_len and len(self) > self._max_len:
while self._max_size and len(self) > self._max_size:
_key, value = self._cache.popitem(last=False)
if self.iterable:
self.metrics.inc_evictions(len(value.value))
@ -170,6 +174,23 @@ class ExpiringCache(object):
else:
return len(self._cache)
def set_cache_factor(self, factor: float) -> bool:
    """
    Apply a new cache factor to this cache.

    The maximum size becomes the original maximum multiplied by `factor`
    (truncated to an int). If that differs from the current maximum, the
    cache is resized, which may evict entries.

    Returns:
        bool: True if the maximum size changed, False otherwise.
    """
    target_size = int(self._original_max_size * factor)
    if target_size == self._max_size:
        # Nothing to do: the effective size is unchanged
        return False

    self._max_size = target_size
    self.evict()
    return True
class _CacheEntry(object):
__slots__ = ["time", "value"]

View File

@ -13,10 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import threading
from functools import wraps
from typing import Callable, Optional, Type, Union
from synapse.config import cache as cache_config
from synapse.util.caches.treecache import TreeCache
@ -52,17 +53,18 @@ class LruCache(object):
def __init__(
self,
max_size,
keylen=1,
cache_type=dict,
size_callback=None,
evicted_callback=None,
max_size: int,
keylen: int = 1,
cache_type: Type[Union[dict, TreeCache]] = dict,
size_callback: Optional[Callable] = None,
evicted_callback: Optional[Callable] = None,
apply_cache_factor_from_config: bool = True,
):
"""
Args:
max_size (int):
max_size: The maximum amount of entries the cache can hold
keylen (int):
keylen: The length of the tuple used as the cache key
cache_type (type):
type of underlying cache to be used. Typically one of dict
@ -73,9 +75,23 @@ class LruCache(object):
evicted_callback (func(int)|None):
if not None, called on eviction with the size of the evicted
entry
apply_cache_factor_from_config (bool): If true, `max_size` will be
multiplied by a cache factor derived from the homeserver config
"""
cache = cache_type()
self.cache = cache # Used for introspection.
# Save the original max size, and apply the default size factor.
self._original_max_size = max_size
# We previously didn't apply the cache factor here, and as such some caches were
# not affected by the global cache factor. Add an option here to disable applying
# the cache factor when a cache is created
if apply_cache_factor_from_config:
self.max_size = int(max_size * cache_config.properties.default_factor_size)
else:
self.max_size = int(max_size)
list_root = _Node(None, None, None, None)
list_root.next_node = list_root
list_root.prev_node = list_root
@ -83,7 +99,7 @@ class LruCache(object):
lock = threading.Lock()
def evict():
while cache_len() > max_size:
while cache_len() > self.max_size:
todelete = list_root.prev_node
evicted_len = delete_node(todelete)
cache.pop(todelete.key, None)
@ -236,6 +252,7 @@ class LruCache(object):
return key in cache
self.sentinel = object()
self._on_resize = evict
self.get = cache_get
self.set = cache_set
self.setdefault = cache_set_default
@ -266,3 +283,20 @@ class LruCache(object):
def __contains__(self, key):
return self.contains(key)
def set_cache_factor(self, factor: float) -> bool:
    """
    Apply a new cache factor to this cache.

    The maximum size becomes the original maximum multiplied by `factor`
    (truncated to an int). If that differs from the current maximum, the
    resize hook is run, which may evict entries.

    Returns:
        bool: True if the maximum size changed, False otherwise.
    """
    target_size = int(self._original_max_size * factor)
    if target_size == self.max_size:
        # Nothing to do: the effective size is unchanged
        return False

    self.max_size = target_size
    self._on_resize()
    return True

View File

@ -38,7 +38,7 @@ class ResponseCache(object):
self.timeout_sec = timeout_ms / 1000.0
self._name = name
self._metrics = register_cache("response_cache", name, self)
self._metrics = register_cache("response_cache", name, self, resizable=False)
def size(self):
return len(self.pending_result_cache)

View File

@ -14,6 +14,7 @@
# limitations under the License.
import logging
import math
from typing import Dict, FrozenSet, List, Mapping, Optional, Set, Union
from six import integer_types
@ -46,7 +47,8 @@ class StreamChangeCache:
max_size=10000,
prefilled_cache: Optional[Mapping[EntityType, int]] = None,
):
self._max_size = int(max_size * caches.CACHE_SIZE_FACTOR)
self._original_max_size = max_size
self._max_size = math.floor(max_size)
self._entity_to_key = {} # type: Dict[EntityType, int]
# map from stream id to the a set of entities which changed at that stream id.
@ -58,12 +60,31 @@ class StreamChangeCache:
#
self._earliest_known_stream_pos = current_stream_pos
self.name = name
self.metrics = caches.register_cache("cache", self.name, self._cache)
self.metrics = caches.register_cache(
"cache", self.name, self._cache, resize_callback=self.set_cache_factor
)
if prefilled_cache:
for entity, stream_pos in prefilled_cache.items():
self.entity_has_changed(entity, stream_pos)
def set_cache_factor(self, factor: float) -> bool:
    """
    Set the cache factor for this individual cache.

    The maximum size becomes the original maximum multiplied by `factor`,
    rounded down. This will trigger a resize if it changes, which may
    require evicting items from the cache.

    Args:
        factor: The multiplier to apply to the original maximum size.

    Returns:
        bool: Whether the cache changed size or not.
    """
    new_size = math.floor(self._original_max_size * factor)
    if new_size != self._max_size:
        # Must update `_max_size` (the attribute that `_evict` and the
        # comparison above read); assigning to `max_size` would leave the
        # effective limit unchanged and make every call report a change.
        self._max_size = new_size
        self._evict()
        return True
    return False
def has_entity_changed(self, entity: EntityType, stream_pos: int) -> bool:
"""Returns True if the entity may have been updated since stream_pos
"""
@ -171,6 +192,7 @@ class StreamChangeCache:
e1 = self._cache[stream_pos] = set()
e1.add(entity)
self._entity_to_key[entity] = stream_pos
self._evict()
# if the cache is too big, remove entries
while len(self._cache) > self._max_size:
@ -179,6 +201,13 @@ class StreamChangeCache:
for entity in r:
del self._entity_to_key[entity]
def _evict(self):
    """Drop entries from the front of the cache until it fits `_max_size`.

    For each dropped stream position, the earliest known stream position is
    advanced to at least that position and the entities recorded there are
    removed from the entity -> position map.
    """
    cache = self._cache
    while len(cache) > self._max_size:
        stream_pos, entities = cache.popitem(0)
        if stream_pos > self._earliest_known_stream_pos:
            self._earliest_known_stream_pos = stream_pos
        for entity in entities:
            # The entity may already be gone, so don't fail if it is missing
            self._entity_to_key.pop(entity, None)
def get_max_pos_of_last_change(self, entity: EntityType) -> int:
"""Returns an upper bound of the stream id of the last change to an

View File

@ -38,7 +38,7 @@ class TTLCache(object):
self._timer = timer
self._metrics = register_cache("ttl", cache_name, self)
self._metrics = register_cache("ttl", cache_name, self, resizable=False)
def set(self, key, value, ttl):
"""Add/update an entry in the cache

127
tests/config/test_cache.py Normal file
View File

@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from synapse.config._base import Config, RootConfig
from synapse.config.cache import CacheConfig, add_resizable_cache
from synapse.util.caches.lrucache import LruCache
from tests.unittest import TestCase
class FakeServer(Config):
    """Bare config section named "server", used as a stand-in in these tests."""

    section = "server"
class TestConfig(RootConfig):
    """Root config built from only the fake server section and CacheConfig.

    The tests below reach the cache section through the ``caches`` attribute
    of an instance.
    """

    config_classes = [FakeServer, CacheConfig]
class CacheConfigTests(TestCase):
    """Tests for how cache factors are loaded and applied to resizable caches.

    Caches registered through ``add_resizable_cache`` are tracked outside of
    any single config object (a cache created before a config is read is
    still resized by it), so that shared state is reset around every test.
    """

    def setUp(self):
        # Reset caches before each test
        TestConfig().caches.reset()

    def tearDown(self):
        # Also reset after each test so that caches registered here do not
        # leak into whichever test module runs next.
        TestConfig().caches.reset()

    def test_individual_caches_from_environ(self):
        """
        Individual cache factors will be loaded from the environment.
        """
        config = {}
        t = TestConfig()
        t.caches._environ = {
            "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2",
            # Lacks the cache factor prefix, so it must be ignored.
            "SYNAPSE_NOT_CACHE": "BLAH",
        }
        t.read_config(config, config_dir_path="", data_dir_path="")

        self.assertEqual(dict(t.caches.cache_factors), {"something_or_other": 2.0})

    def test_config_overrides_environ(self):
        """
        Individual cache factors defined in the environment will take precedence
        over those in the config.

        NB: despite this method's name, it is the environment that wins: "foo"
        is 2 in the config and 1 in the environment, and ends up as 1.0.
        """
        config = {"caches": {"per_cache_factors": {"foo": 2, "bar": 3}}}
        t = TestConfig()
        # Real environment variables are always strings, so the fake
        # environment uses strings for every value.
        t.caches._environ = {
            "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2",
            "SYNAPSE_CACHE_FACTOR_FOO": "1",
        }
        t.read_config(config, config_dir_path="", data_dir_path="")

        self.assertEqual(
            dict(t.caches.cache_factors),
            {"foo": 1.0, "bar": 3.0, "something_or_other": 2.0},
        )

    def test_individual_instantiated_before_config_load(self):
        """
        If a cache is instantiated before the config is read, it will be given
        the default cache size in the interim, and then resized once the config
        is loaded.
        """
        cache = LruCache(100)

        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
        # No config has been read yet: the default factor halves the size.
        self.assertEqual(cache.max_size, 50)

        config = {"caches": {"per_cache_factors": {"foo": 3}}}
        t = TestConfig()
        t.read_config(config, config_dir_path="", data_dir_path="")

        self.assertEqual(cache.max_size, 300)

    def test_individual_instantiated_after_config_load(self):
        """
        If a cache is instantiated after the config is read, it will be
        immediately resized to the correct size given the per_cache_factor if
        there is one.
        """
        config = {"caches": {"per_cache_factors": {"foo": 2}}}
        t = TestConfig()
        t.read_config(config, config_dir_path="", data_dir_path="")

        cache = LruCache(100)
        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
        self.assertEqual(cache.max_size, 200)

    def test_global_instantiated_before_config_load(self):
        """
        If a cache is instantiated before the config is read, it will be given
        the default cache size in the interim, and then resized to the new
        default cache size once the config is loaded.
        """
        cache = LruCache(100)
        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
        # No config has been read yet: the default factor halves the size.
        self.assertEqual(cache.max_size, 50)

        config = {"caches": {"global_factor": 4}}
        t = TestConfig()
        t.read_config(config, config_dir_path="", data_dir_path="")

        self.assertEqual(cache.max_size, 400)

    def test_global_instantiated_after_config_load(self):
        """
        If a cache is instantiated after the config is read, it will be
        immediately resized to the correct size given the global factor if there
        is no per-cache factor.
        """
        config = {"caches": {"global_factor": 1.5}}
        t = TestConfig()
        t.read_config(config, config_dir_path="", data_dir_path="")

        cache = LruCache(100)
        add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
        self.assertEqual(cache.max_size, 150)

View File

@ -25,8 +25,8 @@ from synapse.util.caches.descriptors import Cache, cached
from tests import unittest
class CacheTestCase(unittest.TestCase):
def setUp(self):
class CacheTestCase(unittest.HomeserverTestCase):
def prepare(self, reactor, clock, homeserver):
self.cache = Cache("test")
def test_empty(self):
@ -96,7 +96,7 @@ class CacheTestCase(unittest.TestCase):
cache.get(3)
class CacheDecoratorTestCase(unittest.TestCase):
class CacheDecoratorTestCase(unittest.HomeserverTestCase):
@defer.inlineCallbacks
def test_passthrough(self):
class A(object):
@ -239,7 +239,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
callcount2 = [0]
class A(object):
@cached(max_entries=4) # HACK: This makes it 2 due to cache factor
@cached(max_entries=2)
def func(self, key):
callcount[0] += 1
return key

View File

@ -43,7 +43,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase):
)
hs.config.app_service_config_files = self.as_yaml_files
hs.config.event_cache_size = 1
hs.config.caches.event_cache_size = 1
hs.config.password_providers = []
self.as_token = "token1"
@ -110,7 +110,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase):
)
hs.config.app_service_config_files = self.as_yaml_files
hs.config.event_cache_size = 1
hs.config.caches.event_cache_size = 1
hs.config.password_providers = []
self.as_list = [
@ -422,7 +422,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
)
hs.config.app_service_config_files = [f1, f2]
hs.config.event_cache_size = 1
hs.config.caches.event_cache_size = 1
hs.config.password_providers = []
database = hs.get_datastores().databases[0]
@ -440,7 +440,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
)
hs.config.app_service_config_files = [f1, f2]
hs.config.event_cache_size = 1
hs.config.caches.event_cache_size = 1
hs.config.password_providers = []
with self.assertRaises(ConfigError) as cm:
@ -464,7 +464,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase):
)
hs.config.app_service_config_files = [f1, f2]
hs.config.event_cache_size = 1
hs.config.caches.event_cache_size = 1
hs.config.password_providers = []
with self.assertRaises(ConfigError) as cm:

View File

@ -51,7 +51,8 @@ class SQLBaseStoreTestCase(unittest.TestCase):
config = Mock()
config._disable_native_upserts = True
config.event_cache_size = 1
config.caches = Mock()
config.caches.event_cache_size = 1
hs = TestHomeServer("test", config=config)
sqlite_config = {"name": "sqlite3"}

View File

@ -15,6 +15,7 @@
# limitations under the License.
from synapse.metrics import REGISTRY, InFlightGauge, generate_latest
from synapse.util.caches.descriptors import Cache
from tests import unittest
@ -129,3 +130,36 @@ class BuildInfoTests(unittest.TestCase):
self.assertTrue(b"osversion=" in items[0])
self.assertTrue(b"pythonversion=" in items[0])
self.assertTrue(b"version=" in items[0])
class CacheMetricsTests(unittest.HomeserverTestCase):
    """Checks that a cache's size and capacity show up in the scraped metrics."""

    def _scrape_cache_metrics(self, cache_name):
        """Scrape the metrics registry and return the samples for one cache.

        Args:
            cache_name (str): name of the cache; only exposition lines that
                contain this name are kept.

        Returns:
            dict[str, str]: metric name (the text before the ``{`` opening the
            label set) mapped to the sample value (the second
            space-separated field of the line).
        """
        needle = cache_name.encode("ascii")
        return {
            line.split(b"{")[0].decode("ascii"): line.split(b" ")[1].decode("ascii")
            for line in generate_latest(REGISTRY).split(b"\n")
            if needle in line
        }

    def test_cache_metric(self):
        """
        Caches produce metrics reflecting their state when scraped.
        """
        CACHE_NAME = "cache_metrics_test_fgjkbdfg"
        cache = Cache(CACHE_NAME, max_entries=777)

        # A freshly created cache is empty and reports its configured capacity.
        items = self._scrape_cache_metrics(CACHE_NAME)

        self.assertEqual(items["synapse_util_caches_cache_size"], "0.0")
        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")

        cache.prefill("1", "hi")

        # After one insertion the size changes but the capacity does not.
        items = self._scrape_cache_metrics(CACHE_NAME)

        self.assertEqual(items["synapse_util_caches_cache_size"], "1.0")
        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")

View File

@ -21,7 +21,7 @@ from tests.utils import MockClock
from .. import unittest
class ExpiringCacheTestCase(unittest.TestCase):
class ExpiringCacheTestCase(unittest.HomeserverTestCase):
def test_get_set(self):
clock = MockClock()
cache = ExpiringCache("test", clock, max_len=1)

View File

@ -22,7 +22,7 @@ from synapse.util.caches.treecache import TreeCache
from .. import unittest
class LruCacheTestCase(unittest.TestCase):
class LruCacheTestCase(unittest.HomeserverTestCase):
def test_get_set(self):
cache = LruCache(1)
cache["key"] = "value"
@ -84,7 +84,7 @@ class LruCacheTestCase(unittest.TestCase):
self.assertEquals(len(cache), 0)
class LruCacheCallbacksTestCase(unittest.TestCase):
class LruCacheCallbacksTestCase(unittest.HomeserverTestCase):
def test_get(self):
m = Mock()
cache = LruCache(1)
@ -233,7 +233,7 @@ class LruCacheCallbacksTestCase(unittest.TestCase):
self.assertEquals(m3.call_count, 1)
class LruCacheSizedTestCase(unittest.TestCase):
class LruCacheSizedTestCase(unittest.HomeserverTestCase):
def test_evict(self):
cache = LruCache(5, size_callback=len)
cache["key1"] = [0]

View File

@ -1,11 +1,9 @@
from mock import patch
from synapse.util.caches.stream_change_cache import StreamChangeCache
from tests import unittest
class StreamChangeCacheTests(unittest.TestCase):
class StreamChangeCacheTests(unittest.HomeserverTestCase):
"""
Tests for StreamChangeCache.
"""
@ -54,7 +52,6 @@ class StreamChangeCacheTests(unittest.TestCase):
self.assertTrue(cache.has_entity_changed("user@foo.com", 0))
self.assertTrue(cache.has_entity_changed("not@here.website", 0))
@patch("synapse.util.caches.CACHE_SIZE_FACTOR", 1.0)
def test_entity_has_changed_pops_off_start(self):
"""
StreamChangeCache.entity_has_changed will respect the max size and

View File

@ -167,6 +167,7 @@ def default_config(name, parse=False):
# disable user directory updates, because they get done in the
# background, which upsets the test runner.
"update_user_directory": False,
"caches": {"global_factor": 1},
}
if parse: