2020-05-11 13:45:23 -04:00
|
|
|
#
|
2023-11-21 15:29:58 -05:00
|
|
|
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
|
|
|
#
|
|
|
|
# Copyright (C) 2023 New Vector, Ltd
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# See the GNU Affero General Public License for more details:
|
|
|
|
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
|
|
|
#
|
|
|
|
# Originally licensed under the Apache License, Version 2.0:
|
|
|
|
# <http://www.apache.org/licenses/LICENSE-2.0>.
|
|
|
|
#
|
|
|
|
# [This file includes modifications made by New Vector Limited]
|
2020-05-11 13:45:23 -04:00
|
|
|
#
|
|
|
|
#
|
|
|
|
|
2022-02-11 14:05:20 -05:00
|
|
|
import logging
|
2020-05-11 13:45:23 -04:00
|
|
|
import os
|
2020-05-27 08:17:01 -04:00
|
|
|
import re
|
2020-06-10 13:27:49 -04:00
|
|
|
import threading
|
2022-12-16 08:53:28 -05:00
|
|
|
from typing import Any, Callable, Dict, Mapping, Optional
|
2020-05-11 13:45:23 -04:00
|
|
|
|
2021-12-01 07:28:23 -05:00
|
|
|
import attr
|
|
|
|
|
2022-04-11 12:07:23 -04:00
|
|
|
from synapse.types import JsonDict
|
2022-06-30 13:48:04 -04:00
|
|
|
from synapse.util.check_dependencies import check_requirements
|
2021-05-05 11:54:36 -04:00
|
|
|
|
2020-05-11 13:45:23 -04:00
|
|
|
from ._base import Config, ConfigError
|
|
|
|
|
2022-02-11 14:05:20 -05:00
|
|
|
logger = logging.getLogger(__name__)

# The prefix for all cache factor-related environment variables. The bare
# name supplies the default factor; `<prefix>_<cache name>` overrides the
# factor of one individual cache.
_CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR"

# Map from canonicalised cache name to the callback which resizes that cache.
# The callback is invoked with the cache factor to apply.
_CACHES: Dict[str, Callable[[float], None]] = {}

# a lock on the contents of _CACHES
_CACHES_LOCK = threading.Lock()

# Fallbacks used when neither the config nor the environment gives a value.
_DEFAULT_FACTOR_SIZE = 0.5
_DEFAULT_EVENT_CACHE_SIZE = "10K"
|
|
|
|
|
|
|
|
|
2021-12-01 07:28:23 -05:00
|
|
|
@attr.s(slots=True, auto_attribs=True)
class CacheProperties:
    """Module-wide cache sizing state, shared via the `properties` singleton."""

    # The default factor size for all caches. The default value is computed
    # once, when this class is defined: the SYNAPSE_CACHE_FACTOR environment
    # variable if set, otherwise _DEFAULT_FACTOR_SIZE.
    default_factor_size: float = float(
        os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)
    )

    # Callable which re-applies the configured factors to every registered
    # cache. None until `CacheConfig.resize_all_caches` has stored itself
    # here (and again after `CacheConfig.reset`).
    resize_all_caches_func: Optional[Callable[[], None]] = None


properties = CacheProperties()
|
|
|
|
|
|
|
|
|
2020-05-27 08:17:01 -04:00
|
|
|
def _canonicalise_cache_name(cache_name: str) -> str:
    """Gets the canonical form of the cache name.

    Since we specify cache names in config and environment variables we need to
    ignore case and special characters. For example, some caches have asterisks
    in their name to denote that they're not attached to a particular database
    function, and these asterisks need to be stripped out

    Args:
        cache_name: The cache name as written in code, config or environment.

    Returns:
        The name lower-cased, with every character other than ASCII letters,
        ASCII digits and underscores removed.
    """
    # Keep *all* digits: a `1-9` range would silently drop "0", making names
    # such as "cache_10" and "cache_1" indistinguishable.
    cache_name = re.sub(r"[^A-Za-z_0-9]", "", cache_name)

    return cache_name.lower()
|
|
|
|
|
|
|
|
|
2021-03-26 12:49:46 -04:00
|
|
|
def add_resizable_cache(
    cache_name: str, cache_resize_callback: Callable[[float], None]
) -> None:
    """Register a cache whose size can dynamically change

    Args:
        cache_name: A reference to the cache
        cache_resize_callback: A callback function that will run whenever
            the cache needs to be resized
    """
    # Some caches have '*' in them which we strip out.
    cache_name = _canonicalise_cache_name(cache_name)

    # sometimes caches are initialised from background threads, so we need to make
    # sure we don't conflict with another thread running a resize operation
    with _CACHES_LOCK:
        _CACHES[cache_name] = cache_resize_callback

    # Ensure all loaded caches are sized appropriately
    #
    # This method should only run once the config has been read,
    # as it uses values read from it
    #
    # This call is deliberately made after releasing _CACHES_LOCK:
    # `CacheConfig.resize_all_caches` acquires that same lock, and a
    # `threading.Lock` cannot be re-acquired by the thread that holds it.
    if properties.resize_all_caches_func:
        properties.resize_all_caches_func()
|
|
|
|
|
|
|
|
|
|
|
|
class CacheConfig(Config):
    """Config for the `caches` section: cache factors, expiry and autotuning."""

    section = "caches"

    # Environment consulted for per-cache factor overrides in `read_config`.
    _environ: Mapping[str, str] = os.environ

    event_cache_size: int
    cache_factors: Dict[str, float]
    global_factor: float
    track_memory_usage: bool
    expiry_time_msec: Optional[int]
    sync_response_cache_duration: int

    @staticmethod
    def reset() -> None:
        """Resets the caches to their defaults. Used for tests."""
        properties.default_factor_size = float(
            os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)
        )
        properties.resize_all_caches_func = None
        with _CACHES_LOCK:
            _CACHES.clear()

    def read_config(self, config: JsonDict, **kwargs: Any) -> None:
        """Populate this config object with values from `config`.

        This method does NOT resize existing or future caches: use `resize_all_caches`.
        We use two separate methods so that we can reject bad config before applying it.

        Args:
            config: The parsed configuration. `event_cache_size` is read from
                its top level; everything else from its `caches` mapping.
            **kwargs: Accepted but not used by this method.

        Raises:
            ConfigError: if `caches.global_factor` or any per-cache factor is
                not a number, or `caches.per_cache_factors` is not a dictionary.
        """
        self.event_cache_size = self.parse_size(
            config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)
        )
        self.cache_factors = {}

        cache_config = config.get("caches") or {}
        self.global_factor = cache_config.get("global_factor", _DEFAULT_FACTOR_SIZE)
        # An exact type check rather than isinstance(): the value is
        # user-controlled, and isinstance(x, int) would also accept bools.
        if type(self.global_factor) not in (int, float):
            raise ConfigError("caches.global_factor must be a number.")

        # Load cache factors from the config
        individual_factors = cache_config.get("per_cache_factors") or {}
        if not isinstance(individual_factors, dict):
            raise ConfigError("caches.per_cache_factors must be a dictionary")

        # Canonicalise the cache names *before* updating with the environment
        # variables.
        individual_factors = {
            _canonicalise_cache_name(key): val
            for key, val in individual_factors.items()
        }

        # Override factors from environment if necessary. The cache name is
        # whatever follows "<_CACHE_PREFIX>_" in the variable name.
        individual_factors.update(
            {
                _canonicalise_cache_name(key[len(_CACHE_PREFIX) + 1 :]): float(val)
                for key, val in self._environ.items()
                if key.startswith(_CACHE_PREFIX + "_")
            }
        )

        for cache, factor in individual_factors.items():
            # Same exact type check as for the global factor above.
            if type(factor) not in (int, float):
                raise ConfigError(
                    "caches.per_cache_factors.%s must be a number" % (cache,)
                )
            self.cache_factors[cache] = factor

        self.track_memory_usage = cache_config.get("track_memory_usage", False)
        if self.track_memory_usage:
            check_requirements("cache-memory")

        expire_caches = cache_config.get("expire_caches", True)
        cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m")

        if expire_caches:
            self.expiry_time_msec = self.parse_duration(cache_entry_ttl)
        else:
            self.expiry_time_msec = None

        # Backwards compatibility support for the now-removed "expiry_time" config flag.
        expiry_time = cache_config.get("expiry_time")

        if expiry_time and expire_caches:
            logger.warning(
                "You have set two incompatible options, expiry_time and expire_caches. Please only use the "
                "expire_caches and cache_entry_ttl options and delete the expiry_time option as it is "
                "deprecated."
            )
        if expiry_time:
            logger.warning(
                "Expiry_time is a deprecated option, please use the expire_caches and cache_entry_ttl options "
                "instead."
            )
            # When present, the legacy option overrides the value chosen above.
            self.expiry_time_msec = self.parse_duration(expiry_time)

        self.cache_autotuning = cache_config.get("cache_autotuning")
        if self.cache_autotuning:
            # Replace the raw autotuning settings, in place, with their
            # parsed equivalents.
            max_memory_usage = self.cache_autotuning.get("max_cache_memory_usage")
            self.cache_autotuning["max_cache_memory_usage"] = self.parse_size(
                max_memory_usage
            )

            target_mem_size = self.cache_autotuning.get("target_cache_memory_usage")
            self.cache_autotuning["target_cache_memory_usage"] = self.parse_size(
                target_mem_size
            )

            min_cache_ttl = self.cache_autotuning.get("min_cache_ttl")
            self.cache_autotuning["min_cache_ttl"] = self.parse_duration(min_cache_ttl)

        self.sync_response_cache_duration = self.parse_duration(
            cache_config.get("sync_response_cache_duration", "2m")
        )

    def resize_all_caches(self) -> None:
        """Ensure all cache sizes are up-to-date.

        For each cache, run the mapped callback function with either
        a specific cache factor or the default, global one.
        """
        # Set the global factor size, so that new caches are appropriately sized.
        properties.default_factor_size = self.global_factor

        # Store this function so that it can be called from other classes without
        # needing an instance of CacheConfig
        properties.resize_all_caches_func = self.resize_all_caches

        # block other threads from modifying _CACHES while we iterate it.
        with _CACHES_LOCK:
            for cache_name, callback in _CACHES.items():
                new_factor = self.cache_factors.get(cache_name, self.global_factor)
                callback(new_factor)
|