mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-05-03 01:14:48 -04:00
Make background updates controllable via a plugin (#11306)
Co-authored-by: Brendan Abolivier <babolivier@matrix.org>
This commit is contained in:
parent
9d1971a5c4
commit
d08ef6f155
12 changed files with 407 additions and 61 deletions
|
@ -12,12 +12,22 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Iterable, Optional
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
AsyncContextManager,
|
||||
Awaitable,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
Optional,
|
||||
)
|
||||
|
||||
import attr
|
||||
|
||||
from synapse.metrics.background_process_metrics import run_as_background_process
|
||||
from synapse.storage.types import Connection
|
||||
from synapse.types import JsonDict
|
||||
from synapse.util import json_encoder
|
||||
from synapse.util import Clock, json_encoder
|
||||
|
||||
from . import engines
|
||||
|
||||
|
@ -28,6 +38,45 @@ if TYPE_CHECKING:
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Module hook invoked as (update_name, database_name, oneshot). It returns an
# async context manager; the value produced on entry is used as the target
# duration, in milliseconds, for the next batch of the update.
ON_UPDATE_CALLBACK = Callable[[str, str, bool], AsyncContextManager[int]]
|
||||
# Module hook invoked as (update_name, database_name). It resolves to the batch
# size to use for the first iteration of a new background update.
DEFAULT_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]]
|
||||
# Module hook invoked as (update_name, database_name). It resolves to the lower
# bound that the computed batch size is clamped to.
MIN_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]]
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True, slots=True)
class _BackgroundUpdateHandler:
    """Immutable record describing how a registered background update is run.

    Attributes:
        callback: The coroutine function called to make progress on the
            background update. It takes a JSON dict and an int and resolves
            to an int (presumably the stored progress, the batch size and the
            number of items processed -- confirm against the caller).
        oneshot: Whether the update is likely to happen all in one go,
            ignoring the supplied target duration, e.g. index creation. The
            update controller is given this flag to help it schedule the
            update correctly.
    """

    # Field order matters: instances are built positionally as
    # `_BackgroundUpdateHandler(handler)` elsewhere in this file.
    callback: Callable[[JsonDict, int], Awaitable[int]]
    oneshot: bool = False
|
||||
|
||||
|
||||
class _BackgroundUpdateContextManager:
    """Default async context manager wrapped around one background update batch.

    It is used when no module has registered an `on_update` callback. Entering
    it optionally sleeps for a fixed interval and then yields a fixed target
    duration; leaving it does nothing.
    """

    # How long to sleep before a batch when sleeping is enabled.
    BACKGROUND_UPDATE_INTERVAL_MS = 1000
    # Target duration handed to the batch that runs inside the context.
    BACKGROUND_UPDATE_DURATION_MS = 100

    def __init__(self, sleep: bool, clock: Clock):
        """
        Args:
            sleep: Whether to sleep on entry before yielding the duration.
            clock: Object whose awaitable `sleep(seconds)` performs the wait.
        """
        self._clock = clock
        self._sleep = sleep

    async def __aenter__(self) -> int:
        """Optionally sleep, then return the target duration in milliseconds."""
        if self._sleep:
            # The clock's sleep() is given seconds, hence the conversion.
            interval_seconds = self.BACKGROUND_UPDATE_INTERVAL_MS / 1000
            await self._clock.sleep(interval_seconds)

        return self.BACKGROUND_UPDATE_DURATION_MS

    async def __aexit__(self, *exc) -> None:
        """Nothing to clean up; exceptions from the body are not suppressed."""
        return None
|
||||
|
||||
|
||||
class BackgroundUpdatePerformance:
|
||||
"""Tracks how long a background update is taking to update its items"""
|
||||
|
||||
|
@ -84,20 +133,22 @@ class BackgroundUpdater:
|
|||
|
||||
MINIMUM_BACKGROUND_BATCH_SIZE = 1
|
||||
DEFAULT_BACKGROUND_BATCH_SIZE = 100
|
||||
BACKGROUND_UPDATE_INTERVAL_MS = 1000
|
||||
BACKGROUND_UPDATE_DURATION_MS = 100
|
||||
|
||||
def __init__(self, hs: "HomeServer", database: "DatabasePool"):
|
||||
self._clock = hs.get_clock()
|
||||
self.db_pool = database
|
||||
|
||||
self._database_name = database.name()
|
||||
|
||||
# if a background update is currently running, its name.
|
||||
self._current_background_update: Optional[str] = None
|
||||
|
||||
self._on_update_callback: Optional[ON_UPDATE_CALLBACK] = None
|
||||
self._default_batch_size_callback: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None
|
||||
self._min_batch_size_callback: Optional[MIN_BATCH_SIZE_CALLBACK] = None
|
||||
|
||||
self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {}
|
||||
self._background_update_handlers: Dict[
|
||||
str, Callable[[JsonDict, int], Awaitable[int]]
|
||||
] = {}
|
||||
self._background_update_handlers: Dict[str, _BackgroundUpdateHandler] = {}
|
||||
self._all_done = False
|
||||
|
||||
# Whether we're currently running updates
|
||||
|
@ -107,6 +158,83 @@ class BackgroundUpdater:
|
|||
# enable/disable background updates via the admin API.
|
||||
self.enabled = True
|
||||
|
||||
def register_update_controller_callbacks(
    self,
    on_update: ON_UPDATE_CALLBACK,
    default_batch_size: Optional[DEFAULT_BATCH_SIZE_CALLBACK] = None,
    min_batch_size: Optional[MIN_BATCH_SIZE_CALLBACK] = None,
) -> None:
    """Register callbacks from a module for each hook.

    Only the first registration takes effect: if an `on_update` callback has
    already been registered, a warning is logged and every callback passed to
    this call is ignored.

    Args:
        on_update: Called with the update name, database name and oneshot
            flag; returns the async context manager each batch runs inside.
        default_batch_size: Optional callback providing the batch size for
            the first iteration of an update. The built-in default stays in
            use when this is None.
        min_batch_size: Optional callback providing the lower bound on the
            batch size. The built-in minimum stays in use when this is None.
    """
    if self._on_update_callback is not None:
        logger.warning(
            "More than one module tried to register callbacks for controlling"
            " background updates. Only the callbacks registered by the first module"
            " (in order of appearance in Synapse's configuration file) that tried to"
            " do so will be called."
        )

        return

    self._on_update_callback = on_update

    if default_batch_size is not None:
        self._default_batch_size_callback = default_batch_size

    if min_batch_size is not None:
        self._min_batch_size_callback = min_batch_size
|
||||
|
||||
def _get_context_manager_for_update(
    self,
    sleep: bool,
    update_name: str,
    database_name: str,
    oneshot: bool,
) -> AsyncContextManager[int]:
    """Get the context manager to run one batch of a background update in.

    If a module has registered an `on_update` callback, the context manager
    it returns is used. Otherwise a default context manager is returned,
    which yields a fixed target duration and sleeps first if `sleep` is set.

    Args:
        sleep: Whether we can sleep between updates. Only the default context
            manager looks at this; it is not passed to a module callback.
        update_name: The name of the update.
        database_name: The name of the database the update is being run on.
        oneshot: Whether the update will complete all in one go, e.g. index
            creation. In such cases the returned target duration is ignored.

    Returns:
        An async context manager which, on entry, yields the target duration
        in milliseconds that the background update should run for.

        Note: this is a *target*, and an iteration may take substantially
        longer or shorter.
    """
    module_callback = self._on_update_callback
    if module_callback is None:
        # No module controls background updates: use the built-in pacing.
        return _BackgroundUpdateContextManager(sleep, self._clock)

    return module_callback(update_name, database_name, oneshot)
|
||||
|
||||
async def _default_batch_size(self, update_name: str, database_name: str) -> int:
    """Return the batch size for the first iteration of a new background update.

    A module-registered callback takes precedence; without one the class
    constant `DEFAULT_BACKGROUND_BATCH_SIZE` is returned.

    Args:
        update_name: The name of the update.
        database_name: The name of the database the update is being run on.
    """
    module_callback = self._default_batch_size_callback
    if module_callback is None:
        return self.DEFAULT_BACKGROUND_BATCH_SIZE

    return await module_callback(update_name, database_name)
|
||||
|
||||
async def _min_batch_size(self, update_name: str, database_name: str) -> int:
    """Return the lower bound on the batch size of a background update.

    Used to ensure that progress is always made, so the value must be greater
    than 0. A value supplied by a module callback is returned as-is and is not
    validated here. Without a callback the class constant
    `MINIMUM_BACKGROUND_BATCH_SIZE` is returned.

    Args:
        update_name: The name of the update.
        database_name: The name of the database the update is being run on.
    """
    module_callback = self._min_batch_size_callback
    if module_callback is None:
        return self.MINIMUM_BACKGROUND_BATCH_SIZE

    return await module_callback(update_name, database_name)
|
||||
|
||||
def get_current_update(self) -> Optional[BackgroundUpdatePerformance]:
|
||||
"""Returns the current background update, if any."""
|
||||
|
||||
|
@ -135,13 +263,8 @@ class BackgroundUpdater:
|
|||
try:
|
||||
logger.info("Starting background schema updates")
|
||||
while self.enabled:
|
||||
if sleep:
|
||||
await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.0)
|
||||
|
||||
try:
|
||||
result = await self.do_next_background_update(
|
||||
self.BACKGROUND_UPDATE_DURATION_MS
|
||||
)
|
||||
result = await self.do_next_background_update(sleep)
|
||||
except Exception:
|
||||
logger.exception("Error doing update")
|
||||
else:
|
||||
|
@ -203,13 +326,15 @@ class BackgroundUpdater:
|
|||
|
||||
return not update_exists
|
||||
|
||||
async def do_next_background_update(self, desired_duration_ms: float) -> bool:
|
||||
async def do_next_background_update(self, sleep: bool = True) -> bool:
|
||||
"""Does some amount of work on the next queued background update
|
||||
|
||||
Returns once some amount of work is done.
|
||||
|
||||
Args:
|
||||
desired_duration_ms: How long we want to spend updating.
|
||||
sleep: Whether to limit how quickly we run background updates or
|
||||
not.
|
||||
|
||||
Returns:
|
||||
True if we have finished running all the background updates, otherwise False
|
||||
"""
|
||||
|
@ -252,7 +377,19 @@ class BackgroundUpdater:
|
|||
|
||||
self._current_background_update = upd["update_name"]
|
||||
|
||||
await self._do_background_update(desired_duration_ms)
|
||||
# We have a background update to run, otherwise we would have returned
|
||||
# early.
|
||||
assert self._current_background_update is not None
|
||||
update_info = self._background_update_handlers[self._current_background_update]
|
||||
|
||||
async with self._get_context_manager_for_update(
|
||||
sleep=sleep,
|
||||
update_name=self._current_background_update,
|
||||
database_name=self._database_name,
|
||||
oneshot=update_info.oneshot,
|
||||
) as desired_duration_ms:
|
||||
await self._do_background_update(desired_duration_ms)
|
||||
|
||||
return False
|
||||
|
||||
async def _do_background_update(self, desired_duration_ms: float) -> int:
|
||||
|
@ -260,7 +397,7 @@ class BackgroundUpdater:
|
|||
update_name = self._current_background_update
|
||||
logger.info("Starting update batch on background update '%s'", update_name)
|
||||
|
||||
update_handler = self._background_update_handlers[update_name]
|
||||
update_handler = self._background_update_handlers[update_name].callback
|
||||
|
||||
performance = self._background_update_performance.get(update_name)
|
||||
|
||||
|
@ -273,9 +410,14 @@ class BackgroundUpdater:
|
|||
if items_per_ms is not None:
|
||||
batch_size = int(desired_duration_ms * items_per_ms)
|
||||
# Clamp the batch size so that we always make progress
|
||||
batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE)
|
||||
batch_size = max(
|
||||
batch_size,
|
||||
await self._min_batch_size(update_name, self._database_name),
|
||||
)
|
||||
else:
|
||||
batch_size = self.DEFAULT_BACKGROUND_BATCH_SIZE
|
||||
batch_size = await self._default_batch_size(
|
||||
update_name, self._database_name
|
||||
)
|
||||
|
||||
progress_json = await self.db_pool.simple_select_one_onecol(
|
||||
"background_updates",
|
||||
|
@ -294,6 +436,8 @@ class BackgroundUpdater:
|
|||
|
||||
duration_ms = time_stop - time_start
|
||||
|
||||
performance.update(items_updated, duration_ms)
|
||||
|
||||
logger.info(
|
||||
"Running background update %r. Processed %r items in %rms."
|
||||
" (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)",
|
||||
|
@ -306,8 +450,6 @@ class BackgroundUpdater:
|
|||
batch_size,
|
||||
)
|
||||
|
||||
performance.update(items_updated, duration_ms)
|
||||
|
||||
return len(self._background_update_performance)
|
||||
|
||||
def register_background_update_handler(
|
||||
|
@ -331,7 +473,9 @@ class BackgroundUpdater:
|
|||
update_name: The name of the update that this code handles.
|
||||
update_handler: The function that does the update.
|
||||
"""
|
||||
self._background_update_handlers[update_name] = update_handler
|
||||
self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
|
||||
update_handler
|
||||
)
|
||||
|
||||
def register_noop_background_update(self, update_name: str) -> None:
|
||||
"""Register a noop handler for a background update.
|
||||
|
@ -453,7 +597,9 @@ class BackgroundUpdater:
|
|||
await self._end_background_update(update_name)
|
||||
return 1
|
||||
|
||||
self.register_background_update_handler(update_name, updater)
|
||||
self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
|
||||
updater, oneshot=True
|
||||
)
|
||||
|
||||
async def _end_background_update(self, update_name: str) -> None:
|
||||
"""Removes a completed background update task from the queue.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue