2014-08-12 10:10:52 -04:00
|
|
|
#
|
2023-11-21 15:29:58 -05:00
|
|
|
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
|
|
|
#
|
2024-01-23 06:26:48 -05:00
|
|
|
# Copyright 2019 The Matrix.org Foundation C.I.C.
|
|
|
|
# Copyright 2014-2016 OpenMarket Ltd
|
2023-11-21 15:29:58 -05:00
|
|
|
# Copyright (C) 2023 New Vector, Ltd
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# See the GNU Affero General Public License for more details:
|
|
|
|
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
|
|
|
#
|
|
|
|
# Originally licensed under the Apache License, Version 2.0:
|
|
|
|
# <http://www.apache.org/licenses/LICENSE-2.0>.
|
|
|
|
#
|
|
|
|
# [This file includes modifications made by New Vector Limited]
|
2014-08-12 10:10:52 -04:00
|
|
|
#
|
|
|
|
#
|
|
|
|
import logging
|
2020-01-07 09:18:43 -05:00
|
|
|
from abc import ABCMeta
|
2022-09-21 09:32:01 -04:00
|
|
|
from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, Optional, Union
|
2014-08-12 10:10:52 -04:00
|
|
|
|
2021-12-13 12:05:00 -05:00
|
|
|
from synapse.storage.database import make_in_list_sql_clause # noqa: F401; noqa: F401
|
|
|
|
from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
|
2021-11-26 13:41:31 -05:00
|
|
|
from synapse.types import get_domain_from_id
|
2020-08-19 07:26:03 -04:00
|
|
|
from synapse.util import json_decoder
|
2022-09-21 09:32:01 -04:00
|
|
|
from synapse.util.caches.descriptors import CachedFunction
|
2019-06-19 16:04:58 -04:00
|
|
|
|
2020-12-30 08:09:53 -05:00
|
|
|
if TYPE_CHECKING:
|
2021-03-23 07:12:48 -04:00
|
|
|
from synapse.server import HomeServer
|
2020-12-30 08:09:53 -05:00
|
|
|
|
2014-08-12 10:10:52 -04:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
2015-02-10 09:50:53 -05:00
|
|
|
|
2020-01-07 09:18:43 -05:00
|
|
|
# some of our subclasses have abstract methods, so we use the ABCMeta metaclass.
|
|
|
|
class SQLBaseStore(metaclass=ABCMeta):
    """Common base for data stores, providing shared helper functions.

    There is one instance of this class per data store rather than one per
    physical database, so several instances will exist at the same time.
    """

    db_pool: DatabasePool

    def __init__(
        self,
        database: DatabasePool,
        db_conn: LoggingDatabaseConnection,
        hs: "HomeServer",
    ):
        self.hs = hs
        self._clock = hs.get_clock()
        self.database_engine = database.engine
        self.db_pool = database

        # Caches registered via `register_external_cached_function`, keyed by
        # cache name. Consulted by `_attempt_to_invalidate_cache` when the
        # store has no attribute with the requested name.
        self.external_cached_functions: Dict[str, CachedFunction] = {}

    def process_replication_rows(  # noqa: B027 (no-op by design)
        self,
        stream_name: str,
        instance_name: str,
        token: int,
        rows: Iterable[Any],
    ) -> None:
        """
        Hook used by storage classes to invalidate caches based on incoming
        replication data. The base implementation does nothing.

        Implementations must not update any ID generators here; use
        `process_replication_position` for that.
        """

    def process_replication_position(  # noqa: B027 (no-op by design)
        self,
        stream_name: str,
        instance_name: str,
        token: int,
    ) -> None:
        """
        Hook used by storage classes to advance ID generators based on
        incoming replication data. The base implementation does nothing.

        This is called after `process_replication_rows`, so that caches are
        invalidated before any token positions advance.
        """

    def _invalidate_state_caches(
        self, room_id: str, members_changed: Collection[str]
    ) -> None:
        """Invalidates the caches derived from a room's current state, without
        streaming the invalidations down replication.

        Args:
            room_id: Room where state changed
            members_changed: The user_ids of members that have changed
        """

        # XXX: If you add something to this function make sure you add it to
        # `_invalidate_state_caches_all` as well.

        invalidate = self._attempt_to_invalidate_cache
        room_key = (room_id,)

        # If there were any membership changes, purge the appropriate caches.
        changed_hosts = {get_domain_from_id(user_id) for user_id in members_changed}
        for host in changed_hosts:
            for cache_name in ("is_host_joined", "is_host_invited"):
                invalidate(cache_name, (room_id, host))

        if members_changed:
            for cache_name in (
                "get_users_in_room",
                "get_current_hosts_in_room",
                "get_users_in_room_with_profiles",
                "get_number_joined_users_in_room",
                "get_local_users_in_room",
            ):
                invalidate(cache_name, room_key)

            # There's no easy way of invalidating this cache for just the users
            # that have changed, so we just clear the entire thing.
            invalidate("does_pair_of_users_share_a_room", None)

            for user_id in members_changed:
                invalidate("get_user_in_room_with_profile", (room_id, user_id))
                for cache_name in (
                    "get_rooms_for_user",
                    "_get_rooms_for_local_user_where_membership_is_inner",
                ):
                    invalidate(cache_name, (user_id,))

        # Purge other caches based on room state.
        for cache_name in ("get_room_summary", "get_partial_current_state_ids"):
            invalidate(cache_name, room_key)

    def _invalidate_state_caches_all(self, room_id: str) -> None:
        """Invalidates the caches derived from a room's current state, without
        streaming the invalidations down replication.

        Counterpart of `_invalidate_state_caches` for when we don't know which
        memberships have changed: caches that would have been keyed on a
        changed user or host are cleared entirely instead.

        Args:
            room_id: Room where state changed
        """
        room_key = (room_id,)

        # (cache name, key) pairs in the order they are invalidated; a key of
        # None means the whole cache is cleared.
        invalidations = (
            ("get_partial_current_state_ids", room_key),
            ("get_users_in_room", room_key),
            ("is_host_invited", None),
            ("is_host_joined", None),
            ("get_current_hosts_in_room", room_key),
            ("get_users_in_room_with_profiles", room_key),
            ("get_number_joined_users_in_room", room_key),
            ("get_local_users_in_room", room_key),
            ("does_pair_of_users_share_a_room", None),
            ("get_user_in_room_with_profile", None),
            ("get_rooms_for_user", None),
            ("_get_rooms_for_local_user_where_membership_is_inner", None),
            ("get_room_summary", room_key),
        )
        for cache_name, cache_key in invalidations:
            self._attempt_to_invalidate_cache(cache_name, cache_key)

    def _attempt_to_invalidate_cache(
        self, cache_name: str, key: Optional[Collection[Any]]
    ) -> bool:
        """Tries to invalidate the named cache, doing nothing if the cache
        doesn't exist. Mainly used for invalidating caches on workers, where
        they may not have the cache.

        Note that this function does not invalidate any remote caches, only the
        local in-memory ones. Any remote invalidation must be performed before
        calling this.

        Args:
            cache_name
            key: Entry to invalidate. If None then invalidates the entire
                cache.

        Returns:
            False if no cache with that name was found, True otherwise.
        """

        try:
            cache = getattr(self, cache_name)
        except AttributeError:
            # Check if an externally defined module cache has been registered
            cache = self.external_cached_functions.get(cache_name)
            if not cache:
                # We probably haven't pulled in the cache in this worker,
                # which is fine.
                return False

        if key is not None:
            # Prefer any local-only invalidation method. Invalidating any
            # non-local cache must be done before this.
            invalidate_method = getattr(cache, "invalidate_local", cache.invalidate)
            invalidate_method(tuple(key))
            return True

        cache.invalidate_all()
        return True

    def register_external_cached_function(
        self, cache_name: str, func: CachedFunction
    ) -> None:
        """Registers an externally defined cached function under `cache_name`,
        so that `_attempt_to_invalidate_cache` can find it by that name.
        """
        self.external_cached_functions[cache_name] = func
|
|
|
|
|
2018-08-30 10:19:58 -04:00
|
|
|
|
2020-12-30 08:09:53 -05:00
|
|
|
def db_to_json(db_content: Union[memoryview, bytes, bytearray, str]) -> Any:
    """
    Take some data from a database row and return a JSON-decoded object.

    Args:
        db_content: The JSON-encoded contents from the database.

    Returns:
        The object decoded from JSON.

    Raises:
        Exception: Any error raised while JSON-decoding the content is logged
            as a warning and then re-raised unchanged.
    """
    # psycopg2 on Python 3 returns memoryview objects, which we need to
    # cast to bytes to decode
    if isinstance(db_content, memoryview):
        db_content = db_content.tobytes()

    # Decode it to a Unicode string before feeding it to the JSON decoder, since
    # it only supports handling strings
    if isinstance(db_content, (bytes, bytearray)):
        db_content = db_content.decode("utf8")

    try:
        return json_decoder.decode(db_content)
    except Exception:
        # Log via the module-level logger (not the root logger) so the record
        # carries this module's name and honours per-module log configuration.
        logger.warning("Tried to decode '%r' as JSON and failed", db_content)
        raise
|