2018-07-20 10:32:23 -04:00
|
|
|
#
|
2023-11-21 15:29:58 -05:00
|
|
|
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
|
|
|
#
|
2024-01-23 06:26:48 -05:00
|
|
|
# Copyright 2014 - 2016 OpenMarket Ltd
|
2023-11-21 15:29:58 -05:00
|
|
|
# Copyright (C) 2023 New Vector, Ltd
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# See the GNU Affero General Public License for more details:
|
|
|
|
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
|
|
|
#
|
|
|
|
# Originally licensed under the Apache License, Version 2.0:
|
|
|
|
# <http://www.apache.org/licenses/LICENSE-2.0>.
|
|
|
|
#
|
|
|
|
# [This file includes modifications made by New Vector Limited]
|
2018-07-20 10:32:23 -04:00
|
|
|
#
|
|
|
|
#
|
|
|
|
import logging
|
2023-09-15 09:37:44 -04:00
|
|
|
from typing import TYPE_CHECKING, List, Optional, Set, Tuple, cast
|
2021-09-20 08:56:23 -04:00
|
|
|
|
2018-07-20 10:32:23 -04:00
|
|
|
from twisted.python.failure import Failure
|
|
|
|
|
2023-01-27 07:27:55 -05:00
|
|
|
from synapse.api.constants import Direction, EventTypes, Membership
|
2018-07-20 10:32:23 -04:00
|
|
|
from synapse.api.errors import SynapseError
|
2020-08-28 16:47:11 -04:00
|
|
|
from synapse.api.filtering import Filter
|
2022-03-03 10:43:06 -05:00
|
|
|
from synapse.events.utils import SerializeEventConfig
|
2023-08-16 10:19:54 -04:00
|
|
|
from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
|
2022-08-03 11:57:38 -04:00
|
|
|
from synapse.logging.opentracing import trace
|
2019-11-04 12:09:22 -05:00
|
|
|
from synapse.metrics.background_process_metrics import run_as_background_process
|
2022-09-07 05:54:44 -04:00
|
|
|
from synapse.rest.admin._base import assert_user_is_admin
|
2020-08-28 16:47:11 -04:00
|
|
|
from synapse.streams.config import PaginationConfig
|
2023-09-15 09:37:44 -04:00
|
|
|
from synapse.types import (
|
|
|
|
JsonDict,
|
|
|
|
JsonMapping,
|
|
|
|
Requester,
|
|
|
|
ScheduledTask,
|
|
|
|
StreamKeyType,
|
|
|
|
TaskStatus,
|
|
|
|
)
|
2024-06-10 16:03:50 -04:00
|
|
|
from synapse.types.handlers import ShutdownRoomParams, ShutdownRoomResponse
|
2022-12-12 11:19:30 -05:00
|
|
|
from synapse.types.state import StateFilter
|
2018-08-10 09:50:21 -04:00
|
|
|
from synapse.util.async_helpers import ReadWriteLock
|
2018-07-20 10:32:23 -04:00
|
|
|
from synapse.visibility import filter_events_for_client
|
|
|
|
|
2020-09-03 17:02:29 -04:00
|
|
|
if TYPE_CHECKING:
|
|
|
|
from synapse.server import HomeServer
|
|
|
|
|
|
|
|
|
2018-07-20 10:32:23 -04:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
2023-06-13 13:31:08 -04:00
|
|
|
# How many single event gaps we tolerate returning in a `/messages` response before we
|
|
|
|
# backfill and try to fill in the history. This is an arbitrarily picked number so feel
|
|
|
|
# free to tune it in the future.
|
|
|
|
BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3
|
|
|
|
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2023-08-16 10:19:54 -04:00
|
|
|
# This is used to avoid purging a room several time at the same moment,
|
|
|
|
# and also paginating during a purge. Pagination can trigger backfill,
|
|
|
|
# which would create old events locally, and would potentially clash with the room delete.
|
|
|
|
PURGE_PAGINATION_LOCK_NAME = "purge_pagination_lock"
|
2023-07-31 05:58:03 -04:00
|
|
|
|
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
PURGE_HISTORY_ACTION_NAME = "purge_history"
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
PURGE_ROOM_ACTION_NAME = "purge_room"
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME = "shutdown_and_purge_room"
|
2018-07-20 10:32:23 -04:00
|
|
|
|
|
|
|
|
2020-09-04 06:54:56 -04:00
|
|
|
class PaginationHandler:
|
2018-07-20 10:32:23 -04:00
|
|
|
"""Handles pagination and purge history requests.
|
|
|
|
|
|
|
|
These are in the same handler due to the fact we need to block clients
|
|
|
|
paginating during a purge.
|
|
|
|
"""
|
|
|
|
|
2020-09-03 17:02:29 -04:00
|
|
|
def __init__(self, hs: "HomeServer"):
|
2018-07-20 10:32:23 -04:00
|
|
|
self.hs = hs
|
|
|
|
self.auth = hs.get_auth()
|
2022-02-23 06:04:02 -05:00
|
|
|
self.store = hs.get_datastores().main
|
2022-05-31 08:17:50 -04:00
|
|
|
self._storage_controllers = hs.get_storage_controllers()
|
|
|
|
self._state_storage_controller = self._storage_controllers.state
|
2018-07-20 10:32:23 -04:00
|
|
|
self.clock = hs.get_clock()
|
2019-08-22 05:42:59 -04:00
|
|
|
self._server_name = hs.hostname
|
2021-11-12 07:35:31 -05:00
|
|
|
self._room_shutdown_handler = hs.get_room_shutdown_handler()
|
2022-03-18 13:49:32 -04:00
|
|
|
self._relations_handler = hs.get_relations_handler()
|
2023-07-31 05:58:03 -04:00
|
|
|
self._worker_locks = hs.get_worker_locks_handler()
|
2023-09-15 09:37:44 -04:00
|
|
|
self._task_scheduler = hs.get_task_scheduler()
|
2018-07-20 10:32:23 -04:00
|
|
|
|
|
|
|
self.pagination_lock = ReadWriteLock()
|
2021-11-12 07:35:31 -05:00
|
|
|
# IDs of rooms in which there currently an active purge *or delete* operation.
|
2021-07-16 13:22:36 -04:00
|
|
|
self._purges_in_progress_by_room: Set[str] = set()
|
2019-05-09 08:21:57 -04:00
|
|
|
self._event_serializer = hs.get_event_client_serializer()
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2021-09-29 06:44:15 -04:00
|
|
|
self._retention_default_max_lifetime = (
|
2021-10-15 10:30:48 -04:00
|
|
|
hs.config.retention.retention_default_max_lifetime
|
2021-09-29 06:44:15 -04:00
|
|
|
)
|
|
|
|
|
|
|
|
self._retention_allowed_lifetime_min = (
|
2021-10-15 10:30:48 -04:00
|
|
|
hs.config.retention.retention_allowed_lifetime_min
|
2021-09-29 06:44:15 -04:00
|
|
|
)
|
|
|
|
self._retention_allowed_lifetime_max = (
|
2021-10-15 10:30:48 -04:00
|
|
|
hs.config.retention.retention_allowed_lifetime_max
|
2021-09-29 06:44:15 -04:00
|
|
|
)
|
2023-09-15 09:37:44 -04:00
|
|
|
self._forgotten_room_retention_period = (
|
|
|
|
hs.config.server.forgotten_room_retention_period
|
|
|
|
)
|
2022-08-26 03:38:10 -04:00
|
|
|
self._is_master = hs.config.worker.worker_app is None
|
2021-09-29 06:44:15 -04:00
|
|
|
|
2022-08-26 03:38:10 -04:00
|
|
|
if hs.config.retention.retention_enabled and self._is_master:
|
2019-11-04 12:09:22 -05:00
|
|
|
# Run the purge jobs described in the configuration file.
|
2021-10-15 10:30:48 -04:00
|
|
|
for job in hs.config.retention.retention_purge_jobs:
|
2020-01-17 15:51:44 -05:00
|
|
|
logger.info("Setting up purge job with config: %s", job)
|
|
|
|
|
2019-11-04 12:09:22 -05:00
|
|
|
self.clock.looping_call(
|
|
|
|
run_as_background_process,
|
2021-09-28 09:24:40 -04:00
|
|
|
job.interval,
|
2019-11-04 12:09:22 -05:00
|
|
|
"purge_history_for_rooms_in_range",
|
|
|
|
self.purge_history_for_rooms_in_range,
|
2021-09-28 09:24:40 -04:00
|
|
|
job.shortest_max_lifetime,
|
|
|
|
job.longest_max_lifetime,
|
2019-11-04 12:09:22 -05:00
|
|
|
)
|
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
self._task_scheduler.register_action(
|
|
|
|
self._purge_history, PURGE_HISTORY_ACTION_NAME
|
|
|
|
)
|
|
|
|
self._task_scheduler.register_action(self._purge_room, PURGE_ROOM_ACTION_NAME)
|
|
|
|
self._task_scheduler.register_action(
|
|
|
|
self._shutdown_and_purge_room, SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME
|
|
|
|
)
|
|
|
|
|
2020-09-03 17:02:29 -04:00
|
|
|
async def purge_history_for_rooms_in_range(
|
|
|
|
self, min_ms: Optional[int], max_ms: Optional[int]
|
2021-09-20 08:56:23 -04:00
|
|
|
) -> None:
|
2019-11-04 12:09:22 -05:00
|
|
|
"""Purge outdated events from rooms within the given retention range.
|
|
|
|
|
|
|
|
If a default retention policy is defined in the server's configuration and its
|
|
|
|
'max_lifetime' is within this range, also targets rooms which don't have a
|
|
|
|
retention policy.
|
|
|
|
|
|
|
|
Args:
|
2020-09-03 17:02:29 -04:00
|
|
|
min_ms: Duration in milliseconds that define the lower limit of
|
2019-11-04 12:09:22 -05:00
|
|
|
the range to handle (exclusive). If None, it means that the range has no
|
|
|
|
lower limit.
|
2020-09-03 17:02:29 -04:00
|
|
|
max_ms: Duration in milliseconds that define the upper limit of
|
2019-11-04 12:09:22 -05:00
|
|
|
the range to handle (inclusive). If None, it means that the range has no
|
|
|
|
upper limit.
|
|
|
|
"""
|
2020-08-24 13:21:04 -04:00
|
|
|
# We want the storage layer to include rooms with no retention policy in its
|
2019-11-04 12:09:22 -05:00
|
|
|
# return value only if a default retention policy is defined in the server's
|
|
|
|
# configuration and that policy's 'max_lifetime' is either lower (or equal) than
|
|
|
|
# max_ms or higher than min_ms (or both).
|
|
|
|
if self._retention_default_max_lifetime is not None:
|
|
|
|
include_null = True
|
|
|
|
|
|
|
|
if min_ms is not None and min_ms >= self._retention_default_max_lifetime:
|
|
|
|
# The default max_lifetime is lower than (or equal to) min_ms.
|
|
|
|
include_null = False
|
|
|
|
|
|
|
|
if max_ms is not None and max_ms < self._retention_default_max_lifetime:
|
|
|
|
# The default max_lifetime is higher than max_ms.
|
|
|
|
include_null = False
|
|
|
|
else:
|
|
|
|
include_null = False
|
|
|
|
|
2020-01-17 15:51:44 -05:00
|
|
|
logger.info(
|
2023-09-15 09:37:44 -04:00
|
|
|
"[purge] Running retention purge job for %s < max_lifetime <= %s (include NULLs = %s)",
|
2020-01-17 15:51:44 -05:00
|
|
|
min_ms,
|
|
|
|
max_ms,
|
|
|
|
include_null,
|
|
|
|
)
|
|
|
|
|
2020-06-16 08:06:17 -04:00
|
|
|
rooms = await self.store.get_rooms_for_retention_period_in_range(
|
2019-11-04 12:09:22 -05:00
|
|
|
min_ms, max_ms, include_null
|
|
|
|
)
|
|
|
|
|
2020-01-17 15:51:44 -05:00
|
|
|
logger.debug("[purge] Rooms to purge: %s", rooms)
|
|
|
|
|
2020-06-15 07:03:36 -04:00
|
|
|
for room_id, retention_policy in rooms.items():
|
2020-01-17 15:51:44 -05:00
|
|
|
logger.info("[purge] Attempting to purge messages in room %s", room_id)
|
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
if len(await self.get_delete_tasks_by_room(room_id, only_active=True)) > 0:
|
2019-11-04 12:09:22 -05:00
|
|
|
logger.warning(
|
2023-09-15 09:37:44 -04:00
|
|
|
"[purge] not purging room %s for retention as there's an ongoing purge"
|
|
|
|
" running for this room",
|
2019-11-04 12:09:22 -05:00
|
|
|
room_id,
|
|
|
|
)
|
|
|
|
continue
|
|
|
|
|
2020-08-24 13:21:04 -04:00
|
|
|
# If max_lifetime is None, it means that the room has no retention policy.
|
|
|
|
# Given we only retrieve such rooms when there's a default retention policy
|
|
|
|
# defined in the server's configuration, we can safely assume that's the
|
|
|
|
# case and use it for this room.
|
|
|
|
max_lifetime = (
|
2022-05-23 13:18:23 -04:00
|
|
|
retention_policy.max_lifetime or self._retention_default_max_lifetime
|
2020-08-24 13:21:04 -04:00
|
|
|
)
|
|
|
|
|
|
|
|
# Cap the effective max_lifetime to be within the range allowed in the
|
|
|
|
# config.
|
|
|
|
# We do this in two steps:
|
|
|
|
# 1. Make sure it's higher or equal to the minimum allowed value, and if
|
|
|
|
# it's not replace it with that value. This is because the server
|
|
|
|
# operator can be required to not delete information before a given
|
|
|
|
# time, e.g. to comply with freedom of information laws.
|
|
|
|
# 2. Make sure the resulting value is lower or equal to the maximum allowed
|
|
|
|
# value, and if it's not replace it with that value. This is because the
|
|
|
|
# server operator can be required to delete any data after a specific
|
|
|
|
# amount of time.
|
|
|
|
if self._retention_allowed_lifetime_min is not None:
|
|
|
|
max_lifetime = max(self._retention_allowed_lifetime_min, max_lifetime)
|
|
|
|
|
|
|
|
if self._retention_allowed_lifetime_max is not None:
|
|
|
|
max_lifetime = min(max_lifetime, self._retention_allowed_lifetime_max)
|
|
|
|
|
|
|
|
logger.debug("[purge] max_lifetime for room %s: %s", room_id, max_lifetime)
|
2019-11-04 12:09:22 -05:00
|
|
|
|
|
|
|
# Figure out what token we should start purging at.
|
|
|
|
ts = self.clock.time_msec() - max_lifetime
|
|
|
|
|
2020-06-16 08:06:17 -04:00
|
|
|
stream_ordering = await self.store.find_first_stream_ordering_after_ts(ts)
|
2019-11-04 12:09:22 -05:00
|
|
|
|
2020-06-16 08:06:17 -04:00
|
|
|
r = await self.store.get_room_event_before_stream_ordering(
|
2019-11-19 08:22:37 -05:00
|
|
|
room_id,
|
|
|
|
stream_ordering,
|
2019-11-04 12:09:22 -05:00
|
|
|
)
|
|
|
|
if not r:
|
|
|
|
logger.warning(
|
|
|
|
"[purge] purging events not possible: No event found "
|
|
|
|
"(ts %i => stream_ordering %i)",
|
2019-11-19 08:22:37 -05:00
|
|
|
ts,
|
|
|
|
stream_ordering,
|
2019-11-04 12:09:22 -05:00
|
|
|
)
|
|
|
|
continue
|
|
|
|
|
|
|
|
(stream, topo, _event_id) = r
|
|
|
|
token = "t%d-%d" % (topo, stream)
|
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
logger.info("Starting purging events in room %s", room_id)
|
2019-11-04 12:09:22 -05:00
|
|
|
|
|
|
|
# We want to purge everything, including local events, and to run the purge in
|
|
|
|
# the background so that it's not blocking any other operation apart from
|
|
|
|
# other purges in the same room.
|
|
|
|
run_as_background_process(
|
2023-09-15 09:37:44 -04:00
|
|
|
PURGE_HISTORY_ACTION_NAME,
|
|
|
|
self.purge_history,
|
2019-11-19 08:22:37 -05:00
|
|
|
room_id,
|
|
|
|
token,
|
|
|
|
True,
|
2019-11-04 12:09:22 -05:00
|
|
|
)
|
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
async def start_purge_history(
|
2020-09-03 17:02:29 -04:00
|
|
|
self, room_id: str, token: str, delete_local_events: bool = False
|
|
|
|
) -> str:
|
2018-07-20 10:32:23 -04:00
|
|
|
"""Start off a history purge on a room.
|
|
|
|
|
|
|
|
Args:
|
2020-09-03 17:02:29 -04:00
|
|
|
room_id: The room to purge from
|
|
|
|
token: topological token to delete events before
|
|
|
|
delete_local_events: True to delete local events as well as
|
2018-07-20 10:32:23 -04:00
|
|
|
remote ones
|
|
|
|
|
|
|
|
Returns:
|
2020-09-03 17:02:29 -04:00
|
|
|
unique ID for this purge transaction.
|
2018-07-20 10:32:23 -04:00
|
|
|
"""
|
2023-09-15 09:37:44 -04:00
|
|
|
purge_id = await self._task_scheduler.schedule_task(
|
|
|
|
PURGE_HISTORY_ACTION_NAME,
|
|
|
|
resource_id=room_id,
|
|
|
|
params={"token": token, "delete_local_events": delete_local_events},
|
|
|
|
)
|
2018-07-20 10:32:23 -04:00
|
|
|
|
|
|
|
# we log the purge_id here so that it can be tied back to the
|
|
|
|
# request id in the log lines.
|
|
|
|
logger.info("[purge] starting purge_id %s", purge_id)
|
|
|
|
|
|
|
|
return purge_id
|
|
|
|
|
2020-08-28 16:47:11 -04:00
|
|
|
async def _purge_history(
|
2023-09-15 09:37:44 -04:00
|
|
|
self,
|
|
|
|
task: ScheduledTask,
|
|
|
|
) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
|
|
|
|
"""
|
|
|
|
Scheduler action to purge some history of a room.
|
|
|
|
"""
|
|
|
|
if (
|
|
|
|
task.resource_id is None
|
|
|
|
or task.params is None
|
|
|
|
or "token" not in task.params
|
|
|
|
or "delete_local_events" not in task.params
|
|
|
|
):
|
|
|
|
return (
|
|
|
|
TaskStatus.FAILED,
|
|
|
|
None,
|
|
|
|
"Not enough parameters passed to _purge_history",
|
|
|
|
)
|
|
|
|
err = await self.purge_history(
|
|
|
|
task.resource_id,
|
|
|
|
task.params["token"],
|
|
|
|
task.params["delete_local_events"],
|
|
|
|
)
|
|
|
|
if err is not None:
|
|
|
|
return TaskStatus.FAILED, None, err
|
|
|
|
return TaskStatus.COMPLETE, None, None
|
|
|
|
|
|
|
|
async def purge_history(
|
|
|
|
self,
|
|
|
|
room_id: str,
|
|
|
|
token: str,
|
|
|
|
delete_local_events: bool,
|
|
|
|
) -> Optional[str]:
|
2018-07-20 10:32:23 -04:00
|
|
|
"""Carry out a history purge on a room.
|
|
|
|
|
|
|
|
Args:
|
2020-08-28 16:47:11 -04:00
|
|
|
room_id: The room to purge from
|
|
|
|
token: topological token to delete events before
|
|
|
|
delete_local_events: True to delete local events as well as remote ones
|
2018-07-20 10:32:23 -04:00
|
|
|
"""
|
|
|
|
try:
|
2023-07-31 05:58:03 -04:00
|
|
|
async with self._worker_locks.acquire_read_write_lock(
|
2023-08-16 10:19:54 -04:00
|
|
|
PURGE_PAGINATION_LOCK_NAME, room_id, write=True
|
2023-07-31 05:58:03 -04:00
|
|
|
):
|
2022-05-31 08:17:50 -04:00
|
|
|
await self._storage_controllers.purge_events.purge_history(
|
2019-10-30 11:12:49 -04:00
|
|
|
room_id, token, delete_local_events
|
|
|
|
)
|
2018-07-20 10:32:23 -04:00
|
|
|
logger.info("[purge] complete")
|
2023-09-15 09:37:44 -04:00
|
|
|
return None
|
2018-07-20 10:32:23 -04:00
|
|
|
except Exception:
|
2019-02-25 11:56:41 -05:00
|
|
|
f = Failure()
|
|
|
|
logger.error(
|
2023-05-31 07:18:29 -04:00
|
|
|
"[purge] failed", exc_info=(f.type, f.value, f.getTracebackObject())
|
2019-02-25 11:56:41 -05:00
|
|
|
)
|
2023-09-15 09:37:44 -04:00
|
|
|
return f.getErrorMessage()
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
async def get_delete_task(self, delete_id: str) -> Optional[ScheduledTask]:
|
2021-11-12 07:35:31 -05:00
|
|
|
"""Get the current status of an active deleting
|
|
|
|
|
|
|
|
Args:
|
|
|
|
delete_id: delete_id returned by start_shutdown_and_purge_room
|
2023-09-15 09:37:44 -04:00
|
|
|
or start_purge_history.
|
2021-11-12 07:35:31 -05:00
|
|
|
"""
|
2023-09-15 09:37:44 -04:00
|
|
|
return await self._task_scheduler.get_task(delete_id)
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
async def get_delete_tasks_by_room(
|
|
|
|
self, room_id: str, only_active: Optional[bool] = False
|
|
|
|
) -> List[ScheduledTask]:
|
|
|
|
"""Get complete, failed or active delete tasks by room
|
2021-11-12 07:35:31 -05:00
|
|
|
|
|
|
|
Args:
|
|
|
|
room_id: room_id that is deleted
|
2023-09-15 09:37:44 -04:00
|
|
|
only_active: if True, completed&failed tasks will be omitted
|
|
|
|
"""
|
|
|
|
statuses = [TaskStatus.ACTIVE]
|
|
|
|
if not only_active:
|
|
|
|
statuses += [TaskStatus.COMPLETE, TaskStatus.FAILED]
|
|
|
|
|
|
|
|
return await self._task_scheduler.get_tasks(
|
|
|
|
actions=[PURGE_ROOM_ACTION_NAME, SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME],
|
|
|
|
resource_id=room_id,
|
|
|
|
statuses=statuses,
|
|
|
|
)
|
|
|
|
|
|
|
|
async def _purge_room(
|
|
|
|
self,
|
|
|
|
task: ScheduledTask,
|
|
|
|
) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
|
|
|
|
"""
|
|
|
|
Scheduler action to purge a room.
|
2021-11-12 07:35:31 -05:00
|
|
|
"""
|
2023-09-15 09:37:44 -04:00
|
|
|
if not task.resource_id:
|
|
|
|
raise Exception("No room id passed to purge_room task")
|
|
|
|
params = task.params if task.params else {}
|
|
|
|
await self.purge_room(task.resource_id, params.get("force", False))
|
|
|
|
return TaskStatus.COMPLETE, None, None
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
async def purge_room(
|
|
|
|
self,
|
|
|
|
room_id: str,
|
|
|
|
force: bool,
|
|
|
|
) -> None:
|
2020-11-30 11:48:12 -05:00
|
|
|
"""Purge the given room from the database.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
room_id: room to be purged
|
|
|
|
force: set true to skip checking for joined users.
|
|
|
|
"""
|
2023-09-15 09:37:44 -04:00
|
|
|
logger.info("starting purge room_id=%s force=%s", room_id, force)
|
|
|
|
|
2023-07-31 05:58:03 -04:00
|
|
|
async with self._worker_locks.acquire_multi_read_write_lock(
|
2023-08-16 10:19:54 -04:00
|
|
|
[
|
|
|
|
(PURGE_PAGINATION_LOCK_NAME, room_id),
|
|
|
|
(NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id),
|
|
|
|
],
|
2023-07-31 05:58:03 -04:00
|
|
|
write=True,
|
|
|
|
):
|
2019-08-22 05:42:59 -04:00
|
|
|
# first check that we have no users in this room
|
2023-09-15 09:37:44 -04:00
|
|
|
joined = await self.store.is_host_joined(room_id, self._server_name)
|
|
|
|
if joined:
|
|
|
|
if force:
|
|
|
|
logger.info(
|
|
|
|
"force-purging room %s with some local users still joined",
|
|
|
|
room_id,
|
|
|
|
)
|
|
|
|
else:
|
2020-11-30 11:48:12 -05:00
|
|
|
raise SynapseError(400, "Users are still joined to this room")
|
2019-08-22 05:42:59 -04:00
|
|
|
|
2022-05-31 08:17:50 -04:00
|
|
|
await self._storage_controllers.purge_events.purge_room(room_id)
|
2019-08-22 05:42:59 -04:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
logger.info("purge complete for room_id %s", room_id)
|
|
|
|
|
2022-08-03 11:57:38 -04:00
|
|
|
@trace
|
2019-12-10 11:54:34 -05:00
|
|
|
async def get_messages(
|
2018-07-20 10:32:23 -04:00
|
|
|
self,
|
2020-08-28 16:47:11 -04:00
|
|
|
requester: Requester,
|
2020-09-03 17:02:29 -04:00
|
|
|
room_id: str,
|
|
|
|
pagin_config: PaginationConfig,
|
2020-08-28 16:47:11 -04:00
|
|
|
as_client_event: bool = True,
|
|
|
|
event_filter: Optional[Filter] = None,
|
2022-09-07 05:54:44 -04:00
|
|
|
use_admin_priviledge: bool = False,
|
2022-03-16 10:39:15 -04:00
|
|
|
) -> JsonDict:
|
2018-07-20 10:32:23 -04:00
|
|
|
"""Get messages in a room.
|
|
|
|
|
|
|
|
Args:
|
2020-08-28 16:47:11 -04:00
|
|
|
requester: The user requesting messages.
|
|
|
|
room_id: The room they want messages from.
|
|
|
|
pagin_config: The pagination config rules to apply, if any.
|
|
|
|
as_client_event: True to get events in client-server format.
|
|
|
|
event_filter: Filter to apply to results or None
|
2022-09-07 05:54:44 -04:00
|
|
|
use_admin_priviledge: if `True`, return all events, regardless
|
|
|
|
of whether `user` has access to them. To be used **ONLY**
|
|
|
|
from the admin API.
|
2022-03-16 10:39:15 -04:00
|
|
|
|
2018-07-20 10:32:23 -04:00
|
|
|
Returns:
|
2020-08-28 16:47:11 -04:00
|
|
|
Pagination API results
|
2018-07-20 10:32:23 -04:00
|
|
|
"""
|
2022-09-07 05:54:44 -04:00
|
|
|
if use_admin_priviledge:
|
|
|
|
await assert_user_is_admin(self.auth, requester)
|
|
|
|
|
2018-07-20 10:32:23 -04:00
|
|
|
user_id = requester.user.to_string()
|
|
|
|
|
|
|
|
if pagin_config.from_token:
|
2020-09-08 10:00:17 -04:00
|
|
|
from_token = pagin_config.from_token
|
2023-01-27 07:27:55 -05:00
|
|
|
elif pagin_config.direction == Direction.FORWARDS:
|
2022-11-24 04:10:51 -05:00
|
|
|
from_token = (
|
|
|
|
await self.hs.get_event_sources().get_start_token_for_pagination(
|
|
|
|
room_id
|
|
|
|
)
|
|
|
|
)
|
2018-07-20 10:32:23 -04:00
|
|
|
else:
|
2022-04-06 06:40:28 -04:00
|
|
|
from_token = (
|
|
|
|
await self.hs.get_event_sources().get_current_token_for_pagination(
|
|
|
|
room_id
|
|
|
|
)
|
|
|
|
)
|
|
|
|
# We expect `/messages` to use historic pagination tokens by default but
|
|
|
|
# `/messages` should still works with live tokens when manually provided.
|
2022-05-11 00:39:14 -04:00
|
|
|
assert from_token.room_key.topological is not None
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2020-09-11 07:22:55 -04:00
|
|
|
room_token = from_token.room_key
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2023-08-23 04:23:41 -04:00
|
|
|
(membership, member_event_id) = (None, None)
|
|
|
|
if not use_admin_priviledge:
|
|
|
|
(
|
|
|
|
membership,
|
|
|
|
member_event_id,
|
|
|
|
) = await self.auth.check_user_in_room_or_world_readable(
|
|
|
|
room_id, requester, allow_departed_users=True
|
|
|
|
)
|
|
|
|
|
|
|
|
if pagin_config.direction == Direction.BACKWARDS:
|
|
|
|
# if we're going backwards, we might need to backfill. This
|
|
|
|
# requires that we have a topo token.
|
|
|
|
if room_token.topological:
|
|
|
|
curr_topo = room_token.topological
|
|
|
|
else:
|
|
|
|
curr_topo = await self.store.get_current_topological_token(
|
|
|
|
room_id, room_token.stream
|
2022-09-07 05:54:44 -04:00
|
|
|
)
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2023-08-23 04:23:41 -04:00
|
|
|
# If they have left the room then clamp the token to be before
|
|
|
|
# they left the room, to save the effort of loading from the
|
|
|
|
# database.
|
|
|
|
if (
|
|
|
|
pagin_config.direction == Direction.BACKWARDS
|
|
|
|
and not use_admin_priviledge
|
|
|
|
and membership == Membership.LEAVE
|
|
|
|
):
|
|
|
|
# This is only None if the room is world_readable, in which case
|
|
|
|
# "Membership.JOIN" would have been returned and we should never hit
|
|
|
|
# this branch.
|
|
|
|
assert member_event_id
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2023-08-23 04:23:41 -04:00
|
|
|
leave_token = await self.store.get_topological_token_for_event(
|
|
|
|
member_event_id
|
|
|
|
)
|
|
|
|
assert leave_token.topological is not None
|
2023-06-13 13:31:08 -04:00
|
|
|
|
2023-08-23 04:23:41 -04:00
|
|
|
if leave_token.topological < curr_topo:
|
|
|
|
from_token = from_token.copy_and_replace(
|
|
|
|
StreamKeyType.ROOM, leave_token
|
2023-06-13 13:31:08 -04:00
|
|
|
)
|
2020-09-03 17:02:29 -04:00
|
|
|
|
2023-08-23 04:23:41 -04:00
|
|
|
to_room_key = None
|
|
|
|
if pagin_config.to_token:
|
|
|
|
to_room_key = pagin_config.to_token.room_key
|
|
|
|
|
|
|
|
# Initially fetch the events from the database. With any luck, we can return
|
|
|
|
# these without blocking on backfill (handled below).
|
2024-08-07 12:27:50 -04:00
|
|
|
events, next_key = (
|
|
|
|
await self.store.paginate_room_events_by_topological_ordering(
|
|
|
|
room_id=room_id,
|
|
|
|
from_key=from_token.room_key,
|
|
|
|
to_key=to_room_key,
|
|
|
|
direction=pagin_config.direction,
|
|
|
|
limit=pagin_config.limit,
|
|
|
|
event_filter=event_filter,
|
|
|
|
)
|
2023-08-23 04:23:41 -04:00
|
|
|
)
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2023-08-23 04:23:41 -04:00
|
|
|
if pagin_config.direction == Direction.BACKWARDS:
|
|
|
|
# We use a `Set` because there can be multiple events at a given depth
|
|
|
|
# and we only care about looking at the unique continum of depths to
|
|
|
|
# find gaps.
|
|
|
|
event_depths: Set[int] = {event.depth for event in events}
|
|
|
|
sorted_event_depths = sorted(event_depths)
|
|
|
|
|
|
|
|
# Inspect the depths of the returned events to see if there are any gaps
|
|
|
|
found_big_gap = False
|
|
|
|
number_of_gaps = 0
|
|
|
|
previous_event_depth = (
|
|
|
|
sorted_event_depths[0] if len(sorted_event_depths) > 0 else 0
|
2018-07-20 10:32:23 -04:00
|
|
|
)
|
2023-08-23 04:23:41 -04:00
|
|
|
for event_depth in sorted_event_depths:
|
|
|
|
# We don't expect a negative depth but we'll just deal with it in
|
|
|
|
# any case by taking the absolute value to get the true gap between
|
|
|
|
# any two integers.
|
|
|
|
depth_gap = abs(event_depth - previous_event_depth)
|
|
|
|
# A `depth_gap` of 1 is a normal continuous chain to the next event
|
|
|
|
# (1 <-- 2 <-- 3) so anything larger indicates a missing event (it's
|
|
|
|
# also possible there is no event at a given depth but we can't ever
|
|
|
|
# know that for sure)
|
|
|
|
if depth_gap > 1:
|
|
|
|
number_of_gaps += 1
|
|
|
|
|
|
|
|
# We only tolerate a small number single-event long gaps in the
|
|
|
|
# returned events because those are most likely just events we've
|
|
|
|
# failed to pull in the past. Anything longer than that is probably
|
|
|
|
# a sign that we're missing a decent chunk of history and we should
|
|
|
|
# try to backfill it.
|
|
|
|
#
|
|
|
|
# XXX: It's possible we could tolerate longer gaps if we checked
|
|
|
|
# that a given events `prev_events` is one that has failed pull
|
|
|
|
# attempts and we could just treat it like a dead branch of history
|
|
|
|
# for now or at least something that we don't need the block the
|
|
|
|
# client on to try pulling.
|
|
|
|
#
|
|
|
|
# XXX: If we had something like MSC3871 to indicate gaps in the
|
|
|
|
# timeline to the client, we could also get away with any sized gap
|
|
|
|
# and just have the client refetch the holes as they see fit.
|
|
|
|
if depth_gap > 2:
|
|
|
|
found_big_gap = True
|
|
|
|
break
|
|
|
|
previous_event_depth = event_depth
|
|
|
|
|
|
|
|
# Backfill in the foreground if we found a big gap, have too many holes,
|
|
|
|
# or we don't have enough events to fill the limit that the client asked
|
|
|
|
# for.
|
|
|
|
missing_too_many_events = (
|
|
|
|
number_of_gaps > BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD
|
|
|
|
)
|
|
|
|
not_enough_events_to_fill_response = len(events) < pagin_config.limit
|
|
|
|
if (
|
|
|
|
found_big_gap
|
|
|
|
or missing_too_many_events
|
|
|
|
or not_enough_events_to_fill_response
|
|
|
|
):
|
|
|
|
did_backfill = await self.hs.get_federation_handler().maybe_backfill(
|
|
|
|
room_id,
|
|
|
|
curr_topo,
|
|
|
|
limit=pagin_config.limit,
|
2023-06-13 13:31:08 -04:00
|
|
|
)
|
|
|
|
|
2023-08-23 04:23:41 -04:00
|
|
|
# If we did backfill something, refetch the events from the database to
|
|
|
|
# catch anything new that might have been added since we last fetched.
|
|
|
|
if did_backfill:
|
2024-08-07 12:27:50 -04:00
|
|
|
events, next_key = (
|
|
|
|
await self.store.paginate_room_events_by_topological_ordering(
|
|
|
|
room_id=room_id,
|
|
|
|
from_key=from_token.room_key,
|
|
|
|
to_key=to_room_key,
|
|
|
|
direction=pagin_config.direction,
|
|
|
|
limit=pagin_config.limit,
|
|
|
|
event_filter=event_filter,
|
|
|
|
)
|
2023-06-13 13:31:08 -04:00
|
|
|
)
|
2023-08-23 04:23:41 -04:00
|
|
|
else:
|
|
|
|
# Otherwise, we can backfill in the background for eventual
|
|
|
|
# consistency's sake but we don't need to block the client waiting
|
|
|
|
# for a costly federation call and processing.
|
|
|
|
run_as_background_process(
|
|
|
|
"maybe_backfill_in_the_background",
|
|
|
|
self.hs.get_federation_handler().maybe_backfill,
|
|
|
|
room_id,
|
|
|
|
curr_topo,
|
|
|
|
limit=pagin_config.limit,
|
|
|
|
)
|
2023-06-13 13:31:08 -04:00
|
|
|
|
2023-08-23 04:23:41 -04:00
|
|
|
next_token = from_token.copy_and_replace(StreamKeyType.ROOM, next_key)
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2022-05-30 16:03:52 -04:00
|
|
|
# if no events are returned from pagination, that implies
|
|
|
|
# we have reached the end of the available events.
|
|
|
|
# In that case we do not return end, to tell the client
|
|
|
|
# there is no need for further queries.
|
|
|
|
if not events:
|
|
|
|
return {
|
|
|
|
"chunk": [],
|
|
|
|
"start": await from_token.to_string(self.store),
|
|
|
|
}
|
2019-01-02 18:37:39 -05:00
|
|
|
|
2022-05-30 16:03:52 -04:00
|
|
|
if event_filter:
|
|
|
|
events = await event_filter.filter(events)
|
|
|
|
|
2022-09-07 05:54:44 -04:00
|
|
|
if not use_admin_priviledge:
|
|
|
|
events = await filter_events_for_client(
|
|
|
|
self._storage_controllers,
|
|
|
|
user_id,
|
|
|
|
events,
|
|
|
|
is_peeking=(member_event_id is None),
|
|
|
|
)
|
2019-01-02 18:37:39 -05:00
|
|
|
|
2022-05-30 16:03:52 -04:00
|
|
|
# if after the filter applied there are no more events
|
|
|
|
# return immediately - but there might be more in next_token batch
|
2018-07-20 10:32:23 -04:00
|
|
|
if not events:
|
2019-07-23 09:00:55 -04:00
|
|
|
return {
|
|
|
|
"chunk": [],
|
2020-09-30 15:29:19 -04:00
|
|
|
"start": await from_token.to_string(self.store),
|
|
|
|
"end": await next_token.to_string(self.store),
|
2019-07-23 09:00:55 -04:00
|
|
|
}
|
2018-07-20 10:32:23 -04:00
|
|
|
|
2018-08-16 09:22:47 -04:00
|
|
|
state = None
|
2021-10-27 11:26:30 -04:00
|
|
|
if event_filter and event_filter.lazy_load_members and len(events) > 0:
|
2018-08-16 09:22:47 -04:00
|
|
|
# TODO: remove redundant members
|
|
|
|
|
2018-10-25 12:49:55 -04:00
|
|
|
# FIXME: we also care about invite targets etc.
|
|
|
|
state_filter = StateFilter.from_types(
|
|
|
|
(EventTypes.Member, event.sender) for event in events
|
|
|
|
)
|
2018-08-16 09:22:47 -04:00
|
|
|
|
2022-05-31 08:17:50 -04:00
|
|
|
state_ids = await self._state_storage_controller.get_state_ids_for_event(
|
2018-10-25 12:49:55 -04:00
|
|
|
events[0].event_id, state_filter=state_filter
|
2018-08-16 09:22:47 -04:00
|
|
|
)
|
|
|
|
|
|
|
|
if state_ids:
|
2020-09-03 17:02:29 -04:00
|
|
|
state_dict = await self.store.get_events(list(state_ids.values()))
|
|
|
|
state = state_dict.values()
|
2018-08-16 09:22:47 -04:00
|
|
|
|
2022-03-18 13:49:32 -04:00
|
|
|
aggregations = await self._relations_handler.get_bundled_aggregations(
|
|
|
|
events, user_id
|
|
|
|
)
|
2022-01-07 09:10:46 -05:00
|
|
|
|
2018-07-20 10:32:23 -04:00
|
|
|
time_now = self.clock.time_msec()
|
|
|
|
|
2023-03-06 11:08:39 -05:00
|
|
|
serialize_options = SerializeEventConfig(
|
|
|
|
as_client_event=as_client_event, requester=requester
|
|
|
|
)
|
2022-03-03 10:43:06 -05:00
|
|
|
|
2018-07-20 10:32:23 -04:00
|
|
|
chunk = {
|
2019-05-09 08:21:57 -04:00
|
|
|
"chunk": (
|
2023-10-27 05:04:08 -04:00
|
|
|
await self._event_serializer.serialize_events(
|
2021-12-20 14:14:38 -05:00
|
|
|
events,
|
|
|
|
time_now,
|
2022-03-03 10:43:06 -05:00
|
|
|
config=serialize_options,
|
2022-01-07 09:10:46 -05:00
|
|
|
bundle_aggregations=aggregations,
|
2019-05-09 08:21:57 -04:00
|
|
|
)
|
|
|
|
),
|
2020-09-30 15:29:19 -04:00
|
|
|
"start": await from_token.to_string(self.store),
|
|
|
|
"end": await next_token.to_string(self.store),
|
2018-07-20 10:32:23 -04:00
|
|
|
}
|
|
|
|
|
2018-08-16 09:22:47 -04:00
|
|
|
if state:
|
2023-10-27 05:04:08 -04:00
|
|
|
chunk["state"] = await self._event_serializer.serialize_events(
|
2022-03-03 10:43:06 -05:00
|
|
|
state, time_now, config=serialize_options
|
2019-05-09 08:21:57 -04:00
|
|
|
)
|
2018-08-16 09:22:47 -04:00
|
|
|
|
2019-07-23 09:00:55 -04:00
|
|
|
return chunk
|
2021-11-12 07:35:31 -05:00
|
|
|
|
|
|
|
async def _shutdown_and_purge_room(
|
|
|
|
self,
|
2023-09-15 09:37:44 -04:00
|
|
|
task: ScheduledTask,
|
|
|
|
) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
|
2021-11-12 07:35:31 -05:00
|
|
|
"""
|
2023-09-15 09:37:44 -04:00
|
|
|
Scheduler action to shutdown and purge a room.
|
2021-11-12 07:35:31 -05:00
|
|
|
"""
|
2023-09-15 09:37:44 -04:00
|
|
|
if task.resource_id is None or task.params is None:
|
|
|
|
raise Exception(
|
|
|
|
"No room id and/or no parameters passed to shutdown_and_purge_room task"
|
|
|
|
)
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
room_id = task.resource_id
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
async def update_result(result: Optional[JsonMapping]) -> None:
|
|
|
|
await self._task_scheduler.update_task(task.id, result=result)
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
shutdown_result = (
|
|
|
|
cast(ShutdownRoomResponse, task.result) if task.result else None
|
|
|
|
)
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
shutdown_result = await self._room_shutdown_handler.shutdown_room(
|
|
|
|
room_id,
|
|
|
|
cast(ShutdownRoomParams, task.params),
|
|
|
|
shutdown_result,
|
|
|
|
update_result,
|
|
|
|
)
|
2021-11-12 07:35:31 -05:00
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
if task.params.get("purge", False):
|
|
|
|
await self.purge_room(
|
|
|
|
room_id,
|
|
|
|
task.params.get("force_purge", False),
|
2021-11-12 07:35:31 -05:00
|
|
|
)
|
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
return (TaskStatus.COMPLETE, shutdown_result, None)
|
|
|
|
|
|
|
|
async def start_shutdown_and_purge_room(
|
2021-11-12 07:35:31 -05:00
|
|
|
self,
|
|
|
|
room_id: str,
|
2023-09-15 09:37:44 -04:00
|
|
|
shutdown_params: ShutdownRoomParams,
|
2021-11-12 07:35:31 -05:00
|
|
|
) -> str:
|
|
|
|
"""Start off shut down and purge on a room.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
room_id: The ID of the room to shut down.
|
2023-09-15 09:37:44 -04:00
|
|
|
shutdown_params: parameters for the shutdown
|
2021-11-12 07:35:31 -05:00
|
|
|
|
|
|
|
Returns:
|
|
|
|
unique ID for this delete transaction.
|
|
|
|
"""
|
2023-09-15 09:37:44 -04:00
|
|
|
if len(await self.get_delete_tasks_by_room(room_id, only_active=True)) > 0:
|
|
|
|
raise SynapseError(400, "Purge already in progress for %s" % (room_id,))
|
2021-11-12 07:35:31 -05:00
|
|
|
|
|
|
|
# This check is double to `RoomShutdownHandler.shutdown_room`
|
|
|
|
# But here the requester get a direct response / error with HTTP request
|
|
|
|
# and do not have to check the purge status
|
2023-09-15 09:37:44 -04:00
|
|
|
new_room_user_id = shutdown_params["new_room_user_id"]
|
2021-11-12 07:35:31 -05:00
|
|
|
if new_room_user_id is not None:
|
|
|
|
if not self.hs.is_mine_id(new_room_user_id):
|
|
|
|
raise SynapseError(
|
|
|
|
400, "User must be our own: %s" % (new_room_user_id,)
|
|
|
|
)
|
|
|
|
|
2023-09-15 09:37:44 -04:00
|
|
|
delete_id = await self._task_scheduler.schedule_task(
|
|
|
|
SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME,
|
|
|
|
resource_id=room_id,
|
|
|
|
params=shutdown_params,
|
|
|
|
)
|
2021-11-12 07:35:31 -05:00
|
|
|
|
|
|
|
# we log the delete_id here so that it can be tied back to the
|
|
|
|
# request id in the log lines.
|
|
|
|
logger.info(
|
|
|
|
"starting shutdown room_id %s with delete_id %s",
|
|
|
|
room_id,
|
|
|
|
delete_id,
|
|
|
|
)
|
|
|
|
|
|
|
|
return delete_id
|