mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-11-25 18:36:22 -05:00
Revert "Sort internal changes in changelog" Revert "Update CHANGES.md" Revert "1.49.0rc1" Revert "Revert "Move `glob_to_regex` and `re_word_boundary` to `matrix-python-common` (#11505) (#11527)" Revert "Refactors in `_generate_sync_entry_for_rooms` (#11515)" Revert "Correctly register shutdown handler for presence workers (#11518)" Revert "Fix `ModuleApi.looping_background_call` for non-async functions (#11524)" Revert "Fix 'delete room' admin api to work on incomplete rooms (#11523)" Revert "Correctly ignore invites from ignored users (#11511)" Revert "Fix the test breakage introduced by #11435 as a result of concurrent PRs (#11522)" Revert "Stabilise support for MSC2918 refresh tokens as they have now been merged into the Matrix specification. (#11435)" Revert "Save the OIDC session ID (sid) with the device on login (#11482)" Revert "Add admin API to get some information about federation status (#11407)" Revert "Include bundled aggregations in /sync and related fixes (#11478)" Revert "Move `glob_to_regex` and `re_word_boundary` to `matrix-python-common` (#11505)" Revert "Update backward extremity docs to make it clear that it does not indicate whether we have fetched an events' `prev_events` (#11469)" Revert "Support configuring the lifetime of non-refreshable access tokens separately to refreshable access tokens. (#11445)" Revert "Add type hints to `synapse/tests/rest/admin` (#11501)" Revert "Revert accidental commits to develop." Revert "Newsfile" Revert "Give `tests.server.setup_test_homeserver` (nominally!) the same behaviour" Revert "Move `tests.utils.setup_test_homeserver` to `tests.server`" Revert "Convert one of the `setup_test_homeserver`s to `make_test_homeserver_synchronous`" Revert "Disambiguate queries on `state_key` (#11497)" Revert "Comments on the /sync tentacles (#11494)" Revert "Clean up tests.storage.test_appservice (#11492)" Revert "Clean up `tests.storage.test_main` to remove use of legacy code. 
(#11493)" Revert "Clean up `tests.test_visibility` to remove legacy code. (#11495)" Revert "Minor cleanup on recently ported doc pages (#11466)" Revert "Add most of the missing type hints to `synapse.federation`. (#11483)" Revert "Avoid waiting for zombie processes in `synctl stop` (#11490)" Revert "Fix media repository failing when media store path contains symlinks (#11446)" Revert "Add type annotations to `tests.storage.test_appservice`. (#11488)" Revert "`scripts-dev/sign_json`: support for signing events (#11486)" Revert "Add MSC3030 experimental client and federation API endpoints to get the closest event to a given timestamp (#9445)" Revert "Port wiki pages to documentation website (#11402)" Revert "Add a license header and comment. (#11479)" Revert "Clean-up get_version_string (#11468)" Revert "Link background update controller docs to summary (#11475)" Revert "Additional type hints for config module. (#11465)" Revert "Register the login redirect endpoint for v3. (#11451)" Revert "Update openid.md" Revert "Remove mention of OIDC certification from Dex (#11470)" Revert "Add a note about huge pages to our Postgres doc (#11467)" Revert "Don't start Synapse master process if `worker_app` is set (#11416)" Revert "Expose worker & homeserver as entrypoints in `setup.py` (#11449)" Revert "Bundle relations of relations into the `/relations` result. (#11284)" Revert "Fix `LruCache` corruption bug with a `size_callback` that can return 0 (#11454)" Revert "Eliminate a few `Any`s in `LruCache` type hints (#11453)" Revert "Remove unnecessary `json.dumps` from `tests.rest.admin` (#11461)" Revert "Merge branch 'master' into develop" This reverts commit26b5d2320f. This reverts commitbce4220f38. This reverts commit966b5d0fa0. This reverts commit088d748f2c. This reverts commit14d593f72d. This reverts commit2a3ec6facf. This reverts commiteccc49d755. This reverts commitb1ecd19c5d. This reverts commit9c55dedc8c. This reverts commit2d42e586a8. This reverts commit2f053f3f82. 
This reverts commita15a893df8. This reverts commit8b4b153c9e. This reverts commit494ebd7347. This reverts commita77c369897. This reverts commit4eb77965cd. This reverts commit637df95de6. This reverts commite5f426cd54. This reverts commit8cd68b8102. This reverts commit6cae125e20. This reverts commit7be88fbf48. This reverts commitb3fd99b74a. This reverts commitf7ec6e7d9e. This reverts commit5640992d17. This reverts commitd26808dd85. This reverts commitf91624a595. This reverts commit16d39a5490. This reverts commit8a4c296987. This reverts commit49e1356ee3. This reverts commitd2279f471b. This reverts commitb50e39df57. This reverts commit858d80bf0f. This reverts commit435f044807. This reverts commitf61462e1be. This reverts commita6f1a3abec. This reverts commit84dc50e160. This reverts commited635d3285. This reverts commit7b62791e00. This reverts commit153194c771. This reverts commitf44d729d4c. This reverts commita265fbd397. This reverts commitb9fef1a7cd. This reverts commitb0eb64ff7b. This reverts commitf1795463bf. This reverts commit70cbb1a5e3. This reverts commit42bf020463. This reverts commit379f2650cf. This reverts commit7ff22d6da4. This reverts commit5a0b652d36. This reverts commit432a174bc1. This reverts commitb14f8a1baf, reversing changes made toe713855dca.
450 lines
16 KiB
Python
450 lines
16 KiB
Python
# Copyright 2020 The Matrix.org Foundation C.I.C.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import logging
|
|
from typing import Any, List, Set, Tuple
|
|
|
|
from synapse.api.errors import SynapseError
|
|
from synapse.storage.databases.main import CacheInvalidationWorkerStore
|
|
from synapse.storage.databases.main.state import StateGroupWorkerStore
|
|
from synapse.types import RoomStreamToken
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
    """Storage-layer routines for purging room history and whole rooms."""

    async def purge_history(
        self, room_id: str, token: str, delete_local_events: bool
    ) -> Set[int]:
        """Purge the history of a room that lies before a given point.

        Concurrent purges are not supported: a higher layer (the
        PaginationHandler) guarantees that only one runs at a time.

        Args:
            room_id: The room whose history should be purged.
            token: A topological token; events before it are purged.
            delete_local_events: When True, local events are deleted along
                with remote ones, rather than merely being marked as
                outliers and having their state groups removed.

        Returns:
            The set of state groups referenced by the deleted events.
        """
        stream_token = await RoomStreamToken.parse(self, token)

        referenced_groups = await self.db_pool.runInteraction(
            "purge_history",
            self._purge_history_txn,
            room_id,
            stream_token,
            delete_local_events,
        )
        return referenced_groups

    def _purge_history_txn(
        self, txn, room_id: str, token: RoomStreamToken, delete_local_events: bool
    ) -> Set[int]:
        """Transaction body for `purge_history`.

        The order of the statements below matters (see the inline comments
        about sqlite commits and the `room_depth` lock); do not reorder them.

        Returns:
            The set of state groups referenced by the purged events.
        """
        # Tables pruned by this transaction:
        #     event_auth
        #     event_backward_extremities
        #     event_edges
        #     event_forward_extremities
        #     event_json
        #     event_push_actions
        #     event_reference_hashes
        #     event_relations
        #     event_search
        #     event_to_state_groups
        #     events
        #     rejections
        #     room_depth
        #     state_groups
        #     state_groups_state
        #     destination_rooms

        # The candidate events are collected in a temporary table, which saves
        # us from passing the list to and fro over the connection. The python
        # sqlite driver annoyingly commits the transaction on CREATE, so this
        # has to be the very first thing we do.
        #
        # An earlier (failed) purge attempt may have left the table behind, so
        # it is dropped before being created.
        txn.execute("DROP TABLE IF EXISTS events_to_purge")

        txn.execute(
            "CREATE TEMPORARY TABLE events_to_purge ("
            " event_id TEXT NOT NULL,"
            " should_delete BOOLEAN NOT NULL"
            ")"
        )

        # Make sure we are not about to delete every forward extremity.
        txn.execute(
            "SELECT e.event_id, e.depth FROM events as e "
            "INNER JOIN event_forward_extremities as f "
            "ON e.event_id = f.event_id "
            "AND e.room_id = f.room_id "
            "WHERE f.room_id = ?",
            (room_id,),
        )
        forward_extremities = txn.fetchall()

        # If there are no forward extremities at all (e.g. because the
        # `delete_old_current_state_events` background database update removed
        # them), there is nothing to protect and we simply carry on.
        if (
            forward_extremities
            and max(depth for _, depth in forward_extremities) < token.topological
        ):
            # Deleting every event would leave us unable to send any further
            # events (there would be no backwards extremities left).
            raise SynapseError(
                400, "topological_ordering is greater than forward extremities"
            )

        logger.info("[purge] looking for events to delete")

        if delete_local_events:
            should_delete_expr = "state_key IS NULL"
            local_filter_params: Tuple[Any, ...] = ()
        else:
            should_delete_expr = "state_key IS NULL AND event_id NOT LIKE ?"
            # The expression is interpolated into the query twice, so its
            # parameter has to be supplied twice as well.
            local_pattern = "%:" + self.hs.hostname
            local_filter_params = (local_pattern, local_pattern)

        populate_params = local_filter_params + (room_id, token.topological)

        # Outliers that will not be deleted are left out of the table, since
        # nothing is going to happen to them anyway.
        populate_sql = (
            "INSERT INTO events_to_purge"
            " SELECT event_id, %s"
            " FROM events AS e LEFT JOIN state_events USING (event_id)"
            " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
        ) % (should_delete_expr, should_delete_expr)
        txn.execute(populate_sql, populate_params)

        # Indices are built only *after* the bulk insert, which is much
        # quicker than maintaining them during it.

        # Later queries select the should_delete / shouldn't_delete subsets,
        # hence an index on that column.
        txn.execute(
            "CREATE INDEX events_to_purge_should_delete"
            " ON events_to_purge(should_delete)"
        )

        # events_to_purge is joined against repeatedly (e.g. when working out
        # which state groups to purge), hence an index on event_id too.
        txn.execute("CREATE INDEX events_to_purge_id ON events_to_purge(event_id)")

        txn.execute("SELECT event_id, should_delete FROM events_to_purge")
        event_rows = txn.fetchall()
        deletable_count = len([row for row in event_rows if row[1]])
        logger.info(
            "[purge] found %i events before cutoff, of which %i can be deleted",
            len(event_rows),
            deletable_count,
        )

        logger.info("[purge] Finding new backward extremities")

        # The new backward extremities are the to-be-purged events that are
        # referenced by events which are *not* being purged.
        txn.execute(
            "SELECT DISTINCT e.event_id FROM events_to_purge AS e"
            " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id"
            " LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id"
            " WHERE ep2.event_id IS NULL"
        )
        new_backwards_extrems = txn.fetchall()

        logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems)

        txn.execute(
            "DELETE FROM event_backward_extremities WHERE room_id = ?", (room_id,)
        )

        # Store the freshly computed backward extremities.
        extremity_values = [(room_id, row[0]) for row in new_backwards_extrems]
        txn.execute_batch(
            "INSERT INTO event_backward_extremities (room_id, event_id)"
            " VALUES (?, ?)",
            extremity_values,
        )

        logger.info("[purge] finding state groups referenced by deleted events")

        # Collect every state group referenced by an event in the purge table.
        txn.execute(
            """
            SELECT DISTINCT state_group FROM events_to_purge
            INNER JOIN event_to_state_groups USING (event_id)
            """
        )

        referenced_state_groups = {row[0] for row in txn}
        logger.info(
            "[purge] found %i referenced state groups", len(referenced_state_groups)
        )

        logger.info("[purge] removing events from event_to_state_groups")
        txn.execute(
            "DELETE FROM event_to_state_groups "
            "WHERE event_id IN (SELECT event_id from events_to_purge)"
        )

        # Remove the rows flagged should_delete from each event_id-keyed table.
        delete_by_event_id_sql = (
            "DELETE FROM %s WHERE event_id IN ("
            " SELECT event_id FROM events_to_purge WHERE should_delete"
            ")"
        )
        event_id_keyed_tables = (
            "events",
            "event_json",
            "event_auth",
            "event_edges",
            "event_forward_extremities",
            "event_reference_hashes",
            "event_relations",
            "event_search",
            "rejections",
            "redactions",
        )
        for table_name in event_id_keyed_tables:
            logger.info("[purge] removing events from %s", table_name)
            txn.execute(delete_by_event_id_sql % (table_name,))

        # event_push_actions has no index on event_id alone, only one on
        # (room_id, event_id), so the room_id is included in the predicate.
        push_actions_table = "event_push_actions"
        logger.info("[purge] removing events from %s", push_actions_table)
        delete_push_actions_sql = (
            "DELETE FROM %s WHERE room_id = ? AND event_id IN ("
            " SELECT event_id FROM events_to_purge WHERE should_delete"
            ")"
        ) % (push_actions_table,)
        txn.execute(delete_push_actions_sql, (room_id,))

        # Whatever is left in the purge table (state events and our own
        # events) is kept, but flagged as an outlier.
        logger.info("[purge] marking remaining events as outliers")
        txn.execute(
            "UPDATE events SET outlier = ?"
            " WHERE event_id IN ("
            " SELECT event_id FROM events_to_purge "
            " WHERE NOT should_delete"
            ")",
            (True,),
        )

        # Event persistence tries to take an exclusive lock on room_depth
        # (because of the upsert), and once the update below has run that lock
        # is blocked until this transaction finishes. Doing it this late keeps
        # the time during which event persistence is blocked to a minimum.
        #
        # The new value is the minimum depth of the backwards extremities.
        # The events listed in event_backward_extremities are ones we do not
        # have yet, so the depth is taken from the events that point at them
        # through the event_edges table.
        txn.execute(
            """
            SELECT COALESCE(MIN(depth), 0)
            FROM event_backward_extremities AS eb
            INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id
            INNER JOIN events AS e ON e.event_id = eg.event_id
            WHERE eb.room_id = ?
            """,
            (room_id,),
        )
        min_depth = txn.fetchone()[0]

        logger.info("[purge] updating room_depth to %d", min_depth)

        txn.execute(
            "UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
            (min_depth, room_id),
        )

        # Dropping the temp table commits the txn in sqlite, so this must stay
        # the last statement that is executed.
        txn.execute("DROP TABLE events_to_purge")

        for purged_event_id, was_deleted in event_rows:
            self._invalidate_cache_and_stream(
                txn, self._get_state_group_for_event, (purged_event_id,)
            )

            # XXX: This is racy: have_seen_events may be called after the
            #    transaction completes but before the invalidation runs. Then
            #    again, that is no different from calling `have_seen_events`
            #    just before the event is deleted from the database.
            if not was_deleted:
                continue

            self._invalidate_cache_and_stream(
                txn, self.have_seen_event, (room_id, purged_event_id)
            )
            self._invalidate_get_event_cache(purged_event_id)

        logger.info("[purge] done")

        return referenced_state_groups

    async def purge_room(self, room_id: str) -> List[int]:
        """Remove every record of a room from the database.

        Args:
            room_id: The room to purge.

        Returns:
            The list of state groups to delete.
        """
        state_groups_to_delete = await self.db_pool.runInteraction(
            "purge_room", self._purge_room_txn, room_id
        )
        return state_groups_to_delete

    def _purge_room_txn(self, txn, room_id: str) -> List[int]:
        """Transaction body for `purge_room`.

        Returns:
            The state groups referenced by the room's events, gathered before
            the rows that link them to the room are removed.
        """
        # The state groups have to be looked up first, while the information
        # tying them to the room still exists.
        txn.execute(
            """
            SELECT DISTINCT state_group FROM events
            INNER JOIN event_to_state_groups USING(event_id)
            WHERE events.room_id = ?
            """,
            (room_id,),
        )

        state_groups = [fetched[0] for fetched in txn]

        # Likewise collect the auth chains referenced by the events that are
        # about to be deleted.
        txn.execute(
            """
            SELECT chain_id, sequence_number FROM events
            LEFT JOIN event_auth_chains USING (event_id)
            WHERE room_id = ?
            """,
            (room_id,),
        )
        chain_links_to_remove = list(txn)

        logger.info("[purge] removing events from event_auth_chain_links")
        txn.executemany(
            """
            DELETE FROM event_auth_chain_links WHERE
            origin_chain_id = ? AND origin_sequence_number = ?
            """,
            chain_links_to_remove,
        )

        # Tables that have no index on room_id but do have one on event_id.
        tables_keyed_by_event_id = (
            "event_auth",
            "event_edges",
            "event_json",
            "event_push_actions_staging",
            "event_reference_hashes",
            "event_relations",
            "event_to_state_groups",
            "event_auth_chains",
            "event_auth_chain_to_calculate",
            "redactions",
            "rejections",
            "state_events",
        )
        for table_name in tables_keyed_by_event_id:
            logger.info("[purge] removing %s from %s", room_id, table_name)

            delete_sql = (
                """
                DELETE FROM %s WHERE event_id IN (
                  SELECT event_id FROM events WHERE room_id=?
                )
                """
                % (table_name,)
            )
            txn.execute(delete_sql, (room_id,))

        # Lastly, the tables with an index on room_id (or no useful index).
        tables_keyed_by_room_id = (
            "current_state_events",
            "destination_rooms",
            "event_backward_extremities",
            "event_forward_extremities",
            "event_push_actions",
            "event_search",
            "events",
            "group_rooms",
            "public_room_list_stream",
            "receipts_graph",
            "receipts_linearized",
            "room_aliases",
            "room_depth",
            "room_memberships",
            "room_stats_state",
            "room_stats_current",
            "room_stats_earliest_token",
            "rooms",
            "stream_ordering_to_exterm",
            "users_in_public_rooms",
            "users_who_share_private_rooms",
            # no useful index on these, but they are cleared regardless
            "appservice_room_list",
            "e2e_room_keys",
            "event_push_summary",
            "pusher_throttle",
            "group_summary_rooms",
            "room_account_data",
            "room_tags",
            "local_current_membership",
        )
        for table_name in tables_keyed_by_room_id:
            logger.info("[purge] removing %s from %s", room_id, table_name)
            txn.execute("DELETE FROM %s WHERE room_id=?" % (table_name,), (room_id,))

        # Tables that deliberately are NOT cleared:
        #
        #  - blocked_rooms
        #    Keeping this matters: it stops us from accidentally rejoining a
        #    blocked room once it has been purged.
        #
        #  - user_directory
        #    It has a room_id column, but that column is unused.
        #

        # Further tables that might be worth clearing:
        #
        #  - event_reports
        #    These exist for abuse management, so the initial inclination is
        #    to leave them in place.
        #
        #  - current_state_delta_stream
        #  - ex_outlier_stream
        #  - room_tags_revisions
        #    These are largeish and have no room_id index. In any case,
        #    'stream' tables ought to be cleared out periodically (#5888).

        # TODO: we could probably usefully do a bunch more cache invalidation here

        # XXX: as in purge_history, this is racy, but no worse than the races
        #    that exist already.
        self._invalidate_cache_and_stream(txn, self.have_seen_event, (room_id,))

        logger.info("[purge] done")

        return state_groups
|