From 671540dccf3996620ffe65705904fb911e21fb68 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:27:08 +0000 Subject: [PATCH 1/5] rename delete_old_state -> purge_history (beacause it deletes more than state) --- synapse/handlers/message.py | 2 +- synapse/storage/events.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd..1c7860bb0 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.delete_old_state(room_id, depth) + yield self.store.purge_history(room_id, depth) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec9..21533970d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,16 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def delete_old_state(self, room_id, topological_ordering): - return self.runInteraction( - "delete_old_state", - self._delete_old_state_txn, room_id, topological_ordering - ) - - def _delete_old_state_txn(self, txn, room_id, topological_ordering): - """Deletes old room state + def purge_history(self, room_id, topological_ordering): + """Deletes room history before a certain point """ + return self.runInteraction( + "purge_history", + self._purge_history_txn, room_id, topological_ordering + ) + + def _purge_history_txn(self, txn, room_id, topological_ordering): # Tables that should be pruned: # event_auth # event_backward_extremities From 61ffaa8137ac962f84a077bb53c4a1b06b21b49b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:34:35 +0000 Subject: [PATCH 2/5] bump purge logging to info this thing takes ages and the only sign of any progress is the logs, so having some logs is useful. --- synapse/storage/events.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 21533970d..803a4e247 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2081,7 +2081,7 @@ class EventsStore(SQLBaseStore): 400, "topological_ordering is greater than forward extremeties" ) - logger.debug("[purge] looking for events to delete") + logger.info("[purge] looking for events to delete") txn.execute( "SELECT event_id, state_key FROM events" @@ -2102,7 +2102,7 @@ class EventsStore(SQLBaseStore): for event_id, state_key in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.debug("[purge] Finding new backward extremities") + logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged @@ -2116,7 +2116,7 @@ class EventsStore(SQLBaseStore): ) new_backwards_extrems = txn.fetchall() - logger.debug("[purge] replacing backward extremities: %r", new_backwards_extrems) + logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems) txn.execute( "DELETE FROM event_backward_extremities WHERE room_id = ?", @@ -2132,7 +2132,7 @@ class EventsStore(SQLBaseStore): ] ) - logger.debug("[purge] finding redundant state groups") + logger.info("[purge] finding redundant state groups") # Get all state groups that are only referenced by events that are # to be deleted. @@ -2149,15 +2149,15 @@ class EventsStore(SQLBaseStore): ) state_rows = txn.fetchall() - logger.debug("[purge] found %i redundant state groups", len(state_rows)) + logger.info("[purge] found %i redundant state groups", len(state_rows)) # make a set of the redundant state groups, so that we can look them up # efficiently state_groups_to_delete = set([sg for sg, in state_rows]) # Now we get all the state groups that rely on these state groups - logger.debug("[purge] finding state groups which depend on redundant" - " state groups") + logger.info("[purge] finding state groups which depend on redundant" + " state groups") remaining_state_groups = [] for i in xrange(0, len(state_rows), 100): chunk = [sg for sg, in state_rows[i:i + 100]] @@ -2182,7 +2182,7 @@ class EventsStore(SQLBaseStore): # Now we turn the state groups that reference to-be-deleted state # groups to non delta versions. for sg in remaining_state_groups: - logger.debug("[purge] de-delta-ing remaining state group %s", sg) + logger.info("[purge] de-delta-ing remaining state group %s", sg) curr_state = self._get_state_groups_from_groups_txn( txn, [sg], types=None ) @@ -2219,7 +2219,7 @@ class EventsStore(SQLBaseStore): ], ) - logger.debug("[purge] removing redundant state groups") + logger.info("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", state_rows @@ -2230,13 +2230,13 @@ class EventsStore(SQLBaseStore): ) # Delete all non-state - logger.debug("[purge] removing events from event_to_state_groups") + logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) - logger.debug("[purge] updating room_depth") + logger.info("[purge] updating room_depth") txn.execute( "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", (topological_ordering, room_id,) @@ -2258,7 +2258,8 @@ class EventsStore(SQLBaseStore): "event_signatures", "rejections", ): - logger.debug("[purge] removing remote non-state events from %s", table) + logger.info("[purge] removing remote non-state events from %s", + table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), @@ -2266,7 +2267,7 @@ class EventsStore(SQLBaseStore): ) # Mark all state and own events as outliers - logger.debug("[purge] marking remaining events as outliers") + logger.info("[purge] marking remaining events as outliers") txn.executemany( "UPDATE events SET outlier = ?" " WHERE event_id = ?", From e571aef06d3b1af3946e790841f4b8a3a4cfdebf Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:40:29 +0000 Subject: [PATCH 3/5] purge: Move cache invalidation to more appropriate place it was a bit of a non-sequitur there --- synapse/storage/events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 803a4e247..24d997830 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2099,9 +2099,6 @@ class EventsStore(SQLBaseStore): "[purge] found %i events before cutoff, of which %i are remote" " non-state events to delete", len(event_rows), len(to_delete)) - for event_id, state_key in event_rows: - txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding @@ -2229,12 +2226,15 @@ class EventsStore(SQLBaseStore): state_rows ) - # Delete all non-state logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) + for event_id, _ in event_rows: + txn.call_after(self._get_state_group_for_event.invalidate, ( + event_id, + )) logger.info("[purge] updating room_depth") txn.execute( From 74fcbf741b3a7b95b5cc44478050e8a40fb7dc46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Feb 2018 18:44:52 +0000 Subject: [PATCH 4/5] delete_local_events for purge_history Add a flag which makes the purger delete local events --- docs/admin_api/purge_history_api.rst | 14 +++++++++-- synapse/handlers/message.py | 4 ++-- synapse/http/servlet.py | 18 +++++++++++--- synapse/rest/client/v1/admin.py | 11 ++++++++- synapse/storage/events.py | 35 ++++++++++++++++++++++------ 5 files changed, 67 insertions(+), 15 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index 08b330636..b4e5bd9d7 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -4,8 +4,6 @@ Purge History API The purge history API allows server admins to purge historic events from their database, reclaiming disk space. -**NB!** This will not delete local events (locally sent messages content etc) from the database, but will remove lots of the metadata about them and does dramatically reduce the on disk space usage - Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. @@ -15,3 +13,15 @@ The API is simply: ``POST /_matrix/client/r0/admin/purge_history//`` including an ``access_token`` of a server admin. + +By default, events sent by local users are not deleted, as they may represent +the only copies of this content in existence. (Events sent by remote users are +deleted, and room state data before the cutoff is always removed). + +To delete local events as well, set ``delete_local_events`` in the body: + +.. code:: json + + { + "delete_local_events": True, + } diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1c7860bb0..276d1a772 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -63,7 +63,7 @@ class MessageHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() @defer.inlineCallbacks - def purge_history(self, room_id, event_id): + def purge_history(self, room_id, event_id, delete_local_events=False): event = yield self.store.get_event(event_id) if event.room_id != room_id: @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, depth) + yield self.store.purge_history(room_id, depth, delete_local_events) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 71420e54d..ef8e62901 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -148,11 +148,13 @@ def parse_string_from_args(args, name, default=None, required=False, return default -def parse_json_value_from_request(request): +def parse_json_value_from_request(request, allow_empty_body=False): """Parse a JSON value from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into None Returns: The JSON value. @@ -165,6 +167,9 @@ def parse_json_value_from_request(request): except Exception: raise SynapseError(400, "Error reading JSON content.") + if not content_bytes and allow_empty_body: + return None + try: content = simplejson.loads(content_bytes) except Exception as e: @@ -174,17 +179,24 @@ def parse_json_value_from_request(request): return content -def parse_json_object_from_request(request): +def parse_json_object_from_request(request, allow_empty_body=False): """Parse a JSON object from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into an empty dict. Raises: SynapseError if the request body couldn't be decoded as JSON or if it wasn't a JSON object. """ - content = parse_json_value_from_request(request) + content = parse_json_value_from_request( + request, allow_empty_body=allow_empty_body, + ) + + if allow_empty_body and content is None: + return {} if type(content) != dict: message = "Content must be a JSON object." diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea..f954d2ea6 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -128,7 +128,16 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): if not is_admin: raise AuthError(403, "You are not a server admin") - yield self.handlers.message_handler.purge_history(room_id, event_id) + body = parse_json_object_from_request(request, allow_empty_body=True) + + delete_local_events = bool( + body.get("delete_local_history", False) + ) + + yield self.handlers.message_handler.purge_history( + room_id, event_id, + delete_local_events=delete_local_events, + ) defer.returnValue((200, {})) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 24d997830..11a2ff2d8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,32 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def purge_history(self, room_id, topological_ordering): + def purge_history( + self, room_id, topological_ordering, delete_local_events, + ): """Deletes room history before a certain point + + Args: + room_id (str): + + topological_ordering (int): + minimum topo ordering to preserve + + delete_local_events (bool): + if True, we will delete local events as well as remote ones + (instead of just marking them as outliers and deleting their + state groups). """ return self.runInteraction( "purge_history", - self._purge_history_txn, room_id, topological_ordering + self._purge_history_txn, room_id, topological_ordering, + delete_local_events, ) - def _purge_history_txn(self, txn, room_id, topological_ordering): + def _purge_history_txn( + self, txn, room_id, topological_ordering, delete_local_events, + ): # Tables that should be pruned: # event_auth # event_backward_extremities @@ -2093,11 +2109,14 @@ class EventsStore(SQLBaseStore): to_delete = [ (event_id,) for event_id, state_key in event_rows - if state_key is None and not self.hs.is_mine_id(event_id) + if state_key is None and ( + delete_local_events or not self.hs.is_mine_id(event_id) + ) ] logger.info( - "[purge] found %i events before cutoff, of which %i are remote" - " non-state events to delete", len(event_rows), len(to_delete)) + "[purge] found %i events before cutoff, of which %i can be deleted", + len(event_rows), len(to_delete), + ) logger.info("[purge] Finding new backward extremities") @@ -2273,7 +2292,9 @@ class EventsStore(SQLBaseStore): " WHERE event_id = ?", [ (True, event_id,) for event_id, state_key in event_rows - if state_key is not None or self.hs.is_mine_id(event_id) + if state_key is not None or ( + not delete_local_events and self.hs.is_mine_id(event_id) + ) ] ) From 39a6b3549638c70e3aaf51b361576fbd729eb655 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Feb 2018 12:13:34 +0000 Subject: [PATCH 5/5] purge: move room_depth update to end ... to avoid locking the table for too long --- synapse/storage/events.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 11a2ff2d8..238a2006b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2255,12 +2255,6 @@ class EventsStore(SQLBaseStore): event_id, )) - logger.info("[purge] updating room_depth") - txn.execute( - "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", - (topological_ordering, room_id,) - ) - # Delete all remote non-state events for table in ( "events", @@ -2298,6 +2292,18 @@ class EventsStore(SQLBaseStore): ] ) + # synapse tries to take out an exclusive lock on room_depth whenever it + # persists events (because upsert), and once we run this update, we + # will block that for the rest of our transaction. + # + # So, let's stick it at the end so that we don't block event + # persistence. + logger.info("[purge] updating room_depth") + txn.execute( + "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", + (topological_ordering, room_id,) + ) + logger.info("[purge] done") @defer.inlineCallbacks