From 053ecae4db7c51c7fa3d56f5ea2b1289d73fa10d Mon Sep 17 00:00:00 2001 From: Andrew Conrad Date: Tue, 14 Nov 2017 15:09:50 -0600 Subject: [PATCH 001/173] Mention SyTest in the README, after Development Signed-off-by: Andrew Conrad --- README.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.rst b/README.rst index 9da8c7f7a..cda0794e5 100644 --- a/README.rst +++ b/README.rst @@ -883,6 +883,17 @@ This should end with a 'PASSED' result:: PASSED (successes=143) +Running the Integration Tests +============================= + +Synapse is accompanied by `SyTest `_, +a Matrix homeserver integration testing suite, which uses HTTP requests to +access the API as a Matrix client would. It is able to run Synapse directly from +the source tree, so installation of the server is not required. + +Testing with SyTest is recommended for verifying that changes related to the +Client-Server API are functioning correctly. See the `installation instructions +`_ for details. Building Internal API Documentation =================================== From 4102468da98543ddb899857b67f5666ae4cfae92 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 16 Jan 2018 16:13:28 -0500 Subject: [PATCH 002/173] Note that Synapse is available in Fedora Signed-off-by: Jeremy Cline --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index ef1eed458..ef27b92dd 100644 --- a/README.rst +++ b/README.rst @@ -354,6 +354,10 @@ https://matrix.org/docs/projects/try-matrix-now.html (or build your own with one Fedora ------ +Synapse is in the Fedora repositories as ``matrix-synapse``:: + + sudo dnf install matrix-synapse + Oleg Girko provides Fedora RPMs at https://obs.infoserver.lv/project/monitor/matrix-synapse From 92c52df702654df094abb69c9aa24f302cdc6130 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Feb 2018 17:55:18 +0000 Subject: [PATCH 003/173] Make store_file use store_into_file --- synapse/rest/media/v1/media_storage.py | 37 ++++++++------------------ 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 3f8d4b9c2..83471b317 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -58,23 +58,13 @@ class MediaStorage(object): Returns: Deferred[str]: the file path written to in the primary media store """ - path = self._file_info_to_path(file_info) - fname = os.path.join(self.local_media_directory, path) - dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) - - # Write to the main repository - yield make_deferred_yieldable(threads.deferToThread( - _write_file_synchronously, source, fname, - )) - - # Tell the storage providers about the new file. They'll decide - # if they should upload it and whether to do so synchronously - # or not. - for provider in self.storage_providers: - yield provider.store_file(path, file_info) + with self.store_into_file(file_info) as (f, fname, finish_cb): + # Write to the main repository + yield make_deferred_yieldable(threads.deferToThread( + _write_file_synchronously, source, f, + )) + yield finish_cb() defer.returnValue(fname) @@ -240,21 +230,16 @@ class MediaStorage(object): ) -def _write_file_synchronously(source, fname): - """Write `source` to the path `fname` synchronously. Should be called +def _write_file_synchronously(source, dest): + """Write `source` to the file like `dest` synchronously. Should be called from a thread. Args: - source: A file like object to be written - fname (str): Path to write to + source: A file like object that's to be written + dest: A file like object to be written to """ - dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) - source.seek(0) # Ensure we read from the start of the file - with open(fname, "wb") as f: - shutil.copyfileobj(source, f) + shutil.copyfileobj(source, dest) class FileResponder(Responder): From 3acd616979d59c6ce6a63a4f896390d058bceacc Mon Sep 17 00:00:00 2001 From: Pascal Bach Date: Sun, 18 Feb 2018 11:08:15 +0100 Subject: [PATCH 004/173] Update pynacl dependency to 1.2.1 or higher Signed-off-by: Pascal Bach --- synapse/python_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 97b631e60..5d65b5fd6 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -24,7 +24,7 @@ REQUIREMENTS = { "unpaddedbase64>=1.1.0": ["unpaddedbase64>=1.1.0"], "canonicaljson>=1.0.0": ["canonicaljson>=1.0.0"], "signedjson>=1.0.0": ["signedjson>=1.0.0"], - "pynacl==0.3.0": ["nacl==0.3.0", "nacl.bindings"], + "pynacl>=1.2.1": ["nacl>=1.2.1", "nacl.bindings"], "service_identity>=1.0.0": ["service_identity>=1.0.0"], "Twisted>=16.0.0": ["twisted>=16.0.0"], "pyopenssl>=0.14": ["OpenSSL>=0.14"], From 6ff8c87484d13c00fddc87b0bcc3f4cd691c81ff Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:30:54 +0000 Subject: [PATCH 005/173] Batch inserts into event_push_actions_staging --- synapse/push/bulk_push_rule_evaluator.py | 15 ++++--- synapse/storage/event_push_actions.py | 53 ++++++++++++++++-------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index bf4f1c583..64e9a1da5 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -144,6 +144,7 @@ class BulkPushRuleEvaluator(object): Deferred """ rules_by_user = yield self._get_rules_for_event(event, context) + actions_by_user = {} room_members = yield self.store.get_joined_users_from_context( event, context @@ -189,14 +190,16 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: - # Push rules say we should notify the user of this event, - # so we mark it in the DB in the staging area. (This - # will then get handled when we persist the event) - yield self.store.add_push_actions_to_staging( - event.event_id, uid, actions, - ) + actions_by_user[uid] = actions break + # Push rules say we should notify the user of this event, + # so we mark it in the DB in the staging area. (This + # will then get handled when we persist the event) + yield self.store.add_push_actions_to_staging( + event.event_id, actions_by_user, + ) + def _condition_checker(evaluator, conditions, uid, display_name, cache): for cond in conditions: diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7..04e8836e6 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -755,32 +755,51 @@ class EventPushActionsStore(SQLBaseStore): (rotate_to_stream_ordering,) ) - def add_push_actions_to_staging(self, event_id, user_id, actions): - """Add the push actions for the user and event to the push - action staging area. + def add_push_actions_to_staging(self, event_id, user_id_actions): + """Add the push actions for the event to the push action staging area. Args: event_id (str) - user_id (str) - actions (list[dict|str]): An action can either be a string or - dict. + user_id_actions (dict[str, list[dict|str])]): A dictionary mapping + user_id to list of push actions, where an action can either be + a string or dict. Returns: Deferred """ - is_highlight = 1 if _action_has_highlight(actions) else 0 + if not user_id_actions: + return - return self._simple_insert( - table="event_push_actions_staging", - values={ - "event_id": event_id, - "user_id": user_id, - "actions": _serialize_action(actions, is_highlight), - "notif": 1, - "highlight": is_highlight, - }, - desc="add_push_actions_to_staging", + # This is a helper function for generating the necessary tuple that + # can be used to inert into the `event_push_actions_staging` table. + def _gen_entry(user_id, actions): + is_highlight = 1 if _action_has_highlight(actions) else 0 + return ( + event_id, # event_id column + user_id, # user_id column + _serialize_action(actions, is_highlight), # actions column + 1, # notif column + is_highlight, # highlight column + ) + + def _add_push_actions_to_staging_txn(txn): + # We don't use _simple_insert_many here to avoid the overhead + # of generating lists of dicts. + + sql = """ + INSERT INTO event_push_actions_staging + (event_id, user_id, actions, notif, highlight) + VALUES (?, ?, ?, ?, ?) + """ + + txn.executemany(sql, ( + _gen_entry(user_id, actions) + for user_id, actions in user_id_actions.iteritems() + )) + + return self.runInteraction( + "add_push_actions_to_staging", _add_push_actions_to_staging_txn ) def remove_push_actions_from_staging(self, event_id): From d874d4f2d78956973d589bd32aa64d752d14cdc9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:36:56 +0000 Subject: [PATCH 006/173] Delete from push_actions_staging in federation too --- synapse/handlers/federation.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 46bcf8b08..8832ba58b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1447,16 +1447,24 @@ class FederationHandler(BaseHandler): auth_events=auth_events, ) - if not event.internal_metadata.is_outlier() and not backfilled: - yield self.action_generator.handle_push_actions_for_event( - event, context - ) + try: + if not event.internal_metadata.is_outlier() and not backfilled: + yield self.action_generator.handle_push_actions_for_event( + event, context + ) - event_stream_id, max_stream_id = yield self.store.persist_event( - event, - context=context, - backfilled=backfilled, - ) + event_stream_id, max_stream_id = yield self.store.persist_event( + event, + context=context, + backfilled=backfilled, + ) + except: # noqa: E722, as we reraise the exception this is fine. + # Ensure that we actually remove the entries in the push actions + # staging area + logcontext.preserve_fn( + self.store.remove_push_actions_from_staging + )(event.event_id) + raise if not backfilled: # this intentionally does not yield: we don't care about the result From e440e2845642a27f15d073aea58ce20b2785f66e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:41:40 +0000 Subject: [PATCH 007/173] Fix unit tests --- tests/replication/slave/storage/test_events.py | 10 ++++++---- tests/storage/test_event_push_actions.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 4780f2ab7..cb058d314 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -230,10 +230,12 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - for user_id, actions in push_actions: - yield self.master_store.add_push_actions_to_staging( - event.event_id, user_id, actions, - ) + yield self.master_store.add_push_actions_to_staging( + event.event_id, { + user_id: actions + for user_id, actions in push_actions + }, + ) ordering = None if backfill: diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index d483e7cf9..550f9ba66 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -71,7 +71,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): event.depth = stream yield self.store.add_push_actions_to_staging( - event.event_id, user_id, action, + event.event_id, {user_id: action}, ) yield self.store.runInteraction( "", self.store._set_push_actions_for_event_and_users_txn, From ad0ccf15ea35603c9453523acb3f43661fdbaa12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 12:29:50 +0000 Subject: [PATCH 008/173] Refactor _set_push_actions_for_event_and_users_txn to use events_and_contexts --- synapse/storage/event_push_actions.py | 65 +++++++++++++++------------ synapse/storage/events.py | 11 ++--- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7..dac350548 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -88,11 +88,13 @@ class EventPushActionsStore(SQLBaseStore): self._rotate_notifs, 30 * 60 * 1000 ) - def _set_push_actions_for_event_and_users_txn(self, txn, event): - """ + def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts): + """Handles moving push actions from staging table to main + event_push_actions table for all events in `events_and_contexts`. + Args: - event: the event set actions for - tuples: list of tuples of (user_id, actions) + events_and_contexts (list[(EventBase, EventContext)]): events + we are persisting """ sql = """ @@ -105,34 +107,39 @@ class EventPushActionsStore(SQLBaseStore): WHERE event_id = ? """ - txn.execute(sql, ( - event.room_id, event.internal_metadata.stream_ordering, - event.depth, event.event_id, - )) + if events_and_contexts: + txn.executemany(sql, ( + ( + event.room_id, event.internal_metadata.stream_ordering, + event.depth, event.event_id, + ) + for event, _ in events_and_contexts + )) - user_ids = self._simple_select_onecol_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - retcol="user_id", - ) - - self._simple_delete_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - ) - - for uid in user_ids: - txn.call_after( - self.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid,) + for event, _ in events_and_contexts: + user_ids = self._simple_select_onecol_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + retcol="user_id", ) + for uid in user_ids: + txn.call_after( + self.get_unread_event_push_actions_by_room_for_user.invalidate_many, + (event.room_id, uid,) + ) + + txn.executemany( + "DELETE FROM event_push_actions_staging WHERE event_id = ?", + ( + (event.event_id,) + for event, _ in events_and_contexts + ) + ) + @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc..c8b8abc2e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1162,16 +1162,17 @@ class EventsStore(SQLBaseStore): backfilled (bool): True if the events were backfilled """ + # Insert all the push actions into the event_push_actions table. + self._set_push_actions_for_event_and_users_txn( + txn, + events_and_contexts=events_and_contexts, + ) + if not events_and_contexts: # nothing to do here return for event, context in events_and_contexts: - # Insert all the push actions into the event_push_actions table. - self._set_push_actions_for_event_and_users_txn( - txn, event, - ) - if event.type == EventTypes.Redaction and event.redacts is not None: # Remove the entries in the event_push_actions table for the # redacted event. From 24087bffa932660cf4482b465b759f7161465f8a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 12:33:04 +0000 Subject: [PATCH 009/173] Ensure all push actions are deleted from staging --- synapse/storage/event_push_actions.py | 10 +++++++++- synapse/storage/events.py | 11 ++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index dac350548..6a122b05a 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -88,13 +88,21 @@ class EventPushActionsStore(SQLBaseStore): self._rotate_notifs, 30 * 60 * 1000 ) - def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts): + def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts, + all_events_and_contexts): """Handles moving push actions from staging table to main event_push_actions table for all events in `events_and_contexts`. + Also ensures that all events in `all_events_and_contexts` are removed + from the push action staging area. + Args: events_and_contexts (list[(EventBase, EventContext)]): events we are persisting + all_events_and_contexts (list[(EventBase, EventContext)]): all + events that we were going to persist. This includes events + we've already persisted, etc, that wouldn't appear in + events_and_context. """ sql = """ diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c8b8abc2e..7f8561a0c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -693,6 +693,8 @@ class EventsStore(SQLBaseStore): list of the event ids which are the forward extremities. """ + all_events_and_contexts = events_and_contexts + max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering self._update_current_state_txn(txn, state_delta_for_room, max_stream_order) @@ -755,6 +757,7 @@ class EventsStore(SQLBaseStore): self._update_metadata_tables_txn( txn, events_and_contexts=events_and_contexts, + all_events_and_contexts=all_events_and_contexts, backfilled=backfilled, ) @@ -1152,13 +1155,18 @@ class EventsStore(SQLBaseStore): ec for ec in events_and_contexts if ec[0] not in to_remove ] - def _update_metadata_tables_txn(self, txn, events_and_contexts, backfilled): + def _update_metadata_tables_txn(self, txn, events_and_contexts, + all_events_and_contexts, backfilled): """Update all the miscellaneous tables for new events Args: txn (twisted.enterprise.adbapi.Connection): db connection events_and_contexts (list[(EventBase, EventContext)]): events we are persisting + all_events_and_contexts (list[(EventBase, EventContext)]): all + events that we were going to persist. This includes events + we've already persisted, etc, that wouldn't appear in + events_and_context. backfilled (bool): True if the events were backfilled """ @@ -1166,6 +1174,7 @@ class EventsStore(SQLBaseStore): self._set_push_actions_for_event_and_users_txn( txn, events_and_contexts=events_and_contexts, + all_events_and_contexts=all_events_and_contexts, ) if not events_and_contexts: From 25634ed1523b88f82edaa73124f3afd057fbb16d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 12:40:44 +0000 Subject: [PATCH 010/173] Fix test --- tests/storage/test_event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index d483e7cf9..dc90e5c24 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -75,7 +75,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): ) yield self.store.runInteraction( "", self.store._set_push_actions_for_event_and_users_txn, - event, + [(event, None)], [(event, None)], ) def _rotate(stream): From f5ac4dc2d46d329e7053259c61ad402269903ee3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 12:17:14 +0000 Subject: [PATCH 011/173] Split ReceiptsStore --- synapse/replication/slave/storage/receipts.py | 33 +----- synapse/storage/__init__.py | 3 - synapse/storage/receipts.py | 109 ++++++++++-------- 3 files changed, 69 insertions(+), 76 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index b371574ec..4e845ec04 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +17,7 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.receipts import ReceiptsStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.receipts import ReceiptsWorkerStore # So, um, we want to borrow a load of functions intended for reading from # a DataStore, but we don't want to take functions that either write to the @@ -29,36 +28,14 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache # the method descriptor on the DataStore and chuck them into our class. -class SlavedReceiptsStore(BaseSlavedStore): +class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedReceiptsStore, self).__init__(db_conn, hs) - - self._receipts_id_gen = SlavedIdTracker( + receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" ) - self._receipts_stream_cache = StreamChangeCache( - "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() - ) - - get_receipts_for_user = ReceiptsStore.__dict__["get_receipts_for_user"] - get_linearized_receipts_for_room = ( - ReceiptsStore.__dict__["get_linearized_receipts_for_room"] - ) - _get_linearized_receipts_for_rooms = ( - ReceiptsStore.__dict__["_get_linearized_receipts_for_rooms"] - ) - get_last_receipt_event_id_for_user = ( - ReceiptsStore.__dict__["get_last_receipt_event_id_for_user"] - ) - - get_max_receipt_stream_id = DataStore.get_max_receipt_stream_id.__func__ - get_all_updated_receipts = DataStore.get_all_updated_receipts.__func__ - - get_linearized_receipts_for_rooms = ( - DataStore.get_linearized_receipts_for_rooms.__func__ - ) + super(SlavedReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) def stream_positions(self): result = super(SlavedReceiptsStore, self).stream_positions() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index f8fbd02ce..e1c4fe086 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -104,9 +104,6 @@ class DataStore(RoomMemberStore, RoomStore, db_conn, "events", "stream_ordering", step=-1, extra_tables=[("ex_outlier_stream", "event_stream_ordering")] ) - self._receipts_id_gen = StreamIdGenerator( - db_conn, "receipts_linearized", "stream_id" - ) self._account_data_id_gen = StreamIdGenerator( db_conn, "account_data_max_stream_id", "stream_id" ) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 12b3cc7f5..aa62474a4 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +15,7 @@ # limitations under the License. from ._base import SQLBaseStore +from .util.id_generators import StreamIdGenerator from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -26,9 +28,17 @@ import ujson as json logger = logging.getLogger(__name__) -class ReceiptsStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(ReceiptsStore, self).__init__(db_conn, hs) +class ReceiptsWorkerStore(SQLBaseStore): + def __init__(self, receipts_id_gen, db_conn, hs): + """ + Args: + receipts_id_gen (StreamIdGenerator|SlavedIdTracker) + db_conn: Database connection + hs (Homeserver) + """ + super(ReceiptsWorkerStore, self).__init__(db_conn, hs) + + self._receipts_id_gen = receipts_id_gen self._receipts_stream_cache = StreamChangeCache( "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() @@ -39,26 +49,6 @@ class ReceiptsStore(SQLBaseStore): receipts = yield self.get_receipts_for_room(room_id, "m.read") defer.returnValue(set(r['user_id'] for r in receipts)) - def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, - user_id): - if receipt_type != "m.read": - return - - # Returns an ObservableDeferred - res = self.get_users_with_read_receipts_in_room.cache.get( - room_id, None, update_metrics=False, - ) - - if res: - if isinstance(res, defer.Deferred) and res.called: - res = res.result - if user_id in res: - # We'd only be adding to the set, so no point invalidating if the - # user is already there - return - - self.get_users_with_read_receipts_in_room.invalidate((room_id,)) - @cached(num_args=2) def get_receipts_for_room(self, room_id, receipt_type): return self._simple_select_list( @@ -273,6 +263,57 @@ class ReceiptsStore(SQLBaseStore): def get_max_receipt_stream_id(self): return self._receipts_id_gen.get_current_token() + def get_all_updated_receipts(self, last_id, current_id, limit=None): + if last_id == current_id: + return defer.succeed([]) + + def get_all_updated_receipts_txn(txn): + sql = ( + "SELECT stream_id, room_id, receipt_type, user_id, event_id, data" + " FROM receipts_linearized" + " WHERE ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + ) + args = [last_id, current_id] + if limit is not None: + sql += " LIMIT ?" + args.append(limit) + txn.execute(sql, args) + + return txn.fetchall() + return self.runInteraction( + "get_all_updated_receipts", get_all_updated_receipts_txn + ) + + +class ReceiptsStore(ReceiptsWorkerStore): + def __init__(self, db_conn, hs): + receipts_id_gen = StreamIdGenerator( + db_conn, "receipts_linearized", "stream_id" + ) + + super(ReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + + def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, + user_id): + if receipt_type != "m.read": + return + + # Returns an ObservableDeferred + res = self.get_users_with_read_receipts_in_room.cache.get( + room_id, None, update_metrics=False, + ) + + if res: + if isinstance(res, defer.Deferred) and res.called: + res = res.result + if user_id in res: + # We'd only be adding to the set, so no point invalidating if the + # user is already there + return + + self.get_users_with_read_receipts_in_room.invalidate((room_id,)) + def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): txn.call_after( @@ -457,25 +498,3 @@ class ReceiptsStore(SQLBaseStore): "data": json.dumps(data), } ) - - def get_all_updated_receipts(self, last_id, current_id, limit=None): - if last_id == current_id: - return defer.succeed([]) - - def get_all_updated_receipts_txn(txn): - sql = ( - "SELECT stream_id, room_id, receipt_type, user_id, event_id, data" - " FROM receipts_linearized" - " WHERE ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - ) - args = [last_id, current_id] - if limit is not None: - sql += " LIMIT ?" - args.append(limit) - txn.execute(sql, args) - - return txn.fetchall() - return self.runInteraction( - "get_all_updated_receipts", get_all_updated_receipts_txn - ) From e316bbb4c07cad97c4cff5bc0c5b0dc2cd7bc519 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:33:18 +0000 Subject: [PATCH 012/173] Use abstract base class to access stream IDs --- synapse/replication/slave/storage/receipts.py | 9 +++- synapse/storage/receipts.py | 42 ++++++++++++------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index 4e845ec04..a2eb4a02d 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -31,11 +31,16 @@ from synapse.storage.receipts import ReceiptsWorkerStore class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - receipts_id_gen = SlavedIdTracker( + # We instansiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" ) - super(SlavedReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + super(SlavedReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() def stream_positions(self): result = super(SlavedReceiptsStore, self).stream_positions() diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index aa62474a4..b11cf7ff6 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -21,6 +21,7 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache from twisted.internet import defer +import abc import logging import ujson as json @@ -29,21 +30,30 @@ logger = logging.getLogger(__name__) class ReceiptsWorkerStore(SQLBaseStore): - def __init__(self, receipts_id_gen, db_conn, hs): - """ - Args: - receipts_id_gen (StreamIdGenerator|SlavedIdTracker) - db_conn: Database connection - hs (Homeserver) - """ + """This is an abstract base class where subclasses must implement + `get_max_receipt_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): super(ReceiptsWorkerStore, self).__init__(db_conn, hs) - self._receipts_id_gen = receipts_id_gen - self._receipts_stream_cache = StreamChangeCache( - "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() + "ReceiptsRoomChangeCache", self.get_max_receipt_stream_id() ) + @abc.abstractmethod + def get_max_receipt_stream_id(self): + """Get the current max stream ID for receipts stream + + Returns: + int + """ + pass + @cachedInlineCallbacks() def get_users_with_read_receipts_in_room(self, room_id): receipts = yield self.get_receipts_for_room(room_id, "m.read") @@ -260,9 +270,6 @@ class ReceiptsWorkerStore(SQLBaseStore): } defer.returnValue(results) - def get_max_receipt_stream_id(self): - return self._receipts_id_gen.get_current_token() - def get_all_updated_receipts(self, last_id, current_id, limit=None): if last_id == current_id: return defer.succeed([]) @@ -288,11 +295,16 @@ class ReceiptsWorkerStore(SQLBaseStore): class ReceiptsStore(ReceiptsWorkerStore): def __init__(self, db_conn, hs): - receipts_id_gen = StreamIdGenerator( + # We instansiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = StreamIdGenerator( db_conn, "receipts_linearized", "stream_id" ) - super(ReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + super(ReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, user_id): From 95e4cffd859b0fc3fcd54c755fcc0da403f97b94 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:58:40 +0000 Subject: [PATCH 013/173] Fix comment --- synapse/replication/slave/storage/receipts.py | 2 +- synapse/storage/receipts.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index a2eb4a02d..f0e29e983 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -31,7 +31,7 @@ from synapse.storage.receipts import ReceiptsWorkerStore class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - # We instansiate this first as the ReceiptsWorkerStore constructor + # We instantiate this first as the ReceiptsWorkerStore constructor # needs to be able to call get_max_receipt_stream_id self._receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index b11cf7ff6..c2a6613a6 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -295,7 +295,7 @@ class ReceiptsWorkerStore(SQLBaseStore): class ReceiptsStore(ReceiptsWorkerStore): def __init__(self, db_conn, hs): - # We instansiate this first as the ReceiptsWorkerStore constructor + # We instantiate this first as the ReceiptsWorkerStore constructor # needs to be able to call get_max_receipt_stream_id self._receipts_id_gen = StreamIdGenerator( db_conn, "receipts_linearized", "stream_id" From 8fbb4d0d19031d8cd3742285fb9b36c3bdfc52a0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:59:23 +0000 Subject: [PATCH 014/173] Raise exception in abstract method --- synapse/storage/receipts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index c2a6613a6..40530632c 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -52,7 +52,7 @@ class ReceiptsWorkerStore(SQLBaseStore): Returns: int """ - pass + raise NotImplementedError() @cachedInlineCallbacks() def get_users_with_read_receipts_in_room(self, room_id): From ca9b9d970374766f7b53dc5b1762cfa1b0e1a5dd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 12:08:42 +0000 Subject: [PATCH 015/173] Split AccountDataStore and TagStore --- .../replication/slave/storage/account_data.py | 38 +--------- synapse/storage/__init__.py | 8 -- synapse/storage/account_data.py | 75 +++++++++++++++---- synapse/storage/tags.py | 15 ++-- 4 files changed, 69 insertions(+), 67 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index efbd87918..6c95261aa 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -15,48 +15,18 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.account_data import AccountDataStore -from synapse.storage.tags import TagsStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.account_data import AccountDataWorkerStore +from synapse.storage.tags import TagsWorkerStore -class SlavedAccountDataStore(BaseSlavedStore): +class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedAccountDataStore, self).__init__(db_conn, hs) self._account_data_id_gen = SlavedIdTracker( db_conn, "account_data_max_stream_id", "stream_id", ) - self._account_data_stream_cache = StreamChangeCache( - "AccountDataAndTagsChangeCache", - self._account_data_id_gen.get_current_token(), - ) - get_account_data_for_user = ( - AccountDataStore.__dict__["get_account_data_for_user"] - ) - - get_global_account_data_by_type_for_users = ( - AccountDataStore.__dict__["get_global_account_data_by_type_for_users"] - ) - - get_global_account_data_by_type_for_user = ( - AccountDataStore.__dict__["get_global_account_data_by_type_for_user"] - ) - - get_tags_for_user = TagsStore.__dict__["get_tags_for_user"] - get_tags_for_room = ( - DataStore.get_tags_for_room.__func__ - ) - get_account_data_for_room = ( - DataStore.get_account_data_for_room.__func__ - ) - - get_updated_tags = DataStore.get_updated_tags.__func__ - get_updated_account_data_for_user = ( - DataStore.get_updated_account_data_for_user.__func__ - ) + super(SlavedAccountDataStore, self).__init__(db_conn, hs) def get_max_account_data_stream_id(self): return self._account_data_id_gen.get_current_token() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index e1c4fe086..e221284ee 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -104,9 +104,6 @@ class DataStore(RoomMemberStore, RoomStore, db_conn, "events", "stream_ordering", step=-1, extra_tables=[("ex_outlier_stream", "event_stream_ordering")] ) - self._account_data_id_gen = StreamIdGenerator( - db_conn, "account_data_max_stream_id", "stream_id" - ) self._presence_id_gen = StreamIdGenerator( db_conn, "presence_stream", "stream_id" ) @@ -159,11 +156,6 @@ class DataStore(RoomMemberStore, RoomStore, "MembershipStreamChangeCache", events_max, ) - account_max = self._account_data_id_gen.get_current_token() - self._account_data_stream_cache = StreamChangeCache( - "AccountDataAndTagsChangeCache", account_max, - ) - self._presence_on_startup = self._get_active_presence(db_conn) presence_cache_prefill, min_presence_val = self._get_cache_dict( diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 56a0bde54..66fed4bdc 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -13,18 +13,46 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore from twisted.internet import defer +from ._base import SQLBaseStore +from .util.id_generators import StreamIdGenerator + +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks +import abc import ujson as json import logging logger = logging.getLogger(__name__) -class AccountDataStore(SQLBaseStore): +class AccountDataWorkerStore(SQLBaseStore): + """This is an abstract base class where subclasses must implement + `get_max_account_data_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): + account_max = self.get_max_account_data_stream_id() + self._account_data_stream_cache = StreamChangeCache( + "AccountDataAndTagsChangeCache", account_max, + ) + + super(AccountDataWorkerStore, self).__init__(db_conn, hs) + + @abc.abstractmethod + def get_max_account_data_stream_id(self): + """Get the current max stream ID for account data stream + + Returns: + int + """ + raise NotImplementedError() @cached() def get_account_data_for_user(self, user_id): @@ -209,6 +237,36 @@ class AccountDataStore(SQLBaseStore): "get_updated_account_data_for_user", get_updated_account_data_for_user_txn ) + @cachedInlineCallbacks(num_args=2, cache_context=True, max_entries=5000) + def is_ignored_by(self, ignored_user_id, ignorer_user_id, cache_context): + ignored_account_data = yield self.get_global_account_data_by_type_for_user( + "m.ignored_user_list", ignorer_user_id, + on_invalidate=cache_context.invalidate, + ) + if not ignored_account_data: + defer.returnValue(False) + + defer.returnValue( + ignored_user_id in ignored_account_data.get("ignored_users", {}) + ) + + +class AccountDataStore(AccountDataWorkerStore): + def __init__(self, db_conn, hs): + self._account_data_id_gen = StreamIdGenerator( + db_conn, "account_data_max_stream_id", "stream_id" + ) + + super(AccountDataStore, self).__init__(db_conn, hs) + + def get_max_account_data_stream_id(self): + """Get the current max stream id for the private user data stream + + Returns: + A deferred int. + """ + return self._account_data_id_gen.get_current_token() + @defer.inlineCallbacks def add_account_data_to_room(self, user_id, room_id, account_data_type, content): """Add some account_data to a room for a user. @@ -321,16 +379,3 @@ class AccountDataStore(SQLBaseStore): "update_account_data_max_stream_id", _update, ) - - @cachedInlineCallbacks(num_args=2, cache_context=True, max_entries=5000) - def is_ignored_by(self, ignored_user_id, ignorer_user_id, cache_context): - ignored_account_data = yield self.get_global_account_data_by_type_for_user( - "m.ignored_user_list", ignorer_user_id, - on_invalidate=cache_context.invalidate, - ) - if not ignored_account_data: - defer.returnValue(False) - - defer.returnValue( - ignored_user_id in ignored_account_data.get("ignored_users", {}) - ) diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index bff73f3f0..484d66991 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore +from .account_data import AccountDataWorkerStore + from synapse.util.caches.descriptors import cached from twisted.internet import defer @@ -23,15 +24,7 @@ import logging logger = logging.getLogger(__name__) -class TagsStore(SQLBaseStore): - def get_max_account_data_stream_id(self): - """Get the current max stream id for the private user data stream - - Returns: - A deferred int. - """ - return self._account_data_id_gen.get_current_token() - +class TagsWorkerStore(AccountDataWorkerStore): @cached() def get_tags_for_user(self, user_id): """Get all the tags for a user. @@ -170,6 +163,8 @@ class TagsStore(SQLBaseStore): row["tag"]: json.loads(row["content"]) for row in rows }) + +class TagsStore(TagsWorkerStore): @defer.inlineCallbacks def add_tag_to_room(self, user_id, room_id, tag, content): """Add a tag to a room for a user. From cbaad969f951c2db067ced709154151a7dfaf6f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 10:39:27 +0000 Subject: [PATCH 016/173] Split PushRulesStore --- .../replication/slave/storage/push_rule.py | 23 ++---- synapse/storage/__init__.py | 12 ---- synapse/storage/push_rule.py | 71 ++++++++++++++----- 3 files changed, 61 insertions(+), 45 deletions(-) diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 83e880fdd..0e3d9a87d 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -15,29 +15,15 @@ from .events import SlavedEventStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.push_rule import PushRuleStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.push_rule import PushRulesWorkerStore -class SlavedPushRuleStore(SlavedEventStore): +class SlavedPushRuleStore(PushRulesWorkerStore, SlavedEventStore): def __init__(self, db_conn, hs): - super(SlavedPushRuleStore, self).__init__(db_conn, hs) self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id", ) - self.push_rules_stream_cache = StreamChangeCache( - "PushRulesStreamChangeCache", - self._push_rules_stream_id_gen.get_current_token(), - ) - - get_push_rules_for_user = PushRuleStore.__dict__["get_push_rules_for_user"] - get_push_rules_enabled_for_user = ( - PushRuleStore.__dict__["get_push_rules_enabled_for_user"] - ) - have_push_rules_changed_for_user = ( - DataStore.have_push_rules_changed_for_user.__func__ - ) + super(SlavedPushRuleStore, self).__init__(db_conn, hs) def get_push_rules_stream_token(self): return ( @@ -45,6 +31,9 @@ class SlavedPushRuleStore(SlavedEventStore): self._stream_id_gen.get_current_token(), ) + def get_max_push_rules_stream_id(self): + return self._push_rules_stream_id_gen.get_current_token() + def stream_positions(self): result = super(SlavedPushRuleStore, self).stream_positions() result["push_rules"] = self._push_rules_stream_id_gen.get_current_token() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index e1c4fe086..cd2759858 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -177,18 +177,6 @@ class DataStore(RoomMemberStore, RoomStore, prefilled_cache=presence_cache_prefill ) - push_rules_prefill, push_rules_id = self._get_cache_dict( - db_conn, "push_rules_stream", - entity_column="user_id", - stream_column="stream_id", - max_value=self._push_rules_stream_id_gen.get_current_token()[0], - ) - - self.push_rules_stream_cache = StreamChangeCache( - "PushRulesStreamChangeCache", push_rules_id, - prefilled_cache=push_rules_prefill, - ) - max_device_inbox_id = self._device_inbox_id_gen.get_current_token() device_inbox_prefill, min_device_inbox_id = self._get_cache_dict( db_conn, "device_inbox", diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 8758b1c0c..b35bd7a64 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -15,10 +15,12 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.push.baserules import list_with_base_rules from synapse.api.constants import EventTypes from twisted.internet import defer +import abc import logging import simplejson as json @@ -48,7 +50,39 @@ def _load_rules(rawrules, enabled_map): return rules -class PushRuleStore(SQLBaseStore): +class PushRulesWorkerStore(SQLBaseStore): + """This is an abstract base class where subclasses must implement + `get_max_push_rules_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): + super(PushRulesWorkerStore, self).__init__(db_conn, hs) + + push_rules_prefill, push_rules_id = self._get_cache_dict( + db_conn, "push_rules_stream", + entity_column="user_id", + stream_column="stream_id", + max_value=self.get_max_push_rules_stream_id(), + ) + + self.push_rules_stream_cache = StreamChangeCache( + "PushRulesStreamChangeCache", push_rules_id, + prefilled_cache=push_rules_prefill, + ) + + @abc.abstractmethod + def get_max_push_rules_stream_id(self): + """Get the position of the push rules stream. + + Returns: + int + """ + raise NotImplementedError() + @cachedInlineCallbacks(max_entries=5000) def get_push_rules_for_user(self, user_id): rows = yield self._simple_select_list( @@ -89,6 +123,24 @@ class PushRuleStore(SQLBaseStore): r['rule_id']: False if r['enabled'] == 0 else True for r in results }) + def have_push_rules_changed_for_user(self, user_id, last_id): + if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id): + return defer.succeed(False) + else: + def have_push_rules_changed_txn(txn): + sql = ( + "SELECT COUNT(stream_id) FROM push_rules_stream" + " WHERE user_id = ? AND ? < stream_id" + ) + txn.execute(sql, (user_id, last_id)) + count, = txn.fetchone() + return bool(count) + return self.runInteraction( + "have_push_rules_changed", have_push_rules_changed_txn + ) + + +class PushRuleStore(PushRulesWorkerStore): @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids", num_args=1, inlineCallbacks=True) def bulk_get_push_rules(self, user_ids): @@ -526,21 +578,8 @@ class PushRuleStore(SQLBaseStore): room stream ordering it corresponds to.""" return self._push_rules_stream_id_gen.get_current_token() - def have_push_rules_changed_for_user(self, user_id, last_id): - if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id): - return defer.succeed(False) - else: - def have_push_rules_changed_txn(txn): - sql = ( - "SELECT COUNT(stream_id) FROM push_rules_stream" - " WHERE user_id = ? AND ? < stream_id" - ) - txn.execute(sql, (user_id, last_id)) - count, = txn.fetchone() - return bool(count) - return self.runInteraction( - "have_push_rules_changed", have_push_rules_changed_txn - ) + def get_max_push_rules_stream_id(self): + return self.get_push_rules_stream_token()[0] class RuleNotFoundException(Exception): From 6f727653717d9fc32f5676f4d991fbbe85c1c812 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 10:54:18 +0000 Subject: [PATCH 017/173] Split PusherStore --- synapse/replication/slave/storage/pushers.py | 11 ++--------- synapse/storage/pusher.py | 10 ++++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index 4e8d68ece..e352c3235 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -16,10 +16,10 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore +from synapse.storage.pusher import PusherWorkerStore -class SlavedPusherStore(BaseSlavedStore): +class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedPusherStore, self).__init__(db_conn, hs) @@ -28,13 +28,6 @@ class SlavedPusherStore(BaseSlavedStore): extra_tables=[("deleted_pushers", "stream_id")], ) - get_all_pushers = DataStore.get_all_pushers.__func__ - get_pushers_by = DataStore.get_pushers_by.__func__ - get_pushers_by_app_id_and_pushkey = ( - DataStore.get_pushers_by_app_id_and_pushkey.__func__ - ) - _decode_pushers_rows = DataStore._decode_pushers_rows.__func__ - def stream_positions(self): result = super(SlavedPusherStore, self).stream_positions() result["pushers"] = self._pushers_id_gen.get_current_token() diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index 3d8b4d5d5..b0159c70c 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -27,7 +27,7 @@ import types logger = logging.getLogger(__name__) -class PusherStore(SQLBaseStore): +class PusherWorkerStore(SQLBaseStore): def _decode_pushers_rows(self, rows): for r in rows: dataJson = r['data'] @@ -102,9 +102,6 @@ class PusherStore(SQLBaseStore): rows = yield self.runInteraction("get_all_pushers", get_pushers) defer.returnValue(rows) - def get_pushers_stream_token(self): - return self._pushers_id_gen.get_current_token() - def get_all_updated_pushers(self, last_id, current_id, limit): if last_id == current_id: return defer.succeed(([], [])) @@ -177,6 +174,11 @@ class PusherStore(SQLBaseStore): "get_all_updated_pushers_rows", get_all_updated_pushers_rows_txn ) + +class PusherStore(PusherWorkerStore): + def get_pushers_stream_token(self): + return self._pushers_id_gen.get_current_token() + @cachedInlineCallbacks(num_args=1, max_entries=15000) def get_if_user_has_pusher(self, user_id): # This only exists for the cachedList decorator From d15d237b0de69d9e8ea533060df9a8bb844b9a4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:01:13 +0000 Subject: [PATCH 018/173] Split out EventPushActionWorkerStore --- synapse/replication/slave/storage/events.py | 23 +--- synapse/storage/event_push_actions.py | 144 ++++++++++---------- 2 files changed, 76 insertions(+), 91 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f8c164b48..c5d6c6bd8 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -17,7 +17,7 @@ import logging from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore -from synapse.storage.event_push_actions import EventPushActionsStore +from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore @@ -38,7 +38,8 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(EventPushActionsWorkerStore, StateGroupWorkerStore, + BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -80,30 +81,12 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_invited_rooms_for_user = RoomMemberStore.__dict__[ "get_invited_rooms_for_user" ] - get_unread_event_push_actions_by_room_for_user = ( - EventPushActionsStore.__dict__["get_unread_event_push_actions_by_room_for_user"] - ) - _get_unread_counts_by_receipt_txn = ( - DataStore._get_unread_counts_by_receipt_txn.__func__ - ) - _get_unread_counts_by_pos_txn = ( - DataStore._get_unread_counts_by_pos_txn.__func__ - ) get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] ) _get_joined_hosts_cache = RoomMemberStore.__dict__["_get_joined_hosts_cache"] has_room_changed_since = DataStore.has_room_changed_since.__func__ - get_unread_push_actions_for_user_in_range_for_http = ( - DataStore.get_unread_push_actions_for_user_in_range_for_http.__func__ - ) - get_unread_push_actions_for_user_in_range_for_email = ( - DataStore.get_unread_push_actions_for_user_in_range_for_email.__func__ - ) - get_push_action_users_in_range = ( - DataStore.get_push_action_users_in_range.__func__ - ) get_event = DataStore.get_event.__func__ get_events = DataStore.get_events.__func__ get_rooms_for_user_where_membership_is = ( diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7..124583835 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -62,77 +62,7 @@ def _deserialize_action(actions, is_highlight): return DEFAULT_NOTIF_ACTION -class EventPushActionsStore(SQLBaseStore): - EPA_HIGHLIGHT_INDEX = "epa_highlight_index" - - def __init__(self, db_conn, hs): - super(EventPushActionsStore, self).__init__(db_conn, hs) - - self.register_background_index_update( - self.EPA_HIGHLIGHT_INDEX, - index_name="event_push_actions_u_highlight", - table="event_push_actions", - columns=["user_id", "stream_ordering"], - ) - - self.register_background_index_update( - "event_push_actions_highlights_index", - index_name="event_push_actions_highlights_index", - table="event_push_actions", - columns=["user_id", "room_id", "topological_ordering", "stream_ordering"], - where_clause="highlight=1" - ) - - self._doing_notif_rotation = False - self._rotate_notif_loop = self._clock.looping_call( - self._rotate_notifs, 30 * 60 * 1000 - ) - - def _set_push_actions_for_event_and_users_txn(self, txn, event): - """ - Args: - event: the event set actions for - tuples: list of tuples of (user_id, actions) - """ - - sql = """ - INSERT INTO event_push_actions ( - room_id, event_id, user_id, actions, stream_ordering, - topological_ordering, notif, highlight - ) - SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight - FROM event_push_actions_staging - WHERE event_id = ? - """ - - txn.execute(sql, ( - event.room_id, event.internal_metadata.stream_ordering, - event.depth, event.event_id, - )) - - user_ids = self._simple_select_onecol_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - retcol="user_id", - ) - - self._simple_delete_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - ) - - for uid in user_ids: - txn.call_after( - self.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid,) - ) - +class EventPushActionsWorkerStore(SQLBaseStore): @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id @@ -449,6 +379,78 @@ class EventPushActionsStore(SQLBaseStore): # Now return the first `limit` defer.returnValue(notifs[:limit]) + +class EventPushActionsStore(EventPushActionsWorkerStore): + EPA_HIGHLIGHT_INDEX = "epa_highlight_index" + + def __init__(self, db_conn, hs): + super(EventPushActionsStore, self).__init__(db_conn, hs) + + self.register_background_index_update( + self.EPA_HIGHLIGHT_INDEX, + index_name="event_push_actions_u_highlight", + table="event_push_actions", + columns=["user_id", "stream_ordering"], + ) + + self.register_background_index_update( + "event_push_actions_highlights_index", + index_name="event_push_actions_highlights_index", + table="event_push_actions", + columns=["user_id", "room_id", "topological_ordering", "stream_ordering"], + where_clause="highlight=1" + ) + + self._doing_notif_rotation = False + self._rotate_notif_loop = self._clock.looping_call( + self._rotate_notifs, 30 * 60 * 1000 + ) + + def _set_push_actions_for_event_and_users_txn(self, txn, event): + """ + Args: + event: the event set actions for + tuples: list of tuples of (user_id, actions) + """ + + sql = """ + INSERT INTO event_push_actions ( + room_id, event_id, user_id, actions, stream_ordering, + topological_ordering, notif, highlight + ) + SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight + FROM event_push_actions_staging + WHERE event_id = ? + """ + + txn.execute(sql, ( + event.room_id, event.internal_metadata.stream_ordering, + event.depth, event.event_id, + )) + + user_ids = self._simple_select_onecol_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + retcol="user_id", + ) + + self._simple_delete_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + ) + + for uid in user_ids: + txn.call_after( + self.get_unread_event_push_actions_by_room_for_user.invalidate_many, + (event.room_id, uid,) + ) + @defer.inlineCallbacks def get_push_actions_for_user(self, user_id, before=None, limit=50, only_highlight=False): From c96d547f4dd3c10d3c4be66c40cb07232a16a987 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:03:42 +0000 Subject: [PATCH 019/173] Actually use new param --- synapse/storage/event_push_actions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 6a122b05a..214ace27c 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -140,11 +140,13 @@ class EventPushActionsStore(SQLBaseStore): (event.room_id, uid,) ) + # Now we delete the staging area for *all* events that were being + # persisted. txn.executemany( "DELETE FROM event_push_actions_staging WHERE event_id = ?", ( (event.event_id,) - for event, _ in events_and_contexts + for event, _ in all_events_and_contexts ) ) From 573712da6b720cb808e61e4cbd5426e85e58a161 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:29:49 +0000 Subject: [PATCH 020/173] Update comments --- synapse/push/bulk_push_rule_evaluator.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 64e9a1da5..7c680659b 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -190,12 +190,13 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: + # Push rules say we should notify the user of this event actions_by_user[uid] = actions break - # Push rules say we should notify the user of this event, - # so we mark it in the DB in the staging area. (This - # will then get handled when we persist the event) + # Mark in the DB staging area the push actions for users who should be + # notified for this event. (This will then get handled when we persist + # the event) yield self.store.add_push_actions_to_staging( event.event_id, actions_by_user, ) From 27b094f382a33b7d69eb592951a463c3e53af5b4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:41:48 +0000 Subject: [PATCH 021/173] Split out get_events and co into a worker store --- synapse/replication/slave/storage/events.py | 13 +- synapse/storage/events.py | 695 ++++++++++---------- 2 files changed, 352 insertions(+), 356 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f8c164b48..74a81a0a5 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -18,6 +18,7 @@ from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore +from synapse.storage.events import EventsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore @@ -38,7 +39,7 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -104,8 +105,6 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_push_action_users_in_range = ( DataStore.get_push_action_users_in_range.__func__ ) - get_event = DataStore.get_event.__func__ - get_events = DataStore.get_events.__func__ get_rooms_for_user_where_membership_is = ( DataStore.get_rooms_for_user_where_membership_is.__func__ ) @@ -135,14 +134,6 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): _set_before_and_after = staticmethod(DataStore._set_before_and_after) - _get_events = DataStore._get_events.__func__ - _get_events_from_cache = DataStore._get_events_from_cache.__func__ - - _invalidate_get_event_cache = DataStore._invalidate_get_event_cache.__func__ - _enqueue_events = DataStore._enqueue_events.__func__ - _do_fetch = DataStore._do_fetch.__func__ - _fetch_event_rows = DataStore._fetch_event_rows.__func__ - _get_event_from_row = DataStore._get_event_from_row.__func__ _get_rooms_for_user_where_membership_is_txn = ( DataStore._get_rooms_for_user_where_membership_is_txn.__func__ ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc..681a33314 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -199,7 +199,356 @@ def _retry_on_integrity_error(func): return f -class EventsStore(SQLBaseStore): +class EventsWorkerStore(SQLBaseStore): + def __init__(self, db_conn, hs): + super(EventsWorkerStore, self).__init__(db_conn, hs) + + self._event_persist_queue = _EventPeristenceQueue() + + @defer.inlineCallbacks + def get_event(self, event_id, check_redacted=True, + get_prev_content=False, allow_rejected=False, + allow_none=False): + """Get an event from the database by event_id. + + Args: + event_id (str): The event_id of the event to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + allow_none (bool): If True, return None if no event found, if + False throw an exception. + + Returns: + Deferred : A FrozenEvent. + """ + events = yield self._get_events( + [event_id], + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + if not events and not allow_none: + raise SynapseError(404, "Could not find event %s" % (event_id,)) + + defer.returnValue(events[0] if events else None) + + @defer.inlineCallbacks + def get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + """Get events from the database + + Args: + event_ids (list): The event_ids of the events to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + + Returns: + Deferred : Dict from event_id to event. + """ + events = yield self._get_events( + event_ids, + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + defer.returnValue({e.event_id: e for e in events}) + + @defer.inlineCallbacks + def _get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + if not event_ids: + defer.returnValue([]) + + event_id_list = event_ids + event_ids = set(event_ids) + + event_entry_map = self._get_events_from_cache( + event_ids, + allow_rejected=allow_rejected, + ) + + missing_events_ids = [e for e in event_ids if e not in event_entry_map] + + if missing_events_ids: + missing_events = yield self._enqueue_events( + missing_events_ids, + check_redacted=check_redacted, + allow_rejected=allow_rejected, + ) + + event_entry_map.update(missing_events) + + events = [] + for event_id in event_id_list: + entry = event_entry_map.get(event_id, None) + if not entry: + continue + + if allow_rejected or not entry.event.rejected_reason: + if check_redacted and entry.redacted_event: + event = entry.redacted_event + else: + event = entry.event + + events.append(event) + + if get_prev_content: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender + + defer.returnValue(events) + + def _invalidate_get_event_cache(self, event_id): + self._get_event_cache.invalidate((event_id,)) + + def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): + """Fetch events from the caches + + Args: + events (list(str)): list of event_ids to fetch + allow_rejected (bool): Whether to teturn events that were rejected + update_metrics (bool): Whether to update the cache hit ratio metrics + + Returns: + dict of event_id -> _EventCacheEntry for each event_id in cache. If + allow_rejected is `False` then there will still be an entry but it + will be `None` + """ + event_map = {} + + for event_id in events: + ret = self._get_event_cache.get( + (event_id,), None, + update_metrics=update_metrics, + ) + if not ret: + continue + + if allow_rejected or not ret.event.rejected_reason: + event_map[event_id] = ret + else: + event_map[event_id] = None + + return event_map + + def _do_fetch(self, conn): + """Takes a database connection and waits for requests for events from + the _event_fetch_list queue. + """ + event_list = [] + i = 0 + while True: + try: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + single_threaded = self.database_engine.single_threaded + if single_threaded or i > EVENT_QUEUE_ITERATIONS: + self._event_fetch_ongoing -= 1 + return + else: + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 + + event_id_lists = zip(*event_list)[0] + event_ids = [ + item for sublist in event_id_lists for item in sublist + ] + + rows = self._new_transaction( + conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids + ) + + row_dict = { + r["event_id"]: r + for r in rows + } + + # We only want to resolve deferreds from the main thread + def fire(lst, res): + for ids, d in lst: + if not d.called: + try: + with PreserveLoggingContext(): + d.callback([ + res[i] + for i in ids + if i in res + ]) + except Exception: + logger.exception("Failed to callback") + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list, row_dict) + except Exception as e: + logger.exception("do_fetch") + + # We only want to resolve deferreds from the main thread + def fire(evs): + for _, d in evs: + if not d.called: + with PreserveLoggingContext(): + d.errback(e) + + if event_list: + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list) + + @defer.inlineCallbacks + def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): + """Fetches events from the database using the _event_fetch_list. This + allows batch and bulk fetching of events - it allows us to fetch events + without having to create a new transaction for each request for events. + """ + if not events: + defer.returnValue({}) + + events_d = defer.Deferred() + with self._event_fetch_lock: + self._event_fetch_list.append( + (events, events_d) + ) + + self._event_fetch_lock.notify() + + if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: + self._event_fetch_ongoing += 1 + should_start = True + else: + should_start = False + + if should_start: + with PreserveLoggingContext(): + self.runWithConnection( + self._do_fetch + ) + + logger.debug("Loading %d events", len(events)) + with PreserveLoggingContext(): + rows = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) + + if not allow_rejected: + rows[:] = [r for r in rows if not r["rejects"]] + + res = yield make_deferred_yieldable(defer.gatherResults( + [ + preserve_fn(self._get_event_from_row)( + row["internal_metadata"], row["json"], row["redacts"], + rejected_reason=row["rejects"], + ) + for row in rows + ], + consumeErrors=True + )) + + defer.returnValue({ + e.event.event_id: e + for e in res if e + }) + + def _fetch_event_rows(self, txn, events): + rows = [] + N = 200 + for i in range(1 + len(events) / N): + evs = events[i * N:(i + 1) * N] + if not evs: + break + + sql = ( + "SELECT " + " e.event_id as event_id, " + " e.internal_metadata," + " e.json," + " r.redacts as redacts," + " rej.event_id as rejects " + " FROM event_json as e" + " LEFT JOIN rejections as rej USING (event_id)" + " LEFT JOIN redactions as r ON e.event_id = r.redacts" + " WHERE e.event_id IN (%s)" + ) % (",".join(["?"] * len(evs)),) + + txn.execute(sql, evs) + rows.extend(self.cursor_to_dict(txn)) + + return rows + + @defer.inlineCallbacks + def _get_event_from_row(self, internal_metadata, js, redacted, + rejected_reason=None): + with Measure(self._clock, "_get_event_from_row"): + d = json.loads(js) + internal_metadata = json.loads(internal_metadata) + + if rejected_reason: + rejected_reason = yield self._simple_select_one_onecol( + table="rejections", + keyvalues={"event_id": rejected_reason}, + retcol="reason", + desc="_get_event_from_row_rejected_reason", + ) + + original_ev = FrozenEvent( + d, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, + ) + + redacted_event = None + if redacted: + redacted_event = prune_event(original_ev) + + redaction_id = yield self._simple_select_one_onecol( + table="redactions", + keyvalues={"redacts": redacted_event.event_id}, + retcol="event_id", + desc="_get_event_from_row_redactions", + ) + + redacted_event.unsigned["redacted_by"] = redaction_id + # Get the redaction event. + + because = yield self.get_event( + redaction_id, + check_redacted=False, + allow_none=True, + ) + + if because: + # It's fine to do add the event directly, since get_pdu_json + # will serialise this field correctly + redacted_event.unsigned["redacted_because"] = because + + cache_entry = _EventCacheEntry( + event=original_ev, + redacted_event=redacted_event, + ) + + self._get_event_cache.prefill((original_ev.event_id,), cache_entry) + + defer.returnValue(cache_entry) + + +class EventsStore(EventsWorkerStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" @@ -234,8 +583,6 @@ class EventsStore(SQLBaseStore): psql_only=True, ) - self._event_persist_queue = _EventPeristenceQueue() - self._state_resolution_handler = hs.get_state_resolution_handler() def persist_events(self, events_and_contexts, backfilled=False): @@ -609,62 +956,6 @@ class EventsStore(SQLBaseStore): defer.returnValue((to_delete, to_insert)) - @defer.inlineCallbacks - def get_event(self, event_id, check_redacted=True, - get_prev_content=False, allow_rejected=False, - allow_none=False): - """Get an event from the database by event_id. - - Args: - event_id (str): The event_id of the event to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - allow_none (bool): If True, return None if no event found, if - False throw an exception. - - Returns: - Deferred : A FrozenEvent. - """ - events = yield self._get_events( - [event_id], - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - if not events and not allow_none: - raise SynapseError(404, "Could not find event %s" % (event_id,)) - - defer.returnValue(events[0] if events else None) - - @defer.inlineCallbacks - def get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - """Get events from the database - - Args: - event_ids (list): The event_ids of the events to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - - Returns: - Deferred : Dict from event_id to event. - """ - events = yield self._get_events( - event_ids, - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - defer.returnValue({e.event_id: e for e in events}) - @log_function def _persist_events_txn(self, txn, events_and_contexts, backfilled, delete_existing=False, state_delta_for_room={}, @@ -1375,292 +1666,6 @@ class EventsStore(SQLBaseStore): "have_events", f, ) - @defer.inlineCallbacks - def _get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - if not event_ids: - defer.returnValue([]) - - event_id_list = event_ids - event_ids = set(event_ids) - - event_entry_map = self._get_events_from_cache( - event_ids, - allow_rejected=allow_rejected, - ) - - missing_events_ids = [e for e in event_ids if e not in event_entry_map] - - if missing_events_ids: - missing_events = yield self._enqueue_events( - missing_events_ids, - check_redacted=check_redacted, - allow_rejected=allow_rejected, - ) - - event_entry_map.update(missing_events) - - events = [] - for event_id in event_id_list: - entry = event_entry_map.get(event_id, None) - if not entry: - continue - - if allow_rejected or not entry.event.rejected_reason: - if check_redacted and entry.redacted_event: - event = entry.redacted_event - else: - event = entry.event - - events.append(event) - - if get_prev_content: - if "replaces_state" in event.unsigned: - prev = yield self.get_event( - event.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - event.unsigned = dict(event.unsigned) - event.unsigned["prev_content"] = prev.content - event.unsigned["prev_sender"] = prev.sender - - defer.returnValue(events) - - def _invalidate_get_event_cache(self, event_id): - self._get_event_cache.invalidate((event_id,)) - - def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): - """Fetch events from the caches - - Args: - events (list(str)): list of event_ids to fetch - allow_rejected (bool): Whether to teturn events that were rejected - update_metrics (bool): Whether to update the cache hit ratio metrics - - Returns: - dict of event_id -> _EventCacheEntry for each event_id in cache. If - allow_rejected is `False` then there will still be an entry but it - will be `None` - """ - event_map = {} - - for event_id in events: - ret = self._get_event_cache.get( - (event_id,), None, - update_metrics=update_metrics, - ) - if not ret: - continue - - if allow_rejected or not ret.event.rejected_reason: - event_map[event_id] = ret - else: - event_map[event_id] = None - - return event_map - - def _do_fetch(self, conn): - """Takes a database connection and waits for requests for events from - the _event_fetch_list queue. - """ - event_list = [] - i = 0 - while True: - try: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if single_threaded or i > EVENT_QUEUE_ITERATIONS: - self._event_fetch_ongoing -= 1 - return - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 - - event_id_lists = zip(*event_list)[0] - event_ids = [ - item for sublist in event_id_lists for item in sublist - ] - - rows = self._new_transaction( - conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids - ) - - row_dict = { - r["event_id"]: r - for r in rows - } - - # We only want to resolve deferreds from the main thread - def fire(lst, res): - for ids, d in lst: - if not d.called: - try: - with PreserveLoggingContext(): - d.callback([ - res[i] - for i in ids - if i in res - ]) - except Exception: - logger.exception("Failed to callback") - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list, row_dict) - except Exception as e: - logger.exception("do_fetch") - - # We only want to resolve deferreds from the main thread - def fire(evs): - for _, d in evs: - if not d.called: - with PreserveLoggingContext(): - d.errback(e) - - if event_list: - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list) - - @defer.inlineCallbacks - def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): - """Fetches events from the database using the _event_fetch_list. This - allows batch and bulk fetching of events - it allows us to fetch events - without having to create a new transaction for each request for events. - """ - if not events: - defer.returnValue({}) - - events_d = defer.Deferred() - with self._event_fetch_lock: - self._event_fetch_list.append( - (events, events_d) - ) - - self._event_fetch_lock.notify() - - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - should_start = True - else: - should_start = False - - if should_start: - with PreserveLoggingContext(): - self.runWithConnection( - self._do_fetch - ) - - logger.debug("Loading %d events", len(events)) - with PreserveLoggingContext(): - rows = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) - - if not allow_rejected: - rows[:] = [r for r in rows if not r["rejects"]] - - res = yield make_deferred_yieldable(defer.gatherResults( - [ - preserve_fn(self._get_event_from_row)( - row["internal_metadata"], row["json"], row["redacts"], - rejected_reason=row["rejects"], - ) - for row in rows - ], - consumeErrors=True - )) - - defer.returnValue({ - e.event.event_id: e - for e in res if e - }) - - def _fetch_event_rows(self, txn, events): - rows = [] - N = 200 - for i in range(1 + len(events) / N): - evs = events[i * N:(i + 1) * N] - if not evs: - break - - sql = ( - "SELECT " - " e.event_id as event_id, " - " e.internal_metadata," - " e.json," - " r.redacts as redacts," - " rej.event_id as rejects " - " FROM event_json as e" - " LEFT JOIN rejections as rej USING (event_id)" - " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(evs)),) - - txn.execute(sql, evs) - rows.extend(self.cursor_to_dict(txn)) - - return rows - - @defer.inlineCallbacks - def _get_event_from_row(self, internal_metadata, js, redacted, - rejected_reason=None): - with Measure(self._clock, "_get_event_from_row"): - d = json.loads(js) - internal_metadata = json.loads(internal_metadata) - - if rejected_reason: - rejected_reason = yield self._simple_select_one_onecol( - table="rejections", - keyvalues={"event_id": rejected_reason}, - retcol="reason", - desc="_get_event_from_row_rejected_reason", - ) - - original_ev = FrozenEvent( - d, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - redacted_event = None - if redacted: - redacted_event = prune_event(original_ev) - - redaction_id = yield self._simple_select_one_onecol( - table="redactions", - keyvalues={"redacts": redacted_event.event_id}, - retcol="event_id", - desc="_get_event_from_row_redactions", - ) - - redacted_event.unsigned["redacted_by"] = redaction_id - # Get the redaction event. - - because = yield self.get_event( - redaction_id, - check_redacted=False, - allow_none=True, - ) - - if because: - # It's fine to do add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = because - - cache_entry = _EventCacheEntry( - event=original_ev, - redacted_event=redacted_event, - ) - - self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - - defer.returnValue(cache_entry) - @defer.inlineCallbacks def count_daily_messages(self): """ From 46244b27591f8674364ddefa9ae62cecb161fea3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:20:42 +0000 Subject: [PATCH 022/173] Split AS stores --- .../replication/slave/storage/appservice.py | 34 ++++--------------- synapse/storage/appservice.py | 26 +++++++++----- 2 files changed, 23 insertions(+), 37 deletions(-) diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py index 0d3f31a50..3b9ded009 100644 --- a/synapse/replication/slave/storage/appservice.py +++ b/synapse/replication/slave/storage/appservice.py @@ -13,33 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.config.appservice import load_appservices -from synapse.storage.appservice import _make_exclusive_regex +from synapse.storage.appservice import ( + ApplicationServiceWorkerStore, ApplicationServiceTransactionWorkerStore, +) -class SlavedApplicationServiceStore(BaseSlavedStore): - def __init__(self, db_conn, hs): - super(SlavedApplicationServiceStore, self).__init__(db_conn, hs) - self.services_cache = load_appservices( - hs.config.server_name, - hs.config.app_service_config_files - ) - self.exclusive_user_regex = _make_exclusive_regex(self.services_cache) - - get_app_service_by_token = DataStore.get_app_service_by_token.__func__ - get_app_service_by_user_id = DataStore.get_app_service_by_user_id.__func__ - get_app_services = DataStore.get_app_services.__func__ - get_new_events_for_appservice = DataStore.get_new_events_for_appservice.__func__ - create_appservice_txn = DataStore.create_appservice_txn.__func__ - get_appservices_by_state = DataStore.get_appservices_by_state.__func__ - get_oldest_unsent_txn = DataStore.get_oldest_unsent_txn.__func__ - _get_last_txn = DataStore._get_last_txn.__func__ - complete_appservice_txn = DataStore.complete_appservice_txn.__func__ - get_appservice_state = DataStore.get_appservice_state.__func__ - set_appservice_last_pos = DataStore.set_appservice_last_pos.__func__ - set_appservice_state = DataStore.set_appservice_state.__func__ - get_if_app_services_interested_in_user = ( - DataStore.get_if_app_services_interested_in_user.__func__ - ) +class SlavedApplicationServiceStore(ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore): + pass diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 79673b427..f66cc9809 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -20,6 +20,7 @@ from twisted.internet import defer from synapse.api.constants import Membership from synapse.appservice import AppServiceTransaction from synapse.config.appservice import load_appservices +from synapse.storage.events import EventsWorkerStore from synapse.storage.roommember import RoomsForUser from ._base import SQLBaseStore @@ -46,17 +47,16 @@ def _make_exclusive_regex(services_cache): return exclusive_user_regex -class ApplicationServiceStore(SQLBaseStore): - +class ApplicationServiceWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): - super(ApplicationServiceStore, self).__init__(db_conn, hs) - self.hostname = hs.hostname self.services_cache = load_appservices( hs.hostname, hs.config.app_service_config_files ) self.exclusive_user_regex = _make_exclusive_regex(self.services_cache) + super(ApplicationServiceWorkerStore, self).__init__(db_conn, hs) + def get_app_services(self): return self.services_cache @@ -112,6 +112,13 @@ class ApplicationServiceStore(SQLBaseStore): return service return None + +class ApplicationServiceStore(ApplicationServiceWorkerStore): + + def __init__(self, db_conn, hs): + super(ApplicationServiceStore, self).__init__(db_conn, hs) + self.hostname = hs.hostname + def get_app_service_rooms(self, service): """Get a list of RoomsForUser for this application service. @@ -184,11 +191,8 @@ class ApplicationServiceStore(SQLBaseStore): return rooms_for_user_matching_user_id -class ApplicationServiceTransactionStore(SQLBaseStore): - - def __init__(self, db_conn, hs): - super(ApplicationServiceTransactionStore, self).__init__(db_conn, hs) - +class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, + EventsWorkerStore): @defer.inlineCallbacks def get_appservices_by_state(self, state): """Get a list of application services based on their state. @@ -433,3 +437,7 @@ class ApplicationServiceTransactionStore(SQLBaseStore): events = yield self._get_events(event_ids) defer.returnValue((upper_bound, events)) + + +class ApplicationServiceTransactionStore(ApplicationServiceTransactionWorkerStore): + pass From 3dec9c66b3c2af1bf5b7283d7db443b65ebbd8a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 12:01:08 +0000 Subject: [PATCH 023/173] Split out RoomMemberStore --- synapse/replication/slave/storage/events.py | 30 +- synapse/storage/roommember.py | 360 ++++++++++---------- 2 files changed, 184 insertions(+), 206 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 74a81a0a5..0dc87aee7 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.events import EventsWorkerStore -from synapse.storage.roommember import RoomMemberStore +from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore from synapse.storage.signatures import SignatureStore @@ -39,7 +39,8 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(RoomMemberWorkerStore, EventsWorkerStore, + StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -69,18 +70,9 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. - get_rooms_for_user = RoomMemberStore.__dict__["get_rooms_for_user"] - get_users_in_room = RoomMemberStore.__dict__["get_users_in_room"] - get_hosts_in_room = RoomMemberStore.__dict__["get_hosts_in_room"] - get_users_who_share_room_with_user = ( - RoomMemberStore.__dict__["get_users_who_share_room_with_user"] - ) get_latest_event_ids_in_room = EventFederationStore.__dict__[ "get_latest_event_ids_in_room" ] - get_invited_rooms_for_user = RoomMemberStore.__dict__[ - "get_invited_rooms_for_user" - ] get_unread_event_push_actions_by_room_for_user = ( EventPushActionsStore.__dict__["get_unread_event_push_actions_by_room_for_user"] ) @@ -93,7 +85,6 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] ) - _get_joined_hosts_cache = RoomMemberStore.__dict__["_get_joined_hosts_cache"] has_room_changed_since = DataStore.has_room_changed_since.__func__ get_unread_push_actions_for_user_in_range_for_http = ( @@ -105,9 +96,6 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore get_push_action_users_in_range = ( DataStore.get_push_action_users_in_range.__func__ ) - get_rooms_for_user_where_membership_is = ( - DataStore.get_rooms_for_user_where_membership_is.__func__ - ) get_membership_changes_for_user = ( DataStore.get_membership_changes_for_user.__func__ ) @@ -116,27 +104,15 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore DataStore.get_room_events_stream_for_room.__func__ ) get_events_around = DataStore.get_events_around.__func__ - get_joined_users_from_state = DataStore.get_joined_users_from_state.__func__ - get_joined_users_from_context = DataStore.get_joined_users_from_context.__func__ - _get_joined_users_from_context = ( - RoomMemberStore.__dict__["_get_joined_users_from_context"] - ) - - get_joined_hosts = DataStore.get_joined_hosts.__func__ - _get_joined_hosts = RoomMemberStore.__dict__["_get_joined_hosts"] get_recent_events_for_room = DataStore.get_recent_events_for_room.__func__ get_room_events_stream_for_rooms = ( DataStore.get_room_events_stream_for_rooms.__func__ ) - is_host_joined = RoomMemberStore.__dict__["is_host_joined"] get_stream_token_for_event = DataStore.get_stream_token_for_event.__func__ _set_before_and_after = staticmethod(DataStore._set_before_and_after) - _get_rooms_for_user_where_membership_is_txn = ( - DataStore._get_rooms_for_user_where_membership_is_txn.__func__ - ) _get_events_around_txn = DataStore._get_events_around_txn.__func__ get_backfill_events = DataStore.get_backfill_events.__func__ diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 3e77fd390..6574fe74b 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -17,7 +17,7 @@ from twisted.internet import defer from collections import namedtuple -from ._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore from synapse.util.async import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -48,97 +48,7 @@ ProfileInfo = namedtuple( _MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update" -class RoomMemberStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(RoomMemberStore, self).__init__(db_conn, hs) - self.register_background_update_handler( - _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile - ) - - def _store_room_members_txn(self, txn, events, backfilled): - """Store a room member in the database. - """ - self._simple_insert_many_txn( - txn, - table="room_memberships", - values=[ - { - "event_id": event.event_id, - "user_id": event.state_key, - "sender": event.user_id, - "room_id": event.room_id, - "membership": event.membership, - "display_name": event.content.get("displayname", None), - "avatar_url": event.content.get("avatar_url", None), - } - for event in events - ] - ) - - for event in events: - txn.call_after( - self._membership_stream_cache.entity_has_changed, - event.state_key, event.internal_metadata.stream_ordering - ) - txn.call_after( - self.get_invited_rooms_for_user.invalidate, (event.state_key,) - ) - - # We update the local_invites table only if the event is "current", - # i.e., its something that has just happened. - # The only current event that can also be an outlier is if its an - # invite that has come in across federation. - is_new_state = not backfilled and ( - not event.internal_metadata.is_outlier() - or event.internal_metadata.is_invite_from_remote() - ) - is_mine = self.hs.is_mine_id(event.state_key) - if is_new_state and is_mine: - if event.membership == Membership.INVITE: - self._simple_insert_txn( - txn, - table="local_invites", - values={ - "event_id": event.event_id, - "invitee": event.state_key, - "inviter": event.sender, - "room_id": event.room_id, - "stream_id": event.internal_metadata.stream_ordering, - } - ) - else: - sql = ( - "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - txn.execute(sql, ( - event.internal_metadata.stream_ordering, - event.event_id, - event.room_id, - event.state_key, - )) - - @defer.inlineCallbacks - def locally_reject_invite(self, user_id, room_id): - sql = ( - "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - def f(txn, stream_ordering): - txn.execute(sql, ( - stream_ordering, - True, - room_id, - user_id, - )) - - with self._stream_id_gen.get_next() as stream_ordering: - yield self.runInteraction("locally_reject_invite", f, stream_ordering) - +class RoomMemberWorkerStore(EventsWorkerStore): @cachedInlineCallbacks(max_entries=100000, iterable=True, cache_context=True) def get_hosts_in_room(self, room_id, cache_context): """Returns the set of all hosts currently in the room @@ -295,89 +205,6 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(user_who_share_room) - def forget(self, user_id, room_id): - """Indicate that user_id wishes to discard history for room_id.""" - def f(txn): - sql = ( - "UPDATE" - " room_memberships" - " SET" - " forgotten = 1" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - ) - txn.execute(sql, (user_id, room_id)) - - txn.call_after(self.was_forgotten_at.invalidate_all) - txn.call_after(self.did_forget.invalidate, (user_id, room_id)) - self._invalidate_cache_and_stream( - txn, self.who_forgot_in_room, (room_id,) - ) - return self.runInteraction("forget_membership", f) - - @cachedInlineCallbacks(num_args=2) - def did_forget(self, user_id, room_id): - """Returns whether user_id has elected to discard history for room_id. - - Returns False if they have since re-joined.""" - def f(txn): - sql = ( - "SELECT" - " COUNT(*)" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - " AND" - " forgotten = 0" - ) - txn.execute(sql, (user_id, room_id)) - rows = txn.fetchall() - return rows[0][0] - count = yield self.runInteraction("did_forget_membership", f) - defer.returnValue(count == 0) - - @cachedInlineCallbacks(num_args=3) - def was_forgotten_at(self, user_id, room_id, event_id): - """Returns whether user_id has elected to discard history for room_id at - event_id. - - event_id must be a membership event.""" - def f(txn): - sql = ( - "SELECT" - " forgotten" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - " AND" - " event_id = ?" - ) - txn.execute(sql, (user_id, room_id, event_id)) - rows = txn.fetchall() - return rows[0][0] - forgot = yield self.runInteraction("did_forget_membership_at", f) - defer.returnValue(forgot == 1) - - @cached() - def who_forgot_in_room(self, room_id): - return self._simple_select_list( - table="room_memberships", - retcols=("user_id", "event_id"), - keyvalues={ - "room_id": room_id, - "forgotten": 1, - }, - desc="who_forgot" - ) - def get_joined_users_from_context(self, event, context): state_group = context.state_group if not state_group: @@ -600,6 +427,185 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(joined_hosts) + @cached(max_entries=10000, iterable=True) + def _get_joined_hosts_cache(self, room_id): + return _JoinedHostsCache(self, room_id) + + +class RoomMemberStore(RoomMemberWorkerStore): + def __init__(self, db_conn, hs): + super(RoomMemberStore, self).__init__(db_conn, hs) + self.register_background_update_handler( + _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile + ) + + def _store_room_members_txn(self, txn, events, backfilled): + """Store a room member in the database. + """ + self._simple_insert_many_txn( + txn, + table="room_memberships", + values=[ + { + "event_id": event.event_id, + "user_id": event.state_key, + "sender": event.user_id, + "room_id": event.room_id, + "membership": event.membership, + "display_name": event.content.get("displayname", None), + "avatar_url": event.content.get("avatar_url", None), + } + for event in events + ] + ) + + for event in events: + txn.call_after( + self._membership_stream_cache.entity_has_changed, + event.state_key, event.internal_metadata.stream_ordering + ) + txn.call_after( + self.get_invited_rooms_for_user.invalidate, (event.state_key,) + ) + + # We update the local_invites table only if the event is "current", + # i.e., its something that has just happened. + # The only current event that can also be an outlier is if its an + # invite that has come in across federation. + is_new_state = not backfilled and ( + not event.internal_metadata.is_outlier() + or event.internal_metadata.is_invite_from_remote() + ) + is_mine = self.hs.is_mine_id(event.state_key) + if is_new_state and is_mine: + if event.membership == Membership.INVITE: + self._simple_insert_txn( + txn, + table="local_invites", + values={ + "event_id": event.event_id, + "invitee": event.state_key, + "inviter": event.sender, + "room_id": event.room_id, + "stream_id": event.internal_metadata.stream_ordering, + } + ) + else: + sql = ( + "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" + " room_id = ? AND invitee = ? AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + txn.execute(sql, ( + event.internal_metadata.stream_ordering, + event.event_id, + event.room_id, + event.state_key, + )) + + @defer.inlineCallbacks + def locally_reject_invite(self, user_id, room_id): + sql = ( + "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" + " room_id = ? AND invitee = ? AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + def f(txn, stream_ordering): + txn.execute(sql, ( + stream_ordering, + True, + room_id, + user_id, + )) + + with self._stream_id_gen.get_next() as stream_ordering: + yield self.runInteraction("locally_reject_invite", f, stream_ordering) + + def forget(self, user_id, room_id): + """Indicate that user_id wishes to discard history for room_id.""" + def f(txn): + sql = ( + "UPDATE" + " room_memberships" + " SET" + " forgotten = 1" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + ) + txn.execute(sql, (user_id, room_id)) + + txn.call_after(self.was_forgotten_at.invalidate_all) + txn.call_after(self.did_forget.invalidate, (user_id, room_id)) + self._invalidate_cache_and_stream( + txn, self.who_forgot_in_room, (room_id,) + ) + return self.runInteraction("forget_membership", f) + + @cachedInlineCallbacks(num_args=2) + def did_forget(self, user_id, room_id): + """Returns whether user_id has elected to discard history for room_id. + + Returns False if they have since re-joined.""" + def f(txn): + sql = ( + "SELECT" + " COUNT(*)" + " FROM" + " room_memberships" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + " AND" + " forgotten = 0" + ) + txn.execute(sql, (user_id, room_id)) + rows = txn.fetchall() + return rows[0][0] + count = yield self.runInteraction("did_forget_membership", f) + defer.returnValue(count == 0) + + @cachedInlineCallbacks(num_args=3) + def was_forgotten_at(self, user_id, room_id, event_id): + """Returns whether user_id has elected to discard history for room_id at + event_id. + + event_id must be a membership event.""" + def f(txn): + sql = ( + "SELECT" + " forgotten" + " FROM" + " room_memberships" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + " AND" + " event_id = ?" + ) + txn.execute(sql, (user_id, room_id, event_id)) + rows = txn.fetchall() + return rows[0][0] + forgot = yield self.runInteraction("did_forget_membership_at", f) + defer.returnValue(forgot == 1) + + @cached() + def who_forgot_in_room(self, room_id): + return self._simple_select_list( + table="room_memberships", + retcols=("user_id", "event_id"), + keyvalues={ + "room_id": room_id, + "forgotten": 1, + }, + desc="who_forgot" + ) + @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( @@ -675,10 +681,6 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(result) - @cached(max_entries=10000, iterable=True) - def _get_joined_hosts_cache(self, room_id): - return _JoinedHostsCache(self, room_id) - class _JoinedHostsCache(object): """Cache for joined hosts in a room that is optimised to handle updates From faeb369f158a3ca6ba8f48ca1d551b2b53f4c53a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 15:19:54 +0000 Subject: [PATCH 024/173] Fix missing invalidations for receipt storage --- synapse/replication/slave/storage/receipts.py | 2 ++ synapse/storage/receipts.py | 28 +++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index f0e29e983..1647072f6 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -53,6 +53,8 @@ class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): self.get_last_receipt_event_id_for_user.invalidate( (user_id, room_id, receipt_type) ) + self._invalidate_get_users_with_receipts_in_room(room_id, receipt_type, user_id) + self.get_receipts_for_room.invalidate((room_id, receipt_type)) def process_replication_rows(self, stream_name, token, rows): if stream_name == "receipts": diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 40530632c..eac8694e0 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -292,20 +292,6 @@ class ReceiptsWorkerStore(SQLBaseStore): "get_all_updated_receipts", get_all_updated_receipts_txn ) - -class ReceiptsStore(ReceiptsWorkerStore): - def __init__(self, db_conn, hs): - # We instantiate this first as the ReceiptsWorkerStore constructor - # needs to be able to call get_max_receipt_stream_id - self._receipts_id_gen = StreamIdGenerator( - db_conn, "receipts_linearized", "stream_id" - ) - - super(ReceiptsStore, self).__init__(db_conn, hs) - - def get_max_receipt_stream_id(self): - return self._receipts_id_gen.get_current_token() - def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, user_id): if receipt_type != "m.read": @@ -326,6 +312,20 @@ class ReceiptsStore(ReceiptsWorkerStore): self.get_users_with_read_receipts_in_room.invalidate((room_id,)) + +class ReceiptsStore(ReceiptsWorkerStore): + def __init__(self, db_conn, hs): + # We instantiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = StreamIdGenerator( + db_conn, "receipts_linearized", "stream_id" + ) + + super(ReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() + def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): txn.call_after( From bb73f55fc6559658080d6cdd5672506fda7843ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:31:16 +0000 Subject: [PATCH 025/173] Use absolute imports --- synapse/replication/slave/storage/account_data.py | 4 ++-- synapse/storage/account_data.py | 4 ++-- synapse/storage/tags.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 6c95261aa..f76dc5a56 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import BaseSlavedStore -from ._slaved_id_tracker import SlavedIdTracker +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.storage.account_data import AccountDataWorkerStore from synapse.storage.tags import TagsWorkerStore diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 66fed4bdc..0d6acbd9a 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -15,8 +15,8 @@ from twisted.internet import defer -from ._base import SQLBaseStore -from .util.id_generators import StreamIdGenerator +from synapse.storage._base import SQLBaseStore +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 484d66991..a8d0bf80c 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .account_data import AccountDataWorkerStore +from synapse.storage.account_data import AccountDataWorkerStore from synapse.util.caches.descriptors import cached from twisted.internet import defer From 26d37f7a63cf33ac8e3f8346a6e982c7b2f6cbb1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:33:55 +0000 Subject: [PATCH 026/173] Update copyright --- synapse/replication/slave/storage/account_data.py | 1 + synapse/storage/account_data.py | 1 + synapse/storage/tags.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index f76dc5a56..6c8d2954d 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 0d6acbd9a..466194e96 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index a8d0bf80c..fc46bf7bb 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 7e6cf89dc2a7fc3c159459b34562719ab61713ba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:39:19 +0000 Subject: [PATCH 027/173] Update copyright --- synapse/replication/slave/storage/push_rule.py | 1 + synapse/storage/__init__.py | 1 + synapse/storage/push_rule.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 0e3d9a87d..bb2c40b6e 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index cd2759858..26faf985b 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index b35bd7a64..583efb7bd 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From c2ecfcc3a4374d47db38465510e4c480da353ebb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:41:34 +0000 Subject: [PATCH 028/173] Update copyright --- synapse/replication/slave/storage/pushers.py | 1 + synapse/storage/pusher.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index e352c3235..a7cd5a729 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index b0159c70c..f4af3e4ca 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From eb9b5eec819a2d594bc237652d831e64517d0c93 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:42:39 +0000 Subject: [PATCH 029/173] Update copyright --- synapse/storage/event_push_actions.py | 1 + synapse/storage/events.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 124583835..4cabf70ad 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc..c636da4b7 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 3bd760628bc0b178f6709b9ea3439a44ebcebab2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:49:18 +0000 Subject: [PATCH 030/173] _event_persist_queue shouldn't be in worker store --- synapse/storage/events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 681a33314..32da81c47 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -200,10 +200,6 @@ def _retry_on_integrity_error(func): class EventsWorkerStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(EventsWorkerStore, self).__init__(db_conn, hs) - - self._event_persist_queue = _EventPeristenceQueue() @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -583,6 +579,10 @@ class EventsStore(EventsWorkerStore): psql_only=True, ) + + + self._event_persist_queue = _EventPeristenceQueue() + self._state_resolution_handler = hs.get_state_resolution_handler() def persist_events(self, events_and_contexts, backfilled=False): From 5d0f6658489287dc19ce9e089fc61b06208f3fc4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:49:58 +0000 Subject: [PATCH 031/173] Remove redundant clock --- synapse/storage/events.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 32da81c47..f6aa3612e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -550,7 +550,6 @@ class EventsStore(EventsWorkerStore): def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) - self._clock = hs.get_clock() self.register_background_update_handler( self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts ) @@ -579,8 +578,6 @@ class EventsStore(EventsWorkerStore): psql_only=True, ) - - self._event_persist_queue = _EventPeristenceQueue() self._state_resolution_handler = hs.get_state_resolution_handler() From bf8a36e0805a1626d335f78ddb90ba25220bbfa1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:52:10 +0000 Subject: [PATCH 032/173] Update copyright --- synapse/replication/slave/storage/events.py | 1 + synapse/storage/events.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 74a81a0a5..f35cba289 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index f6aa3612e..84a6f6782 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From eba93b05bfaa1e6cb5bd66621021f6fff750ab97 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 11:01:21 +0000 Subject: [PATCH 033/173] Split EventsWorkerStore into separate file --- synapse/replication/slave/storage/events.py | 2 +- synapse/storage/events.py | 365 +----------------- synapse/storage/events_worker.py | 395 ++++++++++++++++++++ 3 files changed, 401 insertions(+), 361 deletions(-) create mode 100644 synapse/storage/events_worker.py diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f35cba289..5edfacc9e 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore -from synapse.storage.events import EventsWorkerStore +from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 84a6f6782..99d6cca58 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -13,16 +13,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore -from twisted.internet import defer, reactor +from synapse.storage.events_worker import EventsWorkerStore -from synapse.events import FrozenEvent, USE_FROZEN_DICTS -from synapse.events.utils import prune_event +from twisted.internet import defer + +from synapse.events import USE_FROZEN_DICTS from synapse.util.async import ObservableDeferred from synapse.util.logcontext import ( - preserve_fn, PreserveLoggingContext, make_deferred_yieldable + PreserveLoggingContext, make_deferred_yieldable ) from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -62,16 +62,6 @@ def encode_json(json_object): return json.dumps(json_object, ensure_ascii=False) -# These values are used in the `enqueus_event` and `_do_fetch` methods to -# control how we batch/bulk fetch events from the database. -# The values are plucked out of thing air to make initial sync run faster -# on jki.re -# TODO: Make these configurable. -EVENT_QUEUE_THREADS = 3 # Max number of threads that will fetch events -EVENT_QUEUE_ITERATIONS = 3 # No. times we block waiting for requests for events -EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events - - class _EventPeristenceQueue(object): """Queues up events so that they can be persisted in bulk with only one concurrent transaction per room. @@ -200,351 +190,6 @@ def _retry_on_integrity_error(func): return f -class EventsWorkerStore(SQLBaseStore): - - @defer.inlineCallbacks - def get_event(self, event_id, check_redacted=True, - get_prev_content=False, allow_rejected=False, - allow_none=False): - """Get an event from the database by event_id. - - Args: - event_id (str): The event_id of the event to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - allow_none (bool): If True, return None if no event found, if - False throw an exception. - - Returns: - Deferred : A FrozenEvent. - """ - events = yield self._get_events( - [event_id], - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - if not events and not allow_none: - raise SynapseError(404, "Could not find event %s" % (event_id,)) - - defer.returnValue(events[0] if events else None) - - @defer.inlineCallbacks - def get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - """Get events from the database - - Args: - event_ids (list): The event_ids of the events to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - - Returns: - Deferred : Dict from event_id to event. - """ - events = yield self._get_events( - event_ids, - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - defer.returnValue({e.event_id: e for e in events}) - - @defer.inlineCallbacks - def _get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - if not event_ids: - defer.returnValue([]) - - event_id_list = event_ids - event_ids = set(event_ids) - - event_entry_map = self._get_events_from_cache( - event_ids, - allow_rejected=allow_rejected, - ) - - missing_events_ids = [e for e in event_ids if e not in event_entry_map] - - if missing_events_ids: - missing_events = yield self._enqueue_events( - missing_events_ids, - check_redacted=check_redacted, - allow_rejected=allow_rejected, - ) - - event_entry_map.update(missing_events) - - events = [] - for event_id in event_id_list: - entry = event_entry_map.get(event_id, None) - if not entry: - continue - - if allow_rejected or not entry.event.rejected_reason: - if check_redacted and entry.redacted_event: - event = entry.redacted_event - else: - event = entry.event - - events.append(event) - - if get_prev_content: - if "replaces_state" in event.unsigned: - prev = yield self.get_event( - event.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - event.unsigned = dict(event.unsigned) - event.unsigned["prev_content"] = prev.content - event.unsigned["prev_sender"] = prev.sender - - defer.returnValue(events) - - def _invalidate_get_event_cache(self, event_id): - self._get_event_cache.invalidate((event_id,)) - - def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): - """Fetch events from the caches - - Args: - events (list(str)): list of event_ids to fetch - allow_rejected (bool): Whether to teturn events that were rejected - update_metrics (bool): Whether to update the cache hit ratio metrics - - Returns: - dict of event_id -> _EventCacheEntry for each event_id in cache. If - allow_rejected is `False` then there will still be an entry but it - will be `None` - """ - event_map = {} - - for event_id in events: - ret = self._get_event_cache.get( - (event_id,), None, - update_metrics=update_metrics, - ) - if not ret: - continue - - if allow_rejected or not ret.event.rejected_reason: - event_map[event_id] = ret - else: - event_map[event_id] = None - - return event_map - - def _do_fetch(self, conn): - """Takes a database connection and waits for requests for events from - the _event_fetch_list queue. - """ - event_list = [] - i = 0 - while True: - try: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if single_threaded or i > EVENT_QUEUE_ITERATIONS: - self._event_fetch_ongoing -= 1 - return - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 - - event_id_lists = zip(*event_list)[0] - event_ids = [ - item for sublist in event_id_lists for item in sublist - ] - - rows = self._new_transaction( - conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids - ) - - row_dict = { - r["event_id"]: r - for r in rows - } - - # We only want to resolve deferreds from the main thread - def fire(lst, res): - for ids, d in lst: - if not d.called: - try: - with PreserveLoggingContext(): - d.callback([ - res[i] - for i in ids - if i in res - ]) - except Exception: - logger.exception("Failed to callback") - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list, row_dict) - except Exception as e: - logger.exception("do_fetch") - - # We only want to resolve deferreds from the main thread - def fire(evs): - for _, d in evs: - if not d.called: - with PreserveLoggingContext(): - d.errback(e) - - if event_list: - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list) - - @defer.inlineCallbacks - def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): - """Fetches events from the database using the _event_fetch_list. This - allows batch and bulk fetching of events - it allows us to fetch events - without having to create a new transaction for each request for events. - """ - if not events: - defer.returnValue({}) - - events_d = defer.Deferred() - with self._event_fetch_lock: - self._event_fetch_list.append( - (events, events_d) - ) - - self._event_fetch_lock.notify() - - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - should_start = True - else: - should_start = False - - if should_start: - with PreserveLoggingContext(): - self.runWithConnection( - self._do_fetch - ) - - logger.debug("Loading %d events", len(events)) - with PreserveLoggingContext(): - rows = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) - - if not allow_rejected: - rows[:] = [r for r in rows if not r["rejects"]] - - res = yield make_deferred_yieldable(defer.gatherResults( - [ - preserve_fn(self._get_event_from_row)( - row["internal_metadata"], row["json"], row["redacts"], - rejected_reason=row["rejects"], - ) - for row in rows - ], - consumeErrors=True - )) - - defer.returnValue({ - e.event.event_id: e - for e in res if e - }) - - def _fetch_event_rows(self, txn, events): - rows = [] - N = 200 - for i in range(1 + len(events) / N): - evs = events[i * N:(i + 1) * N] - if not evs: - break - - sql = ( - "SELECT " - " e.event_id as event_id, " - " e.internal_metadata," - " e.json," - " r.redacts as redacts," - " rej.event_id as rejects " - " FROM event_json as e" - " LEFT JOIN rejections as rej USING (event_id)" - " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(evs)),) - - txn.execute(sql, evs) - rows.extend(self.cursor_to_dict(txn)) - - return rows - - @defer.inlineCallbacks - def _get_event_from_row(self, internal_metadata, js, redacted, - rejected_reason=None): - with Measure(self._clock, "_get_event_from_row"): - d = json.loads(js) - internal_metadata = json.loads(internal_metadata) - - if rejected_reason: - rejected_reason = yield self._simple_select_one_onecol( - table="rejections", - keyvalues={"event_id": rejected_reason}, - retcol="reason", - desc="_get_event_from_row_rejected_reason", - ) - - original_ev = FrozenEvent( - d, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - redacted_event = None - if redacted: - redacted_event = prune_event(original_ev) - - redaction_id = yield self._simple_select_one_onecol( - table="redactions", - keyvalues={"redacts": redacted_event.event_id}, - retcol="event_id", - desc="_get_event_from_row_redactions", - ) - - redacted_event.unsigned["redacted_by"] = redaction_id - # Get the redaction event. - - because = yield self.get_event( - redaction_id, - check_redacted=False, - allow_none=True, - ) - - if because: - # It's fine to do add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = because - - cache_entry = _EventCacheEntry( - event=original_ev, - redacted_event=redacted_event, - ) - - self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - - defer.returnValue(cache_entry) - - class EventsStore(EventsWorkerStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py new file mode 100644 index 000000000..86c3b48ad --- /dev/null +++ b/synapse/storage/events_worker.py @@ -0,0 +1,395 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ._base import SQLBaseStore + +from twisted.internet import defer, reactor + +from synapse.events import FrozenEvent +from synapse.events.utils import prune_event + +from synapse.util.logcontext import ( + preserve_fn, PreserveLoggingContext, make_deferred_yieldable +) +from synapse.util.metrics import Measure +from synapse.api.errors import SynapseError + +from collections import namedtuple + +import logging +import ujson as json + +# these are only included to make the type annotations work +from synapse.events import EventBase # noqa: F401 +from synapse.events.snapshot import EventContext # noqa: F401 + +logger = logging.getLogger(__name__) + + +# These values are used in the `enqueus_event` and `_do_fetch` methods to +# control how we batch/bulk fetch events from the database. +# The values are plucked out of thing air to make initial sync run faster +# on jki.re +# TODO: Make these configurable. +EVENT_QUEUE_THREADS = 3 # Max number of threads that will fetch events +EVENT_QUEUE_ITERATIONS = 3 # No. times we block waiting for requests for events +EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events + + +_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event")) + + +class EventsWorkerStore(SQLBaseStore): + + @defer.inlineCallbacks + def get_event(self, event_id, check_redacted=True, + get_prev_content=False, allow_rejected=False, + allow_none=False): + """Get an event from the database by event_id. + + Args: + event_id (str): The event_id of the event to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + allow_none (bool): If True, return None if no event found, if + False throw an exception. + + Returns: + Deferred : A FrozenEvent. + """ + events = yield self._get_events( + [event_id], + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + if not events and not allow_none: + raise SynapseError(404, "Could not find event %s" % (event_id,)) + + defer.returnValue(events[0] if events else None) + + @defer.inlineCallbacks + def get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + """Get events from the database + + Args: + event_ids (list): The event_ids of the events to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + + Returns: + Deferred : Dict from event_id to event. + """ + events = yield self._get_events( + event_ids, + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + defer.returnValue({e.event_id: e for e in events}) + + @defer.inlineCallbacks + def _get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + if not event_ids: + defer.returnValue([]) + + event_id_list = event_ids + event_ids = set(event_ids) + + event_entry_map = self._get_events_from_cache( + event_ids, + allow_rejected=allow_rejected, + ) + + missing_events_ids = [e for e in event_ids if e not in event_entry_map] + + if missing_events_ids: + missing_events = yield self._enqueue_events( + missing_events_ids, + check_redacted=check_redacted, + allow_rejected=allow_rejected, + ) + + event_entry_map.update(missing_events) + + events = [] + for event_id in event_id_list: + entry = event_entry_map.get(event_id, None) + if not entry: + continue + + if allow_rejected or not entry.event.rejected_reason: + if check_redacted and entry.redacted_event: + event = entry.redacted_event + else: + event = entry.event + + events.append(event) + + if get_prev_content: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender + + defer.returnValue(events) + + def _invalidate_get_event_cache(self, event_id): + self._get_event_cache.invalidate((event_id,)) + + def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): + """Fetch events from the caches + + Args: + events (list(str)): list of event_ids to fetch + allow_rejected (bool): Whether to teturn events that were rejected + update_metrics (bool): Whether to update the cache hit ratio metrics + + Returns: + dict of event_id -> _EventCacheEntry for each event_id in cache. If + allow_rejected is `False` then there will still be an entry but it + will be `None` + """ + event_map = {} + + for event_id in events: + ret = self._get_event_cache.get( + (event_id,), None, + update_metrics=update_metrics, + ) + if not ret: + continue + + if allow_rejected or not ret.event.rejected_reason: + event_map[event_id] = ret + else: + event_map[event_id] = None + + return event_map + + def _do_fetch(self, conn): + """Takes a database connection and waits for requests for events from + the _event_fetch_list queue. + """ + event_list = [] + i = 0 + while True: + try: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + single_threaded = self.database_engine.single_threaded + if single_threaded or i > EVENT_QUEUE_ITERATIONS: + self._event_fetch_ongoing -= 1 + return + else: + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 + + event_id_lists = zip(*event_list)[0] + event_ids = [ + item for sublist in event_id_lists for item in sublist + ] + + rows = self._new_transaction( + conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids + ) + + row_dict = { + r["event_id"]: r + for r in rows + } + + # We only want to resolve deferreds from the main thread + def fire(lst, res): + for ids, d in lst: + if not d.called: + try: + with PreserveLoggingContext(): + d.callback([ + res[i] + for i in ids + if i in res + ]) + except Exception: + logger.exception("Failed to callback") + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list, row_dict) + except Exception as e: + logger.exception("do_fetch") + + # We only want to resolve deferreds from the main thread + def fire(evs): + for _, d in evs: + if not d.called: + with PreserveLoggingContext(): + d.errback(e) + + if event_list: + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list) + + @defer.inlineCallbacks + def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): + """Fetches events from the database using the _event_fetch_list. This + allows batch and bulk fetching of events - it allows us to fetch events + without having to create a new transaction for each request for events. + """ + if not events: + defer.returnValue({}) + + events_d = defer.Deferred() + with self._event_fetch_lock: + self._event_fetch_list.append( + (events, events_d) + ) + + self._event_fetch_lock.notify() + + if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: + self._event_fetch_ongoing += 1 + should_start = True + else: + should_start = False + + if should_start: + with PreserveLoggingContext(): + self.runWithConnection( + self._do_fetch + ) + + logger.debug("Loading %d events", len(events)) + with PreserveLoggingContext(): + rows = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) + + if not allow_rejected: + rows[:] = [r for r in rows if not r["rejects"]] + + res = yield make_deferred_yieldable(defer.gatherResults( + [ + preserve_fn(self._get_event_from_row)( + row["internal_metadata"], row["json"], row["redacts"], + rejected_reason=row["rejects"], + ) + for row in rows + ], + consumeErrors=True + )) + + defer.returnValue({ + e.event.event_id: e + for e in res if e + }) + + def _fetch_event_rows(self, txn, events): + rows = [] + N = 200 + for i in range(1 + len(events) / N): + evs = events[i * N:(i + 1) * N] + if not evs: + break + + sql = ( + "SELECT " + " e.event_id as event_id, " + " e.internal_metadata," + " e.json," + " r.redacts as redacts," + " rej.event_id as rejects " + " FROM event_json as e" + " LEFT JOIN rejections as rej USING (event_id)" + " LEFT JOIN redactions as r ON e.event_id = r.redacts" + " WHERE e.event_id IN (%s)" + ) % (",".join(["?"] * len(evs)),) + + txn.execute(sql, evs) + rows.extend(self.cursor_to_dict(txn)) + + return rows + + @defer.inlineCallbacks + def _get_event_from_row(self, internal_metadata, js, redacted, + rejected_reason=None): + with Measure(self._clock, "_get_event_from_row"): + d = json.loads(js) + internal_metadata = json.loads(internal_metadata) + + if rejected_reason: + rejected_reason = yield self._simple_select_one_onecol( + table="rejections", + keyvalues={"event_id": rejected_reason}, + retcol="reason", + desc="_get_event_from_row_rejected_reason", + ) + + original_ev = FrozenEvent( + d, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, + ) + + redacted_event = None + if redacted: + redacted_event = prune_event(original_ev) + + redaction_id = yield self._simple_select_one_onecol( + table="redactions", + keyvalues={"redacts": redacted_event.event_id}, + retcol="event_id", + desc="_get_event_from_row_redactions", + ) + + redacted_event.unsigned["redacted_by"] = redaction_id + # Get the redaction event. + + because = yield self.get_event( + redaction_id, + check_redacted=False, + allow_none=True, + ) + + if because: + # It's fine to do add the event directly, since get_pdu_json + # will serialise this field correctly + redacted_event.unsigned["redacted_because"] = because + + cache_entry = _EventCacheEntry( + event=original_ev, + redacted_event=redacted_event, + ) + + self._get_event_cache.prefill((original_ev.event_id,), cache_entry) + + defer.returnValue(cache_entry) From 70349872c2dcff20a7b174bf0fcfdd5b8e47eec3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 11:14:35 +0000 Subject: [PATCH 034/173] Update copyright --- synapse/replication/slave/storage/events.py | 1 + synapse/storage/events.py | 1 + synapse/storage/roommember.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 0dc87aee7..ef7a42d80 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 681a33314..3e3229b40 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 6574fe74b..b9158b989 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 6ae9a3d2a6cd9db7e07fda270728cf15351a5a0b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:45:00 +0000 Subject: [PATCH 035/173] Update copyright --- synapse/replication/slave/storage/appservice.py | 1 + synapse/storage/appservice.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py index 3b9ded009..8cae3076f 100644 --- a/synapse/replication/slave/storage/appservice.py +++ b/synapse/replication/slave/storage/appservice.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index f66cc9809..063906f5a 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 8956f0147aecc27a0590eeeecd130d05b7a55767 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Feb 2018 10:06:51 +0000 Subject: [PATCH 036/173] Add comment --- synapse/storage/appservice.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 063906f5a..90fb51d43 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -441,4 +441,8 @@ class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, class ApplicationServiceTransactionStore(ApplicationServiceTransactionWorkerStore): + # This is currently empty due to there not being any AS storage functions + # that can't be run on the workers. Since this may change in future, and + # to keep consistency with the other stores, we keep this empty class for + # now. pass From 493e25d5545389264f696be0e07544bf82a0818a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Feb 2018 12:01:36 +0000 Subject: [PATCH 037/173] Move storage functions for push calculations This will allow push actions for an event to be calculated on workers. --- synapse/app/pusher.py | 5 - synapse/app/synchrotron.py | 8 +- synapse/storage/event_push_actions.py | 126 +++++++++++++------------- synapse/storage/push_rule.py | 14 ++- synapse/storage/pusher.py | 22 +++-- synapse/storage/roommember.py | 24 ++--- 6 files changed, 101 insertions(+), 98 deletions(-) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 32ccea3f1..98a4a7c62 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -32,7 +32,6 @@ from synapse.replication.tcp.client import ReplicationClientHandler from synapse.server import HomeServer from synapse.storage import DataStore from synapse.storage.engines import create_engine -from synapse.storage.roommember import RoomMemberStore from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, preserve_fn from synapse.util.manhole import manhole @@ -75,10 +74,6 @@ class PusherSlaveStore( DataStore.get_profile_displayname.__func__ ) - who_forgot_in_room = ( - RoomMemberStore.__dict__["who_forgot_in_room"] - ) - class PusherServer(HomeServer): def setup(self): diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index f87531f1b..abe91dcfb 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -62,8 +62,6 @@ logger = logging.getLogger("synapse.app.synchrotron") class SynchrotronSlavedStore( - SlavedPushRuleStore, - SlavedEventStore, SlavedReceiptsStore, SlavedAccountDataStore, SlavedApplicationServiceStore, @@ -73,14 +71,12 @@ class SynchrotronSlavedStore( SlavedGroupServerStore, SlavedDeviceInboxStore, SlavedDeviceStore, + SlavedPushRuleStore, + SlavedEventStore, SlavedClientIpStore, RoomStore, BaseSlavedStore, ): - who_forgot_in_room = ( - RoomMemberStore.__dict__["who_forgot_in_room"] - ) - did_forget = ( RoomMemberStore.__dict__["did_forget"] ) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index fe6887414..6454045c2 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -380,6 +380,69 @@ class EventPushActionsWorkerStore(SQLBaseStore): # Now return the first `limit` defer.returnValue(notifs[:limit]) + def add_push_actions_to_staging(self, event_id, user_id_actions): + """Add the push actions for the event to the push action staging area. + + Args: + event_id (str) + user_id_actions (dict[str, list[dict|str])]): A dictionary mapping + user_id to list of push actions, where an action can either be + a string or dict. + + Returns: + Deferred + """ + + if not user_id_actions: + return + + # This is a helper function for generating the necessary tuple that + # can be used to inert into the `event_push_actions_staging` table. + def _gen_entry(user_id, actions): + is_highlight = 1 if _action_has_highlight(actions) else 0 + return ( + event_id, # event_id column + user_id, # user_id column + _serialize_action(actions, is_highlight), # actions column + 1, # notif column + is_highlight, # highlight column + ) + + def _add_push_actions_to_staging_txn(txn): + # We don't use _simple_insert_many here to avoid the overhead + # of generating lists of dicts. + + sql = """ + INSERT INTO event_push_actions_staging + (event_id, user_id, actions, notif, highlight) + VALUES (?, ?, ?, ?, ?) + """ + + txn.executemany(sql, ( + _gen_entry(user_id, actions) + for user_id, actions in user_id_actions.iteritems() + )) + + return self.runInteraction( + "add_push_actions_to_staging", _add_push_actions_to_staging_txn + ) + + def remove_push_actions_from_staging(self, event_id): + """Called if we failed to persist the event to ensure that stale push + actions don't build up in the DB + + Args: + event_id (str) + """ + + return self._simple_delete( + table="event_push_actions_staging", + keyvalues={ + "event_id": event_id, + }, + desc="remove_push_actions_from_staging", + ) + class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" @@ -775,69 +838,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore): (rotate_to_stream_ordering,) ) - def add_push_actions_to_staging(self, event_id, user_id_actions): - """Add the push actions for the event to the push action staging area. - - Args: - event_id (str) - user_id_actions (dict[str, list[dict|str])]): A dictionary mapping - user_id to list of push actions, where an action can either be - a string or dict. - - Returns: - Deferred - """ - - if not user_id_actions: - return - - # This is a helper function for generating the necessary tuple that - # can be used to inert into the `event_push_actions_staging` table. - def _gen_entry(user_id, actions): - is_highlight = 1 if _action_has_highlight(actions) else 0 - return ( - event_id, # event_id column - user_id, # user_id column - _serialize_action(actions, is_highlight), # actions column - 1, # notif column - is_highlight, # highlight column - ) - - def _add_push_actions_to_staging_txn(txn): - # We don't use _simple_insert_many here to avoid the overhead - # of generating lists of dicts. - - sql = """ - INSERT INTO event_push_actions_staging - (event_id, user_id, actions, notif, highlight) - VALUES (?, ?, ?, ?, ?) - """ - - txn.executemany(sql, ( - _gen_entry(user_id, actions) - for user_id, actions in user_id_actions.iteritems() - )) - - return self.runInteraction( - "add_push_actions_to_staging", _add_push_actions_to_staging_txn - ) - - def remove_push_actions_from_staging(self, event_id): - """Called if we failed to persist the event to ensure that stale push - actions don't build up in the DB - - Args: - event_id (str) - """ - - return self._simple_delete( - table="event_push_actions_staging", - keyvalues={ - "event_id": event_id, - }, - desc="remove_push_actions_from_staging", - ) - def _action_has_highlight(actions): for action in actions: diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 583efb7bd..04a0b59a3 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -15,6 +15,10 @@ # limitations under the License. from ._base import SQLBaseStore +from synapse.storage.appservice import ApplicationServiceWorkerStore +from synapse.storage.pusher import PusherWorkerStore +from synapse.storage.receipts import ReceiptsWorkerStore +from synapse.storage.roommember import RoomMemberWorkerStore from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.push.baserules import list_with_base_rules @@ -51,7 +55,11 @@ def _load_rules(rawrules, enabled_map): return rules -class PushRulesWorkerStore(SQLBaseStore): +class PushRulesWorkerStore(ApplicationServiceWorkerStore, + ReceiptsWorkerStore, + PusherWorkerStore, + RoomMemberWorkerStore, + SQLBaseStore): """This is an abstract base class where subclasses must implement `get_max_push_rules_stream_id` which can be called in the initializer. """ @@ -140,8 +148,6 @@ class PushRulesWorkerStore(SQLBaseStore): "have_push_rules_changed", have_push_rules_changed_txn ) - -class PushRuleStore(PushRulesWorkerStore): @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids", num_args=1, inlineCallbacks=True) def bulk_get_push_rules(self, user_ids): @@ -281,6 +287,8 @@ class PushRuleStore(PushRulesWorkerStore): results.setdefault(row['user_name'], {})[row['rule_id']] = enabled defer.returnValue(results) + +class PushRuleStore(PushRulesWorkerStore): @defer.inlineCallbacks def add_push_rule( self, user_id, rule_id, priority_class, conditions, actions, diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index f4af3e4ca..307660b99 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -175,11 +175,6 @@ class PusherWorkerStore(SQLBaseStore): "get_all_updated_pushers_rows", get_all_updated_pushers_rows_txn ) - -class PusherStore(PusherWorkerStore): - def get_pushers_stream_token(self): - return self._pushers_id_gen.get_current_token() - @cachedInlineCallbacks(num_args=1, max_entries=15000) def get_if_user_has_pusher(self, user_id): # This only exists for the cachedList decorator @@ -201,6 +196,11 @@ class PusherStore(PusherWorkerStore): defer.returnValue(result) + +class PusherStore(PusherWorkerStore): + def get_pushers_stream_token(self): + return self._pushers_id_gen.get_current_token() + @defer.inlineCallbacks def add_pusher(self, user_id, access_token, kind, app_id, app_display_name, device_display_name, @@ -233,14 +233,18 @@ class PusherStore(PusherWorkerStore): ) if newly_inserted: - # get_if_user_has_pusher only cares if the user has - # at least *one* pusher. - self.get_if_user_has_pusher.invalidate(user_id,) + self.runInteraction( + "add_pusher", + self._invalidate_cache_and_stream, + self.get_if_user_has_pusher, (user_id,) + ) @defer.inlineCallbacks def delete_pusher_by_app_id_pushkey_user_id(self, app_id, pushkey, user_id): def delete_pusher_txn(txn, stream_id): - txn.call_after(self.get_if_user_has_pusher.invalidate, (user_id,)) + self._invalidate_cache_and_stream( + txn, self.get_if_user_has_pusher, (user_id,) + ) self._simple_delete_one_txn( txn, diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index b9158b989..d79877dac 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -432,6 +432,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_joined_hosts_cache(self, room_id): return _JoinedHostsCache(self, room_id) + @cached() + def who_forgot_in_room(self, room_id): + return self._simple_select_list( + table="room_memberships", + retcols=("user_id", "event_id"), + keyvalues={ + "room_id": room_id, + "forgotten": 1, + }, + desc="who_forgot" + ) + class RoomMemberStore(RoomMemberWorkerStore): def __init__(self, db_conn, hs): @@ -595,18 +607,6 @@ class RoomMemberStore(RoomMemberWorkerStore): forgot = yield self.runInteraction("did_forget_membership_at", f) defer.returnValue(forgot == 1) - @cached() - def who_forgot_in_room(self, room_id): - return self._simple_select_list( - table="room_memberships", - retcols=("user_id", "event_id"), - keyvalues={ - "room_id": room_id, - "forgotten": 1, - }, - desc="who_forgot" - ) - @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( From 28e973ac119e0b4ec5b9e45772a572a94d0e6643 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 16:30:10 +0000 Subject: [PATCH 038/173] Calculate push actions on worker --- synapse/app/event_creator.py | 8 +++ synapse/handlers/message.py | 84 ++++++++++++++++++-------- synapse/replication/http/send_event.py | 2 +- 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index b2ce39925..fc0b9e8c0 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -27,10 +27,14 @@ from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore +from synapse.replication.slave.storage.pushers import SlavedPusherStore +from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.tcp.client import ReplicationClientHandler @@ -48,6 +52,10 @@ logger = logging.getLogger("synapse.app.event_creator") class EventCreatorSlavedStore( + SlavedAccountDataStore, + SlavedPusherStore, + SlavedReceiptsStore, + SlavedPushRuleStore, SlavedDeviceStore, SlavedClientIpStore, SlavedApplicationServiceStore, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d99d8049b..4c186965a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -553,21 +553,67 @@ class EventCreationHandler(object): event, context, ratelimit=True, - extra_users=[] + extra_users=[], ): - # We now need to go and hit out to wherever we need to hit out to. + """Processes a new event. This includes checking auth, persisting it, + notifying users, sending to remote servers, etc. - # If we're a worker we need to hit out to the master. - if self.config.worker_app: - yield send_event_to_master( - self.http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - event=event, - context=context, + If called from a worker will hit out to the master process for final + processing. + + Args: + requester (Requester) + event (FrozenEvent) + context (EventContext) + ratelimit (bool) + extra_users (list(str)): Any extra users to notify about event + """ + + yield self.action_generator.handle_push_actions_for_event( + event, context + ) + + try: + # We now need to go and hit out to wherever we need to hit out to. + + # If we're a worker we need to hit out to the master. + if self.config.worker_app: + yield send_event_to_master( + self.http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + event=event, + context=context, + ) + return + + yield self.persist_and_notify_client_event( + requester, + event, + context, + ratelimit=ratelimit, + extra_users=extra_users, ) - return + except: # noqa: E722, as we reraise the exception this is fine. + # Ensure that we actually remove the entries in the push actions + # staging area, if we calculated them. + preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) + raise + + @defer.inlineCallbacks + def persist_and_notify_client_event( + self, + requester, + event, + context, + ratelimit=True, + extra_users=[], + ): + """Called when we have fully built and authed the event. This should + only be run on master. + """ + assert not self.config.worker_app if ratelimit: yield self.base_handler.ratelimit(requester) @@ -679,20 +725,10 @@ class EventCreationHandler(object): "Changing the room create event is forbidden", ) - yield self.action_generator.handle_push_actions_for_event( - event, context + (event_stream_id, max_stream_id) = yield self.store.persist_event( + event, context=context ) - try: - (event_stream_id, max_stream_id) = yield self.store.persist_event( - event, context=context - ) - except: # noqa: E722, as we reraise the exception this is fine. - # Ensure that we actually remove the entries in the push actions - # staging area - preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) - raise - # this intentionally does not yield: we don't care about the result # and don't need to wait for it. preserve_fn(self.pusher_pool.on_new_notifications)( diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 468f4b68f..3a99a88bc 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -106,7 +106,7 @@ class ReplicationSendEventRestServlet(RestServlet): event.event_id, event.room_id, ) - yield self.event_creation_handler.handle_new_client_event( + yield self.event_creation_handler.persist_and_notify_client_event( requester, event, context, ) From f756f961eab7ae6e53052ee419413c74d171d144 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:05:27 +0000 Subject: [PATCH 039/173] Fixup comments --- synapse/handlers/message.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4c186965a..c4151d73e 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -574,8 +574,6 @@ class EventCreationHandler(object): ) try: - # We now need to go and hit out to wherever we need to hit out to. - # If we're a worker we need to hit out to the master. if self.config.worker_app: yield send_event_to_master( @@ -610,8 +608,10 @@ class EventCreationHandler(object): ratelimit=True, extra_users=[], ): - """Called when we have fully built and authed the event. This should - only be run on master. + """Called when we have fully built the event, and have already + calculated the push actions for the event. + + This should only be run on master. """ assert not self.config.worker_app From 6b8604239f4c9463023e59664e7810ba58b8f428 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:08:28 +0000 Subject: [PATCH 040/173] Correctly send ratelimit and extra_users params --- synapse/handlers/message.py | 2 ++ synapse/replication/http/send_event.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c4151d73e..5f88f84d3 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -583,6 +583,8 @@ class EventCreationHandler(object): requester=requester, event=event, context=context, + ratelimit=ratelimit, + extra_users=extra_users, ) return diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 3a99a88bc..439bfbb4f 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -29,7 +29,8 @@ logger = logging.getLogger(__name__) @defer.inlineCallbacks -def send_event_to_master(client, host, port, requester, event, context): +def send_event_to_master(client, host, port, requester, event, context, + ratelimit, extra_users): """Send event to be handled on the master Args: @@ -39,6 +40,8 @@ def send_event_to_master(client, host, port, requester, event, context): requester (Requester) event (FrozenEvent) context (EventContext) + ratelimit (bool) + extra_users (list(str)): Any extra users to notify about event """ uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) @@ -48,6 +51,8 @@ def send_event_to_master(client, host, port, requester, event, context): "rejected_reason": event.rejected_reason, "context": context.serialize(event), "requester": requester.serialize(), + "ratelimit": ratelimit, + "extra_users": extra_users, } try: @@ -74,6 +79,8 @@ class ReplicationSendEventRestServlet(RestServlet): "rejected_reason": .., // The event.rejected_reason field "context": { .. serialized event context .. }, "requester": { .. serialized requester .. }, + "ratelimit": true, + "extra_users": [], } """ PATTERNS = [re.compile("^/_synapse/replication/send_event$")] @@ -98,6 +105,9 @@ class ReplicationSendEventRestServlet(RestServlet): requester = Requester.deserialize(self.store, content["requester"]) context = yield EventContext.deserialize(self.store, content["context"]) + ratelimit = content["ratelimit"] + extra_users = content["extra_users"] + if requester.user: request.authenticated_entity = requester.user.to_string() @@ -108,6 +118,8 @@ class ReplicationSendEventRestServlet(RestServlet): yield self.event_creation_handler.persist_and_notify_client_event( requester, event, context, + ratelimit=ratelimit, + extra_users=extra_users, ) defer.returnValue((200, {})) From f381d6381344eb442f46ae27f29e039175721ff5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:18:33 +0000 Subject: [PATCH 041/173] Check event auth on the worker --- synapse/handlers/message.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5f88f84d3..7d28c2745 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -569,6 +569,20 @@ class EventCreationHandler(object): extra_users (list(str)): Any extra users to notify about event """ + try: + yield self.auth.check_from_context(event, context) + except AuthError as err: + logger.warn("Denying new event %r because %s", event, err) + raise err + + # Ensure that we can round trip before trying to persist in db + try: + dump = ujson.dumps(unfreeze(event.content)) + ujson.loads(dump) + except Exception: + logger.exception("Failed to encode content: %r", event.content) + raise + yield self.action_generator.handle_push_actions_for_event( event, context ) @@ -610,8 +624,8 @@ class EventCreationHandler(object): ratelimit=True, extra_users=[], ): - """Called when we have fully built the event, and have already - calculated the push actions for the event. + """Called when we have fully built the event, have already + calculated the push actions for the event, and checked auth. This should only be run on master. """ @@ -620,20 +634,6 @@ class EventCreationHandler(object): if ratelimit: yield self.base_handler.ratelimit(requester) - try: - yield self.auth.check_from_context(event, context) - except AuthError as err: - logger.warn("Denying new event %r because %s", event, err) - raise err - - # Ensure that we can round trip before trying to persist in db - try: - dump = ujson.dumps(unfreeze(event.content)) - ujson.loads(dump) - except Exception: - logger.exception("Failed to encode content: %r", event.content) - raise - yield self.base_handler.maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: From 8ded8ba2c755f254bd98e15db7bc865eed997f07 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:20:34 +0000 Subject: [PATCH 042/173] Make repl send_event idempotent and retry on timeouts If we treated timeouts as failures on the worker we would attempt to clean up e.g. push actions while the master might still process the event. --- synapse/replication/http/send_event.py | 44 ++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 439bfbb4f..73cd3d91d 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -15,10 +15,15 @@ from twisted.internet import defer -from synapse.api.errors import SynapseError, MatrixCodeMessageException +from synapse.api.errors import ( + SynapseError, MatrixCodeMessageException, CodeMessageException, +) from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.util.async import sleep +from synapse.util.caches.response_cache import ResponseCache +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.metrics import Measure from synapse.types import Requester @@ -43,7 +48,9 @@ def send_event_to_master(client, host, port, requester, event, context, ratelimit (bool) extra_users (list(str)): Any extra users to notify about event """ - uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) + uri = "http://%s:%s/_synapse/replication/send_event/%s" % ( + host, port, event.event_id, + ) payload = { "event": event.get_pdu_json(), @@ -56,7 +63,20 @@ def send_event_to_master(client, host, port, requester, event, context, } try: - result = yield client.post_json_get_json(uri, payload) + # We keep retrying the same request for timeouts. This is so that we + # have a good idea that the request has either succeeded or failed on + # the master, and so whether we should clean up or not. + while True: + try: + result = yield client.put_json(uri, payload) + break + except CodeMessageException as e: + if e.code != 504: + raise + + # If we timed out we probably don't need to worry about backing + # off too much, but lets just wait a little anyway. + yield sleep(1) except MatrixCodeMessageException as e: # We convert to SynapseError as we know that it was a SynapseError # on the master process that we should send to the client. (And @@ -71,7 +91,7 @@ class ReplicationSendEventRestServlet(RestServlet): The API looks like: - POST /_synapse/replication/send_event + POST /_synapse/replication/send_event/:event_id { "event": { .. serialized event .. }, @@ -83,7 +103,7 @@ class ReplicationSendEventRestServlet(RestServlet): "extra_users": [], } """ - PATTERNS = [re.compile("^/_synapse/replication/send_event$")] + PATTERNS = [re.compile("^/_synapse/replication/send_event/(?P[^/]+)$")] def __init__(self, hs): super(ReplicationSendEventRestServlet, self).__init__() @@ -92,8 +112,20 @@ class ReplicationSendEventRestServlet(RestServlet): self.store = hs.get_datastore() self.clock = hs.get_clock() + # The responses are tiny, so we may as well cache them for a while + self.response_cache = ResponseCache(hs, timeout_ms=30 * 60 * 1000) + + def on_PUT(self, request, event_id): + result = self.response_cache.get(event_id) + if not result: + result = self.response_cache.set( + event_id, + preserve_fn(self._handle_request)(request) + ) + return make_deferred_yieldable(result) + @defer.inlineCallbacks - def on_POST(self, request): + def _handle_request(self, request): with Measure(self.clock, "repl_send_event_parse"): content = parse_json_object_from_request(request) From 89f90d808ad923374a204377b0bc8ed15a9385a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:59:16 +0000 Subject: [PATCH 043/173] Add some logging --- synapse/replication/http/send_event.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 73cd3d91d..f490622b6 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -72,6 +72,7 @@ def send_event_to_master(client, host, port, requester, event, context, break except CodeMessageException as e: if e.code != 504: + logger.warn("send_event request timed out") raise # If we timed out we probably don't need to worry about backing @@ -118,6 +119,7 @@ class ReplicationSendEventRestServlet(RestServlet): def on_PUT(self, request, event_id): result = self.response_cache.get(event_id) if not result: + logger.warn("Returning cached response") result = self.response_cache.set( event_id, preserve_fn(self._handle_request)(request) From 157298f9862bd06e9268aed9cdffbc1c912fd74d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:59:45 +0000 Subject: [PATCH 044/173] Don't do preserve_fn for every request --- synapse/replication/http/send_event.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index f490622b6..665a56d2e 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -122,10 +122,11 @@ class ReplicationSendEventRestServlet(RestServlet): logger.warn("Returning cached response") result = self.response_cache.set( event_id, - preserve_fn(self._handle_request)(request) + self._handle_request(request) ) return make_deferred_yieldable(result) + @preserve_fn @defer.inlineCallbacks def _handle_request(self, request): with Measure(self.clock, "repl_send_event_parse"): From 126b9bf96f2c989e7402b5e0177ee39a2a20940e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 12:05:33 +0000 Subject: [PATCH 045/173] Log in the correct places --- synapse/replication/http/send_event.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 665a56d2e..70f2fe456 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -72,9 +72,10 @@ def send_event_to_master(client, host, port, requester, event, context, break except CodeMessageException as e: if e.code != 504: - logger.warn("send_event request timed out") raise + logger.warn("send_event request timed out") + # If we timed out we probably don't need to worry about backing # off too much, but lets just wait a little anyway. yield sleep(1) @@ -119,11 +120,12 @@ class ReplicationSendEventRestServlet(RestServlet): def on_PUT(self, request, event_id): result = self.response_cache.get(event_id) if not result: - logger.warn("Returning cached response") result = self.response_cache.set( event_id, self._handle_request(request) ) + else: + logger.warn("Returning cached response") return make_deferred_yieldable(result) @preserve_fn From 3a75de923b9183c073bbddae1e08fae546a11f7a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Mar 2018 12:19:09 +0000 Subject: [PATCH 046/173] Rewrite make_deferred_yieldable avoiding inlineCallbacks ... because (a) it's actually simpler (b) it might be marginally more performant? --- synapse/util/logcontext.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 94fa7cac9..a8dea15c1 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -299,10 +299,6 @@ def preserve_fn(f): Useful for wrapping functions that return a deferred which you don't yield on. """ - def reset_context(result): - LoggingContext.set_current_context(LoggingContext.sentinel) - return result - def g(*args, **kwargs): current = LoggingContext.current_context() res = f(*args, **kwargs) @@ -323,12 +319,11 @@ def preserve_fn(f): # which is supposed to have a single entry and exit point. But # by spawning off another deferred, we are effectively # adding a new exit point.) - res.addBoth(reset_context) + res.addBoth(_set_context_cb, LoggingContext.sentinel) return res return g -@defer.inlineCallbacks def make_deferred_yieldable(deferred): """Given a deferred, make it follow the Synapse logcontext rules: @@ -342,9 +337,16 @@ def make_deferred_yieldable(deferred): (This is more-or-less the opposite operation to preserve_fn.) """ - with PreserveLoggingContext(): - r = yield deferred - defer.returnValue(r) + if isinstance(deferred, defer.Deferred) and not deferred.called: + prev_context = LoggingContext.set_current_context(LoggingContext.sentinel) + deferred.addBoth(_set_context_cb, prev_context) + return deferred + + +def _set_context_cb(result, context): + """A callback function which just sets the logging context""" + LoggingContext.set_current_context(context) + return result # modules to ignore in `logcontext_tracer` From 0c8ba5dd1ce3e5cec201165c50f69aaa5c68c45d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:39:45 +0000 Subject: [PATCH 047/173] Split up RoomStore --- synapse/replication/slave/storage/room.py | 21 +- synapse/storage/room.py | 239 +++++++++++----------- 2 files changed, 125 insertions(+), 135 deletions(-) diff --git a/synapse/replication/slave/storage/room.py b/synapse/replication/slave/storage/room.py index f51038403..5ae167015 100644 --- a/synapse/replication/slave/storage/room.py +++ b/synapse/replication/slave/storage/room.py @@ -14,32 +14,19 @@ # limitations under the License. from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.storage.room import RoomStore +from synapse.storage.room import RoomWorkerStore from ._slaved_id_tracker import SlavedIdTracker -class RoomStore(BaseSlavedStore): +class RoomStore(RoomWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(RoomStore, self).__init__(db_conn, hs) self._public_room_id_gen = SlavedIdTracker( db_conn, "public_room_list_stream", "stream_id" ) - get_public_room_ids = DataStore.get_public_room_ids.__func__ - get_current_public_room_stream_id = ( - DataStore.get_current_public_room_stream_id.__func__ - ) - get_public_room_ids_at_stream_id = ( - RoomStore.__dict__["get_public_room_ids_at_stream_id"] - ) - get_public_room_ids_at_stream_id_txn = ( - DataStore.get_public_room_ids_at_stream_id_txn.__func__ - ) - get_published_at_stream_id_txn = ( - DataStore.get_published_at_stream_id_txn.__func__ - ) - get_public_room_changes = DataStore.get_public_room_changes.__func__ + def get_current_public_room_stream_id(self): + return self._public_room_id_gen.get_current_token() def stream_positions(self): result = super(RoomStore, self).stream_positions() diff --git a/synapse/storage/room.py b/synapse/storage/room.py index fff6652e0..7f2c08d7a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,6 +16,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage._base import SQLBaseStore from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -38,7 +39,126 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SearchStore): +class RoomWorkerStore(SQLBaseStore): + def get_public_room_ids(self): + return self._simple_select_onecol( + table="rooms", + keyvalues={ + "is_public": True, + }, + retcol="room_id", + desc="get_public_room_ids", + ) + + @cached(num_args=2, max_entries=100) + def get_public_room_ids_at_stream_id(self, stream_id, network_tuple): + """Get pulbic rooms for a particular list, or across all lists. + + Args: + stream_id (int) + network_tuple (ThirdPartyInstanceID): The list to use (None, None) + means the main list, None means all lsits. + """ + return self.runInteraction( + "get_public_room_ids_at_stream_id", + self.get_public_room_ids_at_stream_id_txn, + stream_id, network_tuple=network_tuple + ) + + def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, + network_tuple): + return { + rm + for rm, vis in self.get_published_at_stream_id_txn( + txn, stream_id, network_tuple=network_tuple + ).items() + if vis + } + + def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): + if network_tuple: + # We want to get from a particular list. No aggregation required. + + sql = (""" + SELECT room_id, visibility FROM public_room_list_stream + INNER JOIN ( + SELECT room_id, max(stream_id) AS stream_id + FROM public_room_list_stream + WHERE stream_id <= ? %s + GROUP BY room_id + ) grouped USING (room_id, stream_id) + """) + + if network_tuple.appservice_id is not None: + txn.execute( + sql % ("AND appservice_id = ? AND network_id = ?",), + (stream_id, network_tuple.appservice_id, network_tuple.network_id,) + ) + else: + txn.execute( + sql % ("AND appservice_id IS NULL",), + (stream_id,) + ) + return dict(txn) + else: + # We want to get from all lists, so we need to aggregate the results + + logger.info("Executing full list") + + sql = (""" + SELECT room_id, visibility + FROM public_room_list_stream + INNER JOIN ( + SELECT + room_id, max(stream_id) AS stream_id, appservice_id, + network_id + FROM public_room_list_stream + WHERE stream_id <= ? + GROUP BY room_id, appservice_id, network_id + ) grouped USING (room_id, stream_id) + """) + + txn.execute( + sql, + (stream_id,) + ) + + results = {} + # A room is visible if its visible on any list. + for room_id, visibility in txn: + results[room_id] = bool(visibility) or results.get(room_id, False) + + return results + + def get_public_room_changes(self, prev_stream_id, new_stream_id, + network_tuple): + def get_public_room_changes_txn(txn): + then_rooms = self.get_public_room_ids_at_stream_id_txn( + txn, prev_stream_id, network_tuple + ) + + now_rooms_dict = self.get_published_at_stream_id_txn( + txn, new_stream_id, network_tuple + ) + + now_rooms_visible = set( + rm for rm, vis in now_rooms_dict.items() if vis + ) + now_rooms_not_visible = set( + rm for rm, vis in now_rooms_dict.items() if not vis + ) + + newly_visible = now_rooms_visible - then_rooms + newly_unpublished = now_rooms_not_visible & then_rooms + + return newly_visible, newly_unpublished + + return self.runInteraction( + "get_public_room_changes", get_public_room_changes_txn + ) + + +class RoomStore(RoomWorkerStore, SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -225,16 +345,6 @@ class RoomStore(SearchStore): ) self.hs.get_notifier().on_new_replication_data() - def get_public_room_ids(self): - return self._simple_select_onecol( - table="rooms", - keyvalues={ - "is_public": True, - }, - retcol="room_id", - desc="get_public_room_ids", - ) - def get_room_count(self): """Retrieve a list of all rooms """ @@ -326,113 +436,6 @@ class RoomStore(SearchStore): def get_current_public_room_stream_id(self): return self._public_room_id_gen.get_current_token() - @cached(num_args=2, max_entries=100) - def get_public_room_ids_at_stream_id(self, stream_id, network_tuple): - """Get pulbic rooms for a particular list, or across all lists. - - Args: - stream_id (int) - network_tuple (ThirdPartyInstanceID): The list to use (None, None) - means the main list, None means all lsits. - """ - return self.runInteraction( - "get_public_room_ids_at_stream_id", - self.get_public_room_ids_at_stream_id_txn, - stream_id, network_tuple=network_tuple - ) - - def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, - network_tuple): - return { - rm - for rm, vis in self.get_published_at_stream_id_txn( - txn, stream_id, network_tuple=network_tuple - ).items() - if vis - } - - def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): - if network_tuple: - # We want to get from a particular list. No aggregation required. - - sql = (""" - SELECT room_id, visibility FROM public_room_list_stream - INNER JOIN ( - SELECT room_id, max(stream_id) AS stream_id - FROM public_room_list_stream - WHERE stream_id <= ? %s - GROUP BY room_id - ) grouped USING (room_id, stream_id) - """) - - if network_tuple.appservice_id is not None: - txn.execute( - sql % ("AND appservice_id = ? AND network_id = ?",), - (stream_id, network_tuple.appservice_id, network_tuple.network_id,) - ) - else: - txn.execute( - sql % ("AND appservice_id IS NULL",), - (stream_id,) - ) - return dict(txn) - else: - # We want to get from all lists, so we need to aggregate the results - - logger.info("Executing full list") - - sql = (""" - SELECT room_id, visibility - FROM public_room_list_stream - INNER JOIN ( - SELECT - room_id, max(stream_id) AS stream_id, appservice_id, - network_id - FROM public_room_list_stream - WHERE stream_id <= ? - GROUP BY room_id, appservice_id, network_id - ) grouped USING (room_id, stream_id) - """) - - txn.execute( - sql, - (stream_id,) - ) - - results = {} - # A room is visible if its visible on any list. - for room_id, visibility in txn: - results[room_id] = bool(visibility) or results.get(room_id, False) - - return results - - def get_public_room_changes(self, prev_stream_id, new_stream_id, - network_tuple): - def get_public_room_changes_txn(txn): - then_rooms = self.get_public_room_ids_at_stream_id_txn( - txn, prev_stream_id, network_tuple - ) - - now_rooms_dict = self.get_published_at_stream_id_txn( - txn, new_stream_id, network_tuple - ) - - now_rooms_visible = set( - rm for rm, vis in now_rooms_dict.items() if vis - ) - now_rooms_not_visible = set( - rm for rm, vis in now_rooms_dict.items() if not vis - ) - - newly_visible = now_rooms_visible - then_rooms - newly_unpublished = now_rooms_not_visible & then_rooms - - return newly_visible, newly_unpublished - - return self.runInteraction( - "get_public_room_changes", get_public_room_changes_txn - ) - def get_all_new_public_rooms(self, prev_id, current_id, limit): def get_all_new_public_rooms(txn): sql = (""" From a9a2d66cdd0abc2339641808698e63cb06c4a038 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 14:16:02 +0000 Subject: [PATCH 048/173] Split out SignatureStore and EventFederationStore --- synapse/replication/slave/storage/events.py | 50 +--- synapse/storage/event_federation.py | 264 ++++++++++---------- synapse/storage/signatures.py | 8 +- 3 files changed, 148 insertions(+), 174 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f43..d2495ff99 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -17,13 +17,13 @@ import logging from synapse.api.constants import EventTypes from synapse.storage import DataStore -from synapse.storage.event_federation import EventFederationStore +from synapse.storage.event_federation import EventFederationWorkerStore from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore -from synapse.storage.signatures import SignatureStore +from synapse.storage.signatures import SignatureWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -40,8 +40,12 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, - EventsWorkerStore, StateGroupWorkerStore, +class SlavedEventStore(EventFederationWorkerStore, + RoomMemberWorkerStore, + EventPushActionsWorkerStore, + EventsWorkerStore, + StateGroupWorkerStore, + SignatureWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): @@ -72,9 +76,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. - get_latest_event_ids_in_room = EventFederationStore.__dict__[ - "get_latest_event_ids_in_room" - ] get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] @@ -100,48 +101,13 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, _get_events_around_txn = DataStore._get_events_around_txn.__func__ - get_backfill_events = DataStore.get_backfill_events.__func__ - _get_backfill_events = DataStore._get_backfill_events.__func__ - get_missing_events = DataStore.get_missing_events.__func__ - _get_missing_events = DataStore._get_missing_events.__func__ - - get_auth_chain = DataStore.get_auth_chain.__func__ - get_auth_chain_ids = DataStore.get_auth_chain_ids.__func__ - _get_auth_chain_ids_txn = DataStore._get_auth_chain_ids_txn.__func__ - get_room_max_stream_ordering = DataStore.get_room_max_stream_ordering.__func__ - get_forward_extremeties_for_room = ( - DataStore.get_forward_extremeties_for_room.__func__ - ) - _get_forward_extremeties_for_room = ( - EventFederationStore.__dict__["_get_forward_extremeties_for_room"] - ) - get_all_new_events_stream = DataStore.get_all_new_events_stream.__func__ get_federation_out_pos = DataStore.get_federation_out_pos.__func__ update_federation_out_pos = DataStore.update_federation_out_pos.__func__ - get_latest_event_ids_and_hashes_in_room = ( - DataStore.get_latest_event_ids_and_hashes_in_room.__func__ - ) - _get_latest_event_ids_and_hashes_in_room = ( - DataStore._get_latest_event_ids_and_hashes_in_room.__func__ - ) - _get_event_reference_hashes_txn = ( - DataStore._get_event_reference_hashes_txn.__func__ - ) - add_event_hashes = ( - DataStore.add_event_hashes.__func__ - ) - get_event_reference_hashes = ( - SignatureStore.__dict__["get_event_reference_hashes"] - ) - get_event_reference_hash = ( - SignatureStore.__dict__["get_event_reference_hash"] - ) - def stream_positions(self): result = super(SlavedEventStore, self).stream_positions() result["events"] = self._stream_id_gen.get_current_token() diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 55a05c59d..00ee82d30 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -15,7 +15,10 @@ from twisted.internet import defer -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore +from synapse.storage.signatures import SignatureWorkerStore + from synapse.api.errors import StoreError from synapse.util.caches.descriptors import cached from unpaddedbase64 import encode_base64 @@ -27,30 +30,8 @@ from Queue import PriorityQueue, Empty logger = logging.getLogger(__name__) -class EventFederationStore(SQLBaseStore): - """ Responsible for storing and serving up the various graphs associated - with an event. Including the main event graph and the auth chains for an - event. - - Also has methods for getting the front (latest) and back (oldest) edges - of the event graphs. These are used to generate the parents for new events - and backfilling from another server respectively. - """ - - EVENT_AUTH_STATE_ONLY = "event_auth_state_only" - - def __init__(self, db_conn, hs): - super(EventFederationStore, self).__init__(db_conn, hs) - - self.register_background_update_handler( - self.EVENT_AUTH_STATE_ONLY, - self._background_delete_non_state_event_auth, - ) - - hs.get_clock().looping_call( - self._delete_old_forward_extrem_cache, 60 * 60 * 1000 - ) - +class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, + SQLBaseStore): def get_auth_chain(self, event_ids, include_given=False): """Get auth events for given event_ids. The events *must* be state events. @@ -228,88 +209,6 @@ class EventFederationStore(SQLBaseStore): return int(min_depth) if min_depth is not None else None - def _update_min_depth_for_room_txn(self, txn, room_id, depth): - min_depth = self._get_min_depth_interaction(txn, room_id) - - if min_depth and depth >= min_depth: - return - - self._simple_upsert_txn( - txn, - table="room_depth", - keyvalues={ - "room_id": room_id, - }, - values={ - "min_depth": depth, - }, - ) - - def _handle_mult_prev_events(self, txn, events): - """ - For the given event, update the event edges table and forward and - backward extremities tables. - """ - self._simple_insert_many_txn( - txn, - table="event_edges", - values=[ - { - "event_id": ev.event_id, - "prev_event_id": e_id, - "room_id": ev.room_id, - "is_state": False, - } - for ev in events - for e_id, _ in ev.prev_events - ], - ) - - self._update_backward_extremeties(txn, events) - - def _update_backward_extremeties(self, txn, events): - """Updates the event_backward_extremities tables based on the new/updated - events being persisted. - - This is called for new events *and* for events that were outliers, but - are now being persisted as non-outliers. - - Forward extremities are handled when we first start persisting the events. - """ - events_by_room = {} - for ev in events: - events_by_room.setdefault(ev.room_id, []).append(ev) - - query = ( - "INSERT INTO event_backward_extremities (event_id, room_id)" - " SELECT ?, ? WHERE NOT EXISTS (" - " SELECT 1 FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" - " )" - " AND NOT EXISTS (" - " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " - " AND outlier = ?" - " )" - ) - - txn.executemany(query, [ - (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) - for ev in events for e_id, _ in ev.prev_events - if not ev.internal_metadata.is_outlier() - ]) - - query = ( - "DELETE FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" - ) - txn.executemany( - query, - [ - (ev.event_id, ev.room_id) for ev in events - if not ev.internal_metadata.is_outlier() - ] - ) - def get_forward_extremeties_for_room(self, room_id, stream_ordering): """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". @@ -371,28 +270,6 @@ class EventFederationStore(SQLBaseStore): get_forward_extremeties_for_room_txn ) - def _delete_old_forward_extrem_cache(self): - def _delete_old_forward_extrem_cache_txn(txn): - # Delete entries older than a month, while making sure we don't delete - # the only entries for a room. - sql = (""" - DELETE FROM stream_ordering_to_exterm - WHERE - room_id IN ( - SELECT room_id - FROM stream_ordering_to_exterm - WHERE stream_ordering > ? - ) AND stream_ordering < ? - """) - txn.execute( - sql, - (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) - ) - return self.runInteraction( - "_delete_old_forward_extrem_cache", - _delete_old_forward_extrem_cache_txn - ) - def get_backfill_events(self, room_id, event_list, limit): """Get a list of Events for a given topic that occurred before (and including) the events in event_list. Return a list of max size `limit` @@ -522,6 +399,135 @@ class EventFederationStore(SQLBaseStore): return event_results + +class EventFederationStore(EventFederationWorkerStore): + """ Responsible for storing and serving up the various graphs associated + with an event. Including the main event graph and the auth chains for an + event. + + Also has methods for getting the front (latest) and back (oldest) edges + of the event graphs. These are used to generate the parents for new events + and backfilling from another server respectively. + """ + + EVENT_AUTH_STATE_ONLY = "event_auth_state_only" + + def __init__(self, db_conn, hs): + super(EventFederationStore, self).__init__(db_conn, hs) + + self.register_background_update_handler( + self.EVENT_AUTH_STATE_ONLY, + self._background_delete_non_state_event_auth, + ) + + hs.get_clock().looping_call( + self._delete_old_forward_extrem_cache, 60 * 60 * 1000 + ) + + def _update_min_depth_for_room_txn(self, txn, room_id, depth): + min_depth = self._get_min_depth_interaction(txn, room_id) + + if min_depth and depth >= min_depth: + return + + self._simple_upsert_txn( + txn, + table="room_depth", + keyvalues={ + "room_id": room_id, + }, + values={ + "min_depth": depth, + }, + ) + + def _handle_mult_prev_events(self, txn, events): + """ + For the given event, update the event edges table and forward and + backward extremities tables. + """ + self._simple_insert_many_txn( + txn, + table="event_edges", + values=[ + { + "event_id": ev.event_id, + "prev_event_id": e_id, + "room_id": ev.room_id, + "is_state": False, + } + for ev in events + for e_id, _ in ev.prev_events + ], + ) + + self._update_backward_extremeties(txn, events) + + def _update_backward_extremeties(self, txn, events): + """Updates the event_backward_extremities tables based on the new/updated + events being persisted. + + This is called for new events *and* for events that were outliers, but + are now being persisted as non-outliers. + + Forward extremities are handled when we first start persisting the events. + """ + events_by_room = {} + for ev in events: + events_by_room.setdefault(ev.room_id, []).append(ev) + + query = ( + "INSERT INTO event_backward_extremities (event_id, room_id)" + " SELECT ?, ? WHERE NOT EXISTS (" + " SELECT 1 FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" + " )" + " AND NOT EXISTS (" + " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " + " AND outlier = ?" + " )" + ) + + txn.executemany(query, [ + (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) + for ev in events for e_id, _ in ev.prev_events + if not ev.internal_metadata.is_outlier() + ]) + + query = ( + "DELETE FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" + ) + txn.executemany( + query, + [ + (ev.event_id, ev.room_id) for ev in events + if not ev.internal_metadata.is_outlier() + ] + ) + + def _delete_old_forward_extrem_cache(self): + def _delete_old_forward_extrem_cache_txn(txn): + # Delete entries older than a month, while making sure we don't delete + # the only entries for a room. + sql = (""" + DELETE FROM stream_ordering_to_exterm + WHERE + room_id IN ( + SELECT room_id + FROM stream_ordering_to_exterm + WHERE stream_ordering > ? + ) AND stream_ordering < ? + """) + txn.execute( + sql, + (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) + ) + return self.runInteraction( + "_delete_old_forward_extrem_cache", + _delete_old_forward_extrem_cache_txn + ) + def clean_room_for_join(self, room_id): return self.runInteraction( "clean_room_for_join", diff --git a/synapse/storage/signatures.py b/synapse/storage/signatures.py index 67d5d9969..e6eeb1b64 100644 --- a/synapse/storage/signatures.py +++ b/synapse/storage/signatures.py @@ -22,9 +22,7 @@ from synapse.crypto.event_signing import compute_event_reference_hash from synapse.util.caches.descriptors import cached, cachedList -class SignatureStore(SQLBaseStore): - """Persistence for event signatures and hashes""" - +class SignatureWorkerStore(SQLBaseStore): @cached() def get_event_reference_hash(self, event_id): return self._get_event_reference_hashes_txn(event_id) @@ -74,6 +72,10 @@ class SignatureStore(SQLBaseStore): txn.execute(query, (event_id, )) return {k: v for k, v in txn} + +class SignatureStore(SignatureWorkerStore): + """Persistence for event signatures and hashes""" + def _store_event_reference_hashes_txn(self, txn, events): """Store a hash for a PDU Args: From 6411f725bedbc4701e9c624ae23f47d52ff0bd7c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 14:05:41 +0000 Subject: [PATCH 049/173] Calculate stream_ordering_month_ago correctly on workers --- synapse/replication/slave/storage/events.py | 1 - synapse/storage/__init__.py | 15 -- synapse/storage/event_push_actions.py | 149 +++++++++++--------- 3 files changed, 85 insertions(+), 80 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f43..a4d7430f9 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -67,7 +67,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, "MembershipStreamChangeCache", events_max, ) - self.stream_ordering_month_ago = 0 self._stream_order_on_start = self.get_room_max_stream_ordering() # Cached functions can't be accessed through a class instance so we need diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0f136f8a0..b3cdcfdc2 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -20,7 +20,6 @@ from synapse.storage.devices import DeviceStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore ) -from ._base import LoggingTransaction from .directory import DirectoryStore from .events import EventsStore from .presence import PresenceStore, UserPresenceState @@ -228,20 +227,6 @@ class DataStore(RoomMemberStore, RoomStore, prefilled_cache=_group_updates_prefill, ) - cur = LoggingTransaction( - db_conn.cursor(), - name="_find_stream_orderings_for_times_txn", - database_engine=self.database_engine, - after_callbacks=[], - final_callbacks=[], - ) - self._find_stream_orderings_for_times_txn(cur) - cur.close() - - self.find_stream_orderings_looping_call = self._clock.looping_call( - self._find_stream_orderings_for_times, 10 * 60 * 1000 - ) - self._stream_order_on_start = self.get_room_max_stream_ordering() self._min_stream_order_on_start = self.get_room_min_stream_ordering() diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 6454045c2..c08bebe11 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore, LoggingTransaction from twisted.internet import defer from synapse.util.async import sleep from synapse.util.caches.descriptors import cachedInlineCallbacks @@ -64,6 +64,27 @@ def _deserialize_action(actions, is_highlight): class EventPushActionsWorkerStore(SQLBaseStore): + def __init__(self, db_conn, hs): + super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) + + # These get correctly ste by _find_stream_orderings_for_times_txn + self.stream_ordering_month_ago = 0 + self.stream_ordering_day_ago = 0 + + cur = LoggingTransaction( + db_conn.cursor(), + name="_find_stream_orderings_for_times_txn", + database_engine=self.database_engine, + after_callbacks=[], + final_callbacks=[], + ) + self._find_stream_orderings_for_times_txn(cur) + cur.close() + + self.find_stream_orderings_looping_call = self._clock.looping_call( + self._find_stream_orderings_for_times, 10 * 60 * 1000 + ) + @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id @@ -443,6 +464,69 @@ class EventPushActionsWorkerStore(SQLBaseStore): desc="remove_push_actions_from_staging", ) + @defer.inlineCallbacks + def _find_stream_orderings_for_times(self): + yield self.runInteraction( + "_find_stream_orderings_for_times", + self._find_stream_orderings_for_times_txn + ) + + def _find_stream_orderings_for_times_txn(self, txn): + logger.info("Searching for stream ordering 1 month ago") + self.stream_ordering_month_ago = self._find_first_stream_ordering_after_ts_txn( + txn, self._clock.time_msec() - 30 * 24 * 60 * 60 * 1000 + ) + logger.info( + "Found stream ordering 1 month ago: it's %d", + self.stream_ordering_month_ago + ) + logger.info("Searching for stream ordering 1 day ago") + self.stream_ordering_day_ago = self._find_first_stream_ordering_after_ts_txn( + txn, self._clock.time_msec() - 24 * 60 * 60 * 1000 + ) + logger.info( + "Found stream ordering 1 day ago: it's %d", + self.stream_ordering_day_ago + ) + + def _find_first_stream_ordering_after_ts_txn(self, txn, ts): + """ + Find the stream_ordering of the first event that was received after + a given timestamp. This is relatively slow as there is no index on + received_ts but we can then use this to delete push actions before + this. + + received_ts must necessarily be in the same order as stream_ordering + and stream_ordering is indexed, so we manually binary search using + stream_ordering + """ + txn.execute("SELECT MAX(stream_ordering) FROM events") + max_stream_ordering = txn.fetchone()[0] + + if max_stream_ordering is None: + return 0 + + range_start = 0 + range_end = max_stream_ordering + + sql = ( + "SELECT received_ts FROM events" + " WHERE stream_ordering > ?" + " ORDER BY stream_ordering" + " LIMIT 1" + ) + + while range_end - range_start > 1: + middle = int((range_end + range_start) / 2) + txn.execute(sql, (middle,)) + middle_ts = txn.fetchone()[0] + if ts > middle_ts: + range_start = middle + else: + range_end = middle + + return range_end + class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" @@ -650,69 +734,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore): WHERE room_id = ? AND user_id = ? AND stream_ordering <= ? """, (room_id, user_id, stream_ordering)) - @defer.inlineCallbacks - def _find_stream_orderings_for_times(self): - yield self.runInteraction( - "_find_stream_orderings_for_times", - self._find_stream_orderings_for_times_txn - ) - - def _find_stream_orderings_for_times_txn(self, txn): - logger.info("Searching for stream ordering 1 month ago") - self.stream_ordering_month_ago = self._find_first_stream_ordering_after_ts_txn( - txn, self._clock.time_msec() - 30 * 24 * 60 * 60 * 1000 - ) - logger.info( - "Found stream ordering 1 month ago: it's %d", - self.stream_ordering_month_ago - ) - logger.info("Searching for stream ordering 1 day ago") - self.stream_ordering_day_ago = self._find_first_stream_ordering_after_ts_txn( - txn, self._clock.time_msec() - 24 * 60 * 60 * 1000 - ) - logger.info( - "Found stream ordering 1 day ago: it's %d", - self.stream_ordering_day_ago - ) - - def _find_first_stream_ordering_after_ts_txn(self, txn, ts): - """ - Find the stream_ordering of the first event that was received after - a given timestamp. This is relatively slow as there is no index on - received_ts but we can then use this to delete push actions before - this. - - received_ts must necessarily be in the same order as stream_ordering - and stream_ordering is indexed, so we manually binary search using - stream_ordering - """ - txn.execute("SELECT MAX(stream_ordering) FROM events") - max_stream_ordering = txn.fetchone()[0] - - if max_stream_ordering is None: - return 0 - - range_start = 0 - range_end = max_stream_ordering - - sql = ( - "SELECT received_ts FROM events" - " WHERE stream_ordering > ?" - " ORDER BY stream_ordering" - " LIMIT 1" - ) - - while range_end - range_start > 1: - middle = int((range_end + range_start) / 2) - txn.execute(sql, (middle,)) - middle_ts = txn.fetchone()[0] - if ts > middle_ts: - range_start = middle - else: - range_end = middle - - return range_end - @defer.inlineCallbacks def _rotate_notifs(self): if self._doing_notif_rotation or self.stream_ordering_day_ago is None: From 784f036306a020fcde495887c2881209b913b9b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:54:37 +0000 Subject: [PATCH 050/173] Move RoomMemberHandler out of Handlers --- synapse/handlers/__init__.py | 2 - synapse/handlers/_base.py | 2 +- synapse/handlers/federation.py | 4 +- synapse/handlers/profile.py | 2 +- synapse/handlers/room.py | 4 +- synapse/handlers/room_member.py | 52 ++++++++++++++---------- synapse/rest/client/v1/admin.py | 7 ++-- synapse/rest/client/v1/room.py | 19 +++++---- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 5 +++ tests/rest/client/v1/test_typing.py | 2 +- 11 files changed, 57 insertions(+), 44 deletions(-) diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 53213cdcc..8f8fd82eb 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -17,7 +17,6 @@ from .register import RegistrationHandler from .room import ( RoomCreationHandler, RoomContextHandler, ) -from .room_member import RoomMemberHandler from .message import MessageHandler from .federation import FederationHandler from .directory import DirectoryHandler @@ -49,7 +48,6 @@ class Handlers(object): self.registration_handler = RegistrationHandler(hs) self.message_handler = MessageHandler(hs) self.room_creation_handler = RoomCreationHandler(hs) - self.room_member_handler = RoomMemberHandler(hs) self.federation_handler = FederationHandler(hs) self.directory_handler = DirectoryHandler(hs) self.admin_handler = AdminHandler(hs) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index faa5609c0..e089e66fd 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -158,7 +158,7 @@ class BaseHandler(object): # homeserver. requester = synapse.types.create_requester( target_user, is_guest=True) - handler = self.hs.get_handlers().room_member_handler + handler = self.hs.get_room_member_handler() yield handler.update_membership( requester, target_user, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8832ba58b..520612683 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2153,7 +2153,7 @@ class FederationHandler(BaseHandler): raise e yield self._check_signature(event, context) - member_handler = self.hs.get_handlers().room_member_handler + member_handler = self.hs.get_room_member_handler() yield member_handler.send_membership_event(None, event, context) else: destinations = set(x.split(":", 1)[-1] for x in (sender_user_id, room_id)) @@ -2197,7 +2197,7 @@ class FederationHandler(BaseHandler): # TODO: Make sure the signatures actually are correct. event.signatures.update(returned_invite.signatures) - member_handler = self.hs.get_handlers().room_member_handler + member_handler = self.hs.get_room_member_handler() yield member_handler.send_membership_event(None, event, context) @defer.inlineCallbacks diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 9800e2445..c9c287903 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -233,7 +233,7 @@ class ProfileHandler(BaseHandler): ) for room_id in room_ids: - handler = self.hs.get_handlers().room_member_handler + handler = self.hs.get_room_member_handler() try: # Assume the target_user isn't a guest, # because we don't let guests set profile or avatar data. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6ab020bf4..6c425828c 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -165,7 +165,7 @@ class RoomCreationHandler(BaseHandler): creation_content = config.get("creation_content", {}) - room_member_handler = self.hs.get_handlers().room_member_handler + room_member_handler = self.hs.get_room_member_handler() yield self._send_events_for_new_room( requester, @@ -224,7 +224,7 @@ class RoomCreationHandler(BaseHandler): id_server = invite_3pid["id_server"] address = invite_3pid["address"] medium = invite_3pid["medium"] - yield self.hs.get_handlers().room_member_handler.do_3pid_invite( + yield self.hs.get_room_member_handler().do_3pid_invite( room_id, requester.user, medium, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 37dc5e99a..0329432f5 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -30,24 +30,33 @@ from synapse.api.errors import AuthError, SynapseError, Codes from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room -from ._base import BaseHandler logger = logging.getLogger(__name__) id_server_scheme = "https://" -class RoomMemberHandler(BaseHandler): +class RoomMemberHandler(object): # TODO(paul): This handler currently contains a messy conflation of # low-level API that works on UserID objects and so on, and REST-level # API that takes ID strings and returns pagination chunks. These concerns # ought to be separated out a lot better. def __init__(self, hs): - super(RoomMemberHandler, self).__init__(hs) + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.state_handler = hs.get_state_handler() + self.config = hs.config + self.is_mine = hs.is_mine + self.is_mine_id = hs.is_mine_id + self.simple_http_client = hs.get_simple_http_client() + self.federation_handler = hs.get_handlers().federation_handler + self.directory_handler = hs.get_handlers().directory_handler + self.registration_handler = hs.get_handlers().registration_handler self.profile_handler = hs.get_profile_handler() self.event_creation_hander = hs.get_event_creation_handler() + self.replication_layer = hs.get_replication_layer() self.member_linearizer = Linearizer(name="member") @@ -138,7 +147,7 @@ class RoomMemberHandler(BaseHandler): # join dance for now, since we're kinda implicitly checking # that we are allowed to join when we decide whether or not we # need to do the invite/join dance. - yield self.hs.get_handlers().federation_handler.do_invite_join( + yield self.federation_handler.do_invite_join( remote_room_hosts, room_id, user.to_string(), @@ -204,8 +213,7 @@ class RoomMemberHandler(BaseHandler): # if this is a join with a 3pid signature, we may need to turn a 3pid # invite into a normal invite before we can handle the join. if third_party_signed is not None: - replication = self.hs.get_replication_layer() - yield replication.exchange_third_party_invite( + yield self.replication_layer.exchange_third_party_invite( third_party_signed["sender"], target.to_string(), room_id, @@ -226,7 +234,7 @@ class RoomMemberHandler(BaseHandler): requester.user, ) if not is_requester_admin: - if self.hs.config.block_non_admin_invites: + if self.config.block_non_admin_invites: logger.info( "Blocking invite: user is not admin and non-admin " "invites disabled" @@ -286,7 +294,7 @@ class RoomMemberHandler(BaseHandler): if not is_host_in_room: inviter = yield self.get_inviter(target.to_string(), room_id) - if inviter and not self.hs.is_mine(inviter): + if inviter and not self.is_mine(inviter): remote_room_hosts.append(inviter.domain) content["membership"] = Membership.JOIN @@ -311,7 +319,7 @@ class RoomMemberHandler(BaseHandler): if not inviter: raise SynapseError(404, "Not a known room") - if self.hs.is_mine(inviter): + if self.is_mine(inviter): # the inviter was on our server, but has now left. Carry on # with the normal rejection codepath. # @@ -321,7 +329,7 @@ class RoomMemberHandler(BaseHandler): else: # send the rejection to the inviter's HS. remote_room_hosts = remote_room_hosts + [inviter.domain] - fed_handler = self.hs.get_handlers().federation_handler + fed_handler = self.federation_handler try: ret = yield fed_handler.do_remotely_reject_invite( remote_room_hosts, @@ -393,7 +401,7 @@ class RoomMemberHandler(BaseHandler): "Sender (%s) must be same as requester (%s)" % (sender, requester.user) ) - assert self.hs.is_mine(sender), "Sender must be our own: %s" % (sender,) + assert self.is_mine(sender), "Sender must be our own: %s" % (sender,) else: requester = synapse.types.create_requester(target_user) @@ -477,7 +485,7 @@ class RoomMemberHandler(BaseHandler): Raises: SynapseError if room alias could not be found. """ - directory_handler = self.hs.get_handlers().directory_handler + directory_handler = self.directory_handler mapping = yield directory_handler.get_association(room_alias) if not mapping: @@ -508,7 +516,7 @@ class RoomMemberHandler(BaseHandler): requester, txn_id ): - if self.hs.config.block_non_admin_invites: + if self.config.block_non_admin_invites: is_requester_admin = yield self.auth.is_server_admin( requester.user, ) @@ -555,7 +563,7 @@ class RoomMemberHandler(BaseHandler): str: the matrix ID of the 3pid, or None if it is not recognized. """ try: - data = yield self.hs.get_simple_http_client().get_json( + data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server,), { "medium": medium, @@ -578,7 +586,7 @@ class RoomMemberHandler(BaseHandler): if server_hostname not in data["signatures"]: raise AuthError(401, "No signature from server %s" % (server_hostname,)) for key_name, signature in data["signatures"][server_hostname].items(): - key_data = yield self.hs.get_simple_http_client().get_json( + key_data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/pubkey/%s" % (id_server_scheme, server_hostname, key_name,), ) @@ -603,7 +611,7 @@ class RoomMemberHandler(BaseHandler): user, txn_id ): - room_state = yield self.hs.get_state_handler().get_current_state(room_id) + room_state = yield self.state_handler.get_current_state(room_id) inviter_display_name = "" inviter_avatar_url = "" @@ -727,15 +735,15 @@ class RoomMemberHandler(BaseHandler): "sender_avatar_url": inviter_avatar_url, } - if self.hs.config.invite_3pid_guest: - registration_handler = self.hs.get_handlers().registration_handler + if self.config.invite_3pid_guest: + registration_handler = self.registration_handler guest_access_token = yield registration_handler.guest_access_token_for( medium=medium, address=address, inviter_user_id=inviter_user_id, ) - guest_user_info = yield self.hs.get_auth().get_user_by_access_token( + guest_user_info = yield self.auth.get_user_by_access_token( guest_access_token ) @@ -744,7 +752,7 @@ class RoomMemberHandler(BaseHandler): "guest_user_id": guest_user_info["user"].to_string(), }) - data = yield self.hs.get_simple_http_client().post_urlencoded_get_json( + data = yield self.simple_http_client.post_urlencoded_get_json( is_url, invite_config ) @@ -793,10 +801,10 @@ class RoomMemberHandler(BaseHandler): # first member event? create_event_id = current_state_ids.get(("m.room.create", "")) if len(current_state_ids) == 1 and create_event_id: - defer.returnValue(self.hs.is_mine_id(create_event_id)) + defer.returnValue(self.is_mine_id(create_event_id)) for etype, state_key in current_state_ids: - if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): + if etype != EventTypes.Member or not self.is_mine_id(state_key): continue event_id = current_state_ids[(etype, state_key)] diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 6073cc6fa..3917eee42 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -180,6 +180,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): self.handlers = hs.get_handlers() self.state = hs.get_state_handler() self.event_creation_handler = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() @defer.inlineCallbacks def on_POST(self, request, room_id): @@ -238,7 +239,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): logger.info("Kicking %r from %r...", user_id, room_id) target_requester = create_requester(user_id) - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=room_id, @@ -247,9 +248,9 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ratelimit=False ) - yield self.handlers.room_member_handler.forget(target_requester.user, room_id) + yield self.room_member_handler.forget(target_requester.user, room_id) - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=new_room_id, diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 817fd4784..9d745174c 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -84,6 +84,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): super(RoomStateEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() self.event_creation_hander = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /room/$roomid/state/$eventtype @@ -156,7 +157,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): if event_type == EventTypes.Member: membership = content.get("membership", None) - event = yield self.handlers.room_member_handler.update_membership( + event = yield self.room_member_handler.update_membership( requester, target=UserID.from_string(state_key), room_id=room_id, @@ -229,7 +230,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): class JoinRoomAliasServlet(ClientV1RestServlet): def __init__(self, hs): super(JoinRoomAliasServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /join/$room_identifier[/$txn_id] @@ -257,7 +258,7 @@ class JoinRoomAliasServlet(ClientV1RestServlet): except Exception: remote_room_hosts = None elif RoomAlias.is_valid(room_identifier): - handler = self.handlers.room_member_handler + handler = self.room_member_handler room_alias = RoomAlias.from_string(room_identifier) room_id, remote_room_hosts = yield handler.lookup_room_alias(room_alias) room_id = room_id.to_string() @@ -266,7 +267,7 @@ class JoinRoomAliasServlet(ClientV1RestServlet): room_identifier, )) - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=requester, target=requester.user, room_id=room_id, @@ -562,7 +563,7 @@ class RoomEventContextServlet(ClientV1RestServlet): class RoomForgetRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomForgetRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): PATTERNS = ("/rooms/(?P[^/]*)/forget") @@ -575,7 +576,7 @@ class RoomForgetRestServlet(ClientV1RestServlet): allow_guest=False, ) - yield self.handlers.room_member_handler.forget( + yield self.room_member_handler.forget( user=requester.user, room_id=room_id, ) @@ -593,7 +594,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomMembershipRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /rooms/$roomid/[invite|join|leave] @@ -622,7 +623,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): content = {} if membership_action == "invite" and self._has_3pid_invite_keys(content): - yield self.handlers.room_member_handler.do_3pid_invite( + yield self.room_member_handler.do_3pid_invite( room_id, requester.user, content["medium"], @@ -644,7 +645,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): if 'reason' in content and membership_action in ['kick', 'ban']: event_content = {'reason': content['reason']} - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=requester, target=target, room_id=room_id, diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index c6f4680a7..0ba62bddc 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -183,7 +183,7 @@ class RegisterRestServlet(RestServlet): self.auth_handler = hs.get_auth_handler() self.registration_handler = hs.get_handlers().registration_handler self.identity_handler = hs.get_handlers().identity_handler - self.room_member_handler = hs.get_handlers().room_member_handler + self.room_member_handler = hs.get_room_member_handler() self.device_handler = hs.get_device_handler() self.macaroon_gen = hs.get_macaroon_generator() diff --git a/synapse/server.py b/synapse/server.py index fbd602d40..5b6effbe3 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -45,6 +45,7 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler +from synapse.handlers.room_member import RoomMemberHandler from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler @@ -145,6 +146,7 @@ class HomeServer(object): 'groups_attestation_signing', 'groups_attestation_renewer', 'spam_checker', + 'room_member_handler', ] def __init__(self, hostname, **kwargs): @@ -382,6 +384,9 @@ class HomeServer(object): def build_spam_checker(self): return SpamChecker(self) + def build_room_member_handler(self): + return RoomMemberHandler(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index a269e6f56..e46534cd3 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -95,7 +95,7 @@ class RoomTypingTestCase(RestTestCase): else: if remotedomains is not None: remotedomains.add(member.domain) - hs.get_handlers().room_member_handler.fetch_room_distributions_into = ( + hs.get_room_member_handler().fetch_room_distributions_into = ( fetch_room_distributions_into ) From f793bc38770caf81dc34b9033d7dd2c9bfc0d79b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 13:56:03 +0000 Subject: [PATCH 051/173] Split out stream store --- synapse/replication/slave/storage/events.py | 54 +-- synapse/storage/__init__.py | 8 - synapse/storage/stream.py | 350 +++++++++++--------- 3 files changed, 202 insertions(+), 210 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f43..517a9f0ec 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -22,9 +22,8 @@ from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore -from synapse.storage.stream import StreamStore +from synapse.storage.stream import StreamWorkerStore from synapse.storage.signatures import SignatureStore -from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -41,34 +40,20 @@ logger = logging.getLogger(__name__) class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, - EventsWorkerStore, StateGroupWorkerStore, + StreamWorkerStore, EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedEventStore, self).__init__(db_conn, hs) self._stream_id_gen = SlavedIdTracker( db_conn, "events", "stream_ordering", ) self._backfill_id_gen = SlavedIdTracker( db_conn, "events", "stream_ordering", step=-1 ) - events_max = self._stream_id_gen.get_current_token() - event_cache_prefill, min_event_val = self._get_cache_dict( - db_conn, "events", - entity_column="room_id", - stream_column="stream_ordering", - max_value=events_max, - ) - self._events_stream_cache = StreamChangeCache( - "EventsRoomStreamChangeCache", min_event_val, - prefilled_cache=event_cache_prefill, - ) - self._membership_stream_cache = StreamChangeCache( - "MembershipStreamChangeCache", events_max, - ) + + super(SlavedEventStore, self).__init__(db_conn, hs) self.stream_ordering_month_ago = 0 - self._stream_order_on_start = self.get_room_max_stream_ordering() # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. @@ -76,30 +61,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, "get_latest_event_ids_in_room" ] - get_recent_event_ids_for_room = ( - StreamStore.__dict__["get_recent_event_ids_for_room"] - ) - has_room_changed_since = DataStore.has_room_changed_since.__func__ - - get_membership_changes_for_user = ( - DataStore.get_membership_changes_for_user.__func__ - ) - get_room_events_max_id = DataStore.get_room_events_max_id.__func__ - get_room_events_stream_for_room = ( - DataStore.get_room_events_stream_for_room.__func__ - ) - get_events_around = DataStore.get_events_around.__func__ - - get_recent_events_for_room = DataStore.get_recent_events_for_room.__func__ - get_room_events_stream_for_rooms = ( - DataStore.get_room_events_stream_for_rooms.__func__ - ) - get_stream_token_for_event = DataStore.get_stream_token_for_event.__func__ - - _set_before_and_after = staticmethod(DataStore._set_before_and_after) - - _get_events_around_txn = DataStore._get_events_around_txn.__func__ - get_backfill_events = DataStore.get_backfill_events.__func__ _get_backfill_events = DataStore._get_backfill_events.__func__ get_missing_events = DataStore.get_missing_events.__func__ @@ -120,8 +81,11 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, get_all_new_events_stream = DataStore.get_all_new_events_stream.__func__ - get_federation_out_pos = DataStore.get_federation_out_pos.__func__ - update_federation_out_pos = DataStore.update_federation_out_pos.__func__ + def get_room_max_stream_ordering(self): + return self._stream_id_gen.get_current_token() + + def get_room_min_stream_ordering(self): + return self._backfill_id_gen.get_current_token() get_latest_event_ids_and_hashes_in_room = ( DataStore.get_latest_event_ids_and_hashes_in_room.__func__ diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0f136f8a0..0ce76d7a8 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -148,14 +148,6 @@ class DataStore(RoomMemberStore, RoomStore, stream_column="stream_ordering", max_value=events_max, ) - self._events_stream_cache = StreamChangeCache( - "EventsRoomStreamChangeCache", min_event_val, - prefilled_cache=event_cache_prefill, - ) - - self._membership_stream_cache = StreamChangeCache( - "MembershipStreamChangeCache", events_max, - ) self._presence_on_startup = self._get_active_presence(db_conn) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 52bdce5be..057f30db3 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -35,13 +35,17 @@ what sort order was used: from twisted.internet import defer -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore + from synapse.util.caches.descriptors import cached from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.storage.engines import PostgresEngine, Sqlite3Engine +import abc import logging @@ -143,81 +147,28 @@ def filter_to_clause(event_filter): return " AND ".join(clauses), args -class StreamStore(SQLBaseStore): - @defer.inlineCallbacks - def get_appservice_room_stream(self, service, from_key, to_key, limit=0): - # NB this lives here instead of appservice.py so we can reuse the - # 'private' StreamToken class in this file. - if limit: - limit = max(limit, MAX_STREAM_SIZE) - else: - limit = MAX_STREAM_SIZE +class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): + __metaclass__ = abc.ABCMeta - # From and to keys should be integers from ordering. - from_id = RoomStreamToken.parse_stream_token(from_key) - to_id = RoomStreamToken.parse_stream_token(to_key) + def __init__(self, db_conn, hs): + super(StreamWorkerStore, self).__init__(db_conn, hs) - if from_key == to_key: - defer.returnValue(([], to_key)) - return - - # select all the events between from/to with a sensible limit - sql = ( - "SELECT e.event_id, e.room_id, e.type, s.state_key, " - "e.stream_ordering FROM events AS e " - "LEFT JOIN state_events as s ON " - "e.event_id = s.event_id " - "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " - "ORDER BY stream_ordering ASC LIMIT %(limit)d " - ) % { - "limit": limit - } - - def f(txn): - # pull out all the events between the tokens - txn.execute(sql, (from_id.stream, to_id.stream,)) - rows = self.cursor_to_dict(txn) - - # Logic: - # - We want ALL events which match the AS room_id regex - # - We want ALL events which match the rooms represented by the AS - # room_alias regex - # - We want ALL events for rooms that AS users have joined. - # This is currently supported via get_app_service_rooms (which is - # used for the Notifier listener rooms). We can't reasonably make a - # SQL query for these room IDs, so we'll pull all the events between - # from/to and filter in python. - rooms_for_as = self._get_app_service_rooms_txn(txn, service) - room_ids_for_as = [r.room_id for r in rooms_for_as] - - def app_service_interested(row): - if row["room_id"] in room_ids_for_as: - return True - - if row["type"] == EventTypes.Member: - if service.is_interested_in_user(row.get("state_key")): - return True - return False - - return [r for r in rows if app_service_interested(r)] - - rows = yield self.runInteraction("get_appservice_room_stream", f) - - ret = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True + events_max = self.get_room_max_stream_ordering() + event_cache_prefill, min_event_val = self._get_cache_dict( + db_conn, "events", + entity_column="room_id", + stream_column="stream_ordering", + max_value=events_max, + ) + self._events_stream_cache = StreamChangeCache( + "EventsRoomStreamChangeCache", min_event_val, + prefilled_cache=event_cache_prefill, + ) + self._membership_stream_cache = StreamChangeCache( + "MembershipStreamChangeCache", events_max, ) - self._set_before_and_after(ret, rows, topo_order=from_id is None) - - if rows: - key = "s%d" % max(r["stream_ordering"] for r in rows) - else: - # Assume we didn't get anything because there was nothing to - # get. - key = to_key - - defer.returnValue((ret, key)) + self._stream_order_on_start = self.get_room_max_stream_ordering() @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, @@ -380,88 +331,6 @@ class StreamStore(SQLBaseStore): defer.returnValue(ret) - @defer.inlineCallbacks - def paginate_room_events(self, room_id, from_key, to_key=None, - direction='b', limit=-1, event_filter=None): - # Tokens really represent positions between elements, but we use - # the convention of pointing to the event before the gap. Hence - # we have a bit of asymmetry when it comes to equalities. - args = [False, room_id] - if direction == 'b': - order = "DESC" - bounds = upper_bound( - RoomStreamToken.parse(from_key), self.database_engine - ) - if to_key: - bounds = "%s AND %s" % (bounds, lower_bound( - RoomStreamToken.parse(to_key), self.database_engine - )) - else: - order = "ASC" - bounds = lower_bound( - RoomStreamToken.parse(from_key), self.database_engine - ) - if to_key: - bounds = "%s AND %s" % (bounds, upper_bound( - RoomStreamToken.parse(to_key), self.database_engine - )) - - filter_clause, filter_args = filter_to_clause(event_filter) - - if filter_clause: - bounds += " AND " + filter_clause - args.extend(filter_args) - - if int(limit) > 0: - args.append(int(limit)) - limit_str = " LIMIT ?" - else: - limit_str = "" - - sql = ( - "SELECT * FROM events" - " WHERE outlier = ? AND room_id = ? AND %(bounds)s" - " ORDER BY topological_ordering %(order)s," - " stream_ordering %(order)s %(limit)s" - ) % { - "bounds": bounds, - "order": order, - "limit": limit_str - } - - def f(txn): - txn.execute(sql, args) - - rows = self.cursor_to_dict(txn) - - if rows: - topo = rows[-1]["topological_ordering"] - toke = rows[-1]["stream_ordering"] - if direction == 'b': - # Tokens are positions between events. - # This token points *after* the last event in the chunk. - # We need it to point to the event before it in the chunk - # when we are going backwards so we subtract one from the - # stream part. - toke -= 1 - next_token = str(RoomStreamToken(topo, toke)) - else: - # TODO (erikj): We should work out what to do here instead. - next_token = to_key if to_key else from_key - - return rows, next_token, - - rows, token = yield self.runInteraction("paginate_room_events", f) - - events = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True - ) - - self._set_before_and_after(events, rows) - - defer.returnValue((events, token)) - @defer.inlineCallbacks def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None): rows, token = yield self.get_recent_event_ids_for_room( @@ -542,7 +411,7 @@ class StreamStore(SQLBaseStore): `room_id` causes it to return the current room specific topological token. """ - token = yield self._stream_id_gen.get_current_token() + token = yield self.get_room_max_stream_ordering() if room_id is None: defer.returnValue("s%d" % (token,)) else: @@ -552,11 +421,13 @@ class StreamStore(SQLBaseStore): ) defer.returnValue("t%d-%d" % (topo, token)) + @abc.abstractmethod def get_room_max_stream_ordering(self): - return self._stream_id_gen.get_current_token() + raise NotImplementedError() + @abc.abstractmethod def get_room_min_stream_ordering(self): - return self._backfill_id_gen.get_current_token() + raise NotImplementedError() def get_stream_token_for_event(self, event_id): """The stream token for an event @@ -832,3 +703,168 @@ class StreamStore(SQLBaseStore): def has_room_changed_since(self, room_id, stream_id): return self._events_stream_cache.has_entity_changed(room_id, stream_id) + + +class StreamStore(StreamWorkerStore): + def get_room_max_stream_ordering(self): + return self._stream_id_gen.get_current_token() + + def get_room_min_stream_ordering(self): + return self._backfill_id_gen.get_current_token() + + @defer.inlineCallbacks + def get_appservice_room_stream(self, service, from_key, to_key, limit=0): + # NB this lives here instead of appservice.py so we can reuse the + # 'private' StreamToken class in this file. + if limit: + limit = max(limit, MAX_STREAM_SIZE) + else: + limit = MAX_STREAM_SIZE + + # From and to keys should be integers from ordering. + from_id = RoomStreamToken.parse_stream_token(from_key) + to_id = RoomStreamToken.parse_stream_token(to_key) + + if from_key == to_key: + defer.returnValue(([], to_key)) + return + + # select all the events between from/to with a sensible limit + sql = ( + "SELECT e.event_id, e.room_id, e.type, s.state_key, " + "e.stream_ordering FROM events AS e " + "LEFT JOIN state_events as s ON " + "e.event_id = s.event_id " + "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " + "ORDER BY stream_ordering ASC LIMIT %(limit)d " + ) % { + "limit": limit + } + + def f(txn): + # pull out all the events between the tokens + txn.execute(sql, (from_id.stream, to_id.stream,)) + rows = self.cursor_to_dict(txn) + + # Logic: + # - We want ALL events which match the AS room_id regex + # - We want ALL events which match the rooms represented by the AS + # room_alias regex + # - We want ALL events for rooms that AS users have joined. + # This is currently supported via get_app_service_rooms (which is + # used for the Notifier listener rooms). We can't reasonably make a + # SQL query for these room IDs, so we'll pull all the events between + # from/to and filter in python. + rooms_for_as = self._get_app_service_rooms_txn(txn, service) + room_ids_for_as = [r.room_id for r in rooms_for_as] + + def app_service_interested(row): + if row["room_id"] in room_ids_for_as: + return True + + if row["type"] == EventTypes.Member: + if service.is_interested_in_user(row.get("state_key")): + return True + return False + + return [r for r in rows if app_service_interested(r)] + + rows = yield self.runInteraction("get_appservice_room_stream", f) + + ret = yield self._get_events( + [r["event_id"] for r in rows], + get_prev_content=True + ) + + self._set_before_and_after(ret, rows, topo_order=from_id is None) + + if rows: + key = "s%d" % max(r["stream_ordering"] for r in rows) + else: + # Assume we didn't get anything because there was nothing to + # get. + key = to_key + + defer.returnValue((ret, key)) + + @defer.inlineCallbacks + def paginate_room_events(self, room_id, from_key, to_key=None, + direction='b', limit=-1, event_filter=None): + # Tokens really represent positions between elements, but we use + # the convention of pointing to the event before the gap. Hence + # we have a bit of asymmetry when it comes to equalities. + args = [False, room_id] + if direction == 'b': + order = "DESC" + bounds = upper_bound( + RoomStreamToken.parse(from_key), self.database_engine + ) + if to_key: + bounds = "%s AND %s" % (bounds, lower_bound( + RoomStreamToken.parse(to_key), self.database_engine + )) + else: + order = "ASC" + bounds = lower_bound( + RoomStreamToken.parse(from_key), self.database_engine + ) + if to_key: + bounds = "%s AND %s" % (bounds, upper_bound( + RoomStreamToken.parse(to_key), self.database_engine + )) + + filter_clause, filter_args = filter_to_clause(event_filter) + + if filter_clause: + bounds += " AND " + filter_clause + args.extend(filter_args) + + if int(limit) > 0: + args.append(int(limit)) + limit_str = " LIMIT ?" + else: + limit_str = "" + + sql = ( + "SELECT * FROM events" + " WHERE outlier = ? AND room_id = ? AND %(bounds)s" + " ORDER BY topological_ordering %(order)s," + " stream_ordering %(order)s %(limit)s" + ) % { + "bounds": bounds, + "order": order, + "limit": limit_str + } + + def f(txn): + txn.execute(sql, args) + + rows = self.cursor_to_dict(txn) + + if rows: + topo = rows[-1]["topological_ordering"] + toke = rows[-1]["stream_ordering"] + if direction == 'b': + # Tokens are positions between events. + # This token points *after* the last event in the chunk. + # We need it to point to the event before it in the chunk + # when we are going backwards so we subtract one from the + # stream part. + toke -= 1 + next_token = str(RoomStreamToken(topo, toke)) + else: + # TODO (erikj): We should work out what to do here instead. + next_token = to_key if to_key else from_key + + return rows, next_token, + + rows, token = yield self.runInteraction("paginate_room_events", f) + + events = yield self._get_events( + [r["event_id"] for r in rows], + get_prev_content=True + ) + + self._set_before_and_after(events, rows) + + defer.returnValue((events, token)) From 22004b524e5264afb0e883bee486f669ea58833c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:59:40 +0000 Subject: [PATCH 052/173] Fix comment typo --- synapse/storage/event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index c08bebe11..848d8bd72 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -67,7 +67,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) - # These get correctly ste by _find_stream_orderings_for_times_txn + # These get correctly set by _find_stream_orderings_for_times_txn self.stream_ordering_month_ago = 0 self.stream_ordering_day_ago = 0 From 872ff95ed49c9cb30ab5f256c5ff539430e658db Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:00:05 +0000 Subject: [PATCH 053/173] Default stream_ordering_*_ago to None --- synapse/storage/event_push_actions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 848d8bd72..716429356 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -68,8 +68,8 @@ class EventPushActionsWorkerStore(SQLBaseStore): super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) # These get correctly set by _find_stream_orderings_for_times_txn - self.stream_ordering_month_ago = 0 - self.stream_ordering_day_ago = 0 + self.stream_ordering_month_ago = None + self.stream_ordering_day_ago = None cur = LoggingTransaction( db_conn.cursor(), From 1b2af116502e6ebcd2ae24178754145e59ee7c24 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:20:57 +0000 Subject: [PATCH 054/173] Document abstract class and method better --- synapse/storage/stream.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 057f30db3..a2527d2a3 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -148,6 +148,11 @@ def filter_to_clause(event_filter): class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): + """This is an abstract base class where subclasses must implement + `get_room_max_stream_ordering` and `get_room_min_stream_ordering` + which can be called in the initializer. + """ + __metaclass__ = abc.ABCMeta def __init__(self, db_conn, hs): @@ -170,6 +175,14 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): self._stream_order_on_start = self.get_room_max_stream_ordering() + @abc.abstractmethod + def get_room_max_stream_ordering(self): + raise NotImplementedError() + + @abc.abstractmethod + def get_room_min_stream_ordering(self): + raise NotImplementedError() + @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, order='DESC'): @@ -421,14 +434,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) defer.returnValue("t%d-%d" % (topo, token)) - @abc.abstractmethod - def get_room_max_stream_ordering(self): - raise NotImplementedError() - - @abc.abstractmethod - def get_room_min_stream_ordering(self): - raise NotImplementedError() - def get_stream_token_for_event(self, event_id): """The stream token for an event Args: From 884b26ae4150f19bd1e020c3eed934e978518a09 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:23:48 +0000 Subject: [PATCH 055/173] Remove unused variables --- synapse/storage/__init__.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0ce76d7a8..22c156c15 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -141,14 +141,6 @@ class DataStore(RoomMemberStore, RoomStore, else: self._cache_id_gen = None - events_max = self._stream_id_gen.get_current_token() - event_cache_prefill, min_event_val = self._get_cache_dict( - db_conn, "events", - entity_column="room_id", - stream_column="stream_ordering", - max_value=events_max, - ) - self._presence_on_startup = self._get_active_presence(db_conn) presence_cache_prefill, min_presence_val = self._get_cache_dict( @@ -196,6 +188,7 @@ class DataStore(RoomMemberStore, RoomStore, "DeviceListFederationStreamChangeCache", device_list_max, ) + events_max = self._stream_id_gen.get_current_token() curr_state_delta_prefill, min_curr_state_delta_id = self._get_cache_dict( db_conn, "current_state_delta_stream", entity_column="room_id", From 7c371834ccbdf33f7070981bf23cbf11d1c1c333 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:40:27 +0000 Subject: [PATCH 056/173] Stub out broken function only used for cache --- synapse/storage/signatures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/signatures.py b/synapse/storage/signatures.py index e6eeb1b64..9e6eaaa53 100644 --- a/synapse/storage/signatures.py +++ b/synapse/storage/signatures.py @@ -25,7 +25,9 @@ from synapse.util.caches.descriptors import cached, cachedList class SignatureWorkerStore(SQLBaseStore): @cached() def get_event_reference_hash(self, event_id): - return self._get_event_reference_hashes_txn(event_id) + # This is a dummy function to allow get_event_reference_hashes + # to use its cache + raise NotImplementedError() @cachedList(cached_method_name="get_event_reference_hash", list_name="event_ids", num_args=1) From 529c026ac149705d0c9948fed71e3f0ca069b759 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:49:12 +0000 Subject: [PATCH 057/173] Move back to hs.is_mine --- synapse/handlers/room_member.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 0329432f5..7ecdf8724 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -43,12 +43,11 @@ class RoomMemberHandler(object): # ought to be separated out a lot better. def __init__(self, hs): + self.hs = hs self.store = hs.get_datastore() self.auth = hs.get_auth() self.state_handler = hs.get_state_handler() self.config = hs.config - self.is_mine = hs.is_mine - self.is_mine_id = hs.is_mine_id self.simple_http_client = hs.get_simple_http_client() self.federation_handler = hs.get_handlers().federation_handler @@ -294,7 +293,7 @@ class RoomMemberHandler(object): if not is_host_in_room: inviter = yield self.get_inviter(target.to_string(), room_id) - if inviter and not self.is_mine(inviter): + if inviter and not self.hs.is_mine(inviter): remote_room_hosts.append(inviter.domain) content["membership"] = Membership.JOIN @@ -319,7 +318,7 @@ class RoomMemberHandler(object): if not inviter: raise SynapseError(404, "Not a known room") - if self.is_mine(inviter): + if self.hs.is_mine(inviter): # the inviter was on our server, but has now left. Carry on # with the normal rejection codepath. # @@ -401,7 +400,7 @@ class RoomMemberHandler(object): "Sender (%s) must be same as requester (%s)" % (sender, requester.user) ) - assert self.is_mine(sender), "Sender must be our own: %s" % (sender,) + assert self.hs.is_mine(sender), "Sender must be our own: %s" % (sender,) else: requester = synapse.types.create_requester(target_user) @@ -801,10 +800,10 @@ class RoomMemberHandler(object): # first member event? create_event_id = current_state_ids.get(("m.room.create", "")) if len(current_state_ids) == 1 and create_event_id: - defer.returnValue(self.is_mine_id(create_event_id)) + defer.returnValue(self.hs.is_mine_id(create_event_id)) for etype, state_key in current_state_ids: - if etype != EventTypes.Member or not self.is_mine_id(state_key): + if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): continue event_id = current_state_ids[(etype, state_key)] From 33bebb63f34aa947a9a48920589cd63d27f1235c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:30:57 +0000 Subject: [PATCH 058/173] Add some caches to help read marker API --- synapse/replication/slave/storage/account_data.py | 1 + synapse/storage/account_data.py | 2 ++ synapse/storage/events.py | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 6c8d2954d..970ac4c24 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -56,6 +56,7 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlaved (row.data_type, row.user_id,) ) self.get_account_data_for_user.invalidate((row.user_id,)) + self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) self._account_data_stream_cache.entity_has_changed( row.user_id, token ) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 466194e96..c062e03d1 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -133,6 +133,7 @@ class AccountDataWorkerStore(SQLBaseStore): for row in rows }) + @cached(num_args=2) def get_account_data_for_room(self, user_id, room_id): """Get all the client account_data for a user for a room. @@ -310,6 +311,7 @@ class AccountDataStore(AccountDataWorkerStore): self._account_data_stream_cache.entity_has_changed(user_id, next_id) self.get_account_data_for_user.invalidate((user_id,)) + self.get_account_data_for_room.invalidate((user_id, room_id,)) result = self._account_data_id_gen.get_current_token() defer.returnValue(result) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index b63392a6c..057b1be4d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -28,7 +28,7 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.types import get_domain_from_id from canonicaljson import encode_canonical_json @@ -2033,7 +2033,7 @@ class EventsStore(EventsWorkerStore): to_2, so_2 = yield self._get_event_ordering(event_id2) defer.returnValue((to_1, so_1) > (to_2, so_2)) - @defer.inlineCallbacks + @cachedInlineCallbacks(max_entries=5000) def _get_event_ordering(self, event_id): res = yield self._simple_select_one( table="events", From a83c514d1f8e1573246e0235c97ba140cbff12db Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:53:04 +0000 Subject: [PATCH 059/173] Improve caching for read_marker API We add a new storage function to get a paritcular type of room account data. This allows us to prefill the cache when updating that acount data. --- synapse/handlers/read_marker.py | 6 ++-- .../replication/slave/storage/account_data.py | 3 ++ synapse/storage/account_data.py | 35 +++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/read_marker.py b/synapse/handlers/read_marker.py index b5b0303d5..5142ae153 100644 --- a/synapse/handlers/read_marker.py +++ b/synapse/handlers/read_marker.py @@ -41,9 +41,9 @@ class ReadMarkerHandler(BaseHandler): """ with (yield self.read_marker_linearizer.queue((room_id, user_id))): - account_data = yield self.store.get_account_data_for_room(user_id, room_id) - - existing_read_marker = account_data.get("m.fully_read", None) + existing_read_marker = yield self.store.get_account_data_for_room_and_type( + user_id, room_id, "m.fully_read", + ) should_update = True diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 970ac4c24..355d14ff7 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -57,6 +57,9 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlaved ) self.get_account_data_for_user.invalidate((row.user_id,)) self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) + self.get_account_data_for_room_and_type.invalidate( + (row.user_id, row.room_id, row.account_data_type,), + ) self._account_data_stream_cache.entity_has_changed( row.user_id, token ) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index c062e03d1..077b4faa5 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -157,6 +157,38 @@ class AccountDataWorkerStore(SQLBaseStore): "get_account_data_for_room", get_account_data_for_room_txn ) + @cached(num_args=3, max_entries=5000) + def get_account_data_for_room_and_type(self, user_id, room_id, account_data_type): + """Get all the client account_data for a user for a room. + + Args: + user_id(str): The user to get the account_data for. + room_id(str): The room to get the account_data for. + account_data_type (str): The account data type to get. + Returns: + A deferred dict of the room account_data for that type, or None if + there isn't any set. + """ + def get_account_data_for_room_and_type_txn(txn): + content_json = self._simple_select_one_onecol_txn( + txn, + table="room_account_data", + keyvalues={ + "user_id": user_id, + "room_id": room_id, + "account_data_type": account_data_type, + }, + retcol="content", + allow_none=True + ) + + return json.loads(content_json) if content_json else None + + return self.runInteraction( + "get_account_data_for_room_and_type", + get_account_data_for_room_and_type_txn, + ) + def get_all_updated_account_data(self, last_global_id, last_room_id, current_id, limit): """Get all the client account_data that has changed on the server @@ -312,6 +344,9 @@ class AccountDataStore(AccountDataWorkerStore): self._account_data_stream_cache.entity_has_changed(user_id, next_id) self.get_account_data_for_user.invalidate((user_id,)) self.get_account_data_for_room.invalidate((user_id, room_id,)) + self.get_account_data_for_room_and_type.prefill( + (user_id, room_id, account_data_type,), content, + ) result = self._account_data_id_gen.get_current_token() defer.returnValue(result) From 4b44f05f1941478e2274b4894bfd025deedb9992 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:00:35 +0000 Subject: [PATCH 060/173] Fewer lies are better --- synapse/storage/account_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 077b4faa5..e70c9423e 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -159,14 +159,14 @@ class AccountDataWorkerStore(SQLBaseStore): @cached(num_args=3, max_entries=5000) def get_account_data_for_room_and_type(self, user_id, room_id, account_data_type): - """Get all the client account_data for a user for a room. + """Get the client account_data of given type for a user for a room. Args: user_id(str): The user to get the account_data for. room_id(str): The room to get the account_data for. account_data_type (str): The account data type to get. Returns: - A deferred dict of the room account_data for that type, or None if + A deferred of the room account_data for that type, or None if there isn't any set. """ def get_account_data_for_room_and_type_txn(txn): From 65cf454fd1dda2def0920973e90de3ff305462d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:10:40 +0000 Subject: [PATCH 061/173] Remove unused DataStore --- synapse/replication/slave/storage/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f0d70a673..b1f64ef0d 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -16,7 +16,6 @@ import logging from synapse.api.constants import EventTypes -from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationWorkerStore from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore From 89b7232ff8ac7a7d3dad74b437d06678a6adca5e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:49:37 +0000 Subject: [PATCH 062/173] Fix typo in getting replication account data processing --- synapse/replication/slave/storage/account_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 355d14ff7..d9ba6d69b 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -58,7 +58,7 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlaved self.get_account_data_for_user.invalidate((row.user_id,)) self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) self.get_account_data_for_room_and_type.invalidate( - (row.user_id, row.room_id, row.account_data_type,), + (row.user_id, row.room_id, row.data_type,), ) self._account_data_stream_cache.entity_has_changed( row.user_id, token From d960d23830cb8bffe49a1eafed21bf66c25e4235 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Mar 2018 11:03:18 +0000 Subject: [PATCH 063/173] Add missing yield during 3pid signature checks --- synapse/handlers/room_member.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7ecdf8724..ed3b97730 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -573,7 +573,7 @@ class RoomMemberHandler(object): if "mxid" in data: if "signatures" not in data: raise AuthError(401, "No signatures on 3pid binding") - self.verify_any_signature(data, id_server) + yield self.verify_any_signature(data, id_server) defer.returnValue(data["mxid"]) except IOError as e: From fafa3e7114bdadd52f190892821ae61862ebfb2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 18:19:34 +0000 Subject: [PATCH 064/173] Split registration store --- .../replication/slave/storage/registration.py | 18 +-- synapse/storage/registration.py | 118 +++++++++--------- 2 files changed, 64 insertions(+), 72 deletions(-) diff --git a/synapse/replication/slave/storage/registration.py b/synapse/replication/slave/storage/registration.py index e27c7332d..7323bf0f1 100644 --- a/synapse/replication/slave/storage/registration.py +++ b/synapse/replication/slave/storage/registration.py @@ -14,20 +14,8 @@ # limitations under the License. from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.storage.registration import RegistrationStore +from synapse.storage.registration import RegistrationWorkerStore -class SlavedRegistrationStore(BaseSlavedStore): - def __init__(self, db_conn, hs): - super(SlavedRegistrationStore, self).__init__(db_conn, hs) - - # TODO: use the cached version and invalidate deleted tokens - get_user_by_access_token = RegistrationStore.__dict__[ - "get_user_by_access_token" - ] - - _query_for_auth = DataStore._query_for_auth.__func__ - get_user_by_id = RegistrationStore.__dict__[ - "get_user_by_id" - ] +class SlavedRegistrationStore(RegistrationWorkerStore, BaseSlavedStore): + pass diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 95f75d6df..d809b2ba4 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -19,10 +19,70 @@ from twisted.internet import defer from synapse.api.errors import StoreError, Codes from synapse.storage import background_updates +from synapse.storage._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -class RegistrationStore(background_updates.BackgroundUpdateStore): +class RegistrationWorkerStore(SQLBaseStore): + @cached() + def get_user_by_id(self, user_id): + return self._simple_select_one( + table="users", + keyvalues={ + "name": user_id, + }, + retcols=["name", "password_hash", "is_guest"], + allow_none=True, + desc="get_user_by_id", + ) + + @cached() + def get_user_by_access_token(self, token): + """Get a user from the given access token. + + Args: + token (str): The access token of a user. + Returns: + defer.Deferred: None, if the token did not match, otherwise dict + including the keys `name`, `is_guest`, `device_id`, `token_id`. + """ + return self.runInteraction( + "get_user_by_access_token", + self._query_for_auth, + token + ) + + @defer.inlineCallbacks + def is_server_admin(self, user): + res = yield self._simple_select_one_onecol( + table="users", + keyvalues={"name": user.to_string()}, + retcol="admin", + allow_none=True, + desc="is_server_admin", + ) + + defer.returnValue(res if res else False) + + def _query_for_auth(self, txn, token): + sql = ( + "SELECT users.name, users.is_guest, access_tokens.id as token_id," + " access_tokens.device_id" + " FROM users" + " INNER JOIN access_tokens on users.name = access_tokens.user_id" + " WHERE token = ?" + ) + + txn.execute(sql, (token,)) + rows = self.cursor_to_dict(txn) + if rows: + return rows[0] + + return None + + +class RegistrationStore(RegistrationWorkerStore, + background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): super(RegistrationStore, self).__init__(db_conn, hs) @@ -187,18 +247,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): ) txn.call_after(self.is_guest.invalidate, (user_id,)) - @cached() - def get_user_by_id(self, user_id): - return self._simple_select_one( - table="users", - keyvalues={ - "name": user_id, - }, - retcols=["name", "password_hash", "is_guest"], - allow_none=True, - desc="get_user_by_id", - ) - def get_users_by_id_case_insensitive(self, user_id): """Gets users that match user_id case insensitively. Returns a mapping of user_id -> password_hash. @@ -304,34 +352,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): return self.runInteraction("delete_access_token", f) - @cached() - def get_user_by_access_token(self, token): - """Get a user from the given access token. - - Args: - token (str): The access token of a user. - Returns: - defer.Deferred: None, if the token did not match, otherwise dict - including the keys `name`, `is_guest`, `device_id`, `token_id`. - """ - return self.runInteraction( - "get_user_by_access_token", - self._query_for_auth, - token - ) - - @defer.inlineCallbacks - def is_server_admin(self, user): - res = yield self._simple_select_one_onecol( - table="users", - keyvalues={"name": user.to_string()}, - retcol="admin", - allow_none=True, - desc="is_server_admin", - ) - - defer.returnValue(res if res else False) - @cachedInlineCallbacks() def is_guest(self, user_id): res = yield self._simple_select_one_onecol( @@ -344,22 +364,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): defer.returnValue(res if res else False) - def _query_for_auth(self, txn, token): - sql = ( - "SELECT users.name, users.is_guest, access_tokens.id as token_id," - " access_tokens.device_id" - " FROM users" - " INNER JOIN access_tokens on users.name = access_tokens.user_id" - " WHERE token = ?" - ) - - txn.execute(sql, (token,)) - rows = self.cursor_to_dict(txn) - if rows: - return rows[0] - - return None - @defer.inlineCallbacks def user_add_threepid(self, user_id, medium, address, validated_at, added_at): yield self._simple_upsert("user_threepids", { From efb79820b4924e13b2c7d1145cf891fd5d441c2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Mar 2018 14:43:29 +0000 Subject: [PATCH 065/173] Fix bug with delayed cache invalidation stream We poked the notifier before updated the current token for the cache invalidation stream. This mean that sometimes the update wouldn't be sent until the next time a cache was invalidated. --- synapse/storage/_base.py | 26 ++++++++++++++------------ synapse/storage/event_push_actions.py | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 68125006e..2fbebd490 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -48,16 +48,16 @@ class LoggingTransaction(object): passed to the constructor. Adds logging and metrics to the .execute() method.""" __slots__ = [ - "txn", "name", "database_engine", "after_callbacks", "final_callbacks", + "txn", "name", "database_engine", "after_callbacks", "exception_callbacks", ] def __init__(self, txn, name, database_engine, after_callbacks, - final_callbacks): + exception_callbacks): object.__setattr__(self, "txn", txn) object.__setattr__(self, "name", name) object.__setattr__(self, "database_engine", database_engine) object.__setattr__(self, "after_callbacks", after_callbacks) - object.__setattr__(self, "final_callbacks", final_callbacks) + object.__setattr__(self, "exception_callbacks", exception_callbacks) def call_after(self, callback, *args, **kwargs): """Call the given callback on the main twisted thread after the @@ -66,8 +66,8 @@ class LoggingTransaction(object): """ self.after_callbacks.append((callback, args, kwargs)) - def call_finally(self, callback, *args, **kwargs): - self.final_callbacks.append((callback, args, kwargs)) + def call_on_exception(self, callback, *args, **kwargs): + self.exception_callbacks.append((callback, args, kwargs)) def __getattr__(self, name): return getattr(self.txn, name) @@ -215,7 +215,7 @@ class SQLBaseStore(object): self._clock.looping_call(loop, 10000) - def _new_transaction(self, conn, desc, after_callbacks, final_callbacks, + def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks, logging_context, func, *args, **kwargs): start = time.time() * 1000 txn_id = self._TXN_ID @@ -236,7 +236,7 @@ class SQLBaseStore(object): txn = conn.cursor() txn = LoggingTransaction( txn, name, self.database_engine, after_callbacks, - final_callbacks, + exception_callbacks, ) r = func(txn, *args, **kwargs) conn.commit() @@ -308,11 +308,11 @@ class SQLBaseStore(object): current_context = LoggingContext.current_context() after_callbacks = [] - final_callbacks = [] + exception_callbacks = [] def inner_func(conn, *args, **kwargs): return self._new_transaction( - conn, desc, after_callbacks, final_callbacks, current_context, + conn, desc, after_callbacks, exception_callbacks, current_context, func, *args, **kwargs ) @@ -321,9 +321,10 @@ class SQLBaseStore(object): for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) - finally: - for after_callback, after_args, after_kwargs in final_callbacks: + except: # noqa: E722, as we reraise the exception this is fine. + for after_callback, after_args, after_kwargs in exception_callbacks: after_callback(*after_args, **after_kwargs) + raise defer.returnValue(result) @@ -1000,7 +1001,8 @@ class SQLBaseStore(object): # __exit__ called after the transaction finishes. ctx = self._cache_id_gen.get_next() stream_id = ctx.__enter__() - txn.call_finally(ctx.__exit__, None, None, None) + txn.call_on_exception(ctx.__exit__, None, None, None) + txn.call_after(ctx.__exit__, None, None, None) txn.call_after(self.hs.get_notifier().on_new_replication_data) self._simple_insert_txn( diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 716429356..912e8db1d 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -76,7 +76,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): name="_find_stream_orderings_for_times_txn", database_engine=self.database_engine, after_callbacks=[], - final_callbacks=[], + exception_callbacks=[], ) self._find_stream_orderings_for_times_txn(cur) cur.close() From 06a14876e5d78891a5b3ae6c0f454faefd696e79 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 11:53:39 +0000 Subject: [PATCH 066/173] Add find_first_stream_ordering_after_ts Expose this as a public function which can be called outside a txn --- synapse/storage/event_push_actions.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 716429356..54b54dcc6 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -489,6 +489,27 @@ class EventPushActionsWorkerStore(SQLBaseStore): self.stream_ordering_day_ago ) + def find_first_stream_ordering_after_ts(self, ts): + """Gets the stream ordering corresponding to a given timestamp. + + Specifically, finds the stream_ordering of the first event that was + received after the timestamp. This is done by a binary search on the + events table, since there is no index on received_ts, so is + relatively slow. + + Args: + ts (int): timestamp in millis + + Returns: + Deferred[int]: stream ordering of the first event received after + the timestamp + """ + return self.runInteraction( + "_find_first_stream_ordering_after_ts_txn", + self._find_first_stream_ordering_after_ts_txn, + ts, + ) + def _find_first_stream_ordering_after_ts_txn(self, txn, ts): """ Find the stream_ordering of the first event that was received after From c818fcab1152439d79b03494816775b9ee3db613 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 11:47:48 +0000 Subject: [PATCH 067/173] Test and fix find_first_stream_ordering_after_ts It seemed to suffer from a bunch of off-by-one errors. --- synapse/storage/event_push_actions.py | 66 ++++++++++++++++++----- tests/storage/test_event_push_actions.py | 67 ++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 14 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 54b54dcc6..a8c303e11 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -493,15 +493,15 @@ class EventPushActionsWorkerStore(SQLBaseStore): """Gets the stream ordering corresponding to a given timestamp. Specifically, finds the stream_ordering of the first event that was - received after the timestamp. This is done by a binary search on the - events table, since there is no index on received_ts, so is + received on or after the timestamp. This is done by a binary search on + the events table, since there is no index on received_ts, so is relatively slow. Args: ts (int): timestamp in millis Returns: - Deferred[int]: stream ordering of the first event received after + Deferred[int]: stream ordering of the first event received on/after the timestamp """ return self.runInteraction( @@ -510,16 +510,24 @@ class EventPushActionsWorkerStore(SQLBaseStore): ts, ) - def _find_first_stream_ordering_after_ts_txn(self, txn, ts): + @staticmethod + def _find_first_stream_ordering_after_ts_txn(txn, ts): """ - Find the stream_ordering of the first event that was received after - a given timestamp. This is relatively slow as there is no index on - received_ts but we can then use this to delete push actions before + Find the stream_ordering of the first event that was received on or + after a given timestamp. This is relatively slow as there is no index + on received_ts but we can then use this to delete push actions before this. received_ts must necessarily be in the same order as stream_ordering and stream_ordering is indexed, so we manually binary search using stream_ordering + + Args: + txn (twisted.enterprise.adbapi.Transaction): + ts (int): timestamp to search for + + Returns: + int: stream ordering """ txn.execute("SELECT MAX(stream_ordering) FROM events") max_stream_ordering = txn.fetchone()[0] @@ -527,23 +535,53 @@ class EventPushActionsWorkerStore(SQLBaseStore): if max_stream_ordering is None: return 0 + # We want the first stream_ordering in which received_ts is greater + # than or equal to ts. Call this point X. + # + # We maintain the invariants: + # + # range_start <= X <= range_end + # range_start = 0 - range_end = max_stream_ordering + range_end = max_stream_ordering + 1 + # Given a stream_ordering, look up the timestamp at that + # stream_ordering. + # + # The array may be sparse (we may be missing some stream_orderings). + # We treat the gaps as the same as having the same value as the + # preceding entry, because we will pick the lowest stream_ordering + # which satisfies our requirement of received_ts >= ts. + # + # For example, if our array of events indexed by stream_ordering is + # [10, , 20], we should treat this as being equivalent to + # [10, 10, 20]. + # sql = ( "SELECT received_ts FROM events" - " WHERE stream_ordering > ?" - " ORDER BY stream_ordering" + " WHERE stream_ordering <= ?" + " ORDER BY stream_ordering DESC" " LIMIT 1" ) - while range_end - range_start > 1: - middle = int((range_end + range_start) / 2) + while range_end - range_start > 0: + middle = (range_end + range_start) // 2 txn.execute(sql, (middle,)) - middle_ts = txn.fetchone()[0] + row = txn.fetchone() + if row is None: + # no rows with stream_ordering<=middle + range_start = middle + 1 + continue + + middle_ts = row[0] if ts > middle_ts: - range_start = middle + # we got a timestamp lower than the one we were looking for. + # definitely need to look higher: X > middle. + range_start = middle + 1 else: + # we got a timestamp higher than (or the same as) the one we + # were looking for. We aren't yet sure about the point we + # looked up, but we can be sure that X <= middle. range_end = middle return range_end diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 6c1aad149..dbaaa12e2 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -127,3 +127,70 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): yield _assert_counts(1, 1) yield _rotate(10) yield _assert_counts(1, 1) + + @tests.unittest.DEBUG + @defer.inlineCallbacks + def test_find_first_stream_ordering_after_ts(self): + def add_event(so, ts): + return self.store._simple_insert("events", { + "stream_ordering": so, + "received_ts": ts, + "event_id": "event%i" % so, + "type": "", + "room_id": "", + "content": "", + "processed": True, + "outlier": False, + "topological_ordering": 0, + "depth": 0, + }) + + # start with the base case where there are no events in the table + r = yield self.store.find_first_stream_ordering_after_ts(11) + self.assertEqual(r, 0) + + # now with one event + yield add_event(2, 10) + r = yield self.store.find_first_stream_ordering_after_ts(9) + self.assertEqual(r, 2) + r = yield self.store.find_first_stream_ordering_after_ts(10) + self.assertEqual(r, 2) + r = yield self.store.find_first_stream_ordering_after_ts(11) + self.assertEqual(r, 3) + + # add a bunch of dummy events to the events table + for (stream_ordering, ts) in ( + (3, 110), + (4, 120), + (5, 120), + (10, 130), + (20, 140), + ): + yield add_event(stream_ordering, ts) + + r = yield self.store.find_first_stream_ordering_after_ts(110) + self.assertEqual(r, 3, + "First event after 110ms should be 3, was %i" % r) + + # 4 and 5 are both after 12: we want 4 rather than 5 + r = yield self.store.find_first_stream_ordering_after_ts(120) + self.assertEqual(r, 4, + "First event after 120ms should be 4, was %i" % r) + + r = yield self.store.find_first_stream_ordering_after_ts(129) + self.assertEqual(r, 10, + "First event after 129ms should be 10, was %i" % r) + + # check we can get the last event + r = yield self.store.find_first_stream_ordering_after_ts(140) + self.assertEqual(r, 20, + "First event after 14ms should be 20, was %i" % r) + + # off the end + r = yield self.store.find_first_stream_ordering_after_ts(160) + self.assertEqual(r, 21) + + # check we can find an event at ordering zero + yield add_event(0, 5) + r = yield self.store.find_first_stream_ordering_after_ts(1) + self.assertEqual(r, 0) From 2c911d75e820e321c3e9c885f74ffeabd666b60f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 12:24:49 +0000 Subject: [PATCH 068/173] Fix comment typo --- tests/storage/test_event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index dbaaa12e2..575374c6a 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -172,7 +172,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): self.assertEqual(r, 3, "First event after 110ms should be 3, was %i" % r) - # 4 and 5 are both after 12: we want 4 rather than 5 + # 4 and 5 are both after 120: we want 4 rather than 5 r = yield self.store.find_first_stream_ordering_after_ts(120) self.assertEqual(r, 4, "First event after 120ms should be 4, was %i" % r) From f8bfcd7e0d2fc6399eb654a41773cd603b4037fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Mar 2018 23:20:54 +0000 Subject: [PATCH 069/173] Provide a means to pass a timestamp to purge_history --- docs/admin_api/purge_history_api.rst | 11 +++++- synapse/handlers/message.py | 14 +++---- synapse/rest/client/v1/admin.py | 58 ++++++++++++++++++++++++++-- synapse/storage/stream.py | 27 +++++++++++++ 4 files changed, 96 insertions(+), 14 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index a3a17e9f9..acf1bc574 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -8,9 +8,9 @@ Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. -The API is simply: +The API is: -``POST /_matrix/client/r0/admin/purge_history//`` +``POST /_matrix/client/r0/admin/purge_history/[/]`` including an ``access_token`` of a server admin. @@ -25,3 +25,10 @@ To delete local events as well, set ``delete_local_events`` in the body: { "delete_local_events": true } + +The caller must specify the point in the room to purge up to. This can be +specified by including an event_id in the URI, or by setting a +``purge_up_to_event_id`` or ``purge_up_to_ts`` in the request body. If an event +id is given, that event (and others at the same graph depth) will be retained. +If ``purge_up_to_ts`` is given, it should be a timestamp since the unix epoch, +in milliseconds. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 7d28c2745..dd00d8a86 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -52,16 +52,12 @@ class MessageHandler(BaseHandler): self.pagination_lock = ReadWriteLock() @defer.inlineCallbacks - def purge_history(self, room_id, event_id, delete_local_events=False): - event = yield self.store.get_event(event_id) - - if event.room_id != room_id: - raise SynapseError(400, "Event is for wrong room.") - - depth = event.depth - + def purge_history(self, room_id, topological_ordering, + delete_local_events=False): with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, depth, delete_local_events) + yield self.store.purge_history( + room_id, topological_ordering, delete_local_events, + ) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 3917eee42..dcf6215da 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.api.constants import Membership -from synapse.api.errors import AuthError, SynapseError +from synapse.api.errors import AuthError, SynapseError, Codes from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request @@ -114,12 +114,18 @@ class PurgeMediaCacheRestServlet(ClientV1RestServlet): class PurgeHistoryRestServlet(ClientV1RestServlet): PATTERNS = client_path_patterns( - "/admin/purge_history/(?P[^/]*)/(?P[^/]*)" + "/admin/purge_history/(?P[^/]*)(/(?P[^/]+))?" ) def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer) + """ super(PurgeHistoryRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.store = hs.get_datastore() @defer.inlineCallbacks def on_POST(self, request, room_id, event_id): @@ -133,8 +139,54 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): delete_local_events = bool(body.get("delete_local_events", False)) + # establish the topological ordering we should keep events from. The + # user can provide an event_id in the URL or the request body, or can + # provide a timestamp in the request body. + if event_id is None: + event_id = body.get('purge_up_to_event_id') + + if event_id is not None: + event = yield self.store.get_event(event_id) + + if event.room_id != room_id: + raise SynapseError(400, "Event is for wrong room.") + + depth = event.depth + logger.info( + "[purge] purging up to depth %i (event_id %s)", + depth, event_id, + ) + elif 'purge_up_to_ts' in body: + ts = body['purge_up_to_ts'] + if not isinstance(ts, int): + raise SynapseError( + 400, "purge_up_to_ts must be an int", + errcode=Codes.BAD_JSON, + ) + + stream_ordering = ( + yield self.store.find_first_stream_ordering_after_ts(ts) + ) + + (_, depth, _) = ( + yield self.store.get_room_event_after_stream_ordering( + room_id, stream_ordering, + ) + ) + logger.info( + "[purge] purging up to depth %i (received_ts %i => " + "stream_ordering %i)", + depth, ts, stream_ordering, + ) + else: + raise SynapseError( + 400, + "must specify purge_up_to_event_id or purge_up_to_ts", + errcode=Codes.BAD_JSON, + ) + yield self.handlers.message_handler.purge_history( - room_id, event_id, + room_id, depth, delete_local_events=delete_local_events, ) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index a2527d2a3..515a04699 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -416,6 +416,33 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): "get_recent_events_for_room", get_recent_events_for_room_txn ) + def get_room_event_after_stream_ordering(self, room_id, stream_ordering): + """Gets details of the first event in a room at or after a stream ordering + + Args: + room_id (str): + stream_ordering (int): + + Returns: + Deferred[(int, int, str)]: + (stream ordering, topological ordering, event_id) + """ + def _f(txn): + sql = ( + "SELECT stream_ordering, topological_ordering, event_id" + " FROM events" + " WHERE room_id = ? AND stream_ordering >= ?" + " AND NOT outlier" + " ORDER BY stream_ordering" + " LIMIT 1" + ) + txn.execute(sql, (room_id, stream_ordering, )) + return txn.fetchone() + + return self.runInteraction( + "get_room_event_after_stream_ordering", _f, + ) + @defer.inlineCallbacks def get_room_events_max_id(self, room_id=None): """Returns the current token for rooms stream. From 2e223163ff59fa7d6030654dd8c74e58f8aa1deb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:11:30 +0000 Subject: [PATCH 070/173] Split Directory store --- .../replication/slave/storage/directory.py | 8 ++-- synapse/storage/directory.py | 45 ++++++++++--------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/synapse/replication/slave/storage/directory.py b/synapse/replication/slave/storage/directory.py index 7301d885f..6deecd396 100644 --- a/synapse/replication/slave/storage/directory.py +++ b/synapse/replication/slave/storage/directory.py @@ -14,10 +14,8 @@ # limitations under the License. from ._base import BaseSlavedStore -from synapse.storage.directory import DirectoryStore +from synapse.storage.directory import DirectoryWorkerStore -class DirectoryStore(BaseSlavedStore): - get_aliases_for_room = DirectoryStore.__dict__[ - "get_aliases_for_room" - ] +class DirectoryStore(DirectoryWorkerStore, BaseSlavedStore): + pass diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 79e7c540a..041b0b457 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -29,8 +29,7 @@ RoomAliasMapping = namedtuple( ) -class DirectoryStore(SQLBaseStore): - +class DirectoryWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_association_from_room_alias(self, room_alias): """ Get's the room_id and server list for a given room_alias @@ -69,6 +68,28 @@ class DirectoryStore(SQLBaseStore): RoomAliasMapping(room_id, room_alias.to_string(), servers) ) + def get_room_alias_creator(self, room_alias): + return self._simple_select_one_onecol( + table="room_aliases", + keyvalues={ + "room_alias": room_alias, + }, + retcol="creator", + desc="get_room_alias_creator", + allow_none=True + ) + + @cached(max_entries=5000) + def get_aliases_for_room(self, room_id): + return self._simple_select_onecol( + "room_aliases", + {"room_id": room_id}, + "room_alias", + desc="get_aliases_for_room", + ) + + +class DirectoryStore(DirectoryWorkerStore): @defer.inlineCallbacks def create_room_alias_association(self, room_alias, room_id, servers, creator=None): """ Creates an associatin between a room alias and room_id/servers @@ -116,17 +137,6 @@ class DirectoryStore(SQLBaseStore): ) defer.returnValue(ret) - def get_room_alias_creator(self, room_alias): - return self._simple_select_one_onecol( - table="room_aliases", - keyvalues={ - "room_alias": room_alias, - }, - retcol="creator", - desc="get_room_alias_creator", - allow_none=True - ) - @defer.inlineCallbacks def delete_room_alias(self, room_alias): room_id = yield self.runInteraction( @@ -162,15 +172,6 @@ class DirectoryStore(SQLBaseStore): return room_id - @cached(max_entries=5000) - def get_aliases_for_room(self, room_id): - return self._simple_select_onecol( - "room_aliases", - {"room_id": room_id}, - "room_alias", - desc="get_aliases_for_room", - ) - def update_aliases_for_room(self, old_room_id, new_room_id, creator): def _update_aliases_for_room_txn(txn): sql = "UPDATE room_aliases SET room_id = ?, creator = ? WHERE room_id = ?" From 69ce365b793ac35c8e9ee6c3b3a0d04d0e61db4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:12:22 +0000 Subject: [PATCH 071/173] Fix cache invalidation on deletion --- synapse/storage/directory.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 041b0b457..d0c005975 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -145,7 +145,6 @@ class DirectoryStore(DirectoryWorkerStore): room_alias, ) - self.get_aliases_for_room.invalidate((room_id,)) defer.returnValue(room_id) def _delete_room_alias_txn(self, txn, room_alias): @@ -170,6 +169,10 @@ class DirectoryStore(DirectoryWorkerStore): (room_alias.to_string(),) ) + self._invalidate_cache_and_stream( + txn, self.get_aliases_for_room, (room_id,) + ) + return room_id def update_aliases_for_room(self, old_room_id, new_room_id, creator): From d4ffe61d4fb71953bff0f94ff5d1603afe7d46f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:42:57 +0000 Subject: [PATCH 072/173] Remove ability for AS users to call /events and /sync This functionality has been deprecated for a while as well as being broken for a while. Instead of fixing it lets just remove it entirely. See: https://github.com/matrix-org/matrix-doc/issues/1144 --- synapse/handlers/room.py | 9 ++-- synapse/handlers/sync.py | 10 +++-- synapse/storage/appservice.py | 82 +++-------------------------------- synapse/storage/stream.py | 76 -------------------------------- 4 files changed, 14 insertions(+), 163 deletions(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6c425828c..8df8fcbba 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -475,12 +475,9 @@ class RoomEventSource(object): user.to_string() ) if app_service: - events, end_key = yield self.store.get_appservice_room_stream( - service=app_service, - from_key=from_key, - to_key=to_key, - limit=limit, - ) + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: room_events = yield self.store.get_membership_changes_for_user( user.to_string(), from_key, to_key diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b12988f3c..56b86356f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -998,8 +998,9 @@ class SyncHandler(object): app_service = self.store.get_app_service_by_user_id(user_id) if app_service: - rooms = yield self.store.get_app_service_rooms(app_service) - joined_room_ids = set(r.room_id for r in rooms) + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: joined_room_ids = yield self.store.get_rooms_for_user(user_id) @@ -1030,8 +1031,9 @@ class SyncHandler(object): app_service = self.store.get_app_service_by_user_id(user_id) if app_service: - rooms = yield self.store.get_app_service_rooms(app_service) - joined_room_ids = set(r.room_id for r in rooms) + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: joined_room_ids = yield self.store.get_rooms_for_user(user_id) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 90fb51d43..12ea8a158 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -18,11 +18,9 @@ import re import simplejson as json from twisted.internet import defer -from synapse.api.constants import Membership from synapse.appservice import AppServiceTransaction from synapse.config.appservice import load_appservices from synapse.storage.events import EventsWorkerStore -from synapse.storage.roommember import RoomsForUser from ._base import SQLBaseStore @@ -115,81 +113,11 @@ class ApplicationServiceWorkerStore(SQLBaseStore): class ApplicationServiceStore(ApplicationServiceWorkerStore): - - def __init__(self, db_conn, hs): - super(ApplicationServiceStore, self).__init__(db_conn, hs) - self.hostname = hs.hostname - - def get_app_service_rooms(self, service): - """Get a list of RoomsForUser for this application service. - - Application services may be "interested" in lots of rooms depending on - the room ID, the room aliases, or the members in the room. This function - takes all of these into account and returns a list of RoomsForUser which - represent the entire list of room IDs that this application service - wants to know about. - - Args: - service: The application service to get a room list for. - Returns: - A list of RoomsForUser. - """ - return self.runInteraction( - "get_app_service_rooms", - self._get_app_service_rooms_txn, - service, - ) - - def _get_app_service_rooms_txn(self, txn, service): - # get all rooms matching the room ID regex. - room_entries = self._simple_select_list_txn( - txn=txn, table="rooms", keyvalues=None, retcols=["room_id"] - ) - matching_room_list = set([ - r["room_id"] for r in room_entries if - service.is_interested_in_room(r["room_id"]) - ]) - - # resolve room IDs for matching room alias regex. - room_alias_mappings = self._simple_select_list_txn( - txn=txn, table="room_aliases", keyvalues=None, - retcols=["room_id", "room_alias"] - ) - matching_room_list |= set([ - r["room_id"] for r in room_alias_mappings if - service.is_interested_in_alias(r["room_alias"]) - ]) - - # get all rooms for every user for this AS. This is scoped to users on - # this HS only. - user_list = self._simple_select_list_txn( - txn=txn, table="users", keyvalues=None, retcols=["name"] - ) - user_list = [ - u["name"] for u in user_list if - service.is_interested_in_user(u["name"]) - ] - rooms_for_user_matching_user_id = set() # RoomsForUser list - for user_id in user_list: - # FIXME: This assumes this store is linked with RoomMemberStore :( - rooms_for_user = self._get_rooms_for_user_where_membership_is_txn( - txn=txn, - user_id=user_id, - membership_list=[Membership.JOIN] - ) - rooms_for_user_matching_user_id |= set(rooms_for_user) - - # make RoomsForUser tuples for room ids and aliases which are not in the - # main rooms_for_user_list - e.g. they are rooms which do not have AS - # registered users in it. - known_room_ids = [r.room_id for r in rooms_for_user_matching_user_id] - missing_rooms_for_user = [ - RoomsForUser(r, service.sender, "join") for r in - matching_room_list if r not in known_room_ids - ] - rooms_for_user_matching_user_id |= set(missing_rooms_for_user) - - return rooms_for_user_matching_user_id + # This is currently empty due to there not being any AS storage functions + # that can't be run on the workers. Since this may change in future, and + # to keep consistency with the other stores, we keep this empty class for + # now. + pass class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index a2527d2a3..b78151cd8 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -39,7 +39,6 @@ from synapse.storage._base import SQLBaseStore from synapse.storage.events import EventsWorkerStore from synapse.util.caches.descriptors import cached -from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn @@ -717,81 +716,6 @@ class StreamStore(StreamWorkerStore): def get_room_min_stream_ordering(self): return self._backfill_id_gen.get_current_token() - @defer.inlineCallbacks - def get_appservice_room_stream(self, service, from_key, to_key, limit=0): - # NB this lives here instead of appservice.py so we can reuse the - # 'private' StreamToken class in this file. - if limit: - limit = max(limit, MAX_STREAM_SIZE) - else: - limit = MAX_STREAM_SIZE - - # From and to keys should be integers from ordering. - from_id = RoomStreamToken.parse_stream_token(from_key) - to_id = RoomStreamToken.parse_stream_token(to_key) - - if from_key == to_key: - defer.returnValue(([], to_key)) - return - - # select all the events between from/to with a sensible limit - sql = ( - "SELECT e.event_id, e.room_id, e.type, s.state_key, " - "e.stream_ordering FROM events AS e " - "LEFT JOIN state_events as s ON " - "e.event_id = s.event_id " - "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " - "ORDER BY stream_ordering ASC LIMIT %(limit)d " - ) % { - "limit": limit - } - - def f(txn): - # pull out all the events between the tokens - txn.execute(sql, (from_id.stream, to_id.stream,)) - rows = self.cursor_to_dict(txn) - - # Logic: - # - We want ALL events which match the AS room_id regex - # - We want ALL events which match the rooms represented by the AS - # room_alias regex - # - We want ALL events for rooms that AS users have joined. - # This is currently supported via get_app_service_rooms (which is - # used for the Notifier listener rooms). We can't reasonably make a - # SQL query for these room IDs, so we'll pull all the events between - # from/to and filter in python. - rooms_for_as = self._get_app_service_rooms_txn(txn, service) - room_ids_for_as = [r.room_id for r in rooms_for_as] - - def app_service_interested(row): - if row["room_id"] in room_ids_for_as: - return True - - if row["type"] == EventTypes.Member: - if service.is_interested_in_user(row.get("state_key")): - return True - return False - - return [r for r in rows if app_service_interested(r)] - - rows = yield self.runInteraction("get_appservice_room_stream", f) - - ret = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True - ) - - self._set_before_and_after(ret, rows, topo_order=from_id is None) - - if rows: - key = "s%d" % max(r["stream_ordering"] for r in rows) - else: - # Assume we didn't get anything because there was nothing to - # get. - key = to_key - - defer.returnValue((ret, key)) - @defer.inlineCallbacks def paginate_room_events(self, room_id, from_key, to_key=None, direction='b', limit=-1, event_filter=None): From ed9b5eced4f17dfb0a92167a6281e13054821d6f Mon Sep 17 00:00:00 2001 From: Krombel Date: Mon, 5 Mar 2018 17:51:09 +0100 Subject: [PATCH 073/173] use bcrypt.checkpw in bcrypt 3.1.0 checkpw got introduced (already 2 years ago) This makes use of that with enhancements which might get introduced by that Signed-Off-by: Matthias Kesler --- synapse/handlers/auth.py | 6 ++++-- synapse/python_dependencies.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 258cc345d..a5365c4fe 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -863,8 +863,10 @@ class AuthHandler(BaseHandler): """ def _do_validate_hash(): - return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, - stored_hash.encode('utf8')) == stored_hash + return bcrypt.checkpw( + password.encode('utf8') + self.hs.config.password_pepper, + stored_hash.encode('utf8') + ) if stored_hash: return make_deferred_yieldable(threads.deferToThread(_do_validate_hash)) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 5d65b5fd6..91179ce53 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -31,7 +31,7 @@ REQUIREMENTS = { "pyyaml": ["yaml"], "pyasn1": ["pyasn1"], "daemonize": ["daemonize"], - "bcrypt": ["bcrypt"], + "bcrypt": ["bcrypt>=3.1.0"], "pillow": ["PIL"], "pydenticon": ["pydenticon"], "ujson": ["ujson"], From 8cb44da4aa569188faa2a94aae6bc093aa8e22ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 12:06:19 +0000 Subject: [PATCH 074/173] Fix race in sync when joining room The race happens when the user joins a room at the same time as doing a sync. We fetch the current token and then get the rooms the user is in. If the join happens after the current token, but before we get the rooms we end up sending down a partial room entry in the sync. This is fixed by looking at the stream ordering of the membership returned by get_rooms_for_user, and handling the case when that stream ordering is after the current token. --- synapse/handlers/sync.py | 103 +++++++++++++++++++++++++--------- synapse/storage/events.py | 2 +- synapse/storage/roommember.py | 27 ++++++++- 3 files changed, 102 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 56b86356f..163d80417 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -235,10 +235,10 @@ class SyncHandler(object): defer.returnValue(rules) @defer.inlineCallbacks - def ephemeral_by_room(self, sync_config, now_token, since_token=None): + def ephemeral_by_room(self, sync_result_builder, now_token, since_token=None): """Get the ephemeral events for each room the user is in Args: - sync_config (SyncConfig): The flags, filters and user for the sync. + sync_result_builder(SyncResultBuilder) now_token (StreamToken): Where the server is currently up to. since_token (StreamToken): Where the server was when the client last synced. @@ -248,10 +248,12 @@ class SyncHandler(object): typing events for that room. """ + sync_config = sync_result_builder.sync_config + with Measure(self.clock, "ephemeral_by_room"): typing_key = since_token.typing_key if since_token else "0" - room_ids = yield self.store.get_rooms_for_user(sync_config.user.to_string()) + room_ids = sync_result_builder.joined_room_ids typing_source = self.event_sources.sources["typing"] typing, typing_key = yield typing_source.get_new_events( @@ -565,10 +567,22 @@ class SyncHandler(object): # Always use the `now_token` in `SyncResultBuilder` now_token = yield self.event_sources.get_current_token() + user_id = sync_config.user.to_string() + app_service = self.store.get_app_service_by_user_id(user_id) + if app_service: + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() + else: + joined_room_ids = yield self.get_rooms_for_user_at( + user_id, now_token.room_stream_id, + ) + sync_result_builder = SyncResultBuilder( sync_config, full_state, since_token=since_token, now_token=now_token, + joined_room_ids=joined_room_ids, ) account_data_by_room = yield self._generate_sync_entry_for_account_data( @@ -603,7 +617,6 @@ class SyncHandler(object): device_id = sync_config.device_id one_time_key_counts = {} if device_id: - user_id = sync_config.user.to_string() one_time_key_counts = yield self.store.count_e2e_one_time_keys( user_id, device_id ) @@ -891,7 +904,7 @@ class SyncHandler(object): ephemeral_by_room = {} else: now_token, ephemeral_by_room = yield self.ephemeral_by_room( - sync_result_builder.sync_config, + sync_result_builder, now_token=sync_result_builder.now_token, since_token=sync_result_builder.since_token, ) @@ -996,16 +1009,8 @@ class SyncHandler(object): if rooms_changed: defer.returnValue(True) - app_service = self.store.get_app_service_by_user_id(user_id) - if app_service: - # We no longer support AS users using /sync directly. - # See https://github.com/matrix-org/matrix-doc/issues/1144 - raise NotImplementedError() - else: - joined_room_ids = yield self.store.get_rooms_for_user(user_id) - stream_id = RoomStreamToken.parse_stream_token(since_token.room_key).stream - for room_id in joined_room_ids: + for room_id in sync_result_builder.joined_room_ids: if self.store.has_room_changed_since(room_id, stream_id): defer.returnValue(True) defer.returnValue(False) @@ -1029,14 +1034,6 @@ class SyncHandler(object): assert since_token - app_service = self.store.get_app_service_by_user_id(user_id) - if app_service: - # We no longer support AS users using /sync directly. - # See https://github.com/matrix-org/matrix-doc/issues/1144 - raise NotImplementedError() - else: - joined_room_ids = yield self.store.get_rooms_for_user(user_id) - # Get a list of membership change events that have happened. rooms_changed = yield self.store.get_membership_changes_for_user( user_id, since_token.room_key, now_token.room_key @@ -1059,7 +1056,7 @@ class SyncHandler(object): # we do send down the room, and with full state, where necessary old_state_ids = None - if room_id in joined_room_ids and non_joins: + if room_id in sync_result_builder.joined_room_ids and non_joins: # Always include if the user (re)joined the room, especially # important so that device list changes are calculated correctly. # If there are non join member events, but we are still in the room, @@ -1069,7 +1066,7 @@ class SyncHandler(object): # User is in the room so we don't need to do the invite/leave checks continue - if room_id in joined_room_ids or has_join: + if room_id in sync_result_builder.joined_room_ids or has_join: old_state_ids = yield self.get_state_at(room_id, since_token) old_mem_ev_id = old_state_ids.get((EventTypes.Member, user_id), None) old_mem_ev = None @@ -1081,7 +1078,7 @@ class SyncHandler(object): newly_joined_rooms.append(room_id) # If user is in the room then we don't need to do the invite/leave checks - if room_id in joined_room_ids: + if room_id in sync_result_builder.joined_room_ids: continue if not non_joins: @@ -1148,7 +1145,7 @@ class SyncHandler(object): # Get all events for rooms we're currently joined to. room_to_events = yield self.store.get_room_events_stream_for_rooms( - room_ids=joined_room_ids, + room_ids=sync_result_builder.joined_room_ids, from_key=since_token.room_key, to_key=now_token.room_key, limit=timeline_limit + 1, @@ -1156,7 +1153,7 @@ class SyncHandler(object): # We loop through all room ids, even if there are no new events, in case # there are non room events taht we need to notify about. - for room_id in joined_room_ids: + for room_id in sync_result_builder.joined_room_ids: room_entry = room_to_events.get(room_id, None) if room_entry: @@ -1364,6 +1361,54 @@ class SyncHandler(object): else: raise Exception("Unrecognized rtype: %r", room_builder.rtype) + @defer.inlineCallbacks + def get_rooms_for_user_at(self, user_id, stream_ordering): + """Get set of joined rooms for a user at the given stream ordering. + + The stream ordering *must* be recent, otherwise this may throw an + exception if older than a month. (This function is called with the + current token, which should be perfectly fine). + + Args: + user_id (str) + stream_ordering (int) + + ReturnValue: + Deferred[frozenset[str]]: Set of room_ids the user is in at given + stream_ordering. + """ + joined_rooms = yield self.store.get_rooms_for_user_with_stream_ordering( + user_id, + ) + + joined_room_ids = set() + + # We need to check that the stream ordering of the join for each room + # is before the stream_ordering asked for. This might not be the case + # if the user joins a room between us getting the current token and + # calling `get_rooms_for_user_with_stream_ordering`. + # If the membership's stream ordering is after the given stream + # ordering, we need to go and work out if the user was in the room + # before. + for room_id, membeship_stream_ordering in joined_rooms: + if membeship_stream_ordering <= stream_ordering: + joined_room_ids.add(room_id) + continue + + logger.info("SH joined_room_ids membership after current token") + + extrems = yield self.store.get_forward_extremeties_for_room( + room_id, stream_ordering, + ) + users_in_room = yield self.state.get_current_user_in_room( + room_id, extrems, + ) + if user_id in users_in_room: + joined_room_ids.add(room_id) + + joined_room_ids = frozenset(joined_room_ids) + defer.returnValue(joined_room_ids) + def _action_has_highlight(actions): for action in actions: @@ -1413,7 +1458,8 @@ def _calculate_state(timeline_contains, timeline_start, previous, current): class SyncResultBuilder(object): "Used to help build up a new SyncResult for a user" - def __init__(self, sync_config, full_state, since_token, now_token): + def __init__(self, sync_config, full_state, since_token, now_token, + joined_room_ids): """ Args: sync_config(SyncConfig) @@ -1425,6 +1471,7 @@ class SyncResultBuilder(object): self.full_state = full_state self.since_token = since_token self.now_token = now_token + self.joined_room_ids = joined_room_ids self.presence = [] self.account_data = [] diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 057b1be4d..826fad307 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -754,7 +754,7 @@ class EventsStore(EventsWorkerStore): for member in members_changed: self._invalidate_cache_and_stream( - txn, self.get_rooms_for_user, (member,) + txn, self.get_rooms_for_user_with_stream_ordering, (member,) ) for host in set(get_domain_from_id(u) for u in members_changed): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index d79877dac..52e19e16b 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -38,6 +38,11 @@ RoomsForUser = namedtuple( ("room_id", "sender", "membership", "event_id", "stream_ordering") ) +GetRoomsForUserWithStreamOrdering = namedtuple( + "_GetRoomsForUserWithStreamOrdering", + ("room_id", "stream_ordering",) +) + # We store this using a namedtuple so that we save about 3x space over using a # dict. @@ -181,12 +186,32 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results @cachedInlineCallbacks(max_entries=500000, iterable=True) - def get_rooms_for_user(self, user_id): + def get_rooms_for_user_with_stream_ordering(self, user_id): """Returns a set of room_ids the user is currently joined to + + Args: + user_id (str) + + Returns: + Deferred[frozenset[GetRoomsForUserWithStreamOrdering]]: Returns + the rooms the user is in currently, along with the stream ordering + of the most recent join for that user and room. """ rooms = yield self.get_rooms_for_user_where_membership_is( user_id, membership_list=[Membership.JOIN], ) + defer.returnValue(frozenset( + GetRoomsForUserWithStreamOrdering(r.room_id, r.stream_ordering) + for r in rooms + )) + + @defer.inlineCallbacks + def get_rooms_for_user(self, user_id, on_invalidate=None): + """Returns a set of room_ids the user is currently joined to + """ + rooms = yield self.get_rooms_for_user_with_stream_ordering( + user_id, on_invalidate=on_invalidate, + ) defer.returnValue(frozenset(r.room_id for r in rooms)) @cachedInlineCallbacks(max_entries=500000, cache_context=True, iterable=True) From 02a1296ad634ff8200abe539d27a53a6f850081d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 13:12:08 +0000 Subject: [PATCH 075/173] Fix typo --- synapse/handlers/sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 163d80417..b323e0e6b 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1390,8 +1390,8 @@ class SyncHandler(object): # If the membership's stream ordering is after the given stream # ordering, we need to go and work out if the user was in the room # before. - for room_id, membeship_stream_ordering in joined_rooms: - if membeship_stream_ordering <= stream_ordering: + for room_id, membership_stream_ordering in joined_rooms: + if membership_stream_ordering <= stream_ordering: joined_room_ids.add(room_id) continue From a56d54dcb7d3d42fc417d6af82a46d86edf6f73d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 13:29:49 +0000 Subject: [PATCH 076/173] Fix up log message --- synapse/handlers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b323e0e6b..0f713ce03 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1395,7 +1395,7 @@ class SyncHandler(object): joined_room_ids.add(room_id) continue - logger.info("SH joined_room_ids membership after current token") + logger.info("User joined room after current token: %s", room_id) extrems = yield self.store.get_forward_extremeties_for_room( room_id, stream_ordering, From 20f40348d4ea55cc5b98528673e26bac7396a3cb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Mar 2018 19:59:24 +0000 Subject: [PATCH 077/173] Factor run_in_background out from preserve_fn It annoys me that we create temporary function objects when there's really no need for it. Let's factor the gubbins out of preserve_fn and start using it. --- docs/log_contexts.rst | 8 +++--- synapse/util/logcontext.py | 51 +++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/docs/log_contexts.rst b/docs/log_contexts.rst index b19b7fa1e..82ac4f91e 100644 --- a/docs/log_contexts.rst +++ b/docs/log_contexts.rst @@ -279,9 +279,9 @@ Obviously that option means that the operations done in that might be fixed by setting a different logcontext via a ``with LoggingContext(...)`` in ``background_operation``). -The second option is to use ``logcontext.preserve_fn``, which wraps a function -so that it doesn't reset the logcontext even when it returns an incomplete -deferred, and adds a callback to the returned deferred to reset the +The second option is to use ``logcontext.run_in_background``, which wraps a +function so that it doesn't reset the logcontext even when it returns an +incomplete deferred, and adds a callback to the returned deferred to reset the logcontext. In other words, it turns a function that follows the Synapse rules about logcontexts and Deferreds into one which behaves more like an external function — the opposite operation to that described in the previous section. @@ -293,7 +293,7 @@ It can be used like this: def do_request_handling(): yield foreground_operation() - logcontext.preserve_fn(background_operation)() + logcontext.run_in_background(background_operation) # this will now be logged against the request context logger.debug("Request handling complete") diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a8dea15c1..d660ec785 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -292,36 +292,41 @@ class PreserveLoggingContext(object): def preserve_fn(f): - """Wraps a function, to ensure that the current context is restored after + """Function decorator which wraps the function with run_in_background""" + def g(*args, **kwargs): + return run_in_background(f, *args, **kwargs) + return g + + +def run_in_background(f, *args, **kwargs): + """Calls a function, ensuring that the current context is restored after return from the function, and that the sentinel context is set once the deferred returned by the funtion completes. Useful for wrapping functions that return a deferred which you don't yield on. """ - def g(*args, **kwargs): - current = LoggingContext.current_context() - res = f(*args, **kwargs) - if isinstance(res, defer.Deferred) and not res.called: - # The function will have reset the context before returning, so - # we need to restore it now. - LoggingContext.set_current_context(current) + current = LoggingContext.current_context() + res = f(*args, **kwargs) + if isinstance(res, defer.Deferred) and not res.called: + # The function will have reset the context before returning, so + # we need to restore it now. + LoggingContext.set_current_context(current) - # The original context will be restored when the deferred - # completes, but there is nothing waiting for it, so it will - # get leaked into the reactor or some other function which - # wasn't expecting it. We therefore need to reset the context - # here. - # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) - res.addBoth(_set_context_cb, LoggingContext.sentinel) - return res - return g + # The original context will be restored when the deferred + # completes, but there is nothing waiting for it, so it will + # get leaked into the reactor or some other function which + # wasn't expecting it. We therefore need to reset the context + # here. + # + # (If this feels asymmetric, consider it this way: we are + # effectively forking a new thread of execution. We are + # probably currently within a ``with LoggingContext()`` block, + # which is supposed to have a single entry and exit point. But + # by spawning off another deferred, we are effectively + # adding a new exit point.) + res.addBoth(_set_context_cb, LoggingContext.sentinel) + return res def make_deferred_yieldable(deferred): From dbe80a286b85f9427763344c260921745c2ca78d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 16:17:27 +0000 Subject: [PATCH 078/173] refactor JsonResource rephrase the OPTIONS and unrecognised request handling so that they look similar to the common flow. --- synapse/http/server.py | 84 ++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 165c684d0..d774476e5 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -276,49 +277,54 @@ class JsonResource(HttpServer, resource.Resource): This checks if anyone has registered a callback for that method and path. """ + callback, group_dict = self._get_handler_for_request(request) + + servlet_instance = getattr(callback, "__self__", None) + if servlet_instance is not None: + servlet_classname = servlet_instance.__class__.__name__ + else: + servlet_classname = "%r" % callback + + request_metrics.name = servlet_classname + + # Now trigger the callback. If it returns a response, we send it + # here. If it throws an exception, that is handled by the wrapper + # installed by @request_handler. + + kwargs = intern_dict({ + name: urllib.unquote(value).decode("UTF-8") if value else value + for name, value in group_dict.items() + }) + + callback_return = yield callback(request, **kwargs) + if callback_return is not None: + code, response = callback_return + self._send_response(request, code, response) + + def _get_handler_for_request(self, request): + """Finds a callback method to handle the given request + + Args: + request (twisted.web.http.Request): + + Returns: + Tuple[Callable, dict[str, str]]: callback method, and the dict + mapping keys to path components as specified in the handler's + path match regexp + """ if request.method == "OPTIONS": - self._send_response(request, 200, {}) - return + return _options_handler, {} # Loop through all the registered callbacks to check if the method # and path regex match for path_entry in self.path_regexs.get(request.method, []): m = path_entry.pattern.match(request.path) - if not m: - continue - - # We found a match! First update the metrics object to indicate - # which servlet is handling the request. - - callback = path_entry.callback - - servlet_instance = getattr(callback, "__self__", None) - if servlet_instance is not None: - servlet_classname = servlet_instance.__class__.__name__ - else: - servlet_classname = "%r" % callback - - request_metrics.name = servlet_classname - - # Now trigger the callback. If it returns a response, we send it - # here. If it throws an exception, that is handled by the wrapper - # installed by @request_handler. - - kwargs = intern_dict({ - name: urllib.unquote(value).decode("UTF-8") if value else value - for name, value in m.groupdict().items() - }) - - callback_return = yield callback(request, **kwargs) - if callback_return is not None: - code, response = callback_return - self._send_response(request, code, response) - - return + if m: + # We found a match! + return path_entry.callback, m.groupdict() # Huh. No one wanted to handle that? Fiiiiiine. Send 400. - request_metrics.name = self.__class__.__name__ + ".UnrecognizedRequest" - raise UnrecognizedRequestError() + return _unrecognised_request_handler, {} def _send_response(self, request, code, response_json_object, response_code_message=None): @@ -335,6 +341,14 @@ class JsonResource(HttpServer, resource.Resource): ) +def _options_handler(request): + return {} + + +def _unrecognised_request_handler(request): + raise UnrecognizedRequestError() + + class RequestMetrics(object): def start(self, clock, name): self.start = clock.time_msec() From 88541f9009a7ca39c85cac7483d6a240ef497d33 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 16:19:18 +0000 Subject: [PATCH 079/173] Add a metric which increments when a request is received It's useful to know when there are peaks in incoming requests - which isn't quite the same as there being peaks in outgoing responses, due to the time taken to handle requests. --- synapse/http/server.py | 12 ++++++++++-- synapse/metrics/__init__.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index d774476e5..6c5d8bb55 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -60,6 +60,11 @@ response_count = metrics.register_counter( ) ) +requests_counter = metrics.register_counter( + "requests_received", + labels=["method", "servlet", ], +) + outgoing_responses_counter = metrics.register_counter( "responses", labels=["method", "code"], @@ -146,7 +151,8 @@ def wrap_request_handler(request_handler, include_metrics=False): # at the servlet name. For most requests that name will be # JsonResource (or a subclass), and JsonResource._async_render # will update it once it picks a servlet. - request_metrics.start(self.clock, name=self.__class__.__name__) + servlet_name = self.__class__.__name__ + request_metrics.start(self.clock, name=servlet_name) request_context.request = request_id with request.processing(): @@ -155,6 +161,7 @@ def wrap_request_handler(request_handler, include_metrics=False): if include_metrics: yield request_handler(self, request, request_metrics) else: + requests_counter.inc(request.method, servlet_name) yield request_handler(self, request) except CodeMessageException as e: code = e.code @@ -286,6 +293,7 @@ class JsonResource(HttpServer, resource.Resource): servlet_classname = "%r" % callback request_metrics.name = servlet_classname + requests_counter.inc(request.method, servlet_classname) # Now trigger the callback. If it returns a response, we send it # here. If it throws an exception, that is handled by the wrapper @@ -342,7 +350,7 @@ class JsonResource(HttpServer, resource.Resource): def _options_handler(request): - return {} + return 200, {} def _unrecognised_request_handler(request): diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e0cfb7d08..50d99d7a5 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -57,15 +57,31 @@ class Metrics(object): return metric def register_counter(self, *args, **kwargs): + """ + Returns: + CounterMetric + """ return self._register(CounterMetric, *args, **kwargs) def register_callback(self, *args, **kwargs): + """ + Returns: + CallbackMetric + """ return self._register(CallbackMetric, *args, **kwargs) def register_distribution(self, *args, **kwargs): + """ + Returns: + DistributionMetric + """ return self._register(DistributionMetric, *args, **kwargs) def register_cache(self, *args, **kwargs): + """ + Returns: + CacheMetric + """ return self._register(CacheMetric, *args, **kwargs) From 58dd148c4f67e1cb6b150bf43a437b33089cfe5e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 18:05:41 +0000 Subject: [PATCH 080/173] Add some docstrings to help figure this out --- synapse/http/server.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 6c5d8bb55..4b567215c 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -237,7 +237,7 @@ class JsonResource(HttpServer, resource.Resource): """ This implements the HttpServer interface and provides JSON support for Resources. - Register callbacks via register_path() + Register callbacks via register_paths() Callbacks can return a tuple of status code and a dict in which case the the dict will automatically be sent to the client as a JSON object. @@ -318,7 +318,11 @@ class JsonResource(HttpServer, resource.Resource): Returns: Tuple[Callable, dict[str, str]]: callback method, and the dict mapping keys to path components as specified in the handler's - path match regexp + path match regexp. + + The callback will normally be a method registered via + register_paths, so will return (possibly via Deferred) either + None, or a tuple of (http code, response body). """ if request.method == "OPTIONS": return _options_handler, {} @@ -350,10 +354,30 @@ class JsonResource(HttpServer, resource.Resource): def _options_handler(request): + """Request handler for OPTIONS requests + + This is a request handler suitable for return from + _get_handler_for_request. It returns a 200 and an empty body. + + Args: + request (twisted.web.http.Request): + + Returns: + Tuple[int, dict]: http code, response body. + """ return 200, {} def _unrecognised_request_handler(request): + """Request handler for unrecognised requests + + This is a request handler suitable for return from + _get_handler_for_request. It actually just raises an + UnrecognizedRequestError. + + Args: + request (twisted.web.http.Request): + """ raise UnrecognizedRequestError() From 1708412f569dc28931a3704d679b41b92ac788b9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Mar 2018 17:32:46 +0000 Subject: [PATCH 081/173] Return an error when doing two purges on a room Queuing up purges doesn't sound like a good thing. --- synapse/handlers/message.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index dd00d8a86..6eb8d19dc 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -50,15 +50,26 @@ class MessageHandler(BaseHandler): self.clock = hs.get_clock() self.pagination_lock = ReadWriteLock() + self._purges_in_progress_by_room = set() @defer.inlineCallbacks def purge_history(self, room_id, topological_ordering, delete_local_events=False): - with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history( - room_id, topological_ordering, delete_local_events, + if room_id in self._purges_in_progress_by_room: + raise SynapseError( + 400, + "History purge already in progress for %s" % (room_id, ), ) + self._purges_in_progress_by_room.add(room_id) + try: + with (yield self.pagination_lock.write(room_id)): + yield self.store.purge_history( + room_id, topological_ordering, delete_local_events, + ) + finally: + self._purges_in_progress_by_room.discard(room_id) + @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, as_client_event=True, event_filter=None): From e48c7aac4d827b66182adf80ab9804f42db186c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Mar 2018 11:47:28 +0000 Subject: [PATCH 082/173] Add transactional API to history purge Make the purge request return quickly, and allow scripts to poll for updates. --- docs/admin_api/purge_history_api.rst | 27 +++++++ synapse/handlers/message.py | 104 +++++++++++++++++++++++++-- synapse/rest/client/v1/admin.py | 38 +++++++++- 3 files changed, 161 insertions(+), 8 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index acf1bc574..ea2922da5 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -32,3 +32,30 @@ specified by including an event_id in the URI, or by setting a id is given, that event (and others at the same graph depth) will be retained. If ``purge_up_to_ts`` is given, it should be a timestamp since the unix epoch, in milliseconds. + +The API starts the purge running, and returns immediately with a JSON body with +a purge id: + +.. code:: json + + { + "purge_id": "" + } + +Purge status query +------------------ + +It is possible to poll for updates on recent purges with a second API; + +``GET /_matrix/client/r0/admin/purge_history_status/`` + +(again, with a suitable ``access_token``). This API returns a JSON body like +the following: + +.. code:: json + + { + "status": "active" + } + +The status will be one of ``active``, ``complete``, or ``failed``. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6eb8d19dc..42aab91c5 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer +from twisted.internet import defer, reactor +from twisted.python.failure import Failure from synapse.api.constants import EventTypes, Membership from synapse.api.errors import AuthError, Codes, SynapseError @@ -24,9 +25,10 @@ from synapse.types import ( UserID, RoomAlias, RoomStreamToken, ) from synapse.util.async import run_on_reactor, ReadWriteLock, Limiter -from synapse.util.logcontext import preserve_fn +from synapse.util.logcontext import preserve_fn, run_in_background from synapse.util.metrics import measure_func from synapse.util.frozenutils import unfreeze +from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client from synapse.replication.http.send_event import send_event_to_master @@ -41,6 +43,36 @@ import ujson logger = logging.getLogger(__name__) +class PurgeStatus(object): + """Object tracking the status of a purge request + + This class contains information on the progress of a purge request, for + return by get_purge_status. + + Attributes: + status (int): Tracks whether this request has completed. One of + STATUS_{ACTIVE,COMPLETE,FAILED} + """ + + STATUS_ACTIVE = 0 + STATUS_COMPLETE = 1 + STATUS_FAILED = 2 + + STATUS_TEXT = { + STATUS_ACTIVE: "active", + STATUS_COMPLETE: "complete", + STATUS_FAILED: "failed", + } + + def __init__(self): + self.status = PurgeStatus.STATUS_ACTIVE + + def asdict(self): + return { + "status": PurgeStatus.STATUS_TEXT[self.status] + } + + class MessageHandler(BaseHandler): def __init__(self, hs): @@ -51,25 +83,87 @@ class MessageHandler(BaseHandler): self.pagination_lock = ReadWriteLock() self._purges_in_progress_by_room = set() + # map from purge id to PurgeStatus + self._purges_by_id = {} - @defer.inlineCallbacks - def purge_history(self, room_id, topological_ordering, - delete_local_events=False): + def start_purge_history(self, room_id, topological_ordering, + delete_local_events=False): + """Start off a history purge on a room. + + Args: + room_id (str): The room to purge from + + topological_ordering (int): minimum topo ordering to preserve + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + str: unique ID for this purge transaction. + """ if room_id in self._purges_in_progress_by_room: raise SynapseError( 400, "History purge already in progress for %s" % (room_id, ), ) + purge_id = random_string(16) + + # we log the purge_id here so that it can be tied back to the + # request id in the log lines. + logger.info("[purge] starting purge_id %s", purge_id) + + self._purges_by_id[purge_id] = PurgeStatus() + run_in_background( + self._purge_history, + purge_id, room_id, topological_ordering, delete_local_events, + ) + return purge_id + + @defer.inlineCallbacks + def _purge_history(self, purge_id, room_id, topological_ordering, + delete_local_events): + """Carry out a history purge on a room. + + Args: + purge_id (str): The id for this purge + room_id (str): The room to purge from + topological_ordering (int): minimum topo ordering to preserve + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + Deferred + """ self._purges_in_progress_by_room.add(room_id) try: with (yield self.pagination_lock.write(room_id)): yield self.store.purge_history( room_id, topological_ordering, delete_local_events, ) + logger.info("[purge] complete") + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE + except Exception: + logger.error("[purge] failed: %s", Failure().getTraceback().rstrip()) + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED finally: self._purges_in_progress_by_room.discard(room_id) + # remove the purge from the list 24 hours after it completes + def clear_purge(): + del self._purges_by_id[purge_id] + reactor.callLater(24 * 3600, clear_purge) + + def get_purge_status(self, purge_id): + """Get the current status of an active purge + + Args: + purge_id (str): purge_id returned by start_purge_history + + Returns: + PurgeStatus|None + """ + return self._purges_by_id.get(purge_id) + @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, as_client_event=True, event_filter=None): diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index dcf6215da..303419d28 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.api.constants import Membership -from synapse.api.errors import AuthError, SynapseError, Codes +from synapse.api.errors import AuthError, SynapseError, Codes, NotFoundError from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request @@ -185,12 +185,43 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): errcode=Codes.BAD_JSON, ) - yield self.handlers.message_handler.purge_history( + purge_id = yield self.handlers.message_handler.start_purge_history( room_id, depth, delete_local_events=delete_local_events, ) - defer.returnValue((200, {})) + defer.returnValue((200, { + "purge_id": purge_id, + })) + + +class PurgeHistoryStatusRestServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns( + "/admin/purge_history_status/(?P[^/]+)" + ) + + def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer) + """ + super(PurgeHistoryStatusRestServlet, self).__init__(hs) + self.handlers = hs.get_handlers() + + @defer.inlineCallbacks + def on_GET(self, request, purge_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + + if not is_admin: + raise AuthError(403, "You are not a server admin") + + purge_status = self.handlers.message_handler.get_purge_status(purge_id) + if purge_status is None: + raise NotFoundError("purge id '%s' not found" % purge_id) + + defer.returnValue((200, purge_status.asdict())) class DeactivateAccountRestServlet(ClientV1RestServlet): @@ -561,6 +592,7 @@ class SearchUsersRestServlet(ClientV1RestServlet): def register_servlets(hs, http_server): WhoisRestServlet(hs).register(http_server) PurgeMediaCacheRestServlet(hs).register(http_server) + PurgeHistoryStatusRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) PurgeHistoryRestServlet(hs).register(http_server) UsersRestServlet(hs).register(http_server) From 889a2a853a83057d4f218bb828bf686db786d590 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 13 Mar 2018 09:57:54 +0000 Subject: [PATCH 083/173] Add Measure block for persist_events This seems like a useful thing to measure. --- synapse/storage/events.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 826fad307..389087817 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -283,10 +283,11 @@ class EventsStore(EventsWorkerStore): def _maybe_start_persisting(self, room_id): @defer.inlineCallbacks def persisting_queue(item): - yield self._persist_events( - item.events_and_contexts, - backfilled=item.backfilled, - ) + with Measure(self._clock, "persist_events"): + yield self._persist_events( + item.events_and_contexts, + backfilled=item.backfilled, + ) self._event_persist_queue.handle_queue(room_id, persisting_queue) From c3f79c9da56931453ab86a4c726da5a02f18fe1e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 16:17:08 +0000 Subject: [PATCH 084/173] Split out edu/query registration to a separate class --- synapse/federation/federation_server.py | 117 ++++++++++++++---------- synapse/handlers/device.py | 6 +- synapse/handlers/devicemessage.py | 2 +- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/presence.py | 10 +- synapse/handlers/profile.py | 2 +- synapse/handlers/receipts.py | 2 +- synapse/handlers/typing.py | 2 +- synapse/server.py | 5 + 10 files changed, 90 insertions(+), 60 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9849953c9..5b1914f2f 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -17,7 +17,7 @@ import logging import simplejson as json from twisted.internet import defer -from synapse.api.errors import AuthError, FederationError, SynapseError +from synapse.api.errors import AuthError, FederationError, SynapseError, NotFoundError from synapse.crypto.event_signing import compute_event_signature from synapse.federation.federation_base import ( FederationBase, @@ -56,6 +56,8 @@ class FederationServer(FederationBase): self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") + self.registry = hs.get_federation_registry() + # We cache responses to state queries, as they take a while and often # come in waves. self._state_resp_cache = ResponseCache(hs, timeout_ms=30000) @@ -67,35 +69,6 @@ class FederationServer(FederationBase): """ self.handler = handler - def register_edu_handler(self, edu_type, handler): - if edu_type in self.edu_handlers: - raise KeyError("Already have an EDU handler for %s" % (edu_type,)) - - self.edu_handlers[edu_type] = handler - - def register_query_handler(self, query_type, handler): - """Sets the handler callable that will be used to handle an incoming - federation Query of the given type. - - Args: - query_type (str): Category name of the query, which should match - the string used by make_query. - handler (callable): Invoked to handle incoming queries of this type - - handler is invoked as: - result = handler(args) - - where 'args' is a dict mapping strings to strings of the query - arguments. It should return a Deferred that will eventually yield an - object to encode as JSON. - """ - if query_type in self.query_handlers: - raise KeyError( - "Already have a Query handler for %s" % (query_type,) - ) - - self.query_handlers[query_type] = handler - @defer.inlineCallbacks @log_function def on_backfill_request(self, origin, room_id, versions, limit): @@ -229,16 +202,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def received_edu(self, origin, edu_type, content): received_edus_counter.inc() - - if edu_type in self.edu_handlers: - try: - yield self.edu_handlers[edu_type](origin, content) - except SynapseError as e: - logger.info("Failed to handle edu %r: %r", edu_type, e) - except Exception as e: - logger.exception("Failed to handle edu %r", edu_type) - else: - logger.warn("Received EDU of type %s with no handler", edu_type) + yield self.registry.on_edu(edu_type, origin, content) @defer.inlineCallbacks @log_function @@ -328,14 +292,8 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_query_request(self, query_type, args): received_queries_counter.inc(query_type) - - if query_type in self.query_handlers: - response = yield self.query_handlers[query_type](args) - defer.returnValue((200, response)) - else: - defer.returnValue( - (404, "No handler for Query type '%s'" % (query_type,)) - ) + resp = yield self.registry.on_query(query_type, args) + defer.returnValue((200, resp)) @defer.inlineCallbacks def on_make_join_request(self, room_id, user_id): @@ -607,3 +565,66 @@ class FederationServer(FederationBase): origin, room_id, event_dict ) defer.returnValue(ret) + + +class FederationHandlerRegistry(object): + """Allows classes to register themselves as handlers for a given EDU or + query type for incoming federation traffic. + """ + def __init__(self): + self.edu_handlers = {} + self.query_handlers = {} + + def register_edu_handler(self, edu_type, handler): + """Sets the handler callable that will be used to handle an incoming + federation EDU of the given type. + + Args: + edu_type (str): The type of the incoming EDU to register handler for + handler (Callable[str, dict]): A callable invoked on incoming EDU + of the given type. The arguments are the origin server name and + the EDU contents. + """ + if edu_type in self.edu_handlers: + raise KeyError("Already have an EDU handler for %s" % (edu_type,)) + + self.edu_handlers[edu_type] = handler + + def register_query_handler(self, query_type, handler): + """Sets the handler callable that will be used to handle an incoming + federation query of the given type. + + Args: + query_type (str): Category name of the query, which should match + the string used by make_query. + handler (Callable[dict] -> Deferred[dict]): Invoked to handle + incoming queries of this type. The return will be yielded + on and the result used as the response to the query request. + """ + if query_type in self.query_handlers: + raise KeyError( + "Already have a Query handler for %s" % (query_type,) + ) + + self.query_handlers[query_type] = handler + + @defer.inlineCallbacks + def on_edu(self, edu_type, origin, content): + handler = self.edu_handlers.get(edu_type) + if not handler: + logger.warn("No handler registered for EDU type %s", edu_type) + + try: + yield handler(origin, content) + except SynapseError as e: + logger.info("Failed to handle edu %r: %r", edu_type, e) + except Exception as e: + logger.exception("Failed to handle edu %r", edu_type) + + def on_query(self, query_type, args): + handler = self.query_handlers.get(query_type) + if not handler: + logger.warn("No handler registered for query type %s", query_type) + raise NotFoundError("No handler for Query type '%s'" % (query_type,)) + + return handler(args) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 0e8345385..9e58dbe64 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -41,10 +41,12 @@ class DeviceHandler(BaseHandler): self._edu_updater = DeviceListEduUpdater(hs, self) - self.federation.register_edu_handler( + federation_registry = hs.get_federation_registry() + + federation_registry.register_edu_handler( "m.device_list_update", self._edu_updater.incoming_device_list_update, ) - self.federation.register_query_handler( + federation_registry.register_query_handler( "user_devices", self.on_federation_query_user_devices, ) diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index d996aa90b..f147a20b7 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -37,7 +37,7 @@ class DeviceMessageHandler(object): self.is_mine = hs.is_mine self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler( + hs.get_federation_registry().register_edu_handler( "m.direct_to_device", self.on_direct_to_device_edu ) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8580ada60..e955cb1f3 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -37,7 +37,7 @@ class DirectoryHandler(BaseHandler): self.event_creation_handler = hs.get_event_creation_handler() self.federation = hs.get_replication_layer() - self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 9aa95f89e..57f50a4e2 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -40,7 +40,7 @@ class E2eKeysHandler(object): # doesn't really work as part of the generic query API, because the # query request requires an object POST, but we abuse the # "query handler" interface. - self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "client_keys", self.on_federation_query_client_keys ) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index cb158ba96..b11ae7835 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -98,24 +98,26 @@ class PresenceHandler(object): self.state = hs.get_state_handler() - self.replication.register_edu_handler( + federation_registry = hs.get_federation_registry() + + federation_registry.register_edu_handler( "m.presence", self.incoming_presence ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_invite", lambda origin, content: self.invite_presence( observed_user=UserID.from_string(content["observed_user"]), observer_user=UserID.from_string(content["observer_user"]), ) ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_accept", lambda origin, content: self.accept_presence( observed_user=UserID.from_string(content["observed_user"]), observer_user=UserID.from_string(content["observer_user"]), ) ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_deny", lambda origin, content: self.deny_presence( observed_user=UserID.from_string(content["observed_user"]), diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index c9c287903..c386c79bb 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -32,7 +32,7 @@ class ProfileHandler(BaseHandler): super(ProfileHandler, self).__init__(hs) self.federation = hs.get_replication_layer() - self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 052576527..3f215c2b4 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -35,7 +35,7 @@ class ReceiptsHandler(BaseHandler): self.store = hs.get_datastore() self.hs = hs self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler( + hs.get_federation_registry().register_edu_handler( "m.receipt", self._received_remote_receipt ) self.clock = self.hs.get_clock() diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 82dedbbc9..77c0cf146 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -56,7 +56,7 @@ class TypingHandler(object): self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler("m.typing", self._recv_edu) + hs.get_federation_registry().register_edu_handler("m.typing", self._recv_edu) hs.get_distributor().observe("user_left_room", self.user_left_room) diff --git a/synapse/server.py b/synapse/server.py index 5b6effbe3..1bc8d6f70 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -34,6 +34,7 @@ from synapse.events.builder import EventBuilderFactory from synapse.events.spamcheck import SpamChecker from synapse.federation import initialize_http_replication from synapse.federation.send_queue import FederationRemoteSendQueue +from synapse.federation.federation_server import FederationHandlerRegistry from synapse.federation.transport.client import TransportLayerClient from synapse.federation.transaction_queue import TransactionQueue from synapse.handlers import Handlers @@ -147,6 +148,7 @@ class HomeServer(object): 'groups_attestation_renewer', 'spam_checker', 'room_member_handler', + 'federation_registry', ] def __init__(self, hostname, **kwargs): @@ -387,6 +389,9 @@ class HomeServer(object): def build_room_member_handler(self): return RoomMemberHandler(self) + def build_federation_registry(self): + return FederationHandlerRegistry() + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) From 631a73f7ef7d89c43be47cf01c7c27a1d633052d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 10:39:19 +0000 Subject: [PATCH 085/173] Fix tests --- tests/handlers/test_directory.py | 9 ++++----- tests/handlers/test_profile.py | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index 571277390..b10392149 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -35,21 +35,20 @@ class DirectoryTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): - self.mock_federation = Mock(spec=[ - "make_query", - "register_edu_handler", - ]) + self.mock_federation = Mock() + self.mock_registry = Mock() self.query_handlers = {} def register_query_handler(query_type, handler): self.query_handlers[query_type] = handler - self.mock_federation.register_query_handler = register_query_handler + self.mock_registry.register_query_handler = register_query_handler hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), replication_layer=self.mock_federation, + federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index a5f47181d..73223ffbd 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -37,23 +37,22 @@ class ProfileTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): - self.mock_federation = Mock(spec=[ - "make_query", - "register_edu_handler", - ]) + self.mock_federation = Mock() + self.mock_registry = Mock() self.query_handlers = {} def register_query_handler(query_type, handler): self.query_handlers[query_type] = handler - self.mock_federation.register_query_handler = register_query_handler + self.mock_registry.register_query_handler = register_query_handler hs = yield setup_test_homeserver( http_client=None, handlers=None, resource_for_federation=Mock(), replication_layer=self.mock_federation, + federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", ]) From e05bf34117de19705b36a4803085ea93f7381928 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 14:07:39 +0000 Subject: [PATCH 086/173] Move property setting from ReplicationLayer to FederationBase --- synapse/federation/federation_base.py | 6 ++++++ synapse/federation/federation_client.py | 1 + synapse/federation/federation_server.py | 6 ++++++ synapse/federation/replication.py | 22 ---------------------- 4 files changed, 13 insertions(+), 22 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 7918d3e44..79eaa3103 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -27,7 +27,13 @@ logger = logging.getLogger(__name__) class FederationBase(object): def __init__(self, hs): + self.hs = hs + + self.server_name = hs.hostname + self.keyring = hs.get_keyring() self.spam_checker = hs.get_spam_checker() + self.store = hs.get_datastore() + self._clock = hs.get_clock() @defer.inlineCallbacks def _check_sigs_and_hash_and_fetch(self, origin, pdus, outlier=False, diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 813907f7f..38440da5b 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -58,6 +58,7 @@ class FederationClient(FederationBase): self._clear_tried_cache, 60 * 1000, ) self.state = hs.get_state_handler() + self.transport_layer = hs.get_federation_transport_client() def _clear_tried_cache(self): """Clear pdu_destination_tried cache""" diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5b1914f2f..dd73fc50b 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -23,6 +23,8 @@ from synapse.federation.federation_base import ( FederationBase, event_from_pdu_json, ) + +from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction import synapse.metrics from synapse.types import get_domain_from_id @@ -56,6 +58,10 @@ class FederationServer(FederationBase): self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") + self.transaction_actions = TransactionActions(self.store) + + self.handler = None + self.registry = hs.get_federation_registry() # We cache responses to state queries, as they take a while and often diff --git a/synapse/federation/replication.py b/synapse/federation/replication.py index 62d865ec4..b8b3a3f93 100644 --- a/synapse/federation/replication.py +++ b/synapse/federation/replication.py @@ -20,8 +20,6 @@ a given transport. from .federation_client import FederationClient from .federation_server import FederationServer -from .persistence import TransactionActions - import logging @@ -47,26 +45,6 @@ class ReplicationLayer(FederationClient, FederationServer): """ def __init__(self, hs, transport_layer): - self.server_name = hs.hostname - - self.keyring = hs.get_keyring() - - self.transport_layer = transport_layer - - self.federation_client = self - - self.store = hs.get_datastore() - - self.handler = None - self.edu_handlers = {} - self.query_handlers = {} - - self._clock = hs.get_clock() - - self.transaction_actions = TransactionActions(self.store) - - self.hs = hs - super(ReplicationLayer, self).__init__(hs) def __str__(self): From 265b993b8afd2501b2aa3a50670f39d6d97eddb7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 14:34:31 +0000 Subject: [PATCH 087/173] Split replication layer into two --- synapse/app/homeserver.py | 2 +- synapse/federation/federation_server.py | 10 +--------- synapse/federation/transport/server.py | 2 +- synapse/handlers/device.py | 3 +-- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/federation.py | 4 +--- synapse/handlers/presence.py | 1 - synapse/handlers/profile.py | 2 +- synapse/handlers/room_list.py | 2 +- synapse/handlers/room_member.py | 3 +-- synapse/server.py | 13 +++++++++---- 12 files changed, 19 insertions(+), 27 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e375f2bbc..503f461ab 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -348,7 +348,7 @@ def setup(config_options): hs.get_state_handler().start_caching() hs.get_datastore().start_profiling() hs.get_datastore().start_doing_background_updates() - hs.get_replication_layer().start_get_pdu_cache() + hs.get_replication_client().start_get_pdu_cache() register_memory_metrics(hs) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index dd73fc50b..740ef9628 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -54,27 +54,19 @@ class FederationServer(FederationBase): super(FederationServer, self).__init__(hs) self.auth = hs.get_auth() + self.handler = hs.get_handlers().federation_handler self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") self.transaction_actions = TransactionActions(self.store) - self.handler = None - self.registry = hs.get_federation_registry() # We cache responses to state queries, as they take a while and often # come in waves. self._state_resp_cache = ResponseCache(hs, timeout_ms=30000) - def set_handler(self, handler): - """Sets the handler that the replication layer will use to communicate - receipt of new PDUs from other home servers. The required methods are - documented on :py:class:`.ReplicationHandler`. - """ - self.handler = handler - @defer.inlineCallbacks @log_function def on_backfill_request(self, origin, room_id, versions, limit): diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 06c16ba4f..04b83e691 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -1190,7 +1190,7 @@ GROUP_ATTESTATION_SERVLET_CLASSES = ( def register_servlets(hs, resource, authenticator, ratelimiter): for servletclass in FEDERATION_SERVLET_CLASSES: servletclass( - handler=hs.get_replication_layer(), + handler=hs.get_replication_server(), authenticator=authenticator, ratelimiter=ratelimiter, server_name=hs.hostname, diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 9e58dbe64..fcf41630d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -37,7 +37,6 @@ class DeviceHandler(BaseHandler): self.state = hs.get_state_handler() self._auth_handler = hs.get_auth_handler() self.federation_sender = hs.get_federation_sender() - self.federation = hs.get_replication_layer() self._edu_updater = DeviceListEduUpdater(hs, self) @@ -432,7 +431,7 @@ class DeviceListEduUpdater(object): def __init__(self, hs, device_handler): self.store = hs.get_datastore() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() self.clock = hs.get_clock() self.device_handler = device_handler diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index e955cb1f3..dfe04eb1c 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -36,7 +36,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 57f50a4e2..0ca8d036e 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) class E2eKeysHandler(object): def __init__(self, hs): self.store = hs.get_datastore() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() self.device_handler = hs.get_device_handler() self.is_mine = hs.is_mine self.clock = hs.get_clock() diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 520612683..cfd437916 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -68,7 +68,7 @@ class FederationHandler(BaseHandler): self.hs = hs self.store = hs.get_datastore() - self.replication_layer = hs.get_replication_layer() + self.replication_layer = hs.get_replication_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname self.keyring = hs.get_keyring() @@ -78,8 +78,6 @@ class FederationHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() self.event_creation_handler = hs.get_event_creation_handler() - self.replication_layer.set_handler(self) - # When joining a room we need to queue any events for that room up self.room_queues = {} self._room_pdu_linearizer = Linearizer("fed_room_pdu") diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index b11ae7835..a5e501897 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -93,7 +93,6 @@ class PresenceHandler(object): self.store = hs.get_datastore() self.wheel_timer = WheelTimer() self.notifier = hs.get_notifier() - self.replication = hs.get_replication_layer() self.federation = hs.get_federation_sender() self.state = hs.get_state_handler() diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index c386c79bb..0cfac60d7 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -31,7 +31,7 @@ class ProfileHandler(BaseHandler): def __init__(self, hs): super(ProfileHandler, self).__init__(hs) - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index dfa09141e..f79bd8902 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -409,7 +409,7 @@ class RoomListHandler(BaseHandler): def _get_remote_list_cached(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): - repl_layer = self.hs.get_replication_layer() + repl_layer = self.hs.get_replication_client() if search_filter: # We can't cache when asking for search return repl_layer.get_public_rooms( diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730..e2f052771 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -55,7 +55,6 @@ class RoomMemberHandler(object): self.registration_handler = hs.get_handlers().registration_handler self.profile_handler = hs.get_profile_handler() self.event_creation_hander = hs.get_event_creation_handler() - self.replication_layer = hs.get_replication_layer() self.member_linearizer = Linearizer(name="member") @@ -212,7 +211,7 @@ class RoomMemberHandler(object): # if this is a join with a 3pid signature, we may need to turn a 3pid # invite into a normal invite before we can handle the join. if third_party_signed is not None: - yield self.replication_layer.exchange_third_party_invite( + yield self.federation_handler.exchange_third_party_invite( third_party_signed["sender"], target.to_string(), room_id, diff --git a/synapse/server.py b/synapse/server.py index 1bc8d6f70..894e9c2ac 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -32,7 +32,8 @@ from synapse.appservice.scheduler import ApplicationServiceScheduler from synapse.crypto.keyring import Keyring from synapse.events.builder import EventBuilderFactory from synapse.events.spamcheck import SpamChecker -from synapse.federation import initialize_http_replication +from synapse.federation.federation_client import FederationClient +from synapse.federation.federation_server import FederationServer from synapse.federation.send_queue import FederationRemoteSendQueue from synapse.federation.federation_server import FederationHandlerRegistry from synapse.federation.transport.client import TransportLayerClient @@ -100,7 +101,8 @@ class HomeServer(object): DEPENDENCIES = [ 'http_client', 'db_pool', - 'replication_layer', + 'replication_client', + 'replication_server', 'handlers', 'v1auth', 'auth', @@ -197,8 +199,11 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter - def build_replication_layer(self): - return initialize_http_replication(self) + def build_replication_client(self): + return FederationClient(self) + + def build_replication_server(self): + return FederationServer(self) def build_handlers(self): return Handlers(self) From 6ea27fafad7c290b8f082fedfa8ff7948cf9f1fd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 10:15:49 +0000 Subject: [PATCH 088/173] Fix tests --- tests/handlers/test_directory.py | 2 +- tests/handlers/test_e2e_keys.py | 2 +- tests/handlers/test_profile.py | 3 ++- tests/handlers/test_typing.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_profile.py | 2 +- tests/rest/client/v1/test_rooms.py | 16 ++++++++-------- tests/rest/client/v1/test_typing.py | 2 +- tests/storage/test_appservice.py | 10 +++++----- 10 files changed, 22 insertions(+), 21 deletions(-) diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index b10392149..b4f36b27a 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -47,7 +47,7 @@ class DirectoryTestCase(unittest.TestCase): hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), - replication_layer=self.mock_federation, + replication_client=self.mock_federation, federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index d92bf240b..fe73f2b96 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -34,7 +34,7 @@ class E2eKeysHandlerTestCase(unittest.TestCase): def setUp(self): self.hs = yield utils.setup_test_homeserver( handlers=None, - replication_layer=mock.Mock(), + replication_client=mock.Mock(), ) self.handler = synapse.handlers.e2e_keys.E2eKeysHandler(self.hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 73223ffbd..c69043768 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -51,7 +51,8 @@ class ProfileTestCase(unittest.TestCase): http_client=None, handlers=None, resource_for_federation=Mock(), - replication_layer=self.mock_federation, + replication_client=self.mock_federation, + replication_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index fcd380b03..a433bbfa8 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -81,7 +81,7 @@ class TypingNotificationsTestCase(unittest.TestCase): "get_current_state_deltas", ]), state_handler=self.state_handler, - handlers=None, + handlers=Mock(), notifier=mock_notifier, resource_for_client=Mock(), resource_for_federation=self.mock_federation_resource, diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 74f104e3b..ceffdaad5 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,7 +31,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.hs = yield setup_test_homeserver( "blue", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index e9698bfdc..f04bf7dfd 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -114,7 +114,7 @@ class EventStreamPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index dddcf51b6..feddcf024 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -45,7 +45,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, resource_for_client=self.mock_resource, federation=Mock(), - replication_layer=Mock(), + replication_client=Mock(), profile_handler=self.mock_handler ) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 9f3725538..2c0708b0d 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -46,7 +46,7 @@ class RoomPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -409,7 +409,7 @@ class RoomsMemberListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -493,7 +493,7 @@ class RoomsCreateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -582,7 +582,7 @@ class RoomTopicTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -697,7 +697,7 @@ class RoomMemberStateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -829,7 +829,7 @@ class RoomMessagesTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -929,7 +929,7 @@ class RoomInitialSyncTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), @@ -1003,7 +1003,7 @@ class RoomMessageListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index e46534cd3..62639e3ad 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(RestTestCase): "red", clock=self.clock, http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 13d81f972..cc0df7f66 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -42,7 +42,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) self.as_token = "token1" @@ -119,7 +119,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) self.db_pool = hs.get_db_pool() @@ -455,7 +455,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) ApplicationServiceStore(None, hs) @@ -473,7 +473,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) with self.assertRaises(ConfigError) as cm: @@ -497,7 +497,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) with self.assertRaises(ConfigError) as cm: From ea7b3c4b1b829703a780418e6bb6b7860a5b5451 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:00:04 +0000 Subject: [PATCH 089/173] Remove unused ReplicationLayer --- synapse/federation/__init__.py | 8 ----- synapse/federation/replication.py | 51 ------------------------------- 2 files changed, 59 deletions(-) delete mode 100644 synapse/federation/replication.py diff --git a/synapse/federation/__init__.py b/synapse/federation/__init__.py index 2e32d245b..f5f0bdfca 100644 --- a/synapse/federation/__init__.py +++ b/synapse/federation/__init__.py @@ -15,11 +15,3 @@ """ This package includes all the federation specific logic. """ - -from .replication import ReplicationLayer - - -def initialize_http_replication(hs): - transport = hs.get_federation_transport_client() - - return ReplicationLayer(hs, transport) diff --git a/synapse/federation/replication.py b/synapse/federation/replication.py deleted file mode 100644 index b8b3a3f93..000000000 --- a/synapse/federation/replication.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This layer is responsible for replicating with remote home servers using -a given transport. -""" - -from .federation_client import FederationClient -from .federation_server import FederationServer - -import logging - - -logger = logging.getLogger(__name__) - - -class ReplicationLayer(FederationClient, FederationServer): - """This layer is responsible for replicating with remote home servers over - the given transport. I.e., does the sending and receiving of PDUs to - remote home servers. - - The layer communicates with the rest of the server via a registered - ReplicationHandler. - - In more detail, the layer: - * Receives incoming data and processes it into transactions and pdus. - * Fetches any PDUs it thinks it might have missed. - * Keeps the current state for contexts up to date by applying the - suitable conflict resolution. - * Sends outgoing pdus wrapped in transactions. - * Fills out the references to previous pdus/transactions appropriately - for outgoing data. - """ - - def __init__(self, hs, transport_layer): - super(ReplicationLayer, self).__init__(hs) - - def __str__(self): - return "" % self.server_name From d023ecb8109718cd95c4dd86e916a459fc610547 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:08:10 +0000 Subject: [PATCH 090/173] Don't build handlers on workers unnecessarily --- synapse/app/client_reader.py | 1 - synapse/app/event_creator.py | 1 - synapse/app/federation_reader.py | 1 - synapse/app/frontend_proxy.py | 1 - synapse/app/media_repository.py | 1 - 5 files changed, 5 deletions(-) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 3b3352798..0a8ce9bc6 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -156,7 +156,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index fc0b9e8c0..eb593c527 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -161,7 +161,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 4de43c41f..20d157911 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -144,7 +144,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index e32ee8fe9..816c080d1 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -211,7 +211,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 1ed1ca877..84c5791b3 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -158,7 +158,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): From 31becf4ac3a1c8c675ecab481b07cffb9aa24fd8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:28:52 +0000 Subject: [PATCH 091/173] Make functions private that can be --- synapse/handlers/room_member.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730..2a6b7e9f8 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -138,7 +138,7 @@ class RoomMemberHandler(object): defer.returnValue(event) @defer.inlineCallbacks - def remote_join(self, remote_room_hosts, room_id, user, content): + def _remote_join(self, remote_room_hosts, room_id, user, content): if len(remote_room_hosts) == 0: raise SynapseError(404, "No known servers") @@ -292,7 +292,7 @@ class RoomMemberHandler(object): raise AuthError(403, "Guest access not allowed") if not is_host_in_room: - inviter = yield self.get_inviter(target.to_string(), room_id) + inviter = yield self._get_inviter(target.to_string(), room_id) if inviter and not self.hs.is_mine(inviter): remote_room_hosts.append(inviter.domain) @@ -306,7 +306,7 @@ class RoomMemberHandler(object): if requester.is_guest: content["kind"] = "guest" - ret = yield self.remote_join( + ret = yield self._remote_join( remote_room_hosts, room_id, target, content ) defer.returnValue(ret) @@ -314,7 +314,7 @@ class RoomMemberHandler(object): elif effective_membership_state == Membership.LEAVE: if not is_host_in_room: # perhaps we've been invited - inviter = yield self.get_inviter(target.to_string(), room_id) + inviter = yield self._get_inviter(target.to_string(), room_id) if not inviter: raise SynapseError(404, "Not a known room") @@ -496,7 +496,7 @@ class RoomMemberHandler(object): defer.returnValue((RoomID.from_string(room_id), servers)) @defer.inlineCallbacks - def get_inviter(self, user_id, room_id): + def _get_inviter(self, user_id, room_id): invite = yield self.store.get_invite_for_user_in_room( user_id=user_id, room_id=room_id, @@ -573,7 +573,7 @@ class RoomMemberHandler(object): if "mxid" in data: if "signatures" not in data: raise AuthError(401, "No signatures on 3pid binding") - yield self.verify_any_signature(data, id_server) + yield self._verify_any_signature(data, id_server) defer.returnValue(data["mxid"]) except IOError as e: @@ -581,7 +581,7 @@ class RoomMemberHandler(object): defer.returnValue(None) @defer.inlineCallbacks - def verify_any_signature(self, data, server_hostname): + def _verify_any_signature(self, data, server_hostname): if server_hostname not in data["signatures"]: raise AuthError(401, "No signature from server %s" % (server_hostname,)) for key_name, signature in data["signatures"][server_hostname].items(): From d0fcc48f9dfc09531619faf23d407807eec46df9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:39:58 +0000 Subject: [PATCH 092/173] extra_users is actually a list of UserIDs --- synapse/handlers/message.py | 2 +- synapse/replication/http/send_event.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 42aab91c5..4f97c8db7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -667,7 +667,7 @@ class EventCreationHandler(object): event (FrozenEvent) context (EventContext) ratelimit (bool) - extra_users (list(str)): Any extra users to notify about event + extra_users (list(UserID)): Any extra users to notify about event """ try: diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 70f2fe456..bbe2f967b 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -25,7 +25,7 @@ from synapse.util.async import sleep from synapse.util.caches.response_cache import ResponseCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.metrics import Measure -from synapse.types import Requester +from synapse.types import Requester, UserID import logging import re @@ -46,7 +46,7 @@ def send_event_to_master(client, host, port, requester, event, context, event (FrozenEvent) context (EventContext) ratelimit (bool) - extra_users (list(str)): Any extra users to notify about event + extra_users (list(UserID)): Any extra users to notify about event """ uri = "http://%s:%s/_synapse/replication/send_event/%s" % ( host, port, event.event_id, @@ -59,7 +59,7 @@ def send_event_to_master(client, host, port, requester, event, context, "context": context.serialize(event), "requester": requester.serialize(), "ratelimit": ratelimit, - "extra_users": extra_users, + "extra_users": [u.to_string() for u in extra_users], } try: @@ -143,7 +143,7 @@ class ReplicationSendEventRestServlet(RestServlet): context = yield EventContext.deserialize(self.store, content["context"]) ratelimit = content["ratelimit"] - extra_users = content["extra_users"] + extra_users = [UserID.from_string(u) for u in content["extra_users"]] if requester.user: request.authenticated_entity = requester.user.to_string() From 0f942f68c106b9d0fb89d0eaef9fa942b6d003ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:31:11 +0000 Subject: [PATCH 093/173] Factor out _remote_reject_invite in RoomMember --- synapse/handlers/room_member.py | 50 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730..6c8acfbf0 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -154,6 +154,30 @@ class RoomMemberHandler(object): ) yield user_joined_room(self.distributor, user, room_id) + @defer.inlineCallbacks + def _remote_reject_invite(self, remote_room_hosts, room_id, target): + fed_handler = self.federation_handler + try: + ret = yield fed_handler.do_remotely_reject_invite( + remote_room_hosts, + room_id, + target.to_string(), + ) + defer.returnValue(ret) + except Exception as e: + # if we were unable to reject the exception, just mark + # it as rejected on our end and plough ahead. + # + # The 'except' clause is very broad, but we need to + # capture everything from DNS failures upwards + # + logger.warn("Failed to reject invite: %s", e) + + yield self.store.locally_reject_invite( + target.to_string(), room_id + ) + defer.returnValue({}) + @defer.inlineCallbacks def update_membership( self, @@ -328,28 +352,10 @@ class RoomMemberHandler(object): else: # send the rejection to the inviter's HS. remote_room_hosts = remote_room_hosts + [inviter.domain] - fed_handler = self.federation_handler - try: - ret = yield fed_handler.do_remotely_reject_invite( - remote_room_hosts, - room_id, - target.to_string(), - ) - defer.returnValue(ret) - except Exception as e: - # if we were unable to reject the exception, just mark - # it as rejected on our end and plough ahead. - # - # The 'except' clause is very broad, but we need to - # capture everything from DNS failures upwards - # - logger.warn("Failed to reject invite: %s", e) - - yield self.store.locally_reject_invite( - target.to_string(), room_id - ) - - defer.returnValue({}) + res = yield self._remote_reject_invite( + remote_room_hosts, room_id, target, + ) + defer.returnValue(res) res = yield self._local_membership_update( requester=requester, From f43b6d6d9b7e6f6a94ba3e1886cec6ca864fad43 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:29:17 +0000 Subject: [PATCH 094/173] Fix docstring types --- synapse/federation/federation_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5b1914f2f..90302c953 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -581,7 +581,7 @@ class FederationHandlerRegistry(object): Args: edu_type (str): The type of the incoming EDU to register handler for - handler (Callable[str, dict]): A callable invoked on incoming EDU + handler (Callable[[str, dict]]): A callable invoked on incoming EDU of the given type. The arguments are the origin server name and the EDU contents. """ @@ -597,7 +597,7 @@ class FederationHandlerRegistry(object): Args: query_type (str): Category name of the query, which should match the string used by make_query. - handler (Callable[dict] -> Deferred[dict]): Invoked to handle + handler (Callable[[dict], Deferred[dict]]): Invoked to handle incoming queries of this type. The return will be yielded on and the result used as the response to the query request. """ From 8b3573a8b209c60b03d5ef7f4dfed9ccb9e9f7b3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 12:04:38 +0000 Subject: [PATCH 095/173] Refactor get_or_register_3pid_guest --- synapse/handlers/register.py | 26 ++++++++++++++++++++++---- synapse/handlers/room_member.py | 10 +++------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 9021d4d57..ed5939880 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -446,16 +446,34 @@ class RegistrationHandler(BaseHandler): return self.hs.get_auth_handler() @defer.inlineCallbacks - def guest_access_token_for(self, medium, address, inviter_user_id): + def get_or_register_3pid_guest(self, medium, address, inviter_user_id): + """Get a guest access token for a 3PID, creating a guest account if + one doesn't already exist. + + Args: + medium (str) + address (str) + inviter_user_id (str): The user ID who is trying to invite the + 3PID + + Returns: + Deferred[(str, str)]: A 2-tuple of `(user_id, access_token)` of the + 3PID guest account. + """ access_token = yield self.store.get_3pid_guest_access_token(medium, address) if access_token: - defer.returnValue(access_token) + user_info = yield self.auth.get_user_by_access_token( + access_token + ) - _, access_token = yield self.register( + defer.returnValue((user_info["user"].to_string(), access_token)) + + user_id, access_token = yield self.register( generate_token=True, make_guest=True ) access_token = yield self.store.save_or_get_3pid_guest_access_token( medium, address, access_token, inviter_user_id ) - defer.returnValue(access_token) + + defer.returnValue((user_id, access_token)) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730..c3c720536 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -735,20 +735,16 @@ class RoomMemberHandler(object): } if self.config.invite_3pid_guest: - registration_handler = self.registration_handler - guest_access_token = yield registration_handler.guest_access_token_for( + rh = self.registration_handler + guest_user_id, guest_access_token = yield rh.get_or_register_3pid_guest( medium=medium, address=address, inviter_user_id=inviter_user_id, ) - guest_user_info = yield self.auth.get_user_by_access_token( - guest_access_token - ) - invite_config.update({ "guest_access_token": guest_access_token, - "guest_user_id": guest_user_info["user"].to_string(), + "guest_user_id": guest_user_id, }) data = yield self.simple_http_client.post_urlencoded_get_json( From f5160d4a3e559ba23f3e6002a8f9172dff4b3d60 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 12:12:55 +0000 Subject: [PATCH 096/173] RoomMembershipRestServlet doesn't handle /forget Due to the order we register the REST handlers `/forget` was handled by the correct handler. --- synapse/rest/client/v1/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 9d745174c..f8999d64d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -599,7 +599,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): def register(self, http_server): # /rooms/$roomid/[invite|join|leave] PATTERNS = ("/rooms/(?P[^/]*)/" - "(?Pjoin|invite|leave|ban|unban|kick|forget)") + "(?Pjoin|invite|leave|ban|unban|kick)") register_txn_path(self, PATTERNS, http_server) @defer.inlineCallbacks From ea3442c15c32ba98c407c71722cb80821d99d160 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:16:21 +0000 Subject: [PATCH 097/173] Add docstring --- synapse/handlers/room_member.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6c8acfbf0..da35e604d 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -139,6 +139,19 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def remote_join(self, remote_room_hosts, room_id, user, content): + """Try and join a room that this server is not in + + Args: + remote_room_hosts (list[str]): List of servers that can be used + to join via. + room_id (str): Room that we are trying to join + user (UserID): User who is trying to join + content (dict): A dict that should be used as the content of the + join event. + + Returns: + Deferred + """ if len(remote_room_hosts) == 0: raise SynapseError(404, "No known servers") @@ -156,6 +169,19 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def _remote_reject_invite(self, remote_room_hosts, room_id, target): + """Attempt to reject an invite for a room this server is not in. If we + fail to do so we locally mark the invite as rejected. + + Args: + remote_room_hosts (list[str]): List of servers to use to try and + reject invite + room_id (str) + target (UserID): The user rejecting the invite + + Returns: + Deferred[dict]: A dictionary to be returned to the client, may + include event_id etc, or nothing if we locally rejected + """ fed_handler = self.federation_handler try: ret = yield fed_handler.do_remotely_reject_invite( From cea462e2857d3af2007521f131adb46e4f5ce6fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:22:21 +0000 Subject: [PATCH 098/173] s/replication_server/federation_server --- synapse/federation/transport/server.py | 2 +- synapse/server.py | 4 ++-- tests/handlers/test_profile.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 04b83e691..a66a6b069 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -1190,7 +1190,7 @@ GROUP_ATTESTATION_SERVLET_CLASSES = ( def register_servlets(hs, resource, authenticator, ratelimiter): for servletclass in FEDERATION_SERVLET_CLASSES: servletclass( - handler=hs.get_replication_server(), + handler=hs.get_federation_server(), authenticator=authenticator, ratelimiter=ratelimiter, server_name=hs.hostname, diff --git a/synapse/server.py b/synapse/server.py index 894e9c2ac..802a79384 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -102,7 +102,7 @@ class HomeServer(object): 'http_client', 'db_pool', 'replication_client', - 'replication_server', + 'federation_server', 'handlers', 'v1auth', 'auth', @@ -202,7 +202,7 @@ class HomeServer(object): def build_replication_client(self): return FederationClient(self) - def build_replication_server(self): + def build_federation_server(self): return FederationServer(self) def build_handlers(self): diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index c69043768..f9f828471 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -52,7 +52,7 @@ class ProfileTestCase(unittest.TestCase): handlers=None, resource_for_federation=Mock(), replication_client=self.mock_federation, - replication_server=Mock(), + federation_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", From cb9f8e527c09315eea05955ec970154ea2fb9729 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:26:52 +0000 Subject: [PATCH 099/173] s/replication_client/federation_client/ --- synapse/app/homeserver.py | 2 +- synapse/handlers/device.py | 2 +- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/federation.py | 2 +- synapse/handlers/profile.py | 2 +- synapse/handlers/room_list.py | 2 +- synapse/server.py | 4 ++-- tests/handlers/test_directory.py | 2 +- tests/handlers/test_e2e_keys.py | 2 +- tests/handlers/test_profile.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_profile.py | 2 +- tests/rest/client/v1/test_rooms.py | 16 ++++++++-------- tests/rest/client/v1/test_typing.py | 2 +- tests/storage/test_appservice.py | 10 +++++----- 17 files changed, 29 insertions(+), 29 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 503f461ab..e477c7ced 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -348,7 +348,7 @@ def setup(config_options): hs.get_state_handler().start_caching() hs.get_datastore().start_profiling() hs.get_datastore().start_doing_background_updates() - hs.get_replication_client().start_get_pdu_cache() + hs.get_federation_client().start_get_pdu_cache() register_memory_metrics(hs) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index fcf41630d..40f3d2467 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -431,7 +431,7 @@ class DeviceListEduUpdater(object): def __init__(self, hs, device_handler): self.store = hs.get_datastore() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() self.clock = hs.get_clock() self.device_handler = device_handler diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index dfe04eb1c..c5b6e75e0 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -36,7 +36,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 0ca8d036e..31b1ece13 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) class E2eKeysHandler(object): def __init__(self, hs): self.store = hs.get_datastore() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() self.device_handler = hs.get_device_handler() self.is_mine = hs.is_mine self.clock = hs.get_clock() diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cfd437916..080aca3d7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -68,7 +68,7 @@ class FederationHandler(BaseHandler): self.hs = hs self.store = hs.get_datastore() - self.replication_layer = hs.get_replication_client() + self.replication_layer = hs.get_federation_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname self.keyring = hs.get_keyring() diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 0cfac60d7..cb710fe79 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -31,7 +31,7 @@ class ProfileHandler(BaseHandler): def __init__(self, hs): super(ProfileHandler, self).__init__(hs) - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index f79bd8902..5d81f59b4 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -409,7 +409,7 @@ class RoomListHandler(BaseHandler): def _get_remote_list_cached(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): - repl_layer = self.hs.get_replication_client() + repl_layer = self.hs.get_federation_client() if search_filter: # We can't cache when asking for search return repl_layer.get_public_rooms( diff --git a/synapse/server.py b/synapse/server.py index 802a79384..43c6e0a6d 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -101,7 +101,7 @@ class HomeServer(object): DEPENDENCIES = [ 'http_client', 'db_pool', - 'replication_client', + 'federation_client', 'federation_server', 'handlers', 'v1auth', @@ -199,7 +199,7 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter - def build_replication_client(self): + def build_federation_client(self): return FederationClient(self) def build_federation_server(self): diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index b4f36b27a..7e5332e27 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -47,7 +47,7 @@ class DirectoryTestCase(unittest.TestCase): hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), - replication_client=self.mock_federation, + federation_client=self.mock_federation, federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index fe73f2b96..d1bd87b89 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -34,7 +34,7 @@ class E2eKeysHandlerTestCase(unittest.TestCase): def setUp(self): self.hs = yield utils.setup_test_homeserver( handlers=None, - replication_client=mock.Mock(), + federation_client=mock.Mock(), ) self.handler = synapse.handlers.e2e_keys.E2eKeysHandler(self.hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index f9f828471..458296ee4 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -51,7 +51,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, handlers=None, resource_for_federation=Mock(), - replication_client=self.mock_federation, + federation_client=self.mock_federation, federation_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index ceffdaad5..64e07a8c9 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,7 +31,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.hs = yield setup_test_homeserver( "blue", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index f04bf7dfd..2b89c0a3c 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -114,7 +114,7 @@ class EventStreamPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index feddcf024..deac7f100 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -45,7 +45,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, resource_for_client=self.mock_resource, federation=Mock(), - replication_client=Mock(), + federation_client=Mock(), profile_handler=self.mock_handler ) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 2c0708b0d..7e8966a1a 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -46,7 +46,7 @@ class RoomPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -409,7 +409,7 @@ class RoomsMemberListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -493,7 +493,7 @@ class RoomsCreateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -582,7 +582,7 @@ class RoomTopicTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -697,7 +697,7 @@ class RoomMemberStateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -829,7 +829,7 @@ class RoomMessagesTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -929,7 +929,7 @@ class RoomInitialSyncTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), @@ -1003,7 +1003,7 @@ class RoomMessageListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 62639e3ad..2ec4ecab5 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(RestTestCase): "red", clock=self.clock, http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index cc0df7f66..c2e39a728 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -42,7 +42,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) self.as_token = "token1" @@ -119,7 +119,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) self.db_pool = hs.get_db_pool() @@ -455,7 +455,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) ApplicationServiceStore(None, hs) @@ -473,7 +473,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) with self.assertRaises(ConfigError) as cm: @@ -497,7 +497,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) with self.assertRaises(ConfigError) as cm: From b78717b87ba3fc248838db522f007c13d0cd8c76 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:49:13 +0000 Subject: [PATCH 100/173] Split RoomMemberHandler into base and master class The intention here is to split the class into the bits that can be done on workers and the bits that have to be done on the master. In future there will also be a class that can be run on the worker, which will delegate work to the master when necessary. --- synapse/handlers/room_member.py | 231 +++++++++++++++++++------------- synapse/server.py | 6 +- 2 files changed, 139 insertions(+), 98 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6ee8420d1..ce83d5645 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -14,9 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import logging - from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json from twisted.internet import defer @@ -31,6 +28,10 @@ from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room +import abc +import logging + + logger = logging.getLogger(__name__) id_server_scheme = "https://" @@ -42,6 +43,8 @@ class RoomMemberHandler(object): # API that takes ID strings and returns pagination chunks. These concerns # ought to be separated out a lot better. + __metaclass__ = abc.ABCMeta + def __init__(self, hs): self.hs = hs self.store = hs.get_datastore() @@ -62,9 +65,56 @@ class RoomMemberHandler(object): self.clock = hs.get_clock() self.spam_checker = hs.get_spam_checker() - self.distributor = hs.get_distributor() - self.distributor.declare("user_joined_room") - self.distributor.declare("user_left_room") + @abc.abstractmethod + def _remote_join(self, requester, remote_room_hosts, room_id, user, content): + """Try and join a room that this server is not in + + Args: + remote_room_hosts (list[str]): List of servers that can be used + to join via. + room_id (str): Room that we are trying to join + user (UserID): User who is trying to join + content (dict): A dict that should be used as the content of the + join event. + + Returns: + Deferred + """ + raise NotImplementedError() + + @abc.abstractmethod + def _remote_reject_invite(self, remote_room_hosts, room_id, target): + """Attempt to reject an invite for a room this server is not in. If we + fail to do so we locally mark the invite as rejected. + + Args: + remote_room_hosts (list[str]): List of servers to use to try and + reject invite + room_id (str) + target (UserID): The user rejecting the invite + + Returns: + Deferred[dict]: A dictionary to be returned to the client, may + include event_id etc, or nothing if we locally rejected + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): + """Get a guest access token for a 3PID, creating a guest account if + one doesn't already exist. + + Args: + medium (str) + address (str) + inviter_user_id (str): The user ID who is trying to invite the + 3PID + + Returns: + Deferred[(str, str)]: A 2-tuple of `(user_id, access_token)` of the + 3PID guest account. + """ + raise NotImplementedError() @defer.inlineCallbacks def _local_membership_update( @@ -137,73 +187,6 @@ class RoomMemberHandler(object): defer.returnValue(event) - @defer.inlineCallbacks - def _remote_join(self, remote_room_hosts, room_id, user, content): - """Try and join a room that this server is not in - - Args: - remote_room_hosts (list[str]): List of servers that can be used - to join via. - room_id (str): Room that we are trying to join - user (UserID): User who is trying to join - content (dict): A dict that should be used as the content of the - join event. - - Returns: - Deferred - """ - if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") - - # We don't do an auth check if we are doing an invite - # join dance for now, since we're kinda implicitly checking - # that we are allowed to join when we decide whether or not we - # need to do the invite/join dance. - yield self.federation_handler.do_invite_join( - remote_room_hosts, - room_id, - user.to_string(), - content, - ) - yield user_joined_room(self.distributor, user, room_id) - - @defer.inlineCallbacks - def _remote_reject_invite(self, remote_room_hosts, room_id, target): - """Attempt to reject an invite for a room this server is not in. If we - fail to do so we locally mark the invite as rejected. - - Args: - remote_room_hosts (list[str]): List of servers to use to try and - reject invite - room_id (str) - target (UserID): The user rejecting the invite - - Returns: - Deferred[dict]: A dictionary to be returned to the client, may - include event_id etc, or nothing if we locally rejected - """ - fed_handler = self.federation_handler - try: - ret = yield fed_handler.do_remotely_reject_invite( - remote_room_hosts, - room_id, - target.to_string(), - ) - defer.returnValue(ret) - except Exception as e: - # if we were unable to reject the exception, just mark - # it as rejected on our end and plough ahead. - # - # The 'except' clause is very broad, but we need to - # capture everything from DNS failures upwards - # - logger.warn("Failed to reject invite: %s", e) - - yield self.store.locally_reject_invite( - target.to_string(), room_id - ) - defer.returnValue({}) - @defer.inlineCallbacks def update_membership( self, @@ -673,6 +656,7 @@ class RoomMemberHandler(object): token, public_keys, fallback_public_key, display_name = ( yield self._ask_id_server_for_third_party_invite( + requester=requester, id_server=id_server, medium=medium, address=address, @@ -709,6 +693,7 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def _ask_id_server_for_third_party_invite( self, + requester, id_server, medium, address, @@ -725,6 +710,7 @@ class RoomMemberHandler(object): Asks an identity server for a third party invite. Args: + requester (Requester) id_server (str): hostname + optional port for the identity server. medium (str): The literal string "email". address (str): The third party address being invited. @@ -767,8 +753,8 @@ class RoomMemberHandler(object): } if self.config.invite_3pid_guest: - rh = self.registration_handler - guest_user_id, guest_access_token = yield rh.get_or_register_3pid_guest( + guest_access_token, guest_user_id = yield self.get_or_register_3pid_guest( + requester=requester, medium=medium, address=address, inviter_user_id=inviter_user_id, @@ -801,27 +787,6 @@ class RoomMemberHandler(object): display_name = data["display_name"] defer.returnValue((token, public_keys, fallback_public_key, display_name)) - @defer.inlineCallbacks - def forget(self, user, room_id): - user_id = user.to_string() - - member = yield self.state_handler.get_current_state( - room_id=room_id, - event_type=EventTypes.Member, - state_key=user_id - ) - membership = member.membership if member else None - - if membership is not None and membership not in [ - Membership.LEAVE, Membership.BAN - ]: - raise SynapseError(400, "User %s in room %s" % ( - user_id, room_id - )) - - if membership: - yield self.store.forget(user_id, room_id) - @defer.inlineCallbacks def _is_host_in_room(self, current_state_ids): # Have we just created the room, and is this about to be the very @@ -843,3 +808,77 @@ class RoomMemberHandler(object): defer.returnValue(True) defer.returnValue(False) + + +class RoomMemberMasterHandler(RoomMemberHandler): + @defer.inlineCallbacks + def _remote_join(self, remote_room_hosts, room_id, user, content): + """Implements RoomMemberHandler._remote_join + """ + if len(remote_room_hosts) == 0: + raise SynapseError(404, "No known servers") + + # We don't do an auth check if we are doing an invite + # join dance for now, since we're kinda implicitly checking + # that we are allowed to join when we decide whether or not we + # need to do the invite/join dance. + yield self.federation_handler.do_invite_join( + remote_room_hosts, + room_id, + user.to_string(), + content, + ) + yield user_joined_room(self.distributor, user, room_id) + + @defer.inlineCallbacks + def _remote_reject_invite(self, remote_room_hosts, room_id, target): + """Implements RoomMemberHandler._remote_reject_invite + """ + fed_handler = self.federation_handler + try: + ret = yield fed_handler.do_remotely_reject_invite( + remote_room_hosts, + room_id, + target.to_string(), + ) + defer.returnValue(ret) + except Exception as e: + # if we were unable to reject the exception, just mark + # it as rejected on our end and plough ahead. + # + # The 'except' clause is very broad, but we need to + # capture everything from DNS failures upwards + # + logger.warn("Failed to reject invite: %s", e) + + yield self.store.locally_reject_invite( + target.to_string(), room_id + ) + defer.returnValue({}) + + def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): + """Implements RoomMemberHandler.get_or_register_3pid_guest + """ + rg = self.registration_handler + return rg.get_or_register_3pid_guest(medium, address, inviter_user_id) + + @defer.inlineCallbacks + def forget(self, user, room_id): + user_id = user.to_string() + + member = yield self.state_handler.get_current_state( + room_id=room_id, + event_type=EventTypes.Member, + state_key=user_id + ) + membership = member.membership if member else None + + if membership is not None and membership not in [ + Membership.LEAVE, Membership.BAN + ]: + raise SynapseError(400, "User %s in room %s" % ( + user_id, room_id + )) + + if membership: + yield self.store.forget(user_id, room_id) diff --git a/synapse/server.py b/synapse/server.py index 1bc8d6f70..3d47e2793 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -46,7 +46,7 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler -from synapse.handlers.room_member import RoomMemberHandler +from synapse.handlers.room_member import RoomMemberMasterHandler from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler @@ -387,7 +387,9 @@ class HomeServer(object): return SpamChecker(self) def build_room_member_handler(self): - return RoomMemberHandler(self) + if self.config.worker_app: + return Exception("Can't use RoomMemberHandler on workers") + return RoomMemberMasterHandler(self) def build_federation_registry(self): return FederationHandlerRegistry() From 82f16faa78864a23653ca952072a6f0e906f3367 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 16:00:26 +0000 Subject: [PATCH 101/173] Move user_*_room distributor stuff to master class I added yields when calling user_left_room, but they shouldn't matter on the master process as they always return None anyway. --- synapse/handlers/room_member.py | 55 ++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ce83d5645..669210b73 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -116,6 +116,34 @@ class RoomMemberHandler(object): """ raise NotImplementedError() + @abc.abstractmethod + def _user_joined_room(self, target, room_id): + """Notifies distributor on master process that the user has joined the + room. + + Args: + target (UserID) + room_id (str) + + Returns: + Deferred|None + """ + raise NotImplementedError() + + @abc.abstractmethod + def _user_left_room(self, target, room_id): + """Notifies distributor on master process that the user has left the + room. + + Args: + target (UserID) + room_id (str) + + Returns: + Deferred|None + """ + raise NotImplementedError() + @defer.inlineCallbacks def _local_membership_update( self, requester, target, room_id, membership, @@ -178,12 +206,12 @@ class RoomMemberHandler(object): prev_member_event = yield self.store.get_event(prev_member_event_id) newly_joined = prev_member_event.membership != Membership.JOIN if newly_joined: - yield user_joined_room(self.distributor, target, room_id) + yield self._user_joined_room(target, room_id) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) if prev_member_event.membership == Membership.JOIN: - user_left_room(self.distributor, target, room_id) + yield self._user_left_room(target, room_id) defer.returnValue(event) @@ -460,12 +488,12 @@ class RoomMemberHandler(object): prev_member_event = yield self.store.get_event(prev_member_event_id) newly_joined = prev_member_event.membership != Membership.JOIN if newly_joined: - yield user_joined_room(self.distributor, target_user, room_id) + yield self._user_joined_room(target_user, room_id) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) if prev_member_event.membership == Membership.JOIN: - user_left_room(self.distributor, target_user, room_id) + yield self._user_left_room(target_user, room_id) @defer.inlineCallbacks def _can_guest_join(self, current_state_ids): @@ -811,6 +839,13 @@ class RoomMemberHandler(object): class RoomMemberMasterHandler(RoomMemberHandler): + def __init__(self, hs): + super(RoomMemberMasterHandler, self).__init__(hs) + + self.distributor = hs.get_distributor() + self.distributor.declare("user_joined_room") + self.distributor.declare("user_left_room") + @defer.inlineCallbacks def _remote_join(self, remote_room_hosts, room_id, user, content): """Implements RoomMemberHandler._remote_join @@ -828,7 +863,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): user.to_string(), content, ) - yield user_joined_room(self.distributor, user, room_id) + yield self._user_joined_room(user, room_id) @defer.inlineCallbacks def _remote_reject_invite(self, remote_room_hosts, room_id, target): @@ -862,6 +897,16 @@ class RoomMemberMasterHandler(RoomMemberHandler): rg = self.registration_handler return rg.get_or_register_3pid_guest(medium, address, inviter_user_id) + def _user_joined_room(self, target, room_id): + """Implements RoomMemberHandler._user_joined_room + """ + return user_joined_room(self.distributor, target, room_id) + + def _user_left_room(self, target, room_id): + """Implements RoomMemberHandler._user_left_room + """ + return user_left_room(self.distributor, target, room_id) + @defer.inlineCallbacks def forget(self, user, room_id): user_id = user.to_string() From 16adb11cc0aa255f0be98d6446d629e18a0dcefe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 16:57:07 +0000 Subject: [PATCH 102/173] Correct import order --- synapse/handlers/room_member.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 669210b73..78b5feee4 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -14,6 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import abc +import logging + from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json from twisted.internet import defer @@ -28,9 +31,6 @@ from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room -import abc -import logging - logger = logging.getLogger(__name__) From 6dbebef1415fd4d0da29ebf6b90a16b9cc877b96 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:15:32 +0000 Subject: [PATCH 103/173] Add missing param to docstrings --- synapse/handlers/room_member.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 78b5feee4..790e11c57 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -70,6 +70,7 @@ class RoomMemberHandler(object): """Try and join a room that this server is not in Args: + requester (Requester) remote_room_hosts (list[str]): List of servers that can be used to join via. room_id (str): Room that we are trying to join @@ -88,6 +89,7 @@ class RoomMemberHandler(object): fail to do so we locally mark the invite as rejected. Args: + requester (Requester) remote_room_hosts (list[str]): List of servers to use to try and reject invite room_id (str) @@ -105,6 +107,7 @@ class RoomMemberHandler(object): one doesn't already exist. Args: + requester (Requester) medium (str) address (str) inviter_user_id (str): The user ID who is trying to invite the From d45a1148245839c5f9776fee6d463ed69981d729 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:24:34 +0000 Subject: [PATCH 104/173] Raise, don't return, exception --- synapse/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/server.py b/synapse/server.py index 3d47e2793..6ffe900b7 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -388,7 +388,7 @@ class HomeServer(object): def build_room_member_handler(self): if self.config.worker_app: - return Exception("Can't use RoomMemberHandler on workers") + raise Exception("Can't use RoomMemberHandler on workers") return RoomMemberMasterHandler(self) def build_federation_registry(self): From 3518d0ea8fc4cf9247790fde587e37e5d129ddb0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:36:50 +0000 Subject: [PATCH 105/173] Split up ProfileStore --- synapse/replication/slave/storage/profile.py | 21 ++++++++ synapse/storage/profile.py | 50 ++++++++++---------- 2 files changed, 47 insertions(+), 24 deletions(-) create mode 100644 synapse/replication/slave/storage/profile.py diff --git a/synapse/replication/slave/storage/profile.py b/synapse/replication/slave/storage/profile.py new file mode 100644 index 000000000..46c28d417 --- /dev/null +++ b/synapse/replication/slave/storage/profile.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.storage.profile import ProfileWorkerStore + + +class SlavedProfileStore(ProfileWorkerStore, BaseSlavedStore): + pass diff --git a/synapse/storage/profile.py b/synapse/storage/profile.py index ec02e73bc..8612bd5ec 100644 --- a/synapse/storage/profile.py +++ b/synapse/storage/profile.py @@ -21,14 +21,7 @@ from synapse.api.errors import StoreError from ._base import SQLBaseStore -class ProfileStore(SQLBaseStore): - def create_profile(self, user_localpart): - return self._simple_insert( - table="profiles", - values={"user_id": user_localpart}, - desc="create_profile", - ) - +class ProfileWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_profileinfo(self, user_localpart): try: @@ -61,14 +54,6 @@ class ProfileStore(SQLBaseStore): desc="get_profile_displayname", ) - def set_profile_displayname(self, user_localpart, new_displayname): - return self._simple_update_one( - table="profiles", - keyvalues={"user_id": user_localpart}, - updatevalues={"displayname": new_displayname}, - desc="set_profile_displayname", - ) - def get_profile_avatar_url(self, user_localpart): return self._simple_select_one_onecol( table="profiles", @@ -77,14 +62,6 @@ class ProfileStore(SQLBaseStore): desc="get_profile_avatar_url", ) - def set_profile_avatar_url(self, user_localpart, new_avatar_url): - return self._simple_update_one( - table="profiles", - keyvalues={"user_id": user_localpart}, - updatevalues={"avatar_url": new_avatar_url}, - desc="set_profile_avatar_url", - ) - def get_from_remote_profile_cache(self, user_id): return self._simple_select_one( table="remote_profile_cache", @@ -94,6 +71,31 @@ class ProfileStore(SQLBaseStore): desc="get_from_remote_profile_cache", ) + +class ProfileStore(ProfileWorkerStore): + def create_profile(self, user_localpart): + return self._simple_insert( + table="profiles", + values={"user_id": user_localpart}, + desc="create_profile", + ) + + def set_profile_displayname(self, user_localpart, new_displayname): + return self._simple_update_one( + table="profiles", + keyvalues={"user_id": user_localpart}, + updatevalues={"displayname": new_displayname}, + desc="set_profile_displayname", + ) + + def set_profile_avatar_url(self, user_localpart, new_avatar_url): + return self._simple_update_one( + table="profiles", + keyvalues={"user_id": user_localpart}, + updatevalues={"avatar_url": new_avatar_url}, + desc="set_profile_avatar_url", + ) + def add_remote_profile_cache(self, user_id, displayname, avatar_url): """Ensure we are caching the remote user's profiles. From df8ff682a72362acde7d99f7235ad4d4f87817e0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:38:21 +0000 Subject: [PATCH 106/173] Only update remote profile cache on master --- synapse/handlers/profile.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index cb710fe79..3465a787a 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -38,7 +38,10 @@ class ProfileHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() - self.clock.looping_call(self._update_remote_profile_cache, self.PROFILE_UPDATE_MS) + if hs.config.worker_app is None: + self.clock.looping_call( + self._update_remote_profile_cache, self.PROFILE_UPDATE_MS, + ) @defer.inlineCallbacks def get_profile(self, user_id): From 350331d4664467358469e7d6161f9747707239ba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:50:39 +0000 Subject: [PATCH 107/173] _remote_join and co take a requester --- synapse/handlers/room_member.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 1d7e6997b..9977be883 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -370,7 +370,7 @@ class RoomMemberHandler(object): content["kind"] = "guest" ret = yield self._remote_join( - remote_room_hosts, room_id, target, content + requester, remote_room_hosts, room_id, target, content ) defer.returnValue(ret) @@ -392,7 +392,7 @@ class RoomMemberHandler(object): # send the rejection to the inviter's HS. remote_room_hosts = remote_room_hosts + [inviter.domain] res = yield self._remote_reject_invite( - remote_room_hosts, room_id, target, + requester, remote_room_hosts, room_id, target, ) defer.returnValue(res) @@ -849,7 +849,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): self.distributor.declare("user_left_room") @defer.inlineCallbacks - def _remote_join(self, remote_room_hosts, room_id, user, content): + def _remote_join(self, requester, remote_room_hosts, room_id, user, content): """Implements RoomMemberHandler._remote_join """ if len(remote_room_hosts) == 0: @@ -868,7 +868,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): yield self._user_joined_room(user, room_id) @defer.inlineCallbacks - def _remote_reject_invite(self, remote_room_hosts, room_id, target): + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): """Implements RoomMemberHandler._remote_reject_invite """ fed_handler = self.federation_handler From b27320b5503d38e6bd05375b272c1a1d18656ea2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 16:32:37 +0000 Subject: [PATCH 108/173] Implement RoomMemberWorkerHandler --- synapse/handlers/room_member.py | 78 ++++++ synapse/replication/http/__init__.py | 2 + synapse/replication/http/membership.py | 334 +++++++++++++++++++++++++ synapse/server.py | 6 +- 4 files changed, 418 insertions(+), 2 deletions(-) create mode 100644 synapse/replication/http/membership.py diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 1d7e6997b..3c5751d66 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -27,6 +27,10 @@ from synapse.api.constants import ( EventTypes, Membership, ) from synapse.api.errors import AuthError, SynapseError, Codes +from synapse.replication.http.membership import ( + remote_join, remote_reject_invite, get_or_register_3pid_guest, + notify_user_membership_change, +) from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room @@ -929,3 +933,77 @@ class RoomMemberMasterHandler(RoomMemberHandler): if membership: yield self.store.forget(user_id, room_id) + + +class RoomMemberWorkerHandler(RoomMemberHandler): + @defer.inlineCallbacks + def _remote_join(self, requester, remote_room_hosts, room_id, user, content): + """Implements RoomMemberHandler._remote_join + """ + if len(remote_room_hosts) == 0: + raise SynapseError(404, "No known servers") + + ret = yield remote_join( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + remote_room_hosts=remote_room_hosts, + room_id=room_id, + user_id=user.to_string(), + content=content, + ) + + yield self._user_joined_room(user, room_id) + + defer.returnValue(ret) + + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): + """Implements RoomMemberHandler._remote_reject_invite + """ + return remote_reject_invite( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + remote_room_hosts=remote_room_hosts, + room_id=room_id, + user_id=target.to_string(), + ) + + def _user_joined_room(self, target, room_id): + """Implements RoomMemberHandler._user_joined_room + """ + return notify_user_membership_change( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + user_id=target.to_string(), + room_id=room_id, + change="join", + ) + + def _user_left_room(self, target, room_id): + """Implements RoomMemberHandler._user_left_room + """ + return notify_user_membership_change( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + user_id=target.to_string(), + room_id=room_id, + change="left", + ) + + def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): + """Implements RoomMemberHandler.get_or_register_3pid_guest + """ + return get_or_register_3pid_guest( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + medium=medium, + address=address, + inviter_user_id=inviter_user_id, + ) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index b378b4164..7a148301d 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -15,6 +15,7 @@ import send_event +import membership from synapse.http.server import JsonResource @@ -29,3 +30,4 @@ class ReplicationRestResource(JsonResource): def register_servlets(self, hs): send_event.register_servlets(hs, self) + membership.register_servlets(hs, self) diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py new file mode 100644 index 000000000..df16c3b2b --- /dev/null +++ b/synapse/replication/http/membership.py @@ -0,0 +1,334 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.api.errors import SynapseError, MatrixCodeMessageException +from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.types import Requester, UserID +from synapse.util.distributor import user_left_room, user_joined_room + +import logging +import re + +logger = logging.getLogger(__name__) + + +@defer.inlineCallbacks +def remote_join(client, host, port, requester, remote_room_hosts, + room_id, user_id, content): + """Ask the master to do a remote join for the given user to the given room + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + remote_room_hosts (list[str]): Servers to try and join via + room_id (str) + user_id (str) + content (dict): The event content to use for the join event + + Returns: + Deferred + """ + uri = "http://%s:%s/_synapse/replication/remote_join" % (host, port) + + payload = { + "requester": requester.serialize(), + "remote_room_hosts": remote_room_hosts, + "room_id": room_id, + "user_id": user_id, + "content": content, + } + + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) + + +@defer.inlineCallbacks +def remote_reject_invite(client, host, port, requester, remote_room_hosts, + room_id, user_id): + """Ask master to reject the invite for the user and room. + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + remote_room_hosts (list[str]): Servers to try and reject via + room_id (str) + user_id (str) + + Returns: + Deferred + """ + uri = "http://%s:%s/_synapse/replication/remote_reject_invite" % (host, port) + + payload = { + "requester": requester.serialize(), + "remote_room_hosts": remote_room_hosts, + "room_id": room_id, + "user_id": user_id, + } + + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) + + +@defer.inlineCallbacks +def get_or_register_3pid_guest(client, host, port, requester, + medium, address, inviter_user_id): + """Ask the master to get/create a guest account for given 3PID. + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + medium (str) + address (str) + inviter_user_id (str): The user ID who is trying to invite the + 3PID + + Returns: + Deferred[(str, str)]: A 2-tuple of `(user_id, access_token)` of the + 3PID guest account. + """ + + uri = "http://%s:%s/_synapse/replication/get_or_register_3pid_guest" % (host, port) + + payload = { + "requester": requester.serialize(), + "medium": medium, + "address": address, + "inviter_user_id": inviter_user_id, + } + + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) + + +@defer.inlineCallbacks +def notify_user_membership_change(client, host, port, user_id, room_id, change): + """Notify master that a user has joined or left the room + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication. + user_id (str) + room_id (str) + change (str): Either "join" or "left" + + Returns: + Deferred + """ + assert change in ("join", "left") + + uri = "http://%s:%s/_synapse/replication/user_%s_room" % (host, port, change) + + payload = { + "user_id": user_id, + "room_id": room_id, + } + + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) + + +class ReplicationRemoteJoinRestServlet(RestServlet): + PATTERNS = [re.compile("^/_synapse/replication/remote_join$")] + + def __init__(self, hs): + super(ReplicationRemoteJoinRestServlet, self).__init__() + + self.federation_handler = hs.get_handlers().federation_handler + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + content = parse_json_object_from_request(request) + + remote_room_hosts = content["remote_room_hosts"] + room_id = content["room_id"] + user_id = content["user_id"] + event_content = content["content"] + + requester = Requester.deserialize(self.store, content["requester"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info( + "remote_join: %s into room: %s", + user_id, room_id, + ) + + yield self.federation_handler.do_invite_join( + remote_room_hosts, + room_id, + user_id, + event_content, + ) + + defer.returnValue((200, {})) + + +class ReplicationRemoteRejectInviteRestServlet(RestServlet): + PATTERNS = [re.compile("^/_synapse/replication/remote_reject_invite$")] + + def __init__(self, hs): + super(ReplicationRemoteRejectInviteRestServlet, self).__init__() + + self.federation_handler = hs.get_handlers().federation_handler + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + content = parse_json_object_from_request(request) + + remote_room_hosts = content["remote_room_hosts"] + room_id = content["room_id"] + user_id = content["user_id"] + + requester = Requester.deserialize(self.store, content["requester"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info( + "remote_reject_invite: %s out of room: %s", + user_id, room_id, + ) + + try: + event = yield self.federation_handler.do_remotely_reject_invite( + remote_room_hosts, + room_id, + user_id, + ) + ret = event.get_pdu_json() + except Exception as e: + # if we were unable to reject the exception, just mark + # it as rejected on our end and plough ahead. + # + # The 'except' clause is very broad, but we need to + # capture everything from DNS failures upwards + # + logger.warn("Failed to reject invite: %s", e) + + yield self.store.locally_reject_invite( + user_id, room_id + ) + ret = {} + + defer.returnValue((200, ret)) + + +class ReplicationRegister3PIDGuestRestServlet(RestServlet): + PATTERNS = [re.compile("^/_synapse/replication/get_or_register_3pid_guest$")] + + def __init__(self, hs): + super(ReplicationRegister3PIDGuestRestServlet, self).__init__() + + self.registeration_handler = hs.get_handlers().registration_handler + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + content = parse_json_object_from_request(request) + + medium = content["medium"] + address = content["address"] + inviter_user_id = content["inviter_user_id"] + + requester = Requester.deserialize(self.store, content["requester"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info("get_or_register_3pid_guest: %r", content) + + ret = yield self.registeration_handler.get_or_register_3pid_guest( + medium, address, inviter_user_id, + ) + + defer.returnValue((200, ret)) + + +class ReplicationUserJoinedLeftRoomRestServlet(RestServlet): + PATTERNS = [re.compile("^/_synapse/replication/user_(?Pjoin|left)_room$")] + + def __init__(self, hs): + super(ReplicationUserJoinedLeftRoomRestServlet, self).__init__() + + self.registeration_handler = hs.get_handlers().registration_handler + self.store = hs.get_datastore() + self.clock = hs.get_clock() + self.distributor = hs.get_distributor() + + def on_POST(self, request, change): + content = parse_json_object_from_request(request) + + user_id = content["user_id"] + room_id = content["room_id"] + + logger.info("user membership change: %s in %s", user_id, room_id) + + user = UserID.from_string(user_id) + + if change == "join": + user_joined_room(self.distributor, user, room_id) + elif change == "left": + user_left_room(self.distributor, user, room_id) + else: + raise Exception("Unrecognized change: %r", change) + + return (200, {}) + + +def register_servlets(hs, http_server): + ReplicationRemoteJoinRestServlet(hs).register(http_server) + ReplicationRemoteRejectInviteRestServlet(hs).register(http_server) + ReplicationRegister3PIDGuestRestServlet(hs).register(http_server) + ReplicationUserJoinedLeftRoomRestServlet(hs).register(http_server) diff --git a/synapse/server.py b/synapse/server.py index 763f0f5a6..c48c96727 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -47,7 +47,9 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler -from synapse.handlers.room_member import RoomMemberMasterHandler +from synapse.handlers.room_member import ( + RoomMemberMasterHandler, RoomMemberWorkerHandler, +) from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler @@ -393,7 +395,7 @@ class HomeServer(object): def build_room_member_handler(self): if self.config.worker_app: - raise Exception("Can't use RoomMemberHandler on workers") + return RoomMemberWorkerHandler(self) return RoomMemberMasterHandler(self) def build_federation_registry(self): From a08726fc4262c6d8e9c69db3df322317cb87f6c2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:20:54 +0000 Subject: [PATCH 109/173] Add is_blocked to worker store --- synapse/storage/room.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 7f2c08d7a..34ed84ea2 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -157,6 +157,18 @@ class RoomWorkerStore(SQLBaseStore): "get_public_room_changes", get_public_room_changes_txn ) + @cached(max_entries=10000) + def is_room_blocked(self, room_id): + return self._simple_select_one_onecol( + table="blocked_rooms", + keyvalues={ + "room_id": room_id, + }, + retcol="1", + allow_none=True, + desc="is_room_blocked", + ) + class RoomStore(RoomWorkerStore, SearchStore): @@ -485,18 +497,6 @@ class RoomStore(RoomWorkerStore, SearchStore): else: defer.returnValue(None) - @cached(max_entries=10000) - def is_room_blocked(self, room_id): - return self._simple_select_one_onecol( - table="blocked_rooms", - keyvalues={ - "room_id": room_id, - }, - retcol="1", - allow_none=True, - desc="is_room_blocked", - ) - @defer.inlineCallbacks def block_room(self, room_id, user_id): yield self._simple_insert( @@ -507,7 +507,11 @@ class RoomStore(RoomWorkerStore, SearchStore): }, desc="block_room", ) - self.is_room_blocked.invalidate((room_id,)) + yield self.runInteraction( + "block_room_invalidation", + self._invalidate_cache_and_stream, + self.is_room_blocked, (room_id,), + ) def get_media_mxcs_in_room(self, room_id): """Retrieves all the local and remote media MXC URIs in a given room From d144ed6ffb578db83bff0f36826d25d9f09bcd30 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:36:04 +0000 Subject: [PATCH 110/173] fix bug #2926 (loading all state for a given type from the DB if the state_key is None) (#2990) Fixes a regression that had crept in where the caching layer upholds requests for loading state which is filtered by type (but not by state_key), but the DB layer itself would interpret a missing state_key as a request to filter by null state_key rather than returning all state_keys. --- synapse/storage/state.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1..ffa424603 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,6 +240,9 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) + ) if state_key is not None else ( + "AND type = ?", + (etype,) ) for etype, state_key in types ] @@ -259,10 +262,19 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] + where_clauses = [] + wildcard_types = False if types is not None: - where_clause = "AND (%s)" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + for typ in types: + if typ[1] is None: + where_clauses.append("(type = ?)") + where_args.extend(typ[0]) + wildcard_types = True + else: + where_clauses.append("(type = ? AND state_key = ?)") + where_args.extend([typ[0], typ[1]]) + where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" @@ -279,7 +291,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" @@ -292,9 +304,17 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the lengths match then we must have all the types, - # so no need to go walk further down the tree. - if types is not None and len(results[group]) == len(types): + # If the number of entries in the (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + types is not None and + not wildcard_types and + len(results[group]) == len(types) + ): break next_group = self._simple_select_one_onecol_txn( From 3f0f06cb31d0293e75943e804b3e1ed8fe58ebed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 11:41:45 +0000 Subject: [PATCH 111/173] Split RoomMemberWorkerHandler to separate file --- synapse/handlers/room_member.py | 78 ------------------- synapse/handlers/room_member_worker.py | 102 +++++++++++++++++++++++++ synapse/server.py | 5 +- 3 files changed, 104 insertions(+), 81 deletions(-) create mode 100644 synapse/handlers/room_member_worker.py diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 3c5751d66..1d7e6997b 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -27,10 +27,6 @@ from synapse.api.constants import ( EventTypes, Membership, ) from synapse.api.errors import AuthError, SynapseError, Codes -from synapse.replication.http.membership import ( - remote_join, remote_reject_invite, get_or_register_3pid_guest, - notify_user_membership_change, -) from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room @@ -933,77 +929,3 @@ class RoomMemberMasterHandler(RoomMemberHandler): if membership: yield self.store.forget(user_id, room_id) - - -class RoomMemberWorkerHandler(RoomMemberHandler): - @defer.inlineCallbacks - def _remote_join(self, requester, remote_room_hosts, room_id, user, content): - """Implements RoomMemberHandler._remote_join - """ - if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") - - ret = yield remote_join( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - remote_room_hosts=remote_room_hosts, - room_id=room_id, - user_id=user.to_string(), - content=content, - ) - - yield self._user_joined_room(user, room_id) - - defer.returnValue(ret) - - def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): - """Implements RoomMemberHandler._remote_reject_invite - """ - return remote_reject_invite( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - remote_room_hosts=remote_room_hosts, - room_id=room_id, - user_id=target.to_string(), - ) - - def _user_joined_room(self, target, room_id): - """Implements RoomMemberHandler._user_joined_room - """ - return notify_user_membership_change( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - user_id=target.to_string(), - room_id=room_id, - change="join", - ) - - def _user_left_room(self, target, room_id): - """Implements RoomMemberHandler._user_left_room - """ - return notify_user_membership_change( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - user_id=target.to_string(), - room_id=room_id, - change="left", - ) - - def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): - """Implements RoomMemberHandler.get_or_register_3pid_guest - """ - return get_or_register_3pid_guest( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - medium=medium, - address=address, - inviter_user_id=inviter_user_id, - ) diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py new file mode 100644 index 000000000..75a881d99 --- /dev/null +++ b/synapse/handlers/room_member_worker.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.api.errors import SynapseError +from synapse.handlers.room_member import RoomMemberHandler +from synapse.replication.http.membership import ( + remote_join, remote_reject_invite, get_or_register_3pid_guest, + notify_user_membership_change, +) + + +logger = logging.getLogger(__name__) + + +class RoomMemberWorkerHandler(RoomMemberHandler): + @defer.inlineCallbacks + def _remote_join(self, requester, remote_room_hosts, room_id, user, content): + """Implements RoomMemberHandler._remote_join + """ + if len(remote_room_hosts) == 0: + raise SynapseError(404, "No known servers") + + ret = yield remote_join( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + remote_room_hosts=remote_room_hosts, + room_id=room_id, + user_id=user.to_string(), + content=content, + ) + + yield self._user_joined_room(user, room_id) + + defer.returnValue(ret) + + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): + """Implements RoomMemberHandler._remote_reject_invite + """ + return remote_reject_invite( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + remote_room_hosts=remote_room_hosts, + room_id=room_id, + user_id=target.to_string(), + ) + + def _user_joined_room(self, target, room_id): + """Implements RoomMemberHandler._user_joined_room + """ + return notify_user_membership_change( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + user_id=target.to_string(), + room_id=room_id, + change="join", + ) + + def _user_left_room(self, target, room_id): + """Implements RoomMemberHandler._user_left_room + """ + return notify_user_membership_change( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + user_id=target.to_string(), + room_id=room_id, + change="left", + ) + + def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): + """Implements RoomMemberHandler.get_or_register_3pid_guest + """ + return get_or_register_3pid_guest( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + medium=medium, + address=address, + inviter_user_id=inviter_user_id, + ) diff --git a/synapse/server.py b/synapse/server.py index c48c96727..cd0c1a51b 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -47,9 +47,8 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler -from synapse.handlers.room_member import ( - RoomMemberMasterHandler, RoomMemberWorkerHandler, -) +from synapse.handlers.room_member import RoomMemberMasterHandler +from synapse.handlers.room_member_worker import RoomMemberWorkerHandler from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler From 62ad701326089ceebbc2683a2e0c9eeeaa734293 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 14:15:49 +0000 Subject: [PATCH 112/173] s/join/joined/ in notify_user_membership_change --- synapse/handlers/room_member_worker.py | 2 +- synapse/replication/http/membership.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py index 75a881d99..493aec1e4 100644 --- a/synapse/handlers/room_member_worker.py +++ b/synapse/handlers/room_member_worker.py @@ -73,7 +73,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler): port=self.config.worker_replication_http_port, user_id=target.to_string(), room_id=room_id, - change="join", + change="joined", ) def _user_left_room(self, target, room_id): diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index df16c3b2b..fcd038a8a 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -154,7 +154,7 @@ def notify_user_membership_change(client, host, port, user_id, room_id, change): Returns: Deferred """ - assert change in ("join", "left") + assert change in ("joined", "left") uri = "http://%s:%s/_synapse/replication/user_%s_room" % (host, port, change) @@ -297,7 +297,7 @@ class ReplicationRegister3PIDGuestRestServlet(RestServlet): class ReplicationUserJoinedLeftRoomRestServlet(RestServlet): - PATTERNS = [re.compile("^/_synapse/replication/user_(?Pjoin|left)_room$")] + PATTERNS = [re.compile("^/_synapse/replication/user_(?Pjoined|left)_room$")] def __init__(self, hs): super(ReplicationUserJoinedLeftRoomRestServlet, self).__init__() @@ -317,7 +317,7 @@ class ReplicationUserJoinedLeftRoomRestServlet(RestServlet): user = UserID.from_string(user_id) - if change == "join": + if change == "joined": user_joined_room(self.distributor, user, room_id) elif change == "left": user_left_room(self.distributor, user, room_id) From 0011ede3b0ba9de22af3bb36c300b6779918bfa1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 14:19:23 +0000 Subject: [PATCH 113/173] Fix imports --- synapse/replication/http/__init__.py | 5 +---- synapse/replication/http/membership.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index 7a148301d..1d7a60752 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -13,11 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import send_event -import membership - from synapse.http.server import JsonResource +from synapse.replication.http import membership, send_event REPLICATION_PREFIX = "/_synapse/replication" diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index fcd038a8a..e66c4e881 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging +import re + from twisted.internet import defer from synapse.api.errors import SynapseError, MatrixCodeMessageException @@ -20,9 +23,6 @@ from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.types import Requester, UserID from synapse.util.distributor import user_left_room, user_joined_room -import logging -import re - logger = logging.getLogger(__name__) From 4f28018c83c071d0a70a545ec953bd1399c55c7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:21:11 +0000 Subject: [PATCH 114/173] Register membership/state servlets in event_creator --- synapse/app/event_creator.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index eb593c527..172e989b5 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -31,14 +31,20 @@ from synapse.replication.slave.storage.account_data import SlavedAccountDataStor from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore from synapse.replication.slave.storage.devices import SlavedDeviceStore +from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.profile import SlavedProfileStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.pushers import SlavedPusherStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore +from synapse.replication.slave.storage.transactions import TransactionStore from synapse.replication.tcp.client import ReplicationClientHandler -from synapse.rest.client.v1.room import RoomSendEventRestServlet +from synapse.rest.client.v1.room import ( + RoomSendEventRestServlet, RoomMembershipRestServlet, RoomStateEventRestServlet, + JoinRoomAliasServlet, +) from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree @@ -52,6 +58,9 @@ logger = logging.getLogger("synapse.app.event_creator") class EventCreatorSlavedStore( + DirectoryStore, + TransactionStore, + SlavedProfileStore, SlavedAccountDataStore, SlavedPusherStore, SlavedReceiptsStore, @@ -85,6 +94,9 @@ class EventCreatorServer(HomeServer): elif name == "client": resource = JsonResource(self, canonical_json=False) RoomSendEventRestServlet(self).register(resource) + RoomMembershipRestServlet(self).register(resource) + RoomStateEventRestServlet(self).register(resource) + JoinRoomAliasServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, "/_matrix/client/unstable": resource, From cb2c7c0669c7f4d4cc41d65a7f2328589fa04fc5 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 14 Mar 2018 16:09:20 +0000 Subject: [PATCH 115/173] Update CHANGES.rst WIP, need to add most recent PRs --- CHANGES.rst | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index a7ed49e10..0bcd5a717 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,97 @@ This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. +Changes in synapse v0.27.0-rc1 (2018-03-14) +=========================================== + +Features: + * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) + * Add shiney new purge API. New implementation is:- + * Faster + * Supports clearing by timestamp + * Supports deleting of local events + * Is transactional +(PR #2858,2867,2882, 2946,2962,2943) + + * Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) + * Add /room/{id}/event/{id} to synapse (PR #2766) + * Add an admin route to get all the media in a room (PR #2818) Thanks to @turt2live! + * Add federation_domain_whitelist option (PR #2820,2821) + + +Changes: + + * Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. New workers are:- + * Event Creation + * Push Actions + +With more to follow + + (PR #2892, #2893, #2894, #2896, #2897, #2898, #2899, #2900, #2901, #2902, #2903, #2904, #2913, #2920, #2921, #2922, #2923, #2924, #2925, #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856) + + * Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) + * Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! + * Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! + * Better logging when login can't find a 3pid (PR #2744) + * add ?ts massaging for ASes (PR #2754) + * Remove 'verbosity'/'log_file' from generated cfg (PR #2755) + * Make indentation of generated log config consistent (PR #2762) + * Remove dead code related to default thumbnails (PR #2764) + * Update http request metrics before calling servlet (PR #2770) + * Do bcrypt hashing in a background thread (PR #2773) + * When using synctl with workers, don't start the main synapse automatically (PR #2774) + * Make Counter render floats (PR #2778) + * Store state groups separately from events (PR #2784) + * Reorganise request and block metrics (PR #2785) + * Metrics for number of RDATA commands received (PR #2786) + * Metrics for events processed in appservice and fed sender (PR #2787) + * Optimise LoggingContext creation and copying (PR #2792) + * Track db txn time in millisecs (PR #2793) + * Track DB scheduling delay per-request (PR #2794, #2795) + * Sanity checking for user ids (PR #2797) + * better exception logging in callbackmetrics (PR #2809) + * Fix bugs in block metrics (PR #2810) + * Add some comments about the reactor tick time metric (PR #2816) + * Use a connection pool for the SimpleHttpClient (PR #2817) + * Remove unused/bitrotted MemoryDataStore (PR #2828) + * Make it possible to run tests against postgres (PR #2829) + * Factor out get_db_conn to HomeServer base class (PR #2830) + * Logging and metrics for the http pusher (PR #2833) + * Improve exception handling in persist_event (PR #2834) + * montoring metrics for number of cache evictions (PR #2844) + * Update pynacl dependency to 1.2.1 or higher (PR #2888) Thanks to @bachp! + * Remove ability for AS users to call /events and /sync (PR #2948) + * use bcrypt.checkpw (PR #2949) Thanks to @krombel! + * Factor run_in_background out from preserve_fn (PR #2961) + * Add a metric which increments when a request is received (PR #2965) + * Improve caching for read_marker API (PR #2927) + + +Synapse 0.27.0 begins the process of rationalising metric names. To enable a graceful migration path, this release just adds new names for the metrics being renamed. A future release will remove the old ones. For more info see [here](https://github.com/matrix-org/synapse/blob/develop/docs/metrics-howto.rst#block-and-response-metrics-renamed-for-0270) + + + +Bug fixes: + + * synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! + * Check missing fields in event_from_pdu_json (PR #2745) + * Fix templating error with unban permission message (PR #2761) Thanks to @turt2live! + * Fix flaky test_rooms UTs (PR #2765) + * Fix publicised groups GET API (singular) over federation (PR #2772) + * Fix a logcontext leak in persist_events (PR #2790) + * Fix 'NoneType' object has no attribute 'writeHeaders' (PR #2796) + * fix SQL when searching all users (PR #2803) + * Fix server 500 on public rooms call when no rooms exist (PR #2827) + * Fix SQL for user search (PR #2831) + * Fix sql error in quarantine_media (PR #2837) + * Handle url_previews with no content-type (PR #2845) + * Add missing yield during 3pid signature checks (PR #2933) + * Fix race in sync when joining room (PR #2944) + * Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) + * Fix scary-looking dns resolution errors (PR #2838) + + + Changes in synapse v0.26.0 (2018-01-05) ======================================= From a492b17fe2cbbb10687d9a7cbe9eda67b0562656 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 14 Mar 2018 16:18:09 +0000 Subject: [PATCH 116/173] Update CHANGES.rst clean formatting --- CHANGES.rst | 128 ++++++++++++++++++++++++---------------------------- 1 file changed, 60 insertions(+), 68 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0bcd5a717..4d1b4ba13 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,66 +12,58 @@ Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== Features: - * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) - * Add shiney new purge API. New implementation is:- +* Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) +* Add shiney new purge API. New implementation is:- * Faster * Supports clearing by timestamp * Supports deleting of local events - * Is transactional -(PR #2858,2867,2882, 2946,2962,2943) + * Is transactional (PR #2858,2867,2882, 2946,2962,2943) - * Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) - * Add /room/{id}/event/{id} to synapse (PR #2766) - * Add an admin route to get all the media in a room (PR #2818) Thanks to @turt2live! - * Add federation_domain_whitelist option (PR #2820,2821) +* Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) +* Add /room/{id}/event/{id} to synapse (PR #2766) +* Add an admin route to get all the media in a room (PR #2818) Thanks to @turt2live! +* Add federation_domain_whitelist option (PR #2820,2821) Changes: - * Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. New workers are:- - * Event Creation - * Push Actions - -With more to follow - - (PR #2892, #2893, #2894, #2896, #2897, #2898, #2899, #2900, #2901, #2902, #2903, #2904, #2913, #2920, #2921, #2922, #2923, #2924, #2925, #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856) - - * Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) - * Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! - * Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! - * Better logging when login can't find a 3pid (PR #2744) - * add ?ts massaging for ASes (PR #2754) - * Remove 'verbosity'/'log_file' from generated cfg (PR #2755) - * Make indentation of generated log config consistent (PR #2762) - * Remove dead code related to default thumbnails (PR #2764) - * Update http request metrics before calling servlet (PR #2770) - * Do bcrypt hashing in a background thread (PR #2773) - * When using synctl with workers, don't start the main synapse automatically (PR #2774) - * Make Counter render floats (PR #2778) - * Store state groups separately from events (PR #2784) - * Reorganise request and block metrics (PR #2785) - * Metrics for number of RDATA commands received (PR #2786) - * Metrics for events processed in appservice and fed sender (PR #2787) - * Optimise LoggingContext creation and copying (PR #2792) - * Track db txn time in millisecs (PR #2793) - * Track DB scheduling delay per-request (PR #2794, #2795) - * Sanity checking for user ids (PR #2797) - * better exception logging in callbackmetrics (PR #2809) - * Fix bugs in block metrics (PR #2810) - * Add some comments about the reactor tick time metric (PR #2816) - * Use a connection pool for the SimpleHttpClient (PR #2817) - * Remove unused/bitrotted MemoryDataStore (PR #2828) - * Make it possible to run tests against postgres (PR #2829) - * Factor out get_db_conn to HomeServer base class (PR #2830) - * Logging and metrics for the http pusher (PR #2833) - * Improve exception handling in persist_event (PR #2834) - * montoring metrics for number of cache evictions (PR #2844) - * Update pynacl dependency to 1.2.1 or higher (PR #2888) Thanks to @bachp! - * Remove ability for AS users to call /events and /sync (PR #2948) - * use bcrypt.checkpw (PR #2949) Thanks to @krombel! - * Factor run_in_background out from preserve_fn (PR #2961) - * Add a metric which increments when a request is received (PR #2965) - * Improve caching for read_marker API (PR #2927) +* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. New workers are XXXXXX, (PR #2892, #2893, #2894, #2896, #2897, #2898, #2899, #2900, #2901, #2902, #2903, #2904, #2913, #2920, #2921, #2922, #2923, #2924, #2925, #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856) +* Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) +* Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! +* Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! +* Better logging when login can't find a 3pid (PR #2744) +* add ?ts massaging for ASes (PR #2754) +* Remove 'verbosity'/'log_file' from generated cfg (PR #2755) +* Make indentation of generated log config consistent (PR #2762) +* Remove dead code related to default thumbnails (PR #2764) +* Update http request metrics before calling servlet (PR #2770) +* Do bcrypt hashing in a background thread (PR #2773) +* When using synctl with workers, don't start the main synapse automatically (PR #2774) +* Make Counter render floats (PR #2778) +* Store state groups separately from events (PR #2784) +* Reorganise request and block metrics (PR #2785) +* Metrics for number of RDATA commands received (PR #2786) +* Metrics for events processed in appservice and fed sender (PR #2787) +* Optimise LoggingContext creation and copying (PR #2792) +* Track db txn time in millisecs (PR #2793) +* Track DB scheduling delay per-request (PR #2794, #2795) +* Sanity checking for user ids (PR #2797) +* better exception logging in callbackmetrics (PR #2809) +* Fix bugs in block metrics (PR #2810) +* Add some comments about the reactor tick time metric (PR #2816) +* Use a connection pool for the SimpleHttpClient (PR #2817) +* Remove unused/bitrotted MemoryDataStore (PR #2828) +* Make it possible to run tests against postgres (PR #2829) +* Factor out get_db_conn to HomeServer base class (PR #2830) +* Logging and metrics for the http pusher (PR #2833) +* Improve exception handling in persist_event (PR #2834) +* montoring metrics for number of cache evictions (PR #2844) +* Update pynacl dependency to 1.2.1 or higher (PR #2888) Thanks to @bachp! +* Remove ability for AS users to call /events and /sync (PR #2948) +* use bcrypt.checkpw (PR #2949) Thanks to @krombel! +* Factor run_in_background out from preserve_fn (PR #2961) +* Add a metric which increments when a request is received (PR #2965) +* Improve caching for read_marker API (PR #2927) Synapse 0.27.0 begins the process of rationalising metric names. To enable a graceful migration path, this release just adds new names for the metrics being renamed. A future release will remove the old ones. For more info see [here](https://github.com/matrix-org/synapse/blob/develop/docs/metrics-howto.rst#block-and-response-metrics-renamed-for-0270) @@ -80,22 +72,22 @@ Synapse 0.27.0 begins the process of rationalising metric names. To enable a gra Bug fixes: - * synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! - * Check missing fields in event_from_pdu_json (PR #2745) - * Fix templating error with unban permission message (PR #2761) Thanks to @turt2live! - * Fix flaky test_rooms UTs (PR #2765) - * Fix publicised groups GET API (singular) over federation (PR #2772) - * Fix a logcontext leak in persist_events (PR #2790) - * Fix 'NoneType' object has no attribute 'writeHeaders' (PR #2796) - * fix SQL when searching all users (PR #2803) - * Fix server 500 on public rooms call when no rooms exist (PR #2827) - * Fix SQL for user search (PR #2831) - * Fix sql error in quarantine_media (PR #2837) - * Handle url_previews with no content-type (PR #2845) - * Add missing yield during 3pid signature checks (PR #2933) - * Fix race in sync when joining room (PR #2944) - * Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) - * Fix scary-looking dns resolution errors (PR #2838) +* synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! +* Check missing fields in event_from_pdu_json (PR #2745) +* Fix templating error with unban permission message (PR #2761) Thanks to @turt2live! +* Fix flaky test_rooms UTs (PR #2765) +* Fix publicised groups GET API (singular) over federation (PR #2772) +* Fix a logcontext leak in persist_events (PR #2790) +* Fix 'NoneType' object has no attribute 'writeHeaders' (PR #2796) +* fix SQL when searching all users (PR #2803) +* Fix server 500 on public rooms call when no rooms exist (PR #2827) +* Fix SQL for user search (PR #2831) +* Fix sql error in quarantine_media (PR #2837) +* Handle url_previews with no content-type (PR #2845) +* Add missing yield during 3pid signature checks (PR #2933) +* Fix race in sync when joining room (PR #2944) +* Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) +* Fix scary-looking dns resolution errors (PR #2838) From fb647164f2a008cdba56577ad0f41c4b4c1147d1 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 14 Mar 2018 16:20:36 +0000 Subject: [PATCH 117/173] Update CHANGES.rst --- CHANGES.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4d1b4ba13..53a97a769 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,10 +14,10 @@ Changes in synapse v0.27.0-rc1 (2018-03-14) Features: * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) * Add shiney new purge API. New implementation is:- - * Faster - * Supports clearing by timestamp - * Supports deleting of local events - * Is transactional (PR #2858,2867,2882, 2946,2962,2943) + * Faster + * Supports clearing by timestamp + * Supports deleting of local events + * Is transactional (PR #2858,2867,2882, 2946,2962,2943) * Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) * Add /room/{id}/event/{id} to synapse (PR #2766) From c33c1ceddd5da8195b38059dce31255209075ba2 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 14 Mar 2018 11:09:08 -0600 Subject: [PATCH 118/173] OCD: Make the event_creator routes regex a code block All the others are code blocks, so this one should be to (currently it is a blockquote). Signed-off-by: Travis Ralston --- docs/workers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workers.rst b/docs/workers.rst index dee04bbf3..80f8d2181 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -230,7 +230,7 @@ file. For example:: ``synapse.app.event_creator`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Handles non-state event creation. It can handle REST endpoints matching: +Handles non-state event creation. It can handle REST endpoints matching:: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send From 5ccb57d3ff6e3affd32cc796f625b2378483d871 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 14 Mar 2018 17:12:58 +0000 Subject: [PATCH 119/173] Update CHANGES.rst --- CHANGES.rst | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 53a97a769..beac2117d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,16 +1,17 @@ Unreleased ========== + + +Changes in synapse v0.27.0-rc1 (2018-03-14) +=========================================== + synctl no longer starts the main synapse when using ``-a`` option with workers. A new worker file should be added with ``worker_app: synapse.app.homeserver``. This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. - -Changes in synapse v0.27.0-rc1 (2018-03-14) -=========================================== - Features: * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) * Add shiney new purge API. New implementation is:- @@ -27,7 +28,7 @@ Features: Changes: -* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. New workers are XXXXXX, (PR #2892, #2893, #2894, #2896, #2897, #2898, #2899, #2900, #2901, #2902, #2903, #2904, #2913, #2920, #2921, #2922, #2923, #2924, #2925, #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856) +* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. See more [here](https://github.com/matrix-org/synapse/blob/master/docs/workers.rst), (PR #2892-#2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995) * Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) * Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! * Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! @@ -64,12 +65,9 @@ Changes: * Factor run_in_background out from preserve_fn (PR #2961) * Add a metric which increments when a request is received (PR #2965) * Improve caching for read_marker API (PR #2927) +* Add Measure block for persist_events (PR #2975) -Synapse 0.27.0 begins the process of rationalising metric names. To enable a graceful migration path, this release just adds new names for the metrics being renamed. A future release will remove the old ones. For more info see [here](https://github.com/matrix-org/synapse/blob/develop/docs/metrics-howto.rst#block-and-response-metrics-renamed-for-0270) - - - Bug fixes: * synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! From 10fdcf561d614235ba8f6ef3f980ac16ca0d7df7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 17:30:17 +0000 Subject: [PATCH 120/173] Fix up rst formatting --- CHANGES.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index beac2117d..42fe461b2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,12 +13,14 @@ This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. Features: + * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) -* Add shiney new purge API. New implementation is:- - * Faster - * Supports clearing by timestamp - * Supports deleting of local events - * Is transactional (PR #2858,2867,2882, 2946,2962,2943) +* Add shiney new purge API. New implementation is: + + * Faster + * Supports clearing by timestamp + * Supports deleting of local events + * Is transactional (PR #2858,2867,2882, 2946,2962,2943) * Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) * Add /room/{id}/event/{id} to synapse (PR #2766) @@ -28,7 +30,7 @@ Features: Changes: -* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. See more [here](https://github.com/matrix-org/synapse/blob/master/docs/workers.rst), (PR #2892-#2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995) +* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. See more at `docs/workers.rst `_ (PR #2892-#2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995) * Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) * Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! * Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! From 2059b8573f997e3210e0fa995d03f9168f8aeba6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 18:11:21 +0000 Subject: [PATCH 121/173] Update CHANGES.rst --- CHANGES.rst | 86 +++++++++++++++-------------------------------------- 1 file changed, 24 insertions(+), 62 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 42fe461b2..38cd3bec6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,80 +14,42 @@ reported to prometheus. See `docs/metrics-howto.rst `_ for full details (PR #2858, #2867, #2882, #2946, #2962, #2943) +* Add support for whitelisting 3PIDs that users can register. (PR #2813) +* Add ``/room/{id}/event/{id}`` API (PR #2766) +* Add an admin API to get all the media in a room (PR #2818) Thanks to @turt2live! +* Add ``federation_domain_whitelist`` option (PR #2820, #2821) Changes: -* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. See more at `docs/workers.rst `_ (PR #2892-#2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995) -* Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) -* Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! -* Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! -* Better logging when login can't find a 3pid (PR #2744) -* add ?ts massaging for ASes (PR #2754) -* Remove 'verbosity'/'log_file' from generated cfg (PR #2755) -* Make indentation of generated log config consistent (PR #2762) -* Remove dead code related to default thumbnails (PR #2764) -* Update http request metrics before calling servlet (PR #2770) -* Do bcrypt hashing in a background thread (PR #2773) +* Continue to factor out processing from main process and into worker processes. See updated `docs/workers.rst `_ (PR #2892 - #2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995, #2784) +* Ensure state cache is used when persisting events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) +* Change the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! +* No longer require a specific version of saml2 (PR #2695) Thanks to @okurz! +* Remove ``verbosity``/``log_file`` from generated config (PR #2755) +* Add and improve metrics and logging (PR #2770, #2778, #2785, #2786, #2787, #2793, #2794, #2795, #2809, #2810, #2833, #2834, #2844, #2965, #2927, #2975, #2790, #2796, #2838) * When using synctl with workers, don't start the main synapse automatically (PR #2774) -* Make Counter render floats (PR #2778) -* Store state groups separately from events (PR #2784) -* Reorganise request and block metrics (PR #2785) -* Metrics for number of RDATA commands received (PR #2786) -* Metrics for events processed in appservice and fed sender (PR #2787) -* Optimise LoggingContext creation and copying (PR #2792) -* Track db txn time in millisecs (PR #2793) -* Track DB scheduling delay per-request (PR #2794, #2795) -* Sanity checking for user ids (PR #2797) -* better exception logging in callbackmetrics (PR #2809) -* Fix bugs in block metrics (PR #2810) -* Add some comments about the reactor tick time metric (PR #2816) -* Use a connection pool for the SimpleHttpClient (PR #2817) -* Remove unused/bitrotted MemoryDataStore (PR #2828) -* Make it possible to run tests against postgres (PR #2829) -* Factor out get_db_conn to HomeServer base class (PR #2830) -* Logging and metrics for the http pusher (PR #2833) -* Improve exception handling in persist_event (PR #2834) -* montoring metrics for number of cache evictions (PR #2844) +* Minor performance improvements (PR #2773, #2792) +* Use a connection pool for non-federation outbound connections (PR #2817) +* Make it possible to run unit tests against postgres (PR #2829) * Update pynacl dependency to 1.2.1 or higher (PR #2888) Thanks to @bachp! * Remove ability for AS users to call /events and /sync (PR #2948) -* use bcrypt.checkpw (PR #2949) Thanks to @krombel! -* Factor run_in_background out from preserve_fn (PR #2961) -* Add a metric which increments when a request is received (PR #2965) -* Improve caching for read_marker API (PR #2927) -* Add Measure block for persist_events (PR #2975) - +* Use bcrypt.checkpw (PR #2949) Thanks to @krombel! Bug fixes: -* synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! -* Check missing fields in event_from_pdu_json (PR #2745) -* Fix templating error with unban permission message (PR #2761) Thanks to @turt2live! -* Fix flaky test_rooms UTs (PR #2765) +* Fix broken ``ldap_config`` config option (PR #2683) Thanks to @seckrv! +* Fix error message when user is not allowed to unban (PR #2761) Thanks to @turt2live! * Fix publicised groups GET API (singular) over federation (PR #2772) -* Fix a logcontext leak in persist_events (PR #2790) -* Fix 'NoneType' object has no attribute 'writeHeaders' (PR #2796) -* fix SQL when searching all users (PR #2803) -* Fix server 500 on public rooms call when no rooms exist (PR #2827) -* Fix SQL for user search (PR #2831) -* Fix sql error in quarantine_media (PR #2837) -* Handle url_previews with no content-type (PR #2845) -* Add missing yield during 3pid signature checks (PR #2933) -* Fix race in sync when joining room (PR #2944) +* Fix user directory when using ``user_directory_search_all_users`` config option (PR #2803, #2831) +* Fix error on ``/publicRooms`` when no rooms exist (PR #2827) +* Fix bug in quarantine_media (PR #2837) +* Fix url_previews when no Content-Type is returned from URL (PR #2845) +* Fix rare race in sync API when joining room (PR #2944) * Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) -* Fix scary-looking dns resolution errors (PR #2838) From 7d26591048ac5ab6263ddb24a840c58edfd85f50 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 15 Mar 2018 10:33:24 +0000 Subject: [PATCH 122/173] Update CHANGES.rst --- CHANGES.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 38cd3bec6..a50d3ea5b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,12 +6,13 @@ Unreleased Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== -synctl no longer starts the main synapse when using ``-a`` option with workers. -A new worker file should be added with ``worker_app: synapse.app.homeserver``. +The common case for running Synapse is not to run separate workers, but for those that do, synctl no longer starts the main synapse when using ``-a`` option with workers. A new worker file should be added with ``worker_app: synapse.app.homeserver``. This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. +Note, old metrics will be removed from the 0.28.0 release. + Features: * Add ability for ASes to override message send time (PR #2754) From 7367a4a8239a04047fa648694c08f3d1c63dd3ee Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 15 Mar 2018 10:33:52 +0000 Subject: [PATCH 123/173] Update CHANGES.rst --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index a50d3ea5b..48791cec3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,7 +6,7 @@ Unreleased Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== -The common case for running Synapse is not to run separate workers, but for those that do, synctl no longer starts the main synapse when using ``-a`` option with workers. A new worker file should be added with ``worker_app: synapse.app.homeserver``. +The common case for running Synapse is not to run separate workers, but for those that do, be aware that synctl no longer starts the main synapse when using ``-a`` option with workers. A new worker file should be added with ``worker_app: synapse.app.homeserver``. This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. From b29d1abab6ca42f693e3b83ab31440fee95d00f2 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 15 Mar 2018 10:34:15 +0000 Subject: [PATCH 124/173] Update CHANGES.rst --- CHANGES.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 48791cec3..931ecb4d3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,3 @@ -Unreleased -========== - - - Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== From 068c21ab102b22aeac7fc9aa1f4d2d229a807872 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 15 Mar 2018 10:36:31 +0000 Subject: [PATCH 125/173] CHANGES.rst: reword metric deprecation --- CHANGES.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 931ecb4d3..e25327f28 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,8 +5,7 @@ The common case for running Synapse is not to run separate workers, but for thos This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. - -Note, old metrics will be removed from the 0.28.0 release. +Note that the v0.28.0 release will remove the deprecated metric names. Features: From 0ad5125814dc18a79423740ac54f96e16a427758 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 15 Mar 2018 11:05:42 +0000 Subject: [PATCH 126/173] Update purge_history_api.rst clarify that `purge_history` will not purge state --- docs/admin_api/purge_history_api.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index ea2922da5..2da833c82 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -16,9 +16,11 @@ including an ``access_token`` of a server admin. By default, events sent by local users are not deleted, as they may represent the only copies of this content in existence. (Events sent by remote users are -deleted, and room state data before the cutoff is always removed). +deleted.) -To delete local events as well, set ``delete_local_events`` in the body: +Room state data (such as joins, leaves, topic) is always preserved. + +To delete local message events as well, set ``delete_local_events`` in the body: .. code:: json From 5ea624b0f5f6ad11b1640729b96272f8e7c94d46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 15 Mar 2018 11:48:35 +0000 Subject: [PATCH 127/173] CONTRIBUTING.rst: fix CI info --- CONTRIBUTING.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 2a88647ca..c6ee16efc 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -30,8 +30,12 @@ use github's pull request workflow to review the contribution, and either ask you to make any refinements needed or merge it and make them ourselves. The changes will then land on master when we next do a release. -We use Jenkins for continuous integration (http://matrix.org/jenkins), and -typically all pull requests get automatically tested Jenkins: if your change breaks the build, Jenkins will yell about it in #matrix-dev:matrix.org so please lurk there and keep an eye open. +We use `Jenkins `_ and +`Travis `_ for continuous +integration. All pull requests to synapse get automatically tested by Travis; +the Jenkins builds require an adminstrator to start them. If your change +breaks the build, this will be shown in github, so please keep an eye on the +pull request for feedback. Code style ~~~~~~~~~~ @@ -115,4 +119,4 @@ can't be accepted. Git makes this trivial - just use the -s flag when you do Conclusion ~~~~~~~~~~ -That's it! Matrix is a very open and collaborative project as you might expect given our obsession with open communication. If we're going to successfully matrix together all the fragmented communication technologies out there we are reliant on contributions and collaboration from the community to do so. So please get involved - and we hope you have as much fun hacking on Matrix as we do! \ No newline at end of file +That's it! Matrix is a very open and collaborative project as you might expect given our obsession with open communication. If we're going to successfully matrix together all the fragmented communication technologies out there we are reliant on contributions and collaboration from the community to do so. So please get involved - and we hope you have as much fun hacking on Matrix as we do! From ddb00efc1ddec646d02e8def6053003f04d077d7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 14:41:30 +0000 Subject: [PATCH 128/173] Bump version number --- synapse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/__init__.py b/synapse/__init__.py index ef8853bd2..d72d88b47 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.26.0" +__version__ = "0.27.0-rc1" From 2cc9f76bc3cfa012dcdfe614bdda7e689b8b5e65 Mon Sep 17 00:00:00 2001 From: NotAFile Date: Thu, 15 Mar 2018 16:11:17 +0100 Subject: [PATCH 129/173] replace old style error catching with 'as' keyword This is both easier to read and compatible with python3 (not that that matters) Signed-off-by: Adrian Tschira --- synapse/app/synctl.py | 4 ++-- synapse/handlers/device.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 0f0ddfa78..b0e1b5e66 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -38,7 +38,7 @@ def pid_running(pid): try: os.kill(pid, 0) return True - except OSError, err: + except OSError as err: if err.errno == errno.EPERM: return True return False @@ -98,7 +98,7 @@ def stop(pidfile, app): try: os.kill(pid, signal.SIGTERM) write("stopped %s" % (app,), colour=GREEN) - except OSError, err: + except OSError as err: if err.errno == errno.ESRCH: write("%s not running" % (app,), colour=YELLOW) elif err.errno == errno.EPERM: diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 40f3d2467..f7457a708 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -155,7 +155,7 @@ class DeviceHandler(BaseHandler): try: yield self.store.delete_device(user_id, device_id) - except errors.StoreError, e: + except errors.StoreError as e: if e.code == 404: # no match pass @@ -204,7 +204,7 @@ class DeviceHandler(BaseHandler): try: yield self.store.delete_devices(user_id, device_ids) - except errors.StoreError, e: + except errors.StoreError as e: if e.code == 404: # no match pass @@ -243,7 +243,7 @@ class DeviceHandler(BaseHandler): new_display_name=content.get("display_name") ) yield self.notify_device_update(user_id, [device_id]) - except errors.StoreError, e: + except errors.StoreError as e: if e.code == 404: raise errors.NotFoundError() else: From 7c7706f42b56dd61f5eb17679aa12247f7058ed5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 15:40:13 +0000 Subject: [PATCH 130/173] Fix bug where state cache used lots of memory The state cache bases its size on the sum of the size of entries. The size of the entry is calculated once on insertion, so it is important that the size of entries does not change. The DictionaryCache modified the entries size, which caused the state cache to incorrectly think it was smaller than it actually was. --- synapse/util/caches/dictionary_cache.py | 6 +++++- synapse/util/caches/lrucache.py | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index d4105822b..1709e8b42 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -132,9 +132,13 @@ class DictionaryCache(object): self._update_or_insert(key, value, known_absent) def _update_or_insert(self, key, value, known_absent): - entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {})) + # We pop and reinsert as we need to tell the cache the size may have + # changed + + entry = self.cache.pop(key, DictionaryEntry(False, set(), {})) entry.value.update(value) entry.known_absent.update(known_absent) + self.cache[key] = entry def _insert(self, key, value, known_absent): self.cache[key] = DictionaryEntry(True, known_absent, value) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index f088dd430..a4bf8fa6a 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -154,14 +154,14 @@ class LruCache(object): def cache_set(key, value, callbacks=[]): node = cache.get(key, None) if node is not None: - if value != node.value: + if node.callbacks and value != node.value: for cb in node.callbacks: cb() node.callbacks.clear() - if size_callback: - cached_cache_len[0] -= size_callback(node.value) - cached_cache_len[0] += size_callback(value) + if size_callback: + cached_cache_len[0] -= size_callback(node.value) + cached_cache_len[0] += size_callback(value) node.callbacks.update(callbacks) From 9cf519769b70311d06b43a97e47cfb5a06e6ade4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 17:48:42 +0000 Subject: [PATCH 131/173] Use .iter* to avoid copies in StateHandler --- synapse/state.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 932f60250..18386556a 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -132,7 +132,7 @@ class StateHandler(object): state_map = yield self.store.get_events(state.values(), get_prev_content=False) state = { - key: state_map[e_id] for key, e_id in state.items() if e_id in state_map + key: state_map[e_id] for key, e_id in state.iteritems() if e_id in state_map } defer.returnValue(state) @@ -378,7 +378,7 @@ class StateHandler(object): new_state = resolve_events_with_state_map(state_set_ids, state_map) new_state = { - key: state_map[ev_id] for key, ev_id in new_state.items() + key: state_map[ev_id] for key, ev_id in new_state.iteritems() } return new_state @@ -458,15 +458,15 @@ class StateResolutionHandler(object): # build a map from state key to the event_ids which set that state. # dict[(str, str), set[str]) state = {} - for st in state_groups_ids.values(): - for key, e_id in st.items(): + for st in state_groups_ids.itervalues(): + for key, e_id in st.iteritems(): state.setdefault(key, set()).add(e_id) # build a map from state key to the event_ids which set that state, # including only those where there are state keys in conflict. conflicted_state = { k: list(v) - for k, v in state.items() + for k, v in state.iteritems() if len(v) > 1 } @@ -480,7 +480,7 @@ class StateResolutionHandler(object): ) else: new_state = { - key: e_ids.pop() for key, e_ids in state.items() + key: e_ids.pop() for key, e_ids in state.iteritems() } # if the new state matches any of the input state groups, we can @@ -488,8 +488,8 @@ class StateResolutionHandler(object): # which will be used as a cache key for future resolutions, but # not get persisted. state_group = None - new_state_event_ids = frozenset(new_state.values()) - for sg, events in state_groups_ids.items(): + new_state_event_ids = frozenset(new_state.iteritems()) + for sg, events in state_groups_ids.iteritems(): if new_state_event_ids == frozenset(e_id for e_id in events): state_group = sg break @@ -702,7 +702,7 @@ def _resolve_with_state(unconflicted_state_ids, conflicted_state_ds, auth_event_ auth_events = { key: state_map[ev_id] - for key, ev_id in auth_event_ids.items() + for key, ev_id in auth_event_ids.iteritems() if ev_id in state_map } @@ -740,7 +740,7 @@ def _resolve_state_events(conflicted_state, auth_events): auth_events.update(resolved_state) - for key, events in conflicted_state.items(): + for key, events in conflicted_state.iteritems(): if key[0] == EventTypes.JoinRules: logger.debug("Resolving conflicted join rules %r", events) resolved_state[key] = _resolve_auth_events( @@ -750,7 +750,7 @@ def _resolve_state_events(conflicted_state, auth_events): auth_events.update(resolved_state) - for key, events in conflicted_state.items(): + for key, events in conflicted_state.iteritems(): if key[0] == EventTypes.Member: logger.debug("Resolving conflicted member lists %r", events) resolved_state[key] = _resolve_auth_events( @@ -760,7 +760,7 @@ def _resolve_state_events(conflicted_state, auth_events): auth_events.update(resolved_state) - for key, events in conflicted_state.items(): + for key, events in conflicted_state.iteritems(): if key not in resolved_state: logger.debug("Resolving conflicted state %r:%r", key, events) resolved_state[key] = _resolve_normal_events( From 926ba76e23ea9d55638baff541cdfaeb9e01ac47 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 23:43:31 +0000 Subject: [PATCH 132/173] Replace ujson with simplejson --- synapse/api/filtering.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/message.py | 6 +++--- synapse/http/server.py | 5 ++--- synapse/replication/tcp/commands.py | 2 +- synapse/rest/client/v1/room.py | 2 +- synapse/rest/client/v2_alpha/sync.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 2 +- synapse/storage/account_data.py | 2 +- synapse/storage/background_updates.py | 2 +- synapse/storage/deviceinbox.py | 12 ++++++------ synapse/storage/devices.py | 2 +- synapse/storage/end_to_end_keys.py | 2 +- synapse/storage/event_push_actions.py | 2 +- synapse/storage/events.py | 4 ++-- synapse/storage/receipts.py | 2 +- synapse/storage/room.py | 2 +- synapse/storage/roommember.py | 2 +- synapse/storage/schema/delta/25/fts.py | 4 ++-- synapse/storage/schema/delta/27/ts.py | 2 +- synapse/storage/schema/delta/31/search_update.py | 4 ++-- synapse/storage/schema/delta/33/event_fields.py | 4 ++-- synapse/storage/search.py | 2 +- synapse/storage/tags.py | 2 +- synapse/storage/transactions.py | 2 +- 25 files changed, 37 insertions(+), 38 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 83206348e..db43219d2 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -17,7 +17,7 @@ from synapse.storage.presence import UserPresenceState from synapse.types import UserID, RoomID from twisted.internet import defer -import ujson as json +import simplejson as json import jsonschema from jsonschema import FormatChecker diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 668a90e49..ce2c87e40 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import ujson as json +import simplejson as json import logging from canonicaljson import encode_canonical_json diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd..d7413833e 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -35,7 +35,7 @@ from canonicaljson import encode_canonical_json import logging import random -import ujson +import simplejson logger = logging.getLogger(__name__) @@ -561,8 +561,8 @@ class MessageHandler(BaseHandler): # Ensure that we can round trip before trying to persist in db try: - dump = ujson.dumps(unfreeze(event.content)) - ujson.loads(dump) + dump = simplejson.dumps(unfreeze(event.content)) + simplejson.loads(dump) except Exception: logger.exception("Failed to encode content: %r", event.content) raise diff --git a/synapse/http/server.py b/synapse/http/server.py index 25466cd29..f1e9002e4 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -36,7 +36,7 @@ from twisted.web.util import redirectTo import collections import logging import urllib -import ujson +import simplejson logger = logging.getLogger(__name__) @@ -370,8 +370,7 @@ def respond_with_json(request, code, json_object, send_cors=False, if canonical_json or synapse.events.USE_FROZEN_DICTS: json_bytes = encode_canonical_json(json_object) else: - # ujson doesn't like frozen_dicts. - json_bytes = ujson.dumps(json_object, ensure_ascii=False) + json_bytes = simplejson.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 171227cce..9633404f7 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -19,7 +19,7 @@ allowed to be sent by which side. """ import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 75b735b47..80989731f 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -29,7 +29,7 @@ from synapse.http.servlet import ( import logging import urllib -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index a0a8e4b8e..eb91c0b29 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -33,7 +33,7 @@ from ._base import set_timeline_upper_limit import itertools import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 40d2e664e..a413cb622 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -35,7 +35,7 @@ import os import re import fnmatch import cgi -import ujson as json +import simplejson as json import urlparse import itertools import datetime diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 56a0bde54..40a2ad8d0 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -18,7 +18,7 @@ from twisted.internet import defer from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks -import ujson as json +import simplejson as json import logging logger = logging.getLogger(__name__) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 11a1b942f..8f3bff311 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -19,7 +19,7 @@ from . import engines from twisted.internet import defer -import ujson as json +import simplejson as json import logging logger = logging.getLogger(__name__) diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index 548e795da..a879e5bfc 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -import ujson +import simplejson from twisted.internet import defer @@ -85,7 +85,7 @@ class DeviceInboxStore(BackgroundUpdateStore): ) rows = [] for destination, edu in remote_messages_by_destination.items(): - edu_json = ujson.dumps(edu) + edu_json = simplejson.dumps(edu) rows.append((destination, stream_id, now_ms, edu_json)) txn.executemany(sql, rows) @@ -177,7 +177,7 @@ class DeviceInboxStore(BackgroundUpdateStore): " WHERE user_id = ?" ) txn.execute(sql, (user_id,)) - message_json = ujson.dumps(messages_by_device["*"]) + message_json = simplejson.dumps(messages_by_device["*"]) for row in txn: # Add the message for all devices for this user on this # server. @@ -199,7 +199,7 @@ class DeviceInboxStore(BackgroundUpdateStore): # Only insert into the local inbox if the device exists on # this server device = row[0] - message_json = ujson.dumps(messages_by_device[device]) + message_json = simplejson.dumps(messages_by_device[device]) messages_json_for_user[device] = message_json if messages_json_for_user: @@ -253,7 +253,7 @@ class DeviceInboxStore(BackgroundUpdateStore): messages = [] for row in txn: stream_pos = row[0] - messages.append(ujson.loads(row[1])) + messages.append(simplejson.loads(row[1])) if len(messages) < limit: stream_pos = current_stream_id return (messages, stream_pos) @@ -389,7 +389,7 @@ class DeviceInboxStore(BackgroundUpdateStore): messages = [] for row in txn: stream_pos = row[0] - messages.append(ujson.loads(row[1])) + messages.append(simplejson.loads(row[1])) if len(messages) < limit: stream_pos = current_stream_id return (messages, stream_pos) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index bd2effdf3..712106b83 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import ujson as json +import simplejson as json from twisted.internet import defer diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 2cebb203c..ff8538ddf 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.util.caches.descriptors import cached from canonicaljson import encode_canonical_json -import ujson as json +import simplejson as json from ._base import SQLBaseStore diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 8efe2fd4b..575d710d5 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -21,7 +21,7 @@ from synapse.types import RoomStreamToken from .stream import lower_bound import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d08f7571d..bcca56329 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,7 +38,7 @@ from functools import wraps import synapse.metrics import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 @@ -56,7 +56,7 @@ event_counter = metrics.register_counter( def encode_json(json_object): if USE_FROZEN_DICTS: - # ujson doesn't like frozen_dicts + # simplejson doesn't like frozen_dicts return encode_canonical_json(json_object) else: return json.dumps(json_object, ensure_ascii=False) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 12b3cc7f5..2c3aa3369 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -20,7 +20,7 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache from twisted.internet import defer import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b..2051d8506 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -23,7 +23,7 @@ from .engines import PostgresEngine, Sqlite3Engine import collections import logging -import ujson as json +import simplejson as json import re logger = logging.getLogger(__name__) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 3e77fd390..c1ca29928 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -27,7 +27,7 @@ from synapse.api.constants import Membership, EventTypes from synapse.types import get_domain_from_id import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index 4269ac69a..e7351c3ae 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -17,7 +17,7 @@ import logging from synapse.storage.prepare_database import get_statements from synapse.storage.engines import PostgresEngine, Sqlite3Engine -import ujson +import simplejson logger = logging.getLogger(__name__) @@ -66,7 +66,7 @@ def run_create(cur, database_engine, *args, **kwargs): "max_stream_id_exclusive": max_stream_id + 1, "rows_inserted": 0, } - progress_json = ujson.dumps(progress) + progress_json = simplejson.dumps(progress) sql = ( "INSERT into background_updates (update_name, progress_json)" diff --git a/synapse/storage/schema/delta/27/ts.py b/synapse/storage/schema/delta/27/ts.py index 71b12a273..c0176c41e 100644 --- a/synapse/storage/schema/delta/27/ts.py +++ b/synapse/storage/schema/delta/27/ts.py @@ -16,7 +16,7 @@ import logging from synapse.storage.prepare_database import get_statements -import ujson +import simplejson logger = logging.getLogger(__name__) diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 470ae0c00..fe6b7d196 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -16,7 +16,7 @@ from synapse.storage.engines import PostgresEngine from synapse.storage.prepare_database import get_statements import logging -import ujson +import simplejson logger = logging.getLogger(__name__) @@ -49,7 +49,7 @@ def run_create(cur, database_engine, *args, **kwargs): "rows_inserted": 0, "have_added_indexes": False, } - progress_json = ujson.dumps(progress) + progress_json = simplejson.dumps(progress) sql = ( "INSERT into background_updates (update_name, progress_json)" diff --git a/synapse/storage/schema/delta/33/event_fields.py b/synapse/storage/schema/delta/33/event_fields.py index 83066cccc..1e002f9db 100644 --- a/synapse/storage/schema/delta/33/event_fields.py +++ b/synapse/storage/schema/delta/33/event_fields.py @@ -15,7 +15,7 @@ from synapse.storage.prepare_database import get_statements import logging -import ujson +import simplejson logger = logging.getLogger(__name__) @@ -44,7 +44,7 @@ def run_create(cur, database_engine, *args, **kwargs): "max_stream_id_exclusive": max_stream_id + 1, "rows_inserted": 0, } - progress_json = ujson.dumps(progress) + progress_json = simplejson.dumps(progress) sql = ( "INSERT into background_updates (update_name, progress_json)" diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c63..c19e4ea44 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -21,7 +21,7 @@ from synapse.storage.engines import PostgresEngine, Sqlite3Engine import logging import re -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index bff73f3f0..982a50052 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -17,7 +17,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached from twisted.internet import defer -import ujson as json +import simplejson as json import logging logger = logging.getLogger(__name__) diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index 8f61f7ffa..f825264ea 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -23,7 +23,7 @@ from canonicaljson import encode_canonical_json from collections import namedtuple import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) From ba48755d56ba134590f7b5dcb5da44371639fc4c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 23:57:26 +0000 Subject: [PATCH 133/173] Bump version and changelog --- CHANGES.rst | 8 ++++++++ synapse/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index dcf9adc95..38ba4249b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,11 @@ +Changes in synapse v0.26.1 (2018-03-15) +======================================= + +Bug fixes: + +* Fix bug where an invalid event caused server to stop functioning correctly, + due to parsing and serializing bugs in ujson library. + Changes in synapse v0.26.0 (2018-01-05) ======================================= diff --git a/synapse/__init__.py b/synapse/__init__.py index ef8853bd2..80efad3a6 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.26.0" +__version__ = "0.26.1" From 5b631ff41a3230d8b6ed662e5aeb406c86989f9f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Mar 2018 00:07:08 +0000 Subject: [PATCH 134/173] Remove wrong comment --- synapse/storage/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bcca56329..73658a992 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -56,7 +56,6 @@ event_counter = metrics.register_counter( def encode_json(json_object): if USE_FROZEN_DICTS: - # simplejson doesn't like frozen_dicts return encode_canonical_json(json_object) else: return json.dumps(json_object, ensure_ascii=False) From 0092cf38ae95173519ffcc403b20b4fd2dec5c8d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Mar 2018 00:11:58 +0000 Subject: [PATCH 135/173] Newline --- CHANGES.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.rst b/CHANGES.rst index 38ba4249b..3b3292823 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,7 @@ Bug fixes: * Fix bug where an invalid event caused server to stop functioning correctly, due to parsing and serializing bugs in ujson library. + Changes in synapse v0.26.0 (2018-01-05) ======================================= From a8ce159be43560e9aea8f3be65110eea49d1f50e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 23:38:43 +0000 Subject: [PATCH 136/173] Replace some ujson with simplejson to make it work --- synapse/http/server.py | 3 ++- synapse/rest/client/v2_alpha/sync.py | 2 +- synapse/storage/events.py | 2 +- synapse/storage/events_worker.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 4b567215c..3c7a0ef97 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,6 +38,7 @@ import collections import logging import urllib import ujson +import simplejson logger = logging.getLogger(__name__) @@ -462,7 +463,7 @@ def respond_with_json(request, code, json_object, send_cors=False, json_bytes = encode_canonical_json(json_object) else: # ujson doesn't like frozen_dicts. - json_bytes = ujson.dumps(json_object, ensure_ascii=False) + json_bytes = simplejson.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index a0a8e4b8e..eb91c0b29 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -33,7 +33,7 @@ from ._base import set_timeline_upper_limit import itertools import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 389087817..9fc65229f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,7 +38,7 @@ from functools import wraps import synapse.metrics import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 86c3b48ad..2e23dd78b 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -28,7 +28,7 @@ from synapse.api.errors import SynapseError from collections import namedtuple import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 From 38f952b9bc96cf72d96bab3510e8f428a8247542 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 09:27:36 +0000 Subject: [PATCH 137/173] spell out not to massively increase bcrypt rounds --- synapse/config/registration.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 336959094..c5384b3ad 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -77,7 +77,9 @@ class RegistrationConfig(Config): # Set the number of bcrypt rounds used to generate password hash. # Larger numbers increase the work factor needed to generate the hash. - # The default number of rounds is 12. + # The default number is 12 (which equates to 2^12 rounds). + # N.B. that increasing this will exponentially increase the time required + # to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. bcrypt_rounds: 12 # Allows users to register as guests without a password/email/etc, and From 9a0d783c113ae74c55e409d33219cd77f3662b9f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Mar 2018 11:35:53 +0000 Subject: [PATCH 138/173] Add comments --- synapse/util/caches/lrucache.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index a4bf8fa6a..1c5a98209 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -154,11 +154,18 @@ class LruCache(object): def cache_set(key, value, callbacks=[]): node = cache.get(key, None) if node is not None: + # We sometimes store large objects, e.g. dicts, which cause + # the inequality check to take a long time. So let's only do + # the check if we have some callbacks to call. if node.callbacks and value != node.value: for cb in node.callbacks: cb() node.callbacks.clear() + # We don't bother to protect this by value != node.value as + # generally size_callback will be cheap compared with equality + # checks. (For example, taking the size of two dicts is quicker + # than comparing them for equality.) if size_callback: cached_cache_len[0] -= size_callback(node.value) cached_cache_len[0] += size_callback(value) From 1a3aa957ca1164268c9bbbd564491c9d46fbb7d2 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 19 Mar 2018 15:11:00 +0000 Subject: [PATCH 139/173] Update CHANGES.rst --- CHANGES.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index e25327f28..55358287a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,10 @@ +Changes in synapse v0.27.0-rc2 (2018-03-19) +=========================================== +Bugs: + +* Fix bug introduced in v0.27.0-rc1 that causes much increased memory usage in state cache (PR #3005) + + Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== From c384705ee877ba40e1a736b68ebe07434662cbaa Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 19 Mar 2018 15:11:58 +0000 Subject: [PATCH 140/173] Update __init__.py bump version --- synapse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/__init__.py b/synapse/__init__.py index d72d88b47..71b409ba8 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.27.0-rc1" +__version__ = "0.27.0-rc2" From 610accbb7f58d7a07007dedaa054c8ad7a9f5851 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Mar 2018 16:06:02 +0000 Subject: [PATCH 141/173] Fix replication after switch to simplejson Turns out that simplejson serialises namedtuple's as dictionaries rather than tuples by default. --- synapse/replication/tcp/commands.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 9633404f7..1b4b65721 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -107,7 +107,7 @@ class RdataCommand(Command): return " ".join(( self.stream_name, str(self.token) if self.token is not None else "batch", - json.dumps(self.row), + json.dumps(self.row, namedtuple_as_object=False), )) @@ -301,7 +301,9 @@ class InvalidateCacheCommand(Command): return cls(cache_func, json.loads(keys_json)) def to_line(self): - return " ".join((self.cache_func, json.dumps(self.keys))) + return " ".join(( + self.cache_func, json.dumps(self.keys, namedtuple_as_object=False) + )) class UserIpCommand(Command): From 9aa5a0af514b3f2719dd01aa221039c5ba1c0c80 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Mar 2018 09:58:13 +0000 Subject: [PATCH 142/173] Explicitly use simplejson --- synapse/replication/tcp/commands.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 1b4b65721..0005ad587 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -19,7 +19,7 @@ allowed to be sent by which side. """ import logging -import simplejson as json +import simplejson logger = logging.getLogger(__name__) @@ -100,14 +100,14 @@ class RdataCommand(Command): return cls( stream_name, None if token == "batch" else int(token), - json.loads(row_json) + simplejson.loads(row_json) ) def to_line(self): return " ".join(( self.stream_name, str(self.token) if self.token is not None else "batch", - json.dumps(self.row, namedtuple_as_object=False), + simplejson.dumps(self.row, namedtuple_as_object=False), )) @@ -298,11 +298,11 @@ class InvalidateCacheCommand(Command): def from_line(cls, line): cache_func, keys_json = line.split(" ", 1) - return cls(cache_func, json.loads(keys_json)) + return cls(cache_func, simplejson.loads(keys_json)) def to_line(self): return " ".join(( - self.cache_func, json.dumps(self.keys, namedtuple_as_object=False) + self.cache_func, simplejson.dumps(self.keys, namedtuple_as_object=False) )) @@ -327,14 +327,14 @@ class UserIpCommand(Command): def from_line(cls, line): user_id, jsn = line.split(" ", 1) - access_token, ip, user_agent, device_id, last_seen = json.loads(jsn) + access_token, ip, user_agent, device_id, last_seen = simplejson.loads(jsn) return cls( user_id, access_token, ip, user_agent, device_id, last_seen ) def to_line(self): - return self.user_id + " " + json.dumps(( + return self.user_id + " " + simplejson.dumps(( self.access_token, self.ip, self.user_agent, self.device_id, self.last_seen, )) From f5aa027c2f9d836df7b98da6c1c5fee6b22d0a3d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 20 Mar 2018 15:06:22 +0000 Subject: [PATCH 143/173] Update CHANGES.rst rearrange ordering of releases to match chronology --- CHANGES.rst | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 788150bcf..bac78d3c1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,21 @@ Changes in synapse v0.27.0-rc2 (2018-03-19) =========================================== -Bugs: + +Pulls in v0.26.1 + +Bug fixes: * Fix bug introduced in v0.27.0-rc1 that causes much increased memory usage in state cache (PR #3005) + + +Changes in synapse v0.26.1 (2018-03-15) +======================================= + +Bug fixes: + * Fix bug where an invalid event caused server to stop functioning correctly, - due to parsing and serializing bugs in ujson library. (pulled in from 0.26.1) + due to parsing and serializing bugs in ujson library (PR #3008) + Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== @@ -55,16 +66,6 @@ Bug fixes: * Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) -Changes in synapse v0.26.1 (2018-03-15) -======================================= - -Bug fixes: - -* Fix bug where an invalid event caused server to stop functioning correctly, - due to parsing and serializing bugs in ujson library. - - - Changes in synapse v0.26.0 (2018-01-05) ======================================= From 1c41b05c8c98f0b9157c791b5b8ebf5f9fe85acf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Mar 2018 17:46:26 +0000 Subject: [PATCH 144/173] Add Cache-Control headers to all JSON APIs It is especially important that sync requests don't get cached, as if a sync returns the same token given then the client will call sync with the same parameters again. If the previous response was cached it will get reused, resulting in the client tight looping making the same request and never making any progress. In general, clients will expect to get up to date data when requesting APIs, and so its safer to do a blanket no cache policy than only whitelisting APIs that we know will break things if they get cached. --- synapse/http/server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/http/server.py b/synapse/http/server.py index 1551db239..f19c068ef 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -488,6 +488,7 @@ def respond_with_json_bytes(request, code, json_bytes, send_cors=False, request.setHeader(b"Content-Type", b"application/json") request.setHeader(b"Server", version_string) request.setHeader(b"Content-Length", b"%d" % (len(json_bytes),)) + request.setHeader(b"Cache-Control", b"no-cache, no-store, must-revalidate") if send_cors: set_cors_headers(request) From fde8e8f09fb0e62d4670e33b75319b5bd57f484f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Mar 2018 11:42:16 +0000 Subject: [PATCH 145/173] Fix s/iteriterms/itervalues --- synapse/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/state.py b/synapse/state.py index 18386556a..a7f20350f 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -488,7 +488,7 @@ class StateResolutionHandler(object): # which will be used as a cache key for future resolutions, but # not get persisted. state_group = None - new_state_event_ids = frozenset(new_state.iteritems()) + new_state_event_ids = frozenset(new_state.itervalues()) for sg, events in state_groups_ids.iteritems(): if new_state_event_ids == frozenset(e_id for e_id in events): state_group = sg From 8cbbfaefc1fc597cbbef52a10dbfb8ecd4d8a8cd Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 23 Mar 2018 10:32:50 +0000 Subject: [PATCH 146/173] 404 correctly on missing paths via NoResource fixes https://github.com/matrix-org/synapse/issues/2043 and https://github.com/matrix-org/synapse/issues/2029 --- synapse/app/appservice.py | 4 ++-- synapse/app/client_reader.py | 4 ++-- synapse/app/event_creator.py | 4 ++-- synapse/app/federation_reader.py | 4 ++-- synapse/app/federation_sender.py | 4 ++-- synapse/app/frontend_proxy.py | 4 ++-- synapse/app/homeserver.py | 4 ++-- synapse/app/media_repository.py | 4 ++-- synapse/app/pusher.py | 4 ++-- synapse/app/synchrotron.py | 4 ++-- synapse/app/user_dir.py | 4 ++-- synapse/util/httpresourcetree.py | 4 ++-- 12 files changed, 24 insertions(+), 24 deletions(-) diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index c6fe4516d..f2540023a 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -36,7 +36,7 @@ from synapse.util.logcontext import LoggingContext, preserve_fn from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.appservice") @@ -64,7 +64,7 @@ class AppserviceServer(HomeServer): if name == "metrics": resources[METRICS_PREFIX] = MetricsResource(self) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 0a8ce9bc6..267d34c88 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -44,7 +44,7 @@ from synapse.util.logcontext import LoggingContext from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.client_reader") @@ -88,7 +88,7 @@ class ClientReaderServer(HomeServer): "/_matrix/client/api/v1": resource, }) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index 172e989b5..b915d12d5 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -52,7 +52,7 @@ from synapse.util.logcontext import LoggingContext from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.event_creator") @@ -104,7 +104,7 @@ class EventCreatorServer(HomeServer): "/_matrix/client/api/v1": resource, }) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 20d157911..c1dc66dd1 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -41,7 +41,7 @@ from synapse.util.logcontext import LoggingContext from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.federation_reader") @@ -77,7 +77,7 @@ class FederationReaderServer(HomeServer): FEDERATION_PREFIX: TransportLayerServer(self), }) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index f760826d2..0cc333151 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -42,7 +42,7 @@ from synapse.util.logcontext import LoggingContext, preserve_fn from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import defer, reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.federation_sender") @@ -91,7 +91,7 @@ class FederationSenderServer(HomeServer): if name == "metrics": resources[METRICS_PREFIX] = MetricsResource(self) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index 816c080d1..de889357c 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -44,7 +44,7 @@ from synapse.util.logcontext import LoggingContext from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import defer, reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.frontend_proxy") @@ -142,7 +142,7 @@ class FrontendProxyServer(HomeServer): "/_matrix/client/api/v1": resource, }) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e477c7ced..c00afbba2 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -56,7 +56,7 @@ from synapse.util.rlimit import change_resource_limit from synapse.util.versionstring import get_version_string from twisted.application import service from twisted.internet import defer, reactor -from twisted.web.resource import EncodingResourceWrapper, Resource +from twisted.web.resource import EncodingResourceWrapper, NoResource from twisted.web.server import GzipEncoderFactory from twisted.web.static import File @@ -126,7 +126,7 @@ class SynapseHomeServer(HomeServer): if WEB_CLIENT_PREFIX in resources: root_resource = RootRedirect(WEB_CLIENT_PREFIX) else: - root_resource = Resource() + root_resource = NoResource() root_resource = create_resource_tree(resources, root_resource) diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 84c5791b3..fc8282bbc 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -43,7 +43,7 @@ from synapse.util.logcontext import LoggingContext from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.media_repository") @@ -84,7 +84,7 @@ class MediaRepositoryServer(HomeServer): ), }) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 98a4a7c62..d5c3a8519 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -37,7 +37,7 @@ from synapse.util.logcontext import LoggingContext, preserve_fn from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import defer, reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.pusher") @@ -94,7 +94,7 @@ class PusherServer(HomeServer): if name == "metrics": resources[METRICS_PREFIX] = MetricsResource(self) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index abe91dcfb..508b66613 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -56,7 +56,7 @@ from synapse.util.manhole import manhole from synapse.util.stringutils import random_string from synapse.util.versionstring import get_version_string from twisted.internet import defer, reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.synchrotron") @@ -269,7 +269,7 @@ class SynchrotronServer(HomeServer): "/_matrix/client/api/v1": resource, }) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index 494ccb702..5f845e80d 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -43,7 +43,7 @@ from synapse.util.logcontext import LoggingContext, preserve_fn from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string from twisted.internet import reactor -from twisted.web.resource import Resource +from twisted.web.resource import NoResource logger = logging.getLogger("synapse.app.user_dir") @@ -116,7 +116,7 @@ class UserDirectoryServer(HomeServer): "/_matrix/client/api/v1": resource, }) - root_resource = create_resource_tree(resources, Resource()) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( bind_addresses, diff --git a/synapse/util/httpresourcetree.py b/synapse/util/httpresourcetree.py index 45be47159..d74784955 100644 --- a/synapse/util/httpresourcetree.py +++ b/synapse/util/httpresourcetree.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.web.resource import Resource +from twisted.web.resource import NoResource import logging @@ -45,7 +45,7 @@ def create_resource_tree(desired_tree, root_resource): for path_seg in full_path.split('/')[1:-1]: if path_seg not in last_resource.listNames(): # resource doesn't exist, so make a "dummy resource" - child_resource = Resource() + child_resource = NoResource() last_resource.putChild(path_seg, child_resource) res_id = _resource_id(last_resource, path_seg) resource_mappings[res_id] = child_resource From 8efe773ef1dc09c295fe494befbef568257095db Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 23 Mar 2018 21:38:42 +0000 Subject: [PATCH 147/173] fix typo --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index bac78d3c1..00c8a2b77 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -39,7 +39,7 @@ Features: Changes: -* Continue to factor out processing from main process and into worker processes. See updated `docs/workers.rst `_ (PR #2892 - #2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995, #2784) +* Continue to factor out processing from main process and into worker processes. See updated `docs/workers.rst `_ (PR #2892 - #2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995, #2784) * Ensure state cache is used when persisting events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) * Change the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! * No longer require a specific version of saml2 (PR #2695) Thanks to @okurz! From a052aa42e7d500f32b034bbbf1e8ad11fed02aa9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 12:02:20 +0100 Subject: [PATCH 148/173] Linearize calls to _generate_user_id --- synapse/handlers/register.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index ed5939880..d07a8801d 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -24,7 +24,7 @@ from synapse.api.errors import ( from synapse.http.client import CaptchaServerHttpClient from synapse import types from synapse.types import UserID -from synapse.util.async import run_on_reactor +from synapse.util.async import run_on_reactor, Linearizer from synapse.util.threepids import check_3pid_allowed from ._base import BaseHandler @@ -46,6 +46,8 @@ class RegistrationHandler(BaseHandler): self.macaroon_gen = hs.get_macaroon_generator() + self._generate_user_id_linearizer = Linearizer(name="_generate_user_id_linearizer") + @defer.inlineCallbacks def check_username(self, localpart, guest_access_token=None, assigned_user_id=None): @@ -345,9 +347,10 @@ class RegistrationHandler(BaseHandler): @defer.inlineCallbacks def _generate_user_id(self, reseed=False): if reseed or self._next_generated_user_id is None: - self._next_generated_user_id = ( - yield self.store.find_next_generated_user_id_localpart() - ) + with (yield self._generate_user_id_linearizer.queue(())): + self._next_generated_user_id = ( + yield self.store.find_next_generated_user_id_localpart() + ) id = self._next_generated_user_id self._next_generated_user_id += 1 From 8d6dc106d188308f687db1f696cad7ad1b9ae83d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 12:02:44 +0100 Subject: [PATCH 149/173] Don't use _cursor_to_dict in find_next_generated_user_id_localpart --- synapse/storage/registration.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index d809b2ba4..6b557ca0c 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -460,14 +460,12 @@ class RegistrationStore(RegistrationWorkerStore, """ def _find_next_generated_user_id(txn): txn.execute("SELECT name FROM users") - rows = self.cursor_to_dict(txn) regex = re.compile("^@(\d+):") found = set() - for r in rows: - user_id = r["name"] + for user_id, in txn: match = regex.search(user_id) if match: found.add(int(match.group(1))) From 44cd6e135806387552bab5cf7e8be256d53632b6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 12:06:48 +0100 Subject: [PATCH 150/173] PEP8 --- synapse/handlers/register.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index d07a8801d..f1bf81ed7 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -46,7 +46,9 @@ class RegistrationHandler(BaseHandler): self.macaroon_gen = hs.get_macaroon_generator() - self._generate_user_id_linearizer = Linearizer(name="_generate_user_id_linearizer") + self._generate_user_id_linearizer = Linearizer( + name="_generate_user_id_linearizer", + ) @defer.inlineCallbacks def check_username(self, localpart, guest_access_token=None, From fecb45e0c38912f72655d53faa953e7713511e7f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 13:31:49 +0100 Subject: [PATCH 151/173] Remove last usage of ujson --- synapse/python_dependencies.py | 1 - synapse/storage/group_server.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 91179ce53..40eedb63c 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -34,7 +34,6 @@ REQUIREMENTS = { "bcrypt": ["bcrypt>=3.1.0"], "pillow": ["PIL"], "pydenticon": ["pydenticon"], - "ujson": ["ujson"], "blist": ["blist"], "pysaml2>=3.0.0": ["saml2>=3.0.0"], "pymacaroons-pynacl": ["pymacaroons"], diff --git a/synapse/storage/group_server.py b/synapse/storage/group_server.py index 8fde1aab8..d03858234 100644 --- a/synapse/storage/group_server.py +++ b/synapse/storage/group_server.py @@ -19,7 +19,7 @@ from synapse.api.errors import SynapseError from ._base import SQLBaseStore -import ujson as json +import simplejson as json # The category ID for the "default" category. We don't store as null in the From 51406dab96cb3a4de2fad02a31984cfcb613f809 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 26 Mar 2018 14:48:19 +0100 Subject: [PATCH 152/173] version bump --- CHANGES.rst | 6 ++++++ synapse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index bac78d3c1..92c19b13e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +Changes in synapse v0.27.0 (2018-03-26) +======================================= + +No changes since v0.27.0-rc2 + + Changes in synapse v0.27.0-rc2 (2018-03-19) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index 71b409ba8..70a029705 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.27.0-rc2" +__version__ = "0.27.0" From 91878620027d17b478ca678482f501d3cfb2edaf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 16:19:38 +0100 Subject: [PATCH 153/173] Bump version and changelog --- CHANGES.rst | 6 ++++++ synapse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 92c19b13e..e5e8e31a1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +Changes in synapse v0.27.1 (2018-03-19) +======================================= + +Meta release as v0.27.0 temporarily pointed to the wrong commit + + Changes in synapse v0.27.0 (2018-03-26) ======================================= diff --git a/synapse/__init__.py b/synapse/__init__.py index 70a029705..2cb6ef465 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.27.0" +__version__ = "0.27.1" From 01f72e2fc7379db7d02d5bc1eae96167aa320389 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 16:21:26 +0100 Subject: [PATCH 154/173] Fix date --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index e5e8e31a1..ddef8c205 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,4 +1,4 @@ -Changes in synapse v0.27.1 (2018-03-19) +Changes in synapse v0.27.1 (2018-03-26) ======================================= Meta release as v0.27.0 temporarily pointed to the wrong commit From a9d7d98d3f87cc20d94c9cc4ee9285e6a2dddb2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 16:36:53 +0100 Subject: [PATCH 155/173] Bum version and changelog --- CHANGES.rst | 8 ++++++++ synapse/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index ddef8c205..1372de424 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,11 @@ +Changes in synapse v0.27.2 (2018-03-26) +======================================= + +Bug fixes: + +* Fix bug which broke TCP replication between workers (PR #3015) + + Changes in synapse v0.27.1 (2018-03-26) ======================================= diff --git a/synapse/__init__.py b/synapse/__init__.py index 2cb6ef465..a9d5198ab 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.27.1" +__version__ = "0.27.2" From 3f49e131d9a31d5bf198f2aa6b102562b2e747d0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Mar 2018 10:30:03 +0100 Subject: [PATCH 156/173] Add counter metrics for calculating state delta This will allow us to measure how often we calculate state deltas in event persistence that we would have been able to calculate at the same time we calculated the state for the event. --- synapse/storage/events.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 85ce6bea1..f3a362db2 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -52,6 +52,15 @@ persist_event_counter = metrics.register_counter("persisted_events") event_counter = metrics.register_counter( "persisted_events_sep", labels=["type", "origin_type", "origin_entity"] ) +state_delta_counter = metrics.register_counter( + "state_delta", +) +state_delta_single_event_counter = metrics.register_counter( + "state_delta_single_event", +) +state_delta_reuse_delta_counter = metrics.register_counter( + "state_delta_reuse_delta", +) def encode_json(json_object): @@ -368,7 +377,8 @@ class EventsStore(EventsWorkerStore): room_id, ev_ctx_rm, latest_event_ids ) - if new_latest_event_ids == set(latest_event_ids): + latest_event_ids = set(latest_event_ids) + if new_latest_event_ids == latest_event_ids: # No change in extremities, so no change in state continue @@ -389,6 +399,25 @@ class EventsStore(EventsWorkerStore): if all_single_prev_not_state: continue + state_delta_counter.inc() + if len(new_latest_event_ids) == 1: + state_delta_single_event_counter.inc() + + # This is a fairly handwavey check to see if we could + # have guessed what the delta would have been when + # processing one of these events. + # What we're interested in is if the latest extremities + # were the same when we created the event as they are + # now. We guess this by looking at the prev events and + # checking if they match up, as when this server creates + # a new event it will use the extremities as the prev + # events. + for ev, _ in ev_ctx_rm: + prev_event_ids = set(e for e, _ in ev.prev_events) + if latest_event_ids == prev_event_ids: + state_delta_reuse_delta_counter.inc() + break + logger.info( "Calculating state delta for room %s", room_id, ) From 3e0c0660b31827f9d13c1fb5153a77e70813d5d3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Mar 2018 13:01:34 +0100 Subject: [PATCH 157/173] Also do check inside linearizer --- synapse/handlers/register.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index f1bf81ed7..dd0370527 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -350,9 +350,10 @@ class RegistrationHandler(BaseHandler): def _generate_user_id(self, reseed=False): if reseed or self._next_generated_user_id is None: with (yield self._generate_user_id_linearizer.queue(())): - self._next_generated_user_id = ( - yield self.store.find_next_generated_user_id_localpart() - ) + if reseed or self._next_generated_user_id is None: + self._next_generated_user_id = ( + yield self.store.find_next_generated_user_id_localpart() + ) id = self._next_generated_user_id self._next_generated_user_id += 1 From e70287cff3adeeeff2a7a1e95fc5845feb365710 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Mar 2018 13:13:38 +0100 Subject: [PATCH 158/173] Comment --- synapse/storage/events.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index f3a362db2..ee8b3c46d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -52,12 +52,19 @@ persist_event_counter = metrics.register_counter("persisted_events") event_counter = metrics.register_counter( "persisted_events_sep", labels=["type", "origin_type", "origin_entity"] ) + +# The number of times we are recalculating the current state state_delta_counter = metrics.register_counter( "state_delta", ) +# The number of times we are recalculating state when there is only a +# single forward extremity state_delta_single_event_counter = metrics.register_counter( "state_delta_single_event", ) +# The number of times we are reculating state when we could have resonably +# calculated the delta when we calculated the state for an event we were +# persisting. state_delta_reuse_delta_counter = metrics.register_counter( "state_delta_reuse_delta", ) From 152c2ac19e6af373a9e79a8f4da80d9e77a5396f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Mar 2018 13:13:46 +0100 Subject: [PATCH 159/173] Fix indent --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ee8b3c46d..95b36d669 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -423,7 +423,7 @@ class EventsStore(EventsWorkerStore): prev_event_ids = set(e for e, _ in ev.prev_events) if latest_event_ids == prev_event_ids: state_delta_reuse_delta_counter.inc() - break + break logger.info( "Calculating state delta for room %s", room_id, From 800cfd5774d9726a00c53f2d84ce0286ab64c17b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Mar 2018 13:30:39 +0100 Subject: [PATCH 160/173] Comment --- synapse/storage/events.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 95b36d669..f3d65f433 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -415,10 +415,11 @@ class EventsStore(EventsWorkerStore): # processing one of these events. # What we're interested in is if the latest extremities # were the same when we created the event as they are - # now. We guess this by looking at the prev events and - # checking if they match up, as when this server creates - # a new event it will use the extremities as the prev - # events. + # now. When this server creates a new event (as opposed + # to receiving it over federation) it will use the + # forward extremities as the prev_events, so we can + # guess this by looking at the prev_events and checking + # if they match the current forward extremities. for ev, _ in ev_ctx_rm: prev_event_ids = set(e for e, _ in ev.prev_events) if latest_event_ids == prev_event_ids: From c2a5cf2fe32d2cd582711669b7c0ce74682e1c05 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 27 Mar 2018 17:07:31 +0100 Subject: [PATCH 161/173] factor out exception handling for keys/claim and keys/query this stuff is badly c&p'ed --- synapse/handlers/e2e_keys.py | 53 +++++++++++++++++------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 80b359b2e..41521e699 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -134,23 +135,8 @@ class E2eKeysHandler(object): if user_id in destination_query: results[user_id] = keys - except CodeMessageException as e: - failures[destination] = { - "status": e.code, "message": e.message - } - except NotRetryingDestination as e: - failures[destination] = { - "status": 503, "message": "Not ready for retry", - } - except FederationDeniedError as e: - failures[destination] = { - "status": 403, "message": "Federation Denied", - } except Exception as e: - # include ConnectionRefused and other errors - failures[destination] = { - "status": 503, "message": e.message - } + failures[destination] = _exception_to_failure(e) yield make_deferred_yieldable(defer.gatherResults([ preserve_fn(do_remote_query)(destination) @@ -252,19 +238,8 @@ class E2eKeysHandler(object): for user_id, keys in remote_result["one_time_keys"].items(): if user_id in device_keys: json_result[user_id] = keys - except CodeMessageException as e: - failures[destination] = { - "status": e.code, "message": e.message - } - except NotRetryingDestination as e: - failures[destination] = { - "status": 503, "message": "Not ready for retry", - } except Exception as e: - # include ConnectionRefused and other errors - failures[destination] = { - "status": 503, "message": e.message - } + failures[destination] = _exception_to_failure(e) yield make_deferred_yieldable(defer.gatherResults([ preserve_fn(claim_client_keys)(destination) @@ -362,6 +337,28 @@ class E2eKeysHandler(object): ) +def _exception_to_failure(e): + if isinstance(e, CodeMessageException): + return { + "status": e.code, "message": e.message, + } + + if isinstance(e, NotRetryingDestination): + return { + "status": 503, "message": "Not ready for retry", + } + + if isinstance(e, FederationDeniedError): + return { + "status": 403, "message": "Federation Denied", + } + + # include ConnectionRefused and other errors + return { + "status": 503, "message": e.message, + } + + def _one_time_keys_match(old_key_json, new_key): old_key = json.loads(old_key_json) From a134c572a6697fa6443525493e3fc13f74452d34 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 27 Mar 2018 17:15:06 +0100 Subject: [PATCH 162/173] Stringify exceptions for keys/{query,claim} Make sure we stringify any exceptions we return from keys/query and keys/claim, to avoid a 'not JSON serializable' error later Fixes #3010 --- synapse/handlers/e2e_keys.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 41521e699..325c0c4a9 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -354,8 +354,11 @@ def _exception_to_failure(e): } # include ConnectionRefused and other errors + # + # Note that some Exceptions (notably twisted's ResponseFailed etc) don't + # give a string for e.message, which simplejson then fails to serialize. return { - "status": 503, "message": e.message, + "status": 503, "message": str(e.message), } From ef520d8d0e152a24fb6660fdd2def214b6e9caae Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Tue, 27 Mar 2018 14:12:22 +0100 Subject: [PATCH 163/173] Include coarse CPU and Memory use in stats callbacks. This requires the psutil module, and is still opt-in based on the report_stats config option. --- UPGRADE.rst | 12 ++++++++++++ synapse/app/homeserver.py | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/UPGRADE.rst b/UPGRADE.rst index 2efe7ea60..f6bb1070b 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -48,6 +48,18 @@ returned by the Client-Server API: # configured on port 443. curl -kv https:///_matrix/client/versions 2>&1 | grep "Server:" +Upgrading to $NEXT_VERSION +==================== + +This release expands the anonymous usage stats sent if the opt-in +``report_stats`` configuration is set to ``true``. We now capture RSS memory +and cpu use at a very coarse level. This requires administrators to install +the optional ``psutil`` python module. + +We would appreciate it if you could assist by ensuring this module is available +and ``report_stats`` is enabled. This will let us see if performance changes to +synapse are having an impact to the general community. + Upgrading to v0.15.0 ==================== diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index c00afbba2..313be42de 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -401,6 +401,7 @@ def run(hs): start_time = clock.time() stats = {} + stats_process = None @defer.inlineCallbacks def phone_stats_home(): @@ -427,6 +428,10 @@ def run(hs): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages + if stats_process is not None: + with stats_process.oneshot(): + stats["memory_rss"] = stats_process.memory_info().rss + stats["cpu_average"] = int(stats_process.cpu_info(interval=None)) logger.info("Reporting stats to matrix.org: %s" % (stats,)) try: @@ -438,6 +443,21 @@ def run(hs): logger.warn("Error reporting stats: %s", e) if hs.config.report_stats: + try: + import psutil + stats_process = psutil.Process() + # Ensure we can fetch both, and make the initial request for cpu_percent + # so the next request will use this as the initial point. + stats_process.memory_info().rss + stats_process.cpu_percent(interval=None) + except (ImportError, AttributeError): + logger.warn( + "report_stats enabled but psutil is not installed or incorrect version." + " Disabling reporting of memory/cpu stats." + " Ensuring psutil is available will help matrix track performance changes across releases." + ) + stats_process = None + logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) From a9cb1a35c85f62bb0114dabd62d118c80d66e415 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 10:57:27 +0100 Subject: [PATCH 164/173] fix tests/storage/test_user_directory.py --- tests/storage/test_user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 0891308f2..88add4521 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -62,7 +62,7 @@ class UserDirectoryStoreTestCase(unittest.TestCase): self.assertFalse(r["limited"]) self.assertEqual(1, len(r["results"])) self.assertDictEqual(r["results"][0], { - "user_id": BOB, + "d.user_id": BOB, "display_name": "bob", "avatar_url": None, }) From 01ccc9e6f25a87d7906d7907afd9e8527228215b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Mar 2018 11:03:52 +0100 Subject: [PATCH 165/173] Measure time it takes to calculate state group ID --- synapse/state.py | 51 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index a7f20350f..26093c843 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -483,33 +483,34 @@ class StateResolutionHandler(object): key: e_ids.pop() for key, e_ids in state.iteritems() } - # if the new state matches any of the input state groups, we can - # use that state group again. Otherwise we will generate a state_id - # which will be used as a cache key for future resolutions, but - # not get persisted. - state_group = None - new_state_event_ids = frozenset(new_state.itervalues()) - for sg, events in state_groups_ids.iteritems(): - if new_state_event_ids == frozenset(e_id for e_id in events): - state_group = sg - break + with Measure(self.clock, "state.create_group_ids"): + # if the new state matches any of the input state groups, we can + # use that state group again. Otherwise we will generate a state_id + # which will be used as a cache key for future resolutions, but + # not get persisted. + state_group = None + new_state_event_ids = frozenset(new_state.itervalues()) + for sg, events in state_groups_ids.iteritems(): + if new_state_event_ids == frozenset(e_id for e_id in events): + state_group = sg + break - # TODO: We want to create a state group for this set of events, to - # increase cache hits, but we need to make sure that it doesn't - # end up as a prev_group without being added to the database + # TODO: We want to create a state group for this set of events, to + # increase cache hits, but we need to make sure that it doesn't + # end up as a prev_group without being added to the database - prev_group = None - delta_ids = None - for old_group, old_ids in state_groups_ids.iteritems(): - if not set(new_state) - set(old_ids): - n_delta_ids = { - k: v - for k, v in new_state.iteritems() - if old_ids.get(k) != v - } - if not delta_ids or len(n_delta_ids) < len(delta_ids): - prev_group = old_group - delta_ids = n_delta_ids + prev_group = None + delta_ids = None + for old_group, old_ids in state_groups_ids.iteritems(): + if not set(new_state) - set(old_ids): + n_delta_ids = { + k: v + for k, v in new_state.iteritems() + if old_ids.get(k) != v + } + if not delta_ids or len(n_delta_ids) < len(delta_ids): + prev_group = old_group + delta_ids = n_delta_ids cache = _StateCacheEntry( state=new_state, From 545001b9e4b1d6710145d3efe2117fbdf823fb38 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 11:19:45 +0100 Subject: [PATCH 166/173] Fix search_user_dir multiple sqlite versions do different things --- synapse/storage/user_directory.py | 4 ++-- tests/storage/test_user_directory.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index dfdcbb318..d6e289ffb 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -667,7 +667,7 @@ class UserDirectoryStore(SQLBaseStore): # The array of numbers are the weights for the various part of the # search: (domain, _, display name, localpart) sql = """ - SELECT d.user_id, display_name, avatar_url + SELECT d.user_id AS user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory AS d USING (user_id) %s @@ -702,7 +702,7 @@ class UserDirectoryStore(SQLBaseStore): search_query = _parse_query_sqlite(search_term) sql = """ - SELECT d.user_id, display_name, avatar_url + SELECT d.user_id AS user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory AS d USING (user_id) %s diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 88add4521..0891308f2 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -62,7 +62,7 @@ class UserDirectoryStoreTestCase(unittest.TestCase): self.assertFalse(r["limited"]) self.assertEqual(1, len(r["results"])) self.assertDictEqual(r["results"][0], { - "d.user_id": BOB, + "user_id": BOB, "display_name": "bob", "avatar_url": None, }) From 4ceaa7433a324afab23c4a445cabe3da965e5846 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 28 Mar 2018 12:08:09 +0100 Subject: [PATCH 167/173] As daemonizing will make a new process, defer call to init. --- synapse/app/homeserver.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 313be42de..0737945ed 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -401,7 +401,7 @@ def run(hs): start_time = clock.time() stats = {} - stats_process = None + stats_process = [] @defer.inlineCallbacks def phone_stats_home(): @@ -428,10 +428,13 @@ def run(hs): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages - if stats_process is not None: - with stats_process.oneshot(): - stats["memory_rss"] = stats_process.memory_info().rss - stats["cpu_average"] = int(stats_process.cpu_info(interval=None)) + if len(stats_process) > 0: + stats["memory_rss"] = 0 + stats["cpu_average"] = 0 + for process in stats_process: + with process.oneshot(): + stats["memory_rss"] += process.memory_info().rss + stats["cpu_average"] += int(process.cpu_percent(interval=None)) logger.info("Reporting stats to matrix.org: %s" % (stats,)) try: @@ -442,25 +445,32 @@ def run(hs): except Exception as e: logger.warn("Error reporting stats: %s", e) - if hs.config.report_stats: + def performance_stats_init(): try: import psutil - stats_process = psutil.Process() + process = psutil.Process() # Ensure we can fetch both, and make the initial request for cpu_percent # so the next request will use this as the initial point. - stats_process.memory_info().rss - stats_process.cpu_percent(interval=None) + process.memory_info().rss + process.cpu_percent(interval=None) + logger.info("report_stats can use psutil") + stats_process.append(process) except (ImportError, AttributeError): logger.warn( - "report_stats enabled but psutil is not installed or incorrect version." - " Disabling reporting of memory/cpu stats." - " Ensuring psutil is available will help matrix track performance changes across releases." + "report_stats enabled but psutil is not installed or incorrect version." + " Disabling reporting of memory/cpu stats." + " Ensuring psutil is available will help matrix track performance changes" + " across releases." ) - stats_process = None + if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) + # We need to defer this init for the cases that we daemonize + # otherwise the process ID we get is that of the non-daemon process + clock.call_later(15, performance_stats_init) + # We wait 5 minutes to send the first set of stats as the server can # be quite busy the first few minutes clock.call_later(5 * 60, phone_stats_home) From 33f6195d9ae91520aee9d108d60245b5265ac714 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 28 Mar 2018 14:25:25 +0100 Subject: [PATCH 168/173] Handle review comments --- synapse/app/homeserver.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 0737945ed..b935beb97 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -401,6 +401,9 @@ def run(hs): start_time = clock.time() stats = {} + + # Contains the list of processes we will be monitoring + # currently either 0 or 1 stats_process = [] @defer.inlineCallbacks @@ -428,13 +431,13 @@ def run(hs): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages + if len(stats_process) > 0: stats["memory_rss"] = 0 stats["cpu_average"] = 0 for process in stats_process: - with process.oneshot(): - stats["memory_rss"] += process.memory_info().rss - stats["cpu_average"] += int(process.cpu_percent(interval=None)) + stats["memory_rss"] += process.memory_info().rss + stats["cpu_average"] += int(process.cpu_percent(interval=None)) logger.info("Reporting stats to matrix.org: %s" % (stats,)) try: @@ -459,8 +462,8 @@ def run(hs): logger.warn( "report_stats enabled but psutil is not installed or incorrect version." " Disabling reporting of memory/cpu stats." - " Ensuring psutil is available will help matrix track performance changes" - " across releases." + " Ensuring psutil is available will help matrix.org track performance" + " changes across releases." ) if hs.config.report_stats: @@ -469,7 +472,7 @@ def run(hs): # We need to defer this init for the cases that we daemonize # otherwise the process ID we get is that of the non-daemon process - clock.call_later(15, performance_stats_init) + clock.call_later(0, performance_stats_init) # We wait 5 minutes to send the first set of stats as the server can # be quite busy the first few minutes From fcfe7f6ad3a2a9c285ac96008395fc47e096ff4b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 29 Mar 2018 22:45:52 +0100 Subject: [PATCH 169/173] Use simplejson throughout Let's use simplejson rather than json, for consistency. --- synapse/api/errors.py | 3 ++- synapse/handlers/identity.py | 8 +++++--- synapse/storage/schema/delta/14/upgrade_appservice_db.py | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index aa15f73f3..bee59e80d 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -15,9 +15,10 @@ """Contains exceptions and error codes.""" -import json import logging +import simplejson as json + logger = logging.getLogger(__name__) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 9efcdff1d..91a089886 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -15,6 +15,11 @@ # limitations under the License. """Utilities for interacting with Identity Servers""" + +import logging + +import simplejson as json + from twisted.internet import defer from synapse.api.errors import ( @@ -24,9 +29,6 @@ from ._base import BaseHandler from synapse.util.async import run_on_reactor from synapse.api.errors import SynapseError, Codes -import json -import logging - logger = logging.getLogger(__name__) diff --git a/synapse/storage/schema/delta/14/upgrade_appservice_db.py b/synapse/storage/schema/delta/14/upgrade_appservice_db.py index 8755bb2e4..4d725b92f 100644 --- a/synapse/storage/schema/delta/14/upgrade_appservice_db.py +++ b/synapse/storage/schema/delta/14/upgrade_appservice_db.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import logging +import simplejson as json + logger = logging.getLogger(__name__) From 05630758f25d958bf60fde4df5f80a89e4a9a0ac Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 29 Mar 2018 22:57:28 +0100 Subject: [PATCH 170/173] Use static JSONEncoders using json.dumps with custom options requires us to create a new JSONEncoder on each call. It's more efficient to create one upfront and reuse it. --- synapse/handlers/message.py | 4 ++-- synapse/replication/tcp/commands.py | 8 +++++--- synapse/storage/events.py | 23 ++++++++--------------- synapse/util/frozenutils.py | 19 +++++++++++++++++++ 4 files changed, 34 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5a8ddc253..6de6e13b7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -27,7 +27,7 @@ from synapse.types import ( from synapse.util.async import run_on_reactor, ReadWriteLock, Limiter from synapse.util.logcontext import preserve_fn, run_in_background from synapse.util.metrics import measure_func -from synapse.util.frozenutils import unfreeze +from synapse.util.frozenutils import frozendict_json_encoder from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client from synapse.replication.http.send_event import send_event_to_master @@ -678,7 +678,7 @@ class EventCreationHandler(object): # Ensure that we can round trip before trying to persist in db try: - dump = simplejson.dumps(unfreeze(event.content)) + dump = frozendict_json_encoder.encode(event.content) simplejson.loads(dump) except Exception: logger.exception("Failed to encode content: %r", event.content) diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 0005ad587..34bcf903a 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -24,6 +24,8 @@ import simplejson logger = logging.getLogger(__name__) +_json_encoder = simplejson.JSONEncoder(namedtuple_as_object=False) + class Command(object): """The base command class. @@ -107,7 +109,7 @@ class RdataCommand(Command): return " ".join(( self.stream_name, str(self.token) if self.token is not None else "batch", - simplejson.dumps(self.row, namedtuple_as_object=False), + _json_encoder.dumps(self.row), )) @@ -302,7 +304,7 @@ class InvalidateCacheCommand(Command): def to_line(self): return " ".join(( - self.cache_func, simplejson.dumps(self.keys, namedtuple_as_object=False) + self.cache_func, _json_encoder.encode(self.keys), )) @@ -334,7 +336,7 @@ class UserIpCommand(Command): ) def to_line(self): - return self.user_id + " " + simplejson.dumps(( + return self.user_id + " " + _json_encoder.encode(( self.access_token, self.ip, self.user_agent, self.device_id, self.last_seen, )) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index f3d65f433..ece5e6c41 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -14,15 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.storage.events_worker import EventsWorkerStore +from collections import OrderedDict, deque, namedtuple +from functools import wraps +import logging +import simplejson as json from twisted.internet import defer -from synapse.events import USE_FROZEN_DICTS +from synapse.storage.events_worker import EventsWorkerStore from synapse.util.async import ObservableDeferred +from synapse.util.frozenutils import frozendict_json_encoder from synapse.util.logcontext import ( - PreserveLoggingContext, make_deferred_yieldable + PreserveLoggingContext, make_deferred_yieldable, ) from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -30,16 +34,8 @@ from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.types import get_domain_from_id - -from canonicaljson import encode_canonical_json -from collections import deque, namedtuple, OrderedDict -from functools import wraps - import synapse.metrics -import logging -import simplejson as json - # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 @@ -71,10 +67,7 @@ state_delta_reuse_delta_counter = metrics.register_counter( def encode_json(json_object): - if USE_FROZEN_DICTS: - return encode_canonical_json(json_object) - else: - return json.dumps(json_object, ensure_ascii=False) + return frozendict_json_encoder.encode(json_object) class _EventPeristenceQueue(object): diff --git a/synapse/util/frozenutils.py b/synapse/util/frozenutils.py index 6322f0f55..f497b51f4 100644 --- a/synapse/util/frozenutils.py +++ b/synapse/util/frozenutils.py @@ -14,6 +14,7 @@ # limitations under the License. from frozendict import frozendict +import simplejson as json def freeze(o): @@ -49,3 +50,21 @@ def unfreeze(o): pass return o + + +def _handle_frozendict(obj): + """Helper for EventEncoder. Makes frozendicts serializable by returning + the underlying dict + """ + if type(obj) is frozendict: + # fishing the protected dict out of the object is a bit nasty, + # but we don't really want the overhead of copying the dict. + return obj._dict + raise TypeError('Object of type %s is not JSON serializable' % + obj.__class__.__name__) + + +# A JSONEncoder which is capable of encoding frozendics without barfing +frozendict_json_encoder = json.JSONEncoder( + default=_handle_frozendict, +) From 11597ddea5c43fdd2c6593b6bf4619a7bbdf3122 Mon Sep 17 00:00:00 2001 From: Adrian Tschira Date: Fri, 30 Mar 2018 23:59:02 +0200 Subject: [PATCH 171/173] improve mxid check performance ~4x Signed-off-by: Adrian Tschira --- synapse/types.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/types.py b/synapse/types.py index 7cb24cecb..f1f41ccf9 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -12,11 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import string from synapse.api.errors import SynapseError from collections import namedtuple +import re class Requester(namedtuple("Requester", [ @@ -214,7 +214,8 @@ class GroupID(DomainSpecificString): return group_id -mxid_localpart_allowed_characters = set("_-./=" + string.ascii_lowercase + string.digits) +# A regex that matches any valid mxid characters +MXID_LOCALPART_REGEX = re.compile("^[_\-./=a-z0-9]*$") def contains_invalid_mxid_characters(localpart): @@ -226,7 +227,7 @@ def contains_invalid_mxid_characters(localpart): Returns: bool: True if there are any naughty characters """ - return any(c not in mxid_localpart_allowed_characters for c in localpart) + return not MXID_LOCALPART_REGEX.match(localpart) class StreamToken( From 3ee4ad09eb9bcd0214da83b66214afa3ddb08116 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 3 Apr 2018 15:09:48 +0100 Subject: [PATCH 172/173] Fix json encoding bug in replication json encoders have an encode method, not a dumps method. --- synapse/replication/tcp/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 34bcf903a..12aac3cc6 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -109,7 +109,7 @@ class RdataCommand(Command): return " ".join(( self.stream_name, str(self.token) if self.token is not None else "batch", - _json_encoder.dumps(self.row), + _json_encoder.encode(self.row), )) From f92963f5db236c1afb2a489a44c9afdae7d61edc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 4 Apr 2018 12:08:29 +0100 Subject: [PATCH 173/173] Revert "improve mxid check performance" --- synapse/types.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/synapse/types.py b/synapse/types.py index f1f41ccf9..7cb24cecb 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -12,11 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import string from synapse.api.errors import SynapseError from collections import namedtuple -import re class Requester(namedtuple("Requester", [ @@ -214,8 +214,7 @@ class GroupID(DomainSpecificString): return group_id -# A regex that matches any valid mxid characters -MXID_LOCALPART_REGEX = re.compile("^[_\-./=a-z0-9]*$") +mxid_localpart_allowed_characters = set("_-./=" + string.ascii_lowercase + string.digits) def contains_invalid_mxid_characters(localpart): @@ -227,7 +226,7 @@ def contains_invalid_mxid_characters(localpart): Returns: bool: True if there are any naughty characters """ - return not MXID_LOCALPART_REGEX.match(localpart) + return any(c not in mxid_localpart_allowed_characters for c in localpart) class StreamToken(