From 83d8d4d8cd699d9596e25c6167dad130f3df9ac3 Mon Sep 17 00:00:00 2001 From: Oliver Kurz Date: Mon, 20 Nov 2017 11:11:53 +0100 Subject: [PATCH 001/200] Allow use of higher versions of saml2 The package was pinned to <4.0 with 07cf96eb because "from saml2 import config" did not work. This seems to have been fixed in the mean time in the saml2 package and therefore should not stop to use a more recent version. Signed-off-by: Oliver Kurz --- synapse/python_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 7052333c1..97b631e60 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -36,7 +36,7 @@ REQUIREMENTS = { "pydenticon": ["pydenticon"], "ujson": ["ujson"], "blist": ["blist"], - "pysaml2>=3.0.0,<4.0.0": ["saml2>=3.0.0,<4.0.0"], + "pysaml2>=3.0.0": ["saml2>=3.0.0"], "pymacaroons-pynacl": ["pymacaroons"], "msgpack-python>=0.3.0": ["msgpack"], "phonenumbers>=8.2.0": ["phonenumbers"], From 19f9227643b5099666878de33453bbe361f216fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:25:04 +0000 Subject: [PATCH 002/200] avoid 80s GIN inserts by tweaking work_mem see https://github.com/matrix-org/synapse/issues/2753 for details --- synapse/storage/search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c63..7b1166f41 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -106,6 +106,7 @@ class SearchStore(BackgroundUpdateStore): event_search_rows.append((event_id, room_id, key, value)) if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256KB'") sql = ( "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" @@ -123,6 +124,9 @@ class SearchStore(BackgroundUpdateStore): clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] txn.executemany(sql, clump) + 
if isinstance(self.database_engine, PostgresEngine): + txn.execute("RESET work_mem") + progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, From e365ad329f3c7e12bb2126217acbc62bdf0b9aec Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:30:30 +0000 Subject: [PATCH 003/200] oops, tweak work_mem when actually storing --- synapse/storage/room.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b..9e2bf1ab4 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -310,6 +310,7 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256KB'") sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" @@ -323,6 +324,7 @@ class RoomStore(SQLBaseStore): event.origin_server_ts, ) ) + txn.execute("RESET work_mem") elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" From e79db0a673ef79bfa30e435bf64b5c3b75ed98d9 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:37:48 +0000 Subject: [PATCH 004/200] switch back from GIST to GIN indexes --- synapse/storage/search.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c63..ba7141563 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -31,7 +31,7 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" - EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist" + EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" def __init__(self, db_conn, hs): super(SearchStore, self).__init__(db_conn, hs) @@ 
-43,8 +43,8 @@ class SearchStore(BackgroundUpdateStore): self._background_reindex_search_order ) self.register_background_update_handler( - self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME, - self._background_reindex_gist_search + self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, + self._background_reindex_gin_search ) @defer.inlineCallbacks @@ -145,25 +145,30 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(result) @defer.inlineCallbacks - def _background_reindex_gist_search(self, progress, batch_size): + def _background_reindex_gin_search(self, progress, batch_size): + '''This handles old synapses which used GIST indexes; converting them + back to be GIN as per the actual schema. Otherwise it crashes out + as a NOOP + ''' + def create_index(conn): conn.rollback() conn.set_session(autocommit=True) c = conn.cursor() c.execute( - "CREATE INDEX CONCURRENTLY event_search_fts_idx_gist" - " ON event_search USING GIST (vector)" + "CREATE INDEX CONCURRENTLY event_search_fts_idx" + " ON event_search USING GIN (vector)" ) - c.execute("DROP INDEX event_search_fts_idx") + c.execute("DROP INDEX event_search_fts_idx_gist") conn.set_session(autocommit=False) if isinstance(self.database_engine, PostgresEngine): yield self.runWithConnection(create_index) - yield self._end_background_update(self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME) + yield self._end_background_update(self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME) defer.returnValue(1) @defer.inlineCallbacks From a66f489678dc05fa89e6849405c37a9a390e62fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:55:51 +0000 Subject: [PATCH 005/200] fix GIST->GIN switch --- .../schema/delta/38/postgres_fts_gist.sql | 6 ++-- .../schema/delta/46/postgres_fts_gin.sql | 17 +++++++++++ synapse/storage/search.py | 28 +++++++++++-------- 3 files changed, 37 insertions(+), 14 deletions(-) create mode 100644 synapse/storage/schema/delta/46/postgres_fts_gin.sql diff --git a/synapse/storage/schema/delta/38/postgres_fts_gist.sql 
b/synapse/storage/schema/delta/38/postgres_fts_gist.sql index f090a7b75..5fe27d687 100644 --- a/synapse/storage/schema/delta/38/postgres_fts_gist.sql +++ b/synapse/storage/schema/delta/38/postgres_fts_gist.sql @@ -13,5 +13,7 @@ * limitations under the License. */ - INSERT into background_updates (update_name, progress_json) - VALUES ('event_search_postgres_gist', '{}'); +-- We no longer do this given we back it out again in schema 46 + +-- INSERT into background_updates (update_name, progress_json) +-- VALUES ('event_search_postgres_gist', '{}'); diff --git a/synapse/storage/schema/delta/46/postgres_fts_gin.sql b/synapse/storage/schema/delta/46/postgres_fts_gin.sql new file mode 100644 index 000000000..31d7a817e --- /dev/null +++ b/synapse/storage/schema/delta/46/postgres_fts_gin.sql @@ -0,0 +1,17 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('event_search_postgres_gin', '{}'); diff --git a/synapse/storage/search.py b/synapse/storage/search.py index ba7141563..d3e76b58d 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -146,24 +146,28 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_gin_search(self, progress, batch_size): - '''This handles old synapses which used GIST indexes; converting them - back to be GIN as per the actual schema. 
Otherwise it crashes out - as a NOOP + '''This handles old synapses which used GIST indexes, if any; + converting them back to be GIN as per the actual schema. ''' def create_index(conn): - conn.rollback() - conn.set_session(autocommit=True) - c = conn.cursor() + try: + conn.rollback() + conn.set_session(autocommit=True) + c = conn.cursor() - c.execute( - "CREATE INDEX CONCURRENTLY event_search_fts_idx" - " ON event_search USING GIN (vector)" - ) + c.execute( + "CREATE INDEX CONCURRENTLY event_search_fts_idx" + " ON event_search USING GIN (vector)" + ) - c.execute("DROP INDEX event_search_fts_idx_gist") + c.execute("DROP INDEX event_search_fts_idx_gist") - conn.set_session(autocommit=False) + conn.set_session(autocommit=False) + except e: + logger.warn( + "Ignoring error %s when trying to switch from GIST to GIN" % (e,) + ) if isinstance(self.database_engine, PostgresEngine): yield self.runWithConnection(create_index) From 174eacc8ba71015003a78594ebc89cbe45d8384a Mon Sep 17 00:00:00 2001 From: hera Date: Tue, 9 Jan 2018 18:06:30 +0000 Subject: [PATCH 006/200] oops --- synapse/storage/room.py | 2 +- synapse/storage/search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9e2bf1ab4..0604f8f27 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -310,7 +310,7 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256KB'") + txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 7b1166f41..f52f3c859 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -106,7 +106,7 @@ class SearchStore(BackgroundUpdateStore): event_search_rows.append((event_id, room_id, key, value)) if 
isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256KB'") + txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" From 4a53f3a3e8fb7fe9df052790858ca3f7dbff78f9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 10:52:32 +0000 Subject: [PATCH 007/200] Ensure media is in local cache before thumbnailing --- synapse/rest/media/v1/media_repository.py | 20 ++++++++------- synapse/rest/media/v1/media_storage.py | 27 +++++++++++++++++++++ synapse/rest/media/v1/thumbnail_resource.py | 3 ++- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150..578d7f07c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -445,8 +445,10 @@ class MediaRepository(object): @defer.inlineCallbacks def generate_local_exact_thumbnail(self, media_id, t_width, t_height, - t_method, t_type): - input_path = self.filepaths.local_media_filepath(media_id) + t_method, t_type, url_cache): + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + None, media_id, url_cache=url_cache, + )) thumbnailer = Thumbnailer(input_path) t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -459,6 +461,7 @@ class MediaRepository(object): file_info = FileInfo( server_name=None, file_id=media_id, + url_cache=url_cache, thumbnail=True, thumbnail_width=t_width, thumbnail_height=t_height, @@ -485,7 +488,9 @@ class MediaRepository(object): @defer.inlineCallbacks def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, t_width, t_height, t_method, t_type): - input_path = self.filepaths.remote_media_filepath(server_name, file_id) + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + server_name, file_id, url_cache=False, + )) thumbnailer = 
Thumbnailer(input_path) t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -543,12 +548,9 @@ class MediaRepository(object): if not requirements: return - if server_name: - input_path = self.filepaths.remote_media_filepath(server_name, file_id) - elif url_cache: - input_path = self.filepaths.url_cache_filepath(media_id) - else: - input_path = self.filepaths.local_media_filepath(media_id) + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + server_name, file_id, url_cache=url_cache, + )) thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 001e84578..d3f54594a 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -15,6 +15,7 @@ from twisted.internet import defer, threads from twisted.protocols.basic import FileSender +from twisted.protocols.ftp import FileConsumer # This isn't FTP specific from ._base import Responder @@ -151,6 +152,32 @@ class MediaStorage(object): defer.returnValue(None) + @defer.inlineCallbacks + def ensure_media_is_in_local_cache(self, file_info): + """Ensures that the given file is in the local cache. Attempts to + download it from storage providers if it isn't. + + Args: + file_info (FileInfo) + + Returns: + Deferred[str]: Full path to local file + """ + path = self._file_info_to_path(file_info) + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + defer.returnValue(local_path) + + for provider in self.storage_providers: + res = yield provider.fetch(path, file_info) + if res: + with res: + with open(local_path, "w") as f: + res.write_to_consumer(FileConsumer(f)) + defer.returnValue(local_path) + + raise Exception("file could not be found") + def _file_info_to_path(self, file_info): """Converts file_info into a relative path. 
diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 8c9653843..49e4af514 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -162,7 +162,8 @@ class ThumbnailResource(Resource): # Okay, so we generate one. file_path = yield self.media_repo.generate_local_exact_thumbnail( - media_id, desired_width, desired_height, desired_method, desired_type + media_id, desired_width, desired_height, desired_method, desired_type, + url_cache=media_info["url_cache"], ) if file_path: From 2cf6a7bc2068350d0701b156deb2bb3a6f0da88a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:56:23 +0000 Subject: [PATCH 008/200] Use better file consumer --- synapse/rest/media/v1/media_storage.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index d3f54594a..5c3e4e5a6 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -15,10 +15,10 @@ from twisted.internet import defer, threads from twisted.protocols.basic import FileSender -from twisted.protocols.ftp import FileConsumer # This isn't FTP specific from ._base import Responder +from synapse.util.file_consumer import BackgroundFileConsumer from synapse.util.logcontext import make_deferred_yieldable import contextlib @@ -27,6 +27,7 @@ import logging import shutil import sys + logger = logging.getLogger(__name__) @@ -168,12 +169,17 @@ class MediaStorage(object): if os.path.exists(local_path): defer.returnValue(local_path) + dirname = os.path.dirname(local_path) + if not os.path.exists(dirname): + os.makedirs(dirname) + for provider in self.storage_providers: res = yield provider.fetch(path, file_info) if res: with res: - with open(local_path, "w") as f: - res.write_to_consumer(FileConsumer(f)) + consumer = BackgroundFileConsumer(open(local_path, "w")) + yield 
res.write_to_consumer(consumer) + yield consumer.wait() defer.returnValue(local_path) raise Exception("file could not be found") @@ -247,9 +253,8 @@ class FileResponder(Responder): def __init__(self, open_file): self.open_file = open_file - @defer.inlineCallbacks def write_to_consumer(self, consumer): - yield FileSender().beginFileTransfer(self.open_file, consumer) + return FileSender().beginFileTransfer(self.open_file, consumer) def __exit__(self, exc_type, exc_val, exc_tb): self.open_file.close() From ce4f66133e6d426dfd3323cef164703b2b79f6b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:56:35 +0000 Subject: [PATCH 009/200] Add unit tests --- tests/rest/media/__init__.py | 14 ++++ tests/rest/media/v1/__init__.py | 14 ++++ tests/rest/media/v1/test_media_storage.py | 81 +++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 tests/rest/media/__init__.py create mode 100644 tests/rest/media/v1/__init__.py create mode 100644 tests/rest/media/v1/test_media_storage.py diff --git a/tests/rest/media/__init__.py b/tests/rest/media/__init__.py new file mode 100644 index 000000000..a354d38ca --- /dev/null +++ b/tests/rest/media/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/rest/media/v1/__init__.py b/tests/rest/media/v1/__init__.py new file mode 100644 index 000000000..a354d38ca --- /dev/null +++ b/tests/rest/media/v1/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py new file mode 100644 index 000000000..c4de18157 --- /dev/null +++ b/tests/rest/media/v1/test_media_storage.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from twisted.internet import defer + +from synapse.rest.media.v1._base import FileInfo +from synapse.rest.media.v1.media_storage import MediaStorage +from synapse.rest.media.v1.filepath import MediaFilePaths +from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend + +from tests import unittest + +import os +import shutil +import tempfile + + +class MediaStorageTests(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-") + + self.primary_base_path = os.path.join(self.test_dir, "primary") + self.secondary_base_path = os.path.join(self.test_dir, "secondary") + + storage_providers = [FileStorageProviderBackend( + self.primary_base_path, self.secondary_base_path + )] + + self.filepaths = MediaFilePaths(self.primary_base_path) + self.media_storage = MediaStorage( + self.primary_base_path, self.filepaths, storage_providers, + ) + + def tearDown(self): + shutil.rmtree(self.test_dir) + + @defer.inlineCallbacks + def test_ensure_media_is_in_local_cache(self): + media_id = "some_media_id" + test_body = "Test\n" + + # First we create a file that is in a storage provider but not in the + # local primary media store + rel_path = self.filepaths.local_media_filepath_rel(media_id) + secondary_path = os.path.join(self.secondary_base_path, rel_path) + + os.makedirs(os.path.dirname(secondary_path)) + + with open(secondary_path, "w") as f: + f.write(test_body) + + # Now we run ensure_media_is_in_local_cache, which should copy the file + # to the local cache. 
+ file_info = FileInfo(None, media_id) + local_path = yield self.media_storage.ensure_media_is_in_local_cache(file_info) + + self.assertTrue(os.path.exists(local_path)) + + # Asserts the file is under the expected local cache directory + self.assertEquals( + os.path.commonprefix([self.primary_base_path, local_path]), + self.primary_base_path, + ) + + with open(local_path) as f: + body = f.read() + + self.assertEqual(test_body, body) From 87b7d727605c8e122adb768b7487dfcae830593f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 23:51:04 +0000 Subject: [PATCH 010/200] Add some comments about the reactor tick time metric --- synapse/metrics/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 2265e6e8d..e0cfb7d08 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -146,10 +146,15 @@ def runUntilCurrentTimer(func): num_pending += 1 num_pending += len(reactor.threadCallQueue) - start = time.time() * 1000 ret = func(*args, **kwargs) end = time.time() * 1000 + + # record the amount of wallclock time spent running pending calls. + # This is a proxy for the actual amount of time between reactor polls, + # since about 25% of time is actually spent running things triggered by + # I/O events, but that is harder to capture without rewriting half the + # reactor. tick_time.inc_by(end - start) pending_calls_metric.inc_by(num_pending) From 2c8526cac795fa2aa795e1a1aaae2ffb2558824d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 20 Jan 2018 00:55:44 +0000 Subject: [PATCH 011/200] Use a connection pool for the SimpleHttpClient In particular I hope this will help the pusher, which makes many requests to sygnal, and is currently negotiating SSL for each one. 
--- synapse/http/client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 4abb479ae..930d71301 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -30,6 +30,7 @@ from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web.client import ( BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent, readBody, PartialDownloadError, + HTTPConnectionPool, ) from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.http import PotentialDataLoss @@ -64,13 +65,19 @@ class SimpleHttpClient(object): """ def __init__(self, hs): self.hs = hs + + pool = HTTPConnectionPool(reactor) + pool.maxPersistentPerHost = 5 + pool.cachedConnectionTimeout = 2 * 60 + # The default context factory in Twisted 14.0.0 (which we require) is # BrowserLikePolicyForHTTPS which will do regular cert validation # 'like a browser' self.agent = Agent( reactor, connectTimeout=15, - contextFactory=hs.get_http_client_context_factory() + contextFactory=hs.get_http_client_context_factory(), + pool=pool, ) self.user_agent = hs.version_string self.clock = hs.get_clock() From 5552ed9a7fb1300142a7aebe7fc85b0bd2535bcf Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 20 Jan 2018 22:25:23 -0700 Subject: [PATCH 012/200] Add an admin route to get all the media in a room This is intended to be used by administrators to monitor the media that is passing through their server, if they wish. 
Signed-off-by: Travis Ralston --- synapse/rest/client/v1/admin.py | 22 ++++++ synapse/storage/room.py | 133 ++++++++++++++++++-------------- 2 files changed, 98 insertions(+), 57 deletions(-) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea..0615e5d80 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -289,6 +289,27 @@ class QuarantineMediaInRoom(ClientV1RestServlet): defer.returnValue((200, {"num_quarantined": num_quarantined})) +class ListMediaInRoom(ClientV1RestServlet): + """Lists all of the media in a given room. + """ + PATTERNS = client_path_patterns("/admin/room/(?P[^/]+)/media") + + def __init__(self, hs): + super(ListMediaInRoom, self).__init__(hs) + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + if not is_admin: + raise AuthError(403, "You are not a server admin") + + local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id) + + defer.returnValue((200, {"local": local_mxcs, "remote": remote_mxcs})) + + class ResetPasswordRestServlet(ClientV1RestServlet): """Post request to allow an administrator reset password for a user. This needs user to have administrator access in Synapse. 
@@ -487,3 +508,4 @@ def register_servlets(hs, http_server): SearchUsersRestServlet(hs).register(http_server) ShutdownRoomRestServlet(hs).register(http_server) QuarantineMediaInRoom(hs).register(http_server) + ListMediaInRoom(hs).register(http_server) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b..cd6899a4b 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -533,73 +533,92 @@ class RoomStore(SQLBaseStore): ) self.is_room_blocked.invalidate((room_id,)) + def get_media_mxcs_in_room(self, room_id): + def _get_media_ids_in_room(txn): + local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + local_media_mxcs = [] + remote_media_mxcs = [] + + # Convert the IDs to MXC URIs + for media_id in local_media_ids: + local_media_mxcs.append("mxc://%s/%s" % (self.hostname, media_id)) + for hostname, media_id in remote_media_ids: + remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) + + return local_media_mxcs, remote_media_mxcs + return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + def quarantine_media_ids_in_room(self, room_id, quarantined_by): """For a room loops through all events with media and quarantines the associated media """ - def _get_media_ids_in_room(txn): - mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") - - next_token = self.get_current_events_token() + 1 - + def _quarantine_media_in_room(txn): + local_media_mxcs, remote_media_mxcs = self._get_media_ids_in_room(txn, room_id) total_media_quarantined = 0 - while next_token: - sql = """ - SELECT stream_ordering, content FROM events - WHERE room_id = ? - AND stream_ordering < ? - AND contains_url = ? AND outlier = ? - ORDER BY stream_ordering DESC - LIMIT ? + # Now update all the tables to set the quarantined_by flag + + txn.executemany(""" + UPDATE local_media_repository + SET quarantined_by = ? + WHERE media_id = ? 
+ """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + + txn.executemany( """ - txn.execute(sql, (room_id, next_token, True, False, 100)) - - next_token = None - local_media_mxcs = [] - remote_media_mxcs = [] - for stream_ordering, content_json in txn: - next_token = stream_ordering - content = json.loads(content_json) - - content_url = content.get("url") - thumbnail_url = content.get("info", {}).get("thumbnail_url") - - for url in (content_url, thumbnail_url): - if not url: - continue - matches = mxc_re.match(url) - if matches: - hostname = matches.group(1) - media_id = matches.group(2) - if hostname == self.hostname: - local_media_mxcs.append(media_id) - else: - remote_media_mxcs.append((hostname, media_id)) - - # Now update all the tables to set the quarantined_by flag - - txn.executemany(""" - UPDATE local_media_repository + UPDATE remote_media_cache SET quarantined_by = ? - WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) - - txn.executemany( - """ - UPDATE remote_media_cache - SET quarantined_by = ? - WHERE media_origin AND media_id = ? - """, - ( - (quarantined_by, origin, media_id) - for origin, media_id in remote_media_mxcs - ) + WHERE media_origin AND media_id = ? 
+ """, + ( + (quarantined_by, origin, media_id) + for origin, media_id in remote_media_mxcs ) + ) - total_media_quarantined += len(local_media_mxcs) - total_media_quarantined += len(remote_media_mxcs) + total_media_quarantined += len(local_media_mxcs) + total_media_quarantined += len(remote_media_mxcs) return total_media_quarantined - return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room) + + def _get_media_ids_in_room(self, txn, room_id): + mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") + + next_token = self.get_current_events_token() + 1 + local_media_mxcs = [] + remote_media_mxcs = [] + + while next_token: + sql = """ + SELECT stream_ordering, content FROM events + WHERE room_id = ? + AND stream_ordering < ? + AND contains_url = ? AND outlier = ? + ORDER BY stream_ordering DESC + LIMIT ? + """ + txn.execute(sql, (room_id, next_token, True, False, 100)) + + next_token = None + for stream_ordering, content_json in txn: + next_token = stream_ordering + content = json.loads(content_json) + + content_url = content.get("url") + thumbnail_url = content.get("info", {}).get("thumbnail_url") + + for url in (content_url, thumbnail_url): + if not url: + continue + matches = mxc_re.match(url) + if matches: + hostname = matches.group(1) + media_id = matches.group(2) + if hostname == self.hostname: + local_media_mxcs.append(media_id) + else: + remote_media_mxcs.append((hostname, media_id)) + + return local_media_mxcs, remote_media_mxcs From a94d9b6b825c6b2db375460268567e637e10709a Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 20 Jan 2018 22:49:46 -0700 Subject: [PATCH 013/200] Appease the linter These are ids anyways, not mxc uris. 
Signed-off-by: Travis Ralston --- synapse/storage/room.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index cd6899a4b..d1d63f404 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -553,7 +553,7 @@ class RoomStore(SQLBaseStore): the associated media """ def _quarantine_media_in_room(txn): - local_media_mxcs, remote_media_mxcs = self._get_media_ids_in_room(txn, room_id) + local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -562,7 +562,7 @@ class RoomStore(SQLBaseStore): UPDATE local_media_repository SET quarantined_by = ? WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + """, ((quarantined_by, media_id) for media_id in local_media_ids)) txn.executemany( """ @@ -572,12 +572,12 @@ class RoomStore(SQLBaseStore): """, ( (quarantined_by, origin, media_id) - for origin, media_id in remote_media_mxcs + for origin, media_id in remote_media_ids ) ) - total_media_quarantined += len(local_media_mxcs) - total_media_quarantined += len(remote_media_mxcs) + total_media_quarantined += len(local_media_ids) + total_media_quarantined += len(remote_media_ids) return total_media_quarantined From 93efd7eb04601ee103f176819283e0298c660adc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Jan 2018 18:14:10 +0000 Subject: [PATCH 014/200] logging and debug for http pusher --- synapse/push/httppusher.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index c16f61452..f2517f39a 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -13,21 +13,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - -from synapse.push import PusherConfigException +import logging from twisted.internet import defer, reactor from twisted.internet.error import AlreadyCalled, AlreadyCancelled -import logging import push_rule_evaluator import push_tools - +import synapse +from synapse.push import PusherConfigException from synapse.util.logcontext import LoggingContext from synapse.util.metrics import Measure logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + +http_push_processed_counter = metrics.register_counter( + "http_pushes_processed", +) + +http_push_failed_counter = metrics.register_counter( + "http_pushes_failed", +) + class HttpPusher(object): INITIAL_BACKOFF_SEC = 1 # in seconds because that's what Twisted takes @@ -152,9 +161,15 @@ class HttpPusher(object): self.user_id, self.last_stream_ordering, self.max_stream_ordering ) + logger.info( + "Processing %i unprocessed push actions starting at stream_ordering %i", + len(unprocessed), self.last_stream_ordering, + ) + for push_action in unprocessed: processed = yield self._process_one(push_action) if processed: + http_push_processed_counter.inc() self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC self.last_stream_ordering = push_action['stream_ordering'] yield self.store.update_pusher_last_stream_ordering_and_success( @@ -169,6 +184,7 @@ class HttpPusher(object): self.failing_since ) else: + http_push_failed_counter.inc() if not self.failing_since: self.failing_since = self.clock.time_msec() yield self.store.update_pusher_failing_since( From 4528dd2443f4dc9e737bf4eeccedfb8807a1ea2c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Jan 2018 20:15:42 +0000 Subject: [PATCH 015/200] Fix logging and add user_id --- synapse/push/httppusher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py 
index f2517f39a..4a03af5b2 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -162,8 +162,9 @@ class HttpPusher(object): ) logger.info( - "Processing %i unprocessed push actions starting at stream_ordering %i", - len(unprocessed), self.last_stream_ordering, + "Processing %i unprocessed push actions for %s starting at " + "stream_ordering %s", + len(unprocessed), self.user_id, self.last_stream_ordering, ) for push_action in unprocessed: From 9a72b70630e111639243c6ab7867c4d4b970e2df Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 24 Jan 2018 11:07:24 +0100 Subject: [PATCH 016/200] fix thinko on 3pid whitelisting --- synapse/rest/client/v2_alpha/register.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 3abfe3547..c6f4680a7 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -368,8 +368,8 @@ class RegisterRestServlet(RestServlet): if auth_result: for login_type in [LoginType.EMAIL_IDENTITY, LoginType.MSISDN]: if login_type in auth_result: - medium = auth_result[login_type].threepid['medium'] - address = auth_result[login_type].threepid['address'] + medium = auth_result[login_type]['medium'] + address = auth_result[login_type]['address'] if not check_3pid_allowed(self.hs, medium, address): raise SynapseError( From 349c7399663b5fce856995a3a901019f5d210cc4 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:28:44 +0000 Subject: [PATCH 017/200] synapse 500s on a call to publicRooms in the case where the number of public rooms is zero, the specific cause is due to xrange trying to use a step value of zero, but if the total room number really is zero then it makes sense to just bail and save the extra processing --- synapse/handlers/room_list.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py 
index bb4007538..ae5db4d2c 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,6 +186,11 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) + #bail if no rooms to work on + if len(rooms_to_scan) == 0: + defer.returnValue([]) + + # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit # From d02e43b15f6b9b24ffe5e0c0d696f8fd71fc8af3 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:29:46 +0000 Subject: [PATCH 018/200] remove white space --- synapse/handlers/room_list.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index ae5db4d2c..9f8173644 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -190,7 +190,6 @@ class RoomListHandler(BaseHandler): if len(rooms_to_scan) == 0: defer.returnValue([]) - # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit # From 6c6e197b0a100f14fa69d8decba59e58c7c25b6c Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:47:46 +0000 Subject: [PATCH 019/200] fix PEP8 violation --- synapse/handlers/room_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 9f8173644..f466a64ed 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,7 +186,7 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - #bail if no rooms to work on + # bail if no rooms to work on if len(rooms_to_scan) == 0: defer.returnValue([]) From f6320835764dbb6cac058763737d67ca6359e3a9 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:52:17 +0000 Subject: [PATCH 020/200] fix return type, should be a dict --- synapse/handlers/room_list.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index f466a64ed..2ee63548c 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -188,7 +188,7 @@ class RoomListHandler(BaseHandler): # bail if no rooms to work on if len(rooms_to_scan) == 0: - defer.returnValue([]) + defer.returnValue({}) # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit From 86c4f49a31fe044a727c64e40009596050cdab95 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 26 Jan 2018 00:12:02 +0000 Subject: [PATCH 021/200] rather than try reconstruct the results object, better to guard against the xrange step argument being 0 --- synapse/handlers/room_list.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 2ee63548c..cf62ead81 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,10 +186,6 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - # bail if no rooms to work on - if len(rooms_to_scan) == 0: - defer.returnValue({}) - # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit # @@ -207,8 +203,8 @@ class RoomListHandler(BaseHandler): if limit: step = limit + 1 else: - step = len(rooms_to_scan) - + # step cannot be zero + step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 chunk = [] for i in xrange(0, len(rooms_to_scan), step): batch = rooms_to_scan[i:i + step] From 73560237d646835197e07e9e6c50674786a79a28 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 26 Jan 2018 00:15:10 +0000 Subject: [PATCH 022/200] add white space line --- synapse/handlers/room_list.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index cf62ead81..dfa09141e 100644 --- a/synapse/handlers/room_list.py +++ 
b/synapse/handlers/room_list.py @@ -205,6 +205,7 @@ class RoomListHandler(BaseHandler): else: # step cannot be zero step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 + chunk = [] for i in xrange(0, len(rooms_to_scan), step): batch = rooms_to_scan[i:i + step] From 2b918464979c958447e5405c26b0832adbb8913a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:12:31 +0000 Subject: [PATCH 023/200] Remove spurious unittest.DEBUG --- tests/handlers/test_e2e_keys.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 19f5ed6bc..d92bf240b 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -143,7 +143,6 @@ class E2eKeysHandlerTestCase(unittest.TestCase): except errors.SynapseError: pass - @unittest.DEBUG @defer.inlineCallbacks def test_claim_one_time_key(self): local_user = "@boris:" + self.hs.hostname From 6e9bf67f189c09a65eea4e348c5b9ac265ea096f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:35:21 +0000 Subject: [PATCH 024/200] Remove unused/bitrotted MemoryDataStore This isn't used, and looks thoroughly bitrotted. --- tests/utils.py | 164 ++++--------------------------------------------- 1 file changed, 13 insertions(+), 151 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 311604789..de33deb0b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -13,27 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.http.server import HttpServer -from synapse.api.errors import cs_error, CodeMessageException, StoreError -from synapse.api.constants import EventTypes -from synapse.storage.prepare_database import prepare_database -from synapse.storage.engines import create_engine -from synapse.server import HomeServer -from synapse.federation.transport import server -from synapse.util.ratelimitutils import FederationRateLimiter - -from synapse.util.logcontext import LoggingContext - -from twisted.internet import defer, reactor -from twisted.enterprise.adbapi import ConnectionPool - -from collections import namedtuple -from mock import patch, Mock import hashlib +from inspect import getcallargs import urllib import urlparse -from inspect import getcallargs +from mock import Mock, patch +from twisted.enterprise.adbapi import ConnectionPool +from twisted.internet import defer, reactor + +from synapse.api.errors import CodeMessageException, cs_error +from synapse.federation.transport import server +from synapse.http.server import HttpServer +from synapse.server import HomeServer +from synapse.storage.engines import create_engine +from synapse.storage.prepare_database import prepare_database +from synapse.util.logcontext import LoggingContext +from synapse.util.ratelimitutils import FederationRateLimiter @defer.inlineCallbacks @@ -334,140 +330,6 @@ class SQLiteMemoryDbPool(ConnectionPool, object): return create_engine(self.config.database_config) -class MemoryDataStore(object): - - Room = namedtuple( - "Room", - ["room_id", "is_public", "creator"] - ) - - def __init__(self): - self.tokens_to_users = {} - self.paths_to_content = {} - - self.members = {} - self.rooms = {} - - self.current_state = {} - self.events = [] - - class Snapshot(namedtuple("Snapshot", "room_id user_id membership_state")): - def fill_out_prev_events(self, event): - pass - - def snapshot_room(self, room_id, user_id, state_type=None, state_key=None): - return self.Snapshot( - room_id, user_id, 
self.get_room_member(user_id, room_id) - ) - - def register(self, user_id, token, password_hash): - if user_id in self.tokens_to_users.values(): - raise StoreError(400, "User in use.") - self.tokens_to_users[token] = user_id - - def get_user_by_access_token(self, token): - try: - return { - "name": self.tokens_to_users[token], - } - except Exception: - raise StoreError(400, "User does not exist.") - - def get_room(self, room_id): - try: - return self.rooms[room_id] - except Exception: - return None - - def store_room(self, room_id, room_creator_user_id, is_public): - if room_id in self.rooms: - raise StoreError(409, "Conflicting room!") - - room = MemoryDataStore.Room( - room_id=room_id, - is_public=is_public, - creator=room_creator_user_id - ) - self.rooms[room_id] = room - - def get_room_member(self, user_id, room_id): - return self.members.get(room_id, {}).get(user_id) - - def get_room_members(self, room_id, membership=None): - if membership: - return [ - v for k, v in self.members.get(room_id, {}).items() - if v.membership == membership - ] - else: - return self.members.get(room_id, {}).values() - - def get_rooms_for_user_where_membership_is(self, user_id, membership_list): - return [ - m[user_id] for m in self.members.values() - if user_id in m and m[user_id].membership in membership_list - ] - - def get_room_events_stream(self, user_id=None, from_key=None, to_key=None, - limit=0, with_feedback=False): - return ([], from_key) # TODO - - def get_joined_hosts_for_room(self, room_id): - return defer.succeed([]) - - def persist_event(self, event): - if event.type == EventTypes.Member: - room_id = event.room_id - user = event.state_key - self.members.setdefault(room_id, {})[user] = event - - if hasattr(event, "state_key"): - key = (event.room_id, event.type, event.state_key) - self.current_state[key] = event - - self.events.append(event) - - def get_current_state(self, room_id, event_type=None, state_key=""): - if event_type: - key = (room_id, event_type, 
state_key) - if self.current_state.get(key): - return [self.current_state.get(key)] - return None - else: - return [ - e for e in self.current_state - if e[0] == room_id - ] - - def set_presence_state(self, user_localpart, state): - return defer.succeed({"state": 0}) - - def get_presence_list(self, user_localpart, accepted): - return [] - - def get_room_events_max_id(self): - return "s0" # TODO (erikj) - - def get_send_event_level(self, room_id): - return defer.succeed(0) - - def get_power_level(self, room_id, user_id): - return defer.succeed(0) - - def get_add_state_level(self, room_id): - return defer.succeed(0) - - def get_room_join_rule(self, room_id): - # TODO (erikj): This should be configurable - return defer.succeed("invite") - - def get_ops_levels(self, room_id): - return defer.succeed((5, 5, 5)) - - def insert_client_ip(self, user, access_token, ip, user_agent): - return defer.succeed(None) - - def _format_call(args, kwargs): return ", ".join( ["%r" % (a) for a in args] + From 25adde9a04b82a2261404f9b6f9a25cec086f62f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 22:01:02 +0000 Subject: [PATCH 025/200] Factor out get_db_conn to HomeServer base class This function is identical to all subclasses, so we may as well push it up to the base class to reduce duplication (and make use of it in the tests) --- synapse/app/appservice.py | 13 ------------- synapse/app/client_reader.py | 13 ------------- synapse/app/federation_reader.py | 13 ------------- synapse/app/federation_sender.py | 13 ------------- synapse/app/frontend_proxy.py | 13 ------------- synapse/app/homeserver.py | 13 ------------- synapse/app/media_repository.py | 13 ------------- synapse/app/pusher.py | 13 ------------- synapse/app/synchrotron.py | 13 ------------- synapse/app/user_dir.py | 13 ------------- synapse/server.py | 17 +++++++++++++++++ 11 files changed, 17 insertions(+), 130 deletions(-) diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 
7d0c2879a..c6fe4516d 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -49,19 +49,6 @@ class AppserviceSlaveStore( class AppserviceServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = AppserviceSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index dc3f6efd4..3b3352798 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -64,19 +64,6 @@ class ClientReaderSlavedStore( class ClientReaderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = ClientReaderSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index a072291e1..4de43c41f 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -58,19 +58,6 @@ class FederationReaderSlavedStore( class FederationReaderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. 
- db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FederationReaderSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 09e9488f0..f760826d2 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -76,19 +76,6 @@ class FederationSenderSlaveStore( class FederationSenderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FederationSenderSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index ae531c0aa..e32ee8fe9 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -118,19 +118,6 @@ class FrontendProxySlavedStore( class FrontendProxyServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. 
- db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FrontendProxySlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 92ab3b311..cb82a415a 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -266,19 +266,6 @@ class SynapseHomeServer(HomeServer): except IncorrectDatabaseSetup as e: quit_with_error(e.message) - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(config_options): """ diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index eab1597aa..1ed1ca877 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -60,19 +60,6 @@ class MediaRepositorySlavedStore( class MediaRepositoryServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. 
- db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = MediaRepositorySlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 7fbbb0b0e..32ccea3f1 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -81,19 +81,6 @@ class PusherSlaveStore( class PusherServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = PusherSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 0abba3016..f87531f1b 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -246,19 +246,6 @@ class SynchrotronApplicationService(object): class SynchrotronServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. 
- db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = SynchrotronSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index a48c4a2ae..494ccb702 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -92,19 +92,6 @@ class UserDirectorySlaveStore( class UserDirectoryServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = UserDirectorySlaveStore(self.get_db_conn(), self) diff --git a/synapse/server.py b/synapse/server.py index 99693071b..ff8a8fbc4 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -307,6 +307,23 @@ class HomeServer(object): **self.db_config.get("args", {}) ) + def get_db_conn(self, run_new_connection=True): + """Makes a new connection to the database, skipping the db pool + + Returns: + Connection: a connection object implementing the PEP-249 spec + """ + # Any param beginning with cp_ is a parameter for adbapi, and should + # not be passed to the database engine. 
+ db_params = { + k: v for k, v in self.db_config.get("args", {}).items() + if not k.startswith("cp_") + } + db_conn = self.database_engine.module.connect(**db_params) + if run_new_connection: + self.database_engine.on_new_connection(db_conn) + return db_conn + def build_media_repository_resource(self): # build the media repo resource. This indirects through the HomeServer # to ensure that we only have a single instance of From b178eca2616d8210a212fbd04be253158fc7fc47 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:12:46 +0000 Subject: [PATCH 026/200] Run on_new_connection for unit tests Configure the connectionpool used for unit tests to run the `on_new_connection` function. --- tests/utils.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index de33deb0b..ab5e2341c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -66,13 +66,19 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): if "clock" not in kargs: kargs["clock"] = MockClock() + db_engine = create_engine(config.database_config) if datastore is None: - db_pool = SQLiteMemoryDbPool() + # we need to configure the connection pool to run the on_new_connection + # function, so that we can test code that uses custom sqlite functions + # (like rank). 
+ db_pool = SQLiteMemoryDbPool( + cp_openfun=db_engine.on_new_connection, + ) yield db_pool.prepare() hs = HomeServer( name, db_pool=db_pool, config=config, version_string="Synapse/tests", - database_engine=create_engine(config.database_config), + database_engine=db_engine, get_db_conn=db_pool.get_db_conn, room_list_handler=object(), tls_server_context_factory=Mock(), @@ -83,7 +89,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): hs = HomeServer( name, db_pool=None, datastore=datastore, config=config, version_string="Synapse/tests", - database_engine=create_engine(config.database_config), + database_engine=db_engine, room_list_handler=object(), tls_server_context_factory=Mock(), **kargs @@ -303,11 +309,15 @@ class MockClock(object): class SQLiteMemoryDbPool(ConnectionPool, object): - def __init__(self): + def __init__(self, **kwargs): + connkw = { + "cp_min": 1, + "cp_max": 1, + } + connkw.update(kwargs) + super(SQLiteMemoryDbPool, self).__init__( - "sqlite3", ":memory:", - cp_min=1, - cp_max=1, + "sqlite3", ":memory:", **connkw ) self.config = Mock() From d7eacc4f87ca8353f612171fa7e070c14e3b35ee Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:14:24 +0000 Subject: [PATCH 027/200] Create dbpool as normal in tests ... instead of creating our own special SQLiteMemoryDbPool, whose purpose was a bit of a mystery. For some reason this makes one of the tests run slightly slower, so bump the sleep(). Sorry. 
--- tests/crypto/test_keyring.py | 4 +-- tests/utils.py | 60 ++++++++++-------------------------- 2 files changed, 19 insertions(+), 45 deletions(-) diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index c899fecf5..d4ec02ffc 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -167,7 +167,7 @@ class KeyringTestCase(unittest.TestCase): # wait a tick for it to send the request to the perspectives server # (it first tries the datastore) - yield async.sleep(0.005) + yield async.sleep(1) # XXX find out why this takes so long! self.http_client.post_json.assert_called_once() self.assertIs(LoggingContext.current_context(), context_11) @@ -183,7 +183,7 @@ class KeyringTestCase(unittest.TestCase): res_deferreds_2 = kr.verify_json_objects_for_server( [("server10", json1)], ) - yield async.sleep(0.005) + yield async.sleep(01) self.http_client.post_json.assert_not_called() res_deferreds_2[0].addBoth(self.check_context, None) diff --git a/tests/utils.py b/tests/utils.py index ab5e2341c..50de4199b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -19,7 +19,6 @@ import urllib import urlparse from mock import Mock, patch -from twisted.enterprise.adbapi import ConnectionPool from twisted.internet import defer, reactor from synapse.api.errors import CodeMessageException, cs_error @@ -60,30 +59,37 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.update_user_directory = False config.use_frozen_dicts = True - config.database_config = {"name": "sqlite3"} config.ldap_enabled = False if "clock" not in kargs: kargs["clock"] = MockClock() + config.database_config = { + "name": "sqlite3", + "args": { + "database": ":memory:", + "cp_min": 1, + "cp_max": 1, + }, + } db_engine = create_engine(config.database_config) + + # we need to configure the connection pool to run the on_new_connection + # function, so that we can test code that uses custom sqlite functions + # (like rank). 
+ config.database_config["args"]["cp_openfun"] = db_engine.on_new_connection + if datastore is None: - # we need to configure the connection pool to run the on_new_connection - # function, so that we can test code that uses custom sqlite functions - # (like rank). - db_pool = SQLiteMemoryDbPool( - cp_openfun=db_engine.on_new_connection, - ) - yield db_pool.prepare() hs = HomeServer( - name, db_pool=db_pool, config=config, + name, config=config, + db_config=config.database_config, version_string="Synapse/tests", database_engine=db_engine, - get_db_conn=db_pool.get_db_conn, room_list_handler=object(), tls_server_context_factory=Mock(), **kargs ) + yield prepare_database(hs.get_db_conn(), db_engine, config) hs.setup() else: hs = HomeServer( @@ -308,38 +314,6 @@ class MockClock(object): return d -class SQLiteMemoryDbPool(ConnectionPool, object): - def __init__(self, **kwargs): - connkw = { - "cp_min": 1, - "cp_max": 1, - } - connkw.update(kwargs) - - super(SQLiteMemoryDbPool, self).__init__( - "sqlite3", ":memory:", **connkw - ) - - self.config = Mock() - self.config.password_providers = [] - self.config.database_config = {"name": "sqlite3"} - - def prepare(self): - engine = self.create_engine() - return self.runWithConnection( - lambda conn: prepare_database(conn, engine, self.config) - ) - - def get_db_conn(self): - conn = self.connect() - engine = self.create_engine() - prepare_database(conn, engine, self.config) - return conn - - def create_engine(self): - return create_engine(self.config.database_config) - - def _format_call(args, kwargs): return ", ".join( ["%r" % (a) for a in args] + From 88b9c5cbf0e571ba955bbd483a0169a0b79c78c7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:54:38 +0000 Subject: [PATCH 028/200] Make it possible to run tests against postgres --- tests/utils.py | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 
50de4199b..d1f59551e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -25,11 +25,17 @@ from synapse.api.errors import CodeMessageException, cs_error from synapse.federation.transport import server from synapse.http.server import HttpServer from synapse.server import HomeServer +from synapse.storage import PostgresEngine from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database from synapse.util.logcontext import LoggingContext from synapse.util.ratelimitutils import FederationRateLimiter +# set this to True to run the tests against postgres instead of sqlite. +# It requires you to have a local postgres database called synapse_test, within +# which ALL TABLES WILL BE DROPPED +USE_POSTGRES_FOR_TESTS = False + @defer.inlineCallbacks def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): @@ -64,14 +70,25 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): if "clock" not in kargs: kargs["clock"] = MockClock() - config.database_config = { - "name": "sqlite3", - "args": { - "database": ":memory:", - "cp_min": 1, - "cp_max": 1, - }, - } + if USE_POSTGRES_FOR_TESTS: + config.database_config = { + "name": "psycopg2", + "args": { + "database": "synapse_test", + "cp_min": 1, + "cp_max": 5, + }, + } + else: + config.database_config = { + "name": "sqlite3", + "args": { + "database": ":memory:", + "cp_min": 1, + "cp_max": 1, + }, + } + db_engine = create_engine(config.database_config) # we need to configure the connection pool to run the on_new_connection @@ -89,7 +106,15 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): tls_server_context_factory=Mock(), **kargs ) - yield prepare_database(hs.get_db_conn(), db_engine, config) + db_conn = hs.get_db_conn() + # make sure that the database is empty + if isinstance(db_engine, PostgresEngine): + cur = db_conn.cursor() + cur.execute("SELECT tablename FROM pg_tables where schemaname='public'") + rows 
= cur.fetchall() + for r in rows: + cur.execute("DROP TABLE %s CASCADE" % r[0]) + yield prepare_database(db_conn, db_engine, config) hs.setup() else: hs = HomeServer( From 46022025ea35895af3cf8d15973fb94a3a6b4f38 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:20:28 +0000 Subject: [PATCH 029/200] Fix SQL for user search fix some syntax errors for user search when search_all_users is enabled fixes #2801, hopefully --- synapse/storage/user_directory.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index f150ef010..dfdcbb318 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,13 +641,12 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - # dummy to keep the number of binds & aliases the same + # make s.user_id null to keep the ordering algorithm happy join_clause = """ - LEFT JOIN ( - SELECT NULL as user_id WHERE NULL = ? - ) AS s USING (user_id)" + CROSS JOIN (SELECT NULL as user_id) AS s """ - where_clause = "" + join_args = () + where_clause = "1=1" else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) @@ -656,6 +655,7 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND share_private ) AS s USING (user_id) """ + join_args = (user_id,) where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" if isinstance(self.database_engine, PostgresEngine): @@ -697,7 +697,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) - args = (user_id, full_query, exact_query, prefix_query, limit + 1,) + args = join_args + (full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -715,7 +715,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? 
""" % (join_clause, where_clause) - args = (user_id, search_query, limit + 1) + args = join_args + (search_query, limit + 1) else: # This should be unreachable. raise Exception("Unrecognized database engine") From d1f3490e75df49bf417eeeaef115df16729244ce Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:25:03 +0000 Subject: [PATCH 030/200] Add tests for user directory search --- tests/storage/test_user_directory.py | 88 ++++++++++++++++++++++++++++ tests/utils.py | 1 + 2 files changed, 89 insertions(+) create mode 100644 tests/storage/test_user_directory.py diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py new file mode 100644 index 000000000..0891308f2 --- /dev/null +++ b/tests/storage/test_user_directory.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.storage import UserDirectoryStore +from synapse.storage.roommember import ProfileInfo +from tests import unittest +from tests.utils import setup_test_homeserver + +ALICE = "@alice:a" +BOB = "@bob:b" +BOBBY = "@bobby:a" + + +class UserDirectoryStoreTestCase(unittest.TestCase): + @defer.inlineCallbacks + def setUp(self): + self.hs = yield setup_test_homeserver() + self.store = UserDirectoryStore(None, self.hs) + + # alice and bob are both in !room_id. bobby is not but shares + # a homeserver with alice. 
+ yield self.store.add_profiles_to_user_dir( + "!room:id", + { + ALICE: ProfileInfo(None, "alice"), + BOB: ProfileInfo(None, "bob"), + BOBBY: ProfileInfo(None, "bobby") + }, + ) + yield self.store.add_users_to_public_room( + "!room:id", + [ALICE, BOB], + ) + yield self.store.add_users_who_share_room( + "!room:id", + False, + ( + (ALICE, BOB), + (BOB, ALICE), + ), + ) + + @defer.inlineCallbacks + def test_search_user_dir(self): + # normally when alice searches the directory she should just find + # bob because bobby doesn't share a room with her. + r = yield self.store.search_user_dir(ALICE, "bob", 10) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual(r["results"][0], { + "user_id": BOB, + "display_name": "bob", + "avatar_url": None, + }) + + @defer.inlineCallbacks + def test_search_user_dir_all_users(self): + self.hs.config.user_directory_search_all_users = True + try: + r = yield self.store.search_user_dir(ALICE, "bob", 10) + self.assertFalse(r["limited"]) + self.assertEqual(2, len(r["results"])) + self.assertDictEqual(r["results"][0], { + "user_id": BOB, + "display_name": "bob", + "avatar_url": None, + }) + self.assertDictEqual(r["results"][1], { + "user_id": BOBBY, + "display_name": "bobby", + "avatar_url": None, + }) + finally: + self.hs.config.user_directory_search_all_users = False diff --git a/tests/utils.py b/tests/utils.py index d1f59551e..8efd3a347 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -59,6 +59,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.email_enable_notifs = False config.block_non_admin_invites = False config.federation_domain_whitelist = None + config.user_directory_search_all_users = False # disable user directory updates, because they get done in the # background, which upsets the test runner. 
From 02ba118f81009b1c3ae290a17c35e1b9d75e802b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 14:30:15 +0000 Subject: [PATCH 031/200] Increase http conn pool size --- synapse/http/client.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 930d71301..f3e4973c2 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -18,6 +18,7 @@ from OpenSSL.SSL import VERIFY_NONE from synapse.api.errors import ( CodeMessageException, MatrixCodeMessageException, SynapseError, Codes, ) +from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable from synapse.util import logcontext import synapse.metrics @@ -67,7 +68,11 @@ class SimpleHttpClient(object): self.hs = hs pool = HTTPConnectionPool(reactor) - pool.maxPersistentPerHost = 5 + + # the pusher makes lots of concurrent SSL connections to sygnal, and + # tends to do so in batches, so we need to allow the pool to keep lots + # of idle connections around. 
+ pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5)) pool.cachedConnectionTimeout = 2 * 60 # The default context factory in Twisted 14.0.0 (which we require) is From e051abd20b1978ddc53723c8233bc54742243045 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Jan 2018 21:06:54 +0000 Subject: [PATCH 032/200] add appid/device_display_name to to pusher logging --- synapse/push/httppusher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 4a03af5b2..02bd013ca 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -164,7 +164,7 @@ class HttpPusher(object): logger.info( "Processing %i unprocessed push actions for %s starting at " "stream_ordering %s", - len(unprocessed), self.user_id, self.last_stream_ordering, + len(unprocessed), self.name, self.last_stream_ordering, ) for push_action in unprocessed: @@ -342,7 +342,7 @@ class HttpPusher(object): @defer.inlineCallbacks def _send_badge(self, badge): - logger.info("Sending updated badge count %d to %r", badge, self.user_id) + logger.info("Sending updated badge count %d to %s", badge, self.name) d = { 'notification': { 'id': '', From 03dd745fe28a00c8788a2147d4f5c2a852182429 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 15:49:06 +0000 Subject: [PATCH 033/200] Better logging when pushes fail --- synapse/push/httppusher.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 02bd013ca..2cbac571b 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -333,7 +333,10 @@ class HttpPusher(object): try: resp = yield self.http_client.post_json_get_json(self.url, notification_dict) except Exception: - logger.warn("Failed to push %s ", self.url) + logger.warn( + "Failed to push event %s to %s", + event.event_id, self.name, exc_info=True, + ) defer.returnValue(False) rejected 
= [] if 'rejected' in resp: @@ -364,7 +367,10 @@ class HttpPusher(object): try: resp = yield self.http_client.post_json_get_json(self.url, d) except Exception: - logger.exception("Failed to push %s ", self.url) + logger.warn( + "Failed to send badge count to %s", + self.name, exc_info=True, + ) defer.returnValue(False) rejected = [] if 'rejected' in resp: From b387ee17b68e4398a8fa26fdf122b773a046e429 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 14:00:11 +0000 Subject: [PATCH 034/200] Improve exception handling in persist_event 1. use `deferred.errback()` instead of `deferred.errback(e)`, which means that a Failure object will be constructed using the current exception state, *including* its stack trace - so the stack trace is saved in the Failure, leading to better exception reports. 2. Set `consumeErrors=True` on the ObservableDeferred, because we know that there will always be at least one observer - which avoids a spurious "CRITICAL: unhandled exception in Deferred" error in the logs --- synapse/storage/events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec9..33fccfa7a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -110,7 +110,7 @@ class _EventPeristenceQueue(object): end_item.events_and_contexts.extend(events_and_contexts) return end_item.deferred.observe() - deferred = ObservableDeferred(defer.Deferred()) + deferred = ObservableDeferred(defer.Deferred(), consumeErrors=True) queue.append(self._EventPersistQueueItem( events_and_contexts=events_and_contexts, @@ -152,8 +152,8 @@ class _EventPeristenceQueue(object): try: ret = yield per_item_callback(item) item.deferred.callback(ret) - except Exception as e: - item.deferred.errback(e) + except Exception: + item.deferred.errback() finally: queue = self._event_persist_queues.pop(room_id, None) if queue: From d413a2ba981b3e8d443d2ccdd0c8de7d43b62e73 Mon Sep 17 
00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:10:26 +0000 Subject: [PATCH 035/200] Remove unused "event_type" param on state.get_current_state_ids this param doesn't seem to be used, and is a bit pointless anyway because it can easily be replicated by the caller. It is also horrible, because it changes the return type of the method. --- synapse/state.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 1f9abf9d3..18f1edef4 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -146,8 +146,7 @@ class StateHandler(object): defer.returnValue(state) @defer.inlineCallbacks - def get_current_state_ids(self, room_id, event_type=None, state_key="", - latest_event_ids=None): + def get_current_state_ids(self, room_id, latest_event_ids=None): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) @@ -155,10 +154,6 @@ class StateHandler(object): ret = yield self.resolve_state_groups(room_id, latest_event_ids) state = ret.state - if event_type: - defer.returnValue(state.get((event_type, state_key))) - return - defer.returnValue(state) @defer.inlineCallbacks From adfc0c95391bf43d48f7069823fc0f4902803402 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 17:39:55 +0000 Subject: [PATCH 036/200] docstring for get_current_state_ids --- synapse/state.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/synapse/state.py b/synapse/state.py index 18f1edef4..4c8247e7c 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -147,6 +147,19 @@ class StateHandler(object): @defer.inlineCallbacks def get_current_state_ids(self, room_id, latest_event_ids=None): + """Get the current state, or the state at a set of events, for a room + + Args: + room_id (str): + + latest_event_ids (iterable[str]|None): if given, the forward + extremities to resolve. 
If None, we look them up from the + database (via a cache) + + Returns: + Deferred[dict[(str, str), str)]]: the state dict, mapping from + (event_type, state_key) -> event_id + """ if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) From 773f0eed1efa114bb32f6e54e8edc038a04d3526 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 15:02:51 +0000 Subject: [PATCH 037/200] Fix sql error in quarantine_media --- synapse/storage/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b..d91c85307 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -589,7 +589,7 @@ class RoomStore(SQLBaseStore): """ UPDATE remote_media_cache SET quarantined_by = ? - WHERE media_origin AND media_id = ? + WHERE media_origin = ? AND media_id = ? """, ( (quarantined_by, origin, media_id) From af19f5e9aa47dd9875df3937d514ae7708eed539 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 17:52:03 +0000 Subject: [PATCH 038/200] Remove spurious log argument ... 
which would cause scary-looking and unhelpful errors in the log on dns fail --- synapse/http/endpoint.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index e2b99ef3b..87639b915 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -357,8 +357,7 @@ def _get_hosts_for_srv_record(dns_client, host): def eb(res, record_type): if res.check(DNSNameError): return [] - logger.warn("Error looking up %s for %s: %s", - record_type, host, res, res.value) + logger.warn("Error looking up %s for %s: %s", record_type, host, res) return res # no logcontexts here, so we can safely fire these off and gatherResults From 63c4383927cfb759046ccf576e0c7e35a70f6168 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 08:07:41 -0700 Subject: [PATCH 039/200] Documentation and naming Signed-off-by: Travis Ralston --- synapse/storage/room.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index d1d63f404..5dfb0e19f 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -534,8 +534,17 @@ class RoomStore(SQLBaseStore): self.is_room_blocked.invalidate((room_id,)) def get_media_mxcs_in_room(self, room_id): - def _get_media_ids_in_room(txn): - local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + """Retrieves all the local and remote media MXC URIs in a given room + + Args: + room_id (str) + + Returns: + The local and remote media as a lists of tuples where the key is + the hostname and the value is the media ID. 
+ """ + def _get_media_mxcs_in_room_txn(txn): + local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) local_media_mxcs = [] remote_media_mxcs = [] @@ -546,14 +555,14 @@ class RoomStore(SQLBaseStore): remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) return local_media_mxcs, remote_media_mxcs - return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + return self.runInteraction("get_media_ids_in_room", _get_media_mxcs_in_room_txn) def quarantine_media_ids_in_room(self, room_id, quarantined_by): """For a room loops through all events with media and quarantines the associated media """ - def _quarantine_media_in_room(txn): - local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + def _quarantine_media_in_room_txn(txn): + local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -581,9 +590,19 @@ class RoomStore(SQLBaseStore): return total_media_quarantined - return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room) + return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room_txn) - def _get_media_ids_in_room(self, txn, room_id): + def _get_media_mxcs_in_room_txn(self, txn, room_id): + """Retrieves all the local and remote media MXC URIs in a given room + + Args: + txn (cursor) + room_id (str) + + Returns: + The local and remote media as a lists of tuples where the key is + the hostname and the value is the media ID. 
+ """ mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") next_token = self.get_current_events_token() + 1 From 3af53c183a0ab5d30ce0fb40e9b8eee8da7ad75a Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 08:15:59 -0700 Subject: [PATCH 040/200] Add admin api documentation for list media endpoint Signed-off-by: Travis Ralston --- docs/admin_api/media_admin_api.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 docs/admin_api/media_admin_api.md diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md new file mode 100644 index 000000000..abdbc1ea8 --- /dev/null +++ b/docs/admin_api/media_admin_api.md @@ -0,0 +1,23 @@ +# List all media in a room + +This API gets a list of known media in a room. + +The API is: +``` +GET /_matrix/client/r0/admin/room//media +``` +including an `access_token` of a server admin. + +It returns a JSON body like the following: +``` +{ + "local": [ + "mxc://localhost/xwvutsrqponmlkjihgfedcba", + "mxc://localhost/abcdefghijklmnopqrstuvwx" + ], + "remote": [ + "mxc://matrix.org/xwvutsrqponmlkjihgfedcba", + "mxc://matrix.org/abcdefghijklmnopqrstuvwx" + ] +} +``` From 326189c25a14a2506831e74f6f111754ad656916 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 31 Jan 2018 18:43:54 +0000 Subject: [PATCH 041/200] Script to move remote media to another media store --- scripts/move_remote_media_to_new_store.py | 133 ++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100755 scripts/move_remote_media_to_new_store.py diff --git a/scripts/move_remote_media_to_new_store.py b/scripts/move_remote_media_to_new_store.py new file mode 100755 index 000000000..7914ead88 --- /dev/null +++ b/scripts/move_remote_media_to_new_store.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2017 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Moves a list of remote media from one media store to another. + +The input should be a list of media files to be moved, one per line. Each line +should be formatted:: + + | + +This can be extracted from postgres with:: + + psql --tuples-only -A -c "select media_origin, filesystem_id from + matrix.remote_media_cache where ..." + +To use, pipe the above into:: + + PYTHON_PATH=. ./scripts/move_remote_media_to_new_store.py +""" + +from __future__ import print_function + +import argparse +import logging + +import sys + +import os + +import shutil + +from synapse.rest.media.v1.filepath import MediaFilePaths + +logger = logging.getLogger() + + +def main(src_repo, dest_repo): + src_paths = MediaFilePaths(src_repo) + dest_paths = MediaFilePaths(dest_repo) + for line in sys.stdin: + line = line.strip() + parts = line.split('|') + if len(parts) != 2: + print("Unable to parse input line %s" % line, file=sys.stderr) + exit(1) + + move_media(parts[0], parts[1], src_paths, dest_paths) + + +def move_media(origin_server, file_id, src_paths, dest_paths): + """Move the given file, and any thumbnails, to the dest repo + + Args: + origin_server (str): + file_id (str): + src_paths (MediaFilePaths): + dest_paths (MediaFilePaths): + """ + logger.info("%s/%s", origin_server, file_id) + + # check that the original exists + original_file = src_paths.remote_media_filepath(origin_server, file_id) + if not os.path.exists(original_file): + logger.warn( + "Original for %s/%s (%s) does not exist", + origin_server, file_id, original_file, + ) + else: + mkdir_and_move( + 
original_file, + dest_paths.remote_media_filepath(origin_server, file_id), + ) + + # now look for thumbnails + original_thumb_dir = src_paths.remote_media_thumbnail_dir( + origin_server, file_id, + ) + if not os.path.exists(original_thumb_dir): + return + + mkdir_and_move( + original_thumb_dir, + dest_paths.remote_media_thumbnail_dir(origin_server, file_id) + ) + + +def mkdir_and_move(original_file, dest_file): + dirname = os.path.dirname(dest_file) + if not os.path.exists(dirname): + logger.debug("mkdir %s", dirname) + os.makedirs(dirname) + logger.debug("mv %s %s", original_file, dest_file) + shutil.move(original_file, dest_file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class = argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "-v", action='store_true', help='enable debug logging') + parser.add_argument( + "src_repo", + help="Path to source content repo", + ) + parser.add_argument( + "dest_repo", + help="Path to source content repo", + ) + args = parser.parse_args() + + logging_config = { + "level": logging.DEBUG if args.v else logging.INFO, + "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" + } + logging.basicConfig(**logging_config) + + main(args.src_repo, args.dest_repo) From e1e4ec9f9d6570e7f5a3f519113516f47ec872e4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:43:40 +0000 Subject: [PATCH 042/200] factor _get_new_state_after_events out of _calculate_state_delta This reduces the scope of a bunch of variables --- synapse/storage/events.py | 57 ++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 33fccfa7a..dd28c2efe 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -386,11 +386,18 @@ class EventsStore(SQLBaseStore): if all_single_prev_not_state: continue - state = yield 
self._calculate_state_delta( - room_id, ev_ctx_rm, new_latest_event_ids + logger.info( + "Calculating state delta for room %s", room_id, ) - if state: - current_state_for_room[room_id] = state + current_state = yield self._get_new_state_after_events( + ev_ctx_rm, new_latest_event_ids, + ) + if current_state is not None: + delta = yield self._calculate_state_delta( + room_id, current_state, + ) + if delta is not None: + current_state_for_room[room_id] = delta yield self.runInteraction( "persist_events", @@ -467,20 +474,22 @@ class EventsStore(SQLBaseStore): defer.returnValue(new_latest_event_ids) @defer.inlineCallbacks - def _calculate_state_delta(self, room_id, events_context, new_latest_event_ids): - """Calculate the new state deltas for a room. + def _get_new_state_after_events(self, events_context, new_latest_event_ids): + """Calculate the current state dict after adding some new events to + a room - Assumes that we are only persisting events for one room at a time. + Args: + events_context (list[(EventBase, EventContext)]): + events and contexts which are being added to the room + + new_latest_event_ids (iterable[str]): + the new forward extremities for the room. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, - i.e. (type, state_key) -> event_id. `to_delete` are the entries to - first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. - May return None if there are no changes to be applied. + Deferred[dict[(str,str), str]|None]: + None if there are no changes to the room state, or + a dict of (type, state_key) -> event_id]. 
""" - # Now we need to work out the different state sets for - # each state extremities state_sets = [] state_groups = set() missing_event_ids = [] @@ -523,12 +532,12 @@ class EventsStore(SQLBaseStore): state_sets.extend(group_to_state.itervalues()) if not new_latest_event_ids: - current_state = {} + defer.returnValue({}) elif was_updated: if len(state_sets) == 1: # If there is only one state set, then we know what the current # state is. - current_state = state_sets[0] + defer.returnValue(state_sets[0]) else: # We work out the current state by passing the state sets to the # state resolution algorithm. It may ask for some events, including @@ -537,8 +546,7 @@ class EventsStore(SQLBaseStore): # up in the db. logger.info( - "Resolving state for %s with %i state sets", - room_id, len(state_sets), + "Resolving state with %i state sets", len(state_sets), ) events_map = {ev.event_id: ev for ev, _ in events_context} @@ -567,9 +575,22 @@ class EventsStore(SQLBaseStore): state_sets, state_map_factory=get_events, ) + defer.returnValue(current_state) else: return + @defer.inlineCallbacks + def _calculate_state_delta(self, room_id, current_state): + """Calculate the new state deltas for a room. + + Assumes that we are only persisting events for one room at a time. + + Returns: + 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + i.e. (type, state_key) -> event_id. `to_delete` are the entries to + first be deleted from current_state_events, `to_insert` are entries + to insert. `new_state` is the full set of state. 
+ """ existing_state = yield self.get_current_state_ids(room_id) existing_events = set(existing_state.itervalues()) From e16e45b1b44c9b9f7d44e6b50406268869759397 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 15:30:38 -0700 Subject: [PATCH 043/200] pep8 Signed-off-by: Travis Ralston --- synapse/storage/room.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5dfb0e19f..961ad5abc 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -544,14 +544,14 @@ class RoomStore(SQLBaseStore): the hostname and the value is the media ID. """ def _get_media_mxcs_in_room_txn(txn): - local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) + local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id) local_media_mxcs = [] remote_media_mxcs = [] # Convert the IDs to MXC URIs - for media_id in local_media_ids: + for media_id in local_mxcs: local_media_mxcs.append("mxc://%s/%s" % (self.hostname, media_id)) - for hostname, media_id in remote_media_ids: + for hostname, media_id in remote_mxcs: remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) return local_media_mxcs, remote_media_mxcs @@ -562,7 +562,7 @@ class RoomStore(SQLBaseStore): the associated media """ def _quarantine_media_in_room_txn(txn): - local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) + local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -571,7 +571,7 @@ class RoomStore(SQLBaseStore): UPDATE local_media_repository SET quarantined_by = ? WHERE media_id = ? 
- """, ((quarantined_by, media_id) for media_id in local_media_ids)) + """, ((quarantined_by, media_id) for media_id in local_mxcs)) txn.executemany( """ @@ -581,16 +581,19 @@ class RoomStore(SQLBaseStore): """, ( (quarantined_by, origin, media_id) - for origin, media_id in remote_media_ids + for origin, media_id in remote_mxcs ) ) - total_media_quarantined += len(local_media_ids) - total_media_quarantined += len(remote_media_ids) + total_media_quarantined += len(local_mxcs) + total_media_quarantined += len(remote_mxcs) return total_media_quarantined - return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room_txn) + return self.runInteraction( + "quarantine_media_in_room", + _quarantine_media_in_room_txn, + ) def _get_media_mxcs_in_room_txn(self, txn, room_id): """Retrieves all the local and remote media MXC URIs in a given room From 0cbda53819dd66df05c872ea021767336b457769 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 08:48:41 +0000 Subject: [PATCH 044/200] Rename resolve_state_groups -> resolve_state_groups_for_events (to make way for a method that actually just does the state group resolution) --- synapse/handlers/federation.py | 11 +++++------ synapse/state.py | 12 ++++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 677532c87..8ee9434c9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -808,13 +808,12 @@ class FederationHandler(BaseHandler): event_ids = list(extremities.keys()) logger.debug("calling resolve_state_groups in _maybe_backfill") + resolve = logcontext.preserve_fn( + self.state_handler.resolve_state_groups_for_events + ) states = yield logcontext.make_deferred_yieldable(defer.gatherResults( - [ - logcontext.preserve_fn(self.state_handler.resolve_state_groups)( - room_id, [e] - ) - for e in event_ids - ], consumeErrors=True, + [resolve(room_id, [e]) for e in event_ids], + 
consumeErrors=True, )) states = dict(zip(event_ids, [s.state for s in states])) diff --git a/synapse/state.py b/synapse/state.py index 4c8247e7c..8daf90013 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -127,7 +127,7 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_state") - ret = yield self.resolve_state_groups(room_id, latest_event_ids) + ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) state = ret.state if event_type: @@ -164,7 +164,7 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_state_ids") - ret = yield self.resolve_state_groups(room_id, latest_event_ids) + ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) state = ret.state defer.returnValue(state) @@ -174,7 +174,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_user_in_room") - entry = yield self.resolve_state_groups(room_id, latest_event_ids) + entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) joined_users = yield self.store.get_joined_users_from_state(room_id, entry) defer.returnValue(joined_users) @@ -183,7 +183,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_hosts_in_room") - entry = yield self.resolve_state_groups(room_id, latest_event_ids) + entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) joined_hosts = yield self.store.get_joined_hosts(room_id, entry) defer.returnValue(joined_hosts) @@ -241,7 +241,7 @@ class StateHandler(object): defer.returnValue(context) 
logger.debug("calling resolve_state_groups from compute_event_context") - entry = yield self.resolve_state_groups( + entry = yield self.resolve_state_groups_for_events( event.room_id, [e for e, _ in event.prev_events], ) @@ -284,7 +284,7 @@ class StateHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, room_id, event_ids): + def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. From 6da4c4d3bdceb99f20bc208dc4809d3d76ba9a72 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 09:15:45 +0000 Subject: [PATCH 045/200] Factor out resolve_state_groups to a separate handler We extract the storage-independent bits of the state group resolution out to a separate functiom, and stick it in a new handler, in preparation for its use from the storage layer. --- synapse/server.py | 6 +- synapse/server.pyi | 3 + synapse/state.py | 149 ++++++++++++++++++++++++++++---------------- tests/test_state.py | 4 +- 4 files changed, 108 insertions(+), 54 deletions(-) diff --git a/synapse/server.py b/synapse/server.py index ff8a8fbc4..3173aed1d 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -66,7 +66,7 @@ from synapse.rest.media.v1.media_repository import ( MediaRepository, MediaRepositoryResource, ) -from synapse.state import StateHandler +from synapse.state import StateHandler, StateResolutionHandler from synapse.storage import DataStore from synapse.streams.events import EventSources from synapse.util import Clock @@ -102,6 +102,7 @@ class HomeServer(object): 'v1auth', 'auth', 'state_handler', + 'state_resolution_handler', 'presence_handler', 'sync_handler', 'typing_handler', @@ -224,6 +225,9 @@ class HomeServer(object): def build_state_handler(self): return StateHandler(self) + def build_state_resolution_handler(self): + return StateResolutionHandler(self) + def 
build_presence_handler(self): return PresenceHandler(self) diff --git a/synapse/server.pyi b/synapse/server.pyi index 41416ef25..c3a9a3847 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -34,6 +34,9 @@ class HomeServer(object): def get_state_handler(self) -> synapse.state.StateHandler: pass + def get_state_resolution_handler(self) -> synapse.state.StateResolutionHandler: + pass + def get_deactivate_account_handler(self) -> synapse.handlers.deactivate_account.DeactivateAccountHandler: pass diff --git a/synapse/state.py b/synapse/state.py index 8daf90013..5004e0f91 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -81,31 +81,19 @@ class _StateCacheEntry(object): class StateHandler(object): - """ Responsible for doing state conflict resolution. + """Fetches bits of state from the stores, and does state resolution + where necessary """ def __init__(self, hs): self.clock = hs.get_clock() self.store = hs.get_datastore() self.hs = hs - - # dict of set of event_ids -> _StateCacheEntry. - self._state_cache = None - self.resolve_linearizer = Linearizer(name="state_resolve_lock") + self._state_resolution_handler = hs.get_state_resolution_handler() def start_caching(self): - logger.debug("start_caching") - - self._state_cache = ExpiringCache( - cache_name="state_cache", - clock=self.clock, - max_len=SIZE_OF_CACHE, - expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, - iterable=True, - reset_expiry_on_get=True, - ) - - self._state_cache.start() + # TODO: remove this shim + self._state_resolution_handler.start_caching() @defer.inlineCallbacks def get_current_state(self, room_id, event_type=None, state_key="", @@ -283,7 +271,6 @@ class StateHandler(object): defer.returnValue(context) @defer.inlineCallbacks - @log_function def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. 
@@ -303,13 +290,7 @@ class StateHandler(object): room_id, event_ids ) - logger.debug( - "resolve_state_groups state_groups %s", - state_groups_ids.keys() - ) - - group_names = frozenset(state_groups_ids.keys()) - if len(group_names) == 1: + if len(state_groups_ids) == 1: name, state_list = state_groups_ids.items().pop() prev_group, delta_ids = yield self.store.get_state_group_delta(name) @@ -321,6 +302,92 @@ class StateHandler(object): delta_ids=delta_ids, )) + result = yield self._state_resolution_handler.resolve_state_groups( + room_id, state_groups_ids, self._state_map_factory, + ) + defer.returnValue(result) + + def _state_map_factory(self, ev_ids): + return self.store.get_events( + ev_ids, get_prev_content=False, check_redacted=False, + ) + + def resolve_events(self, state_sets, event): + logger.info( + "Resolving state for %s with %d groups", event.room_id, len(state_sets) + ) + state_set_ids = [{ + (ev.type, ev.state_key): ev.event_id + for ev in st + } for st in state_sets] + + state_map = { + ev.event_id: ev + for st in state_sets + for ev in st + } + + with Measure(self.clock, "state._resolve_events"): + new_state = resolve_events_with_state_map(state_set_ids, state_map) + + new_state = { + key: state_map[ev_id] for key, ev_id in new_state.items() + } + + return new_state + + +class StateResolutionHandler(object): + """Responsible for doing state conflict resolution. + + Note that the storage layer depends on this handler, so all functions must + be storage-independent. + """ + def __init__(self, hs): + self.clock = hs.get_clock() + + # dict of set of event_ids -> _StateCacheEntry. 
+ self._state_cache = None + self.resolve_linearizer = Linearizer(name="state_resolve_lock") + + def start_caching(self): + logger.debug("start_caching") + + self._state_cache = ExpiringCache( + cache_name="state_cache", + clock=self.clock, + max_len=SIZE_OF_CACHE, + expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, + iterable=True, + reset_expiry_on_get=True, + ) + + self._state_cache.start() + + @defer.inlineCallbacks + @log_function + def resolve_state_groups(self, room_id, state_groups_ids, state_map_factory): + """Resolves conflicts between a set of state groups + + Always generates a new state group (unless we hit the cache), so should + not be called for a single state group + + Args: + room_id (str): room we are resolving for (used for logging) + state_groups_ids (dict[int, dict[(str, str), str]]): + map from state group id to the state in that state group + (where 'state' is a map from state key to event id) + + Returns: + Deferred[_StateCacheEntry]: resolved state + """ + logger.debug( + "resolve_state_groups state_groups %s", + state_groups_ids.keys() + ) + + group_names = frozenset(state_groups_ids.keys()) + with (yield self.resolve_linearizer.queue(group_names)): if self._state_cache is not None: cache = self._state_cache.get(group_names, None) @@ -351,15 +418,17 @@ class StateHandler(object): with Measure(self.clock, "state._resolve_events"): new_state = yield resolve_events_with_factory( state_groups_ids.values(), - state_map_factory=lambda ev_ids: self.store.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ), + state_map_factory=state_map_factory, ) else: new_state = { key: e_ids.pop() for key, e_ids in state.items() } + # if the new state matches any of the input state groups, we can + # use that state group again. Otherwise we will generate a state_id + # which will be used as a cache key for future resolutions, but + # not get persisted. 
state_group = None new_state_event_ids = frozenset(new_state.values()) for sg, events in state_groups_ids.items(): @@ -396,30 +465,6 @@ class StateHandler(object): defer.returnValue(cache) - def resolve_events(self, state_sets, event): - logger.info( - "Resolving state for %s with %d groups", event.room_id, len(state_sets) - ) - state_set_ids = [{ - (ev.type, ev.state_key): ev.event_id - for ev in st - } for st in state_sets] - - state_map = { - ev.event_id: ev - for st in state_sets - for ev in st - } - - with Measure(self.clock, "state._resolve_events"): - new_state = resolve_events_with_state_map(state_set_ids, state_map) - - new_state = { - key: state_map[ev_id] for key, ev_id in new_state.items() - } - - return new_state - def _ordered_events(events): def key_func(e): diff --git a/tests/test_state.py b/tests/test_state.py index feb84f3d4..d16e1b3b8 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -19,7 +19,7 @@ from twisted.internet import defer from synapse.events import FrozenEvent from synapse.api.auth import Auth from synapse.api.constants import EventTypes, Membership -from synapse.state import StateHandler +from synapse.state import StateHandler, StateResolutionHandler from .utils import MockClock @@ -148,11 +148,13 @@ class StateTestCase(unittest.TestCase): ) hs = Mock(spec_set=[ "get_datastore", "get_auth", "get_state_handler", "get_clock", + "get_state_resolution_handler", ]) hs.get_datastore.return_value = self.store hs.get_state_handler.return_value = None hs.get_clock.return_value = MockClock() hs.get_auth.return_value = Auth(hs) + hs.get_state_resolution_handler = lambda: StateResolutionHandler(hs) self.store.get_next_state_group.side_effect = Mock self.store.get_state_group_delta.return_value = (None, None) From a18828c12961ce254660e7641335428ec0655868 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:33:22 +0000 Subject: [PATCH 046/200] Fix docstring for StateHandler.resolve_state_groups The return type 
was a complete lie, so fix it --- synapse/state.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 1f9abf9d3..2bc7e584c 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -58,7 +58,11 @@ class _StateCacheEntry(object): __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"] def __init__(self, state, state_group, prev_group=None, delta_ids=None): + # dict[(str, str), str] map from (type, state_key) to event_id self.state = frozendict(state) + + # the ID of a state group if one and only one is involved. + # otherwise, None otherwise? self.state_group = state_group self.prev_group = prev_group @@ -280,11 +284,12 @@ class StateHandler(object): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. + Args: + room_id (str): + event_ids (list[str]): + Returns: - a Deferred tuple of (`state_group`, `state`, `prev_state`). - `state_group` is the name of a state group if one and only one is - involved. `state` is a map from (type, state_key) to event, and - `prev_state` is a list of event ids. + Deferred[_StateCacheEntry]: resolved state """ logger.debug("resolve_state_groups event_ids %s", event_ids) From e15d4ea2482827035e31945f514b9d4c67086c37 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:36:45 +0000 Subject: [PATCH 047/200] More docstring fixes Fix a couple of errors in docstrings --- synapse/state.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 2bc7e584c..1071635ac 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -434,8 +434,8 @@ def resolve_events_with_state_map(state_sets, state_map): state_sets. Returns - dict[(str, str), synapse.events.FrozenEvent]: - a map from (type, state_key) to event. + dict[(str, str), str]: + a map from (type, state_key) to event_id. 
""" if len(state_sets) == 1: return state_sets[0] @@ -497,8 +497,8 @@ def resolve_events_with_factory(state_sets, state_map_factory): a Deferred of dict of event_id to event. Returns - Deferred[dict[(str, str), synapse.events.FrozenEvent]]: - a map from (type, state_key) to event. + Deferred[dict[(str, str), str]]: + a map from (type, state_key) to event_id. """ if len(state_sets) == 1: defer.returnValue(state_sets[0]) From 14737ba495d914daf350a80c9b34dfefb0ae5a86 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 23:55:01 +0000 Subject: [PATCH 048/200] doc arg types for _seperate --- synapse/state.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/synapse/state.py b/synapse/state.py index 1071635ac..30b16e201 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -457,6 +457,21 @@ def _seperate(state_sets): """Takes the state_sets and figures out which keys are conflicted and which aren't. i.e., which have multiple different event_ids associated with them in different state sets. + + Args: + state_sets(list[dict[(str, str), str]]): + List of dicts of (type, state_key) -> event_id, which are the + different state groups to resolve. + + Returns: + (dict[(str, str), str], dict[(str, str), set[str]]): + A tuple of (unconflicted_state, conflicted_state), where: + + unconflicted_state is a dict mapping (type, state_key)->event_id + for unconflicted state keys. + + conflicted_state is a dict mapping (type, state_key) to a set of + event ids for conflicted state keys. """ unconflicted_state = dict(state_sets[0]) conflicted_state = {} From d5352cbba80005fb226563211c9e7179edef2d65 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 2 Feb 2018 00:35:18 +0000 Subject: [PATCH 049/200] Handle url_previews with no content-type avoid failing with an exception if the remote server doesn't give us a Content-Type header. Also, clean up the exception handling a bit. 
--- synapse/rest/media/v1/preview_url_resource.py | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 981f01e41..31fe7aa75 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -12,6 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import cgi +import datetime +import errno +import fnmatch +import itertools +import logging +import os +import re +import shutil +import sys +import traceback +import ujson as json +import urlparse from twisted.web.server import NOT_DONE_YET from twisted.internet import defer @@ -33,18 +46,6 @@ from synapse.http.server import ( from synapse.util.async import ObservableDeferred from synapse.util.stringutils import is_ascii -import os -import re -import fnmatch -import cgi -import ujson as json -import urlparse -import itertools -import datetime -import errno -import shutil - -import logging logger = logging.getLogger(__name__) @@ -286,17 +287,28 @@ class PreviewUrlResource(Resource): url_cache=True, ) - try: - with self.media_storage.store_into_file(file_info) as (f, fname, finish): + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + try: logger.debug("Trying to get url '%s'" % url) length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) + except Exception as e: # FIXME: pass through 404s and other error messages nicely + logger.warn("Error downloading %s: %r", url, e) + raise SynapseError( + 500, "Failed to download content: %s" % ( + traceback.format_exception_only(sys.exc_type, e), + ), + Codes.UNKNOWN, + ) + yield finish() - yield finish() - - media_type = headers["Content-Type"][0] + try: + if "Content-Type" in headers: + 
media_type = headers["Content-Type"][0] + else: + media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() content_disposition = headers.get("Content-Disposition", None) @@ -336,10 +348,11 @@ class PreviewUrlResource(Resource): ) except Exception as e: - raise SynapseError( - 500, ("Failed to download content: %s" % e), - Codes.UNKNOWN - ) + logger.error("Error handling downloaded %s: %r", url, e) + # TODO: we really ought to delete the downloaded file in this + # case, since we won't have recorded it in the db, and will + # therefore not expire it. + raise defer.returnValue({ "media_type": media_type, From 6b02fc80d173d3d4de81623d411a136abe1637e9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 2 Feb 2018 14:32:51 +0000 Subject: [PATCH 050/200] Reinstate event_search_postgres_gist handler People may have queued updates for this, so we can't just delete it. --- synapse/storage/background_updates.py | 19 +++++++++++++++++++ synapse/storage/registration.py | 7 +------ synapse/storage/search.py | 11 +++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 11a1b942f..c88759bf2 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -242,6 +242,25 @@ class BackgroundUpdateStore(SQLBaseStore): """ self._background_update_handlers[update_name] = update_handler + def register_noop_background_update(self, update_name): + """Register a noop handler for a background update. + + This is useful when we previously did a background update, but no + longer wish to do the update. In this case the background update should + be removed from the schema delta files, but there may still be some + users who have the background update queued, so this method should + also be called to clear the update. 
+ + Args: + update_name (str): Name of update + """ + @defer.inlineCallbacks + def noop_update(progress, batch_size): + yield self._end_background_update(update_name) + defer.returnValue(1) + + self.register_background_update_handler(update_name, noop_update) + def register_background_index_update(self, update_name, index_name, table, columns, where_clause=None, unique=False, diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 3aa810981..95f75d6df 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -39,12 +39,7 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): # we no longer use refresh tokens, but it's possible that some people # might have a background update queued to build this index. Just # clear the background update. - @defer.inlineCallbacks - def noop_update(progress, batch_size): - yield self._end_background_update("refresh_tokens_device_index") - defer.returnValue(1) - self.register_background_update_handler( - "refresh_tokens_device_index", noop_update) + self.register_noop_background_update("refresh_tokens_device_index") @defer.inlineCallbacks def add_access_token_to_user(self, user_id, token, device_id=None): diff --git a/synapse/storage/search.py b/synapse/storage/search.py index d3e76b58d..13c827cf8 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -31,6 +31,7 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" + EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist" EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" def __init__(self, db_conn, hs): @@ -42,6 +43,16 @@ class SearchStore(BackgroundUpdateStore): self.EVENT_SEARCH_ORDER_UPDATE_NAME, self._background_reindex_search_order ) + + # we used to have a background update to turn the GIN index into a + # GIST one; we no longer do that (obviously) because we actually want + # 
a GIN index. However, it's possible that some people might still have + # the background update queued, so we register a handler to clear the + # background update. + self.register_noop_background_update( + self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME, + ) + self.register_background_update_handler( self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, self._background_reindex_gin_search From 4eeae7ad657729eb8c2765da6fb40fc983c740f7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 22:57:33 +0000 Subject: [PATCH 051/200] Move store_event_search_txn to SearchStore ... as a precursor to making event storing and doing the bg update share some code. --- synapse/storage/room.py | 45 +++++++-------------------------------- synapse/storage/search.py | 35 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 37 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9f373b47e..0fcfb7f86 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore -from .engines import PostgresEngine, Sqlite3Engine - import collections import logging import ujson as json @@ -40,7 +38,7 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SQLBaseStore): +class RoomStore(SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -263,8 +261,8 @@ class RoomStore(SQLBaseStore): }, ) - self._store_event_search_txn( - txn, event, "content.topic", event.content["topic"] + self.store_event_search_txn( + txn, event, "content.topic", event.content["topic"], ) def _store_room_name_txn(self, txn, event): @@ -279,14 +277,14 @@ class RoomStore(SQLBaseStore): } ) - self._store_event_search_txn( - txn, event, "content.name", event.content["name"] + 
self.store_event_search_txn( + txn, event, "content.name", event.content["name"], ) def _store_room_message_txn(self, txn, event): if hasattr(event, "content") and "body" in event.content: - self._store_event_search_txn( - txn, event, "content.body", event.content["body"] + self.store_event_search_txn( + txn, event, "content.body", event.content["body"], ) def _store_history_visibility_txn(self, txn, event): @@ -308,33 +306,6 @@ class RoomStore(SQLBaseStore): event.content[key] )) - def _store_event_search_txn(self, txn, event, key, value): - if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") - sql = ( - "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" - " VALUES (?,?,?,to_tsvector('english', ?),?,?)" - ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) - txn.execute("RESET work_mem") - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - else: - # This should be unreachable. 
- raise Exception("Unrecognized database engine") - def add_event_report(self, room_id, event_id, user_id, reason, content, received_ts): next_id = self._event_reports_id_gen.get_next() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index f52f3c859..205e8d001 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -246,6 +246,41 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(num_rows) + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256kB'") + sql = ( + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) + ) + txn.execute("RESET work_mem") + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. 
From bd25f9cf36ff86d1616853d88cebd2a4a83fa552 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:05:41 +0000 Subject: [PATCH 052/200] Clean up work_mem handling Add some comments and improve exception handling when twiddling work_mem for the search update --- synapse/storage/search.py | 52 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 205e8d001..190751bad 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import sys from twisted.internet import defer from .background_updates import BackgroundUpdateStore @@ -256,21 +256,51 @@ class SearchStore(BackgroundUpdateStore): value (str): """ if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " (event_id, room_id, key, vector, stream_ordering, " + " origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, + + # inserts to a GIN index are normally batched up into a pending + # list, and then all committed together once the list gets to a + # certain size. The trouble with that is that postgres (pre-9.5) + # uses work_mem to determine the length of the list, and work_mem + # is typically very large. + # + # We therefore reduce work_mem while we do the insert. 
+ # + # (postgres 9.5 uses the separate gin_pending_list_limit setting, + # so doesn't suffer the same problem, but changing work_mem will + # be harmless) + + txn.execute("SET work_mem='256kB'") + try: + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) ) - ) - txn.execute("RESET work_mem") + except Exception: + # we need to reset work_mem, but doing so may throw a new + # exception and we want to preserve the original + t, v, tb = sys.exc_info() + try: + txn.execute("RESET work_mem") + except Exception as e: + logger.warn( + "exception resetting work_mem during exception " + "handling: %r", + e, + ) + raise t, v, tb + else: + txn.execute("RESET work_mem") + elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" From 80b8a28100e29e34bdc6226513575789310aa41f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:07:13 +0000 Subject: [PATCH 053/200] Factor out common code for search insert we can reuse the same code as is used for event insert, for doing the background index population. --- synapse/storage/search.py | 89 ++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 190751bad..eecf77851 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from collections import namedtuple import sys from twisted.internet import defer @@ -26,6 +27,11 @@ import ujson as json logger = logging.getLogger(__name__) +SearchEntry = namedtuple('SearchEntry', [ + 'key', 'value', 'event_id', 'room_id', 'stream_ordering', + 'origin_server_ts', +]) + class SearchStore(BackgroundUpdateStore): @@ -49,16 +55,17 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): + # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) - INSERT_CLUMP_SIZE = 1000 TYPES = ["m.room.name", "m.room.message", "m.room.topic"] def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, event_id, room_id, type, content FROM events" + "SELECT stream_ordering, event_id, room_id, type, content, " + " origin_server_ts FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" @@ -67,6 +74,10 @@ class SearchStore(BackgroundUpdateStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + # we could stream straight from the results into + # store_search_entries_txn with a generator function, but that + # would mean having two cursors open on the database at once. + # Instead we just build a list of results. 
rows = self.cursor_to_dict(txn) if not rows: return 0 @@ -79,6 +90,8 @@ class SearchStore(BackgroundUpdateStore): event_id = row["event_id"] room_id = row["room_id"] etype = row["type"] + stream_ordering = row["stream_ordering"] + origin_server_ts = row["origin_server_ts"] try: content = json.loads(row["content"]) except Exception: @@ -93,6 +106,8 @@ class SearchStore(BackgroundUpdateStore): elif etype == "m.room.name": key = "content.name" value = content["name"] + else: + raise Exception("unexpected event type %s" % etype) except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. @@ -103,29 +118,16 @@ class SearchStore(BackgroundUpdateStore): # then skip over it continue - event_search_rows.append((event_id, room_id, key, value)) + event_search_rows.append(SearchEntry( + key=key, + value=value, + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + origin_server_ts=origin_server_ts, + )) - if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - else: - # This should be unreachable. 
- raise Exception("Unrecognized database engine") - - for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - if isinstance(self.database_engine, PostgresEngine): - txn.execute("RESET work_mem") + self.store_search_entries_txn(txn, event_search_rows) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -255,6 +257,26 @@ class SearchStore(BackgroundUpdateStore): key (str): value (str): """ + self.store_search_entries_txn( + txn, + (SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ),), + ) + + def store_search_entries_txn(self, txn, entries): + """Add entries to the search table + + Args: + txn (cursor): + entries (iterable[SearchEntry]): + entries to be added to the table + """ if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" @@ -262,6 +284,10 @@ class SearchStore(BackgroundUpdateStore): " origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + entry.stream_ordering, entry.origin_server_ts, + ) for entry in entries) # inserts to a GIN index are normally batched up into a pending # list, and then all committed together once the list gets to a @@ -277,14 +303,7 @@ class SearchStore(BackgroundUpdateStore): txn.execute("SET work_mem='256kB'") try: - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) + txn.executemany(sql, args) except Exception: # we need to reset work_mem, but doing so may throw a new # exception and we want to preserve the original @@ -306,7 +325,11 @@ class SearchStore(BackgroundUpdateStore): "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) 
- txn.execute(sql, (event.event_id, event.room_id, key, value,)) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + ) for entry in entries) + + txn.executemany(sql, args) else: # This should be unreachable. raise Exception("Unrecognized database engine") From b5049d2e5cab5780c08c61b9d8ac70c6a9298fb1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Feb 2018 12:06:46 +0000 Subject: [PATCH 054/200] Add .vscode to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 491047c35..c8901eb20 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,5 @@ static/client/register/register_config.js env/ *.config + +.vscode/ From a1beca0e254104ad87220d276fb56d26e5ca5809 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Feb 2018 12:44:03 +0000 Subject: [PATCH 055/200] Fix broken unit test for media storage --- tests/rest/media/v1/test_media_storage.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index c4de18157..eef38b678 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -21,6 +21,8 @@ from synapse.rest.media.v1.media_storage import MediaStorage from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend +from mock import Mock + from tests import unittest import os @@ -35,8 +37,11 @@ class MediaStorageTests(unittest.TestCase): self.primary_base_path = os.path.join(self.test_dir, "primary") self.secondary_base_path = os.path.join(self.test_dir, "secondary") + hs = Mock() + hs.config.media_store_path = self.primary_base_path + storage_providers = [FileStorageProviderBackend( - self.primary_base_path, self.secondary_base_path + hs, self.secondary_base_path )] self.filepaths = MediaFilePaths(self.primary_base_path) From 
bc496df192fa20dee933590d5f21a3425388c0d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Feb 2018 17:57:51 +0000 Subject: [PATCH 056/200] report metrics on number of cache evictions --- synapse/metrics/metric.py | 11 ++++++++++- synapse/util/caches/descriptors.py | 4 ++++ synapse/util/caches/expiringcache.py | 6 +++++- synapse/util/caches/lrucache.py | 28 +++++++++++++++++++++++++--- tests/metrics/test_metric.py | 12 ++++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 1e783e5ff..ff5aa8c0e 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -193,7 +193,9 @@ class DistributionMetric(object): class CacheMetric(object): - __slots__ = ("name", "cache_name", "hits", "misses", "size_callback") + __slots__ = ( + "name", "cache_name", "hits", "misses", "evicted_size", "size_callback", + ) def __init__(self, name, size_callback, cache_name): self.name = name @@ -201,6 +203,7 @@ class CacheMetric(object): self.hits = 0 self.misses = 0 + self.evicted_size = 0 self.size_callback = size_callback @@ -210,6 +213,9 @@ class CacheMetric(object): def inc_misses(self): self.misses += 1 + def inc_evictions(self, size=1): + self.evicted_size += size + def render(self): size = self.size_callback() hits = self.hits @@ -219,6 +225,9 @@ class CacheMetric(object): """%s:hits{name="%s"} %d""" % (self.name, self.cache_name, hits), """%s:total{name="%s"} %d""" % (self.name, self.cache_name, total), """%s:size{name="%s"} %d""" % (self.name, self.cache_name, size), + """%s:evicted_size{name="%s"} %d""" % ( + self.name, self.cache_name, self.evicted_size + ), ] diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index af65bfe7b..bf3a66eae 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -75,6 +75,7 @@ class Cache(object): self.cache = LruCache( max_size=max_entries, keylen=keylen, 
cache_type=cache_type, size_callback=(lambda d: len(d)) if iterable else None, + evicted_callback=self._on_evicted, ) self.name = name @@ -83,6 +84,9 @@ class Cache(object): self.thread = None self.metrics = register_cache(name, self.cache) + def _on_evicted(self, evicted_count): + self.metrics.inc_evictions(evicted_count) + def check_thread(self): expected_thread = self.thread if expected_thread is None: diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 6ad53a639..0aa103eec 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -79,7 +79,11 @@ class ExpiringCache(object): while self._max_len and len(self) > self._max_len: _key, value = self._cache.popitem(last=False) if self.iterable: - self._size_estimate -= len(value.value) + removed_len = len(value.value) + self.metrics.inc_evictions(removed_len) + self._size_estimate -= removed_len + else: + self.metrics.inc_evictions() def __getitem__(self, key): try: diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index cf5fbb679..f088dd430 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -49,7 +49,24 @@ class LruCache(object): Can also set callbacks on objects when getting/setting which are fired when that key gets invalidated/evicted. """ - def __init__(self, max_size, keylen=1, cache_type=dict, size_callback=None): + def __init__(self, max_size, keylen=1, cache_type=dict, size_callback=None, + evicted_callback=None): + """ + Args: + max_size (int): + + keylen (int): + + cache_type (type): + type of underlying cache to be used. Typically one of dict + or TreeCache. + + size_callback (func(V) -> int | None): + + evicted_callback (func(int)|None): + if not None, called on eviction with the size of the evicted + entry + """ cache = cache_type() self.cache = cache # Used for introspection. 
list_root = _Node(None, None, None, None) @@ -61,8 +78,10 @@ class LruCache(object): def evict(): while cache_len() > max_size: todelete = list_root.prev_node - delete_node(todelete) + evicted_len = delete_node(todelete) cache.pop(todelete.key, None) + if evicted_callback: + evicted_callback(evicted_len) def synchronized(f): @wraps(f) @@ -111,12 +130,15 @@ class LruCache(object): prev_node.next_node = next_node next_node.prev_node = prev_node + deleted_len = 1 if size_callback: - cached_cache_len[0] -= size_callback(node.value) + deleted_len = size_callback(node.value) + cached_cache_len[0] -= deleted_len for cb in node.callbacks: cb() node.callbacks.clear() + return deleted_len @synchronized def cache_get(key, default=None, callbacks=[]): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index f85455a5a..39bde6e3f 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -141,6 +141,7 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 0', 'cache:total{name="cache_name"} 0', 'cache:size{name="cache_name"} 0', + 'cache:evicted_size{name="cache_name"} 0', ]) metric.inc_misses() @@ -150,6 +151,7 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 0', 'cache:total{name="cache_name"} 1', 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 0', ]) metric.inc_hits() @@ -158,4 +160,14 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 1', 'cache:total{name="cache_name"} 2', 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 0', + ]) + + metric.inc_evictions(2) + + self.assertEquals(metric.render(), [ + 'cache:hits{name="cache_name"} 1', + 'cache:total{name="cache_name"} 2', + 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 2', ]) From c46e75d3d8311f378f234e3de4719d6fa5d380c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 22:57:33 +0000 
Subject: [PATCH 057/200] Move store_event_search_txn to SearchStore ... as a precursor to making event storing and doing the bg update share some code. --- synapse/storage/room.py | 43 ++++++++------------------------------- synapse/storage/search.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index cf2c4dae3..fff6652e0 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore -from .engines import PostgresEngine, Sqlite3Engine - import collections import logging import ujson as json @@ -40,7 +38,7 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SQLBaseStore): +class RoomStore(SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -263,8 +261,8 @@ class RoomStore(SQLBaseStore): }, ) - self._store_event_search_txn( - txn, event, "content.topic", event.content["topic"] + self.store_event_search_txn( + txn, event, "content.topic", event.content["topic"], ) def _store_room_name_txn(self, txn, event): @@ -279,14 +277,14 @@ class RoomStore(SQLBaseStore): } ) - self._store_event_search_txn( - txn, event, "content.name", event.content["name"] + self.store_event_search_txn( + txn, event, "content.name", event.content["name"], ) def _store_room_message_txn(self, txn, event): if hasattr(event, "content") and "body" in event.content: - self._store_event_search_txn( - txn, event, "content.body", event.content["body"] + self.store_event_search_txn( + txn, event, "content.body", event.content["body"], ) def _store_history_visibility_txn(self, txn, event): @@ -308,31 +306,6 @@ class RoomStore(SQLBaseStore): event.content[key] )) - def 
_store_event_search_txn(self, txn, event, key, value): - if isinstance(self.database_engine, PostgresEngine): - sql = ( - "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" - " VALUES (?,?,?,to_tsvector('english', ?),?,?)" - ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - def add_event_report(self, room_id, event_id, user_id, reason, content, received_ts): next_id = self._event_reports_id_gen.get_next() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c63..4f38a587c 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -242,6 +242,39 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(num_rows) + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + else: + # This should be unreachable. 
+ raise Exception("Unrecognized database engine") + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. From 5ff3d23564f41e3ae82398a2f0726d8914d060a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:51:53 +0000 Subject: [PATCH 058/200] Split event creation into a separate handler --- synapse/handlers/message.py | 368 +++++++++++++++++++----------------- 1 file changed, 191 insertions(+), 177 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd..afa19bf65 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -47,21 +47,9 @@ class MessageHandler(BaseHandler): self.hs = hs self.state = hs.get_state_handler() self.clock = hs.get_clock() - self.validator = EventValidator() - self.profile_handler = hs.get_profile_handler() self.pagination_lock = ReadWriteLock() - self.pusher_pool = hs.get_pusherpool() - - # We arbitrarily limit concurrent event creation for a room to 5. - # This is to stop us from diverging history *too* much. - self.limiter = Limiter(max_count=5) - - self.action_generator = hs.get_action_generator() - - self.spam_checker = hs.get_spam_checker() - @defer.inlineCallbacks def purge_history(self, room_id, event_id): event = yield self.store.get_event(event_id) @@ -182,166 +170,6 @@ class MessageHandler(BaseHandler): defer.returnValue(chunk) - @defer.inlineCallbacks - def create_event(self, requester, event_dict, token_id=None, txn_id=None, - prev_event_ids=None): - """ - Given a dict from a client, create a new event. - - Creates an FrozenEvent object, filling out auth_events, prev_events, - etc. - - Adds display names to Join membership events. 
- - Args: - requester - event_dict (dict): An entire event - token_id (str) - txn_id (str) - prev_event_ids (list): The prev event ids to use when creating the event - - Returns: - Tuple of created event (FrozenEvent), Context - """ - builder = self.event_builder_factory.new(event_dict) - - with (yield self.limiter.queue(builder.room_id)): - self.validator.validate_new(builder) - - if builder.type == EventTypes.Member: - membership = builder.content.get("membership", None) - target = UserID.from_string(builder.state_key) - - if membership in {Membership.JOIN, Membership.INVITE}: - # If event doesn't include a display name, add one. - profile = self.profile_handler - content = builder.content - - try: - if "displayname" not in content: - content["displayname"] = yield profile.get_displayname(target) - if "avatar_url" not in content: - content["avatar_url"] = yield profile.get_avatar_url(target) - except Exception as e: - logger.info( - "Failed to get profile information for %r: %s", - target, e - ) - - if token_id is not None: - builder.internal_metadata.token_id = token_id - - if txn_id is not None: - builder.internal_metadata.txn_id = txn_id - - event, context = yield self._create_new_client_event( - builder=builder, - requester=requester, - prev_event_ids=prev_event_ids, - ) - - defer.returnValue((event, context)) - - @defer.inlineCallbacks - def send_nonmember_event(self, requester, event, context, ratelimit=True): - """ - Persists and notifies local clients and federation of an event. - - Args: - event (FrozenEvent) the event to send. - context (Context) the context of the event. - ratelimit (bool): Whether to rate limit this send. - is_guest (bool): Whether the sender is a guest. - """ - if event.type == EventTypes.Member: - raise SynapseError( - 500, - "Tried to send member event through non-member codepath" - ) - - # We check here if we are currently being rate limited, so that we - # don't do unnecessary work. 
We check again just before we actually - # send the event. - yield self.ratelimit(requester, update=False) - - user = UserID.from_string(event.sender) - - assert self.hs.is_mine(user), "User must be our own: %s" % (user,) - - if event.is_state(): - prev_state = yield self.deduplicate_state_event(event, context) - if prev_state is not None: - defer.returnValue(prev_state) - - yield self.handle_new_client_event( - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - ) - - if event.type == EventTypes.Message: - presence = self.hs.get_presence_handler() - # We don't want to block sending messages on any presence code. This - # matters as sometimes presence code can take a while. - preserve_fn(presence.bump_presence_active_time)(user) - - @defer.inlineCallbacks - def deduplicate_state_event(self, event, context): - """ - Checks whether event is in the latest resolved state in context. - - If so, returns the version of the event in context. - Otherwise, returns None. - """ - prev_event_id = context.prev_state_ids.get((event.type, event.state_key)) - prev_event = yield self.store.get_event(prev_event_id, allow_none=True) - if not prev_event: - return - - if prev_event and event.user_id == prev_event.user_id: - prev_content = encode_canonical_json(prev_event.content) - next_content = encode_canonical_json(event.content) - if prev_content == next_content: - defer.returnValue(prev_event) - return - - @defer.inlineCallbacks - def create_and_send_nonmember_event( - self, - requester, - event_dict, - ratelimit=True, - txn_id=None - ): - """ - Creates an event, then sends it. - - See self.create_event and self.send_nonmember_event. 
- """ - event, context = yield self.create_event( - requester, - event_dict, - token_id=requester.access_token_id, - txn_id=txn_id - ) - - spam_error = self.spam_checker.check_event_for_spam(event) - if spam_error: - if not isinstance(spam_error, basestring): - spam_error = "Spam is not permitted here" - raise SynapseError( - 403, spam_error, Codes.FORBIDDEN - ) - - yield self.send_nonmember_event( - requester, - event, - context, - ratelimit=ratelimit, - ) - defer.returnValue(event) - @defer.inlineCallbacks def get_room_data(self, user_id=None, room_id=None, event_type=None, state_key="", is_guest=False): @@ -470,6 +298,194 @@ class MessageHandler(BaseHandler): for user_id, profile in users_with_profile.iteritems() }) + +class EventCreationHandler(object): + def __init__(self, hs): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + self.clock = hs.get_clock() + self.validator = EventValidator() + self.profile_handler = hs.get_profile_handler() + self.event_builder_factory = hs.get_event_builder_factory() + self.server_name = hs.hostname + self.ratelimiter = hs.get_ratelimiter() + self.notifier = hs.get_notifier() + + # This is only used to get at ratelimit function, and maybe_kick_guest_users + self.base_handler = BaseHandler(hs) + + self.pusher_pool = hs.get_pusherpool() + + # We arbitrarily limit concurrent event creation for a room to 5. + # This is to stop us from diverging history *too* much. + self.limiter = Limiter(max_count=5) + + self.action_generator = hs.get_action_generator() + + self.spam_checker = hs.get_spam_checker() + + @defer.inlineCallbacks + def create_event(self, requester, event_dict, token_id=None, txn_id=None, + prev_event_ids=None): + """ + Given a dict from a client, create a new event. + + Creates an FrozenEvent object, filling out auth_events, prev_events, + etc. + + Adds display names to Join membership events. 
+ + Args: + requester + event_dict (dict): An entire event + token_id (str) + txn_id (str) + prev_event_ids (list): The prev event ids to use when creating the event + + Returns: + Tuple of created event (FrozenEvent), Context + """ + builder = self.event_builder_factory.new(event_dict) + + with (yield self.limiter.queue(builder.room_id)): + self.validator.validate_new(builder) + + if builder.type == EventTypes.Member: + membership = builder.content.get("membership", None) + target = UserID.from_string(builder.state_key) + + if membership in {Membership.JOIN, Membership.INVITE}: + # If event doesn't include a display name, add one. + profile = self.profile_handler + content = builder.content + + try: + if "displayname" not in content: + content["displayname"] = yield profile.get_displayname(target) + if "avatar_url" not in content: + content["avatar_url"] = yield profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information for %r: %s", + target, e + ) + + if token_id is not None: + builder.internal_metadata.token_id = token_id + + if txn_id is not None: + builder.internal_metadata.txn_id = txn_id + + event, context = yield self._create_new_client_event( + builder=builder, + requester=requester, + prev_event_ids=prev_event_ids, + ) + + defer.returnValue((event, context)) + + @defer.inlineCallbacks + def send_nonmember_event(self, requester, event, context, ratelimit=True): + """ + Persists and notifies local clients and federation of an event. + + Args: + event (FrozenEvent) the event to send. + context (Context) the context of the event. + ratelimit (bool): Whether to rate limit this send. + is_guest (bool): Whether the sender is a guest. + """ + if event.type == EventTypes.Member: + raise SynapseError( + 500, + "Tried to send member event through non-member codepath" + ) + + # We check here if we are currently being rate limited, so that we + # don't do unnecessary work. 
We check again just before we actually + # send the event. + yield self.base_handler.ratelimit(requester, update=False) + + user = UserID.from_string(event.sender) + + assert self.hs.is_mine(user), "User must be our own: %s" % (user,) + + if event.is_state(): + prev_state = yield self.deduplicate_state_event(event, context) + if prev_state is not None: + defer.returnValue(prev_state) + + yield self.handle_new_client_event( + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + ) + + if event.type == EventTypes.Message: + presence = self.hs.get_presence_handler() + # We don't want to block sending messages on any presence code. This + # matters as sometimes presence code can take a while. + preserve_fn(presence.bump_presence_active_time)(user) + + @defer.inlineCallbacks + def deduplicate_state_event(self, event, context): + """ + Checks whether event is in the latest resolved state in context. + + If so, returns the version of the event in context. + Otherwise, returns None. + """ + prev_event_id = context.prev_state_ids.get((event.type, event.state_key)) + prev_event = yield self.store.get_event(prev_event_id, allow_none=True) + if not prev_event: + return + + if prev_event and event.user_id == prev_event.user_id: + prev_content = encode_canonical_json(prev_event.content) + next_content = encode_canonical_json(event.content) + if prev_content == next_content: + defer.returnValue(prev_event) + return + + @defer.inlineCallbacks + def create_and_send_nonmember_event( + self, + requester, + event_dict, + ratelimit=True, + txn_id=None + ): + """ + Creates an event, then sends it. + + See self.create_event and self.send_nonmember_event. 
+ """ + event, context = yield self.create_event( + requester, + event_dict, + token_id=requester.access_token_id, + txn_id=txn_id + ) + + spam_error = self.spam_checker.check_event_for_spam(event) + if spam_error: + if not isinstance(spam_error, basestring): + spam_error = "Spam is not permitted here" + raise SynapseError( + 403, spam_error, Codes.FORBIDDEN + ) + + yield self.send_nonmember_event( + requester, + event, + context, + ratelimit=ratelimit, + ) + defer.returnValue(event) + @measure_func("_create_new_client_event") @defer.inlineCallbacks def _create_new_client_event(self, builder, requester=None, prev_event_ids=None): @@ -509,9 +525,7 @@ class MessageHandler(BaseHandler): builder.prev_events = prev_events builder.depth = depth - state_handler = self.state_handler - - context = yield state_handler.compute_event_context(builder) + context = yield self.state.compute_event_context(builder) if requester: context.app_service = requester.app_service @@ -551,7 +565,7 @@ class MessageHandler(BaseHandler): # We now need to go and hit out to wherever we need to hit out to. 
if ratelimit: - yield self.ratelimit(requester) + yield self.base_handler.ratelimit(requester) try: yield self.auth.check_from_context(event, context) @@ -567,7 +581,7 @@ class MessageHandler(BaseHandler): logger.exception("Failed to encode content: %r", event.content) raise - yield self.maybe_kick_guest_users(event, context) + yield self.base_handler.maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: # Check the alias is acually valid (at this time at least) From 3fa362502cc6c509bac65753954c313d307035e6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:52:07 +0000 Subject: [PATCH 059/200] Update places where we create events --- synapse/handlers/directory.py | 7 +++---- synapse/handlers/federation.py | 18 ++++++++---------- synapse/handlers/room.py | 10 ++++------ synapse/handlers/room_member.py | 20 +++++++++++--------- synapse/rest/client/v1/admin.py | 4 ++-- synapse/rest/client/v1/room.py | 16 +++++++++------- synapse/server.py | 5 +++++ 7 files changed, 42 insertions(+), 38 deletions(-) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index a0464ae5c..8580ada60 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -34,6 +34,7 @@ class DirectoryHandler(BaseHandler): self.state = hs.get_state_handler() self.appservice_handler = hs.get_application_service_handler() + self.event_creation_handler = hs.get_event_creation_handler() self.federation = hs.get_replication_layer() self.federation.register_query_handler( @@ -249,8 +250,7 @@ class DirectoryHandler(BaseHandler): def send_room_alias_update_event(self, requester, user_id, room_id): aliases = yield self.store.get_aliases_for_room(room_id) - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Aliases, @@ -272,8 +272,7 @@ class 
DirectoryHandler(BaseHandler): if not alias_event or alias_event.content.get("alias", "") != alias_str: return - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.CanonicalAlias, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8ee9434c9..e6b9f5cf5 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -75,6 +75,7 @@ class FederationHandler(BaseHandler): self.is_mine_id = hs.is_mine_id self.pusher_pool = hs.get_pusherpool() self.spam_checker = hs.get_spam_checker() + self.event_creation_handler = hs.get_event_creation_handler() self.replication_layer.set_handler(self) @@ -1007,8 +1008,7 @@ class FederationHandler(BaseHandler): }) try: - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) except AuthError as e: @@ -1248,8 +1248,7 @@ class FederationHandler(BaseHandler): "state_key": user_id, }) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) @@ -2120,8 +2119,7 @@ class FederationHandler(BaseHandler): if (yield self.auth.check_host_in_room(room_id, self.hs.hostname)): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder ) @@ -2159,8 +2157,7 @@ class FederationHandler(BaseHandler): """ builder = self.event_builder_factory.new(event_dict) - message_handler = 
self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) @@ -2210,8 +2207,9 @@ class FederationHandler(BaseHandler): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event(builder=builder) + event, context = yield self.event_creation_handler._create_new_client_event( + builder=builder, + ) defer.returnValue((event, context)) @defer.inlineCallbacks diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index d1cc87a01..4ea5bf1bc 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -64,6 +64,7 @@ class RoomCreationHandler(BaseHandler): super(RoomCreationHandler, self).__init__(hs) self.spam_checker = hs.get_spam_checker() + self.event_creation_handler = hs.get_event_creation_handler() @defer.inlineCallbacks def create_room(self, requester, config, ratelimit=True): @@ -163,13 +164,11 @@ class RoomCreationHandler(BaseHandler): creation_content = config.get("creation_content", {}) - msg_handler = self.hs.get_handlers().message_handler room_member_handler = self.hs.get_handlers().room_member_handler yield self._send_events_for_new_room( requester, room_id, - msg_handler, room_member_handler, preset_config=preset_config, invite_list=invite_list, @@ -181,7 +180,7 @@ class RoomCreationHandler(BaseHandler): if "name" in config: name = config["name"] - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Name, @@ -194,7 +193,7 @@ class RoomCreationHandler(BaseHandler): if "topic" in config: topic = config["topic"] - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { 
"type": EventTypes.Topic, @@ -249,7 +248,6 @@ class RoomCreationHandler(BaseHandler): self, creator, # A Requester object. room_id, - msg_handler, room_member_handler, preset_config, invite_list, @@ -272,7 +270,7 @@ class RoomCreationHandler(BaseHandler): @defer.inlineCallbacks def send(etype, content, **kwargs): event = create(etype, content, **kwargs) - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( creator, event, ratelimit=False diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7e6467cd1..ab58beb0f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -46,6 +46,7 @@ class RoomMemberHandler(BaseHandler): super(RoomMemberHandler, self).__init__(hs) self.profile_handler = hs.get_profile_handler() + self.event_creation_hander = hs.get_event_creation_handler() self.member_linearizer = Linearizer(name="member") @@ -66,13 +67,12 @@ class RoomMemberHandler(BaseHandler): ): if content is None: content = {} - msg_handler = self.hs.get_handlers().message_handler content["membership"] = membership if requester.is_guest: content["kind"] = "guest" - event, context = yield msg_handler.create_event( + event, context = yield self.event_creation_hander.create_event( requester, { "type": EventTypes.Member, @@ -90,12 +90,14 @@ class RoomMemberHandler(BaseHandler): ) # Check if this event matches the previous membership event for the user. - duplicate = yield msg_handler.deduplicate_state_event(event, context) + duplicate = yield self.event_creation_hander.deduplicate_state_event( + event, context, + ) if duplicate is not None: # Discard the new event since this membership change is a no-op. 
defer.returnValue(duplicate) - yield msg_handler.handle_new_client_event( + yield self.event_creation_hander.handle_new_client_event( requester, event, context, @@ -394,8 +396,9 @@ class RoomMemberHandler(BaseHandler): else: requester = synapse.types.create_requester(target_user) - message_handler = self.hs.get_handlers().message_handler - prev_event = yield message_handler.deduplicate_state_event(event, context) + prev_event = yield self.event_creation_hander.deduplicate_state_event( + event, context, + ) if prev_event is not None: return @@ -412,7 +415,7 @@ class RoomMemberHandler(BaseHandler): if is_blocked: raise SynapseError(403, "This room has been blocked on this server") - yield message_handler.handle_new_client_event( + yield self.event_creation_hander.handle_new_client_event( requester, event, context, @@ -644,8 +647,7 @@ class RoomMemberHandler(BaseHandler): ) ) - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_hander.create_and_send_nonmember_event( requester, { "type": EventTypes.ThirdPartyInvite, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 0615e5d80..f77f64667 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -171,6 +171,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): self.store = hs.get_datastore() self.handlers = hs.get_handlers() self.state = hs.get_state_handler() + self.event_creation_handler = hs.get_event_creation_handler() @defer.inlineCallbacks def on_POST(self, request, room_id): @@ -203,8 +204,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ) new_room_id = info["room_id"] - msg_handler = self.handlers.message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( room_creator_requester, { "type": "m.room.message", diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py 
index 867ec8602..ad6534537 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -82,6 +82,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomStateEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): # /room/$roomid/state/$eventtype @@ -162,15 +163,16 @@ class RoomStateEventRestServlet(ClientV1RestServlet): content=content, ) else: - msg_handler = self.handlers.message_handler - event, context = yield msg_handler.create_event( + event, context = yield self.event_creation_hander.create_event( requester, event_dict, token_id=requester.access_token_id, txn_id=txn_id, ) - yield msg_handler.send_nonmember_event(requester, event, context) + yield self.event_creation_hander.send_nonmember_event( + requester, event, context, + ) ret = {} if event: @@ -184,6 +186,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomSendEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): # /rooms/$roomid/send/$event_type[/$txn_id] @@ -205,8 +208,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): if 'ts' in request.args and requester.app_service: event_dict['origin_server_ts'] = parse_integer(request, "ts", 0) - msg_handler = self.handlers.message_handler - event = yield msg_handler.create_and_send_nonmember_event( + event = yield self.event_creation_hander.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id, @@ -670,6 +672,7 @@ class RoomRedactEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomRedactEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_handler = hs.get_event_creation_handler() def register(self, http_server): PATTERNS = ("/rooms/(?P[^/]*)/redact/(?P[^/]*)") 
@@ -680,8 +683,7 @@ class RoomRedactEventRestServlet(ClientV1RestServlet): requester = yield self.auth.get_user_by_req(request) content = parse_json_object_from_request(request) - msg_handler = self.handlers.message_handler - event = yield msg_handler.create_and_send_nonmember_event( + event = yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Redaction, diff --git a/synapse/server.py b/synapse/server.py index 3173aed1d..fbd602d40 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -55,6 +55,7 @@ from synapse.handlers.read_marker import ReadMarkerHandler from synapse.handlers.user_directory import UserDirectoryHandler from synapse.handlers.groups_local import GroupsLocalHandler from synapse.handlers.profile import ProfileHandler +from synapse.handlers.message import EventCreationHandler from synapse.groups.groups_server import GroupsServerHandler from synapse.groups.attestations import GroupAttestionRenewer, GroupAttestationSigning from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory @@ -118,6 +119,7 @@ class HomeServer(object): 'application_service_handler', 'device_message_handler', 'profile_handler', + 'event_creation_handler', 'deactivate_account_handler', 'set_password_handler', 'notifier', @@ -276,6 +278,9 @@ class HomeServer(object): def build_profile_handler(self): return ProfileHandler(self) + def build_event_creation_handler(self): + return EventCreationHandler(self) + def build_deactivate_account_handler(self): return DeactivateAccountHandler(self) From 25c0a020f430de2781041c8104a5cf92321a60a2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:52:12 +0000 Subject: [PATCH 060/200] Updates tests --- tests/storage/test_redaction.py | 9 ++++----- tests/storage/test_roommember.py | 5 ++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 6afaca3a6..de6d7904e 100644 --- 
a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -36,8 +36,7 @@ class RedactionTestCase(unittest.TestCase): self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() - self.handlers = hs.get_handlers() - self.message_handler = self.handlers.message_handler + self.event_creation_handler = hs.get_event_creation_handler() self.u_alice = UserID.from_string("@alice:test") self.u_bob = UserID.from_string("@bob:test") @@ -59,7 +58,7 @@ class RedactionTestCase(unittest.TestCase): "content": content, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) @@ -79,7 +78,7 @@ class RedactionTestCase(unittest.TestCase): "content": {"body": body, "msgtype": u"message"}, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) @@ -98,7 +97,7 @@ class RedactionTestCase(unittest.TestCase): "redacts": event_id, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 1be7d932f..4aff38bd5 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -37,8 +37,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): # storage logic self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() - self.handlers = hs.get_handlers() - self.message_handler = self.handlers.message_handler + self.event_creation_handler = hs.get_event_creation_handler() self.u_alice = UserID.from_string("@alice:test") self.u_bob = UserID.from_string("@bob:test") @@ -58,7 +57,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): "content": {"membership": membership}, }) - event, context = yield 
self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) From 3c7b480ba33c68bfc4e98de57b6874c32011c8f4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:07:13 +0000 Subject: [PATCH 061/200] Factor out common code for search insert we can reuse the same code as is used for event insert, for doing the background index population. --- synapse/storage/search.py | 95 +++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 4f38a587c..f1ac9ba0f 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -13,19 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import namedtuple +import logging +import re +import ujson as json + from twisted.internet import defer from .background_updates import BackgroundUpdateStore from synapse.api.errors import SynapseError from synapse.storage.engines import PostgresEngine, Sqlite3Engine -import logging -import re -import ujson as json - logger = logging.getLogger(__name__) +SearchEntry = namedtuple('SearchEntry', [ + 'key', 'value', 'event_id', 'room_id', 'stream_ordering', + 'origin_server_ts', +]) + class SearchStore(BackgroundUpdateStore): @@ -49,16 +55,17 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): + # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) - INSERT_CLUMP_SIZE = 1000 TYPES = ["m.room.name", "m.room.message", "m.room.topic"] def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, event_id, room_id, type, content FROM events" + "SELECT stream_ordering, 
event_id, room_id, type, content, " + " origin_server_ts FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" @@ -67,6 +74,10 @@ class SearchStore(BackgroundUpdateStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + # we could stream straight from the results into + # store_search_entries_txn with a generator function, but that + # would mean having two cursors open on the database at once. + # Instead we just build a list of results. rows = self.cursor_to_dict(txn) if not rows: return 0 @@ -79,6 +90,8 @@ class SearchStore(BackgroundUpdateStore): event_id = row["event_id"] room_id = row["room_id"] etype = row["type"] + stream_ordering = row["stream_ordering"] + origin_server_ts = row["origin_server_ts"] try: content = json.loads(row["content"]) except Exception: @@ -93,6 +106,8 @@ class SearchStore(BackgroundUpdateStore): elif etype == "m.room.name": key = "content.name" value = content["name"] + else: + raise Exception("unexpected event type %s" % etype) except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. @@ -103,25 +118,16 @@ class SearchStore(BackgroundUpdateStore): # then skip over it continue - event_search_rows.append((event_id, room_id, key, value)) + event_search_rows.append(SearchEntry( + key=key, + value=value, + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + origin_server_ts=origin_server_ts, + )) - if isinstance(self.database_engine, PostgresEngine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - else: - # This should be unreachable. 
- raise Exception("Unrecognized database engine") - - for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) + self.store_search_entries_txn(txn, event_search_rows) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -251,26 +257,49 @@ class SearchStore(BackgroundUpdateStore): key (str): value (str): """ + self.store_search_entries_txn( + txn, + (SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ),), + ) + + def store_search_entries_txn(self, txn, entries): + """Add entries to the search table + + Args: + txn (cursor): + entries (iterable[SearchEntry]): + entries to be added to the table + """ if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) + + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + entry.stream_ordering, entry.origin_server_ts, + ) for entry in entries) + + txn.executemany(sql, args) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + ) for entry in entries) + + txn.executemany(sql, args) else: # This should be unreachable. 
raise Exception("Unrecognized database engine") From ee6fb4cf8560534a9acc61b075c09dceeca83e85 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 20 Jan 2018 00:23:36 +0000 Subject: [PATCH 062/200] Remove redundant return value from _calculate_state_delta we already have the state from _get_new_state_after_events, so returning it from _calculate_state_delta is just confusing. --- synapse/storage/events.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dd28c2efe..2fead9eb0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -342,8 +342,20 @@ class EventsStore(SQLBaseStore): # NB: Assumes that we are only persisting events for one room # at a time. + + # map room_id->list[event_ids] giving the new forward + # extremities in each room new_forward_extremeties = {} + + # map room_id->(type,state_key)->event_id tracking the full + # state in each room after adding these events current_state_for_room = {} + + # map room_id->(to_delete, to_insert) where each entry is + # a map (type,key)->event_id giving the state delta in each + # room + state_delta_for_room = {} + if not backfilled: with Measure(self._clock, "_calculate_state_and_extrem"): # Work out the new "current state" for each room. 
@@ -393,11 +405,12 @@ class EventsStore(SQLBaseStore): ev_ctx_rm, new_latest_event_ids, ) if current_state is not None: + current_state_for_room[room_id] = current_state delta = yield self._calculate_state_delta( room_id, current_state, ) if delta is not None: - current_state_for_room[room_id] = delta + state_delta_for_room[room_id] = delta yield self.runInteraction( "persist_events", @@ -405,7 +418,7 @@ class EventsStore(SQLBaseStore): events_and_contexts=chunk, backfilled=backfilled, delete_existing=delete_existing, - current_state_for_room=current_state_for_room, + state_delta_for_room=state_delta_for_room, new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) @@ -422,7 +435,7 @@ class EventsStore(SQLBaseStore): event_counter.inc(event.type, origin_type, origin_entity) - for room_id, (_, _, new_state) in current_state_for_room.iteritems(): + for room_id, new_state in current_state_for_room.iteritems(): self.get_current_state_ids.prefill( (room_id, ), new_state ) @@ -586,10 +599,10 @@ class EventsStore(SQLBaseStore): Assumes that we are only persisting events for one room at a time. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + 2-tuple (to_delete, to_insert) where both are state dicts, i.e. (type, state_key) -> event_id. `to_delete` are the entries to first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. + to insert. 
""" existing_state = yield self.get_current_state_ids(room_id) @@ -610,7 +623,7 @@ class EventsStore(SQLBaseStore): if ev_id in events_to_insert } - defer.returnValue((to_delete, to_insert, current_state)) + defer.returnValue((to_delete, to_insert)) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -670,7 +683,7 @@ class EventsStore(SQLBaseStore): @log_function def _persist_events_txn(self, txn, events_and_contexts, backfilled, - delete_existing=False, current_state_for_room={}, + delete_existing=False, state_delta_for_room={}, new_forward_extremeties={}): """Insert some number of room events into the necessary database tables. @@ -686,7 +699,7 @@ class EventsStore(SQLBaseStore): delete_existing (bool): True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. - current_state_for_room (dict[str, (list[str], list[str])]): + state_delta_for_room (dict[str, (list[str], list[str])]): The current-state delta for each room. 
For each room, a tuple (to_delete, to_insert), being a list of event ids to be removed from the current state, and a list of event ids to be added to @@ -698,7 +711,7 @@ class EventsStore(SQLBaseStore): """ max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering - self._update_current_state_txn(txn, current_state_for_room, max_stream_order) + self._update_current_state_txn(txn, state_delta_for_room, max_stream_order) self._update_forward_extremities_txn( txn, @@ -764,7 +777,7 @@ class EventsStore(SQLBaseStore): def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in state_delta_by_room.iteritems(): - to_delete, to_insert, _ = current_state_tuple + to_delete, to_insert = current_state_tuple txn.executemany( "DELETE FROM current_state_events WHERE event_id = ?", [(ev_id,) for ev_id in to_delete.itervalues()], From 447aed42d22d3ece245c69f397d348b3a5b7bfa8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 15:40:41 +0000 Subject: [PATCH 063/200] Add event_map param to resolve_state_groups --- synapse/state.py | 34 ++++++++++++++++++++++++++++++---- synapse/storage/events.py | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 273f9911c..6c2aaa5e7 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -308,7 +308,7 @@ class StateHandler(object): )) result = yield self._state_resolution_handler.resolve_state_groups( - room_id, state_groups_ids, self._state_map_factory, + room_id, state_groups_ids, None, self._state_map_factory, ) defer.returnValue(result) @@ -371,7 +371,9 @@ class StateResolutionHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, room_id, state_groups_ids, state_map_factory): + def resolve_state_groups( + self, room_id, state_groups_ids, event_map, state_map_factory, + ): """Resolves conflicts between a set of state groups Always generates a new state 
group (unless we hit the cache), so should @@ -383,6 +385,14 @@ class StateResolutionHandler(object): map from state group id to the state in that state group (where 'state' is a map from state key to event id) + event_map(dict[str,FrozenEvent]|None): + a dict from event_id to event, for any events that we happen to + have in flight (eg, those currently being persisted). This will be + used as a starting point fof finding the state we need; any missing + events will be requested via state_map_factory. + + If None, all events will be fetched via state_map_factory. + Returns: Deferred[_StateCacheEntry]: resolved state """ @@ -423,6 +433,7 @@ class StateResolutionHandler(object): with Measure(self.clock, "state._resolve_events"): new_state = yield resolve_events_with_factory( state_groups_ids.values(), + event_map=event_map, state_map_factory=state_map_factory, ) else: @@ -555,11 +566,20 @@ def _seperate(state_sets): @defer.inlineCallbacks -def resolve_events_with_factory(state_sets, state_map_factory): +def resolve_events_with_factory(state_sets, event_map, state_map_factory): """ Args: state_sets(list): List of dicts of (type, state_key) -> event_id, which are the different state groups to resolve. + + event_map(dict[str,FrozenEvent]|None): + a dict from event_id to event, for any events that we happen to + have in flight (eg, those currently being persisted). This will be + used as a starting point fof finding the state we need; any missing + events will be requested via state_map_factory. + + If None, all events will be fetched via state_map_factory. + state_map_factory(func): will be called with a list of event_ids that are needed, and should return with a Deferred of dict of event_id to event. 
@@ -580,12 +600,16 @@ def resolve_events_with_factory(state_sets, state_map_factory): for event_ids in conflicted_state.itervalues() for event_id in event_ids ) + if event_map is not None: + needed_events -= set(event_map.iterkeys()) logger.info("Asking for %d conflicted events", len(needed_events)) # dict[str, FrozenEvent]: a map from state event id to event. Only includes - # the state events which are in conflict. + # the state events which are in conflict (and those in event_map) state_map = yield state_map_factory(needed_events) + if event_map is not None: + state_map.update(event_map) # get the ids of the auth events which allow us to authenticate the # conflicted state, picking only from the unconflicting state. @@ -597,6 +621,8 @@ def resolve_events_with_factory(state_sets, state_map_factory): new_needed_events = set(auth_events.itervalues()) new_needed_events -= needed_events + if event_map is not None: + new_needed_events -= set(event_map.iterkeys()) logger.info("Asking for %d auth events", len(new_needed_events)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2fead9eb0..7b912ad41 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -586,6 +586,7 @@ class EventsStore(SQLBaseStore): current_state = yield resolve_events_with_factory( state_sets, + event_map={}, state_map_factory=get_events, ) defer.returnValue(current_state) From 9fcbbe8e7d7557fef7fe03533166b376d6fa82ef Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 09:49:15 +0000 Subject: [PATCH 064/200] Check that events being persisted have state_group --- synapse/storage/events.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7b912ad41..9bceded7b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -515,16 +515,21 @@ class EventsStore(SQLBaseStore): if ctx.current_state_ids is None: raise Exception("Unknown current 
state") + if ctx.state_group is None: + # I don't think this can happen, but let's double-check + raise Exception( + "Context for new extremity event %s has no state " + "group" % event_id, + ) + # If we've already seen the state group don't bother adding # it to the state sets again if ctx.state_group not in state_groups: state_sets.append(ctx.current_state_ids) if ctx.delta_ids or hasattr(ev, "state_key"): was_updated = True - if ctx.state_group: - # Add this as a seen state group (if it has a state - # group) - state_groups.add(ctx.state_group) + # Add this as a seen state group + state_groups.add(ctx.state_group) break else: # If we couldn't find it, then we'll need to pull From 225dc3b4cb8875fff52180d2f3b1e386dec26f4d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 10:17:55 +0000 Subject: [PATCH 065/200] Flatten _get_new_state_after_events rejig the if statements to simplify the logic and reduce indentation --- synapse/storage/events.py | 92 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 9bceded7b..1b5dffe1c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -503,6 +503,10 @@ class EventsStore(SQLBaseStore): None if there are no changes to the room state, or a dict of (type, state_key) -> event_id]. 
""" + + if not new_latest_event_ids: + defer.returnValue({}) + state_sets = [] state_groups = set() missing_event_ids = [] @@ -537,6 +541,9 @@ class EventsStore(SQLBaseStore): was_updated = True missing_event_ids.append(event_id) + if not was_updated: + return + if missing_event_ids: # Now pull out the state for any missing events from DB event_to_groups = yield self._get_state_group_for_events( @@ -549,54 +556,49 @@ class EventsStore(SQLBaseStore): group_to_state = yield self._get_state_for_groups(groups) state_sets.extend(group_to_state.itervalues()) - if not new_latest_event_ids: - defer.returnValue({}) - elif was_updated: - if len(state_sets) == 1: - # If there is only one state set, then we know what the current - # state is. - defer.returnValue(state_sets[0]) - else: - # We work out the current state by passing the state sets to the - # state resolution algorithm. It may ask for some events, including - # the events we have yet to persist, so we need a slightly more - # complicated event lookup function than simply looking the events - # up in the db. + if len(state_sets) == 1: + # If there is only one state set, then we know what the current + # state is. + defer.returnValue(state_sets[0]) - logger.info( - "Resolving state with %i state sets", len(state_sets), + # We work out the current state by passing the state sets to the + # state resolution algorithm. It may ask for some events, including + # the events we have yet to persist, so we need a slightly more + # complicated event lookup function than simply looking the events + # up in the db. + + logger.info( + "Resolving state with %i state sets", len(state_sets), + ) + + events_map = {ev.event_id: ev for ev, _ in events_context} + + @defer.inlineCallbacks + def get_events(ev_ids): + # We get the events by first looking at the list of events we + # are trying to persist, and then fetching the rest from the DB. 
+ db = [] + to_return = {} + for ev_id in ev_ids: + ev = events_map.get(ev_id, None) + if ev: + to_return[ev_id] = ev + else: + db.append(ev_id) + + if db: + evs = yield self.get_events( + ev_ids, get_prev_content=False, check_redacted=False, ) + to_return.update(evs) + defer.returnValue(to_return) - events_map = {ev.event_id: ev for ev, _ in events_context} - - @defer.inlineCallbacks - def get_events(ev_ids): - # We get the events by first looking at the list of events we - # are trying to persist, and then fetching the rest from the DB. - db = [] - to_return = {} - for ev_id in ev_ids: - ev = events_map.get(ev_id, None) - if ev: - to_return[ev_id] = ev - else: - db.append(ev_id) - - if db: - evs = yield self.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ) - to_return.update(evs) - defer.returnValue(to_return) - - current_state = yield resolve_events_with_factory( - state_sets, - event_map={}, - state_map_factory=get_events, - ) - defer.returnValue(current_state) - else: - return + current_state = yield resolve_events_with_factory( + state_sets, + event_map={}, + state_map_factory=get_events, + ) + defer.returnValue(current_state) @defer.inlineCallbacks def _calculate_state_delta(self, room_id, current_state): From ebfe64e3d69d0047ee9902a05beaf0249f11e072 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 11:06:15 +0000 Subject: [PATCH 066/200] Use StateResolutionHandler to resolve state in persist events ... and thus benefit (hopefully) from its cache. 
--- synapse/storage/events.py | 72 +++++++++++++-------------------------- 1 file changed, 24 insertions(+), 48 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1b5dffe1c..ca1d4a398 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -27,7 +27,6 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.state import resolve_events_with_factory from synapse.util.caches.descriptors import cached from synapse.types import get_domain_from_id @@ -237,6 +236,8 @@ class EventsStore(SQLBaseStore): self._event_persist_queue = _EventPeristenceQueue() + self._state_resolution_handler = hs.get_state_resolution_handler() + def persist_events(self, events_and_contexts, backfilled=False): """ Write events to the database @@ -402,6 +403,7 @@ class EventsStore(SQLBaseStore): "Calculating state delta for room %s", room_id, ) current_state = yield self._get_new_state_after_events( + room_id, ev_ctx_rm, new_latest_event_ids, ) if current_state is not None: @@ -487,11 +489,14 @@ class EventsStore(SQLBaseStore): defer.returnValue(new_latest_event_ids) @defer.inlineCallbacks - def _get_new_state_after_events(self, events_context, new_latest_event_ids): + def _get_new_state_after_events(self, room_id, events_context, new_latest_event_ids): """Calculate the current state dict after adding some new events to a room Args: + room_id (str): + room to which the events are being added. 
Used for logging etc + events_context (list[(EventBase, EventContext)]): events and contexts which are being added to the room @@ -507,8 +512,8 @@ class EventsStore(SQLBaseStore): if not new_latest_event_ids: defer.returnValue({}) - state_sets = [] - state_groups = set() + # map from state_group to ((type, key) -> event_id) state map + state_groups = {} missing_event_ids = [] was_updated = False for event_id in new_latest_event_ids: @@ -529,11 +534,9 @@ class EventsStore(SQLBaseStore): # If we've already seen the state group don't bother adding # it to the state sets again if ctx.state_group not in state_groups: - state_sets.append(ctx.current_state_ids) + state_groups[ctx.state_group] = ctx.current_state_ids if ctx.delta_ids or hasattr(ev, "state_key"): was_updated = True - # Add this as a seen state group - state_groups.add(ctx.state_group) break else: # If we couldn't find it, then we'll need to pull @@ -550,55 +553,28 @@ class EventsStore(SQLBaseStore): missing_event_ids, ) - groups = set(event_to_groups.itervalues()) - state_groups + groups = set(event_to_groups.itervalues()) - set(state_groups.iterkeys()) if groups: group_to_state = yield self._get_state_for_groups(groups) - state_sets.extend(group_to_state.itervalues()) + state_groups.update(group_to_state) - if len(state_sets) == 1: - # If there is only one state set, then we know what the current + if len(state_groups) == 1: + # If there is only one state group, then we know what the current # state is. - defer.returnValue(state_sets[0]) + defer.returnValue(state_groups.values()[0]) - # We work out the current state by passing the state sets to the - # state resolution algorithm. It may ask for some events, including - # the events we have yet to persist, so we need a slightly more - # complicated event lookup function than simply looking the events - # up in the db. 
- - logger.info( - "Resolving state with %i state sets", len(state_sets), - ) - - events_map = {ev.event_id: ev for ev, _ in events_context} - - @defer.inlineCallbacks def get_events(ev_ids): - # We get the events by first looking at the list of events we - # are trying to persist, and then fetching the rest from the DB. - db = [] - to_return = {} - for ev_id in ev_ids: - ev = events_map.get(ev_id, None) - if ev: - to_return[ev_id] = ev - else: - db.append(ev_id) - - if db: - evs = yield self.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ) - to_return.update(evs) - defer.returnValue(to_return) - - current_state = yield resolve_events_with_factory( - state_sets, - event_map={}, - state_map_factory=get_events, + return self.get_events( + ev_ids, get_prev_content=False, check_redacted=False, + ) + events_map = {ev.event_id: ev for ev, _ in events_context} + logger.debug("calling resolve_state_groups from preserve_events") + res = yield self._state_resolution_handler.resolve_state_groups( + room_id, state_groups, events_map, get_events ) - defer.returnValue(current_state) + + defer.returnValue(res.state) @defer.inlineCallbacks def _calculate_state_delta(self, room_id, current_state): From 3d33eef6fcbba474664a9bccdcb8822c6f72ee8c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 14:31:24 +0000 Subject: [PATCH 067/200] Store state groups separately from events (#2784) * Split state group persist into seperate storage func * Add per database engine code for state group id gen * Move store_state_group to StateReadStore This allows other workers to use it, and so resolve state. * Hook up store_state_group * Fix tests * Rename _store_mult_state_groups_txn * Rename StateGroupReadStore * Remove redundant _have_persisted_state_group_txn * Update comments * Comment compute_event_context * Set start val for state_group_id_seq ... 
otherwise we try to recreate old state groups * Update comments * Don't store state for outliers * Update comment * Update docstring as state groups are ints --- synapse/events/snapshot.py | 4 +- synapse/handlers/federation.py | 24 +- synapse/replication/slave/storage/events.py | 4 +- synapse/state.py | 58 ++++- synapse/storage/__init__.py | 1 - synapse/storage/engines/postgres.py | 6 + synapse/storage/engines/sqlite3.py | 19 ++ synapse/storage/events.py | 10 +- .../schema/delta/47/state_group_seq.py | 37 +++ synapse/storage/state.py | 224 +++++++++--------- .../replication/slave/storage/test_events.py | 4 +- tests/test_state.py | 154 ++++++------ 12 files changed, 341 insertions(+), 204 deletions(-) create mode 100644 synapse/storage/schema/delta/47/state_group_seq.py diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index e9a732ff0..87e3fe7b9 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -25,7 +25,9 @@ class EventContext(object): The current state map excluding the current event. (type, state_key) -> event_id - state_group (int): state group id + state_group (int|None): state group id, if the state has been stored + as a state group. This is usually only None if e.g. the event is + an outlier. rejected (bool|str): A rejection reason if the event was rejected, else False diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8ee9434c9..643e813b1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1831,8 +1831,8 @@ class FederationHandler(BaseHandler): current_state = set(e.event_id for e in auth_events.values()) different_auth = event_auth_events - current_state - self._update_context_for_auth_events( - context, auth_events, event_key, + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, ) if different_auth and not event.internal_metadata.is_outlier(): @@ -1913,8 +1913,8 @@ class FederationHandler(BaseHandler): # 4. 
Look at rejects and their proofs. # TODO. - self._update_context_for_auth_events( - context, auth_events, event_key, + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, ) try: @@ -1923,11 +1923,15 @@ class FederationHandler(BaseHandler): logger.warn("Failed auth resolution for %r because %s", event, e) raise e - def _update_context_for_auth_events(self, context, auth_events, + @defer.inlineCallbacks + def _update_context_for_auth_events(self, event, context, auth_events, event_key): - """Update the state_ids in an event context after auth event resolution + """Update the state_ids in an event context after auth event resolution, + storing the changes as a new state group. Args: + event (Event): The event we're handling the context for + context (synapse.events.snapshot.EventContext): event context to be updated @@ -1950,7 +1954,13 @@ class FederationHandler(BaseHandler): context.prev_state_ids.update({ k: a.event_id for k, a in auth_events.iteritems() }) - context.state_group = self.store.get_next_state_group() + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=context.prev_group, + delta_ids=context.delta_ids, + current_state_ids=context.current_state_ids, + ) @defer.inlineCallbacks def construct_auth_difference(self, local_auth, remote_auth): diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 29d7296b4..8acb5df0f 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.roommember import RoomMemberStore -from synapse.storage.state import StateGroupReadStore +from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore 
from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore @@ -37,7 +37,7 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(StateGroupReadStore, BaseSlavedStore): +class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) diff --git a/synapse/state.py b/synapse/state.py index 273f9911c..cc93bbcb6 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -183,8 +183,15 @@ class StateHandler(object): def compute_event_context(self, event, old_state=None): """Build an EventContext structure for the event. + This works out what the current state should be for the event, and + generates a new state group if necessary. + Args: event (synapse.events.EventBase): + old_state (dict|None): The state at the event if it can't be + calculated from existing events. This is normally only specified + when receiving an event from federation where we don't have the + prev events for, e.g. when backfilling. Returns: synapse.events.snapshot.EventContext: """ @@ -208,15 +215,22 @@ class StateHandler(object): context.current_state_ids = {} context.prev_state_ids = {} context.prev_state_events = [] - context.state_group = self.store.get_next_state_group() + + # We don't store state for outliers, so we don't generate a state + # froup for it. + context.state_group = None + defer.returnValue(context) if old_state: + # We already have the state, so we don't need to calculate it. + # Let's just correctly fill out the context and create a + # new state group for it. 
+ context = EventContext() context.prev_state_ids = { (s.type, s.state_key): s.event_id for s in old_state } - context.state_group = self.store.get_next_state_group() if event.is_state(): key = (event.type, event.state_key) @@ -229,6 +243,14 @@ class StateHandler(object): else: context.current_state_ids = context.prev_state_ids + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=None, + delta_ids=None, + current_state_ids=context.current_state_ids, + ) + context.prev_state_events = [] defer.returnValue(context) @@ -242,7 +264,8 @@ class StateHandler(object): context = EventContext() context.prev_state_ids = curr_state if event.is_state(): - context.state_group = self.store.get_next_state_group() + # If this is a state event then we need to create a new state + # group for the state after this event. key = (event.type, event.state_key) if key in context.prev_state_ids: @@ -253,24 +276,43 @@ class StateHandler(object): context.current_state_ids[key] = event.event_id if entry.state_group: + # If the state at the event has a state group assigned then + # we can use that as the prev group context.prev_group = entry.state_group context.delta_ids = { key: event.event_id } elif entry.prev_group: + # If the state at the event only has a prev group, then we can + # use that as a prev group too. 
context.prev_group = entry.prev_group context.delta_ids = dict(entry.delta_ids) context.delta_ids[key] = event.event_id - else: - if entry.state_group is None: - entry.state_group = self.store.get_next_state_group() - entry.state_id = entry.state_group - context.state_group = entry.state_group + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=context.prev_group, + delta_ids=context.delta_ids, + current_state_ids=context.current_state_ids, + ) + else: context.current_state_ids = context.prev_state_ids context.prev_group = entry.prev_group context.delta_ids = entry.delta_ids + if entry.state_group is None: + entry.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=entry.prev_group, + delta_ids=entry.delta_ids, + current_state_ids=context.current_state_ids, + ) + entry.state_id = entry.state_group + + context.state_group = entry.state_group + context.prev_state_events = [] defer.returnValue(context) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index d01d46338..f8fbd02ce 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -124,7 +124,6 @@ class DataStore(RoomMemberStore, RoomStore, ) self._transaction_id_gen = IdGenerator(db_conn, "sent_transactions", "id") - self._state_groups_id_gen = IdGenerator(db_conn, "state_groups", "id") self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id") self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index a6ae79dfa..8a0386c1a 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -62,3 +62,9 @@ class PostgresEngine(object): def lock_table(self, txn, table): txn.execute("LOCK TABLE %s in EXCLUSIVE MODE" % (table,)) + + def get_next_state_group_id(self, 
txn): + """Returns an int that can be used as a new state_group ID + """ + txn.execute("SELECT nextval('state_group_id_seq')") + return txn.fetchone()[0] diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite3.py index 755c9a1f0..60f0fa7fb 100644 --- a/synapse/storage/engines/sqlite3.py +++ b/synapse/storage/engines/sqlite3.py @@ -16,6 +16,7 @@ from synapse.storage.prepare_database import prepare_database import struct +import threading class Sqlite3Engine(object): @@ -24,6 +25,11 @@ class Sqlite3Engine(object): def __init__(self, database_module, database_config): self.module = database_module + # The current max state_group, or None if we haven't looked + # in the DB yet. + self._current_state_group_id = None + self._current_state_group_id_lock = threading.Lock() + def check_database(self, txn): pass @@ -43,6 +49,19 @@ class Sqlite3Engine(object): def lock_table(self, txn, table): return + def get_next_state_group_id(self, txn): + """Returns an int that can be used as a new state_group ID + """ + # We do application locking here since if we're using sqlite then + # we are a single process synapse. + with self._current_state_group_id_lock: + if self._current_state_group_id is None: + txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups") + self._current_state_group_id = txn.fetchone()[0] + + self._current_state_group_id += 1 + return self._current_state_group_id + # Following functions taken from: https://github.com/coleifer/peewee diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2fead9eb0..af56f1ee5 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -755,9 +755,8 @@ class EventsStore(SQLBaseStore): events_and_contexts=events_and_contexts, ) - # Insert into the state_groups, state_groups_state, and - # event_to_state_groups tables. - self._store_mult_state_groups_txn(txn, events_and_contexts) + # Insert into event_to_state_groups. 
+ self._store_event_state_mappings_txn(txn, events_and_contexts) # _store_rejected_events_txn filters out any events which were # rejected, and returns the filtered list. @@ -992,10 +991,9 @@ class EventsStore(SQLBaseStore): # an outlier in the database. We now have some state at that # so we need to update the state_groups table with that state. - # insert into the state_group, state_groups_state and - # event_to_state_groups tables. + # insert into event_to_state_groups. try: - self._store_mult_state_groups_txn(txn, ((event, context),)) + self._store_event_state_mappings_txn(txn, ((event, context),)) except Exception: logger.exception("") raise diff --git a/synapse/storage/schema/delta/47/state_group_seq.py b/synapse/storage/schema/delta/47/state_group_seq.py new file mode 100644 index 000000000..f6766501d --- /dev/null +++ b/synapse/storage/schema/delta/47/state_group_seq.py @@ -0,0 +1,37 @@ +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.storage.engines import PostgresEngine + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + # if we already have some state groups, we want to start making new + # ones with a higher id. 
+ cur.execute("SELECT max(id) FROM state_groups") + row = cur.fetchone() + + if row[0] is None: + start_val = 1 + else: + start_val = row[0] + 1 + + cur.execute( + "CREATE SEQUENCE state_group_id_seq START WITH %s", + (start_val, ), + ) + + +def run_upgrade(*args, **kwargs): + pass diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 360e3e435..adb48df73 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -42,11 +42,8 @@ class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delt return len(self.delta_ids) if self.delta_ids else 0 -class StateGroupReadStore(SQLBaseStore): - """The read-only parts of StateGroupStore - - None of these functions write to the state tables, so are suitable for - including in the SlavedStores. +class StateGroupWorkerStore(SQLBaseStore): + """The parts of StateGroupStore that can be called from workers. """ STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" @@ -54,7 +51,7 @@ class StateGroupReadStore(SQLBaseStore): CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" def __init__(self, db_conn, hs): - super(StateGroupReadStore, self).__init__(db_conn, hs) + super(StateGroupWorkerStore, self).__init__(db_conn, hs) self._state_group_cache = DictionaryCache( "*stateGroupCache*", 100000 * CACHE_SIZE_FACTOR @@ -549,8 +546,117 @@ class StateGroupReadStore(SQLBaseStore): defer.returnValue(results) + def store_state_group(self, event_id, room_id, prev_group, delta_ids, + current_state_ids): + """Store a new set of state, returning a newly assigned state group. -class StateStore(StateGroupReadStore, BackgroundUpdateStore): + Args: + event_id (str): The event ID for which the state was calculated + room_id (str) + prev_group (int|None): A previous state group for the room, optional. + delta_ids (dict|None): The delta between state at `prev_group` and + `current_state_ids`, if `prev_group` was given. Same format as + `current_state_ids`. 
+ current_state_ids (dict): The state to store. Map of (type, state_key) + to event_id. + + Returns: + Deferred[int]: The state group ID + """ + def _store_state_group_txn(txn): + if current_state_ids is None: + # AFAIK, this can never happen + raise Exception("current_state_ids cannot be None") + + state_group = self.database_engine.get_next_state_group_id(txn) + + self._simple_insert_txn( + txn, + table="state_groups", + values={ + "id": state_group, + "room_id": room_id, + "event_id": event_id, + }, + ) + + # We persist as a delta if we can, while also ensuring the chain + # of deltas isn't tooo long, as otherwise read performance degrades. + if prev_group: + is_in_db = self._simple_select_one_onecol_txn( + txn, + table="state_groups", + keyvalues={"id": prev_group}, + retcol="id", + allow_none=True, + ) + if not is_in_db: + raise Exception( + "Trying to persist state with unpersisted prev_group: %r" + % (prev_group,) + ) + + potential_hops = self._count_state_group_hops_txn( + txn, prev_group + ) + if prev_group and potential_hops < MAX_STATE_DELTA_HOPS: + self._simple_insert_txn( + txn, + table="state_group_edges", + values={ + "state_group": state_group, + "prev_state_group": prev_group, + }, + ) + + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": state_group, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in delta_ids.iteritems() + ], + ) + else: + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": state_group, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in current_state_ids.iteritems() + ], + ) + + # Prefill the state group cache with this group. + # It's fine to use the sequence like this as the state group map + # is immutable. 
(If the map wasn't immutable then this prefill could + # race with another update) + txn.call_after( + self._state_group_cache.update, + self._state_group_cache.sequence, + key=state_group, + value=dict(current_state_ids), + full=True, + ) + + return state_group + + return self.runInteraction("store_state_group", _store_state_group_txn) + + +class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): """ Keeps track of the state at a given event. This is done by the concept of `state groups`. Every event is a assigned @@ -591,27 +697,12 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): where_clause="type='m.room.member'", ) - def _have_persisted_state_group_txn(self, txn, state_group): - txn.execute( - "SELECT count(*) FROM state_groups WHERE id = ?", - (state_group,) - ) - row = txn.fetchone() - return row and row[0] - - def _store_mult_state_groups_txn(self, txn, events_and_contexts): + def _store_event_state_mappings_txn(self, txn, events_and_contexts): state_groups = {} for event, context in events_and_contexts: if event.internal_metadata.is_outlier(): continue - if context.current_state_ids is None: - # AFAIK, this can never happen - logger.error( - "Non-outlier event %s had current_state_ids==None", - event.event_id) - continue - # if the event was rejected, just give it the same state as its # predecessor. if context.rejected: @@ -620,90 +711,6 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): state_groups[event.event_id] = context.state_group - if self._have_persisted_state_group_txn(txn, context.state_group): - continue - - self._simple_insert_txn( - txn, - table="state_groups", - values={ - "id": context.state_group, - "room_id": event.room_id, - "event_id": event.event_id, - }, - ) - - # We persist as a delta if we can, while also ensuring the chain - # of deltas isn't tooo long, as otherwise read performance degrades. 
- if context.prev_group: - is_in_db = self._simple_select_one_onecol_txn( - txn, - table="state_groups", - keyvalues={"id": context.prev_group}, - retcol="id", - allow_none=True, - ) - if not is_in_db: - raise Exception( - "Trying to persist state with unpersisted prev_group: %r" - % (context.prev_group,) - ) - - potential_hops = self._count_state_group_hops_txn( - txn, context.prev_group - ) - if context.prev_group and potential_hops < MAX_STATE_DELTA_HOPS: - self._simple_insert_txn( - txn, - table="state_group_edges", - values={ - "state_group": context.state_group, - "prev_state_group": context.prev_group, - }, - ) - - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.delta_ids.iteritems() - ], - ) - else: - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.current_state_ids.iteritems() - ], - ) - - # Prefill the state group cache with this group. - # It's fine to use the sequence like this as the state group map - # is immutable. 
(If the map wasn't immutable then this prefill could - # race with another update) - txn.call_after( - self._state_group_cache.update, - self._state_group_cache.sequence, - key=context.state_group, - value=dict(context.current_state_ids), - full=True, - ) - self._simple_insert_many_txn( txn, table="event_to_state_groups", @@ -763,9 +770,6 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): return count - def get_next_state_group(self): - return self._state_groups_id_gen.get_next() - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 105e1228b..f430cce93 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -226,11 +226,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): context = EventContext() context.current_state_ids = state_ids context.prev_state_ids = state_ids - elif not backfill: + else: state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - else: - context = EventContext() context.push_actions = push_actions diff --git a/tests/test_state.py b/tests/test_state.py index d16e1b3b8..a5c5e5595 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -80,14 +80,14 @@ class StateGroupStore(object): return defer.succeed(groups) - def store_state_groups(self, event, context): - if context.current_state_ids is None: - return + def store_state_group(self, event_id, room_id, prev_group, delta_ids, + current_state_ids): + state_group = self._next_group + self._next_group += 1 - state_events = dict(context.current_state_ids) + self._group_to_state[state_group] = dict(current_state_ids) - self._group_to_state[context.state_group] = state_events - self._event_to_state_group[event.event_id] = context.state_group + return 
state_group def get_events(self, event_ids, **kwargs): return { @@ -95,10 +95,19 @@ class StateGroupStore(object): if e_id in self._event_id_to_event } + def get_state_group_delta(self, name): + return (None, None) + def register_events(self, events): for e in events: self._event_id_to_event[e.event_id] = e + def register_event_context(self, event, context): + self._event_to_state_group[event.event_id] = context.state_group + + def register_event_id_state_group(self, event_id, state_group): + self._event_to_state_group[event_id] = state_group + class DictObj(dict): def __init__(self, **kwargs): @@ -137,15 +146,7 @@ class Graph(object): class StateTestCase(unittest.TestCase): def setUp(self): - self.store = Mock( - spec_set=[ - "get_state_groups_ids", - "add_event_hashes", - "get_events", - "get_next_state_group", - "get_state_group_delta", - ] - ) + self.store = StateGroupStore() hs = Mock(spec_set=[ "get_datastore", "get_auth", "get_state_handler", "get_clock", "get_state_resolution_handler", @@ -156,9 +157,6 @@ class StateTestCase(unittest.TestCase): hs.get_auth.return_value = Auth(hs) hs.get_state_resolution_handler = lambda: StateResolutionHandler(hs) - self.store.get_next_state_group.side_effect = Mock - self.store.get_state_group_delta.return_value = (None, None) - self.state = StateHandler(hs) self.event_id = 0 @@ -197,14 +195,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertEqual(2, len(context_store["D"].prev_state_ids)) @@ -249,16 +246,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = 
store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -315,16 +309,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -398,16 +389,13 @@ class StateTestCase(unittest.TestCase): self._add_depths(nodes, edges) graph = Graph(nodes, edges) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -467,7 +455,11 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_trivial_annotate_message(self): - event = create_event(type="test_message", name="event") + prev_event_id = "prev_event_id" + event = create_event( + type="test_message", name="event2", + prev_events=[(prev_event_id, {})], + ) old_state = [ create_event(type="test1", state_key="1"), @@ -475,11 +467,11 @@ class 
StateTestCase(unittest.TestCase): create_event(type="test2", state_key=""), ] - group_name = "group_name_1" - - self.store.get_state_groups_ids.return_value = { - group_name: {(e.type, e.state_key): e.event_id for e in old_state}, - } + group_name = self.store.store_state_group( + prev_event_id, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state}, + ) + self.store.register_event_id_state_group(prev_event_id, group_name) context = yield self.state.compute_event_context(event) @@ -492,7 +484,11 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_trivial_annotate_state(self): - event = create_event(type="state", state_key="", name="event") + prev_event_id = "prev_event_id" + event = create_event( + type="state", state_key="", name="event2", + prev_events=[(prev_event_id, {})], + ) old_state = [ create_event(type="test1", state_key="1"), @@ -500,11 +496,11 @@ class StateTestCase(unittest.TestCase): create_event(type="test2", state_key=""), ] - group_name = "group_name_1" - - self.store.get_state_groups_ids.return_value = { - group_name: {(e.type, e.state_key): e.event_id for e in old_state}, - } + group_name = self.store.store_state_group( + prev_event_id, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state}, + ) + self.store.register_event_id_state_group(prev_event_id, group_name) context = yield self.state.compute_event_context(event) @@ -517,7 +513,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_resolve_message_conflict(self): - event = create_event(type="test_message", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test_message", name="event3", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) creation = create_event( type=EventTypes.Create, state_key="" @@ -537,12 +538,12 @@ class StateTestCase(unittest.TestCase): create_event(type="test4", state_key=""), ] - store = 
StateGroupStore() - store.register_events(old_state_1) - store.register_events(old_state_2) - self.store.get_events = store.get_events + self.store.register_events(old_state_1) + self.store.register_events(old_state_2) - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual(len(context.current_state_ids), 6) @@ -550,7 +551,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_resolve_state_conflict(self): - event = create_event(type="test4", state_key="", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test4", state_key="", name="event", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) creation = create_event( type=EventTypes.Create, state_key="" @@ -575,7 +581,9 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_2) self.store.get_events = store.get_events - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual(len(context.current_state_ids), 6) @@ -583,7 +591,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_standard_depth_conflict(self): - event = create_event(type="test4", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test4", name="event", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) member_event = create_event( type=EventTypes.Member, @@ -615,7 +628,9 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_2) self.store.get_events = store.get_events - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual( 
old_state_2[2].event_id, context.current_state_ids[("test1", "1")] @@ -639,19 +654,26 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_1) store.register_events(old_state_2) - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual( old_state_1[2].event_id, context.current_state_ids[("test1", "1")] ) - def _get_context(self, event, old_state_1, old_state_2): - group_name_1 = "group_name_1" - group_name_2 = "group_name_2" + def _get_context(self, event, prev_event_id_1, old_state_1, prev_event_id_2, + old_state_2): + sg1 = self.store.store_state_group( + prev_event_id_1, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state_1}, + ) + self.store.register_event_id_state_group(prev_event_id_1, sg1) - self.store.get_state_groups_ids.return_value = { - group_name_1: {(e.type, e.state_key): e.event_id for e in old_state_1}, - group_name_2: {(e.type, e.state_key): e.event_id for e in old_state_2}, - } + sg2 = self.store.store_state_group( + prev_event_id_2, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state_2}, + ) + self.store.register_event_id_state_group(prev_event_id_2, sg2) return self.state.compute_event_context(event) From 770b2252ca9b5a74c3e767bfe0c5e7bb2e84ebd3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 16:31:50 +0000 Subject: [PATCH 068/200] s/_create_new_client_event/create_new_client_event/ --- synapse/handlers/federation.py | 10 +++++----- synapse/handlers/message.py | 6 +++--- tests/storage/test_redaction.py | 6 +++--- tests/storage/test_roommember.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e6b9f5cf5..06d6c8425 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1008,7 +1008,7 @@ class 
FederationHandler(BaseHandler): }) try: - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) except AuthError as e: @@ -1248,7 +1248,7 @@ class FederationHandler(BaseHandler): "state_key": user_id, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) @@ -2119,7 +2119,7 @@ class FederationHandler(BaseHandler): if (yield self.auth.check_host_in_room(room_id, self.hs.hostname)): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder ) @@ -2157,7 +2157,7 @@ class FederationHandler(BaseHandler): """ builder = self.event_builder_factory.new(event_dict) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) @@ -2207,7 +2207,7 @@ class FederationHandler(BaseHandler): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) defer.returnValue((event, context)) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index afa19bf65..e8e6a89a3 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -378,7 +378,7 @@ class EventCreationHandler(object): if txn_id is not None: builder.internal_metadata.txn_id = txn_id - event, context = yield self._create_new_client_event( + event, context = yield self.create_new_client_event( builder=builder, requester=requester, 
prev_event_ids=prev_event_ids, @@ -486,9 +486,9 @@ class EventCreationHandler(object): ) defer.returnValue(event) - @measure_func("_create_new_client_event") + @measure_func("create_new_client_event") @defer.inlineCallbacks - def _create_new_client_event(self, builder, requester=None, prev_event_ids=None): + def create_new_client_event(self, builder, requester=None, prev_event_ids=None): if prev_event_ids: prev_events = yield self.store.add_event_hashes(prev_event_ids) prev_max_depth = yield self.store.get_max_depth_of_events(prev_event_ids) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index de6d7904e..888ddfadd 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -58,7 +58,7 @@ class RedactionTestCase(unittest.TestCase): "content": content, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) @@ -78,7 +78,7 @@ class RedactionTestCase(unittest.TestCase): "content": {"body": body, "msgtype": u"message"}, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) @@ -97,7 +97,7 @@ class RedactionTestCase(unittest.TestCase): "redacts": event_id, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 4aff38bd5..657b279e5 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -57,7 +57,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): "content": {"membership": membership}, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) From 
3e1e69ccafbfdf8aa7c0cd06bc4eaf948a6bafdf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 16:40:38 +0000 Subject: [PATCH 069/200] Update copyright --- synapse/handlers/federation.py | 1 + synapse/handlers/message.py | 2 +- synapse/handlers/room.py | 1 + synapse/handlers/room_member.py | 1 + synapse/rest/client/v1/admin.py | 1 + synapse/rest/client/v1/room.py | 1 + 6 files changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 06d6c8425..cba96111d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e8e6a89a3..154072181 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014 - 2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd +# Copyright 2017 - 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 4ea5bf1bc..6ab020bf4 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014 - 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ab58beb0f..37dc5e99a 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index f77f64667..20c5c6663 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index ad6534537..fbb2fc36e 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From e3624fad5f0dfd3cffcbb7c996a8d29bb2c79dbd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Feb 2018 10:30:25 +0000 Subject: [PATCH 070/200] Remove pointless ratelimit check The intention was for the check to be called as early as possible in the request, but actually was called just before the main ratelimit check, so was fairly pointless. 
--- synapse/handlers/message.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 154072181..a58fc37ff 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -403,11 +403,6 @@ class EventCreationHandler(object): "Tried to send member event through non-member codepath" ) - # We check here if we are currently being rate limited, so that we - # don't do unnecessary work. We check again just before we actually - # send the event. - yield self.base_handler.ratelimit(requester, update=False) - user = UserID.from_string(event.sender) assert self.hs.is_mine(user), "User must be our own: %s" % (user,) From 24dd73028ad7fc7a5109a5d97eef5a79179225d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Feb 2018 17:22:16 +0000 Subject: [PATCH 071/200] Add replication http endpoint for event sending --- synapse/app/homeserver.py | 4 + synapse/config/workers.py | 8 ++ synapse/events/snapshot.py | 72 +++++++++++++++++ synapse/handlers/message.py | 16 ++++ synapse/replication/http/__init__.py | 31 +++++++ synapse/replication/http/send_event.py | 108 +++++++++++++++++++++++++ synapse/storage/appservice.py | 13 +++ synapse/types.py | 63 ++++++++++++--- 8 files changed, 303 insertions(+), 12 deletions(-) create mode 100644 synapse/replication/http/__init__.py create mode 100644 synapse/replication/http/send_event.py diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index cb82a415a..e375f2bbc 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -38,6 +38,7 @@ from synapse.metrics import register_memory_metrics from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \ check_requirements +from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import 
ClientRestResource from synapse.rest.key.v1.server_key_resource import LocalKey @@ -219,6 +220,9 @@ class SynapseHomeServer(HomeServer): if name == "metrics" and self.get_config().enable_metrics: resources[METRICS_PREFIX] = MetricsResource(self) + if name == "replication": + resources[REPLICATION_PREFIX] = ReplicationRestResource(self) + return resources def start_listening(self): diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 4b6884918..80baf0ce0 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -33,8 +33,16 @@ class WorkerConfig(Config): self.worker_pid_file = config.get("worker_pid_file") self.worker_log_file = config.get("worker_log_file") self.worker_log_config = config.get("worker_log_config") + + # The host used to connect to the main synapse self.worker_replication_host = config.get("worker_replication_host", None) + + # The port on the main synapse for TCP replication self.worker_replication_port = config.get("worker_replication_port", None) + + # The port on the main synapse for HTTP replication endpoint + self.worker_replication_http_port = config.get("worker_replication_http_port") + self.worker_name = config.get("worker_name", self.worker_app) self.worker_main_http_uri = config.get("worker_main_http_uri", None) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 87e3fe7b9..7b80444f7 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -14,6 +14,9 @@ # limitations under the License. 
+from frozendict import frozendict + + class EventContext(object): """ Attributes: @@ -73,3 +76,72 @@ class EventContext(object): self.prev_state_events = None self.app_service = None + + def serialize(self): + """Converts self to a type that can be serialized as JSON, and then + deserialized by `deserialize` + + Returns: + dict + """ + return { + "current_state_ids": _encode_state_dict(self.current_state_ids), + "prev_state_ids": _encode_state_dict(self.prev_state_ids), + "state_group": self.state_group, + "rejected": self.rejected, + "push_actions": self.push_actions, + "prev_group": self.prev_group, + "delta_ids": _encode_state_dict(self.delta_ids), + "prev_state_events": self.prev_state_events, + "app_service_id": self.app_service.id if self.app_service else None + } + + @staticmethod + def deserialize(store, input): + """Converts a dict that was produced by `serialize` back into a + EventContext. + + Args: + store (DataStore): Used to convert AS ID to AS object + input (dict): A dict produced by `serialize` + + Returns: + EventContext + """ + context = EventContext() + context.current_state_ids = _decode_state_dict(input["current_state_ids"]) + context.prev_state_ids = _decode_state_dict(input["prev_state_ids"]) + context.state_group = input["state_group"] + context.rejected = input["rejected"] + context.push_actions = input["push_actions"] + context.prev_group = input["prev_group"] + context.delta_ids = _decode_state_dict(input["delta_ids"]) + context.prev_state_events = input["prev_state_events"] + + app_service_id = input["app_service_id"] + if app_service_id: + context.app_service = store.get_app_service_by_id(app_service_id) + + return context + + +def _encode_state_dict(state_dict): + """Since dicts of (type, state_key) -> event_id cannot be serialized in + JSON we need to convert them to a form that can. 
+ """ + if state_dict is None: + return None + + return [ + (etype, state_key, v) + for (etype, state_key), v in state_dict.iteritems() + ] + + +def _decode_state_dict(input): + """Decodes a state dict encoded using `_encode_state_dict` above + """ + if input is None: + return None + + return frozendict({(etype, state_key,): v for etype, state_key, v in input}) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a58fc37ff..92c153f30 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -28,6 +28,7 @@ from synapse.util.logcontext import preserve_fn from synapse.util.metrics import measure_func from synapse.util.frozenutils import unfreeze from synapse.visibility import filter_events_for_client +from synapse.replication.http.send_event import send_event_to_master from ._base import BaseHandler @@ -312,6 +313,9 @@ class EventCreationHandler(object): self.server_name = hs.hostname self.ratelimiter = hs.get_ratelimiter() self.notifier = hs.get_notifier() + self.config = hs.config + + self.http_client = hs.get_simple_http_client() # This is only used to get at ratelimit function, and maybe_kick_guest_users self.base_handler = BaseHandler(hs) @@ -559,6 +563,18 @@ class EventCreationHandler(object): ): # We now need to go and hit out to wherever we need to hit out to. + # If we're a worker we need to hit out to the master. 
+ if self.config.worker_app: + yield send_event_to_master( + self.http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + event=event, + context=context, + ) + return + if ratelimit: yield self.base_handler.ratelimit(requester) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py new file mode 100644 index 000000000..b378b4164 --- /dev/null +++ b/synapse/replication/http/__init__.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import send_event + +from synapse.http.server import JsonResource + + +REPLICATION_PREFIX = "/_synapse/replication" + + +class ReplicationRestResource(JsonResource): + def __init__(self, hs): + JsonResource.__init__(self, hs, canonical_json=False) + self.register_servlets(hs) + + def register_servlets(self, hs): + send_event.register_servlets(hs, self) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py new file mode 100644 index 000000000..ff9b9d2f1 --- /dev/null +++ b/synapse/replication/http/send_event.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.events import FrozenEvent +from synapse.events.snapshot import EventContext +from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.util.metrics import Measure +from synapse.types import Requester + +import logging +import re + +logger = logging.getLogger(__name__) + + +def send_event_to_master(client, host, port, requester, event, context): + """Send event to be handled on the master + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + event (FrozenEvent) + context (EventContext) + """ + uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) + + payload = { + "event": event.get_pdu_json(), + "internal_metadata": event.internal_metadata.get_dict(), + "rejected_reason": event.rejected_reason, + "context": context.serialize(), + "requester": requester.serialize(), + } + + return client.post_json_get_json(uri, payload) + + +class ReplicationSendEventRestServlet(RestServlet): + """Handles events newly created on workers, including persisting and + notifying. + + The API looks like: + + POST /_synapse/replication/send_event + + { + "event": { .. serialized event .. }, + "internal_metadata": { .. serialized internal_metadata .. }, + "rejected_reason": .., // The event.rejected_reason field + "context": { .. serialized event context .. }, + "requester": { .. serialized requester .. 
}, + } + """ + PATTERNS = [re.compile("^/_synapse/replication/send_event$")] + + def __init__(self, hs): + super(ReplicationSendEventRestServlet, self).__init__() + + self.event_creation_handler = hs.get_event_creation_handler() + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + with Measure(self.clock, "repl_send_event_parse"): + content = parse_json_object_from_request(request) + + event_dict = content["event"] + internal_metadata = content["internal_metadata"] + rejected_reason = content["rejected_reason"] + event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + requester = Requester.deserialize(self.store, content["requester"]) + context = EventContext.deserialize(self.store, content["context"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info( + "Got event to send with ID: %s into room: %s", + event.event_id, event.room_id, + ) + + yield self.event_creation_handler.handle_new_client_event( + requester, event, context, + ) + + defer.returnValue((200, {})) + + +def register_servlets(hs, http_server): + ReplicationSendEventRestServlet(hs).register(http_server) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index d8c84b714..79673b427 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -99,6 +99,19 @@ class ApplicationServiceStore(SQLBaseStore): return service return None + def get_app_service_by_id(self, as_id): + """Get the application service with the given appservice ID. + + Args: + as_id (str): The application service ID. + Returns: + synapse.appservice.ApplicationService or None. + """ + for service in self.services_cache: + if service.id == as_id: + return service + return None + def get_app_service_rooms(self, service): """Get a list of RoomsForUser for this application service. 
diff --git a/synapse/types.py b/synapse/types.py index 6e76c016d..7cb24cecb 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -19,20 +19,59 @@ from synapse.api.errors import SynapseError from collections import namedtuple -Requester = namedtuple("Requester", [ +class Requester(namedtuple("Requester", [ "user", "access_token_id", "is_guest", "device_id", "app_service", -]) -""" -Represents the user making a request +])): + """ + Represents the user making a request -Attributes: - user (UserID): id of the user making the request - access_token_id (int|None): *ID* of the access token used for this - request, or None if it came via the appservice API or similar - is_guest (bool): True if the user making this request is a guest user - device_id (str|None): device_id which was set at authentication time - app_service (ApplicationService|None): the AS requesting on behalf of the user -""" + Attributes: + user (UserID): id of the user making the request + access_token_id (int|None): *ID* of the access token used for this + request, or None if it came via the appservice API or similar + is_guest (bool): True if the user making this request is a guest user + device_id (str|None): device_id which was set at authentication time + app_service (ApplicationService|None): the AS requesting on behalf of the user + """ + + def serialize(self): + """Converts self to a type that can be serialized as JSON, and then + deserialized by `deserialize` + + Returns: + dict + """ + return { + "user_id": self.user.to_string(), + "access_token_id": self.access_token_id, + "is_guest": self.is_guest, + "device_id": self.device_id, + "app_server_id": self.app_service.id if self.app_service else None, + } + + @staticmethod + def deserialize(store, input): + """Converts a dict that was produced by `serialize` back into a + Requester. 
+ + Args: + store (DataStore): Used to convert AS ID to AS object + input (dict): A dict produced by `serialize` + + Returns: + Requester + """ + appservice = None + if input["app_server_id"]: + appservice = store.get_app_service_by_id(input["app_server_id"]) + + return Requester( + user=UserID.from_string(input["user_id"]), + access_token_id=input["access_token_id"], + is_guest=input["is_guest"], + device_id=input["device_id"], + app_service=appservice, + ) def create_requester(user_id, access_token_id=None, is_guest=False, From 8ec2e638be6f9205451d51dc839c94d0dd8999d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 10:55:40 +0000 Subject: [PATCH 072/200] Add event_creator worker --- synapse/app/event_creator.py | 170 ++++++++++++++++++++ synapse/replication/slave/storage/events.py | 20 +++ synapse/rest/client/v1/room.py | 1 - 3 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 synapse/app/event_creator.py diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py new file mode 100644 index 000000000..b2ce39925 --- /dev/null +++ b/synapse/app/event_creator.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +import sys + +import synapse +from synapse import events +from synapse.app import _base +from synapse.config._base import ConfigError +from synapse.config.homeserver import HomeServerConfig +from synapse.config.logger import setup_logging +from synapse.crypto import context_factory +from synapse.http.server import JsonResource +from synapse.http.site import SynapseSite +from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore +from synapse.replication.slave.storage.client_ips import SlavedClientIpStore +from synapse.replication.slave.storage.devices import SlavedDeviceStore +from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.registration import SlavedRegistrationStore +from synapse.replication.slave.storage.room import RoomStore +from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.client.v1.room import RoomSendEventRestServlet +from synapse.server import HomeServer +from synapse.storage.engines import create_engine +from synapse.util.httpresourcetree import create_resource_tree +from synapse.util.logcontext import LoggingContext +from synapse.util.manhole import manhole +from synapse.util.versionstring import get_version_string +from twisted.internet import reactor +from twisted.web.resource import Resource + +logger = logging.getLogger("synapse.app.event_creator") + + +class EventCreatorSlavedStore( + SlavedDeviceStore, + SlavedClientIpStore, + SlavedApplicationServiceStore, + SlavedEventStore, + SlavedRegistrationStore, + RoomStore, + BaseSlavedStore, +): + pass + + +class EventCreatorServer(HomeServer): + def setup(self): + logger.info("Setting up.") + self.datastore = EventCreatorSlavedStore(self.get_db_conn(), self) + logger.info("Finished setting up.") + + def _listen_http(self, 
listener_config): + port = listener_config["port"] + bind_addresses = listener_config["bind_addresses"] + site_tag = listener_config.get("tag", port) + resources = {} + for res in listener_config["resources"]: + for name in res["names"]: + if name == "metrics": + resources[METRICS_PREFIX] = MetricsResource(self) + elif name == "client": + resource = JsonResource(self, canonical_json=False) + RoomSendEventRestServlet(self).register(resource) + resources.update({ + "/_matrix/client/r0": resource, + "/_matrix/client/unstable": resource, + "/_matrix/client/v2_alpha": resource, + "/_matrix/client/api/v1": resource, + }) + + root_resource = create_resource_tree(resources, Resource()) + + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ) + ) + + logger.info("Synapse event creator now listening on port %d", port) + + def start_listening(self, listeners): + for listener in listeners: + if listener["type"] == "http": + self._listen_http(listener) + elif listener["type"] == "manhole": + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, + ) + ) + else: + logger.warn("Unrecognized listener type: %s", listener["type"]) + + self.get_tcp_replication().start_replication(self) + + def build_tcp_replication(self): + return ReplicationClientHandler(self.get_datastore()) + + +def start(config_options): + try: + config = HomeServerConfig.load_config( + "Synapse event creator", config_options + ) + except ConfigError as e: + sys.stderr.write("\n" + e.message + "\n") + sys.exit(1) + + assert config.worker_app == "synapse.app.event_creator" + + assert config.worker_replication_http_port is not None + + setup_logging(config, use_worker_options=True) + + events.USE_FROZEN_DICTS = config.use_frozen_dicts + + database_engine = create_engine(config.database_config) + + tls_server_context_factory 
= context_factory.ServerContextFactory(config) + + ss = EventCreatorServer( + config.server_name, + db_config=config.database_config, + tls_server_context_factory=tls_server_context_factory, + config=config, + version_string="Synapse/" + get_version_string(synapse), + database_engine=database_engine, + ) + + ss.setup() + ss.get_handlers() + ss.start_listening(config.worker_listeners) + + def start(): + ss.get_state_handler().start_caching() + ss.get_datastore().start_profiling() + + reactor.callWhenRunning(start) + + _base.start_worker_reactor("synapse-event-creator", config) + + +if __name__ == '__main__': + with LoggingContext("main"): + start(sys.argv[1:]) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 8acb5df0f..f8c164b48 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -21,6 +21,7 @@ from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore +from synapse.storage.signatures import SignatureStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -170,6 +171,25 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_federation_out_pos = DataStore.get_federation_out_pos.__func__ update_federation_out_pos = DataStore.update_federation_out_pos.__func__ + get_latest_event_ids_and_hashes_in_room = ( + DataStore.get_latest_event_ids_and_hashes_in_room.__func__ + ) + _get_latest_event_ids_and_hashes_in_room = ( + DataStore._get_latest_event_ids_and_hashes_in_room.__func__ + ) + _get_event_reference_hashes_txn = ( + DataStore._get_event_reference_hashes_txn.__func__ + ) + add_event_hashes = ( + DataStore.add_event_hashes.__func__ + ) + get_event_reference_hashes = ( 
+ SignatureStore.__dict__["get_event_reference_hashes"] + ) + get_event_reference_hash = ( + SignatureStore.__dict__["get_event_reference_hash"] + ) + def stream_positions(self): result = super(SlavedEventStore, self).stream_positions() result["events"] = self._stream_id_gen.get_current_token() diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index fbb2fc36e..817fd4784 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -186,7 +186,6 @@ class RoomSendEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomSendEventRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): From 50fe92cd26d06cd09a722521488bea2a9d1ffdea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 17:27:00 +0000 Subject: [PATCH 073/200] Move presence handling into handle_new_client_event As we want to have it run on the main synapse instance --- synapse/handlers/message.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 92c153f30..6be3f4d77 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -423,12 +423,6 @@ class EventCreationHandler(object): ratelimit=ratelimit, ) - if event.type == EventTypes.Message: - presence = self.hs.get_presence_handler() - # We don't want to block sending messages on any presence code. This - # matters as sometimes presence code can take a while. - preserve_fn(presence.bump_presence_active_time)(user) - @defer.inlineCallbacks def deduplicate_state_event(self, event, context): """ @@ -708,3 +702,9 @@ class EventCreationHandler(object): ) preserve_fn(_notify)() + + if event.type == EventTypes.Message: + presence = self.hs.get_presence_handler() + # We don't want to block sending messages on any presence code. 
This + # matters as sometimes presence code can take a while. + preserve_fn(presence.bump_presence_active_time)(requester.user) From f133228cb35b7803910688e7060772cb9e64f01a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 17:23:13 +0000 Subject: [PATCH 074/200] Add note in docs/workers.rst --- docs/workers.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/workers.rst b/docs/workers.rst index b39f79058..213d57e47 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -207,3 +207,14 @@ the ``worker_main_http_uri`` setting in the frontend_proxy worker configuration file. For example:: worker_main_http_uri: http://127.0.0.1:8008 + + +``synapse.app.event_creator`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Handles non-state event creation. It can handle REST endpoints matching: + + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send + +It will create events locally and then send them on to the main synapse +instance to be persisted and handled. From 5fa571a91b851d372e07217c091b7e3a9ef3d116 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Feb 2018 13:35:08 +0000 Subject: [PATCH 075/200] Tell storage providers about new file so they can upload --- synapse/rest/media/v1/media_storage.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index e8e8b3986..3f8d4b9c2 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -70,6 +70,12 @@ class MediaStorage(object): _write_file_synchronously, source, fname, )) + # Tell the storage providers about the new file. They'll decide + # if they should upload it and whether to do so synchronously + # or not. 
+ for provider in self.storage_providers: + yield provider.store_file(path, file_info) + defer.returnValue(fname) @contextlib.contextmanager From 671540dccf3996620ffe65705904fb911e21fb68 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:27:08 +0000 Subject: [PATCH 076/200] rename delete_old_state -> purge_history (because it deletes more than state) --- synapse/handlers/message.py | 2 +- synapse/storage/events.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd..1c7860bb0 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.delete_old_state(room_id, depth) + yield self.store.purge_history(room_id, depth) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec9..21533970d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,16 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def delete_old_state(self, room_id, topological_ordering): - return self.runInteraction( - "delete_old_state", - self._delete_old_state_txn, room_id, topological_ordering - ) - - def _delete_old_state_txn(self, txn, room_id, topological_ordering): - """Deletes old room state + def purge_history(self, room_id, topological_ordering): + """Deletes room history before a certain point """ + return self.runInteraction( + "purge_history", + self._purge_history_txn, room_id, topological_ordering + ) + + def _purge_history_txn(self, txn, room_id, topological_ordering): # Tables that should be pruned: # event_auth # event_backward_extremities From 61ffaa8137ac962f84a077bb53c4a1b06b21b49b Mon Sep 17 
00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:34:35 +0000 Subject: [PATCH 077/200] bump purge logging to info this thing takes ages and the only sign of any progress is the logs, so having some logs is useful. --- synapse/storage/events.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 21533970d..803a4e247 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2081,7 +2081,7 @@ class EventsStore(SQLBaseStore): 400, "topological_ordering is greater than forward extremeties" ) - logger.debug("[purge] looking for events to delete") + logger.info("[purge] looking for events to delete") txn.execute( "SELECT event_id, state_key FROM events" @@ -2102,7 +2102,7 @@ class EventsStore(SQLBaseStore): for event_id, state_key in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.debug("[purge] Finding new backward extremities") + logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged @@ -2116,7 +2116,7 @@ class EventsStore(SQLBaseStore): ) new_backwards_extrems = txn.fetchall() - logger.debug("[purge] replacing backward extremities: %r", new_backwards_extrems) + logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems) txn.execute( "DELETE FROM event_backward_extremities WHERE room_id = ?", @@ -2132,7 +2132,7 @@ class EventsStore(SQLBaseStore): ] ) - logger.debug("[purge] finding redundant state groups") + logger.info("[purge] finding redundant state groups") # Get all state groups that are only referenced by events that are # to be deleted. 
@@ -2149,15 +2149,15 @@ class EventsStore(SQLBaseStore): ) state_rows = txn.fetchall() - logger.debug("[purge] found %i redundant state groups", len(state_rows)) + logger.info("[purge] found %i redundant state groups", len(state_rows)) # make a set of the redundant state groups, so that we can look them up # efficiently state_groups_to_delete = set([sg for sg, in state_rows]) # Now we get all the state groups that rely on these state groups - logger.debug("[purge] finding state groups which depend on redundant" - " state groups") + logger.info("[purge] finding state groups which depend on redundant" + " state groups") remaining_state_groups = [] for i in xrange(0, len(state_rows), 100): chunk = [sg for sg, in state_rows[i:i + 100]] @@ -2182,7 +2182,7 @@ class EventsStore(SQLBaseStore): # Now we turn the state groups that reference to-be-deleted state # groups to non delta versions. for sg in remaining_state_groups: - logger.debug("[purge] de-delta-ing remaining state group %s", sg) + logger.info("[purge] de-delta-ing remaining state group %s", sg) curr_state = self._get_state_groups_from_groups_txn( txn, [sg], types=None ) @@ -2219,7 +2219,7 @@ class EventsStore(SQLBaseStore): ], ) - logger.debug("[purge] removing redundant state groups") + logger.info("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", state_rows @@ -2230,13 +2230,13 @@ class EventsStore(SQLBaseStore): ) # Delete all non-state - logger.debug("[purge] removing events from event_to_state_groups") + logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) - logger.debug("[purge] updating room_depth") + logger.info("[purge] updating room_depth") txn.execute( "UPDATE room_depth SET min_depth = ? 
WHERE room_id = ?", (topological_ordering, room_id,) @@ -2258,7 +2258,8 @@ class EventsStore(SQLBaseStore): "event_signatures", "rejections", ): - logger.debug("[purge] removing remote non-state events from %s", table) + logger.info("[purge] removing remote non-state events from %s", + table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), @@ -2266,7 +2267,7 @@ class EventsStore(SQLBaseStore): ) # Mark all state and own events as outliers - logger.debug("[purge] marking remaining events as outliers") + logger.info("[purge] marking remaining events as outliers") txn.executemany( "UPDATE events SET outlier = ?" " WHERE event_id = ?", From e571aef06d3b1af3946e790841f4b8a3a4cfdebf Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:40:29 +0000 Subject: [PATCH 078/200] purge: Move cache invalidation to more appropriate place it was a bit of a non-sequitur there --- synapse/storage/events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 803a4e247..24d997830 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2099,9 +2099,6 @@ class EventsStore(SQLBaseStore): "[purge] found %i events before cutoff, of which %i are remote" " non-state events to delete", len(event_rows), len(to_delete)) - for event_id, state_key in event_rows: - txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding @@ -2229,12 +2226,15 @@ class EventsStore(SQLBaseStore): state_rows ) - # Delete all non-state logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) + for event_id, _ in event_rows: + txn.call_after(self._get_state_group_for_event.invalidate, ( + event_id, + )) 
logger.info("[purge] updating room_depth") txn.execute( From 74fcbf741b3a7b95b5cc44478050e8a40fb7dc46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Feb 2018 18:44:52 +0000 Subject: [PATCH 079/200] delete_local_events for purge_history Add a flag which makes the purger delete local events --- docs/admin_api/purge_history_api.rst | 14 +++++++++-- synapse/handlers/message.py | 4 ++-- synapse/http/servlet.py | 18 +++++++++++--- synapse/rest/client/v1/admin.py | 11 ++++++++- synapse/storage/events.py | 35 ++++++++++++++++++++++------ 5 files changed, 67 insertions(+), 15 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index 08b330636..b4e5bd9d7 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -4,8 +4,6 @@ Purge History API The purge history API allows server admins to purge historic events from their database, reclaiming disk space. -**NB!** This will not delete local events (locally sent messages content etc) from the database, but will remove lots of the metadata about them and does dramatically reduce the on disk space usage - Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. @@ -15,3 +13,15 @@ The API is simply: ``POST /_matrix/client/r0/admin/purge_history//`` including an ``access_token`` of a server admin. + +By default, events sent by local users are not deleted, as they may represent +the only copies of this content in existence. (Events sent by remote users are +deleted, and room state data before the cutoff is always removed). + +To delete local events as well, set ``delete_local_events`` in the body: + +.. 
code:: json + + { + "delete_local_events": True, + } diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1c7860bb0..276d1a772 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -63,7 +63,7 @@ class MessageHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() @defer.inlineCallbacks - def purge_history(self, room_id, event_id): + def purge_history(self, room_id, event_id, delete_local_events=False): event = yield self.store.get_event(event_id) if event.room_id != room_id: @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, depth) + yield self.store.purge_history(room_id, depth, delete_local_events) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 71420e54d..ef8e62901 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -148,11 +148,13 @@ def parse_string_from_args(args, name, default=None, required=False, return default -def parse_json_value_from_request(request): +def parse_json_value_from_request(request, allow_empty_body=False): """Parse a JSON value from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into None Returns: The JSON value. @@ -165,6 +167,9 @@ def parse_json_value_from_request(request): except Exception: raise SynapseError(400, "Error reading JSON content.") + if not content_bytes and allow_empty_body: + return None + try: content = simplejson.loads(content_bytes) except Exception as e: @@ -174,17 +179,24 @@ def parse_json_value_from_request(request): return content -def parse_json_object_from_request(request): +def parse_json_object_from_request(request, allow_empty_body=False): """Parse a JSON object from the body of a twisted HTTP request. 
Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into an empty dict. Raises: SynapseError if the request body couldn't be decoded as JSON or if it wasn't a JSON object. """ - content = parse_json_value_from_request(request) + content = parse_json_value_from_request( + request, allow_empty_body=allow_empty_body, + ) + + if allow_empty_body and content is None: + return {} if type(content) != dict: message = "Content must be a JSON object." diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea..f954d2ea6 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -128,7 +128,16 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): if not is_admin: raise AuthError(403, "You are not a server admin") - yield self.handlers.message_handler.purge_history(room_id, event_id) + body = parse_json_object_from_request(request, allow_empty_body=True) + + delete_local_events = bool( + body.get("delete_local_history", False) + ) + + yield self.handlers.message_handler.purge_history( + room_id, event_id, + delete_local_events=delete_local_events, + ) defer.returnValue((200, {})) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 24d997830..11a2ff2d8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,32 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def purge_history(self, room_id, topological_ordering): + def purge_history( + self, room_id, topological_ordering, delete_local_events, + ): """Deletes room history before a certain point + + Args: + room_id (str): + + topological_ordering (int): + minimum topo ordering to preserve + + delete_local_events (bool): + if True, we will delete local events as well as remote ones + (instead of just marking them as outliers and deleting their + state groups). 
""" return self.runInteraction( "purge_history", - self._purge_history_txn, room_id, topological_ordering + self._purge_history_txn, room_id, topological_ordering, + delete_local_events, ) - def _purge_history_txn(self, txn, room_id, topological_ordering): + def _purge_history_txn( + self, txn, room_id, topological_ordering, delete_local_events, + ): # Tables that should be pruned: # event_auth # event_backward_extremities @@ -2093,11 +2109,14 @@ class EventsStore(SQLBaseStore): to_delete = [ (event_id,) for event_id, state_key in event_rows - if state_key is None and not self.hs.is_mine_id(event_id) + if state_key is None and ( + delete_local_events or not self.hs.is_mine_id(event_id) + ) ] logger.info( - "[purge] found %i events before cutoff, of which %i are remote" - " non-state events to delete", len(event_rows), len(to_delete)) + "[purge] found %i events before cutoff, of which %i can be deleted", + len(event_rows), len(to_delete), + ) logger.info("[purge] Finding new backward extremities") @@ -2273,7 +2292,9 @@ class EventsStore(SQLBaseStore): " WHERE event_id = ?", [ (True, event_id,) for event_id, state_key in event_rows - if state_key is not None or self.hs.is_mine_id(event_id) + if state_key is not None or ( + not delete_local_events and self.hs.is_mine_id(event_id) + ) ] ) From 39a6b3549638c70e3aaf51b361576fbd729eb655 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Feb 2018 12:13:34 +0000 Subject: [PATCH 080/200] purge: move room_depth update to end ... to avoid locking the table for too long --- synapse/storage/events.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 11a2ff2d8..238a2006b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2255,12 +2255,6 @@ class EventsStore(SQLBaseStore): event_id, )) - logger.info("[purge] updating room_depth") - txn.execute( - "UPDATE room_depth SET min_depth = ? 
WHERE room_id = ?", - (topological_ordering, room_id,) - ) - # Delete all remote non-state events for table in ( "events", @@ -2298,6 +2292,18 @@ class EventsStore(SQLBaseStore): ] ) + # synapse tries to take out an exclusive lock on room_depth whenever it + # persists events (because upsert), and once we run this update, we + # will block that for the rest of our transaction. + # + # So, let's stick it at the end so that we don't block event + # persistence. + logger.info("[purge] updating room_depth") + txn.execute( + "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", + (topological_ordering, room_id,) + ) + logger.info("[purge] done") @defer.inlineCallbacks From 32c7b8e48b5c79de5b722afb4c2b79c6c712cdc5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Feb 2018 17:18:07 +0000 Subject: [PATCH 081/200] Update workers docs to include http port --- docs/workers.rst | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/docs/workers.rst b/docs/workers.rst index 213d57e47..b687807e5 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -30,17 +30,29 @@ requests made to the federation port. The caveats regarding running a reverse-proxy on the federation port still apply (see https://github.com/matrix-org/synapse/blob/master/README.rst#reverse-proxying-the-federation-port). 
-To enable workers, you need to add a replication listener to the master synapse, e.g.:: +To enable workers, you need to add two replication listeners to the master +synapse, e.g.:: listeners: + # The TCP replication port - port: 9092 bind_address: '127.0.0.1' type: replication + # The HTTP replication port + - port: 9093 + bind_address: '127.0.0.1' + type: http + resources: + - names: [replication] -Under **no circumstances** should this replication API listener be exposed to the -public internet; it currently implements no authentication whatsoever and is +Under **no circumstances** should these replication API listeners be exposed to +the public internet; it currently implements no authentication whatsoever and is unencrypted. +(Roughly, the TCP port is used for streaming data from the master to the +workers, and the HTTP port for the workers to communicate with the main +synapse process.) + You then create a set of configs for the various worker processes. These should be worker configuration files, and should be stored in a dedicated subdirectory, to allow synctl to manipulate them. @@ -52,8 +64,10 @@ You should minimise the number of overrides though to maintain a usable config. You must specify the type of worker application (``worker_app``). The currently available worker applications are listed below. You must also specify the -replication endpoint that it's talking to on the main synapse process -(``worker_replication_host`` and ``worker_replication_port``). +replication endpoints that it's talking to on the main synapse process. +``worker_replication_host`` should specify the host of the main synapse, +``worker_replication_port`` should point to the TCP replication listener port and +``worker_replication_http_port`` should point to the HTTP replication port. For instance:: @@ -62,6 +76,7 @@ For instance:: # The replication listener on the synapse to talk to. 
worker_replication_host: 127.0.0.1 worker_replication_port: 9092 + worker_replication_http_port: 9093 worker_listeners: - type: http From 8fd1a324564510be55a7c1e6b6339f736f5c525a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 13 Feb 2018 13:04:41 +0000 Subject: [PATCH 082/200] Fix typos in purge api & doc * It's supposed to be delete_local_events, not ..._history * Fix the doc to have valid json --- docs/admin_api/purge_history_api.rst | 2 +- synapse/rest/client/v1/admin.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index b4e5bd9d7..a3a17e9f9 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -23,5 +23,5 @@ To delete local events as well, set ``delete_local_events`` in the body: .. code:: json { - "delete_local_events": True, + "delete_local_events": true } diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 2ad486c67..6073cc6fa 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -131,9 +131,7 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): body = parse_json_object_from_request(request, allow_empty_body=True) - delete_local_events = bool( - body.get("delete_local_history", False) - ) + delete_local_events = bool(body.get("delete_local_events", False)) yield self.handlers.message_handler.purge_history( room_id, event_id, From 630caf8a703250e0f568000958faee42f9336b72 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 13 Feb 2018 14:29:22 +0000 Subject: [PATCH 083/200] style nit --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ca1d4a398..3d5eb9bc0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -528,7 +528,7 @@ class EventsStore(SQLBaseStore): # I don't think this can happen, but let's
double-check raise Exception( "Context for new extremity event %s has no state " - "group" % event_id, + "group" % (event_id, ), ) # If we've already seen the state group don't bother adding From 4a6d5517049c5b8b9e43df43a10a0dda5db07244 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 2 Feb 2018 15:25:27 +0000 Subject: [PATCH 084/200] GIN reindex: Fix syntax errors, improve exception handling --- synapse/storage/search.py | 40 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 13c827cf8..076ecff29 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -157,28 +157,42 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_gin_search(self, progress, batch_size): - '''This handles old synapses which used GIST indexes, if any; + """This handles old synapses which used GIST indexes, if any; converting them back to be GIN as per the actual schema. - ''' + """ def create_index(conn): + conn.rollback() + + # we have to set autocommit, because postgres refuses to + # CREATE INDEX CONCURRENTLY without it. + conn.set_session(autocommit=True) + try: - conn.rollback() - conn.set_session(autocommit=True) c = conn.cursor() + # if we skipped the conversion to GIST, we may already/still + # have an event_search_fts_idx; unfortunately postgres 9.4 + # doesn't support CREATE INDEX IF EXISTS so we just catch the + # exception and ignore it. + import psycopg2 + try: + c.execute( + "CREATE INDEX CONCURRENTLY event_search_fts_idx" + " ON event_search USING GIN (vector)" + ) + except psycopg2.ProgrammingError as e: + logger.warn( + "Ignoring error %r when trying to switch from GIST to GIN", + e + ) + + # we should now be able to delete the GIST index. 
c.execute( - "CREATE INDEX CONCURRENTLY event_search_fts_idx" - " ON event_search USING GIN (vector)" + "DROP INDEX IF EXISTS event_search_fts_idx_gist" ) - - c.execute("DROP INDEX event_search_fts_idx_gist") - + finally: conn.set_session(autocommit=False) - except e: - logger.warn( - "Ignoring error %s when trying to switch from GIST to GIN" % (e,) - ) if isinstance(self.database_engine, PostgresEngine): yield self.runWithConnection(create_index) From 0b27ae8dc3957e77561b2ff35a5a127532f6f9f1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:12:27 +0000 Subject: [PATCH 085/200] move search reindex to schema 47 We're up to schema v47 on develop now, so this will have to go in there to have an effect. This might cause an error if somebody has already run it in the v46 guise, and runs it again in the v47 guise, because it will cause a duplicate entry in the background_updates table. On the other hand, the entry is removed once it is complete, and it is unlikely that anyone other than matrix.org has run it on v46. The update itself is harmless to re-run because it deliberately copes with the index already existing. --- synapse/storage/schema/delta/38/postgres_fts_gist.sql | 2 +- synapse/storage/schema/delta/{46 => 47}/postgres_fts_gin.sql | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename synapse/storage/schema/delta/{46 => 47}/postgres_fts_gin.sql (100%) diff --git a/synapse/storage/schema/delta/38/postgres_fts_gist.sql b/synapse/storage/schema/delta/38/postgres_fts_gist.sql index 5fe27d687..515e6b8e8 100644 --- a/synapse/storage/schema/delta/38/postgres_fts_gist.sql +++ b/synapse/storage/schema/delta/38/postgres_fts_gist.sql @@ -13,7 +13,7 @@ * limitations under the License.
*/ --- We no longer do this given we back it out again in schema 46 +-- We no longer do this given we back it out again in schema 47 -- INSERT into background_updates (update_name, progress_json) -- VALUES ('event_search_postgres_gist', '{}'); diff --git a/synapse/storage/schema/delta/46/postgres_fts_gin.sql b/synapse/storage/schema/delta/47/postgres_fts_gin.sql similarity index 100% rename from synapse/storage/schema/delta/46/postgres_fts_gin.sql rename to synapse/storage/schema/delta/47/postgres_fts_gin.sql From d627174da222d1e90334f580003d6c8075284124 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 13 Feb 2018 16:51:21 +0000 Subject: [PATCH 086/200] Fix log message in purge_history (we don't just remove remote events) --- synapse/storage/events.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d5bc423f1..bbb6aa992 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2287,8 +2287,7 @@ class EventsStore(SQLBaseStore): "event_signatures", "rejections", ): - logger.info("[purge] removing remote non-state events from %s", - table) + logger.info("[purge] removing events from %s", table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), From 059d3a6c8e55ab7e5318793b8d7c4546bb850d33 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Feb 2018 17:53:56 +0000 Subject: [PATCH 087/200] Update docs --- docs/workers.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/workers.rst b/docs/workers.rst index b687807e5..dee04bbf3 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -50,7 +50,7 @@ the public internet; it currently implements no authentication whatsoever and is unencrypted. 
(Roughly, the TCP port is used for streaming data from the master to the -workers, and the HTTP port for the workers to communicate with the main +workers, and the HTTP port for the workers to send data to the main synapse process.) You then create a set of configs for the various worker processes. These @@ -69,6 +69,9 @@ replication endpoints that it's talking to on the main synapse process. ``worker_replication_port`` should point to the TCP replication listener port and ``worker_replication_http_port`` should point to the HTTP replication port. +Currently, only the ``event_creator`` worker requires specifying +``worker_replication_http_port``. + For instance:: worker_app: synapse.app.synchrotron From 5fcbf1e07c5f7c2ce0ec44c2569116507caa0183 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 11:02:22 +0000 Subject: [PATCH 088/200] Rework event purge to use a temporary table ... which should speed things up by reducing the amount of data being shuffled across the connection --- synapse/storage/events.py | 93 ++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bbb6aa992..5a2e6a03d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2115,23 +2115,44 @@ class EventsStore(SQLBaseStore): logger.info("[purge] looking for events to delete") + # we build a temporary table listing the events so that we don't have + # to keep shovelling the list back and forth across the connection. + txn.execute( - "SELECT event_id, state_key FROM events" - " LEFT JOIN state_events USING (room_id, event_id)" - " WHERE room_id = ? 
AND topological_ordering < ?", - (room_id, topological_ordering,) + "CREATE TEMPORARY TABLE events_to_purge (" + " event_id TEXT NOT NULL," + " should_delete BOOLEAN NOT NULL" + ")" + ) + + # create an index on should_delete because later we'll be looking for + # the should_delete / shouldn't_delete subsets + txn.execute("CREATE INDEX ON events_to_purge(should_delete)") + + should_delete_expr = "state_key IS NULL" + should_delete_params = () + if not delete_local_events: + should_delete_expr += " AND event_id NOT LIKE ?" + should_delete_params += ("%:" + self.hs.hostname, ) + + should_delete_params += (room_id, topological_ordering) + + txn.execute( + "INSERT INTO events_to_purge" + " SELECT event_id, %s" + " FROM events AS e LEFT JOIN state_events USING (event_id)" + " WHERE e.room_id = ? AND topological_ordering < ?" % ( + should_delete_expr, + ), + should_delete_params, + ) + txn.execute( + "SELECT event_id, should_delete FROM events_to_purge" ) event_rows = txn.fetchall() - - to_delete = [ - (event_id,) for event_id, state_key in event_rows - if state_key is None and ( - delete_local_events or not self.hs.is_mine_id(event_id) - ) - ] logger.info( "[purge] found %i events before cutoff, of which %i can be deleted", - len(event_rows), len(to_delete), + len(event_rows), sum(1 for e in event_rows if e[1]), ) logger.info("[purge] Finding new backward extremities") @@ -2139,12 +2160,11 @@ class EventsStore(SQLBaseStore): # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged txn.execute( - "SELECT DISTINCT e.event_id FROM events as e" - " INNER JOIN event_edges as ed ON e.event_id = ed.prev_event_id" - " INNER JOIN events as e2 ON e2.event_id = ed.event_id" - " WHERE e.room_id = ? AND e.topological_ordering < ?" 
- " AND e2.topological_ordering >= ?", - (room_id, topological_ordering, topological_ordering) + "SELECT DISTINCT e.event_id FROM events_to_purge AS e" + " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id" + " INNER JOIN events AS e2 ON e2.event_id = ed.event_id" + " WHERE e2.topological_ordering >= ?", + (topological_ordering, ) ) new_backwards_extrems = txn.fetchall() @@ -2172,12 +2192,11 @@ class EventsStore(SQLBaseStore): "SELECT state_group FROM event_to_state_groups" " INNER JOIN events USING (event_id)" " WHERE state_group IN (" - " SELECT DISTINCT state_group FROM events" + " SELECT DISTINCT state_group FROM events_to_purge" " INNER JOIN event_to_state_groups USING (event_id)" - " WHERE room_id = ? AND topological_ordering < ?" " )" " GROUP BY state_group HAVING MAX(topological_ordering) < ?", - (room_id, topological_ordering, topological_ordering) + (topological_ordering, ) ) state_rows = txn.fetchall() @@ -2262,9 +2281,9 @@ class EventsStore(SQLBaseStore): ) logger.info("[purge] removing events from event_to_state_groups") - txn.executemany( - "DELETE FROM event_to_state_groups WHERE event_id = ?", - [(event_id,) for event_id, _ in event_rows] + txn.execute( + "DELETE FROM event_to_state_groups " + "WHERE event_id IN (SELECT event_id from events_to_purge)" ) for event_id, _ in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, ( @@ -2289,22 +2308,26 @@ class EventsStore(SQLBaseStore): ): logger.info("[purge] removing events from %s", table) - txn.executemany( - "DELETE FROM %s WHERE event_id = ?" % (table,), - to_delete + txn.execute( + "DELETE FROM %s WHERE event_id IN (" + " SELECT event_id FROM events_to_purge WHERE should_delete" + ")" % (table,), ) # Mark all state and own events as outliers logger.info("[purge] marking remaining events as outliers") - txn.executemany( + txn.execute( "UPDATE events SET outlier = ?" 
- " WHERE event_id = ?", - [ - (True, event_id,) for event_id, state_key in event_rows - if state_key is not None or ( - not delete_local_events and self.hs.is_mine_id(event_id) - ) - ] + " WHERE event_id IN (" + " SELECT event_id FROM events_to_purge " + " WHERE NOT should_delete" + ")", + (True,), + ) + + # we're now done with the temporary table + txn.execute( + "DROP TABLE events_to_purge" ) # synapse tries to take out an exclusive lock on room_depth whenever it From 278d21b5e498e301a43a217595ce75ed7729256f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 15:44:51 +0000 Subject: [PATCH 089/200] purge_history: fix index use event_push_actions doesn't have an index on event_id, so we need to specify room_id. --- synapse/storage/events.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5a2e6a03d..c81bc75ea 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2314,6 +2314,20 @@ class EventsStore(SQLBaseStore): ")" % (table,), ) + # event_push_actions lacks an index on event_id, and has one on + # (room_id, event_id) instead. + for table in ( + "event_push_actions", + ): + logger.info("[purge] removing events from %s", table) + + txn.execute( + "DELETE FROM %s WHERE room_id = ? 
AND event_id IN (" + " SELECT event_id FROM events_to_purge WHERE should_delete" + ")" % (table,), + (room_id, ) + ) + # Mark all state and own events as outliers logger.info("[purge] marking remaining events as outliers") txn.execute( From 5978dccff09e647509bb92e8125aa02e87f7a0a2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 15:54:09 +0000 Subject: [PATCH 090/200] remove overzealous exception handling --- synapse/storage/search.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 8d294d497..2755acff4 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,7 +16,6 @@ from collections import namedtuple import logging import re -import sys import ujson as json from twisted.internet import defer @@ -335,25 +334,18 @@ class SearchStore(BackgroundUpdateStore): # (postgres 9.5 uses the separate gin_pending_list_limit setting, # so doesn't suffer the same problem, but changing work_mem will # be harmless) + # + # Note that we don't need to worry about restoring it on + # exception, because exceptions will cause the transaction to be + # rolled back, including the effects of the SET command. + # + # Also: we use SET rather than SET LOCAL because there's lots of + # other stuff going on in this transaction, which want to have the + # normal work_mem setting. 
txn.execute("SET work_mem='256kB'") - try: - txn.executemany(sql, args) - except Exception: - # we need to reset work_mem, but doing so may throw a new - # exception and we want to preserve the original - t, v, tb = sys.exc_info() - try: - txn.execute("RESET work_mem") - except Exception as e: - logger.warn( - "exception resetting work_mem during exception " - "handling: %r", - e, - ) - raise t, v, tb - else: - txn.execute("RESET work_mem") + txn.executemany(sql, args) + txn.execute("RESET work_mem") elif isinstance(self.database_engine, Sqlite3Engine): sql = ( From ac27f6a35ebb63d502769edec642fbf70a178a60 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 16:41:12 +0000 Subject: [PATCH 091/200] purge_history: handle sqlite asshattery apparently creating a temporary table commits the transaction. because that's a useful thing. --- synapse/storage/events.py | 46 +++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c81bc75ea..90e910f61 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2093,6 +2093,27 @@ class EventsStore(SQLBaseStore): # state_groups # state_groups_state + # we will build a temporary table listing the events so that we don't + # have to keep shovelling the list back and forth across the + # connection. Annoyingly the python sqlite driver commits the + # transaction on CREATE, so let's do this first. + # + # furthermore, we might already have the table from a previous (failed) + # purge attempt, so let's drop the table first. 
+ + txn.execute("DROP TABLE IF EXISTS events_to_purge") + + txn.execute( + "CREATE TEMPORARY TABLE events_to_purge (" + " event_id TEXT NOT NULL," + " should_delete BOOLEAN NOT NULL" + ")" + ) + + # create an index on should_delete because later we'll be looking for + # the should_delete / shouldn't_delete subsets + txn.execute("CREATE INDEX ON events_to_purge(should_delete)") + # First ensure that we're not about to delete all the forward extremeties txn.execute( "SELECT e.event_id, e.depth FROM events as e " @@ -2115,20 +2136,6 @@ class EventsStore(SQLBaseStore): logger.info("[purge] looking for events to delete") - # we build a temporary table listing the events so that we don't have - # to keep shovelling the list back and forth across the connection. - - txn.execute( - "CREATE TEMPORARY TABLE events_to_purge (" - " event_id TEXT NOT NULL," - " should_delete BOOLEAN NOT NULL" - ")" - ) - - # create an index on should_delete because later we'll be looking for - # the should_delete / shouldn't_delete subsets - txn.execute("CREATE INDEX ON events_to_purge(should_delete)") - should_delete_expr = "state_key IS NULL" should_delete_params = () if not delete_local_events: @@ -2339,11 +2346,6 @@ class EventsStore(SQLBaseStore): (True,), ) - # we're now done with the temporary table - txn.execute( - "DROP TABLE events_to_purge" - ) - # synapse tries to take out an exclusive lock on room_depth whenever it # persists events (because upsert), and once we run this update, we # will block that for the rest of our transaction. @@ -2356,6 +2358,12 @@ class EventsStore(SQLBaseStore): (topological_ordering, room_id,) ) + # finally, drop the temp table. this will commit the txn in sqlite, + # so make sure to keep this actually last. 
+ txn.execute( + "DROP TABLE events_to_purge" + ) + logger.info("[purge] done") @defer.inlineCallbacks From 39bf47319f002614d8de11948d09db7648b26315 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 16:42:19 +0000 Subject: [PATCH 092/200] purge_history: fix sqlite syntax error apparently sqlite insists on indexes being named --- synapse/storage/events.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 90e910f61..28cce2979 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2112,7 +2112,10 @@ class EventsStore(SQLBaseStore): # create an index on should_delete because later we'll be looking for # the should_delete / shouldn't_delete subsets - txn.execute("CREATE INDEX ON events_to_purge(should_delete)") + txn.execute( + "CREATE INDEX events_to_purge_should_delete" + " ON events_to_purge(should_delete)", + ) # First ensure that we're not about to delete all the forward extremeties txn.execute( From 92c52df702654df094abb69c9aa24f302cdc6130 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Feb 2018 17:55:18 +0000 Subject: [PATCH 093/200] Make store_file use store_into_file --- synapse/rest/media/v1/media_storage.py | 37 ++++++++------------------ 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 3f8d4b9c2..83471b317 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -58,23 +58,13 @@ class MediaStorage(object): Returns: Deferred[str]: the file path written to in the primary media store """ - path = self._file_info_to_path(file_info) - fname = os.path.join(self.local_media_directory, path) - dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) - - # Write to the main repository - yield make_deferred_yieldable(threads.deferToThread( - 
_write_file_synchronously, source, fname, - )) - - # Tell the storage providers about the new file. They'll decide - # if they should upload it and whether to do so synchronously - # or not. - for provider in self.storage_providers: - yield provider.store_file(path, file_info) + with self.store_into_file(file_info) as (f, fname, finish_cb): + # Write to the main repository + yield make_deferred_yieldable(threads.deferToThread( + _write_file_synchronously, source, f, + )) + yield finish_cb() defer.returnValue(fname) @@ -240,21 +230,16 @@ class MediaStorage(object): ) -def _write_file_synchronously(source, fname): - """Write `source` to the path `fname` synchronously. Should be called +def _write_file_synchronously(source, dest): + """Write `source` to the file like `dest` synchronously. Should be called from a thread. Args: - source: A file like object to be written - fname (str): Path to write to + source: A file like object that's to be written + dest: A file like object to be written to """ - dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) - source.seek(0) # Ensure we read from the start of the file - with open(fname, "wb") as f: - shutil.copyfileobj(source, f) + shutil.copyfileobj(source, dest) class FileResponder(Responder): From ef344b10e529d1c0b023f009a8aae806e8a981bc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 11:02:43 +0000 Subject: [PATCH 094/200] Don't log errors propogated from send_event --- synapse/replication/http/send_event.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index ff9b9d2f1..7b21a2213 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -15,6 +15,7 @@ from twisted.internet import defer +from synapse.api.errors import SynapseError, MatrixCodeMessageException from synapse.events import FrozenEvent from 
synapse.events.snapshot import EventContext from synapse.http.servlet import RestServlet, parse_json_object_from_request @@ -27,6 +28,7 @@ import re logger = logging.getLogger(__name__) +@defer.inlineCallbacks def send_event_to_master(client, host, port, requester, event, context): """Send event to be handled on the master @@ -48,7 +50,14 @@ def send_event_to_master(client, host, port, requester, event, context): "requester": requester.serialize(), } - return client.post_json_get_json(uri, payload) + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) class ReplicationSendEventRestServlet(RestServlet): From fd1601c5965a8ba065341257f9d15fe31d49eb48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 10:51:46 +0000 Subject: [PATCH 095/200] Fix state group storage bug in workers We needed to move `_count_state_group_hops_txn` to the StateGroupWorkerStore. --- synapse/storage/state.py | 82 ++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index adb48df73..d0a840456 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -655,6 +655,47 @@ class StateGroupWorkerStore(SQLBaseStore): return self.runInteraction("store_state_group", _store_state_group_txn) + def _count_state_group_hops_txn(self, txn, state_group): + """Given a state group, count how many hops there are in the tree. + + This is used to ensure the delta chains don't get too long. 
+ """ + if isinstance(self.database_engine, PostgresEngine): + sql = (""" + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT count(*) FROM state; + """) + + txn.execute(sql, (state_group,)) + row = txn.fetchone() + if row and row[0]: + return row[0] + else: + return 0 + else: + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) + next_group = state_group + count = 0 + + while next_group: + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + if next_group: + count += 1 + + return count + class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): """ Keeps track of the state at a given event. @@ -729,47 +770,6 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): (event_id,), state_group_id ) - def _count_state_group_hops_txn(self, txn, state_group): - """Given a state group, count how many hops there are in the tree. - - This is used to ensure the delta chains don't get too long. - """ - if isinstance(self.database_engine, PostgresEngine): - sql = (""" - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT count(*) FROM state; - """) - - txn.execute(sql, (state_group,)) - row = txn.fetchone() - if row and row[0]: - return row[0] - else: - return 0 - else: - # We don't use WITH RECURSIVE on sqlite3 as there are distributions - # that ship with an sqlite3 version that doesn't support it (e.g. 
wheezy) - next_group = state_group - count = 0 - - while next_group: - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - if next_group: - count += 1 - - return count - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding From 106906a65e647d94a9d2faf1b3a626bc1f608a25 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 13:53:18 +0000 Subject: [PATCH 096/200] Don't serialize current state over replication --- synapse/events/snapshot.py | 41 ++++++++++++++++++++++---- synapse/replication/http/send_event.py | 4 +-- synapse/storage/state.py | 14 +++++++++ 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 7b80444f7..f9445bef1 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from twisted.internet import defer from frozendict import frozendict @@ -77,16 +78,30 @@ class EventContext(object): self.app_service = None - def serialize(self): + def serialize(self, event): """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` + Args: + event (FrozenEvent): The event that this context relates to + Returns: dict """ + + # We don't serialize the full state dicts, instead they get pulled out + # of the DB on the other side. However, the other side can't figure out + # the prev_state_ids, so if we're a state event we include the event + # id that we replaced in the state. 
+ if event.is_state(): + prev_state_id = self.prev_state_ids.get((event.type, event.state_key)) + else: + prev_state_id = None + return { - "current_state_ids": _encode_state_dict(self.current_state_ids), - "prev_state_ids": _encode_state_dict(self.prev_state_ids), + "prev_state_id": prev_state_id, + "event_type": event.type, + "event_state_key": event.state_key if event.is_state() else None, "state_group": self.state_group, "rejected": self.rejected, "push_actions": self.push_actions, @@ -97,6 +112,7 @@ class EventContext(object): } @staticmethod + @defer.inlineCallbacks def deserialize(store, input): """Converts a dict that was produced by `serialize` back into a EventContext. @@ -109,8 +125,6 @@ class EventContext(object): EventContext """ context = EventContext() - context.current_state_ids = _decode_state_dict(input["current_state_ids"]) - context.prev_state_ids = _decode_state_dict(input["prev_state_ids"]) context.state_group = input["state_group"] context.rejected = input["rejected"] context.push_actions = input["push_actions"] @@ -118,11 +132,26 @@ class EventContext(object): context.delta_ids = _decode_state_dict(input["delta_ids"]) context.prev_state_events = input["prev_state_events"] + # We use the state_group and prev_state_id stuff to pull the + # current_state_ids out of the DB and construct prev_state_ids. 
+ prev_state_id = input["prev_state_id"] + event_type = input["event_type"] + event_state_key = input["event_state_key"] + + context.current_state_ids = yield store.get_state_ids_for_group( + context.state_group, + ) + if prev_state_id and event_state_key: + context.prev_state_ids = dict(context.current_state_ids) + context.prev_state_ids[(event_type, event_state_key)] = prev_state_id + else: + context.prev_state_ids = context.current_state_ids + app_service_id = input["app_service_id"] if app_service_id: context.app_service = store.get_app_service_by_id(app_service_id) - return context + defer.returnValue(context) def _encode_state_dict(state_dict): diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 7b21a2213..468f4b68f 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -46,7 +46,7 @@ def send_event_to_master(client, host, port, requester, event, context): "event": event.get_pdu_json(), "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, - "context": context.serialize(), + "context": context.serialize(event), "requester": requester.serialize(), } @@ -96,7 +96,7 @@ class ReplicationSendEventRestServlet(RestServlet): event = FrozenEvent(event_dict, internal_metadata, rejected_reason) requester = Requester.deserialize(self.store, content["requester"]) - context = EventContext.deserialize(self.store, content["context"]) + context = yield EventContext.deserialize(self.store, content["context"]) if requester.user: request.authenticated_entity = requester.user.to_string() diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d0a840456..2b325e1c1 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -139,6 +139,20 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue(group_to_state) + @defer.inlineCallbacks + def get_state_ids_for_group(self, state_group): + """Get the state IDs for the 
given state group + + Args: + state_group (int) + + Returns: + Deferred[dict]: Resolves to a map of (type, state_key) -> event_id + """ + group_to_state = yield self._get_state_for_groups((state_group,)) + + defer.returnValue(group_to_state[state_group]) + @defer.inlineCallbacks def get_state_groups(self, room_id, event_ids): """ Get the state groups for the given list of event_ids From acac21248cf1834233831383ee52198ca1bd010c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 15:01:12 +0000 Subject: [PATCH 097/200] Store push actions in staging area --- synapse/push/bulk_push_rule_evaluator.py | 3 +++ synapse/storage/event_push_actions.py | 27 +++++++++++++++++++ .../schema/delta/47/push_actions_staging.sql | 24 +++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 synapse/storage/schema/delta/47/push_actions_staging.sql diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 425a017bd..841ccbd1f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -191,6 +191,9 @@ class BulkPushRuleEvaluator(object): actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: actions_by_user[uid] = actions + yield self.store.add_push_actions_to_staging( + event.event_id, uid, actions, + ) break defer.returnValue(actions_by_user) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 8efe2fd4b..80c3cfe95 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -738,6 +738,33 @@ class EventPushActionsStore(SQLBaseStore): (rotate_to_stream_ordering,) ) + def add_push_actions_to_staging(self, event_id, user_id, actions): + """Add the push actions for the user and event to the push + action staging area. 
+ + Args: + event_id (str) + user_id (str) + actions (list) + + Returns: + Deferred + """ + + is_highlight = _action_has_highlight(actions) + + return self._simple_insert( + table="event_push_actions_staging", + values={ + "event_id": event_id, + "user_id": user_id, + "actions": _serialize_action(actions, is_highlight), + "notif": True, + "highlight": is_highlight, + }, + desc="add_push_actions_to_staging", + ) + def _action_has_highlight(actions): for action in actions: diff --git a/synapse/storage/schema/delta/47/push_actions_staging.sql b/synapse/storage/schema/delta/47/push_actions_staging.sql new file mode 100644 index 000000000..ec4b1d7d4 --- /dev/null +++ b/synapse/storage/schema/delta/47/push_actions_staging.sql @@ -0,0 +1,24 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CREATE TABLE event_push_actions_staging ( + event_id TEXT NOT NULL, + user_id TEXT NOT NULL, + actions TEXT NOT NULL, + notif SMALLINT NOT NULL, + highlight SMALLINT NOT NULL +); + +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id); From c714c6185367e39123530cb7f89584004434c473 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 15:13:36 +0000 Subject: [PATCH 098/200] Update event_push_actions table from staging table --- synapse/storage/event_push_actions.py | 55 ++++++++++++++++++--------- synapse/storage/events.py | 2 +- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 80c3cfe95..34ff9be73 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -88,33 +88,50 @@ class EventPushActionsStore(SQLBaseStore): self._rotate_notifs, 30 * 60 * 1000 ) - def _set_push_actions_for_event_and_users_txn(self, txn, event, tuples): + def _set_push_actions_for_event_and_users_txn(self, txn, event): """ Args: event: the event set actions for tuples: list of tuples of (user_id, actions) """ - values = [] - for uid, actions in tuples: - is_highlight = 1 if _action_has_highlight(actions) else 0 - values.append({ - 'room_id': event.room_id, - 'event_id': event.event_id, - 'user_id': uid, - 'actions': _serialize_action(actions, is_highlight), - 'stream_ordering': event.internal_metadata.stream_ordering, - 'topological_ordering': event.depth, - 'notif': 1, - 'highlight': is_highlight, - }) + sql = """ + INSERT INTO event_push_actions ( + room_id, event_id, user_id, actions, stream_ordering, + topological_ordering, notif, highlight + ) + SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight + FROM event_push_actions_staging + WHERE event_id = ? 
+ """ - for uid, __ in tuples: + txn.execute(sql, ( + event.room_id, event.internal_metadata.stream_ordering, + event.depth, event.event_id, + )) + + user_ids = self._simple_select_onecol_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + retcol="user_id", + ) + + self._simple_delete_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + ) + + for uid in user_ids: txn.call_after( self.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid) + (event.room_id, uid,) ) - self._simple_insert_many_txn(txn, "event_push_actions", values) @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( @@ -751,7 +768,7 @@ class EventPushActionsStore(SQLBaseStore): Deferred """ - is_highlight = _action_has_highlight(actions) + is_highlight = is_highlight = 1 if _action_has_highlight(actions) else 0 return self._simple_insert( table="event_push_actions_staging", @@ -759,7 +776,7 @@ class EventPushActionsStore(SQLBaseStore): "event_id": event_id, "user_id": user_id, "actions": _serialize_action(actions, is_highlight), - "notif": True, + "notif": 1, "highlight": is_highlight, }, desc="add_push_actions_to_staging", diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 28cce2979..ca64aacb1 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1170,7 +1170,7 @@ class EventsStore(SQLBaseStore): # Insert all the push actions into the event_push_actions table. 
if context.push_actions: self._set_push_actions_for_event_and_users_txn( - txn, event, context.push_actions + txn, event, ) if event.type == EventTypes.Redaction and event.redacts is not None: From 4810f7effd0fc3fd97f9edaf8ea0af48477adb0a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 15:18:37 +0000 Subject: [PATCH 099/200] Remove context.push_actions --- synapse/events/snapshot.py | 4 ---- synapse/push/action_generator.py | 6 +----- synapse/push/bulk_push_rule_evaluator.py | 9 +++------ synapse/storage/events.py | 7 +++---- tests/replication/slave/storage/test_events.py | 5 ++++- 5 files changed, 11 insertions(+), 20 deletions(-) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index f9445bef1..8e684d91b 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -52,7 +52,6 @@ class EventContext(object): "prev_state_ids", "state_group", "rejected", - "push_actions", "prev_group", "delta_ids", "prev_state_events", @@ -67,7 +66,6 @@ class EventContext(object): self.state_group = None self.rejected = False - self.push_actions = [] # A previously persisted state group and a delta between that # and this state. 
@@ -104,7 +102,6 @@ class EventContext(object): "event_state_key": event.state_key if event.is_state() else None, "state_group": self.state_group, "rejected": self.rejected, - "push_actions": self.push_actions, "prev_group": self.prev_group, "delta_ids": _encode_state_dict(self.delta_ids), "prev_state_events": self.prev_state_events, @@ -127,7 +124,6 @@ class EventContext(object): context = EventContext() context.state_group = input["state_group"] context.rejected = input["rejected"] - context.push_actions = input["push_actions"] context.prev_group = input["prev_group"] context.delta_ids = _decode_state_dict(input["delta_ids"]) context.prev_state_events = input["prev_state_events"] diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py index fe09d50d5..8f619a7a1 100644 --- a/synapse/push/action_generator.py +++ b/synapse/push/action_generator.py @@ -40,10 +40,6 @@ class ActionGenerator(object): @defer.inlineCallbacks def handle_push_actions_for_event(self, event, context): with Measure(self.clock, "action_for_event_by_user"): - actions_by_user = yield self.bulk_evaluator.action_for_event_by_user( + yield self.bulk_evaluator.action_for_event_by_user( event, context ) - - context.push_actions = [ - (uid, actions) for uid, actions in actions_by_user.iteritems() - ] diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 841ccbd1f..1140788aa 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -137,14 +137,13 @@ class BulkPushRuleEvaluator(object): @defer.inlineCallbacks def action_for_event_by_user(self, event, context): - """Given an event and context, evaluate the push rules and return - the results + """Given an event and context, evaluate the push rules and insert the + results into the event_push_actions_staging table. 
Returns: - dict of user_id -> action + Deferred """ rules_by_user = yield self._get_rules_for_event(event, context) - actions_by_user = {} room_members = yield self.store.get_joined_users_from_context( event, context @@ -190,12 +189,10 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: - actions_by_user[uid] = actions yield self.store.add_push_actions_to_staging( event.event_id, uid, actions, ) break - defer.returnValue(actions_by_user) def _condition_checker(evaluator, conditions, uid, display_name, cache): diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ca64aacb1..52b7b3474 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1168,10 +1168,9 @@ class EventsStore(SQLBaseStore): for event, context in events_and_contexts: # Insert all the push actions into the event_push_actions table. - if context.push_actions: - self._set_push_actions_for_event_and_users_txn( - txn, event, - ) + self._set_push_actions_for_event_and_users_txn( + txn, event, + ) if event.type == EventTypes.Redaction and event.redacts is not None: # Remove the entries in the event_push_actions table for the diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index f430cce93..4780f2ab7 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -230,7 +230,10 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - context.push_actions = push_actions + for user_id, actions in push_actions: + yield self.master_store.add_push_actions_to_staging( + event.event_id, user_id, actions, + ) ordering = None if backfill: From b96278d6fe499e47133d2d2e82b9d3a0074d7005 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 15:37:40 +0000 
Subject: [PATCH 100/200] Ensure that we delete staging push actions on errors --- synapse/handlers/message.py | 12 +++++++++--- synapse/storage/event_push_actions.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1c3ac03f2..d99d8049b 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -683,9 +683,15 @@ class EventCreationHandler(object): event, context ) - (event_stream_id, max_stream_id) = yield self.store.persist_event( - event, context=context - ) + try: + (event_stream_id, max_stream_id) = yield self.store.persist_event( + event, context=context + ) + except: # noqa: E722, as we reraise the exception this is fine. + # Ensure that we actually remove the entries in the push actions + # staging area + preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) + raise # this intentionally does not yield: we don't care about the result # and don't need to wait for it. 
diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 34ff9be73..28226455b 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -782,6 +782,22 @@ class EventPushActionsStore(SQLBaseStore): desc="add_push_actions_to_staging", ) + def remove_push_actions_from_staging(self, event_id): + """Called if we failed to persist the event to ensure that stale push + actions don't build up in the DB + + Args: + event_id (str) + """ + + return self._simple_delete( + table="event_push_actions_staging", + keyvalues={ + "event_id": event_id, + }, + desc="remove_push_actions_from_staging", + ) + def _action_has_highlight(actions): for action in actions: From 3a061cae2626a3aeb680f9100f39c8c7ebf13fef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 16:24:07 +0000 Subject: [PATCH 101/200] Fix unit test --- tests/storage/test_event_push_actions.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 313548835..d483e7cf9 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -62,6 +62,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): {"notify_count": noitf_count, "highlight_count": highlight_count} ) + @defer.inlineCallbacks def _inject_actions(stream, action): event = Mock() event.room_id = room_id @@ -69,11 +70,12 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): event.internal_metadata.stream_ordering = stream event.depth = stream - tuples = [(user_id, action)] - - return self.store.runInteraction( + yield self.store.add_push_actions_to_staging( + event.event_id, user_id, action, + ) + yield self.store.runInteraction( "", self.store._set_push_actions_for_event_and_users_txn, - event, tuples + event, ) def _rotate(stream): From 012e8e142a4ca7d87e1ffd66cce44b23bf943e9c Mon Sep 17 00:00:00 
2001 From: Erik Johnston Date: Fri, 16 Feb 2018 11:35:01 +0000 Subject: [PATCH 102/200] Comments --- synapse/push/bulk_push_rule_evaluator.py | 3 +++ synapse/storage/event_push_actions.py | 3 ++- synapse/storage/schema/delta/47/push_actions_staging.sql | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 1140788aa..bf4f1c583 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -189,6 +189,9 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: + # Push rules say we should notify the user of this event, + # so we mark it in the DB in the staging area. (This + # will then get handled when we persist the event) yield self.store.add_push_actions_to_staging( event.event_id, uid, actions, ) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 28226455b..ea56d4d06 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -762,7 +762,8 @@ class EventPushActionsStore(SQLBaseStore): Args: event_id (str) user_id (str) - actions (list) + actions (list[dict|str]): An action can either be a string or + dict. Returns: Deferred diff --git a/synapse/storage/schema/delta/47/push_actions_staging.sql b/synapse/storage/schema/delta/47/push_actions_staging.sql index ec4b1d7d4..edccf4a96 100644 --- a/synapse/storage/schema/delta/47/push_actions_staging.sql +++ b/synapse/storage/schema/delta/47/push_actions_staging.sql @@ -13,6 +13,10 @@ * limitations under the License. */ +-- Temporary staging area for push actions that have been calculated for an +-- event, but the event hasn't yet been persisted. +-- When the event is persisted the rows are moved over to the +-- event_push_actions table. 
CREATE TABLE event_push_actions_staging ( event_id TEXT NOT NULL, user_id TEXT NOT NULL, From 6af025d3c4c19ab8a6f90b667b8c4259606ba47a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 11:35:31 +0000 Subject: [PATCH 103/200] Fix typo of double is_highlight --- synapse/storage/event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index ea56d4d06..f787431b7 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -769,7 +769,7 @@ class EventPushActionsStore(SQLBaseStore): Deferred """ - is_highlight = is_highlight = 1 if _action_has_highlight(actions) else 0 + is_highlight = 1 if _action_has_highlight(actions) else 0 return self._simple_insert( table="event_push_actions_staging", From d1a3325f9969385ec6cb090f80c61ae576d38e42 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 16 Feb 2018 14:02:31 +0000 Subject: [PATCH 104/200] (Really) fix tablescan of event_push_actions on purge commit 278d21b5 added new code to avoid the tablescan, but didn't remove the old :/ --- synapse/storage/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 52b7b3474..73177e0bc 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2309,7 +2309,6 @@ class EventsStore(SQLBaseStore): "event_edge_hashes", "event_edges", "event_forward_extremities", - "event_push_actions", "event_reference_hashes", "event_search", "event_signatures", From 3acd616979d59c6ce6a63a4f896390d058bceacc Mon Sep 17 00:00:00 2001 From: Pascal Bach Date: Sun, 18 Feb 2018 11:08:15 +0100 Subject: [PATCH 105/200] Update pynacl dependency to 1.2.1 or higher Signed-off-by: Pascal Bach --- synapse/python_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 
97b631e60..5d65b5fd6 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -24,7 +24,7 @@ REQUIREMENTS = { "unpaddedbase64>=1.1.0": ["unpaddedbase64>=1.1.0"], "canonicaljson>=1.0.0": ["canonicaljson>=1.0.0"], "signedjson>=1.0.0": ["signedjson>=1.0.0"], - "pynacl==0.3.0": ["nacl==0.3.0", "nacl.bindings"], + "pynacl>=1.2.1": ["nacl>=1.2.1", "nacl.bindings"], "service_identity>=1.0.0": ["service_identity>=1.0.0"], "Twisted>=16.0.0": ["twisted>=16.0.0"], "pyopenssl>=0.14": ["OpenSSL>=0.14"], From 6ff8c87484d13c00fddc87b0bcc3f4cd691c81ff Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:30:54 +0000 Subject: [PATCH 106/200] Batch inserts into event_push_actions_staging --- synapse/push/bulk_push_rule_evaluator.py | 15 ++++--- synapse/storage/event_push_actions.py | 53 ++++++++++++++++-------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index bf4f1c583..64e9a1da5 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -144,6 +144,7 @@ class BulkPushRuleEvaluator(object): Deferred """ rules_by_user = yield self._get_rules_for_event(event, context) + actions_by_user = {} room_members = yield self.store.get_joined_users_from_context( event, context @@ -189,14 +190,16 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: - # Push rules say we should notify the user of this event, - # so we mark it in the DB in the staging area. (This - # will then get handled when we persist the event) - yield self.store.add_push_actions_to_staging( - event.event_id, uid, actions, - ) + actions_by_user[uid] = actions break + # Push rules say we should notify the user of this event, + # so we mark it in the DB in the staging area. 
(This + # will then get handled when we persist the event) + yield self.store.add_push_actions_to_staging( + event.event_id, actions_by_user, + ) + def _condition_checker(evaluator, conditions, uid, display_name, cache): for cond in conditions: diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7..04e8836e6 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -755,32 +755,51 @@ class EventPushActionsStore(SQLBaseStore): (rotate_to_stream_ordering,) ) - def add_push_actions_to_staging(self, event_id, user_id, actions): - """Add the push actions for the user and event to the push - action staging area. + def add_push_actions_to_staging(self, event_id, user_id_actions): + """Add the push actions for the event to the push action staging area. Args: event_id (str) - user_id (str) - actions (list[dict|str]): An action can either be a string or - dict. + user_id_actions (dict[str, list[dict|str])]): A dictionary mapping + user_id to list of push actions, where an action can either be + a string or dict. Returns: Deferred """ - is_highlight = 1 if _action_has_highlight(actions) else 0 + if not user_id_actions: + return - return self._simple_insert( - table="event_push_actions_staging", - values={ - "event_id": event_id, - "user_id": user_id, - "actions": _serialize_action(actions, is_highlight), - "notif": 1, - "highlight": is_highlight, - }, - desc="add_push_actions_to_staging", + # This is a helper function for generating the necessary tuple that + # can be used to inert into the `event_push_actions_staging` table. 
+ def _gen_entry(user_id, actions): + is_highlight = 1 if _action_has_highlight(actions) else 0 + return ( + event_id, # event_id column + user_id, # user_id column + _serialize_action(actions, is_highlight), # actions column + 1, # notif column + is_highlight, # highlight column + ) + + def _add_push_actions_to_staging_txn(txn): + # We don't use _simple_insert_many here to avoid the overhead + # of generating lists of dicts. + + sql = """ + INSERT INTO event_push_actions_staging + (event_id, user_id, actions, notif, highlight) + VALUES (?, ?, ?, ?, ?) + """ + + txn.executemany(sql, ( + _gen_entry(user_id, actions) + for user_id, actions in user_id_actions.iteritems() + )) + + return self.runInteraction( + "add_push_actions_to_staging", _add_push_actions_to_staging_txn ) def remove_push_actions_from_staging(self, event_id): From d874d4f2d78956973d589bd32aa64d752d14cdc9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:36:56 +0000 Subject: [PATCH 107/200] Delete from push_actions_staging in federation too --- synapse/handlers/federation.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 46bcf8b08..8832ba58b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1447,16 +1447,24 @@ class FederationHandler(BaseHandler): auth_events=auth_events, ) - if not event.internal_metadata.is_outlier() and not backfilled: - yield self.action_generator.handle_push_actions_for_event( - event, context - ) + try: + if not event.internal_metadata.is_outlier() and not backfilled: + yield self.action_generator.handle_push_actions_for_event( + event, context + ) - event_stream_id, max_stream_id = yield self.store.persist_event( - event, - context=context, - backfilled=backfilled, - ) + event_stream_id, max_stream_id = yield self.store.persist_event( + event, + context=context, + backfilled=backfilled, + ) + except: # noqa: 
E722, as we reraise the exception this is fine. + # Ensure that we actually remove the entries in the push actions + # staging area + logcontext.preserve_fn( + self.store.remove_push_actions_from_staging + )(event.event_id) + raise if not backfilled: # this intentionally does not yield: we don't care about the result From e440e2845642a27f15d073aea58ce20b2785f66e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:41:40 +0000 Subject: [PATCH 108/200] Fix unit tests --- tests/replication/slave/storage/test_events.py | 10 ++++++---- tests/storage/test_event_push_actions.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 4780f2ab7..cb058d314 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -230,10 +230,12 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - for user_id, actions in push_actions: - yield self.master_store.add_push_actions_to_staging( - event.event_id, user_id, actions, - ) + yield self.master_store.add_push_actions_to_staging( + event.event_id, { + user_id: actions + for user_id, actions in push_actions + }, + ) ordering = None if backfill: diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index d483e7cf9..550f9ba66 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -71,7 +71,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): event.depth = stream yield self.store.add_push_actions_to_staging( - event.event_id, user_id, action, + event.event_id, {user_id: action}, ) yield self.store.runInteraction( "", self.store._set_push_actions_for_event_and_users_txn, From ad0ccf15ea35603c9453523acb3f43661fdbaa12 Mon Sep 17 00:00:00 2001 
From: Erik Johnston Date: Tue, 20 Feb 2018 12:29:50 +0000 Subject: [PATCH 109/200] Refactor _set_push_actions_for_event_and_users_txn to use events_and_contexts --- synapse/storage/event_push_actions.py | 65 +++++++++++++++------------ synapse/storage/events.py | 11 ++--- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7..dac350548 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -88,11 +88,13 @@ class EventPushActionsStore(SQLBaseStore): self._rotate_notifs, 30 * 60 * 1000 ) - def _set_push_actions_for_event_and_users_txn(self, txn, event): - """ + def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts): + """Handles moving push actions from staging table to main + event_push_actions table for all events in `events_and_contexts`. + Args: - event: the event set actions for - tuples: list of tuples of (user_id, actions) + events_and_contexts (list[(EventBase, EventContext)]): events + we are persisting """ sql = """ @@ -105,34 +107,39 @@ class EventPushActionsStore(SQLBaseStore): WHERE event_id = ? 
""" - txn.execute(sql, ( - event.room_id, event.internal_metadata.stream_ordering, - event.depth, event.event_id, - )) + if events_and_contexts: + txn.executemany(sql, ( + ( + event.room_id, event.internal_metadata.stream_ordering, + event.depth, event.event_id, + ) + for event, _ in events_and_contexts + )) - user_ids = self._simple_select_onecol_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - retcol="user_id", - ) - - self._simple_delete_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - ) - - for uid in user_ids: - txn.call_after( - self.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid,) + for event, _ in events_and_contexts: + user_ids = self._simple_select_onecol_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + retcol="user_id", ) + for uid in user_ids: + txn.call_after( + self.get_unread_event_push_actions_by_room_for_user.invalidate_many, + (event.room_id, uid,) + ) + + txn.executemany( + "DELETE FROM event_push_actions_staging WHERE event_id = ?", + ( + (event.event_id,) + for event, _ in events_and_contexts + ) + ) + @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc..c8b8abc2e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1162,16 +1162,17 @@ class EventsStore(SQLBaseStore): backfilled (bool): True if the events were backfilled """ + # Insert all the push actions into the event_push_actions table. 
+ self._set_push_actions_for_event_and_users_txn( + txn, + events_and_contexts=events_and_contexts, + ) + if not events_and_contexts: # nothing to do here return for event, context in events_and_contexts: - # Insert all the push actions into the event_push_actions table. - self._set_push_actions_for_event_and_users_txn( - txn, event, - ) - if event.type == EventTypes.Redaction and event.redacts is not None: # Remove the entries in the event_push_actions table for the # redacted event. From 24087bffa932660cf4482b465b759f7161465f8a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 12:33:04 +0000 Subject: [PATCH 110/200] Ensure all push actions are deleted from staging --- synapse/storage/event_push_actions.py | 10 +++++++++- synapse/storage/events.py | 11 ++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index dac350548..6a122b05a 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -88,13 +88,21 @@ class EventPushActionsStore(SQLBaseStore): self._rotate_notifs, 30 * 60 * 1000 ) - def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts): + def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts, + all_events_and_contexts): """Handles moving push actions from staging table to main event_push_actions table for all events in `events_and_contexts`. + Also ensures that all events in `all_events_and_contexts` are removed + from the push action staging area. + Args: events_and_contexts (list[(EventBase, EventContext)]): events we are persisting + all_events_and_contexts (list[(EventBase, EventContext)]): all + events that we were going to persist. This includes events + we've already persisted, etc, that wouldn't appear in + events_and_context. 
""" sql = """ diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c8b8abc2e..7f8561a0c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -693,6 +693,8 @@ class EventsStore(SQLBaseStore): list of the event ids which are the forward extremities. """ + all_events_and_contexts = events_and_contexts + max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering self._update_current_state_txn(txn, state_delta_for_room, max_stream_order) @@ -755,6 +757,7 @@ class EventsStore(SQLBaseStore): self._update_metadata_tables_txn( txn, events_and_contexts=events_and_contexts, + all_events_and_contexts=all_events_and_contexts, backfilled=backfilled, ) @@ -1152,13 +1155,18 @@ class EventsStore(SQLBaseStore): ec for ec in events_and_contexts if ec[0] not in to_remove ] - def _update_metadata_tables_txn(self, txn, events_and_contexts, backfilled): + def _update_metadata_tables_txn(self, txn, events_and_contexts, + all_events_and_contexts, backfilled): """Update all the miscellaneous tables for new events Args: txn (twisted.enterprise.adbapi.Connection): db connection events_and_contexts (list[(EventBase, EventContext)]): events we are persisting + all_events_and_contexts (list[(EventBase, EventContext)]): all + events that we were going to persist. This includes events + we've already persisted, etc, that wouldn't appear in + events_and_context. 
backfilled (bool): True if the events were backfilled """ @@ -1166,6 +1174,7 @@ class EventsStore(SQLBaseStore): self._set_push_actions_for_event_and_users_txn( txn, events_and_contexts=events_and_contexts, + all_events_and_contexts=all_events_and_contexts, ) if not events_and_contexts: From 25634ed1523b88f82edaa73124f3afd057fbb16d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 12:40:44 +0000 Subject: [PATCH 111/200] Fix test --- tests/storage/test_event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index d483e7cf9..dc90e5c24 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -75,7 +75,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): ) yield self.store.runInteraction( "", self.store._set_push_actions_for_event_and_users_txn, - event, + [(event, None)], [(event, None)], ) def _rotate(stream): From f5ac4dc2d46d329e7053259c61ad402269903ee3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 12:17:14 +0000 Subject: [PATCH 112/200] Split ReceiptsStore --- synapse/replication/slave/storage/receipts.py | 33 +----- synapse/storage/__init__.py | 3 - synapse/storage/receipts.py | 109 ++++++++++-------- 3 files changed, 69 insertions(+), 76 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index b371574ec..4e845ec04 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,9 +17,7 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.receipts import ReceiptsStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.receipts import ReceiptsWorkerStore # So, um, we want to borrow a load of functions intended for reading from # a DataStore, but we don't want to take functions that either write to the @@ -29,36 +28,14 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache # the method descriptor on the DataStore and chuck them into our class. -class SlavedReceiptsStore(BaseSlavedStore): +class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedReceiptsStore, self).__init__(db_conn, hs) - - self._receipts_id_gen = SlavedIdTracker( + receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" ) - self._receipts_stream_cache = StreamChangeCache( - "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() - ) - - get_receipts_for_user = ReceiptsStore.__dict__["get_receipts_for_user"] - get_linearized_receipts_for_room = ( - ReceiptsStore.__dict__["get_linearized_receipts_for_room"] - ) - _get_linearized_receipts_for_rooms = ( - ReceiptsStore.__dict__["_get_linearized_receipts_for_rooms"] - ) - get_last_receipt_event_id_for_user = ( - ReceiptsStore.__dict__["get_last_receipt_event_id_for_user"] - ) - - get_max_receipt_stream_id = DataStore.get_max_receipt_stream_id.__func__ - get_all_updated_receipts = DataStore.get_all_updated_receipts.__func__ - - get_linearized_receipts_for_rooms = ( - DataStore.get_linearized_receipts_for_rooms.__func__ - ) + super(SlavedReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) def stream_positions(self): result = super(SlavedReceiptsStore, self).stream_positions() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index f8fbd02ce..e1c4fe086 
100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -104,9 +104,6 @@ class DataStore(RoomMemberStore, RoomStore, db_conn, "events", "stream_ordering", step=-1, extra_tables=[("ex_outlier_stream", "event_stream_ordering")] ) - self._receipts_id_gen = StreamIdGenerator( - db_conn, "receipts_linearized", "stream_id" - ) self._account_data_id_gen = StreamIdGenerator( db_conn, "account_data_max_stream_id", "stream_id" ) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 12b3cc7f5..aa62474a4 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +15,7 @@ # limitations under the License. from ._base import SQLBaseStore +from .util.id_generators import StreamIdGenerator from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -26,9 +28,17 @@ import ujson as json logger = logging.getLogger(__name__) -class ReceiptsStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(ReceiptsStore, self).__init__(db_conn, hs) +class ReceiptsWorkerStore(SQLBaseStore): + def __init__(self, receipts_id_gen, db_conn, hs): + """ + Args: + receipts_id_gen (StreamIdGenerator|SlavedIdTracker) + db_conn: Database connection + hs (Homeserver) + """ + super(ReceiptsWorkerStore, self).__init__(db_conn, hs) + + self._receipts_id_gen = receipts_id_gen self._receipts_stream_cache = StreamChangeCache( "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() @@ -39,26 +49,6 @@ class ReceiptsStore(SQLBaseStore): receipts = yield self.get_receipts_for_room(room_id, "m.read") defer.returnValue(set(r['user_id'] for r in receipts)) - def 
_invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, - user_id): - if receipt_type != "m.read": - return - - # Returns an ObservableDeferred - res = self.get_users_with_read_receipts_in_room.cache.get( - room_id, None, update_metrics=False, - ) - - if res: - if isinstance(res, defer.Deferred) and res.called: - res = res.result - if user_id in res: - # We'd only be adding to the set, so no point invalidating if the - # user is already there - return - - self.get_users_with_read_receipts_in_room.invalidate((room_id,)) - @cached(num_args=2) def get_receipts_for_room(self, room_id, receipt_type): return self._simple_select_list( @@ -273,6 +263,57 @@ class ReceiptsStore(SQLBaseStore): def get_max_receipt_stream_id(self): return self._receipts_id_gen.get_current_token() + def get_all_updated_receipts(self, last_id, current_id, limit=None): + if last_id == current_id: + return defer.succeed([]) + + def get_all_updated_receipts_txn(txn): + sql = ( + "SELECT stream_id, room_id, receipt_type, user_id, event_id, data" + " FROM receipts_linearized" + " WHERE ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + ) + args = [last_id, current_id] + if limit is not None: + sql += " LIMIT ?" 
+ args.append(limit) + txn.execute(sql, args) + + return txn.fetchall() + return self.runInteraction( + "get_all_updated_receipts", get_all_updated_receipts_txn + ) + + +class ReceiptsStore(ReceiptsWorkerStore): + def __init__(self, db_conn, hs): + receipts_id_gen = StreamIdGenerator( + db_conn, "receipts_linearized", "stream_id" + ) + + super(ReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + + def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, + user_id): + if receipt_type != "m.read": + return + + # Returns an ObservableDeferred + res = self.get_users_with_read_receipts_in_room.cache.get( + room_id, None, update_metrics=False, + ) + + if res: + if isinstance(res, defer.Deferred) and res.called: + res = res.result + if user_id in res: + # We'd only be adding to the set, so no point invalidating if the + # user is already there + return + + self.get_users_with_read_receipts_in_room.invalidate((room_id,)) + def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): txn.call_after( @@ -457,25 +498,3 @@ class ReceiptsStore(SQLBaseStore): "data": json.dumps(data), } ) - - def get_all_updated_receipts(self, last_id, current_id, limit=None): - if last_id == current_id: - return defer.succeed([]) - - def get_all_updated_receipts_txn(txn): - sql = ( - "SELECT stream_id, room_id, receipt_type, user_id, event_id, data" - " FROM receipts_linearized" - " WHERE ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - ) - args = [last_id, current_id] - if limit is not None: - sql += " LIMIT ?" 
- args.append(limit) - txn.execute(sql, args) - - return txn.fetchall() - return self.runInteraction( - "get_all_updated_receipts", get_all_updated_receipts_txn - ) From e316bbb4c07cad97c4cff5bc0c5b0dc2cd7bc519 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:33:18 +0000 Subject: [PATCH 113/200] Use abstract base class to access stream IDs --- synapse/replication/slave/storage/receipts.py | 9 +++- synapse/storage/receipts.py | 42 ++++++++++++------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index 4e845ec04..a2eb4a02d 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -31,11 +31,16 @@ from synapse.storage.receipts import ReceiptsWorkerStore class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - receipts_id_gen = SlavedIdTracker( + # We instansiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" ) - super(SlavedReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + super(SlavedReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() def stream_positions(self): result = super(SlavedReceiptsStore, self).stream_positions() diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index aa62474a4..b11cf7ff6 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -21,6 +21,7 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache from twisted.internet import defer +import abc import logging import ujson as json @@ -29,21 +30,30 @@ logger = logging.getLogger(__name__) class ReceiptsWorkerStore(SQLBaseStore): - def __init__(self, receipts_id_gen, db_conn, 
hs): - """ - Args: - receipts_id_gen (StreamIdGenerator|SlavedIdTracker) - db_conn: Database connection - hs (Homeserver) - """ + """This is an abstract base class where subclasses must implement + `get_max_receipt_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): super(ReceiptsWorkerStore, self).__init__(db_conn, hs) - self._receipts_id_gen = receipts_id_gen - self._receipts_stream_cache = StreamChangeCache( - "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() + "ReceiptsRoomChangeCache", self.get_max_receipt_stream_id() ) + @abc.abstractmethod + def get_max_receipt_stream_id(self): + """Get the current max stream ID for receipts stream + + Returns: + int + """ + pass + @cachedInlineCallbacks() def get_users_with_read_receipts_in_room(self, room_id): receipts = yield self.get_receipts_for_room(room_id, "m.read") @@ -260,9 +270,6 @@ class ReceiptsWorkerStore(SQLBaseStore): } defer.returnValue(results) - def get_max_receipt_stream_id(self): - return self._receipts_id_gen.get_current_token() - def get_all_updated_receipts(self, last_id, current_id, limit=None): if last_id == current_id: return defer.succeed([]) @@ -288,11 +295,16 @@ class ReceiptsWorkerStore(SQLBaseStore): class ReceiptsStore(ReceiptsWorkerStore): def __init__(self, db_conn, hs): - receipts_id_gen = StreamIdGenerator( + # We instansiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = StreamIdGenerator( db_conn, "receipts_linearized", "stream_id" ) - super(ReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + super(ReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() def _invalidate_get_users_with_receipts_in_room(self, 
room_id, receipt_type, user_id): From 95e4cffd859b0fc3fcd54c755fcc0da403f97b94 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:58:40 +0000 Subject: [PATCH 114/200] Fix comment --- synapse/replication/slave/storage/receipts.py | 2 +- synapse/storage/receipts.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index a2eb4a02d..f0e29e983 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -31,7 +31,7 @@ from synapse.storage.receipts import ReceiptsWorkerStore class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - # We instansiate this first as the ReceiptsWorkerStore constructor + # We instantiate this first as the ReceiptsWorkerStore constructor # needs to be able to call get_max_receipt_stream_id self._receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index b11cf7ff6..c2a6613a6 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -295,7 +295,7 @@ class ReceiptsWorkerStore(SQLBaseStore): class ReceiptsStore(ReceiptsWorkerStore): def __init__(self, db_conn, hs): - # We instansiate this first as the ReceiptsWorkerStore constructor + # We instantiate this first as the ReceiptsWorkerStore constructor # needs to be able to call get_max_receipt_stream_id self._receipts_id_gen = StreamIdGenerator( db_conn, "receipts_linearized", "stream_id" From 8fbb4d0d19031d8cd3742285fb9b36c3bdfc52a0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:59:23 +0000 Subject: [PATCH 115/200] Raise exception in abstract method --- synapse/storage/receipts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index c2a6613a6..40530632c 100644 
--- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -52,7 +52,7 @@ class ReceiptsWorkerStore(SQLBaseStore): Returns: int """ - pass + raise NotImplementedError() @cachedInlineCallbacks() def get_users_with_read_receipts_in_room(self, room_id): From ca9b9d970374766f7b53dc5b1762cfa1b0e1a5dd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 12:08:42 +0000 Subject: [PATCH 116/200] Split AccountDataStore and TagStore --- .../replication/slave/storage/account_data.py | 38 +--------- synapse/storage/__init__.py | 8 -- synapse/storage/account_data.py | 75 +++++++++++++++---- synapse/storage/tags.py | 15 ++-- 4 files changed, 69 insertions(+), 67 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index efbd87918..6c95261aa 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -15,48 +15,18 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.account_data import AccountDataStore -from synapse.storage.tags import TagsStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.account_data import AccountDataWorkerStore +from synapse.storage.tags import TagsWorkerStore -class SlavedAccountDataStore(BaseSlavedStore): +class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedAccountDataStore, self).__init__(db_conn, hs) self._account_data_id_gen = SlavedIdTracker( db_conn, "account_data_max_stream_id", "stream_id", ) - self._account_data_stream_cache = StreamChangeCache( - "AccountDataAndTagsChangeCache", - self._account_data_id_gen.get_current_token(), - ) - get_account_data_for_user = ( - AccountDataStore.__dict__["get_account_data_for_user"] - ) - - get_global_account_data_by_type_for_users 
= ( - AccountDataStore.__dict__["get_global_account_data_by_type_for_users"] - ) - - get_global_account_data_by_type_for_user = ( - AccountDataStore.__dict__["get_global_account_data_by_type_for_user"] - ) - - get_tags_for_user = TagsStore.__dict__["get_tags_for_user"] - get_tags_for_room = ( - DataStore.get_tags_for_room.__func__ - ) - get_account_data_for_room = ( - DataStore.get_account_data_for_room.__func__ - ) - - get_updated_tags = DataStore.get_updated_tags.__func__ - get_updated_account_data_for_user = ( - DataStore.get_updated_account_data_for_user.__func__ - ) + super(SlavedAccountDataStore, self).__init__(db_conn, hs) def get_max_account_data_stream_id(self): return self._account_data_id_gen.get_current_token() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index e1c4fe086..e221284ee 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -104,9 +104,6 @@ class DataStore(RoomMemberStore, RoomStore, db_conn, "events", "stream_ordering", step=-1, extra_tables=[("ex_outlier_stream", "event_stream_ordering")] ) - self._account_data_id_gen = StreamIdGenerator( - db_conn, "account_data_max_stream_id", "stream_id" - ) self._presence_id_gen = StreamIdGenerator( db_conn, "presence_stream", "stream_id" ) @@ -159,11 +156,6 @@ class DataStore(RoomMemberStore, RoomStore, "MembershipStreamChangeCache", events_max, ) - account_max = self._account_data_id_gen.get_current_token() - self._account_data_stream_cache = StreamChangeCache( - "AccountDataAndTagsChangeCache", account_max, - ) - self._presence_on_startup = self._get_active_presence(db_conn) presence_cache_prefill, min_presence_val = self._get_cache_dict( diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 56a0bde54..66fed4bdc 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -13,18 +13,46 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._base import SQLBaseStore from twisted.internet import defer +from ._base import SQLBaseStore +from .util.id_generators import StreamIdGenerator + +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks +import abc import ujson as json import logging logger = logging.getLogger(__name__) -class AccountDataStore(SQLBaseStore): +class AccountDataWorkerStore(SQLBaseStore): + """This is an abstract base class where subclasses must implement + `get_max_account_data_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): + account_max = self.get_max_account_data_stream_id() + self._account_data_stream_cache = StreamChangeCache( + "AccountDataAndTagsChangeCache", account_max, + ) + + super(AccountDataWorkerStore, self).__init__(db_conn, hs) + + @abc.abstractmethod + def get_max_account_data_stream_id(self): + """Get the current max stream ID for account data stream + + Returns: + int + """ + raise NotImplementedError() @cached() def get_account_data_for_user(self, user_id): @@ -209,6 +237,36 @@ class AccountDataStore(SQLBaseStore): "get_updated_account_data_for_user", get_updated_account_data_for_user_txn ) + @cachedInlineCallbacks(num_args=2, cache_context=True, max_entries=5000) + def is_ignored_by(self, ignored_user_id, ignorer_user_id, cache_context): + ignored_account_data = yield self.get_global_account_data_by_type_for_user( + "m.ignored_user_list", ignorer_user_id, + on_invalidate=cache_context.invalidate, + ) + if not ignored_account_data: + defer.returnValue(False) + + defer.returnValue( + ignored_user_id in ignored_account_data.get("ignored_users", {}) + ) + + +class AccountDataStore(AccountDataWorkerStore): + def __init__(self, db_conn, hs): + self._account_data_id_gen = 
StreamIdGenerator( + db_conn, "account_data_max_stream_id", "stream_id" + ) + + super(AccountDataStore, self).__init__(db_conn, hs) + + def get_max_account_data_stream_id(self): + """Get the current max stream id for the private user data stream + + Returns: + A deferred int. + """ + return self._account_data_id_gen.get_current_token() + @defer.inlineCallbacks def add_account_data_to_room(self, user_id, room_id, account_data_type, content): """Add some account_data to a room for a user. @@ -321,16 +379,3 @@ class AccountDataStore(SQLBaseStore): "update_account_data_max_stream_id", _update, ) - - @cachedInlineCallbacks(num_args=2, cache_context=True, max_entries=5000) - def is_ignored_by(self, ignored_user_id, ignorer_user_id, cache_context): - ignored_account_data = yield self.get_global_account_data_by_type_for_user( - "m.ignored_user_list", ignorer_user_id, - on_invalidate=cache_context.invalidate, - ) - if not ignored_account_data: - defer.returnValue(False) - - defer.returnValue( - ignored_user_id in ignored_account_data.get("ignored_users", {}) - ) diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index bff73f3f0..484d66991 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore +from .account_data import AccountDataWorkerStore + from synapse.util.caches.descriptors import cached from twisted.internet import defer @@ -23,15 +24,7 @@ import logging logger = logging.getLogger(__name__) -class TagsStore(SQLBaseStore): - def get_max_account_data_stream_id(self): - """Get the current max stream id for the private user data stream - - Returns: - A deferred int. - """ - return self._account_data_id_gen.get_current_token() - +class TagsWorkerStore(AccountDataWorkerStore): @cached() def get_tags_for_user(self, user_id): """Get all the tags for a user. 
@@ -170,6 +163,8 @@ class TagsStore(SQLBaseStore): row["tag"]: json.loads(row["content"]) for row in rows }) + +class TagsStore(TagsWorkerStore): @defer.inlineCallbacks def add_tag_to_room(self, user_id, room_id, tag, content): """Add a tag to a room for a user. From cbaad969f951c2db067ced709154151a7dfaf6f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 10:39:27 +0000 Subject: [PATCH 117/200] Split PushRulesStore --- .../replication/slave/storage/push_rule.py | 23 ++---- synapse/storage/__init__.py | 12 ---- synapse/storage/push_rule.py | 71 ++++++++++++++----- 3 files changed, 61 insertions(+), 45 deletions(-) diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 83e880fdd..0e3d9a87d 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -15,29 +15,15 @@ from .events import SlavedEventStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.push_rule import PushRuleStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.push_rule import PushRulesWorkerStore -class SlavedPushRuleStore(SlavedEventStore): +class SlavedPushRuleStore(PushRulesWorkerStore, SlavedEventStore): def __init__(self, db_conn, hs): - super(SlavedPushRuleStore, self).__init__(db_conn, hs) self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id", ) - self.push_rules_stream_cache = StreamChangeCache( - "PushRulesStreamChangeCache", - self._push_rules_stream_id_gen.get_current_token(), - ) - - get_push_rules_for_user = PushRuleStore.__dict__["get_push_rules_for_user"] - get_push_rules_enabled_for_user = ( - PushRuleStore.__dict__["get_push_rules_enabled_for_user"] - ) - have_push_rules_changed_for_user = ( - DataStore.have_push_rules_changed_for_user.__func__ - ) + super(SlavedPushRuleStore, self).__init__(db_conn, hs) def 
get_push_rules_stream_token(self): return ( @@ -45,6 +31,9 @@ class SlavedPushRuleStore(SlavedEventStore): self._stream_id_gen.get_current_token(), ) + def get_max_push_rules_stream_id(self): + return self._push_rules_stream_id_gen.get_current_token() + def stream_positions(self): result = super(SlavedPushRuleStore, self).stream_positions() result["push_rules"] = self._push_rules_stream_id_gen.get_current_token() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index e1c4fe086..cd2759858 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -177,18 +177,6 @@ class DataStore(RoomMemberStore, RoomStore, prefilled_cache=presence_cache_prefill ) - push_rules_prefill, push_rules_id = self._get_cache_dict( - db_conn, "push_rules_stream", - entity_column="user_id", - stream_column="stream_id", - max_value=self._push_rules_stream_id_gen.get_current_token()[0], - ) - - self.push_rules_stream_cache = StreamChangeCache( - "PushRulesStreamChangeCache", push_rules_id, - prefilled_cache=push_rules_prefill, - ) - max_device_inbox_id = self._device_inbox_id_gen.get_current_token() device_inbox_prefill, min_device_inbox_id = self._get_cache_dict( db_conn, "device_inbox", diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 8758b1c0c..b35bd7a64 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -15,10 +15,12 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.push.baserules import list_with_base_rules from synapse.api.constants import EventTypes from twisted.internet import defer +import abc import logging import simplejson as json @@ -48,7 +50,39 @@ def _load_rules(rawrules, enabled_map): return rules -class PushRuleStore(SQLBaseStore): +class PushRulesWorkerStore(SQLBaseStore): + """This is an abstract base class where subclasses must 
implement + `get_max_push_rules_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): + super(PushRulesWorkerStore, self).__init__(db_conn, hs) + + push_rules_prefill, push_rules_id = self._get_cache_dict( + db_conn, "push_rules_stream", + entity_column="user_id", + stream_column="stream_id", + max_value=self.get_max_push_rules_stream_id(), + ) + + self.push_rules_stream_cache = StreamChangeCache( + "PushRulesStreamChangeCache", push_rules_id, + prefilled_cache=push_rules_prefill, + ) + + @abc.abstractmethod + def get_max_push_rules_stream_id(self): + """Get the position of the push rules stream. + + Returns: + int + """ + raise NotImplementedError() + @cachedInlineCallbacks(max_entries=5000) def get_push_rules_for_user(self, user_id): rows = yield self._simple_select_list( @@ -89,6 +123,24 @@ class PushRuleStore(SQLBaseStore): r['rule_id']: False if r['enabled'] == 0 else True for r in results }) + def have_push_rules_changed_for_user(self, user_id, last_id): + if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id): + return defer.succeed(False) + else: + def have_push_rules_changed_txn(txn): + sql = ( + "SELECT COUNT(stream_id) FROM push_rules_stream" + " WHERE user_id = ? AND ? 
< stream_id" + ) + txn.execute(sql, (user_id, last_id)) + count, = txn.fetchone() + return bool(count) + return self.runInteraction( + "have_push_rules_changed", have_push_rules_changed_txn + ) + + +class PushRuleStore(PushRulesWorkerStore): @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids", num_args=1, inlineCallbacks=True) def bulk_get_push_rules(self, user_ids): @@ -526,21 +578,8 @@ class PushRuleStore(SQLBaseStore): room stream ordering it corresponds to.""" return self._push_rules_stream_id_gen.get_current_token() - def have_push_rules_changed_for_user(self, user_id, last_id): - if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id): - return defer.succeed(False) - else: - def have_push_rules_changed_txn(txn): - sql = ( - "SELECT COUNT(stream_id) FROM push_rules_stream" - " WHERE user_id = ? AND ? < stream_id" - ) - txn.execute(sql, (user_id, last_id)) - count, = txn.fetchone() - return bool(count) - return self.runInteraction( - "have_push_rules_changed", have_push_rules_changed_txn - ) + def get_max_push_rules_stream_id(self): + return self.get_push_rules_stream_token()[0] class RuleNotFoundException(Exception): From 6f727653717d9fc32f5676f4d991fbbe85c1c812 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 10:54:18 +0000 Subject: [PATCH 118/200] Split PusherStore --- synapse/replication/slave/storage/pushers.py | 11 ++--------- synapse/storage/pusher.py | 10 ++++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index 4e8d68ece..e352c3235 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -16,10 +16,10 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore +from synapse.storage.pusher import PusherWorkerStore -class SlavedPusherStore(BaseSlavedStore): 
+class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedPusherStore, self).__init__(db_conn, hs) @@ -28,13 +28,6 @@ class SlavedPusherStore(BaseSlavedStore): extra_tables=[("deleted_pushers", "stream_id")], ) - get_all_pushers = DataStore.get_all_pushers.__func__ - get_pushers_by = DataStore.get_pushers_by.__func__ - get_pushers_by_app_id_and_pushkey = ( - DataStore.get_pushers_by_app_id_and_pushkey.__func__ - ) - _decode_pushers_rows = DataStore._decode_pushers_rows.__func__ - def stream_positions(self): result = super(SlavedPusherStore, self).stream_positions() result["pushers"] = self._pushers_id_gen.get_current_token() diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index 3d8b4d5d5..b0159c70c 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -27,7 +27,7 @@ import types logger = logging.getLogger(__name__) -class PusherStore(SQLBaseStore): +class PusherWorkerStore(SQLBaseStore): def _decode_pushers_rows(self, rows): for r in rows: dataJson = r['data'] @@ -102,9 +102,6 @@ class PusherStore(SQLBaseStore): rows = yield self.runInteraction("get_all_pushers", get_pushers) defer.returnValue(rows) - def get_pushers_stream_token(self): - return self._pushers_id_gen.get_current_token() - def get_all_updated_pushers(self, last_id, current_id, limit): if last_id == current_id: return defer.succeed(([], [])) @@ -177,6 +174,11 @@ class PusherStore(SQLBaseStore): "get_all_updated_pushers_rows", get_all_updated_pushers_rows_txn ) + +class PusherStore(PusherWorkerStore): + def get_pushers_stream_token(self): + return self._pushers_id_gen.get_current_token() + @cachedInlineCallbacks(num_args=1, max_entries=15000) def get_if_user_has_pusher(self, user_id): # This only exists for the cachedList decorator From d15d237b0de69d9e8ea533060df9a8bb844b9a4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:01:13 +0000 Subject: [PATCH 119/200] Split out 
EventPushActionWorkerStore --- synapse/replication/slave/storage/events.py | 23 +--- synapse/storage/event_push_actions.py | 144 ++++++++++---------- 2 files changed, 76 insertions(+), 91 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f8c164b48..c5d6c6bd8 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -17,7 +17,7 @@ import logging from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore -from synapse.storage.event_push_actions import EventPushActionsStore +from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore @@ -38,7 +38,8 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. 
-class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(EventPushActionsWorkerStore, StateGroupWorkerStore, + BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -80,30 +81,12 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_invited_rooms_for_user = RoomMemberStore.__dict__[ "get_invited_rooms_for_user" ] - get_unread_event_push_actions_by_room_for_user = ( - EventPushActionsStore.__dict__["get_unread_event_push_actions_by_room_for_user"] - ) - _get_unread_counts_by_receipt_txn = ( - DataStore._get_unread_counts_by_receipt_txn.__func__ - ) - _get_unread_counts_by_pos_txn = ( - DataStore._get_unread_counts_by_pos_txn.__func__ - ) get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] ) _get_joined_hosts_cache = RoomMemberStore.__dict__["_get_joined_hosts_cache"] has_room_changed_since = DataStore.has_room_changed_since.__func__ - get_unread_push_actions_for_user_in_range_for_http = ( - DataStore.get_unread_push_actions_for_user_in_range_for_http.__func__ - ) - get_unread_push_actions_for_user_in_range_for_email = ( - DataStore.get_unread_push_actions_for_user_in_range_for_email.__func__ - ) - get_push_action_users_in_range = ( - DataStore.get_push_action_users_in_range.__func__ - ) get_event = DataStore.get_event.__func__ get_events = DataStore.get_events.__func__ get_rooms_for_user_where_membership_is = ( diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7..124583835 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -62,77 +62,7 @@ def _deserialize_action(actions, is_highlight): return DEFAULT_NOTIF_ACTION -class EventPushActionsStore(SQLBaseStore): - EPA_HIGHLIGHT_INDEX = "epa_highlight_index" - - def __init__(self, db_conn, hs): - super(EventPushActionsStore, self).__init__(db_conn, hs) - - 
self.register_background_index_update( - self.EPA_HIGHLIGHT_INDEX, - index_name="event_push_actions_u_highlight", - table="event_push_actions", - columns=["user_id", "stream_ordering"], - ) - - self.register_background_index_update( - "event_push_actions_highlights_index", - index_name="event_push_actions_highlights_index", - table="event_push_actions", - columns=["user_id", "room_id", "topological_ordering", "stream_ordering"], - where_clause="highlight=1" - ) - - self._doing_notif_rotation = False - self._rotate_notif_loop = self._clock.looping_call( - self._rotate_notifs, 30 * 60 * 1000 - ) - - def _set_push_actions_for_event_and_users_txn(self, txn, event): - """ - Args: - event: the event set actions for - tuples: list of tuples of (user_id, actions) - """ - - sql = """ - INSERT INTO event_push_actions ( - room_id, event_id, user_id, actions, stream_ordering, - topological_ordering, notif, highlight - ) - SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight - FROM event_push_actions_staging - WHERE event_id = ? 
- """ - - txn.execute(sql, ( - event.room_id, event.internal_metadata.stream_ordering, - event.depth, event.event_id, - )) - - user_ids = self._simple_select_onecol_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - retcol="user_id", - ) - - self._simple_delete_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - ) - - for uid in user_ids: - txn.call_after( - self.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid,) - ) - +class EventPushActionsWorkerStore(SQLBaseStore): @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id @@ -449,6 +379,78 @@ class EventPushActionsStore(SQLBaseStore): # Now return the first `limit` defer.returnValue(notifs[:limit]) + +class EventPushActionsStore(EventPushActionsWorkerStore): + EPA_HIGHLIGHT_INDEX = "epa_highlight_index" + + def __init__(self, db_conn, hs): + super(EventPushActionsStore, self).__init__(db_conn, hs) + + self.register_background_index_update( + self.EPA_HIGHLIGHT_INDEX, + index_name="event_push_actions_u_highlight", + table="event_push_actions", + columns=["user_id", "stream_ordering"], + ) + + self.register_background_index_update( + "event_push_actions_highlights_index", + index_name="event_push_actions_highlights_index", + table="event_push_actions", + columns=["user_id", "room_id", "topological_ordering", "stream_ordering"], + where_clause="highlight=1" + ) + + self._doing_notif_rotation = False + self._rotate_notif_loop = self._clock.looping_call( + self._rotate_notifs, 30 * 60 * 1000 + ) + + def _set_push_actions_for_event_and_users_txn(self, txn, event): + """ + Args: + event: the event set actions for + tuples: list of tuples of (user_id, actions) + """ + + sql = """ + INSERT INTO event_push_actions ( + room_id, event_id, user_id, actions, stream_ordering, + 
topological_ordering, notif, highlight + ) + SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight + FROM event_push_actions_staging + WHERE event_id = ? + """ + + txn.execute(sql, ( + event.room_id, event.internal_metadata.stream_ordering, + event.depth, event.event_id, + )) + + user_ids = self._simple_select_onecol_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + retcol="user_id", + ) + + self._simple_delete_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + ) + + for uid in user_ids: + txn.call_after( + self.get_unread_event_push_actions_by_room_for_user.invalidate_many, + (event.room_id, uid,) + ) + @defer.inlineCallbacks def get_push_actions_for_user(self, user_id, before=None, limit=50, only_highlight=False): From c96d547f4dd3c10d3c4be66c40cb07232a16a987 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:03:42 +0000 Subject: [PATCH 120/200] Actually use new param --- synapse/storage/event_push_actions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 6a122b05a..214ace27c 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -140,11 +140,13 @@ class EventPushActionsStore(SQLBaseStore): (event.room_id, uid,) ) + # Now we delete the staging area for *all* events that were being + # persisted. 
txn.executemany( "DELETE FROM event_push_actions_staging WHERE event_id = ?", ( (event.event_id,) - for event, _ in events_and_contexts + for event, _ in all_events_and_contexts ) ) From 573712da6b720cb808e61e4cbd5426e85e58a161 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:29:49 +0000 Subject: [PATCH 121/200] Update comments --- synapse/push/bulk_push_rule_evaluator.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 64e9a1da5..7c680659b 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -190,12 +190,13 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: + # Push rules say we should notify the user of this event actions_by_user[uid] = actions break - # Push rules say we should notify the user of this event, - # so we mark it in the DB in the staging area. (This - # will then get handled when we persist the event) + # Mark in the DB staging area the push actions for users who should be + # notified for this event. 
(This will then get handled when we persist + # the event) yield self.store.add_push_actions_to_staging( event.event_id, actions_by_user, ) From 27b094f382a33b7d69eb592951a463c3e53af5b4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:41:48 +0000 Subject: [PATCH 122/200] Split out get_events and co into a worker store --- synapse/replication/slave/storage/events.py | 13 +- synapse/storage/events.py | 695 ++++++++++---------- 2 files changed, 352 insertions(+), 356 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f8c164b48..74a81a0a5 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -18,6 +18,7 @@ from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore +from synapse.storage.events import EventsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore @@ -38,7 +39,7 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. 
-class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -104,8 +105,6 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_push_action_users_in_range = ( DataStore.get_push_action_users_in_range.__func__ ) - get_event = DataStore.get_event.__func__ - get_events = DataStore.get_events.__func__ get_rooms_for_user_where_membership_is = ( DataStore.get_rooms_for_user_where_membership_is.__func__ ) @@ -135,14 +134,6 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): _set_before_and_after = staticmethod(DataStore._set_before_and_after) - _get_events = DataStore._get_events.__func__ - _get_events_from_cache = DataStore._get_events_from_cache.__func__ - - _invalidate_get_event_cache = DataStore._invalidate_get_event_cache.__func__ - _enqueue_events = DataStore._enqueue_events.__func__ - _do_fetch = DataStore._do_fetch.__func__ - _fetch_event_rows = DataStore._fetch_event_rows.__func__ - _get_event_from_row = DataStore._get_event_from_row.__func__ _get_rooms_for_user_where_membership_is_txn = ( DataStore._get_rooms_for_user_where_membership_is_txn.__func__ ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc..681a33314 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -199,7 +199,356 @@ def _retry_on_integrity_error(func): return f -class EventsStore(SQLBaseStore): +class EventsWorkerStore(SQLBaseStore): + def __init__(self, db_conn, hs): + super(EventsWorkerStore, self).__init__(db_conn, hs) + + self._event_persist_queue = _EventPeristenceQueue() + + @defer.inlineCallbacks + def get_event(self, event_id, check_redacted=True, + get_prev_content=False, allow_rejected=False, + allow_none=False): + """Get an event from the database by event_id. 
+ + Args: + event_id (str): The event_id of the event to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + allow_none (bool): If True, return None if no event found, if + False throw an exception. + + Returns: + Deferred : A FrozenEvent. + """ + events = yield self._get_events( + [event_id], + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + if not events and not allow_none: + raise SynapseError(404, "Could not find event %s" % (event_id,)) + + defer.returnValue(events[0] if events else None) + + @defer.inlineCallbacks + def get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + """Get events from the database + + Args: + event_ids (list): The event_ids of the events to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + + Returns: + Deferred : Dict from event_id to event. 
+ """ + events = yield self._get_events( + event_ids, + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + defer.returnValue({e.event_id: e for e in events}) + + @defer.inlineCallbacks + def _get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + if not event_ids: + defer.returnValue([]) + + event_id_list = event_ids + event_ids = set(event_ids) + + event_entry_map = self._get_events_from_cache( + event_ids, + allow_rejected=allow_rejected, + ) + + missing_events_ids = [e for e in event_ids if e not in event_entry_map] + + if missing_events_ids: + missing_events = yield self._enqueue_events( + missing_events_ids, + check_redacted=check_redacted, + allow_rejected=allow_rejected, + ) + + event_entry_map.update(missing_events) + + events = [] + for event_id in event_id_list: + entry = event_entry_map.get(event_id, None) + if not entry: + continue + + if allow_rejected or not entry.event.rejected_reason: + if check_redacted and entry.redacted_event: + event = entry.redacted_event + else: + event = entry.event + + events.append(event) + + if get_prev_content: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender + + defer.returnValue(events) + + def _invalidate_get_event_cache(self, event_id): + self._get_event_cache.invalidate((event_id,)) + + def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): + """Fetch events from the caches + + Args: + events (list(str)): list of event_ids to fetch + allow_rejected (bool): Whether to return events that were rejected + update_metrics (bool): Whether to update the cache hit ratio metrics + + Returns: + dict of event_id -> _EventCacheEntry for each event_id in 
cache. If + allow_rejected is `False` then there will still be an entry but it + will be `None` + """ + event_map = {} + + for event_id in events: + ret = self._get_event_cache.get( + (event_id,), None, + update_metrics=update_metrics, + ) + if not ret: + continue + + if allow_rejected or not ret.event.rejected_reason: + event_map[event_id] = ret + else: + event_map[event_id] = None + + return event_map + + def _do_fetch(self, conn): + """Takes a database connection and waits for requests for events from + the _event_fetch_list queue. + """ + event_list = [] + i = 0 + while True: + try: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + single_threaded = self.database_engine.single_threaded + if single_threaded or i > EVENT_QUEUE_ITERATIONS: + self._event_fetch_ongoing -= 1 + return + else: + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 + + event_id_lists = zip(*event_list)[0] + event_ids = [ + item for sublist in event_id_lists for item in sublist + ] + + rows = self._new_transaction( + conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids + ) + + row_dict = { + r["event_id"]: r + for r in rows + } + + # We only want to resolve deferreds from the main thread + def fire(lst, res): + for ids, d in lst: + if not d.called: + try: + with PreserveLoggingContext(): + d.callback([ + res[i] + for i in ids + if i in res + ]) + except Exception: + logger.exception("Failed to callback") + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list, row_dict) + except Exception as e: + logger.exception("do_fetch") + + # We only want to resolve deferreds from the main thread + def fire(evs): + for _, d in evs: + if not d.called: + with PreserveLoggingContext(): + d.errback(e) + + if event_list: + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list) + + @defer.inlineCallbacks + def _enqueue_events(self, events, 
check_redacted=True, allow_rejected=False): + """Fetches events from the database using the _event_fetch_list. This + allows batch and bulk fetching of events - it allows us to fetch events + without having to create a new transaction for each request for events. + """ + if not events: + defer.returnValue({}) + + events_d = defer.Deferred() + with self._event_fetch_lock: + self._event_fetch_list.append( + (events, events_d) + ) + + self._event_fetch_lock.notify() + + if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: + self._event_fetch_ongoing += 1 + should_start = True + else: + should_start = False + + if should_start: + with PreserveLoggingContext(): + self.runWithConnection( + self._do_fetch + ) + + logger.debug("Loading %d events", len(events)) + with PreserveLoggingContext(): + rows = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) + + if not allow_rejected: + rows[:] = [r for r in rows if not r["rejects"]] + + res = yield make_deferred_yieldable(defer.gatherResults( + [ + preserve_fn(self._get_event_from_row)( + row["internal_metadata"], row["json"], row["redacts"], + rejected_reason=row["rejects"], + ) + for row in rows + ], + consumeErrors=True + )) + + defer.returnValue({ + e.event.event_id: e + for e in res if e + }) + + def _fetch_event_rows(self, txn, events): + rows = [] + N = 200 + for i in range(1 + len(events) / N): + evs = events[i * N:(i + 1) * N] + if not evs: + break + + sql = ( + "SELECT " + " e.event_id as event_id, " + " e.internal_metadata," + " e.json," + " r.redacts as redacts," + " rej.event_id as rejects " + " FROM event_json as e" + " LEFT JOIN rejections as rej USING (event_id)" + " LEFT JOIN redactions as r ON e.event_id = r.redacts" + " WHERE e.event_id IN (%s)" + ) % (",".join(["?"] * len(evs)),) + + txn.execute(sql, evs) + rows.extend(self.cursor_to_dict(txn)) + + return rows + + @defer.inlineCallbacks + def _get_event_from_row(self, internal_metadata, js, redacted, + rejected_reason=None): + 
with Measure(self._clock, "_get_event_from_row"): + d = json.loads(js) + internal_metadata = json.loads(internal_metadata) + + if rejected_reason: + rejected_reason = yield self._simple_select_one_onecol( + table="rejections", + keyvalues={"event_id": rejected_reason}, + retcol="reason", + desc="_get_event_from_row_rejected_reason", + ) + + original_ev = FrozenEvent( + d, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, + ) + + redacted_event = None + if redacted: + redacted_event = prune_event(original_ev) + + redaction_id = yield self._simple_select_one_onecol( + table="redactions", + keyvalues={"redacts": redacted_event.event_id}, + retcol="event_id", + desc="_get_event_from_row_redactions", + ) + + redacted_event.unsigned["redacted_by"] = redaction_id + # Get the redaction event. + + because = yield self.get_event( + redaction_id, + check_redacted=False, + allow_none=True, + ) + + if because: + # It's fine to do add the event directly, since get_pdu_json + # will serialise this field correctly + redacted_event.unsigned["redacted_because"] = because + + cache_entry = _EventCacheEntry( + event=original_ev, + redacted_event=redacted_event, + ) + + self._get_event_cache.prefill((original_ev.event_id,), cache_entry) + + defer.returnValue(cache_entry) + + +class EventsStore(EventsWorkerStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" @@ -234,8 +583,6 @@ class EventsStore(SQLBaseStore): psql_only=True, ) - self._event_persist_queue = _EventPeristenceQueue() - self._state_resolution_handler = hs.get_state_resolution_handler() def persist_events(self, events_and_contexts, backfilled=False): @@ -609,62 +956,6 @@ class EventsStore(SQLBaseStore): defer.returnValue((to_delete, to_insert)) - @defer.inlineCallbacks - def get_event(self, event_id, check_redacted=True, - get_prev_content=False, allow_rejected=False, - allow_none=False): - """Get an event from the database 
by event_id. - - Args: - event_id (str): The event_id of the event to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - allow_none (bool): If True, return None if no event found, if - False throw an exception. - - Returns: - Deferred : A FrozenEvent. - """ - events = yield self._get_events( - [event_id], - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - if not events and not allow_none: - raise SynapseError(404, "Could not find event %s" % (event_id,)) - - defer.returnValue(events[0] if events else None) - - @defer.inlineCallbacks - def get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - """Get events from the database - - Args: - event_ids (list): The event_ids of the events to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - - Returns: - Deferred : Dict from event_id to event. 
- """ - events = yield self._get_events( - event_ids, - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - defer.returnValue({e.event_id: e for e in events}) - @log_function def _persist_events_txn(self, txn, events_and_contexts, backfilled, delete_existing=False, state_delta_for_room={}, @@ -1375,292 +1666,6 @@ class EventsStore(SQLBaseStore): "have_events", f, ) - @defer.inlineCallbacks - def _get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - if not event_ids: - defer.returnValue([]) - - event_id_list = event_ids - event_ids = set(event_ids) - - event_entry_map = self._get_events_from_cache( - event_ids, - allow_rejected=allow_rejected, - ) - - missing_events_ids = [e for e in event_ids if e not in event_entry_map] - - if missing_events_ids: - missing_events = yield self._enqueue_events( - missing_events_ids, - check_redacted=check_redacted, - allow_rejected=allow_rejected, - ) - - event_entry_map.update(missing_events) - - events = [] - for event_id in event_id_list: - entry = event_entry_map.get(event_id, None) - if not entry: - continue - - if allow_rejected or not entry.event.rejected_reason: - if check_redacted and entry.redacted_event: - event = entry.redacted_event - else: - event = entry.event - - events.append(event) - - if get_prev_content: - if "replaces_state" in event.unsigned: - prev = yield self.get_event( - event.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - event.unsigned = dict(event.unsigned) - event.unsigned["prev_content"] = prev.content - event.unsigned["prev_sender"] = prev.sender - - defer.returnValue(events) - - def _invalidate_get_event_cache(self, event_id): - self._get_event_cache.invalidate((event_id,)) - - def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): - """Fetch events from the caches - - Args: - events (list(str)): list of event_ids to fetch - 
allow_rejected (bool): Whether to teturn events that were rejected - update_metrics (bool): Whether to update the cache hit ratio metrics - - Returns: - dict of event_id -> _EventCacheEntry for each event_id in cache. If - allow_rejected is `False` then there will still be an entry but it - will be `None` - """ - event_map = {} - - for event_id in events: - ret = self._get_event_cache.get( - (event_id,), None, - update_metrics=update_metrics, - ) - if not ret: - continue - - if allow_rejected or not ret.event.rejected_reason: - event_map[event_id] = ret - else: - event_map[event_id] = None - - return event_map - - def _do_fetch(self, conn): - """Takes a database connection and waits for requests for events from - the _event_fetch_list queue. - """ - event_list = [] - i = 0 - while True: - try: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if single_threaded or i > EVENT_QUEUE_ITERATIONS: - self._event_fetch_ongoing -= 1 - return - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 - - event_id_lists = zip(*event_list)[0] - event_ids = [ - item for sublist in event_id_lists for item in sublist - ] - - rows = self._new_transaction( - conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids - ) - - row_dict = { - r["event_id"]: r - for r in rows - } - - # We only want to resolve deferreds from the main thread - def fire(lst, res): - for ids, d in lst: - if not d.called: - try: - with PreserveLoggingContext(): - d.callback([ - res[i] - for i in ids - if i in res - ]) - except Exception: - logger.exception("Failed to callback") - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list, row_dict) - except Exception as e: - logger.exception("do_fetch") - - # We only want to resolve deferreds from the main thread - def fire(evs): - for _, d in evs: - if not d.called: - with 
PreserveLoggingContext(): - d.errback(e) - - if event_list: - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list) - - @defer.inlineCallbacks - def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): - """Fetches events from the database using the _event_fetch_list. This - allows batch and bulk fetching of events - it allows us to fetch events - without having to create a new transaction for each request for events. - """ - if not events: - defer.returnValue({}) - - events_d = defer.Deferred() - with self._event_fetch_lock: - self._event_fetch_list.append( - (events, events_d) - ) - - self._event_fetch_lock.notify() - - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - should_start = True - else: - should_start = False - - if should_start: - with PreserveLoggingContext(): - self.runWithConnection( - self._do_fetch - ) - - logger.debug("Loading %d events", len(events)) - with PreserveLoggingContext(): - rows = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) - - if not allow_rejected: - rows[:] = [r for r in rows if not r["rejects"]] - - res = yield make_deferred_yieldable(defer.gatherResults( - [ - preserve_fn(self._get_event_from_row)( - row["internal_metadata"], row["json"], row["redacts"], - rejected_reason=row["rejects"], - ) - for row in rows - ], - consumeErrors=True - )) - - defer.returnValue({ - e.event.event_id: e - for e in res if e - }) - - def _fetch_event_rows(self, txn, events): - rows = [] - N = 200 - for i in range(1 + len(events) / N): - evs = events[i * N:(i + 1) * N] - if not evs: - break - - sql = ( - "SELECT " - " e.event_id as event_id, " - " e.internal_metadata," - " e.json," - " r.redacts as redacts," - " rej.event_id as rejects " - " FROM event_json as e" - " LEFT JOIN rejections as rej USING (event_id)" - " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(evs)),) - 
- txn.execute(sql, evs) - rows.extend(self.cursor_to_dict(txn)) - - return rows - - @defer.inlineCallbacks - def _get_event_from_row(self, internal_metadata, js, redacted, - rejected_reason=None): - with Measure(self._clock, "_get_event_from_row"): - d = json.loads(js) - internal_metadata = json.loads(internal_metadata) - - if rejected_reason: - rejected_reason = yield self._simple_select_one_onecol( - table="rejections", - keyvalues={"event_id": rejected_reason}, - retcol="reason", - desc="_get_event_from_row_rejected_reason", - ) - - original_ev = FrozenEvent( - d, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - redacted_event = None - if redacted: - redacted_event = prune_event(original_ev) - - redaction_id = yield self._simple_select_one_onecol( - table="redactions", - keyvalues={"redacts": redacted_event.event_id}, - retcol="event_id", - desc="_get_event_from_row_redactions", - ) - - redacted_event.unsigned["redacted_by"] = redaction_id - # Get the redaction event. 
- - because = yield self.get_event( - redaction_id, - check_redacted=False, - allow_none=True, - ) - - if because: - # It's fine to do add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = because - - cache_entry = _EventCacheEntry( - event=original_ev, - redacted_event=redacted_event, - ) - - self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - - defer.returnValue(cache_entry) - @defer.inlineCallbacks def count_daily_messages(self): """ From 46244b27591f8674364ddefa9ae62cecb161fea3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:20:42 +0000 Subject: [PATCH 123/200] Split AS stores --- .../replication/slave/storage/appservice.py | 34 ++++--------------- synapse/storage/appservice.py | 26 +++++++++----- 2 files changed, 23 insertions(+), 37 deletions(-) diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py index 0d3f31a50..3b9ded009 100644 --- a/synapse/replication/slave/storage/appservice.py +++ b/synapse/replication/slave/storage/appservice.py @@ -13,33 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.config.appservice import load_appservices -from synapse.storage.appservice import _make_exclusive_regex +from synapse.storage.appservice import ( + ApplicationServiceWorkerStore, ApplicationServiceTransactionWorkerStore, +) -class SlavedApplicationServiceStore(BaseSlavedStore): - def __init__(self, db_conn, hs): - super(SlavedApplicationServiceStore, self).__init__(db_conn, hs) - self.services_cache = load_appservices( - hs.config.server_name, - hs.config.app_service_config_files - ) - self.exclusive_user_regex = _make_exclusive_regex(self.services_cache) - - get_app_service_by_token = DataStore.get_app_service_by_token.__func__ - get_app_service_by_user_id = DataStore.get_app_service_by_user_id.__func__ - get_app_services = DataStore.get_app_services.__func__ - get_new_events_for_appservice = DataStore.get_new_events_for_appservice.__func__ - create_appservice_txn = DataStore.create_appservice_txn.__func__ - get_appservices_by_state = DataStore.get_appservices_by_state.__func__ - get_oldest_unsent_txn = DataStore.get_oldest_unsent_txn.__func__ - _get_last_txn = DataStore._get_last_txn.__func__ - complete_appservice_txn = DataStore.complete_appservice_txn.__func__ - get_appservice_state = DataStore.get_appservice_state.__func__ - set_appservice_last_pos = DataStore.set_appservice_last_pos.__func__ - set_appservice_state = DataStore.set_appservice_state.__func__ - get_if_app_services_interested_in_user = ( - DataStore.get_if_app_services_interested_in_user.__func__ - ) +class SlavedApplicationServiceStore(ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore): + pass diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 79673b427..f66cc9809 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -20,6 +20,7 @@ from twisted.internet import defer from synapse.api.constants import Membership from 
synapse.appservice import AppServiceTransaction from synapse.config.appservice import load_appservices +from synapse.storage.events import EventsWorkerStore from synapse.storage.roommember import RoomsForUser from ._base import SQLBaseStore @@ -46,17 +47,16 @@ def _make_exclusive_regex(services_cache): return exclusive_user_regex -class ApplicationServiceStore(SQLBaseStore): - +class ApplicationServiceWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): - super(ApplicationServiceStore, self).__init__(db_conn, hs) - self.hostname = hs.hostname self.services_cache = load_appservices( hs.hostname, hs.config.app_service_config_files ) self.exclusive_user_regex = _make_exclusive_regex(self.services_cache) + super(ApplicationServiceWorkerStore, self).__init__(db_conn, hs) + def get_app_services(self): return self.services_cache @@ -112,6 +112,13 @@ class ApplicationServiceStore(SQLBaseStore): return service return None + +class ApplicationServiceStore(ApplicationServiceWorkerStore): + + def __init__(self, db_conn, hs): + super(ApplicationServiceStore, self).__init__(db_conn, hs) + self.hostname = hs.hostname + def get_app_service_rooms(self, service): """Get a list of RoomsForUser for this application service. @@ -184,11 +191,8 @@ class ApplicationServiceStore(SQLBaseStore): return rooms_for_user_matching_user_id -class ApplicationServiceTransactionStore(SQLBaseStore): - - def __init__(self, db_conn, hs): - super(ApplicationServiceTransactionStore, self).__init__(db_conn, hs) - +class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, + EventsWorkerStore): @defer.inlineCallbacks def get_appservices_by_state(self, state): """Get a list of application services based on their state. 
@@ -433,3 +437,7 @@ class ApplicationServiceTransactionStore(SQLBaseStore): events = yield self._get_events(event_ids) defer.returnValue((upper_bound, events)) + + +class ApplicationServiceTransactionStore(ApplicationServiceTransactionWorkerStore): + pass From 3dec9c66b3c2af1bf5b7283d7db443b65ebbd8a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 12:01:08 +0000 Subject: [PATCH 124/200] Split out RoomMemberStore --- synapse/replication/slave/storage/events.py | 30 +- synapse/storage/roommember.py | 360 ++++++++++---------- 2 files changed, 184 insertions(+), 206 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 74a81a0a5..0dc87aee7 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.events import EventsWorkerStore -from synapse.storage.roommember import RoomMemberStore +from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore from synapse.storage.signatures import SignatureStore @@ -39,7 +39,8 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(RoomMemberWorkerStore, EventsWorkerStore, + StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -69,18 +70,9 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. 
- get_rooms_for_user = RoomMemberStore.__dict__["get_rooms_for_user"] - get_users_in_room = RoomMemberStore.__dict__["get_users_in_room"] - get_hosts_in_room = RoomMemberStore.__dict__["get_hosts_in_room"] - get_users_who_share_room_with_user = ( - RoomMemberStore.__dict__["get_users_who_share_room_with_user"] - ) get_latest_event_ids_in_room = EventFederationStore.__dict__[ "get_latest_event_ids_in_room" ] - get_invited_rooms_for_user = RoomMemberStore.__dict__[ - "get_invited_rooms_for_user" - ] get_unread_event_push_actions_by_room_for_user = ( EventPushActionsStore.__dict__["get_unread_event_push_actions_by_room_for_user"] ) @@ -93,7 +85,6 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] ) - _get_joined_hosts_cache = RoomMemberStore.__dict__["_get_joined_hosts_cache"] has_room_changed_since = DataStore.has_room_changed_since.__func__ get_unread_push_actions_for_user_in_range_for_http = ( @@ -105,9 +96,6 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore get_push_action_users_in_range = ( DataStore.get_push_action_users_in_range.__func__ ) - get_rooms_for_user_where_membership_is = ( - DataStore.get_rooms_for_user_where_membership_is.__func__ - ) get_membership_changes_for_user = ( DataStore.get_membership_changes_for_user.__func__ ) @@ -116,27 +104,15 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore DataStore.get_room_events_stream_for_room.__func__ ) get_events_around = DataStore.get_events_around.__func__ - get_joined_users_from_state = DataStore.get_joined_users_from_state.__func__ - get_joined_users_from_context = DataStore.get_joined_users_from_context.__func__ - _get_joined_users_from_context = ( - RoomMemberStore.__dict__["_get_joined_users_from_context"] - ) - - get_joined_hosts = DataStore.get_joined_hosts.__func__ - _get_joined_hosts = 
RoomMemberStore.__dict__["_get_joined_hosts"] get_recent_events_for_room = DataStore.get_recent_events_for_room.__func__ get_room_events_stream_for_rooms = ( DataStore.get_room_events_stream_for_rooms.__func__ ) - is_host_joined = RoomMemberStore.__dict__["is_host_joined"] get_stream_token_for_event = DataStore.get_stream_token_for_event.__func__ _set_before_and_after = staticmethod(DataStore._set_before_and_after) - _get_rooms_for_user_where_membership_is_txn = ( - DataStore._get_rooms_for_user_where_membership_is_txn.__func__ - ) _get_events_around_txn = DataStore._get_events_around_txn.__func__ get_backfill_events = DataStore.get_backfill_events.__func__ diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 3e77fd390..6574fe74b 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -17,7 +17,7 @@ from twisted.internet import defer from collections import namedtuple -from ._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore from synapse.util.async import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -48,97 +48,7 @@ ProfileInfo = namedtuple( _MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update" -class RoomMemberStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(RoomMemberStore, self).__init__(db_conn, hs) - self.register_background_update_handler( - _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile - ) - - def _store_room_members_txn(self, txn, events, backfilled): - """Store a room member in the database. 
- """ - self._simple_insert_many_txn( - txn, - table="room_memberships", - values=[ - { - "event_id": event.event_id, - "user_id": event.state_key, - "sender": event.user_id, - "room_id": event.room_id, - "membership": event.membership, - "display_name": event.content.get("displayname", None), - "avatar_url": event.content.get("avatar_url", None), - } - for event in events - ] - ) - - for event in events: - txn.call_after( - self._membership_stream_cache.entity_has_changed, - event.state_key, event.internal_metadata.stream_ordering - ) - txn.call_after( - self.get_invited_rooms_for_user.invalidate, (event.state_key,) - ) - - # We update the local_invites table only if the event is "current", - # i.e., its something that has just happened. - # The only current event that can also be an outlier is if its an - # invite that has come in across federation. - is_new_state = not backfilled and ( - not event.internal_metadata.is_outlier() - or event.internal_metadata.is_invite_from_remote() - ) - is_mine = self.hs.is_mine_id(event.state_key) - if is_new_state and is_mine: - if event.membership == Membership.INVITE: - self._simple_insert_txn( - txn, - table="local_invites", - values={ - "event_id": event.event_id, - "invitee": event.state_key, - "inviter": event.sender, - "room_id": event.room_id, - "stream_id": event.internal_metadata.stream_ordering, - } - ) - else: - sql = ( - "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - txn.execute(sql, ( - event.internal_metadata.stream_ordering, - event.event_id, - event.room_id, - event.state_key, - )) - - @defer.inlineCallbacks - def locally_reject_invite(self, user_id, room_id): - sql = ( - "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" - " room_id = ? AND invitee = ? 
AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - def f(txn, stream_ordering): - txn.execute(sql, ( - stream_ordering, - True, - room_id, - user_id, - )) - - with self._stream_id_gen.get_next() as stream_ordering: - yield self.runInteraction("locally_reject_invite", f, stream_ordering) - +class RoomMemberWorkerStore(EventsWorkerStore): @cachedInlineCallbacks(max_entries=100000, iterable=True, cache_context=True) def get_hosts_in_room(self, room_id, cache_context): """Returns the set of all hosts currently in the room @@ -295,89 +205,6 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(user_who_share_room) - def forget(self, user_id, room_id): - """Indicate that user_id wishes to discard history for room_id.""" - def f(txn): - sql = ( - "UPDATE" - " room_memberships" - " SET" - " forgotten = 1" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - ) - txn.execute(sql, (user_id, room_id)) - - txn.call_after(self.was_forgotten_at.invalidate_all) - txn.call_after(self.did_forget.invalidate, (user_id, room_id)) - self._invalidate_cache_and_stream( - txn, self.who_forgot_in_room, (room_id,) - ) - return self.runInteraction("forget_membership", f) - - @cachedInlineCallbacks(num_args=2) - def did_forget(self, user_id, room_id): - """Returns whether user_id has elected to discard history for room_id. - - Returns False if they have since re-joined.""" - def f(txn): - sql = ( - "SELECT" - " COUNT(*)" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - " AND" - " forgotten = 0" - ) - txn.execute(sql, (user_id, room_id)) - rows = txn.fetchall() - return rows[0][0] - count = yield self.runInteraction("did_forget_membership", f) - defer.returnValue(count == 0) - - @cachedInlineCallbacks(num_args=3) - def was_forgotten_at(self, user_id, room_id, event_id): - """Returns whether user_id has elected to discard history for room_id at - event_id. 
- - event_id must be a membership event.""" - def f(txn): - sql = ( - "SELECT" - " forgotten" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - " AND" - " event_id = ?" - ) - txn.execute(sql, (user_id, room_id, event_id)) - rows = txn.fetchall() - return rows[0][0] - forgot = yield self.runInteraction("did_forget_membership_at", f) - defer.returnValue(forgot == 1) - - @cached() - def who_forgot_in_room(self, room_id): - return self._simple_select_list( - table="room_memberships", - retcols=("user_id", "event_id"), - keyvalues={ - "room_id": room_id, - "forgotten": 1, - }, - desc="who_forgot" - ) - def get_joined_users_from_context(self, event, context): state_group = context.state_group if not state_group: @@ -600,6 +427,185 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(joined_hosts) + @cached(max_entries=10000, iterable=True) + def _get_joined_hosts_cache(self, room_id): + return _JoinedHostsCache(self, room_id) + + +class RoomMemberStore(RoomMemberWorkerStore): + def __init__(self, db_conn, hs): + super(RoomMemberStore, self).__init__(db_conn, hs) + self.register_background_update_handler( + _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile + ) + + def _store_room_members_txn(self, txn, events, backfilled): + """Store a room member in the database. 
+ """ + self._simple_insert_many_txn( + txn, + table="room_memberships", + values=[ + { + "event_id": event.event_id, + "user_id": event.state_key, + "sender": event.user_id, + "room_id": event.room_id, + "membership": event.membership, + "display_name": event.content.get("displayname", None), + "avatar_url": event.content.get("avatar_url", None), + } + for event in events + ] + ) + + for event in events: + txn.call_after( + self._membership_stream_cache.entity_has_changed, + event.state_key, event.internal_metadata.stream_ordering + ) + txn.call_after( + self.get_invited_rooms_for_user.invalidate, (event.state_key,) + ) + + # We update the local_invites table only if the event is "current", + # i.e., its something that has just happened. + # The only current event that can also be an outlier is if its an + # invite that has come in across federation. + is_new_state = not backfilled and ( + not event.internal_metadata.is_outlier() + or event.internal_metadata.is_invite_from_remote() + ) + is_mine = self.hs.is_mine_id(event.state_key) + if is_new_state and is_mine: + if event.membership == Membership.INVITE: + self._simple_insert_txn( + txn, + table="local_invites", + values={ + "event_id": event.event_id, + "invitee": event.state_key, + "inviter": event.sender, + "room_id": event.room_id, + "stream_id": event.internal_metadata.stream_ordering, + } + ) + else: + sql = ( + "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" + " room_id = ? AND invitee = ? AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + txn.execute(sql, ( + event.internal_metadata.stream_ordering, + event.event_id, + event.room_id, + event.state_key, + )) + + @defer.inlineCallbacks + def locally_reject_invite(self, user_id, room_id): + sql = ( + "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" + " room_id = ? AND invitee = ? 
AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + def f(txn, stream_ordering): + txn.execute(sql, ( + stream_ordering, + True, + room_id, + user_id, + )) + + with self._stream_id_gen.get_next() as stream_ordering: + yield self.runInteraction("locally_reject_invite", f, stream_ordering) + + def forget(self, user_id, room_id): + """Indicate that user_id wishes to discard history for room_id.""" + def f(txn): + sql = ( + "UPDATE" + " room_memberships" + " SET" + " forgotten = 1" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + ) + txn.execute(sql, (user_id, room_id)) + + txn.call_after(self.was_forgotten_at.invalidate_all) + txn.call_after(self.did_forget.invalidate, (user_id, room_id)) + self._invalidate_cache_and_stream( + txn, self.who_forgot_in_room, (room_id,) + ) + return self.runInteraction("forget_membership", f) + + @cachedInlineCallbacks(num_args=2) + def did_forget(self, user_id, room_id): + """Returns whether user_id has elected to discard history for room_id. + + Returns False if they have since re-joined.""" + def f(txn): + sql = ( + "SELECT" + " COUNT(*)" + " FROM" + " room_memberships" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + " AND" + " forgotten = 0" + ) + txn.execute(sql, (user_id, room_id)) + rows = txn.fetchall() + return rows[0][0] + count = yield self.runInteraction("did_forget_membership", f) + defer.returnValue(count == 0) + + @cachedInlineCallbacks(num_args=3) + def was_forgotten_at(self, user_id, room_id, event_id): + """Returns whether user_id has elected to discard history for room_id at + event_id. + + event_id must be a membership event.""" + def f(txn): + sql = ( + "SELECT" + " forgotten" + " FROM" + " room_memberships" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + " AND" + " event_id = ?" 
+ ) + txn.execute(sql, (user_id, room_id, event_id)) + rows = txn.fetchall() + return rows[0][0] + forgot = yield self.runInteraction("did_forget_membership_at", f) + defer.returnValue(forgot == 1) + + @cached() + def who_forgot_in_room(self, room_id): + return self._simple_select_list( + table="room_memberships", + retcols=("user_id", "event_id"), + keyvalues={ + "room_id": room_id, + "forgotten": 1, + }, + desc="who_forgot" + ) + @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( @@ -675,10 +681,6 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(result) - @cached(max_entries=10000, iterable=True) - def _get_joined_hosts_cache(self, room_id): - return _JoinedHostsCache(self, room_id) - class _JoinedHostsCache(object): """Cache for joined hosts in a room that is optimised to handle updates From faeb369f158a3ca6ba8f48ca1d551b2b53f4c53a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 15:19:54 +0000 Subject: [PATCH 125/200] Fix missing invalidations for receipt storage --- synapse/replication/slave/storage/receipts.py | 2 ++ synapse/storage/receipts.py | 28 +++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index f0e29e983..1647072f6 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -53,6 +53,8 @@ class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): self.get_last_receipt_event_id_for_user.invalidate( (user_id, room_id, receipt_type) ) + self._invalidate_get_users_with_receipts_in_room(room_id, receipt_type, user_id) + self.get_receipts_for_room.invalidate((room_id, receipt_type)) def process_replication_rows(self, stream_name, token, rows): if stream_name == "receipts": diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 
40530632c..eac8694e0 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -292,20 +292,6 @@ class ReceiptsWorkerStore(SQLBaseStore): "get_all_updated_receipts", get_all_updated_receipts_txn ) - -class ReceiptsStore(ReceiptsWorkerStore): - def __init__(self, db_conn, hs): - # We instantiate this first as the ReceiptsWorkerStore constructor - # needs to be able to call get_max_receipt_stream_id - self._receipts_id_gen = StreamIdGenerator( - db_conn, "receipts_linearized", "stream_id" - ) - - super(ReceiptsStore, self).__init__(db_conn, hs) - - def get_max_receipt_stream_id(self): - return self._receipts_id_gen.get_current_token() - def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, user_id): if receipt_type != "m.read": @@ -326,6 +312,20 @@ class ReceiptsStore(ReceiptsWorkerStore): self.get_users_with_read_receipts_in_room.invalidate((room_id,)) + +class ReceiptsStore(ReceiptsWorkerStore): + def __init__(self, db_conn, hs): + # We instantiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = StreamIdGenerator( + db_conn, "receipts_linearized", "stream_id" + ) + + super(ReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() + def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): txn.call_after( From bb73f55fc6559658080d6cdd5672506fda7843ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:31:16 +0000 Subject: [PATCH 126/200] Use absolute imports --- synapse/replication/slave/storage/account_data.py | 4 ++-- synapse/storage/account_data.py | 4 ++-- synapse/storage/tags.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 6c95261aa..f76dc5a56 100644 --- 
a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import BaseSlavedStore -from ._slaved_id_tracker import SlavedIdTracker +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.storage.account_data import AccountDataWorkerStore from synapse.storage.tags import TagsWorkerStore diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 66fed4bdc..0d6acbd9a 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -15,8 +15,8 @@ from twisted.internet import defer -from ._base import SQLBaseStore -from .util.id_generators import StreamIdGenerator +from synapse.storage._base import SQLBaseStore +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 484d66991..a8d0bf80c 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .account_data import AccountDataWorkerStore +from synapse.storage.account_data import AccountDataWorkerStore from synapse.util.caches.descriptors import cached from twisted.internet import defer From 26d37f7a63cf33ac8e3f8346a6e982c7b2f6cbb1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:33:55 +0000 Subject: [PATCH 127/200] Update copyright --- synapse/replication/slave/storage/account_data.py | 1 + synapse/storage/account_data.py | 1 + synapse/storage/tags.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index f76dc5a56..6c8d2954d 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 0d6acbd9a..466194e96 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index a8d0bf80c..fc46bf7bb 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From 7e6cf89dc2a7fc3c159459b34562719ab61713ba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:39:19 +0000 Subject: [PATCH 128/200] Update copyright --- synapse/replication/slave/storage/push_rule.py | 1 + synapse/storage/__init__.py | 1 + synapse/storage/push_rule.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 0e3d9a87d..bb2c40b6e 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index cd2759858..26faf985b 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index b35bd7a64..583efb7bd 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From c2ecfcc3a4374d47db38465510e4c480da353ebb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:41:34 +0000 Subject: [PATCH 129/200] Update copyright --- synapse/replication/slave/storage/pushers.py | 1 + synapse/storage/pusher.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index e352c3235..a7cd5a729 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index b0159c70c..f4af3e4ca 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From eb9b5eec819a2d594bc237652d831e64517d0c93 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:42:39 +0000 Subject: [PATCH 130/200] Update copyright --- synapse/storage/event_push_actions.py | 1 + synapse/storage/events.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 124583835..4cabf70ad 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc..c636da4b7 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 3bd760628bc0b178f6709b9ea3439a44ebcebab2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:49:18 +0000 Subject: [PATCH 131/200] _event_persist_queue shouldn't be in worker store --- synapse/storage/events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 681a33314..32da81c47 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -200,10 +200,6 @@ def _retry_on_integrity_error(func): class EventsWorkerStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(EventsWorkerStore, self).__init__(db_conn, hs) - - self._event_persist_queue = _EventPeristenceQueue() @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -583,6 +579,10 @@ class EventsStore(EventsWorkerStore): psql_only=True, ) + + + self._event_persist_queue = _EventPeristenceQueue() + self._state_resolution_handler = hs.get_state_resolution_handler() def persist_events(self, events_and_contexts, backfilled=False): From 5d0f6658489287dc19ce9e089fc61b06208f3fc4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:49:58 +0000 Subject: [PATCH 132/200] Remove redundant clock --- synapse/storage/events.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 32da81c47..f6aa3612e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -550,7 +550,6 @@ class EventsStore(EventsWorkerStore): def __init__(self, db_conn, hs): super(EventsStore, 
self).__init__(db_conn, hs) - self._clock = hs.get_clock() self.register_background_update_handler( self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts ) @@ -579,8 +578,6 @@ class EventsStore(EventsWorkerStore): psql_only=True, ) - - self._event_persist_queue = _EventPeristenceQueue() self._state_resolution_handler = hs.get_state_resolution_handler() From bf8a36e0805a1626d335f78ddb90ba25220bbfa1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:52:10 +0000 Subject: [PATCH 133/200] Update copyright --- synapse/replication/slave/storage/events.py | 1 + synapse/storage/events.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 74a81a0a5..f35cba289 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index f6aa3612e..84a6f6782 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From eba93b05bfaa1e6cb5bd66621021f6fff750ab97 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 11:01:21 +0000 Subject: [PATCH 134/200] Split EventsWorkerStore into separate file --- synapse/replication/slave/storage/events.py | 2 +- synapse/storage/events.py | 365 +----------------- synapse/storage/events_worker.py | 395 ++++++++++++++++++++ 3 files changed, 401 insertions(+), 361 deletions(-) create mode 100644 synapse/storage/events_worker.py diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f35cba289..5edfacc9e 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore -from synapse.storage.events import EventsWorkerStore +from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 84a6f6782..99d6cca58 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -13,16 +13,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from ._base import SQLBaseStore -from twisted.internet import defer, reactor +from synapse.storage.events_worker import EventsWorkerStore -from synapse.events import FrozenEvent, USE_FROZEN_DICTS -from synapse.events.utils import prune_event +from twisted.internet import defer + +from synapse.events import USE_FROZEN_DICTS from synapse.util.async import ObservableDeferred from synapse.util.logcontext import ( - preserve_fn, PreserveLoggingContext, make_deferred_yieldable + PreserveLoggingContext, make_deferred_yieldable ) from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -62,16 +62,6 @@ def encode_json(json_object): return json.dumps(json_object, ensure_ascii=False) -# These values are used in the `enqueus_event` and `_do_fetch` methods to -# control how we batch/bulk fetch events from the database. -# The values are plucked out of thing air to make initial sync run faster -# on jki.re -# TODO: Make these configurable. -EVENT_QUEUE_THREADS = 3 # Max number of threads that will fetch events -EVENT_QUEUE_ITERATIONS = 3 # No. times we block waiting for requests for events -EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events - - class _EventPeristenceQueue(object): """Queues up events so that they can be persisted in bulk with only one concurrent transaction per room. @@ -200,351 +190,6 @@ def _retry_on_integrity_error(func): return f -class EventsWorkerStore(SQLBaseStore): - - @defer.inlineCallbacks - def get_event(self, event_id, check_redacted=True, - get_prev_content=False, allow_rejected=False, - allow_none=False): - """Get an event from the database by event_id. - - Args: - event_id (str): The event_id of the event to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. 
- allow_none (bool): If True, return None if no event found, if - False throw an exception. - - Returns: - Deferred : A FrozenEvent. - """ - events = yield self._get_events( - [event_id], - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - if not events and not allow_none: - raise SynapseError(404, "Could not find event %s" % (event_id,)) - - defer.returnValue(events[0] if events else None) - - @defer.inlineCallbacks - def get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - """Get events from the database - - Args: - event_ids (list): The event_ids of the events to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - - Returns: - Deferred : Dict from event_id to event. 
- """ - events = yield self._get_events( - event_ids, - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - defer.returnValue({e.event_id: e for e in events}) - - @defer.inlineCallbacks - def _get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - if not event_ids: - defer.returnValue([]) - - event_id_list = event_ids - event_ids = set(event_ids) - - event_entry_map = self._get_events_from_cache( - event_ids, - allow_rejected=allow_rejected, - ) - - missing_events_ids = [e for e in event_ids if e not in event_entry_map] - - if missing_events_ids: - missing_events = yield self._enqueue_events( - missing_events_ids, - check_redacted=check_redacted, - allow_rejected=allow_rejected, - ) - - event_entry_map.update(missing_events) - - events = [] - for event_id in event_id_list: - entry = event_entry_map.get(event_id, None) - if not entry: - continue - - if allow_rejected or not entry.event.rejected_reason: - if check_redacted and entry.redacted_event: - event = entry.redacted_event - else: - event = entry.event - - events.append(event) - - if get_prev_content: - if "replaces_state" in event.unsigned: - prev = yield self.get_event( - event.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - event.unsigned = dict(event.unsigned) - event.unsigned["prev_content"] = prev.content - event.unsigned["prev_sender"] = prev.sender - - defer.returnValue(events) - - def _invalidate_get_event_cache(self, event_id): - self._get_event_cache.invalidate((event_id,)) - - def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): - """Fetch events from the caches - - Args: - events (list(str)): list of event_ids to fetch - allow_rejected (bool): Whether to teturn events that were rejected - update_metrics (bool): Whether to update the cache hit ratio metrics - - Returns: - dict of event_id -> _EventCacheEntry for each event_id in 
cache. If - allow_rejected is `False` then there will still be an entry but it - will be `None` - """ - event_map = {} - - for event_id in events: - ret = self._get_event_cache.get( - (event_id,), None, - update_metrics=update_metrics, - ) - if not ret: - continue - - if allow_rejected or not ret.event.rejected_reason: - event_map[event_id] = ret - else: - event_map[event_id] = None - - return event_map - - def _do_fetch(self, conn): - """Takes a database connection and waits for requests for events from - the _event_fetch_list queue. - """ - event_list = [] - i = 0 - while True: - try: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if single_threaded or i > EVENT_QUEUE_ITERATIONS: - self._event_fetch_ongoing -= 1 - return - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 - - event_id_lists = zip(*event_list)[0] - event_ids = [ - item for sublist in event_id_lists for item in sublist - ] - - rows = self._new_transaction( - conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids - ) - - row_dict = { - r["event_id"]: r - for r in rows - } - - # We only want to resolve deferreds from the main thread - def fire(lst, res): - for ids, d in lst: - if not d.called: - try: - with PreserveLoggingContext(): - d.callback([ - res[i] - for i in ids - if i in res - ]) - except Exception: - logger.exception("Failed to callback") - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list, row_dict) - except Exception as e: - logger.exception("do_fetch") - - # We only want to resolve deferreds from the main thread - def fire(evs): - for _, d in evs: - if not d.called: - with PreserveLoggingContext(): - d.errback(e) - - if event_list: - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list) - - @defer.inlineCallbacks - def _enqueue_events(self, events, 
check_redacted=True, allow_rejected=False): - """Fetches events from the database using the _event_fetch_list. This - allows batch and bulk fetching of events - it allows us to fetch events - without having to create a new transaction for each request for events. - """ - if not events: - defer.returnValue({}) - - events_d = defer.Deferred() - with self._event_fetch_lock: - self._event_fetch_list.append( - (events, events_d) - ) - - self._event_fetch_lock.notify() - - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - should_start = True - else: - should_start = False - - if should_start: - with PreserveLoggingContext(): - self.runWithConnection( - self._do_fetch - ) - - logger.debug("Loading %d events", len(events)) - with PreserveLoggingContext(): - rows = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) - - if not allow_rejected: - rows[:] = [r for r in rows if not r["rejects"]] - - res = yield make_deferred_yieldable(defer.gatherResults( - [ - preserve_fn(self._get_event_from_row)( - row["internal_metadata"], row["json"], row["redacts"], - rejected_reason=row["rejects"], - ) - for row in rows - ], - consumeErrors=True - )) - - defer.returnValue({ - e.event.event_id: e - for e in res if e - }) - - def _fetch_event_rows(self, txn, events): - rows = [] - N = 200 - for i in range(1 + len(events) / N): - evs = events[i * N:(i + 1) * N] - if not evs: - break - - sql = ( - "SELECT " - " e.event_id as event_id, " - " e.internal_metadata," - " e.json," - " r.redacts as redacts," - " rej.event_id as rejects " - " FROM event_json as e" - " LEFT JOIN rejections as rej USING (event_id)" - " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(evs)),) - - txn.execute(sql, evs) - rows.extend(self.cursor_to_dict(txn)) - - return rows - - @defer.inlineCallbacks - def _get_event_from_row(self, internal_metadata, js, redacted, - rejected_reason=None): - 
with Measure(self._clock, "_get_event_from_row"): - d = json.loads(js) - internal_metadata = json.loads(internal_metadata) - - if rejected_reason: - rejected_reason = yield self._simple_select_one_onecol( - table="rejections", - keyvalues={"event_id": rejected_reason}, - retcol="reason", - desc="_get_event_from_row_rejected_reason", - ) - - original_ev = FrozenEvent( - d, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - redacted_event = None - if redacted: - redacted_event = prune_event(original_ev) - - redaction_id = yield self._simple_select_one_onecol( - table="redactions", - keyvalues={"redacts": redacted_event.event_id}, - retcol="event_id", - desc="_get_event_from_row_redactions", - ) - - redacted_event.unsigned["redacted_by"] = redaction_id - # Get the redaction event. - - because = yield self.get_event( - redaction_id, - check_redacted=False, - allow_none=True, - ) - - if because: - # It's fine to do add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = because - - cache_entry = _EventCacheEntry( - event=original_ev, - redacted_event=redacted_event, - ) - - self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - - defer.returnValue(cache_entry) - - class EventsStore(EventsWorkerStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py new file mode 100644 index 000000000..86c3b48ad --- /dev/null +++ b/synapse/storage/events_worker.py @@ -0,0 +1,395 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ._base import SQLBaseStore + +from twisted.internet import defer, reactor + +from synapse.events import FrozenEvent +from synapse.events.utils import prune_event + +from synapse.util.logcontext import ( + preserve_fn, PreserveLoggingContext, make_deferred_yieldable +) +from synapse.util.metrics import Measure +from synapse.api.errors import SynapseError + +from collections import namedtuple + +import logging +import ujson as json + +# these are only included to make the type annotations work +from synapse.events import EventBase # noqa: F401 +from synapse.events.snapshot import EventContext # noqa: F401 + +logger = logging.getLogger(__name__) + + +# These values are used in the `enqueus_event` and `_do_fetch` methods to +# control how we batch/bulk fetch events from the database. +# The values are plucked out of thing air to make initial sync run faster +# on jki.re +# TODO: Make these configurable. +EVENT_QUEUE_THREADS = 3 # Max number of threads that will fetch events +EVENT_QUEUE_ITERATIONS = 3 # No. times we block waiting for requests for events +EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events + + +_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event")) + + +class EventsWorkerStore(SQLBaseStore): + + @defer.inlineCallbacks + def get_event(self, event_id, check_redacted=True, + get_prev_content=False, allow_rejected=False, + allow_none=False): + """Get an event from the database by event_id. 
+ + Args: + event_id (str): The event_id of the event to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + allow_none (bool): If True, return None if no event found, if + False throw an exception. + + Returns: + Deferred : A FrozenEvent. + """ + events = yield self._get_events( + [event_id], + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + if not events and not allow_none: + raise SynapseError(404, "Could not find event %s" % (event_id,)) + + defer.returnValue(events[0] if events else None) + + @defer.inlineCallbacks + def get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + """Get events from the database + + Args: + event_ids (list): The event_ids of the events to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + + Returns: + Deferred : Dict from event_id to event. 
+ """ + events = yield self._get_events( + event_ids, + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + defer.returnValue({e.event_id: e for e in events}) + + @defer.inlineCallbacks + def _get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + if not event_ids: + defer.returnValue([]) + + event_id_list = event_ids + event_ids = set(event_ids) + + event_entry_map = self._get_events_from_cache( + event_ids, + allow_rejected=allow_rejected, + ) + + missing_events_ids = [e for e in event_ids if e not in event_entry_map] + + if missing_events_ids: + missing_events = yield self._enqueue_events( + missing_events_ids, + check_redacted=check_redacted, + allow_rejected=allow_rejected, + ) + + event_entry_map.update(missing_events) + + events = [] + for event_id in event_id_list: + entry = event_entry_map.get(event_id, None) + if not entry: + continue + + if allow_rejected or not entry.event.rejected_reason: + if check_redacted and entry.redacted_event: + event = entry.redacted_event + else: + event = entry.event + + events.append(event) + + if get_prev_content: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender + + defer.returnValue(events) + + def _invalidate_get_event_cache(self, event_id): + self._get_event_cache.invalidate((event_id,)) + + def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): + """Fetch events from the caches + + Args: + events (list(str)): list of event_ids to fetch + allow_rejected (bool): Whether to teturn events that were rejected + update_metrics (bool): Whether to update the cache hit ratio metrics + + Returns: + dict of event_id -> _EventCacheEntry for each event_id in 
cache. If + allow_rejected is `False` then there will still be an entry but it + will be `None` + """ + event_map = {} + + for event_id in events: + ret = self._get_event_cache.get( + (event_id,), None, + update_metrics=update_metrics, + ) + if not ret: + continue + + if allow_rejected or not ret.event.rejected_reason: + event_map[event_id] = ret + else: + event_map[event_id] = None + + return event_map + + def _do_fetch(self, conn): + """Takes a database connection and waits for requests for events from + the _event_fetch_list queue. + """ + event_list = [] + i = 0 + while True: + try: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + single_threaded = self.database_engine.single_threaded + if single_threaded or i > EVENT_QUEUE_ITERATIONS: + self._event_fetch_ongoing -= 1 + return + else: + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 + + event_id_lists = zip(*event_list)[0] + event_ids = [ + item for sublist in event_id_lists for item in sublist + ] + + rows = self._new_transaction( + conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids + ) + + row_dict = { + r["event_id"]: r + for r in rows + } + + # We only want to resolve deferreds from the main thread + def fire(lst, res): + for ids, d in lst: + if not d.called: + try: + with PreserveLoggingContext(): + d.callback([ + res[i] + for i in ids + if i in res + ]) + except Exception: + logger.exception("Failed to callback") + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list, row_dict) + except Exception as e: + logger.exception("do_fetch") + + # We only want to resolve deferreds from the main thread + def fire(evs): + for _, d in evs: + if not d.called: + with PreserveLoggingContext(): + d.errback(e) + + if event_list: + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list) + + @defer.inlineCallbacks + def _enqueue_events(self, events, 
check_redacted=True, allow_rejected=False): + """Fetches events from the database using the _event_fetch_list. This + allows batch and bulk fetching of events - it allows us to fetch events + without having to create a new transaction for each request for events. + """ + if not events: + defer.returnValue({}) + + events_d = defer.Deferred() + with self._event_fetch_lock: + self._event_fetch_list.append( + (events, events_d) + ) + + self._event_fetch_lock.notify() + + if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: + self._event_fetch_ongoing += 1 + should_start = True + else: + should_start = False + + if should_start: + with PreserveLoggingContext(): + self.runWithConnection( + self._do_fetch + ) + + logger.debug("Loading %d events", len(events)) + with PreserveLoggingContext(): + rows = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) + + if not allow_rejected: + rows[:] = [r for r in rows if not r["rejects"]] + + res = yield make_deferred_yieldable(defer.gatherResults( + [ + preserve_fn(self._get_event_from_row)( + row["internal_metadata"], row["json"], row["redacts"], + rejected_reason=row["rejects"], + ) + for row in rows + ], + consumeErrors=True + )) + + defer.returnValue({ + e.event.event_id: e + for e in res if e + }) + + def _fetch_event_rows(self, txn, events): + rows = [] + N = 200 + for i in range(1 + len(events) / N): + evs = events[i * N:(i + 1) * N] + if not evs: + break + + sql = ( + "SELECT " + " e.event_id as event_id, " + " e.internal_metadata," + " e.json," + " r.redacts as redacts," + " rej.event_id as rejects " + " FROM event_json as e" + " LEFT JOIN rejections as rej USING (event_id)" + " LEFT JOIN redactions as r ON e.event_id = r.redacts" + " WHERE e.event_id IN (%s)" + ) % (",".join(["?"] * len(evs)),) + + txn.execute(sql, evs) + rows.extend(self.cursor_to_dict(txn)) + + return rows + + @defer.inlineCallbacks + def _get_event_from_row(self, internal_metadata, js, redacted, + rejected_reason=None): + 
with Measure(self._clock, "_get_event_from_row"): + d = json.loads(js) + internal_metadata = json.loads(internal_metadata) + + if rejected_reason: + rejected_reason = yield self._simple_select_one_onecol( + table="rejections", + keyvalues={"event_id": rejected_reason}, + retcol="reason", + desc="_get_event_from_row_rejected_reason", + ) + + original_ev = FrozenEvent( + d, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, + ) + + redacted_event = None + if redacted: + redacted_event = prune_event(original_ev) + + redaction_id = yield self._simple_select_one_onecol( + table="redactions", + keyvalues={"redacts": redacted_event.event_id}, + retcol="event_id", + desc="_get_event_from_row_redactions", + ) + + redacted_event.unsigned["redacted_by"] = redaction_id + # Get the redaction event. + + because = yield self.get_event( + redaction_id, + check_redacted=False, + allow_none=True, + ) + + if because: + # It's fine to do add the event directly, since get_pdu_json + # will serialise this field correctly + redacted_event.unsigned["redacted_because"] = because + + cache_entry = _EventCacheEntry( + event=original_ev, + redacted_event=redacted_event, + ) + + self._get_event_cache.prefill((original_ev.event_id,), cache_entry) + + defer.returnValue(cache_entry) From 70349872c2dcff20a7b174bf0fcfdd5b8e47eec3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 11:14:35 +0000 Subject: [PATCH 135/200] Update copyright --- synapse/replication/slave/storage/events.py | 1 + synapse/storage/events.py | 1 + synapse/storage/roommember.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 0dc87aee7..ef7a42d80 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache 
License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 681a33314..3e3229b40 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 6574fe74b..b9158b989 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 6ae9a3d2a6cd9db7e07fda270728cf15351a5a0b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:45:00 +0000 Subject: [PATCH 136/200] Update copyright --- synapse/replication/slave/storage/appservice.py | 1 + synapse/storage/appservice.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py index 3b9ded009..8cae3076f 100644 --- a/synapse/replication/slave/storage/appservice.py +++ b/synapse/replication/slave/storage/appservice.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index f66cc9809..063906f5a 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 8956f0147aecc27a0590eeeecd130d05b7a55767 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Feb 2018 10:06:51 +0000 Subject: [PATCH 137/200] Add comment --- synapse/storage/appservice.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 063906f5a..90fb51d43 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -441,4 +441,8 @@ class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, class ApplicationServiceTransactionStore(ApplicationServiceTransactionWorkerStore): + # This is currently empty due to there not being any AS storage functions + # that can't be run on the workers. Since this may change in future, and + # to keep consistency with the other stores, we keep this empty class for + # now. pass From 493e25d5545389264f696be0e07544bf82a0818a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Feb 2018 12:01:36 +0000 Subject: [PATCH 138/200] Move storage functions for push calculations This will allow push actions for an event to be calculated on workers. 
--- synapse/app/pusher.py | 5 - synapse/app/synchrotron.py | 8 +- synapse/storage/event_push_actions.py | 126 +++++++++++++------------- synapse/storage/push_rule.py | 14 ++- synapse/storage/pusher.py | 22 +++-- synapse/storage/roommember.py | 24 ++--- 6 files changed, 101 insertions(+), 98 deletions(-) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 32ccea3f1..98a4a7c62 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -32,7 +32,6 @@ from synapse.replication.tcp.client import ReplicationClientHandler from synapse.server import HomeServer from synapse.storage import DataStore from synapse.storage.engines import create_engine -from synapse.storage.roommember import RoomMemberStore from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, preserve_fn from synapse.util.manhole import manhole @@ -75,10 +74,6 @@ class PusherSlaveStore( DataStore.get_profile_displayname.__func__ ) - who_forgot_in_room = ( - RoomMemberStore.__dict__["who_forgot_in_room"] - ) - class PusherServer(HomeServer): def setup(self): diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index f87531f1b..abe91dcfb 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -62,8 +62,6 @@ logger = logging.getLogger("synapse.app.synchrotron") class SynchrotronSlavedStore( - SlavedPushRuleStore, - SlavedEventStore, SlavedReceiptsStore, SlavedAccountDataStore, SlavedApplicationServiceStore, @@ -73,14 +71,12 @@ class SynchrotronSlavedStore( SlavedGroupServerStore, SlavedDeviceInboxStore, SlavedDeviceStore, + SlavedPushRuleStore, + SlavedEventStore, SlavedClientIpStore, RoomStore, BaseSlavedStore, ): - who_forgot_in_room = ( - RoomMemberStore.__dict__["who_forgot_in_room"] - ) - did_forget = ( RoomMemberStore.__dict__["did_forget"] ) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index fe6887414..6454045c2 100644 --- 
a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -380,6 +380,69 @@ class EventPushActionsWorkerStore(SQLBaseStore): # Now return the first `limit` defer.returnValue(notifs[:limit]) + def add_push_actions_to_staging(self, event_id, user_id_actions): + """Add the push actions for the event to the push action staging area. + + Args: + event_id (str) + user_id_actions (dict[str, list[dict|str])]): A dictionary mapping + user_id to list of push actions, where an action can either be + a string or dict. + + Returns: + Deferred + """ + + if not user_id_actions: + return + + # This is a helper function for generating the necessary tuple that + # can be used to inert into the `event_push_actions_staging` table. + def _gen_entry(user_id, actions): + is_highlight = 1 if _action_has_highlight(actions) else 0 + return ( + event_id, # event_id column + user_id, # user_id column + _serialize_action(actions, is_highlight), # actions column + 1, # notif column + is_highlight, # highlight column + ) + + def _add_push_actions_to_staging_txn(txn): + # We don't use _simple_insert_many here to avoid the overhead + # of generating lists of dicts. + + sql = """ + INSERT INTO event_push_actions_staging + (event_id, user_id, actions, notif, highlight) + VALUES (?, ?, ?, ?, ?) 
+ """ + + txn.executemany(sql, ( + _gen_entry(user_id, actions) + for user_id, actions in user_id_actions.iteritems() + )) + + return self.runInteraction( + "add_push_actions_to_staging", _add_push_actions_to_staging_txn + ) + + def remove_push_actions_from_staging(self, event_id): + """Called if we failed to persist the event to ensure that stale push + actions don't build up in the DB + + Args: + event_id (str) + """ + + return self._simple_delete( + table="event_push_actions_staging", + keyvalues={ + "event_id": event_id, + }, + desc="remove_push_actions_from_staging", + ) + class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" @@ -775,69 +838,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore): (rotate_to_stream_ordering,) ) - def add_push_actions_to_staging(self, event_id, user_id_actions): - """Add the push actions for the event to the push action staging area. - - Args: - event_id (str) - user_id_actions (dict[str, list[dict|str])]): A dictionary mapping - user_id to list of push actions, where an action can either be - a string or dict. - - Returns: - Deferred - """ - - if not user_id_actions: - return - - # This is a helper function for generating the necessary tuple that - # can be used to inert into the `event_push_actions_staging` table. - def _gen_entry(user_id, actions): - is_highlight = 1 if _action_has_highlight(actions) else 0 - return ( - event_id, # event_id column - user_id, # user_id column - _serialize_action(actions, is_highlight), # actions column - 1, # notif column - is_highlight, # highlight column - ) - - def _add_push_actions_to_staging_txn(txn): - # We don't use _simple_insert_many here to avoid the overhead - # of generating lists of dicts. - - sql = """ - INSERT INTO event_push_actions_staging - (event_id, user_id, actions, notif, highlight) - VALUES (?, ?, ?, ?, ?) 
- """ - - txn.executemany(sql, ( - _gen_entry(user_id, actions) - for user_id, actions in user_id_actions.iteritems() - )) - - return self.runInteraction( - "add_push_actions_to_staging", _add_push_actions_to_staging_txn - ) - - def remove_push_actions_from_staging(self, event_id): - """Called if we failed to persist the event to ensure that stale push - actions don't build up in the DB - - Args: - event_id (str) - """ - - return self._simple_delete( - table="event_push_actions_staging", - keyvalues={ - "event_id": event_id, - }, - desc="remove_push_actions_from_staging", - ) - def _action_has_highlight(actions): for action in actions: diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 583efb7bd..04a0b59a3 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -15,6 +15,10 @@ # limitations under the License. from ._base import SQLBaseStore +from synapse.storage.appservice import ApplicationServiceWorkerStore +from synapse.storage.pusher import PusherWorkerStore +from synapse.storage.receipts import ReceiptsWorkerStore +from synapse.storage.roommember import RoomMemberWorkerStore from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.push.baserules import list_with_base_rules @@ -51,7 +55,11 @@ def _load_rules(rawrules, enabled_map): return rules -class PushRulesWorkerStore(SQLBaseStore): +class PushRulesWorkerStore(ApplicationServiceWorkerStore, + ReceiptsWorkerStore, + PusherWorkerStore, + RoomMemberWorkerStore, + SQLBaseStore): """This is an abstract base class where subclasses must implement `get_max_push_rules_stream_id` which can be called in the initializer. 
""" @@ -140,8 +148,6 @@ class PushRulesWorkerStore(SQLBaseStore): "have_push_rules_changed", have_push_rules_changed_txn ) - -class PushRuleStore(PushRulesWorkerStore): @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids", num_args=1, inlineCallbacks=True) def bulk_get_push_rules(self, user_ids): @@ -281,6 +287,8 @@ class PushRuleStore(PushRulesWorkerStore): results.setdefault(row['user_name'], {})[row['rule_id']] = enabled defer.returnValue(results) + +class PushRuleStore(PushRulesWorkerStore): @defer.inlineCallbacks def add_push_rule( self, user_id, rule_id, priority_class, conditions, actions, diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index f4af3e4ca..307660b99 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -175,11 +175,6 @@ class PusherWorkerStore(SQLBaseStore): "get_all_updated_pushers_rows", get_all_updated_pushers_rows_txn ) - -class PusherStore(PusherWorkerStore): - def get_pushers_stream_token(self): - return self._pushers_id_gen.get_current_token() - @cachedInlineCallbacks(num_args=1, max_entries=15000) def get_if_user_has_pusher(self, user_id): # This only exists for the cachedList decorator @@ -201,6 +196,11 @@ class PusherStore(PusherWorkerStore): defer.returnValue(result) + +class PusherStore(PusherWorkerStore): + def get_pushers_stream_token(self): + return self._pushers_id_gen.get_current_token() + @defer.inlineCallbacks def add_pusher(self, user_id, access_token, kind, app_id, app_display_name, device_display_name, @@ -233,14 +233,18 @@ class PusherStore(PusherWorkerStore): ) if newly_inserted: - # get_if_user_has_pusher only cares if the user has - # at least *one* pusher. 
- self.get_if_user_has_pusher.invalidate(user_id,) + self.runInteraction( + "add_pusher", + self._invalidate_cache_and_stream, + self.get_if_user_has_pusher, (user_id,) + ) @defer.inlineCallbacks def delete_pusher_by_app_id_pushkey_user_id(self, app_id, pushkey, user_id): def delete_pusher_txn(txn, stream_id): - txn.call_after(self.get_if_user_has_pusher.invalidate, (user_id,)) + self._invalidate_cache_and_stream( + txn, self.get_if_user_has_pusher, (user_id,) + ) self._simple_delete_one_txn( txn, diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index b9158b989..d79877dac 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -432,6 +432,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_joined_hosts_cache(self, room_id): return _JoinedHostsCache(self, room_id) + @cached() + def who_forgot_in_room(self, room_id): + return self._simple_select_list( + table="room_memberships", + retcols=("user_id", "event_id"), + keyvalues={ + "room_id": room_id, + "forgotten": 1, + }, + desc="who_forgot" + ) + class RoomMemberStore(RoomMemberWorkerStore): def __init__(self, db_conn, hs): @@ -595,18 +607,6 @@ class RoomMemberStore(RoomMemberWorkerStore): forgot = yield self.runInteraction("did_forget_membership_at", f) defer.returnValue(forgot == 1) - @cached() - def who_forgot_in_room(self, room_id): - return self._simple_select_list( - table="room_memberships", - retcols=("user_id", "event_id"), - keyvalues={ - "room_id": room_id, - "forgotten": 1, - }, - desc="who_forgot" - ) - @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( From 28e973ac119e0b4ec5b9e45772a572a94d0e6643 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 16:30:10 +0000 Subject: [PATCH 139/200] Calculate push actions on worker --- synapse/app/event_creator.py | 8 +++ synapse/handlers/message.py | 84 ++++++++++++++++++-------- 
synapse/replication/http/send_event.py | 2 +- 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index b2ce39925..fc0b9e8c0 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -27,10 +27,14 @@ from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore +from synapse.replication.slave.storage.pushers import SlavedPusherStore +from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.tcp.client import ReplicationClientHandler @@ -48,6 +52,10 @@ logger = logging.getLogger("synapse.app.event_creator") class EventCreatorSlavedStore( + SlavedAccountDataStore, + SlavedPusherStore, + SlavedReceiptsStore, + SlavedPushRuleStore, SlavedDeviceStore, SlavedClientIpStore, SlavedApplicationServiceStore, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d99d8049b..4c186965a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -553,21 +553,67 @@ class EventCreationHandler(object): event, context, ratelimit=True, - extra_users=[] + extra_users=[], ): - # We now need to go and hit out to wherever we need to hit out to. 
+ """Processes a new event. This includes checking auth, persisting it, + notifying users, sending to remote servers, etc. - # If we're a worker we need to hit out to the master. - if self.config.worker_app: - yield send_event_to_master( - self.http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - event=event, - context=context, + If called from a worker will hit out to the master process for final + processing. + + Args: + requester (Requester) + event (FrozenEvent) + context (EventContext) + ratelimit (bool) + extra_users (list(str)): Any extra users to notify about event + """ + + yield self.action_generator.handle_push_actions_for_event( + event, context + ) + + try: + # We now need to go and hit out to wherever we need to hit out to. + + # If we're a worker we need to hit out to the master. + if self.config.worker_app: + yield send_event_to_master( + self.http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + event=event, + context=context, + ) + return + + yield self.persist_and_notify_client_event( + requester, + event, + context, + ratelimit=ratelimit, + extra_users=extra_users, ) - return + except: # noqa: E722, as we reraise the exception this is fine. + # Ensure that we actually remove the entries in the push actions + # staging area, if we calculated them. + preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) + raise + + @defer.inlineCallbacks + def persist_and_notify_client_event( + self, + requester, + event, + context, + ratelimit=True, + extra_users=[], + ): + """Called when we have fully built and authed the event. This should + only be run on master. 
+ """ + assert not self.config.worker_app if ratelimit: yield self.base_handler.ratelimit(requester) @@ -679,20 +725,10 @@ class EventCreationHandler(object): "Changing the room create event is forbidden", ) - yield self.action_generator.handle_push_actions_for_event( - event, context + (event_stream_id, max_stream_id) = yield self.store.persist_event( + event, context=context ) - try: - (event_stream_id, max_stream_id) = yield self.store.persist_event( - event, context=context - ) - except: # noqa: E722, as we reraise the exception this is fine. - # Ensure that we actually remove the entries in the push actions - # staging area - preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) - raise - # this intentionally does not yield: we don't care about the result # and don't need to wait for it. preserve_fn(self.pusher_pool.on_new_notifications)( diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 468f4b68f..3a99a88bc 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -106,7 +106,7 @@ class ReplicationSendEventRestServlet(RestServlet): event.event_id, event.room_id, ) - yield self.event_creation_handler.handle_new_client_event( + yield self.event_creation_handler.persist_and_notify_client_event( requester, event, context, ) From f756f961eab7ae6e53052ee419413c74d171d144 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:05:27 +0000 Subject: [PATCH 140/200] Fixup comments --- synapse/handlers/message.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4c186965a..c4151d73e 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -574,8 +574,6 @@ class EventCreationHandler(object): ) try: - # We now need to go and hit out to wherever we need to hit out to. - # If we're a worker we need to hit out to the master. 
if self.config.worker_app: yield send_event_to_master( @@ -610,8 +608,10 @@ class EventCreationHandler(object): ratelimit=True, extra_users=[], ): - """Called when we have fully built and authed the event. This should - only be run on master. + """Called when we have fully built the event, and have already + calculated the push actions for the event. + + This should only be run on master. """ assert not self.config.worker_app From 6b8604239f4c9463023e59664e7810ba58b8f428 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:08:28 +0000 Subject: [PATCH 141/200] Correctly send ratelimit and extra_users params --- synapse/handlers/message.py | 2 ++ synapse/replication/http/send_event.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c4151d73e..5f88f84d3 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -583,6 +583,8 @@ class EventCreationHandler(object): requester=requester, event=event, context=context, + ratelimit=ratelimit, + extra_users=extra_users, ) return diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 3a99a88bc..439bfbb4f 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -29,7 +29,8 @@ logger = logging.getLogger(__name__) @defer.inlineCallbacks -def send_event_to_master(client, host, port, requester, event, context): +def send_event_to_master(client, host, port, requester, event, context, + ratelimit, extra_users): """Send event to be handled on the master Args: @@ -39,6 +40,8 @@ def send_event_to_master(client, host, port, requester, event, context): requester (Requester) event (FrozenEvent) context (EventContext) + ratelimit (bool) + extra_users (list(str)): Any extra users to notify about event """ uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) @@ -48,6 +51,8 @@ def send_event_to_master(client, 
host, port, requester, event, context): "rejected_reason": event.rejected_reason, "context": context.serialize(event), "requester": requester.serialize(), + "ratelimit": ratelimit, + "extra_users": extra_users, } try: @@ -74,6 +79,8 @@ class ReplicationSendEventRestServlet(RestServlet): "rejected_reason": .., // The event.rejected_reason field "context": { .. serialized event context .. }, "requester": { .. serialized requester .. }, + "ratelimit": true, + "extra_users": [], } """ PATTERNS = [re.compile("^/_synapse/replication/send_event$")] @@ -98,6 +105,9 @@ class ReplicationSendEventRestServlet(RestServlet): requester = Requester.deserialize(self.store, content["requester"]) context = yield EventContext.deserialize(self.store, content["context"]) + ratelimit = content["ratelimit"] + extra_users = content["extra_users"] + if requester.user: request.authenticated_entity = requester.user.to_string() @@ -108,6 +118,8 @@ class ReplicationSendEventRestServlet(RestServlet): yield self.event_creation_handler.persist_and_notify_client_event( requester, event, context, + ratelimit=ratelimit, + extra_users=extra_users, ) defer.returnValue((200, {})) From f381d6381344eb442f46ae27f29e039175721ff5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:18:33 +0000 Subject: [PATCH 142/200] Check event auth on the worker --- synapse/handlers/message.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5f88f84d3..7d28c2745 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -569,6 +569,20 @@ class EventCreationHandler(object): extra_users (list(str)): Any extra users to notify about event """ + try: + yield self.auth.check_from_context(event, context) + except AuthError as err: + logger.warn("Denying new event %r because %s", event, err) + raise err + + # Ensure that we can round trip before trying to persist in db + try: + 
dump = ujson.dumps(unfreeze(event.content)) + ujson.loads(dump) + except Exception: + logger.exception("Failed to encode content: %r", event.content) + raise + yield self.action_generator.handle_push_actions_for_event( event, context ) @@ -610,8 +624,8 @@ class EventCreationHandler(object): ratelimit=True, extra_users=[], ): - """Called when we have fully built the event, and have already - calculated the push actions for the event. + """Called when we have fully built the event, have already + calculated the push actions for the event, and checked auth. This should only be run on master. """ @@ -620,20 +634,6 @@ class EventCreationHandler(object): if ratelimit: yield self.base_handler.ratelimit(requester) - try: - yield self.auth.check_from_context(event, context) - except AuthError as err: - logger.warn("Denying new event %r because %s", event, err) - raise err - - # Ensure that we can round trip before trying to persist in db - try: - dump = ujson.dumps(unfreeze(event.content)) - ujson.loads(dump) - except Exception: - logger.exception("Failed to encode content: %r", event.content) - raise - yield self.base_handler.maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: From 8ded8ba2c755f254bd98e15db7bc865eed997f07 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:20:34 +0000 Subject: [PATCH 143/200] Make repl send_event idempotent and retry on timeouts If we treated timeouts as failures on the worker we would attempt to clean up e.g. push actions while the master might still process the event. 
--- synapse/replication/http/send_event.py | 44 ++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 439bfbb4f..73cd3d91d 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -15,10 +15,15 @@ from twisted.internet import defer -from synapse.api.errors import SynapseError, MatrixCodeMessageException +from synapse.api.errors import ( + SynapseError, MatrixCodeMessageException, CodeMessageException, +) from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.util.async import sleep +from synapse.util.caches.response_cache import ResponseCache +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.metrics import Measure from synapse.types import Requester @@ -43,7 +48,9 @@ def send_event_to_master(client, host, port, requester, event, context, ratelimit (bool) extra_users (list(str)): Any extra users to notify about event """ - uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) + uri = "http://%s:%s/_synapse/replication/send_event/%s" % ( + host, port, event.event_id, + ) payload = { "event": event.get_pdu_json(), @@ -56,7 +63,20 @@ def send_event_to_master(client, host, port, requester, event, context, } try: - result = yield client.post_json_get_json(uri, payload) + # We keep retrying the same request for timeouts. This is so that we + # have a good idea that the request has either succeeded or failed on + # the master, and so whether we should clean up or not. + while True: + try: + result = yield client.put_json(uri, payload) + break + except CodeMessageException as e: + if e.code != 504: + raise + + # If we timed out we probably don't need to worry about backing + # off too much, but lets just wait a little anyway. 
+ yield sleep(1) except MatrixCodeMessageException as e: # We convert to SynapseError as we know that it was a SynapseError # on the master process that we should send to the client. (And @@ -71,7 +91,7 @@ class ReplicationSendEventRestServlet(RestServlet): The API looks like: - POST /_synapse/replication/send_event + POST /_synapse/replication/send_event/:event_id { "event": { .. serialized event .. }, @@ -83,7 +103,7 @@ class ReplicationSendEventRestServlet(RestServlet): "extra_users": [], } """ - PATTERNS = [re.compile("^/_synapse/replication/send_event$")] + PATTERNS = [re.compile("^/_synapse/replication/send_event/(?P<event_id>[^/]+)$")] def __init__(self, hs): super(ReplicationSendEventRestServlet, self).__init__() @@ -92,8 +112,20 @@ class ReplicationSendEventRestServlet(RestServlet): self.store = hs.get_datastore() self.clock = hs.get_clock() + # The responses are tiny, so we may as well cache them for a while + self.response_cache = ResponseCache(hs, timeout_ms=30 * 60 * 1000) + + def on_PUT(self, request, event_id): + result = self.response_cache.get(event_id) + if not result: + result = self.response_cache.set( + event_id, + preserve_fn(self._handle_request)(request) + ) + return make_deferred_yieldable(result) + @defer.inlineCallbacks - def on_POST(self, request): + def _handle_request(self, request): with Measure(self.clock, "repl_send_event_parse"): content = parse_json_object_from_request(request) From 89f90d808ad923374a204377b0bc8ed15a9385a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:59:16 +0000 Subject: [PATCH 144/200] Add some logging --- synapse/replication/http/send_event.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 73cd3d91d..f490622b6 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -72,6 +72,7 @@ def send_event_to_master(client, host, port, requester, event, context, break except 
CodeMessageException as e: if e.code != 504: + logger.warn("send_event request timed out") raise # If we timed out we probably don't need to worry about backing @@ -118,6 +119,7 @@ class ReplicationSendEventRestServlet(RestServlet): def on_PUT(self, request, event_id): result = self.response_cache.get(event_id) if not result: + logger.warn("Returning cached response") result = self.response_cache.set( event_id, preserve_fn(self._handle_request)(request) From 157298f9862bd06e9268aed9cdffbc1c912fd74d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:59:45 +0000 Subject: [PATCH 145/200] Don't do preserve_fn for every request --- synapse/replication/http/send_event.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index f490622b6..665a56d2e 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -122,10 +122,11 @@ class ReplicationSendEventRestServlet(RestServlet): logger.warn("Returning cached response") result = self.response_cache.set( event_id, - preserve_fn(self._handle_request)(request) + self._handle_request(request) ) return make_deferred_yieldable(result) + @preserve_fn @defer.inlineCallbacks def _handle_request(self, request): with Measure(self.clock, "repl_send_event_parse"): From 126b9bf96f2c989e7402b5e0177ee39a2a20940e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 12:05:33 +0000 Subject: [PATCH 146/200] Log in the correct places --- synapse/replication/http/send_event.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 665a56d2e..70f2fe456 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -72,9 +72,10 @@ def send_event_to_master(client, host, port, requester, event, context, break except CodeMessageException as e: if 
e.code != 504: - logger.warn("send_event request timed out") raise + logger.warn("send_event request timed out") + # If we timed out we probably don't need to worry about backing # off too much, but lets just wait a little anyway. yield sleep(1) @@ -119,11 +120,12 @@ class ReplicationSendEventRestServlet(RestServlet): def on_PUT(self, request, event_id): result = self.response_cache.get(event_id) if not result: - logger.warn("Returning cached response") result = self.response_cache.set( event_id, self._handle_request(request) ) + else: + logger.warn("Returning cached response") return make_deferred_yieldable(result) @preserve_fn From 3a75de923b9183c073bbddae1e08fae546a11f7a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Mar 2018 12:19:09 +0000 Subject: [PATCH 147/200] Rewrite make_deferred_yieldable avoiding inlineCallbacks ... because (a) it's actually simpler (b) it might be marginally more performant? --- synapse/util/logcontext.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 94fa7cac9..a8dea15c1 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -299,10 +299,6 @@ def preserve_fn(f): Useful for wrapping functions that return a deferred which you don't yield on. """ - def reset_context(result): - LoggingContext.set_current_context(LoggingContext.sentinel) - return result - def g(*args, **kwargs): current = LoggingContext.current_context() res = f(*args, **kwargs) @@ -323,12 +319,11 @@ def preserve_fn(f): # which is supposed to have a single entry and exit point. But # by spawning off another deferred, we are effectively # adding a new exit point.) 
- res.addBoth(reset_context) + res.addBoth(_set_context_cb, LoggingContext.sentinel) return res return g -@defer.inlineCallbacks def make_deferred_yieldable(deferred): """Given a deferred, make it follow the Synapse logcontext rules: @@ -342,9 +337,16 @@ def make_deferred_yieldable(deferred): (This is more-or-less the opposite operation to preserve_fn.) """ - with PreserveLoggingContext(): - r = yield deferred - defer.returnValue(r) + if isinstance(deferred, defer.Deferred) and not deferred.called: + prev_context = LoggingContext.set_current_context(LoggingContext.sentinel) + deferred.addBoth(_set_context_cb, prev_context) + return deferred + + +def _set_context_cb(result, context): + """A callback function which just sets the logging context""" + LoggingContext.set_current_context(context) + return result # modules to ignore in `logcontext_tracer` From 0c8ba5dd1ce3e5cec201165c50f69aaa5c68c45d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:39:45 +0000 Subject: [PATCH 148/200] Split up RoomStore --- synapse/replication/slave/storage/room.py | 21 +- synapse/storage/room.py | 239 +++++++++++----------- 2 files changed, 125 insertions(+), 135 deletions(-) diff --git a/synapse/replication/slave/storage/room.py b/synapse/replication/slave/storage/room.py index f51038403..5ae167015 100644 --- a/synapse/replication/slave/storage/room.py +++ b/synapse/replication/slave/storage/room.py @@ -14,32 +14,19 @@ # limitations under the License. 
from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.storage.room import RoomStore +from synapse.storage.room import RoomWorkerStore from ._slaved_id_tracker import SlavedIdTracker -class RoomStore(BaseSlavedStore): +class RoomStore(RoomWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(RoomStore, self).__init__(db_conn, hs) self._public_room_id_gen = SlavedIdTracker( db_conn, "public_room_list_stream", "stream_id" ) - get_public_room_ids = DataStore.get_public_room_ids.__func__ - get_current_public_room_stream_id = ( - DataStore.get_current_public_room_stream_id.__func__ - ) - get_public_room_ids_at_stream_id = ( - RoomStore.__dict__["get_public_room_ids_at_stream_id"] - ) - get_public_room_ids_at_stream_id_txn = ( - DataStore.get_public_room_ids_at_stream_id_txn.__func__ - ) - get_published_at_stream_id_txn = ( - DataStore.get_published_at_stream_id_txn.__func__ - ) - get_public_room_changes = DataStore.get_public_room_changes.__func__ + def get_current_public_room_stream_id(self): + return self._public_room_id_gen.get_current_token() def stream_positions(self): result = super(RoomStore, self).stream_positions() diff --git a/synapse/storage/room.py b/synapse/storage/room.py index fff6652e0..7f2c08d7a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,6 +16,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage._base import SQLBaseStore from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -38,7 +39,126 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SearchStore): +class RoomWorkerStore(SQLBaseStore): + def get_public_room_ids(self): + return self._simple_select_onecol( + table="rooms", + keyvalues={ + "is_public": True, + }, + retcol="room_id", + desc="get_public_room_ids", + ) + + @cached(num_args=2, max_entries=100) + def 
get_public_room_ids_at_stream_id(self, stream_id, network_tuple): + """Get pulbic rooms for a particular list, or across all lists. + + Args: + stream_id (int) + network_tuple (ThirdPartyInstanceID): The list to use (None, None) + means the main list, None means all lsits. + """ + return self.runInteraction( + "get_public_room_ids_at_stream_id", + self.get_public_room_ids_at_stream_id_txn, + stream_id, network_tuple=network_tuple + ) + + def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, + network_tuple): + return { + rm + for rm, vis in self.get_published_at_stream_id_txn( + txn, stream_id, network_tuple=network_tuple + ).items() + if vis + } + + def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): + if network_tuple: + # We want to get from a particular list. No aggregation required. + + sql = (""" + SELECT room_id, visibility FROM public_room_list_stream + INNER JOIN ( + SELECT room_id, max(stream_id) AS stream_id + FROM public_room_list_stream + WHERE stream_id <= ? %s + GROUP BY room_id + ) grouped USING (room_id, stream_id) + """) + + if network_tuple.appservice_id is not None: + txn.execute( + sql % ("AND appservice_id = ? AND network_id = ?",), + (stream_id, network_tuple.appservice_id, network_tuple.network_id,) + ) + else: + txn.execute( + sql % ("AND appservice_id IS NULL",), + (stream_id,) + ) + return dict(txn) + else: + # We want to get from all lists, so we need to aggregate the results + + logger.info("Executing full list") + + sql = (""" + SELECT room_id, visibility + FROM public_room_list_stream + INNER JOIN ( + SELECT + room_id, max(stream_id) AS stream_id, appservice_id, + network_id + FROM public_room_list_stream + WHERE stream_id <= ? + GROUP BY room_id, appservice_id, network_id + ) grouped USING (room_id, stream_id) + """) + + txn.execute( + sql, + (stream_id,) + ) + + results = {} + # A room is visible if its visible on any list. 
+ for room_id, visibility in txn: + results[room_id] = bool(visibility) or results.get(room_id, False) + + return results + + def get_public_room_changes(self, prev_stream_id, new_stream_id, + network_tuple): + def get_public_room_changes_txn(txn): + then_rooms = self.get_public_room_ids_at_stream_id_txn( + txn, prev_stream_id, network_tuple + ) + + now_rooms_dict = self.get_published_at_stream_id_txn( + txn, new_stream_id, network_tuple + ) + + now_rooms_visible = set( + rm for rm, vis in now_rooms_dict.items() if vis + ) + now_rooms_not_visible = set( + rm for rm, vis in now_rooms_dict.items() if not vis + ) + + newly_visible = now_rooms_visible - then_rooms + newly_unpublished = now_rooms_not_visible & then_rooms + + return newly_visible, newly_unpublished + + return self.runInteraction( + "get_public_room_changes", get_public_room_changes_txn + ) + + +class RoomStore(RoomWorkerStore, SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -225,16 +345,6 @@ class RoomStore(SearchStore): ) self.hs.get_notifier().on_new_replication_data() - def get_public_room_ids(self): - return self._simple_select_onecol( - table="rooms", - keyvalues={ - "is_public": True, - }, - retcol="room_id", - desc="get_public_room_ids", - ) - def get_room_count(self): """Retrieve a list of all rooms """ @@ -326,113 +436,6 @@ class RoomStore(SearchStore): def get_current_public_room_stream_id(self): return self._public_room_id_gen.get_current_token() - @cached(num_args=2, max_entries=100) - def get_public_room_ids_at_stream_id(self, stream_id, network_tuple): - """Get pulbic rooms for a particular list, or across all lists. - - Args: - stream_id (int) - network_tuple (ThirdPartyInstanceID): The list to use (None, None) - means the main list, None means all lsits. 
- """ - return self.runInteraction( - "get_public_room_ids_at_stream_id", - self.get_public_room_ids_at_stream_id_txn, - stream_id, network_tuple=network_tuple - ) - - def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, - network_tuple): - return { - rm - for rm, vis in self.get_published_at_stream_id_txn( - txn, stream_id, network_tuple=network_tuple - ).items() - if vis - } - - def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): - if network_tuple: - # We want to get from a particular list. No aggregation required. - - sql = (""" - SELECT room_id, visibility FROM public_room_list_stream - INNER JOIN ( - SELECT room_id, max(stream_id) AS stream_id - FROM public_room_list_stream - WHERE stream_id <= ? %s - GROUP BY room_id - ) grouped USING (room_id, stream_id) - """) - - if network_tuple.appservice_id is not None: - txn.execute( - sql % ("AND appservice_id = ? AND network_id = ?",), - (stream_id, network_tuple.appservice_id, network_tuple.network_id,) - ) - else: - txn.execute( - sql % ("AND appservice_id IS NULL",), - (stream_id,) - ) - return dict(txn) - else: - # We want to get from all lists, so we need to aggregate the results - - logger.info("Executing full list") - - sql = (""" - SELECT room_id, visibility - FROM public_room_list_stream - INNER JOIN ( - SELECT - room_id, max(stream_id) AS stream_id, appservice_id, - network_id - FROM public_room_list_stream - WHERE stream_id <= ? - GROUP BY room_id, appservice_id, network_id - ) grouped USING (room_id, stream_id) - """) - - txn.execute( - sql, - (stream_id,) - ) - - results = {} - # A room is visible if its visible on any list. 
- for room_id, visibility in txn: - results[room_id] = bool(visibility) or results.get(room_id, False) - - return results - - def get_public_room_changes(self, prev_stream_id, new_stream_id, - network_tuple): - def get_public_room_changes_txn(txn): - then_rooms = self.get_public_room_ids_at_stream_id_txn( - txn, prev_stream_id, network_tuple - ) - - now_rooms_dict = self.get_published_at_stream_id_txn( - txn, new_stream_id, network_tuple - ) - - now_rooms_visible = set( - rm for rm, vis in now_rooms_dict.items() if vis - ) - now_rooms_not_visible = set( - rm for rm, vis in now_rooms_dict.items() if not vis - ) - - newly_visible = now_rooms_visible - then_rooms - newly_unpublished = now_rooms_not_visible & then_rooms - - return newly_visible, newly_unpublished - - return self.runInteraction( - "get_public_room_changes", get_public_room_changes_txn - ) - def get_all_new_public_rooms(self, prev_id, current_id, limit): def get_all_new_public_rooms(txn): sql = (""" From a9a2d66cdd0abc2339641808698e63cb06c4a038 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 14:16:02 +0000 Subject: [PATCH 149/200] Split out SignatureStore and EventFederationStore --- synapse/replication/slave/storage/events.py | 50 +--- synapse/storage/event_federation.py | 264 ++++++++++---------- synapse/storage/signatures.py | 8 +- 3 files changed, 148 insertions(+), 174 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f43..d2495ff99 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -17,13 +17,13 @@ import logging from synapse.api.constants import EventTypes from synapse.storage import DataStore -from synapse.storage.event_federation import EventFederationStore +from synapse.storage.event_federation import EventFederationWorkerStore from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker 
import EventsWorkerStore from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore -from synapse.storage.signatures import SignatureStore +from synapse.storage.signatures import SignatureWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -40,8 +40,12 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, - EventsWorkerStore, StateGroupWorkerStore, +class SlavedEventStore(EventFederationWorkerStore, + RoomMemberWorkerStore, + EventPushActionsWorkerStore, + EventsWorkerStore, + StateGroupWorkerStore, + SignatureWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): @@ -72,9 +76,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. 
- get_latest_event_ids_in_room = EventFederationStore.__dict__[ - "get_latest_event_ids_in_room" - ] get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] @@ -100,48 +101,13 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, _get_events_around_txn = DataStore._get_events_around_txn.__func__ - get_backfill_events = DataStore.get_backfill_events.__func__ - _get_backfill_events = DataStore._get_backfill_events.__func__ - get_missing_events = DataStore.get_missing_events.__func__ - _get_missing_events = DataStore._get_missing_events.__func__ - - get_auth_chain = DataStore.get_auth_chain.__func__ - get_auth_chain_ids = DataStore.get_auth_chain_ids.__func__ - _get_auth_chain_ids_txn = DataStore._get_auth_chain_ids_txn.__func__ - get_room_max_stream_ordering = DataStore.get_room_max_stream_ordering.__func__ - get_forward_extremeties_for_room = ( - DataStore.get_forward_extremeties_for_room.__func__ - ) - _get_forward_extremeties_for_room = ( - EventFederationStore.__dict__["_get_forward_extremeties_for_room"] - ) - get_all_new_events_stream = DataStore.get_all_new_events_stream.__func__ get_federation_out_pos = DataStore.get_federation_out_pos.__func__ update_federation_out_pos = DataStore.update_federation_out_pos.__func__ - get_latest_event_ids_and_hashes_in_room = ( - DataStore.get_latest_event_ids_and_hashes_in_room.__func__ - ) - _get_latest_event_ids_and_hashes_in_room = ( - DataStore._get_latest_event_ids_and_hashes_in_room.__func__ - ) - _get_event_reference_hashes_txn = ( - DataStore._get_event_reference_hashes_txn.__func__ - ) - add_event_hashes = ( - DataStore.add_event_hashes.__func__ - ) - get_event_reference_hashes = ( - SignatureStore.__dict__["get_event_reference_hashes"] - ) - get_event_reference_hash = ( - SignatureStore.__dict__["get_event_reference_hash"] - ) - def stream_positions(self): result = super(SlavedEventStore, self).stream_positions() result["events"] = 
self._stream_id_gen.get_current_token() diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 55a05c59d..00ee82d30 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -15,7 +15,10 @@ from twisted.internet import defer -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore +from synapse.storage.signatures import SignatureWorkerStore + from synapse.api.errors import StoreError from synapse.util.caches.descriptors import cached from unpaddedbase64 import encode_base64 @@ -27,30 +30,8 @@ from Queue import PriorityQueue, Empty logger = logging.getLogger(__name__) -class EventFederationStore(SQLBaseStore): - """ Responsible for storing and serving up the various graphs associated - with an event. Including the main event graph and the auth chains for an - event. - - Also has methods for getting the front (latest) and back (oldest) edges - of the event graphs. These are used to generate the parents for new events - and backfilling from another server respectively. - """ - - EVENT_AUTH_STATE_ONLY = "event_auth_state_only" - - def __init__(self, db_conn, hs): - super(EventFederationStore, self).__init__(db_conn, hs) - - self.register_background_update_handler( - self.EVENT_AUTH_STATE_ONLY, - self._background_delete_non_state_event_auth, - ) - - hs.get_clock().looping_call( - self._delete_old_forward_extrem_cache, 60 * 60 * 1000 - ) - +class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, + SQLBaseStore): def get_auth_chain(self, event_ids, include_given=False): """Get auth events for given event_ids. The events *must* be state events. 
@@ -228,88 +209,6 @@ class EventFederationStore(SQLBaseStore): return int(min_depth) if min_depth is not None else None - def _update_min_depth_for_room_txn(self, txn, room_id, depth): - min_depth = self._get_min_depth_interaction(txn, room_id) - - if min_depth and depth >= min_depth: - return - - self._simple_upsert_txn( - txn, - table="room_depth", - keyvalues={ - "room_id": room_id, - }, - values={ - "min_depth": depth, - }, - ) - - def _handle_mult_prev_events(self, txn, events): - """ - For the given event, update the event edges table and forward and - backward extremities tables. - """ - self._simple_insert_many_txn( - txn, - table="event_edges", - values=[ - { - "event_id": ev.event_id, - "prev_event_id": e_id, - "room_id": ev.room_id, - "is_state": False, - } - for ev in events - for e_id, _ in ev.prev_events - ], - ) - - self._update_backward_extremeties(txn, events) - - def _update_backward_extremeties(self, txn, events): - """Updates the event_backward_extremities tables based on the new/updated - events being persisted. - - This is called for new events *and* for events that were outliers, but - are now being persisted as non-outliers. - - Forward extremities are handled when we first start persisting the events. - """ - events_by_room = {} - for ev in events: - events_by_room.setdefault(ev.room_id, []).append(ev) - - query = ( - "INSERT INTO event_backward_extremities (event_id, room_id)" - " SELECT ?, ? WHERE NOT EXISTS (" - " SELECT 1 FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" - " )" - " AND NOT EXISTS (" - " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " - " AND outlier = ?" - " )" - ) - - txn.executemany(query, [ - (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) - for ev in events for e_id, _ in ev.prev_events - if not ev.internal_metadata.is_outlier() - ]) - - query = ( - "DELETE FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" 
- ) - txn.executemany( - query, - [ - (ev.event_id, ev.room_id) for ev in events - if not ev.internal_metadata.is_outlier() - ] - ) - def get_forward_extremeties_for_room(self, room_id, stream_ordering): """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". @@ -371,28 +270,6 @@ class EventFederationStore(SQLBaseStore): get_forward_extremeties_for_room_txn ) - def _delete_old_forward_extrem_cache(self): - def _delete_old_forward_extrem_cache_txn(txn): - # Delete entries older than a month, while making sure we don't delete - # the only entries for a room. - sql = (""" - DELETE FROM stream_ordering_to_exterm - WHERE - room_id IN ( - SELECT room_id - FROM stream_ordering_to_exterm - WHERE stream_ordering > ? - ) AND stream_ordering < ? - """) - txn.execute( - sql, - (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) - ) - return self.runInteraction( - "_delete_old_forward_extrem_cache", - _delete_old_forward_extrem_cache_txn - ) - def get_backfill_events(self, room_id, event_list, limit): """Get a list of Events for a given topic that occurred before (and including) the events in event_list. Return a list of max size `limit` @@ -522,6 +399,135 @@ class EventFederationStore(SQLBaseStore): return event_results + +class EventFederationStore(EventFederationWorkerStore): + """ Responsible for storing and serving up the various graphs associated + with an event. Including the main event graph and the auth chains for an + event. + + Also has methods for getting the front (latest) and back (oldest) edges + of the event graphs. These are used to generate the parents for new events + and backfilling from another server respectively. 
+ """ + + EVENT_AUTH_STATE_ONLY = "event_auth_state_only" + + def __init__(self, db_conn, hs): + super(EventFederationStore, self).__init__(db_conn, hs) + + self.register_background_update_handler( + self.EVENT_AUTH_STATE_ONLY, + self._background_delete_non_state_event_auth, + ) + + hs.get_clock().looping_call( + self._delete_old_forward_extrem_cache, 60 * 60 * 1000 + ) + + def _update_min_depth_for_room_txn(self, txn, room_id, depth): + min_depth = self._get_min_depth_interaction(txn, room_id) + + if min_depth and depth >= min_depth: + return + + self._simple_upsert_txn( + txn, + table="room_depth", + keyvalues={ + "room_id": room_id, + }, + values={ + "min_depth": depth, + }, + ) + + def _handle_mult_prev_events(self, txn, events): + """ + For the given event, update the event edges table and forward and + backward extremities tables. + """ + self._simple_insert_many_txn( + txn, + table="event_edges", + values=[ + { + "event_id": ev.event_id, + "prev_event_id": e_id, + "room_id": ev.room_id, + "is_state": False, + } + for ev in events + for e_id, _ in ev.prev_events + ], + ) + + self._update_backward_extremeties(txn, events) + + def _update_backward_extremeties(self, txn, events): + """Updates the event_backward_extremities tables based on the new/updated + events being persisted. + + This is called for new events *and* for events that were outliers, but + are now being persisted as non-outliers. + + Forward extremities are handled when we first start persisting the events. + """ + events_by_room = {} + for ev in events: + events_by_room.setdefault(ev.room_id, []).append(ev) + + query = ( + "INSERT INTO event_backward_extremities (event_id, room_id)" + " SELECT ?, ? WHERE NOT EXISTS (" + " SELECT 1 FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" + " )" + " AND NOT EXISTS (" + " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " + " AND outlier = ?" 
+ " )" + ) + + txn.executemany(query, [ + (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) + for ev in events for e_id, _ in ev.prev_events + if not ev.internal_metadata.is_outlier() + ]) + + query = ( + "DELETE FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" + ) + txn.executemany( + query, + [ + (ev.event_id, ev.room_id) for ev in events + if not ev.internal_metadata.is_outlier() + ] + ) + + def _delete_old_forward_extrem_cache(self): + def _delete_old_forward_extrem_cache_txn(txn): + # Delete entries older than a month, while making sure we don't delete + # the only entries for a room. + sql = (""" + DELETE FROM stream_ordering_to_exterm + WHERE + room_id IN ( + SELECT room_id + FROM stream_ordering_to_exterm + WHERE stream_ordering > ? + ) AND stream_ordering < ? + """) + txn.execute( + sql, + (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) + ) + return self.runInteraction( + "_delete_old_forward_extrem_cache", + _delete_old_forward_extrem_cache_txn + ) + def clean_room_for_join(self, room_id): return self.runInteraction( "clean_room_for_join", diff --git a/synapse/storage/signatures.py b/synapse/storage/signatures.py index 67d5d9969..e6eeb1b64 100644 --- a/synapse/storage/signatures.py +++ b/synapse/storage/signatures.py @@ -22,9 +22,7 @@ from synapse.crypto.event_signing import compute_event_reference_hash from synapse.util.caches.descriptors import cached, cachedList -class SignatureStore(SQLBaseStore): - """Persistence for event signatures and hashes""" - +class SignatureWorkerStore(SQLBaseStore): @cached() def get_event_reference_hash(self, event_id): return self._get_event_reference_hashes_txn(event_id) @@ -74,6 +72,10 @@ class SignatureStore(SQLBaseStore): txn.execute(query, (event_id, )) return {k: v for k, v in txn} + +class SignatureStore(SignatureWorkerStore): + """Persistence for event signatures and hashes""" + def _store_event_reference_hashes_txn(self, txn, events): """Store a hash for a PDU 
Args: From 6411f725bedbc4701e9c624ae23f47d52ff0bd7c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 14:05:41 +0000 Subject: [PATCH 150/200] Calculate stream_ordering_month_ago correctly on workers --- synapse/replication/slave/storage/events.py | 1 - synapse/storage/__init__.py | 15 -- synapse/storage/event_push_actions.py | 149 +++++++++++--------- 3 files changed, 85 insertions(+), 80 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f43..a4d7430f9 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -67,7 +67,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, "MembershipStreamChangeCache", events_max, ) - self.stream_ordering_month_ago = 0 self._stream_order_on_start = self.get_room_max_stream_ordering() # Cached functions can't be accessed through a class instance so we need diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0f136f8a0..b3cdcfdc2 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -20,7 +20,6 @@ from synapse.storage.devices import DeviceStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore ) -from ._base import LoggingTransaction from .directory import DirectoryStore from .events import EventsStore from .presence import PresenceStore, UserPresenceState @@ -228,20 +227,6 @@ class DataStore(RoomMemberStore, RoomStore, prefilled_cache=_group_updates_prefill, ) - cur = LoggingTransaction( - db_conn.cursor(), - name="_find_stream_orderings_for_times_txn", - database_engine=self.database_engine, - after_callbacks=[], - final_callbacks=[], - ) - self._find_stream_orderings_for_times_txn(cur) - cur.close() - - self.find_stream_orderings_looping_call = self._clock.looping_call( - self._find_stream_orderings_for_times, 10 * 60 * 1000 - ) - self._stream_order_on_start = 
self.get_room_max_stream_ordering() self._min_stream_order_on_start = self.get_room_min_stream_ordering() diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 6454045c2..c08bebe11 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore, LoggingTransaction from twisted.internet import defer from synapse.util.async import sleep from synapse.util.caches.descriptors import cachedInlineCallbacks @@ -64,6 +64,27 @@ def _deserialize_action(actions, is_highlight): class EventPushActionsWorkerStore(SQLBaseStore): + def __init__(self, db_conn, hs): + super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) + + # These get correctly ste by _find_stream_orderings_for_times_txn + self.stream_ordering_month_ago = 0 + self.stream_ordering_day_ago = 0 + + cur = LoggingTransaction( + db_conn.cursor(), + name="_find_stream_orderings_for_times_txn", + database_engine=self.database_engine, + after_callbacks=[], + final_callbacks=[], + ) + self._find_stream_orderings_for_times_txn(cur) + cur.close() + + self.find_stream_orderings_looping_call = self._clock.looping_call( + self._find_stream_orderings_for_times, 10 * 60 * 1000 + ) + @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id @@ -443,6 +464,69 @@ class EventPushActionsWorkerStore(SQLBaseStore): desc="remove_push_actions_from_staging", ) + @defer.inlineCallbacks + def _find_stream_orderings_for_times(self): + yield self.runInteraction( + "_find_stream_orderings_for_times", + self._find_stream_orderings_for_times_txn + ) + + def _find_stream_orderings_for_times_txn(self, txn): + logger.info("Searching for stream ordering 1 month ago") + 
self.stream_ordering_month_ago = self._find_first_stream_ordering_after_ts_txn( + txn, self._clock.time_msec() - 30 * 24 * 60 * 60 * 1000 + ) + logger.info( + "Found stream ordering 1 month ago: it's %d", + self.stream_ordering_month_ago + ) + logger.info("Searching for stream ordering 1 day ago") + self.stream_ordering_day_ago = self._find_first_stream_ordering_after_ts_txn( + txn, self._clock.time_msec() - 24 * 60 * 60 * 1000 + ) + logger.info( + "Found stream ordering 1 day ago: it's %d", + self.stream_ordering_day_ago + ) + + def _find_first_stream_ordering_after_ts_txn(self, txn, ts): + """ + Find the stream_ordering of the first event that was received after + a given timestamp. This is relatively slow as there is no index on + received_ts but we can then use this to delete push actions before + this. + + received_ts must necessarily be in the same order as stream_ordering + and stream_ordering is indexed, so we manually binary search using + stream_ordering + """ + txn.execute("SELECT MAX(stream_ordering) FROM events") + max_stream_ordering = txn.fetchone()[0] + + if max_stream_ordering is None: + return 0 + + range_start = 0 + range_end = max_stream_ordering + + sql = ( + "SELECT received_ts FROM events" + " WHERE stream_ordering > ?" + " ORDER BY stream_ordering" + " LIMIT 1" + ) + + while range_end - range_start > 1: + middle = int((range_end + range_start) / 2) + txn.execute(sql, (middle,)) + middle_ts = txn.fetchone()[0] + if ts > middle_ts: + range_start = middle + else: + range_end = middle + + return range_end + class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" @@ -650,69 +734,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore): WHERE room_id = ? AND user_id = ? AND stream_ordering <= ? 
""", (room_id, user_id, stream_ordering)) - @defer.inlineCallbacks - def _find_stream_orderings_for_times(self): - yield self.runInteraction( - "_find_stream_orderings_for_times", - self._find_stream_orderings_for_times_txn - ) - - def _find_stream_orderings_for_times_txn(self, txn): - logger.info("Searching for stream ordering 1 month ago") - self.stream_ordering_month_ago = self._find_first_stream_ordering_after_ts_txn( - txn, self._clock.time_msec() - 30 * 24 * 60 * 60 * 1000 - ) - logger.info( - "Found stream ordering 1 month ago: it's %d", - self.stream_ordering_month_ago - ) - logger.info("Searching for stream ordering 1 day ago") - self.stream_ordering_day_ago = self._find_first_stream_ordering_after_ts_txn( - txn, self._clock.time_msec() - 24 * 60 * 60 * 1000 - ) - logger.info( - "Found stream ordering 1 day ago: it's %d", - self.stream_ordering_day_ago - ) - - def _find_first_stream_ordering_after_ts_txn(self, txn, ts): - """ - Find the stream_ordering of the first event that was received after - a given timestamp. This is relatively slow as there is no index on - received_ts but we can then use this to delete push actions before - this. - - received_ts must necessarily be in the same order as stream_ordering - and stream_ordering is indexed, so we manually binary search using - stream_ordering - """ - txn.execute("SELECT MAX(stream_ordering) FROM events") - max_stream_ordering = txn.fetchone()[0] - - if max_stream_ordering is None: - return 0 - - range_start = 0 - range_end = max_stream_ordering - - sql = ( - "SELECT received_ts FROM events" - " WHERE stream_ordering > ?" 
- " ORDER BY stream_ordering" - " LIMIT 1" - ) - - while range_end - range_start > 1: - middle = int((range_end + range_start) / 2) - txn.execute(sql, (middle,)) - middle_ts = txn.fetchone()[0] - if ts > middle_ts: - range_start = middle - else: - range_end = middle - - return range_end - @defer.inlineCallbacks def _rotate_notifs(self): if self._doing_notif_rotation or self.stream_ordering_day_ago is None: From 784f036306a020fcde495887c2881209b913b9b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:54:37 +0000 Subject: [PATCH 151/200] Move RoomMemberHandler out of Handlers --- synapse/handlers/__init__.py | 2 - synapse/handlers/_base.py | 2 +- synapse/handlers/federation.py | 4 +- synapse/handlers/profile.py | 2 +- synapse/handlers/room.py | 4 +- synapse/handlers/room_member.py | 52 ++++++++++++++---------- synapse/rest/client/v1/admin.py | 7 ++-- synapse/rest/client/v1/room.py | 19 +++++---- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 5 +++ tests/rest/client/v1/test_typing.py | 2 +- 11 files changed, 57 insertions(+), 44 deletions(-) diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 53213cdcc..8f8fd82eb 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -17,7 +17,6 @@ from .register import RegistrationHandler from .room import ( RoomCreationHandler, RoomContextHandler, ) -from .room_member import RoomMemberHandler from .message import MessageHandler from .federation import FederationHandler from .directory import DirectoryHandler @@ -49,7 +48,6 @@ class Handlers(object): self.registration_handler = RegistrationHandler(hs) self.message_handler = MessageHandler(hs) self.room_creation_handler = RoomCreationHandler(hs) - self.room_member_handler = RoomMemberHandler(hs) self.federation_handler = FederationHandler(hs) self.directory_handler = DirectoryHandler(hs) self.admin_handler = AdminHandler(hs) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py 
index faa5609c0..e089e66fd 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -158,7 +158,7 @@ class BaseHandler(object): # homeserver. requester = synapse.types.create_requester( target_user, is_guest=True) - handler = self.hs.get_handlers().room_member_handler + handler = self.hs.get_room_member_handler() yield handler.update_membership( requester, target_user, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8832ba58b..520612683 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2153,7 +2153,7 @@ class FederationHandler(BaseHandler): raise e yield self._check_signature(event, context) - member_handler = self.hs.get_handlers().room_member_handler + member_handler = self.hs.get_room_member_handler() yield member_handler.send_membership_event(None, event, context) else: destinations = set(x.split(":", 1)[-1] for x in (sender_user_id, room_id)) @@ -2197,7 +2197,7 @@ class FederationHandler(BaseHandler): # TODO: Make sure the signatures actually are correct. event.signatures.update(returned_invite.signatures) - member_handler = self.hs.get_handlers().room_member_handler + member_handler = self.hs.get_room_member_handler() yield member_handler.send_membership_event(None, event, context) @defer.inlineCallbacks diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 9800e2445..c9c287903 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -233,7 +233,7 @@ class ProfileHandler(BaseHandler): ) for room_id in room_ids: - handler = self.hs.get_handlers().room_member_handler + handler = self.hs.get_room_member_handler() try: # Assume the target_user isn't a guest, # because we don't let guests set profile or avatar data. 
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6ab020bf4..6c425828c 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -165,7 +165,7 @@ class RoomCreationHandler(BaseHandler): creation_content = config.get("creation_content", {}) - room_member_handler = self.hs.get_handlers().room_member_handler + room_member_handler = self.hs.get_room_member_handler() yield self._send_events_for_new_room( requester, @@ -224,7 +224,7 @@ class RoomCreationHandler(BaseHandler): id_server = invite_3pid["id_server"] address = invite_3pid["address"] medium = invite_3pid["medium"] - yield self.hs.get_handlers().room_member_handler.do_3pid_invite( + yield self.hs.get_room_member_handler().do_3pid_invite( room_id, requester.user, medium, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 37dc5e99a..0329432f5 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -30,24 +30,33 @@ from synapse.api.errors import AuthError, SynapseError, Codes from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room -from ._base import BaseHandler logger = logging.getLogger(__name__) id_server_scheme = "https://" -class RoomMemberHandler(BaseHandler): +class RoomMemberHandler(object): # TODO(paul): This handler currently contains a messy conflation of # low-level API that works on UserID objects and so on, and REST-level # API that takes ID strings and returns pagination chunks. These concerns # ought to be separated out a lot better. 
def __init__(self, hs): - super(RoomMemberHandler, self).__init__(hs) + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.state_handler = hs.get_state_handler() + self.config = hs.config + self.is_mine = hs.is_mine + self.is_mine_id = hs.is_mine_id + self.simple_http_client = hs.get_simple_http_client() + self.federation_handler = hs.get_handlers().federation_handler + self.directory_handler = hs.get_handlers().directory_handler + self.registration_handler = hs.get_handlers().registration_handler self.profile_handler = hs.get_profile_handler() self.event_creation_hander = hs.get_event_creation_handler() + self.replication_layer = hs.get_replication_layer() self.member_linearizer = Linearizer(name="member") @@ -138,7 +147,7 @@ class RoomMemberHandler(BaseHandler): # join dance for now, since we're kinda implicitly checking # that we are allowed to join when we decide whether or not we # need to do the invite/join dance. - yield self.hs.get_handlers().federation_handler.do_invite_join( + yield self.federation_handler.do_invite_join( remote_room_hosts, room_id, user.to_string(), @@ -204,8 +213,7 @@ class RoomMemberHandler(BaseHandler): # if this is a join with a 3pid signature, we may need to turn a 3pid # invite into a normal invite before we can handle the join. 
if third_party_signed is not None: - replication = self.hs.get_replication_layer() - yield replication.exchange_third_party_invite( + yield self.replication_layer.exchange_third_party_invite( third_party_signed["sender"], target.to_string(), room_id, @@ -226,7 +234,7 @@ class RoomMemberHandler(BaseHandler): requester.user, ) if not is_requester_admin: - if self.hs.config.block_non_admin_invites: + if self.config.block_non_admin_invites: logger.info( "Blocking invite: user is not admin and non-admin " "invites disabled" @@ -286,7 +294,7 @@ class RoomMemberHandler(BaseHandler): if not is_host_in_room: inviter = yield self.get_inviter(target.to_string(), room_id) - if inviter and not self.hs.is_mine(inviter): + if inviter and not self.is_mine(inviter): remote_room_hosts.append(inviter.domain) content["membership"] = Membership.JOIN @@ -311,7 +319,7 @@ class RoomMemberHandler(BaseHandler): if not inviter: raise SynapseError(404, "Not a known room") - if self.hs.is_mine(inviter): + if self.is_mine(inviter): # the inviter was on our server, but has now left. Carry on # with the normal rejection codepath. # @@ -321,7 +329,7 @@ class RoomMemberHandler(BaseHandler): else: # send the rejection to the inviter's HS. remote_room_hosts = remote_room_hosts + [inviter.domain] - fed_handler = self.hs.get_handlers().federation_handler + fed_handler = self.federation_handler try: ret = yield fed_handler.do_remotely_reject_invite( remote_room_hosts, @@ -393,7 +401,7 @@ class RoomMemberHandler(BaseHandler): "Sender (%s) must be same as requester (%s)" % (sender, requester.user) ) - assert self.hs.is_mine(sender), "Sender must be our own: %s" % (sender,) + assert self.is_mine(sender), "Sender must be our own: %s" % (sender,) else: requester = synapse.types.create_requester(target_user) @@ -477,7 +485,7 @@ class RoomMemberHandler(BaseHandler): Raises: SynapseError if room alias could not be found. 
""" - directory_handler = self.hs.get_handlers().directory_handler + directory_handler = self.directory_handler mapping = yield directory_handler.get_association(room_alias) if not mapping: @@ -508,7 +516,7 @@ class RoomMemberHandler(BaseHandler): requester, txn_id ): - if self.hs.config.block_non_admin_invites: + if self.config.block_non_admin_invites: is_requester_admin = yield self.auth.is_server_admin( requester.user, ) @@ -555,7 +563,7 @@ class RoomMemberHandler(BaseHandler): str: the matrix ID of the 3pid, or None if it is not recognized. """ try: - data = yield self.hs.get_simple_http_client().get_json( + data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server,), { "medium": medium, @@ -578,7 +586,7 @@ class RoomMemberHandler(BaseHandler): if server_hostname not in data["signatures"]: raise AuthError(401, "No signature from server %s" % (server_hostname,)) for key_name, signature in data["signatures"][server_hostname].items(): - key_data = yield self.hs.get_simple_http_client().get_json( + key_data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/pubkey/%s" % (id_server_scheme, server_hostname, key_name,), ) @@ -603,7 +611,7 @@ class RoomMemberHandler(BaseHandler): user, txn_id ): - room_state = yield self.hs.get_state_handler().get_current_state(room_id) + room_state = yield self.state_handler.get_current_state(room_id) inviter_display_name = "" inviter_avatar_url = "" @@ -727,15 +735,15 @@ class RoomMemberHandler(BaseHandler): "sender_avatar_url": inviter_avatar_url, } - if self.hs.config.invite_3pid_guest: - registration_handler = self.hs.get_handlers().registration_handler + if self.config.invite_3pid_guest: + registration_handler = self.registration_handler guest_access_token = yield registration_handler.guest_access_token_for( medium=medium, address=address, inviter_user_id=inviter_user_id, ) - guest_user_info = yield self.hs.get_auth().get_user_by_access_token( + 
guest_user_info = yield self.auth.get_user_by_access_token( guest_access_token ) @@ -744,7 +752,7 @@ class RoomMemberHandler(BaseHandler): "guest_user_id": guest_user_info["user"].to_string(), }) - data = yield self.hs.get_simple_http_client().post_urlencoded_get_json( + data = yield self.simple_http_client.post_urlencoded_get_json( is_url, invite_config ) @@ -793,10 +801,10 @@ class RoomMemberHandler(BaseHandler): # first member event? create_event_id = current_state_ids.get(("m.room.create", "")) if len(current_state_ids) == 1 and create_event_id: - defer.returnValue(self.hs.is_mine_id(create_event_id)) + defer.returnValue(self.is_mine_id(create_event_id)) for etype, state_key in current_state_ids: - if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): + if etype != EventTypes.Member or not self.is_mine_id(state_key): continue event_id = current_state_ids[(etype, state_key)] diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 6073cc6fa..3917eee42 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -180,6 +180,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): self.handlers = hs.get_handlers() self.state = hs.get_state_handler() self.event_creation_handler = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() @defer.inlineCallbacks def on_POST(self, request, room_id): @@ -238,7 +239,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): logger.info("Kicking %r from %r...", user_id, room_id) target_requester = create_requester(user_id) - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=room_id, @@ -247,9 +248,9 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ratelimit=False ) - yield self.handlers.room_member_handler.forget(target_requester.user, room_id) + yield 
self.room_member_handler.forget(target_requester.user, room_id) - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=new_room_id, diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 817fd4784..9d745174c 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -84,6 +84,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): super(RoomStateEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() self.event_creation_hander = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /room/$roomid/state/$eventtype @@ -156,7 +157,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): if event_type == EventTypes.Member: membership = content.get("membership", None) - event = yield self.handlers.room_member_handler.update_membership( + event = yield self.room_member_handler.update_membership( requester, target=UserID.from_string(state_key), room_id=room_id, @@ -229,7 +230,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): class JoinRoomAliasServlet(ClientV1RestServlet): def __init__(self, hs): super(JoinRoomAliasServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /join/$room_identifier[/$txn_id] @@ -257,7 +258,7 @@ class JoinRoomAliasServlet(ClientV1RestServlet): except Exception: remote_room_hosts = None elif RoomAlias.is_valid(room_identifier): - handler = self.handlers.room_member_handler + handler = self.room_member_handler room_alias = RoomAlias.from_string(room_identifier) room_id, remote_room_hosts = yield handler.lookup_room_alias(room_alias) room_id = room_id.to_string() @@ -266,7 +267,7 @@ class JoinRoomAliasServlet(ClientV1RestServlet): room_identifier, )) - yield 
self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=requester, target=requester.user, room_id=room_id, @@ -562,7 +563,7 @@ class RoomEventContextServlet(ClientV1RestServlet): class RoomForgetRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomForgetRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): PATTERNS = ("/rooms/(?P[^/]*)/forget") @@ -575,7 +576,7 @@ class RoomForgetRestServlet(ClientV1RestServlet): allow_guest=False, ) - yield self.handlers.room_member_handler.forget( + yield self.room_member_handler.forget( user=requester.user, room_id=room_id, ) @@ -593,7 +594,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomMembershipRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /rooms/$roomid/[invite|join|leave] @@ -622,7 +623,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): content = {} if membership_action == "invite" and self._has_3pid_invite_keys(content): - yield self.handlers.room_member_handler.do_3pid_invite( + yield self.room_member_handler.do_3pid_invite( room_id, requester.user, content["medium"], @@ -644,7 +645,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): if 'reason' in content and membership_action in ['kick', 'ban']: event_content = {'reason': content['reason']} - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=requester, target=target, room_id=room_id, diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index c6f4680a7..0ba62bddc 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -183,7 +183,7 @@ class 
RegisterRestServlet(RestServlet): self.auth_handler = hs.get_auth_handler() self.registration_handler = hs.get_handlers().registration_handler self.identity_handler = hs.get_handlers().identity_handler - self.room_member_handler = hs.get_handlers().room_member_handler + self.room_member_handler = hs.get_room_member_handler() self.device_handler = hs.get_device_handler() self.macaroon_gen = hs.get_macaroon_generator() diff --git a/synapse/server.py b/synapse/server.py index fbd602d40..5b6effbe3 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -45,6 +45,7 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler +from synapse.handlers.room_member import RoomMemberHandler from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler @@ -145,6 +146,7 @@ class HomeServer(object): 'groups_attestation_signing', 'groups_attestation_renewer', 'spam_checker', + 'room_member_handler', ] def __init__(self, hostname, **kwargs): @@ -382,6 +384,9 @@ class HomeServer(object): def build_spam_checker(self): return SpamChecker(self) + def build_room_member_handler(self): + return RoomMemberHandler(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index a269e6f56..e46534cd3 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -95,7 +95,7 @@ class RoomTypingTestCase(RestTestCase): else: if remotedomains is not None: remotedomains.add(member.domain) - hs.get_handlers().room_member_handler.fetch_room_distributions_into = ( + hs.get_room_member_handler().fetch_room_distributions_into = ( fetch_room_distributions_into ) From 
f793bc38770caf81dc34b9033d7dd2c9bfc0d79b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 13:56:03 +0000 Subject: [PATCH 152/200] Split out stream store --- synapse/replication/slave/storage/events.py | 54 +-- synapse/storage/__init__.py | 8 - synapse/storage/stream.py | 350 +++++++++++--------- 3 files changed, 202 insertions(+), 210 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f43..517a9f0ec 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -22,9 +22,8 @@ from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore -from synapse.storage.stream import StreamStore +from synapse.storage.stream import StreamWorkerStore from synapse.storage.signatures import SignatureStore -from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -41,34 +40,20 @@ logger = logging.getLogger(__name__) class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, - EventsWorkerStore, StateGroupWorkerStore, + StreamWorkerStore, EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedEventStore, self).__init__(db_conn, hs) self._stream_id_gen = SlavedIdTracker( db_conn, "events", "stream_ordering", ) self._backfill_id_gen = SlavedIdTracker( db_conn, "events", "stream_ordering", step=-1 ) - events_max = self._stream_id_gen.get_current_token() - event_cache_prefill, min_event_val = self._get_cache_dict( - db_conn, "events", - entity_column="room_id", - stream_column="stream_ordering", - max_value=events_max, - ) - self._events_stream_cache = StreamChangeCache( - 
"EventsRoomStreamChangeCache", min_event_val, - prefilled_cache=event_cache_prefill, - ) - self._membership_stream_cache = StreamChangeCache( - "MembershipStreamChangeCache", events_max, - ) + + super(SlavedEventStore, self).__init__(db_conn, hs) self.stream_ordering_month_ago = 0 - self._stream_order_on_start = self.get_room_max_stream_ordering() # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. @@ -76,30 +61,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, "get_latest_event_ids_in_room" ] - get_recent_event_ids_for_room = ( - StreamStore.__dict__["get_recent_event_ids_for_room"] - ) - has_room_changed_since = DataStore.has_room_changed_since.__func__ - - get_membership_changes_for_user = ( - DataStore.get_membership_changes_for_user.__func__ - ) - get_room_events_max_id = DataStore.get_room_events_max_id.__func__ - get_room_events_stream_for_room = ( - DataStore.get_room_events_stream_for_room.__func__ - ) - get_events_around = DataStore.get_events_around.__func__ - - get_recent_events_for_room = DataStore.get_recent_events_for_room.__func__ - get_room_events_stream_for_rooms = ( - DataStore.get_room_events_stream_for_rooms.__func__ - ) - get_stream_token_for_event = DataStore.get_stream_token_for_event.__func__ - - _set_before_and_after = staticmethod(DataStore._set_before_and_after) - - _get_events_around_txn = DataStore._get_events_around_txn.__func__ - get_backfill_events = DataStore.get_backfill_events.__func__ _get_backfill_events = DataStore._get_backfill_events.__func__ get_missing_events = DataStore.get_missing_events.__func__ @@ -120,8 +81,11 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, get_all_new_events_stream = DataStore.get_all_new_events_stream.__func__ - get_federation_out_pos = DataStore.get_federation_out_pos.__func__ - update_federation_out_pos = DataStore.update_federation_out_pos.__func__ + def 
get_room_max_stream_ordering(self): + return self._stream_id_gen.get_current_token() + + def get_room_min_stream_ordering(self): + return self._backfill_id_gen.get_current_token() get_latest_event_ids_and_hashes_in_room = ( DataStore.get_latest_event_ids_and_hashes_in_room.__func__ diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0f136f8a0..0ce76d7a8 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -148,14 +148,6 @@ class DataStore(RoomMemberStore, RoomStore, stream_column="stream_ordering", max_value=events_max, ) - self._events_stream_cache = StreamChangeCache( - "EventsRoomStreamChangeCache", min_event_val, - prefilled_cache=event_cache_prefill, - ) - - self._membership_stream_cache = StreamChangeCache( - "MembershipStreamChangeCache", events_max, - ) self._presence_on_startup = self._get_active_presence(db_conn) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 52bdce5be..057f30db3 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -35,13 +35,17 @@ what sort order was used: from twisted.internet import defer -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore + from synapse.util.caches.descriptors import cached from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.storage.engines import PostgresEngine, Sqlite3Engine +import abc import logging @@ -143,81 +147,28 @@ def filter_to_clause(event_filter): return " AND ".join(clauses), args -class StreamStore(SQLBaseStore): - @defer.inlineCallbacks - def get_appservice_room_stream(self, service, from_key, to_key, limit=0): - # NB this lives here instead of appservice.py so we can reuse the - # 'private' StreamToken class in this file. 
- if limit: - limit = max(limit, MAX_STREAM_SIZE) - else: - limit = MAX_STREAM_SIZE +class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): + __metaclass__ = abc.ABCMeta - # From and to keys should be integers from ordering. - from_id = RoomStreamToken.parse_stream_token(from_key) - to_id = RoomStreamToken.parse_stream_token(to_key) + def __init__(self, db_conn, hs): + super(StreamWorkerStore, self).__init__(db_conn, hs) - if from_key == to_key: - defer.returnValue(([], to_key)) - return - - # select all the events between from/to with a sensible limit - sql = ( - "SELECT e.event_id, e.room_id, e.type, s.state_key, " - "e.stream_ordering FROM events AS e " - "LEFT JOIN state_events as s ON " - "e.event_id = s.event_id " - "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " - "ORDER BY stream_ordering ASC LIMIT %(limit)d " - ) % { - "limit": limit - } - - def f(txn): - # pull out all the events between the tokens - txn.execute(sql, (from_id.stream, to_id.stream,)) - rows = self.cursor_to_dict(txn) - - # Logic: - # - We want ALL events which match the AS room_id regex - # - We want ALL events which match the rooms represented by the AS - # room_alias regex - # - We want ALL events for rooms that AS users have joined. - # This is currently supported via get_app_service_rooms (which is - # used for the Notifier listener rooms). We can't reasonably make a - # SQL query for these room IDs, so we'll pull all the events between - # from/to and filter in python. 
- rooms_for_as = self._get_app_service_rooms_txn(txn, service) - room_ids_for_as = [r.room_id for r in rooms_for_as] - - def app_service_interested(row): - if row["room_id"] in room_ids_for_as: - return True - - if row["type"] == EventTypes.Member: - if service.is_interested_in_user(row.get("state_key")): - return True - return False - - return [r for r in rows if app_service_interested(r)] - - rows = yield self.runInteraction("get_appservice_room_stream", f) - - ret = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True + events_max = self.get_room_max_stream_ordering() + event_cache_prefill, min_event_val = self._get_cache_dict( + db_conn, "events", + entity_column="room_id", + stream_column="stream_ordering", + max_value=events_max, + ) + self._events_stream_cache = StreamChangeCache( + "EventsRoomStreamChangeCache", min_event_val, + prefilled_cache=event_cache_prefill, + ) + self._membership_stream_cache = StreamChangeCache( + "MembershipStreamChangeCache", events_max, ) - self._set_before_and_after(ret, rows, topo_order=from_id is None) - - if rows: - key = "s%d" % max(r["stream_ordering"] for r in rows) - else: - # Assume we didn't get anything because there was nothing to - # get. - key = to_key - - defer.returnValue((ret, key)) + self._stream_order_on_start = self.get_room_max_stream_ordering() @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, @@ -380,88 +331,6 @@ class StreamStore(SQLBaseStore): defer.returnValue(ret) - @defer.inlineCallbacks - def paginate_room_events(self, room_id, from_key, to_key=None, - direction='b', limit=-1, event_filter=None): - # Tokens really represent positions between elements, but we use - # the convention of pointing to the event before the gap. Hence - # we have a bit of asymmetry when it comes to equalities. 
- args = [False, room_id] - if direction == 'b': - order = "DESC" - bounds = upper_bound( - RoomStreamToken.parse(from_key), self.database_engine - ) - if to_key: - bounds = "%s AND %s" % (bounds, lower_bound( - RoomStreamToken.parse(to_key), self.database_engine - )) - else: - order = "ASC" - bounds = lower_bound( - RoomStreamToken.parse(from_key), self.database_engine - ) - if to_key: - bounds = "%s AND %s" % (bounds, upper_bound( - RoomStreamToken.parse(to_key), self.database_engine - )) - - filter_clause, filter_args = filter_to_clause(event_filter) - - if filter_clause: - bounds += " AND " + filter_clause - args.extend(filter_args) - - if int(limit) > 0: - args.append(int(limit)) - limit_str = " LIMIT ?" - else: - limit_str = "" - - sql = ( - "SELECT * FROM events" - " WHERE outlier = ? AND room_id = ? AND %(bounds)s" - " ORDER BY topological_ordering %(order)s," - " stream_ordering %(order)s %(limit)s" - ) % { - "bounds": bounds, - "order": order, - "limit": limit_str - } - - def f(txn): - txn.execute(sql, args) - - rows = self.cursor_to_dict(txn) - - if rows: - topo = rows[-1]["topological_ordering"] - toke = rows[-1]["stream_ordering"] - if direction == 'b': - # Tokens are positions between events. - # This token points *after* the last event in the chunk. - # We need it to point to the event before it in the chunk - # when we are going backwards so we subtract one from the - # stream part. - toke -= 1 - next_token = str(RoomStreamToken(topo, toke)) - else: - # TODO (erikj): We should work out what to do here instead. 
- next_token = to_key if to_key else from_key - - return rows, next_token, - - rows, token = yield self.runInteraction("paginate_room_events", f) - - events = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True - ) - - self._set_before_and_after(events, rows) - - defer.returnValue((events, token)) - @defer.inlineCallbacks def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None): rows, token = yield self.get_recent_event_ids_for_room( @@ -542,7 +411,7 @@ class StreamStore(SQLBaseStore): `room_id` causes it to return the current room specific topological token. """ - token = yield self._stream_id_gen.get_current_token() + token = yield self.get_room_max_stream_ordering() if room_id is None: defer.returnValue("s%d" % (token,)) else: @@ -552,11 +421,13 @@ class StreamStore(SQLBaseStore): ) defer.returnValue("t%d-%d" % (topo, token)) + @abc.abstractmethod def get_room_max_stream_ordering(self): - return self._stream_id_gen.get_current_token() + raise NotImplementedError() + @abc.abstractmethod def get_room_min_stream_ordering(self): - return self._backfill_id_gen.get_current_token() + raise NotImplementedError() def get_stream_token_for_event(self, event_id): """The stream token for an event @@ -832,3 +703,168 @@ class StreamStore(SQLBaseStore): def has_room_changed_since(self, room_id, stream_id): return self._events_stream_cache.has_entity_changed(room_id, stream_id) + + +class StreamStore(StreamWorkerStore): + def get_room_max_stream_ordering(self): + return self._stream_id_gen.get_current_token() + + def get_room_min_stream_ordering(self): + return self._backfill_id_gen.get_current_token() + + @defer.inlineCallbacks + def get_appservice_room_stream(self, service, from_key, to_key, limit=0): + # NB this lives here instead of appservice.py so we can reuse the + # 'private' StreamToken class in this file. 
+ if limit: + limit = max(limit, MAX_STREAM_SIZE) + else: + limit = MAX_STREAM_SIZE + + # From and to keys should be integers from ordering. + from_id = RoomStreamToken.parse_stream_token(from_key) + to_id = RoomStreamToken.parse_stream_token(to_key) + + if from_key == to_key: + defer.returnValue(([], to_key)) + return + + # select all the events between from/to with a sensible limit + sql = ( + "SELECT e.event_id, e.room_id, e.type, s.state_key, " + "e.stream_ordering FROM events AS e " + "LEFT JOIN state_events as s ON " + "e.event_id = s.event_id " + "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " + "ORDER BY stream_ordering ASC LIMIT %(limit)d " + ) % { + "limit": limit + } + + def f(txn): + # pull out all the events between the tokens + txn.execute(sql, (from_id.stream, to_id.stream,)) + rows = self.cursor_to_dict(txn) + + # Logic: + # - We want ALL events which match the AS room_id regex + # - We want ALL events which match the rooms represented by the AS + # room_alias regex + # - We want ALL events for rooms that AS users have joined. + # This is currently supported via get_app_service_rooms (which is + # used for the Notifier listener rooms). We can't reasonably make a + # SQL query for these room IDs, so we'll pull all the events between + # from/to and filter in python. 
+ rooms_for_as = self._get_app_service_rooms_txn(txn, service) + room_ids_for_as = [r.room_id for r in rooms_for_as] + + def app_service_interested(row): + if row["room_id"] in room_ids_for_as: + return True + + if row["type"] == EventTypes.Member: + if service.is_interested_in_user(row.get("state_key")): + return True + return False + + return [r for r in rows if app_service_interested(r)] + + rows = yield self.runInteraction("get_appservice_room_stream", f) + + ret = yield self._get_events( + [r["event_id"] for r in rows], + get_prev_content=True + ) + + self._set_before_and_after(ret, rows, topo_order=from_id is None) + + if rows: + key = "s%d" % max(r["stream_ordering"] for r in rows) + else: + # Assume we didn't get anything because there was nothing to + # get. + key = to_key + + defer.returnValue((ret, key)) + + @defer.inlineCallbacks + def paginate_room_events(self, room_id, from_key, to_key=None, + direction='b', limit=-1, event_filter=None): + # Tokens really represent positions between elements, but we use + # the convention of pointing to the event before the gap. Hence + # we have a bit of asymmetry when it comes to equalities. + args = [False, room_id] + if direction == 'b': + order = "DESC" + bounds = upper_bound( + RoomStreamToken.parse(from_key), self.database_engine + ) + if to_key: + bounds = "%s AND %s" % (bounds, lower_bound( + RoomStreamToken.parse(to_key), self.database_engine + )) + else: + order = "ASC" + bounds = lower_bound( + RoomStreamToken.parse(from_key), self.database_engine + ) + if to_key: + bounds = "%s AND %s" % (bounds, upper_bound( + RoomStreamToken.parse(to_key), self.database_engine + )) + + filter_clause, filter_args = filter_to_clause(event_filter) + + if filter_clause: + bounds += " AND " + filter_clause + args.extend(filter_args) + + if int(limit) > 0: + args.append(int(limit)) + limit_str = " LIMIT ?" + else: + limit_str = "" + + sql = ( + "SELECT * FROM events" + " WHERE outlier = ? AND room_id = ? 
AND %(bounds)s" + " ORDER BY topological_ordering %(order)s," + " stream_ordering %(order)s %(limit)s" + ) % { + "bounds": bounds, + "order": order, + "limit": limit_str + } + + def f(txn): + txn.execute(sql, args) + + rows = self.cursor_to_dict(txn) + + if rows: + topo = rows[-1]["topological_ordering"] + toke = rows[-1]["stream_ordering"] + if direction == 'b': + # Tokens are positions between events. + # This token points *after* the last event in the chunk. + # We need it to point to the event before it in the chunk + # when we are going backwards so we subtract one from the + # stream part. + toke -= 1 + next_token = str(RoomStreamToken(topo, toke)) + else: + # TODO (erikj): We should work out what to do here instead. + next_token = to_key if to_key else from_key + + return rows, next_token, + + rows, token = yield self.runInteraction("paginate_room_events", f) + + events = yield self._get_events( + [r["event_id"] for r in rows], + get_prev_content=True + ) + + self._set_before_and_after(events, rows) + + defer.returnValue((events, token)) From 22004b524e5264afb0e883bee486f669ea58833c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:59:40 +0000 Subject: [PATCH 153/200] Fix comment typo --- synapse/storage/event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index c08bebe11..848d8bd72 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -67,7 +67,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) - # These get correctly ste by _find_stream_orderings_for_times_txn + # These get correctly set by _find_stream_orderings_for_times_txn self.stream_ordering_month_ago = 0 self.stream_ordering_day_ago = 0 From 872ff95ed49c9cb30ab5f256c5ff539430e658db Mon Sep 17 00:00:00 2001 From: Erik Johnston 
Date: Thu, 1 Mar 2018 16:00:05 +0000 Subject: [PATCH 154/200] Default stream_ordering_*_ago to None --- synapse/storage/event_push_actions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 848d8bd72..716429356 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -68,8 +68,8 @@ class EventPushActionsWorkerStore(SQLBaseStore): super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) # These get correctly set by _find_stream_orderings_for_times_txn - self.stream_ordering_month_ago = 0 - self.stream_ordering_day_ago = 0 + self.stream_ordering_month_ago = None + self.stream_ordering_day_ago = None cur = LoggingTransaction( db_conn.cursor(), From 1b2af116502e6ebcd2ae24178754145e59ee7c24 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:20:57 +0000 Subject: [PATCH 155/200] Document abstract class and method better --- synapse/storage/stream.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 057f30db3..a2527d2a3 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -148,6 +148,11 @@ def filter_to_clause(event_filter): class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): + """This is an abstract base class where subclasses must implement + `get_room_max_stream_ordering` and `get_room_min_stream_ordering` + which can be called in the initializer. 
+ """ + __metaclass__ = abc.ABCMeta def __init__(self, db_conn, hs): @@ -170,6 +175,14 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): self._stream_order_on_start = self.get_room_max_stream_ordering() + @abc.abstractmethod + def get_room_max_stream_ordering(self): + raise NotImplementedError() + + @abc.abstractmethod + def get_room_min_stream_ordering(self): + raise NotImplementedError() + @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, order='DESC'): @@ -421,14 +434,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) defer.returnValue("t%d-%d" % (topo, token)) - @abc.abstractmethod - def get_room_max_stream_ordering(self): - raise NotImplementedError() - - @abc.abstractmethod - def get_room_min_stream_ordering(self): - raise NotImplementedError() - def get_stream_token_for_event(self, event_id): """The stream token for an event Args: From 884b26ae4150f19bd1e020c3eed934e978518a09 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:23:48 +0000 Subject: [PATCH 156/200] Remove unused variables --- synapse/storage/__init__.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0ce76d7a8..22c156c15 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -141,14 +141,6 @@ class DataStore(RoomMemberStore, RoomStore, else: self._cache_id_gen = None - events_max = self._stream_id_gen.get_current_token() - event_cache_prefill, min_event_val = self._get_cache_dict( - db_conn, "events", - entity_column="room_id", - stream_column="stream_ordering", - max_value=events_max, - ) - self._presence_on_startup = self._get_active_presence(db_conn) presence_cache_prefill, min_presence_val = self._get_cache_dict( @@ -196,6 +188,7 @@ class DataStore(RoomMemberStore, RoomStore, "DeviceListFederationStreamChangeCache", device_list_max, ) + events_max = 
self._stream_id_gen.get_current_token() curr_state_delta_prefill, min_curr_state_delta_id = self._get_cache_dict( db_conn, "current_state_delta_stream", entity_column="room_id", From 7c371834ccbdf33f7070981bf23cbf11d1c1c333 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:40:27 +0000 Subject: [PATCH 157/200] Stub out broken function only used for cache --- synapse/storage/signatures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/signatures.py b/synapse/storage/signatures.py index e6eeb1b64..9e6eaaa53 100644 --- a/synapse/storage/signatures.py +++ b/synapse/storage/signatures.py @@ -25,7 +25,9 @@ from synapse.util.caches.descriptors import cached, cachedList class SignatureWorkerStore(SQLBaseStore): @cached() def get_event_reference_hash(self, event_id): - return self._get_event_reference_hashes_txn(event_id) + # This is a dummy function to allow get_event_reference_hashes + # to use its cache + raise NotImplementedError() @cachedList(cached_method_name="get_event_reference_hash", list_name="event_ids", num_args=1) From 529c026ac149705d0c9948fed71e3f0ca069b759 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:49:12 +0000 Subject: [PATCH 158/200] Move back to hs.is_mine --- synapse/handlers/room_member.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 0329432f5..7ecdf8724 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -43,12 +43,11 @@ class RoomMemberHandler(object): # ought to be separated out a lot better. 
def __init__(self, hs): + self.hs = hs self.store = hs.get_datastore() self.auth = hs.get_auth() self.state_handler = hs.get_state_handler() self.config = hs.config - self.is_mine = hs.is_mine - self.is_mine_id = hs.is_mine_id self.simple_http_client = hs.get_simple_http_client() self.federation_handler = hs.get_handlers().federation_handler @@ -294,7 +293,7 @@ class RoomMemberHandler(object): if not is_host_in_room: inviter = yield self.get_inviter(target.to_string(), room_id) - if inviter and not self.is_mine(inviter): + if inviter and not self.hs.is_mine(inviter): remote_room_hosts.append(inviter.domain) content["membership"] = Membership.JOIN @@ -319,7 +318,7 @@ class RoomMemberHandler(object): if not inviter: raise SynapseError(404, "Not a known room") - if self.is_mine(inviter): + if self.hs.is_mine(inviter): # the inviter was on our server, but has now left. Carry on # with the normal rejection codepath. # @@ -401,7 +400,7 @@ class RoomMemberHandler(object): "Sender (%s) must be same as requester (%s)" % (sender, requester.user) ) - assert self.is_mine(sender), "Sender must be our own: %s" % (sender,) + assert self.hs.is_mine(sender), "Sender must be our own: %s" % (sender,) else: requester = synapse.types.create_requester(target_user) @@ -801,10 +800,10 @@ class RoomMemberHandler(object): # first member event? 
create_event_id = current_state_ids.get(("m.room.create", "")) if len(current_state_ids) == 1 and create_event_id: - defer.returnValue(self.is_mine_id(create_event_id)) + defer.returnValue(self.hs.is_mine_id(create_event_id)) for etype, state_key in current_state_ids: - if etype != EventTypes.Member or not self.is_mine_id(state_key): + if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): continue event_id = current_state_ids[(etype, state_key)] From 33bebb63f34aa947a9a48920589cd63d27f1235c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:30:57 +0000 Subject: [PATCH 159/200] Add some caches to help read marker API --- synapse/replication/slave/storage/account_data.py | 1 + synapse/storage/account_data.py | 2 ++ synapse/storage/events.py | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 6c8d2954d..970ac4c24 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -56,6 +56,7 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlaved (row.data_type, row.user_id,) ) self.get_account_data_for_user.invalidate((row.user_id,)) + self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) self._account_data_stream_cache.entity_has_changed( row.user_id, token ) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 466194e96..c062e03d1 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -133,6 +133,7 @@ class AccountDataWorkerStore(SQLBaseStore): for row in rows }) + @cached(num_args=2) def get_account_data_for_room(self, user_id, room_id): """Get all the client account_data for a user for a room. 
@@ -310,6 +311,7 @@ class AccountDataStore(AccountDataWorkerStore): self._account_data_stream_cache.entity_has_changed(user_id, next_id) self.get_account_data_for_user.invalidate((user_id,)) + self.get_account_data_for_room.invalidate((user_id, room_id,)) result = self._account_data_id_gen.get_current_token() defer.returnValue(result) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index b63392a6c..057b1be4d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -28,7 +28,7 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.types import get_domain_from_id from canonicaljson import encode_canonical_json @@ -2033,7 +2033,7 @@ class EventsStore(EventsWorkerStore): to_2, so_2 = yield self._get_event_ordering(event_id2) defer.returnValue((to_1, so_1) > (to_2, so_2)) - @defer.inlineCallbacks + @cachedInlineCallbacks(max_entries=5000) def _get_event_ordering(self, event_id): res = yield self._simple_select_one( table="events", From a83c514d1f8e1573246e0235c97ba140cbff12db Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:53:04 +0000 Subject: [PATCH 160/200] Improve caching for read_marker API We add a new storage function to get a particular type of room account data. This allows us to prefill the cache when updating that account data. 
--- synapse/handlers/read_marker.py | 6 ++-- .../replication/slave/storage/account_data.py | 3 ++ synapse/storage/account_data.py | 35 +++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/read_marker.py b/synapse/handlers/read_marker.py index b5b0303d5..5142ae153 100644 --- a/synapse/handlers/read_marker.py +++ b/synapse/handlers/read_marker.py @@ -41,9 +41,9 @@ class ReadMarkerHandler(BaseHandler): """ with (yield self.read_marker_linearizer.queue((room_id, user_id))): - account_data = yield self.store.get_account_data_for_room(user_id, room_id) - - existing_read_marker = account_data.get("m.fully_read", None) + existing_read_marker = yield self.store.get_account_data_for_room_and_type( + user_id, room_id, "m.fully_read", + ) should_update = True diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 970ac4c24..355d14ff7 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -57,6 +57,9 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlaved ) self.get_account_data_for_user.invalidate((row.user_id,)) self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) + self.get_account_data_for_room_and_type.invalidate( + (row.user_id, row.room_id, row.account_data_type,), + ) self._account_data_stream_cache.entity_has_changed( row.user_id, token ) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index c062e03d1..077b4faa5 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -157,6 +157,38 @@ class AccountDataWorkerStore(SQLBaseStore): "get_account_data_for_room", get_account_data_for_room_txn ) + @cached(num_args=3, max_entries=5000) + def get_account_data_for_room_and_type(self, user_id, room_id, account_data_type): + """Get all the client account_data for a user for a room. 
+ + Args: + user_id(str): The user to get the account_data for. + room_id(str): The room to get the account_data for. + account_data_type (str): The account data type to get. + Returns: + A deferred dict of the room account_data for that type, or None if + there isn't any set. + """ + def get_account_data_for_room_and_type_txn(txn): + content_json = self._simple_select_one_onecol_txn( + txn, + table="room_account_data", + keyvalues={ + "user_id": user_id, + "room_id": room_id, + "account_data_type": account_data_type, + }, + retcol="content", + allow_none=True + ) + + return json.loads(content_json) if content_json else None + + return self.runInteraction( + "get_account_data_for_room_and_type", + get_account_data_for_room_and_type_txn, + ) + def get_all_updated_account_data(self, last_global_id, last_room_id, current_id, limit): """Get all the client account_data that has changed on the server @@ -312,6 +344,9 @@ class AccountDataStore(AccountDataWorkerStore): self._account_data_stream_cache.entity_has_changed(user_id, next_id) self.get_account_data_for_user.invalidate((user_id,)) self.get_account_data_for_room.invalidate((user_id, room_id,)) + self.get_account_data_for_room_and_type.prefill( + (user_id, room_id, account_data_type,), content, + ) result = self._account_data_id_gen.get_current_token() defer.returnValue(result) From 4b44f05f1941478e2274b4894bfd025deedb9992 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:00:35 +0000 Subject: [PATCH 161/200] Fewer lies are better --- synapse/storage/account_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 077b4faa5..e70c9423e 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -159,14 +159,14 @@ class AccountDataWorkerStore(SQLBaseStore): @cached(num_args=3, max_entries=5000) def get_account_data_for_room_and_type(self, user_id, room_id, account_data_type): - 
"""Get all the client account_data for a user for a room. + """Get the client account_data of given type for a user for a room. Args: user_id(str): The user to get the account_data for. room_id(str): The room to get the account_data for. account_data_type (str): The account data type to get. Returns: - A deferred dict of the room account_data for that type, or None if + A deferred of the room account_data for that type, or None if there isn't any set. """ def get_account_data_for_room_and_type_txn(txn): From 65cf454fd1dda2def0920973e90de3ff305462d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:10:40 +0000 Subject: [PATCH 162/200] Remove unused DataStore --- synapse/replication/slave/storage/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f0d70a673..b1f64ef0d 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -16,7 +16,6 @@ import logging from synapse.api.constants import EventTypes -from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationWorkerStore from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore From 89b7232ff8ac7a7d3dad74b437d06678a6adca5e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:49:37 +0000 Subject: [PATCH 163/200] Fix typo in getting replication account data processing --- synapse/replication/slave/storage/account_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 355d14ff7..d9ba6d69b 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -58,7 +58,7 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, 
BaseSlaved self.get_account_data_for_user.invalidate((row.user_id,)) self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) self.get_account_data_for_room_and_type.invalidate( - (row.user_id, row.room_id, row.account_data_type,), + (row.user_id, row.room_id, row.data_type,), ) self._account_data_stream_cache.entity_has_changed( row.user_id, token From d960d23830cb8bffe49a1eafed21bf66c25e4235 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Mar 2018 11:03:18 +0000 Subject: [PATCH 164/200] Add missing yield during 3pid signature checks --- synapse/handlers/room_member.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7ecdf8724..ed3b97730 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -573,7 +573,7 @@ class RoomMemberHandler(object): if "mxid" in data: if "signatures" not in data: raise AuthError(401, "No signatures on 3pid binding") - self.verify_any_signature(data, id_server) + yield self.verify_any_signature(data, id_server) defer.returnValue(data["mxid"]) except IOError as e: From fafa3e7114bdadd52f190892821ae61862ebfb2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 18:19:34 +0000 Subject: [PATCH 165/200] Split registration store --- .../replication/slave/storage/registration.py | 18 +-- synapse/storage/registration.py | 118 +++++++++--------- 2 files changed, 64 insertions(+), 72 deletions(-) diff --git a/synapse/replication/slave/storage/registration.py b/synapse/replication/slave/storage/registration.py index e27c7332d..7323bf0f1 100644 --- a/synapse/replication/slave/storage/registration.py +++ b/synapse/replication/slave/storage/registration.py @@ -14,20 +14,8 @@ # limitations under the License. 
from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.storage.registration import RegistrationStore +from synapse.storage.registration import RegistrationWorkerStore -class SlavedRegistrationStore(BaseSlavedStore): - def __init__(self, db_conn, hs): - super(SlavedRegistrationStore, self).__init__(db_conn, hs) - - # TODO: use the cached version and invalidate deleted tokens - get_user_by_access_token = RegistrationStore.__dict__[ - "get_user_by_access_token" - ] - - _query_for_auth = DataStore._query_for_auth.__func__ - get_user_by_id = RegistrationStore.__dict__[ - "get_user_by_id" - ] +class SlavedRegistrationStore(RegistrationWorkerStore, BaseSlavedStore): + pass diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 95f75d6df..d809b2ba4 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -19,10 +19,70 @@ from twisted.internet import defer from synapse.api.errors import StoreError, Codes from synapse.storage import background_updates +from synapse.storage._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -class RegistrationStore(background_updates.BackgroundUpdateStore): +class RegistrationWorkerStore(SQLBaseStore): + @cached() + def get_user_by_id(self, user_id): + return self._simple_select_one( + table="users", + keyvalues={ + "name": user_id, + }, + retcols=["name", "password_hash", "is_guest"], + allow_none=True, + desc="get_user_by_id", + ) + + @cached() + def get_user_by_access_token(self, token): + """Get a user from the given access token. + + Args: + token (str): The access token of a user. + Returns: + defer.Deferred: None, if the token did not match, otherwise dict + including the keys `name`, `is_guest`, `device_id`, `token_id`. 
+ """ + return self.runInteraction( + "get_user_by_access_token", + self._query_for_auth, + token + ) + + @defer.inlineCallbacks + def is_server_admin(self, user): + res = yield self._simple_select_one_onecol( + table="users", + keyvalues={"name": user.to_string()}, + retcol="admin", + allow_none=True, + desc="is_server_admin", + ) + + defer.returnValue(res if res else False) + + def _query_for_auth(self, txn, token): + sql = ( + "SELECT users.name, users.is_guest, access_tokens.id as token_id," + " access_tokens.device_id" + " FROM users" + " INNER JOIN access_tokens on users.name = access_tokens.user_id" + " WHERE token = ?" + ) + + txn.execute(sql, (token,)) + rows = self.cursor_to_dict(txn) + if rows: + return rows[0] + + return None + + +class RegistrationStore(RegistrationWorkerStore, + background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): super(RegistrationStore, self).__init__(db_conn, hs) @@ -187,18 +247,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): ) txn.call_after(self.is_guest.invalidate, (user_id,)) - @cached() - def get_user_by_id(self, user_id): - return self._simple_select_one( - table="users", - keyvalues={ - "name": user_id, - }, - retcols=["name", "password_hash", "is_guest"], - allow_none=True, - desc="get_user_by_id", - ) - def get_users_by_id_case_insensitive(self, user_id): """Gets users that match user_id case insensitively. Returns a mapping of user_id -> password_hash. @@ -304,34 +352,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): return self.runInteraction("delete_access_token", f) - @cached() - def get_user_by_access_token(self, token): - """Get a user from the given access token. - - Args: - token (str): The access token of a user. - Returns: - defer.Deferred: None, if the token did not match, otherwise dict - including the keys `name`, `is_guest`, `device_id`, `token_id`. 
- """ - return self.runInteraction( - "get_user_by_access_token", - self._query_for_auth, - token - ) - - @defer.inlineCallbacks - def is_server_admin(self, user): - res = yield self._simple_select_one_onecol( - table="users", - keyvalues={"name": user.to_string()}, - retcol="admin", - allow_none=True, - desc="is_server_admin", - ) - - defer.returnValue(res if res else False) - @cachedInlineCallbacks() def is_guest(self, user_id): res = yield self._simple_select_one_onecol( @@ -344,22 +364,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): defer.returnValue(res if res else False) - def _query_for_auth(self, txn, token): - sql = ( - "SELECT users.name, users.is_guest, access_tokens.id as token_id," - " access_tokens.device_id" - " FROM users" - " INNER JOIN access_tokens on users.name = access_tokens.user_id" - " WHERE token = ?" - ) - - txn.execute(sql, (token,)) - rows = self.cursor_to_dict(txn) - if rows: - return rows[0] - - return None - @defer.inlineCallbacks def user_add_threepid(self, user_id, medium, address, validated_at, added_at): yield self._simple_upsert("user_threepids", { From efb79820b4924e13b2c7d1145cf891fd5d441c2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Mar 2018 14:43:29 +0000 Subject: [PATCH 166/200] Fix bug with delayed cache invalidation stream We poked the notifier before updated the current token for the cache invalidation stream. This mean that sometimes the update wouldn't be sent until the next time a cache was invalidated. --- synapse/storage/_base.py | 26 ++++++++++++++------------ synapse/storage/event_push_actions.py | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 68125006e..2fbebd490 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -48,16 +48,16 @@ class LoggingTransaction(object): passed to the constructor. 
Adds logging and metrics to the .execute() method.""" __slots__ = [ - "txn", "name", "database_engine", "after_callbacks", "final_callbacks", + "txn", "name", "database_engine", "after_callbacks", "exception_callbacks", ] def __init__(self, txn, name, database_engine, after_callbacks, - final_callbacks): + exception_callbacks): object.__setattr__(self, "txn", txn) object.__setattr__(self, "name", name) object.__setattr__(self, "database_engine", database_engine) object.__setattr__(self, "after_callbacks", after_callbacks) - object.__setattr__(self, "final_callbacks", final_callbacks) + object.__setattr__(self, "exception_callbacks", exception_callbacks) def call_after(self, callback, *args, **kwargs): """Call the given callback on the main twisted thread after the @@ -66,8 +66,8 @@ class LoggingTransaction(object): """ self.after_callbacks.append((callback, args, kwargs)) - def call_finally(self, callback, *args, **kwargs): - self.final_callbacks.append((callback, args, kwargs)) + def call_on_exception(self, callback, *args, **kwargs): + self.exception_callbacks.append((callback, args, kwargs)) def __getattr__(self, name): return getattr(self.txn, name) @@ -215,7 +215,7 @@ class SQLBaseStore(object): self._clock.looping_call(loop, 10000) - def _new_transaction(self, conn, desc, after_callbacks, final_callbacks, + def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks, logging_context, func, *args, **kwargs): start = time.time() * 1000 txn_id = self._TXN_ID @@ -236,7 +236,7 @@ class SQLBaseStore(object): txn = conn.cursor() txn = LoggingTransaction( txn, name, self.database_engine, after_callbacks, - final_callbacks, + exception_callbacks, ) r = func(txn, *args, **kwargs) conn.commit() @@ -308,11 +308,11 @@ class SQLBaseStore(object): current_context = LoggingContext.current_context() after_callbacks = [] - final_callbacks = [] + exception_callbacks = [] def inner_func(conn, *args, **kwargs): return self._new_transaction( - conn, desc, 
after_callbacks, final_callbacks, current_context, + conn, desc, after_callbacks, exception_callbacks, current_context, func, *args, **kwargs ) @@ -321,9 +321,10 @@ class SQLBaseStore(object): for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) - finally: - for after_callback, after_args, after_kwargs in final_callbacks: + except: # noqa: E722, as we reraise the exception this is fine. + for after_callback, after_args, after_kwargs in exception_callbacks: after_callback(*after_args, **after_kwargs) + raise defer.returnValue(result) @@ -1000,7 +1001,8 @@ class SQLBaseStore(object): # __exit__ called after the transaction finishes. ctx = self._cache_id_gen.get_next() stream_id = ctx.__enter__() - txn.call_finally(ctx.__exit__, None, None, None) + txn.call_on_exception(ctx.__exit__, None, None, None) + txn.call_after(ctx.__exit__, None, None, None) txn.call_after(self.hs.get_notifier().on_new_replication_data) self._simple_insert_txn( diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 716429356..912e8db1d 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -76,7 +76,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): name="_find_stream_orderings_for_times_txn", database_engine=self.database_engine, after_callbacks=[], - final_callbacks=[], + exception_callbacks=[], ) self._find_stream_orderings_for_times_txn(cur) cur.close() From 06a14876e5d78891a5b3ae6c0f454faefd696e79 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 11:53:39 +0000 Subject: [PATCH 167/200] Add find_first_stream_ordering_after_ts Expose this as a public function which can be called outside a txn --- synapse/storage/event_push_actions.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 716429356..54b54dcc6 100644 --- 
a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -489,6 +489,27 @@ class EventPushActionsWorkerStore(SQLBaseStore): self.stream_ordering_day_ago ) + def find_first_stream_ordering_after_ts(self, ts): + """Gets the stream ordering corresponding to a given timestamp. + + Specifically, finds the stream_ordering of the first event that was + received after the timestamp. This is done by a binary search on the + events table, since there is no index on received_ts, so is + relatively slow. + + Args: + ts (int): timestamp in millis + + Returns: + Deferred[int]: stream ordering of the first event received after + the timestamp + """ + return self.runInteraction( + "_find_first_stream_ordering_after_ts_txn", + self._find_first_stream_ordering_after_ts_txn, + ts, + ) + def _find_first_stream_ordering_after_ts_txn(self, txn, ts): """ Find the stream_ordering of the first event that was received after From c818fcab1152439d79b03494816775b9ee3db613 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 11:47:48 +0000 Subject: [PATCH 168/200] Test and fix find_first_stream_ordering_after_ts It seemed to suffer from a bunch of off-by-one errors. --- synapse/storage/event_push_actions.py | 66 ++++++++++++++++++----- tests/storage/test_event_push_actions.py | 67 ++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 14 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 54b54dcc6..a8c303e11 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -493,15 +493,15 @@ class EventPushActionsWorkerStore(SQLBaseStore): """Gets the stream ordering corresponding to a given timestamp. Specifically, finds the stream_ordering of the first event that was - received after the timestamp. This is done by a binary search on the - events table, since there is no index on received_ts, so is + received on or after the timestamp. 
This is done by a binary search on + the events table, since there is no index on received_ts, so is relatively slow. Args: ts (int): timestamp in millis Returns: - Deferred[int]: stream ordering of the first event received after + Deferred[int]: stream ordering of the first event received on/after the timestamp """ return self.runInteraction( @@ -510,16 +510,24 @@ class EventPushActionsWorkerStore(SQLBaseStore): ts, ) - def _find_first_stream_ordering_after_ts_txn(self, txn, ts): + @staticmethod + def _find_first_stream_ordering_after_ts_txn(txn, ts): """ - Find the stream_ordering of the first event that was received after - a given timestamp. This is relatively slow as there is no index on - received_ts but we can then use this to delete push actions before + Find the stream_ordering of the first event that was received on or + after a given timestamp. This is relatively slow as there is no index + on received_ts but we can then use this to delete push actions before this. received_ts must necessarily be in the same order as stream_ordering and stream_ordering is indexed, so we manually binary search using stream_ordering + + Args: + txn (twisted.enterprise.adbapi.Transaction): + ts (int): timestamp to search for + + Returns: + int: stream ordering """ txn.execute("SELECT MAX(stream_ordering) FROM events") max_stream_ordering = txn.fetchone()[0] @@ -527,23 +535,53 @@ class EventPushActionsWorkerStore(SQLBaseStore): if max_stream_ordering is None: return 0 + # We want the first stream_ordering in which received_ts is greater + # than or equal to ts. Call this point X. + # + # We maintain the invariants: + # + # range_start <= X <= range_end + # range_start = 0 - range_end = max_stream_ordering + range_end = max_stream_ordering + 1 + # Given a stream_ordering, look up the timestamp at that + # stream_ordering. + # + # The array may be sparse (we may be missing some stream_orderings). 
+ # We treat the gaps as the same as having the same value as the + # preceding entry, because we will pick the lowest stream_ordering + # which satisfies our requirement of received_ts >= ts. + # + # For example, if our array of events indexed by stream_ordering is + # [10, , 20], we should treat this as being equivalent to + # [10, 10, 20]. + # sql = ( "SELECT received_ts FROM events" - " WHERE stream_ordering > ?" - " ORDER BY stream_ordering" + " WHERE stream_ordering <= ?" + " ORDER BY stream_ordering DESC" " LIMIT 1" ) - while range_end - range_start > 1: - middle = int((range_end + range_start) / 2) + while range_end - range_start > 0: + middle = (range_end + range_start) // 2 txn.execute(sql, (middle,)) - middle_ts = txn.fetchone()[0] + row = txn.fetchone() + if row is None: + # no rows with stream_ordering<=middle + range_start = middle + 1 + continue + + middle_ts = row[0] if ts > middle_ts: - range_start = middle + # we got a timestamp lower than the one we were looking for. + # definitely need to look higher: X > middle. + range_start = middle + 1 else: + # we got a timestamp higher than (or the same as) the one we + # were looking for. We aren't yet sure about the point we + # looked up, but we can be sure that X <= middle. 
range_end = middle return range_end diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 6c1aad149..dbaaa12e2 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -127,3 +127,70 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): yield _assert_counts(1, 1) yield _rotate(10) yield _assert_counts(1, 1) + + @tests.unittest.DEBUG + @defer.inlineCallbacks + def test_find_first_stream_ordering_after_ts(self): + def add_event(so, ts): + return self.store._simple_insert("events", { + "stream_ordering": so, + "received_ts": ts, + "event_id": "event%i" % so, + "type": "", + "room_id": "", + "content": "", + "processed": True, + "outlier": False, + "topological_ordering": 0, + "depth": 0, + }) + + # start with the base case where there are no events in the table + r = yield self.store.find_first_stream_ordering_after_ts(11) + self.assertEqual(r, 0) + + # now with one event + yield add_event(2, 10) + r = yield self.store.find_first_stream_ordering_after_ts(9) + self.assertEqual(r, 2) + r = yield self.store.find_first_stream_ordering_after_ts(10) + self.assertEqual(r, 2) + r = yield self.store.find_first_stream_ordering_after_ts(11) + self.assertEqual(r, 3) + + # add a bunch of dummy events to the events table + for (stream_ordering, ts) in ( + (3, 110), + (4, 120), + (5, 120), + (10, 130), + (20, 140), + ): + yield add_event(stream_ordering, ts) + + r = yield self.store.find_first_stream_ordering_after_ts(110) + self.assertEqual(r, 3, + "First event after 110ms should be 3, was %i" % r) + + # 4 and 5 are both after 12: we want 4 rather than 5 + r = yield self.store.find_first_stream_ordering_after_ts(120) + self.assertEqual(r, 4, + "First event after 120ms should be 4, was %i" % r) + + r = yield self.store.find_first_stream_ordering_after_ts(129) + self.assertEqual(r, 10, + "First event after 129ms should be 10, was %i" % r) + + # check we can get the last event + 
r = yield self.store.find_first_stream_ordering_after_ts(140) + self.assertEqual(r, 20, + "First event after 14ms should be 20, was %i" % r) + + # off the end + r = yield self.store.find_first_stream_ordering_after_ts(160) + self.assertEqual(r, 21) + + # check we can find an event at ordering zero + yield add_event(0, 5) + r = yield self.store.find_first_stream_ordering_after_ts(1) + self.assertEqual(r, 0) From 2c911d75e820e321c3e9c885f74ffeabd666b60f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 12:24:49 +0000 Subject: [PATCH 169/200] Fix comment typo --- tests/storage/test_event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index dbaaa12e2..575374c6a 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -172,7 +172,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): self.assertEqual(r, 3, "First event after 110ms should be 3, was %i" % r) - # 4 and 5 are both after 12: we want 4 rather than 5 + # 4 and 5 are both after 120: we want 4 rather than 5 r = yield self.store.find_first_stream_ordering_after_ts(120) self.assertEqual(r, 4, "First event after 120ms should be 4, was %i" % r) From f8bfcd7e0d2fc6399eb654a41773cd603b4037fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Mar 2018 23:20:54 +0000 Subject: [PATCH 170/200] Provide a means to pass a timestamp to purge_history --- docs/admin_api/purge_history_api.rst | 11 +++++- synapse/handlers/message.py | 14 +++---- synapse/rest/client/v1/admin.py | 58 ++++++++++++++++++++++++++-- synapse/storage/stream.py | 27 +++++++++++++ 4 files changed, 96 insertions(+), 14 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index a3a17e9f9..acf1bc574 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -8,9 
+8,9 @@ Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. -The API is simply: +The API is: -``POST /_matrix/client/r0/admin/purge_history/<room_id>/<event_id>`` +``POST /_matrix/client/r0/admin/purge_history/<room_id>[/<event_id>]`` including an ``access_token`` of a server admin. @@ -25,3 +25,10 @@ To delete local events as well, set ``delete_local_events`` in the body: { "delete_local_events": true } + +The caller must specify the point in the room to purge up to. This can be +specified by including an event_id in the URI, or by setting a +``purge_up_to_event_id`` or ``purge_up_to_ts`` in the request body. If an event +id is given, that event (and others at the same graph depth) will be retained. +If ``purge_up_to_ts`` is given, it should be a timestamp since the unix epoch, +in milliseconds. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 7d28c2745..dd00d8a86 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -52,16 +52,12 @@ class MessageHandler(BaseHandler): self.pagination_lock = ReadWriteLock() @defer.inlineCallbacks - def purge_history(self, room_id, event_id, delete_local_events=False): - event = yield self.store.get_event(event_id) - - if event.room_id != room_id: - raise SynapseError(400, "Event is for wrong room.") - - depth = event.depth - + def purge_history(self, room_id, topological_ordering, + delete_local_events=False): with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, depth, delete_local_events) + yield self.store.purge_history( + room_id, topological_ordering, delete_local_events, + ) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 3917eee42..dcf6215da 100644 --- a/synapse/rest/client/v1/admin.py +++ 
b/synapse/rest/client/v1/admin.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.api.constants import Membership -from synapse.api.errors import AuthError, SynapseError +from synapse.api.errors import AuthError, SynapseError, Codes from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request @@ -114,12 +114,18 @@ class PurgeMediaCacheRestServlet(ClientV1RestServlet): class PurgeHistoryRestServlet(ClientV1RestServlet): PATTERNS = client_path_patterns( - "/admin/purge_history/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)" + "/admin/purge_history/(?P<room_id>[^/]*)(/(?P<event_id>[^/]+))?" ) def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer) + """ super(PurgeHistoryRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.store = hs.get_datastore() @defer.inlineCallbacks def on_POST(self, request, room_id, event_id): @@ -133,8 +139,54 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): delete_local_events = bool(body.get("delete_local_events", False)) + # establish the topological ordering we should keep events from. The + # user can provide an event_id in the URL or the request body, or can + # provide a timestamp in the request body. 
+ if event_id is None: + event_id = body.get('purge_up_to_event_id') + + if event_id is not None: + event = yield self.store.get_event(event_id) + + if event.room_id != room_id: + raise SynapseError(400, "Event is for wrong room.") + + depth = event.depth + logger.info( + "[purge] purging up to depth %i (event_id %s)", + depth, event_id, + ) + elif 'purge_up_to_ts' in body: + ts = body['purge_up_to_ts'] + if not isinstance(ts, int): + raise SynapseError( + 400, "purge_up_to_ts must be an int", + errcode=Codes.BAD_JSON, + ) + + stream_ordering = ( + yield self.store.find_first_stream_ordering_after_ts(ts) + ) + + (_, depth, _) = ( + yield self.store.get_room_event_after_stream_ordering( + room_id, stream_ordering, + ) + ) + logger.info( + "[purge] purging up to depth %i (received_ts %i => " + "stream_ordering %i)", + depth, ts, stream_ordering, + ) + else: + raise SynapseError( + 400, + "must specify purge_up_to_event_id or purge_up_to_ts", + errcode=Codes.BAD_JSON, + ) + yield self.handlers.message_handler.purge_history( - room_id, event_id, + room_id, depth, delete_local_events=delete_local_events, ) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index a2527d2a3..515a04699 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -416,6 +416,33 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): "get_recent_events_for_room", get_recent_events_for_room_txn ) + def get_room_event_after_stream_ordering(self, room_id, stream_ordering): + """Gets details of the first event in a room at or after a stream ordering + + Args: + room_id (str): + stream_ordering (int): + + Returns: + Deferred[(int, int, str)]: + (stream ordering, topological ordering, event_id) + """ + def _f(txn): + sql = ( + "SELECT stream_ordering, topological_ordering, event_id" + " FROM events" + " WHERE room_id = ? AND stream_ordering >= ?" 
+ " AND NOT outlier" + " ORDER BY stream_ordering" + " LIMIT 1" + ) + txn.execute(sql, (room_id, stream_ordering, )) + return txn.fetchone() + + return self.runInteraction( + "get_room_event_after_stream_ordering", _f, + ) + @defer.inlineCallbacks def get_room_events_max_id(self, room_id=None): """Returns the current token for rooms stream. From 2e223163ff59fa7d6030654dd8c74e58f8aa1deb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:11:30 +0000 Subject: [PATCH 171/200] Split Directory store --- .../replication/slave/storage/directory.py | 8 ++-- synapse/storage/directory.py | 45 ++++++++++--------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/synapse/replication/slave/storage/directory.py b/synapse/replication/slave/storage/directory.py index 7301d885f..6deecd396 100644 --- a/synapse/replication/slave/storage/directory.py +++ b/synapse/replication/slave/storage/directory.py @@ -14,10 +14,8 @@ # limitations under the License. from ._base import BaseSlavedStore -from synapse.storage.directory import DirectoryStore +from synapse.storage.directory import DirectoryWorkerStore -class DirectoryStore(BaseSlavedStore): - get_aliases_for_room = DirectoryStore.__dict__[ - "get_aliases_for_room" - ] +class DirectoryStore(DirectoryWorkerStore, BaseSlavedStore): + pass diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 79e7c540a..041b0b457 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -29,8 +29,7 @@ RoomAliasMapping = namedtuple( ) -class DirectoryStore(SQLBaseStore): - +class DirectoryWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_association_from_room_alias(self, room_alias): """ Get's the room_id and server list for a given room_alias @@ -69,6 +68,28 @@ class DirectoryStore(SQLBaseStore): RoomAliasMapping(room_id, room_alias.to_string(), servers) ) + def get_room_alias_creator(self, room_alias): + return self._simple_select_one_onecol( + table="room_aliases", + 
keyvalues={ + "room_alias": room_alias, + }, + retcol="creator", + desc="get_room_alias_creator", + allow_none=True + ) + + @cached(max_entries=5000) + def get_aliases_for_room(self, room_id): + return self._simple_select_onecol( + "room_aliases", + {"room_id": room_id}, + "room_alias", + desc="get_aliases_for_room", + ) + + +class DirectoryStore(DirectoryWorkerStore): @defer.inlineCallbacks def create_room_alias_association(self, room_alias, room_id, servers, creator=None): """ Creates an associatin between a room alias and room_id/servers @@ -116,17 +137,6 @@ class DirectoryStore(SQLBaseStore): ) defer.returnValue(ret) - def get_room_alias_creator(self, room_alias): - return self._simple_select_one_onecol( - table="room_aliases", - keyvalues={ - "room_alias": room_alias, - }, - retcol="creator", - desc="get_room_alias_creator", - allow_none=True - ) - @defer.inlineCallbacks def delete_room_alias(self, room_alias): room_id = yield self.runInteraction( @@ -162,15 +172,6 @@ class DirectoryStore(SQLBaseStore): return room_id - @cached(max_entries=5000) - def get_aliases_for_room(self, room_id): - return self._simple_select_onecol( - "room_aliases", - {"room_id": room_id}, - "room_alias", - desc="get_aliases_for_room", - ) - def update_aliases_for_room(self, old_room_id, new_room_id, creator): def _update_aliases_for_room_txn(txn): sql = "UPDATE room_aliases SET room_id = ?, creator = ? WHERE room_id = ?" 
From 69ce365b793ac35c8e9ee6c3b3a0d04d0e61db4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:12:22 +0000 Subject: [PATCH 172/200] Fix cache invalidation on deletion --- synapse/storage/directory.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 041b0b457..d0c005975 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -145,7 +145,6 @@ class DirectoryStore(DirectoryWorkerStore): room_alias, ) - self.get_aliases_for_room.invalidate((room_id,)) defer.returnValue(room_id) def _delete_room_alias_txn(self, txn, room_alias): @@ -170,6 +169,10 @@ class DirectoryStore(DirectoryWorkerStore): (room_alias.to_string(),) ) + self._invalidate_cache_and_stream( + txn, self.get_aliases_for_room, (room_id,) + ) + return room_id def update_aliases_for_room(self, old_room_id, new_room_id, creator): From d4ffe61d4fb71953bff0f94ff5d1603afe7d46f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:42:57 +0000 Subject: [PATCH 173/200] Remove ability for AS users to call /events and /sync This functionality has been deprecated for a while as well as being broken for a while. Instead of fixing it let's just remove it entirely. 
See: https://github.com/matrix-org/matrix-doc/issues/1144 --- synapse/handlers/room.py | 9 ++-- synapse/handlers/sync.py | 10 +++-- synapse/storage/appservice.py | 82 +++-------------------------------- synapse/storage/stream.py | 76 -------------------------------- 4 files changed, 14 insertions(+), 163 deletions(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6c425828c..8df8fcbba 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -475,12 +475,9 @@ class RoomEventSource(object): user.to_string() ) if app_service: - events, end_key = yield self.store.get_appservice_room_stream( - service=app_service, - from_key=from_key, - to_key=to_key, - limit=limit, - ) + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: room_events = yield self.store.get_membership_changes_for_user( user.to_string(), from_key, to_key diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b12988f3c..56b86356f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -998,8 +998,9 @@ class SyncHandler(object): app_service = self.store.get_app_service_by_user_id(user_id) if app_service: - rooms = yield self.store.get_app_service_rooms(app_service) - joined_room_ids = set(r.room_id for r in rooms) + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: joined_room_ids = yield self.store.get_rooms_for_user(user_id) @@ -1030,8 +1031,9 @@ class SyncHandler(object): app_service = self.store.get_app_service_by_user_id(user_id) if app_service: - rooms = yield self.store.get_app_service_rooms(app_service) - joined_room_ids = set(r.room_id for r in rooms) + # We no longer support AS users using /sync directly. 
+ # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: joined_room_ids = yield self.store.get_rooms_for_user(user_id) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 90fb51d43..12ea8a158 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -18,11 +18,9 @@ import re import simplejson as json from twisted.internet import defer -from synapse.api.constants import Membership from synapse.appservice import AppServiceTransaction from synapse.config.appservice import load_appservices from synapse.storage.events import EventsWorkerStore -from synapse.storage.roommember import RoomsForUser from ._base import SQLBaseStore @@ -115,81 +113,11 @@ class ApplicationServiceWorkerStore(SQLBaseStore): class ApplicationServiceStore(ApplicationServiceWorkerStore): - - def __init__(self, db_conn, hs): - super(ApplicationServiceStore, self).__init__(db_conn, hs) - self.hostname = hs.hostname - - def get_app_service_rooms(self, service): - """Get a list of RoomsForUser for this application service. - - Application services may be "interested" in lots of rooms depending on - the room ID, the room aliases, or the members in the room. This function - takes all of these into account and returns a list of RoomsForUser which - represent the entire list of room IDs that this application service - wants to know about. - - Args: - service: The application service to get a room list for. - Returns: - A list of RoomsForUser. - """ - return self.runInteraction( - "get_app_service_rooms", - self._get_app_service_rooms_txn, - service, - ) - - def _get_app_service_rooms_txn(self, txn, service): - # get all rooms matching the room ID regex. 
- room_entries = self._simple_select_list_txn( - txn=txn, table="rooms", keyvalues=None, retcols=["room_id"] - ) - matching_room_list = set([ - r["room_id"] for r in room_entries if - service.is_interested_in_room(r["room_id"]) - ]) - - # resolve room IDs for matching room alias regex. - room_alias_mappings = self._simple_select_list_txn( - txn=txn, table="room_aliases", keyvalues=None, - retcols=["room_id", "room_alias"] - ) - matching_room_list |= set([ - r["room_id"] for r in room_alias_mappings if - service.is_interested_in_alias(r["room_alias"]) - ]) - - # get all rooms for every user for this AS. This is scoped to users on - # this HS only. - user_list = self._simple_select_list_txn( - txn=txn, table="users", keyvalues=None, retcols=["name"] - ) - user_list = [ - u["name"] for u in user_list if - service.is_interested_in_user(u["name"]) - ] - rooms_for_user_matching_user_id = set() # RoomsForUser list - for user_id in user_list: - # FIXME: This assumes this store is linked with RoomMemberStore :( - rooms_for_user = self._get_rooms_for_user_where_membership_is_txn( - txn=txn, - user_id=user_id, - membership_list=[Membership.JOIN] - ) - rooms_for_user_matching_user_id |= set(rooms_for_user) - - # make RoomsForUser tuples for room ids and aliases which are not in the - # main rooms_for_user_list - e.g. they are rooms which do not have AS - # registered users in it. - known_room_ids = [r.room_id for r in rooms_for_user_matching_user_id] - missing_rooms_for_user = [ - RoomsForUser(r, service.sender, "join") for r in - matching_room_list if r not in known_room_ids - ] - rooms_for_user_matching_user_id |= set(missing_rooms_for_user) - - return rooms_for_user_matching_user_id + # This is currently empty due to there not being any AS storage functions + # that can't be run on the workers. Since this may change in future, and + # to keep consistency with the other stores, we keep this empty class for + # now. 
+ pass class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index a2527d2a3..b78151cd8 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -39,7 +39,6 @@ from synapse.storage._base import SQLBaseStore from synapse.storage.events import EventsWorkerStore from synapse.util.caches.descriptors import cached -from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn @@ -717,81 +716,6 @@ class StreamStore(StreamWorkerStore): def get_room_min_stream_ordering(self): return self._backfill_id_gen.get_current_token() - @defer.inlineCallbacks - def get_appservice_room_stream(self, service, from_key, to_key, limit=0): - # NB this lives here instead of appservice.py so we can reuse the - # 'private' StreamToken class in this file. - if limit: - limit = max(limit, MAX_STREAM_SIZE) - else: - limit = MAX_STREAM_SIZE - - # From and to keys should be integers from ordering. - from_id = RoomStreamToken.parse_stream_token(from_key) - to_id = RoomStreamToken.parse_stream_token(to_key) - - if from_key == to_key: - defer.returnValue(([], to_key)) - return - - # select all the events between from/to with a sensible limit - sql = ( - "SELECT e.event_id, e.room_id, e.type, s.state_key, " - "e.stream_ordering FROM events AS e " - "LEFT JOIN state_events as s ON " - "e.event_id = s.event_id " - "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? 
" - "ORDER BY stream_ordering ASC LIMIT %(limit)d " - ) % { - "limit": limit - } - - def f(txn): - # pull out all the events between the tokens - txn.execute(sql, (from_id.stream, to_id.stream,)) - rows = self.cursor_to_dict(txn) - - # Logic: - # - We want ALL events which match the AS room_id regex - # - We want ALL events which match the rooms represented by the AS - # room_alias regex - # - We want ALL events for rooms that AS users have joined. - # This is currently supported via get_app_service_rooms (which is - # used for the Notifier listener rooms). We can't reasonably make a - # SQL query for these room IDs, so we'll pull all the events between - # from/to and filter in python. - rooms_for_as = self._get_app_service_rooms_txn(txn, service) - room_ids_for_as = [r.room_id for r in rooms_for_as] - - def app_service_interested(row): - if row["room_id"] in room_ids_for_as: - return True - - if row["type"] == EventTypes.Member: - if service.is_interested_in_user(row.get("state_key")): - return True - return False - - return [r for r in rows if app_service_interested(r)] - - rows = yield self.runInteraction("get_appservice_room_stream", f) - - ret = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True - ) - - self._set_before_and_after(ret, rows, topo_order=from_id is None) - - if rows: - key = "s%d" % max(r["stream_ordering"] for r in rows) - else: - # Assume we didn't get anything because there was nothing to - # get. 
- key = to_key - - defer.returnValue((ret, key)) - @defer.inlineCallbacks def paginate_room_events(self, room_id, from_key, to_key=None, direction='b', limit=-1, event_filter=None): From ed9b5eced4f17dfb0a92167a6281e13054821d6f Mon Sep 17 00:00:00 2001 From: Krombel Date: Mon, 5 Mar 2018 17:51:09 +0100 Subject: [PATCH 174/200] use bcrypt.checkpw in bcrypt 3.1.0 checkpw got introduced (already 2 years ago) This makes use of that with enhancements which might get introduced by that Signed-Off-by: Matthias Kesler --- synapse/handlers/auth.py | 6 ++++-- synapse/python_dependencies.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 258cc345d..a5365c4fe 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -863,8 +863,10 @@ class AuthHandler(BaseHandler): """ def _do_validate_hash(): - return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, - stored_hash.encode('utf8')) == stored_hash + return bcrypt.checkpw( + password.encode('utf8') + self.hs.config.password_pepper, + stored_hash.encode('utf8') + ) if stored_hash: return make_deferred_yieldable(threads.deferToThread(_do_validate_hash)) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 5d65b5fd6..91179ce53 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -31,7 +31,7 @@ REQUIREMENTS = { "pyyaml": ["yaml"], "pyasn1": ["pyasn1"], "daemonize": ["daemonize"], - "bcrypt": ["bcrypt"], + "bcrypt": ["bcrypt>=3.1.0"], "pillow": ["PIL"], "pydenticon": ["pydenticon"], "ujson": ["ujson"], From 8cb44da4aa569188faa2a94aae6bc093aa8e22ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 12:06:19 +0000 Subject: [PATCH 175/200] Fix race in sync when joining room The race happens when the user joins a room at the same time as doing a sync. We fetch the current token and then get the rooms the user is in. 
If the join happens after the current token, but before we get the rooms we end up sending down a partial room entry in the sync. This is fixed by looking at the stream ordering of the membership returned by get_rooms_for_user, and handling the case when that stream ordering is after the current token. --- synapse/handlers/sync.py | 103 +++++++++++++++++++++++++--------- synapse/storage/events.py | 2 +- synapse/storage/roommember.py | 27 ++++++++- 3 files changed, 102 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 56b86356f..163d80417 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -235,10 +235,10 @@ class SyncHandler(object): defer.returnValue(rules) @defer.inlineCallbacks - def ephemeral_by_room(self, sync_config, now_token, since_token=None): + def ephemeral_by_room(self, sync_result_builder, now_token, since_token=None): """Get the ephemeral events for each room the user is in Args: - sync_config (SyncConfig): The flags, filters and user for the sync. + sync_result_builder(SyncResultBuilder) now_token (StreamToken): Where the server is currently up to. since_token (StreamToken): Where the server was when the client last synced. @@ -248,10 +248,12 @@ class SyncHandler(object): typing events for that room. 
""" + sync_config = sync_result_builder.sync_config + with Measure(self.clock, "ephemeral_by_room"): typing_key = since_token.typing_key if since_token else "0" - room_ids = yield self.store.get_rooms_for_user(sync_config.user.to_string()) + room_ids = sync_result_builder.joined_room_ids typing_source = self.event_sources.sources["typing"] typing, typing_key = yield typing_source.get_new_events( @@ -565,10 +567,22 @@ class SyncHandler(object): # Always use the `now_token` in `SyncResultBuilder` now_token = yield self.event_sources.get_current_token() + user_id = sync_config.user.to_string() + app_service = self.store.get_app_service_by_user_id(user_id) + if app_service: + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() + else: + joined_room_ids = yield self.get_rooms_for_user_at( + user_id, now_token.room_stream_id, + ) + sync_result_builder = SyncResultBuilder( sync_config, full_state, since_token=since_token, now_token=now_token, + joined_room_ids=joined_room_ids, ) account_data_by_room = yield self._generate_sync_entry_for_account_data( @@ -603,7 +617,6 @@ class SyncHandler(object): device_id = sync_config.device_id one_time_key_counts = {} if device_id: - user_id = sync_config.user.to_string() one_time_key_counts = yield self.store.count_e2e_one_time_keys( user_id, device_id ) @@ -891,7 +904,7 @@ class SyncHandler(object): ephemeral_by_room = {} else: now_token, ephemeral_by_room = yield self.ephemeral_by_room( - sync_result_builder.sync_config, + sync_result_builder, now_token=sync_result_builder.now_token, since_token=sync_result_builder.since_token, ) @@ -996,16 +1009,8 @@ class SyncHandler(object): if rooms_changed: defer.returnValue(True) - app_service = self.store.get_app_service_by_user_id(user_id) - if app_service: - # We no longer support AS users using /sync directly. 
- # See https://github.com/matrix-org/matrix-doc/issues/1144 - raise NotImplementedError() - else: - joined_room_ids = yield self.store.get_rooms_for_user(user_id) - stream_id = RoomStreamToken.parse_stream_token(since_token.room_key).stream - for room_id in joined_room_ids: + for room_id in sync_result_builder.joined_room_ids: if self.store.has_room_changed_since(room_id, stream_id): defer.returnValue(True) defer.returnValue(False) @@ -1029,14 +1034,6 @@ class SyncHandler(object): assert since_token - app_service = self.store.get_app_service_by_user_id(user_id) - if app_service: - # We no longer support AS users using /sync directly. - # See https://github.com/matrix-org/matrix-doc/issues/1144 - raise NotImplementedError() - else: - joined_room_ids = yield self.store.get_rooms_for_user(user_id) - # Get a list of membership change events that have happened. rooms_changed = yield self.store.get_membership_changes_for_user( user_id, since_token.room_key, now_token.room_key @@ -1059,7 +1056,7 @@ class SyncHandler(object): # we do send down the room, and with full state, where necessary old_state_ids = None - if room_id in joined_room_ids and non_joins: + if room_id in sync_result_builder.joined_room_ids and non_joins: # Always include if the user (re)joined the room, especially # important so that device list changes are calculated correctly. 
# If there are non join member events, but we are still in the room, @@ -1069,7 +1066,7 @@ class SyncHandler(object): # User is in the room so we don't need to do the invite/leave checks continue - if room_id in joined_room_ids or has_join: + if room_id in sync_result_builder.joined_room_ids or has_join: old_state_ids = yield self.get_state_at(room_id, since_token) old_mem_ev_id = old_state_ids.get((EventTypes.Member, user_id), None) old_mem_ev = None @@ -1081,7 +1078,7 @@ class SyncHandler(object): newly_joined_rooms.append(room_id) # If user is in the room then we don't need to do the invite/leave checks - if room_id in joined_room_ids: + if room_id in sync_result_builder.joined_room_ids: continue if not non_joins: @@ -1148,7 +1145,7 @@ class SyncHandler(object): # Get all events for rooms we're currently joined to. room_to_events = yield self.store.get_room_events_stream_for_rooms( - room_ids=joined_room_ids, + room_ids=sync_result_builder.joined_room_ids, from_key=since_token.room_key, to_key=now_token.room_key, limit=timeline_limit + 1, @@ -1156,7 +1153,7 @@ class SyncHandler(object): # We loop through all room ids, even if there are no new events, in case # there are non room events taht we need to notify about. - for room_id in joined_room_ids: + for room_id in sync_result_builder.joined_room_ids: room_entry = room_to_events.get(room_id, None) if room_entry: @@ -1364,6 +1361,54 @@ class SyncHandler(object): else: raise Exception("Unrecognized rtype: %r", room_builder.rtype) + @defer.inlineCallbacks + def get_rooms_for_user_at(self, user_id, stream_ordering): + """Get set of joined rooms for a user at the given stream ordering. + + The stream ordering *must* be recent, otherwise this may throw an + exception if older than a month. (This function is called with the + current token, which should be perfectly fine). 
+ + Args: + user_id (str) + stream_ordering (int) + + ReturnValue: + Deferred[frozenset[str]]: Set of room_ids the user is in at given + stream_ordering. + """ + joined_rooms = yield self.store.get_rooms_for_user_with_stream_ordering( + user_id, + ) + + joined_room_ids = set() + + # We need to check that the stream ordering of the join for each room + # is before the stream_ordering asked for. This might not be the case + # if the user joins a room between us getting the current token and + # calling `get_rooms_for_user_with_stream_ordering`. + # If the membership's stream ordering is after the given stream + # ordering, we need to go and work out if the user was in the room + # before. + for room_id, membeship_stream_ordering in joined_rooms: + if membeship_stream_ordering <= stream_ordering: + joined_room_ids.add(room_id) + continue + + logger.info("SH joined_room_ids membership after current token") + + extrems = yield self.store.get_forward_extremeties_for_room( + room_id, stream_ordering, + ) + users_in_room = yield self.state.get_current_user_in_room( + room_id, extrems, + ) + if user_id in users_in_room: + joined_room_ids.add(room_id) + + joined_room_ids = frozenset(joined_room_ids) + defer.returnValue(joined_room_ids) + def _action_has_highlight(actions): for action in actions: @@ -1413,7 +1458,8 @@ def _calculate_state(timeline_contains, timeline_start, previous, current): class SyncResultBuilder(object): "Used to help build up a new SyncResult for a user" - def __init__(self, sync_config, full_state, since_token, now_token): + def __init__(self, sync_config, full_state, since_token, now_token, + joined_room_ids): """ Args: sync_config(SyncConfig) @@ -1425,6 +1471,7 @@ class SyncResultBuilder(object): self.full_state = full_state self.since_token = since_token self.now_token = now_token + self.joined_room_ids = joined_room_ids self.presence = [] self.account_data = [] diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 
057b1be4d..826fad307 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -754,7 +754,7 @@ class EventsStore(EventsWorkerStore): for member in members_changed: self._invalidate_cache_and_stream( - txn, self.get_rooms_for_user, (member,) + txn, self.get_rooms_for_user_with_stream_ordering, (member,) ) for host in set(get_domain_from_id(u) for u in members_changed): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index d79877dac..52e19e16b 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -38,6 +38,11 @@ RoomsForUser = namedtuple( ("room_id", "sender", "membership", "event_id", "stream_ordering") ) +GetRoomsForUserWithStreamOrdering = namedtuple( + "_GetRoomsForUserWithStreamOrdering", + ("room_id", "stream_ordering",) +) + # We store this using a namedtuple so that we save about 3x space over using a # dict. @@ -181,12 +186,32 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results @cachedInlineCallbacks(max_entries=500000, iterable=True) - def get_rooms_for_user(self, user_id): + def get_rooms_for_user_with_stream_ordering(self, user_id): """Returns a set of room_ids the user is currently joined to + + Args: + user_id (str) + + Returns: + Deferred[frozenset[GetRoomsForUserWithStreamOrdering]]: Returns + the rooms the user is in currently, along with the stream ordering + of the most recent join for that user and room. 
""" rooms = yield self.get_rooms_for_user_where_membership_is( user_id, membership_list=[Membership.JOIN], ) + defer.returnValue(frozenset( + GetRoomsForUserWithStreamOrdering(r.room_id, r.stream_ordering) + for r in rooms + )) + + @defer.inlineCallbacks + def get_rooms_for_user(self, user_id, on_invalidate=None): + """Returns a set of room_ids the user is currently joined to + """ + rooms = yield self.get_rooms_for_user_with_stream_ordering( + user_id, on_invalidate=on_invalidate, + ) defer.returnValue(frozenset(r.room_id for r in rooms)) @cachedInlineCallbacks(max_entries=500000, cache_context=True, iterable=True) From 02a1296ad634ff8200abe539d27a53a6f850081d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 13:12:08 +0000 Subject: [PATCH 176/200] Fix typo --- synapse/handlers/sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 163d80417..b323e0e6b 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1390,8 +1390,8 @@ class SyncHandler(object): # If the membership's stream ordering is after the given stream # ordering, we need to go and work out if the user was in the room # before. 
- for room_id, membeship_stream_ordering in joined_rooms: - if membeship_stream_ordering <= stream_ordering: + for room_id, membership_stream_ordering in joined_rooms: + if membership_stream_ordering <= stream_ordering: joined_room_ids.add(room_id) continue From a56d54dcb7d3d42fc417d6af82a46d86edf6f73d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 13:29:49 +0000 Subject: [PATCH 177/200] Fix up log message --- synapse/handlers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b323e0e6b..0f713ce03 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1395,7 +1395,7 @@ class SyncHandler(object): joined_room_ids.add(room_id) continue - logger.info("SH joined_room_ids membership after current token") + logger.info("User joined room after current token: %s", room_id) extrems = yield self.store.get_forward_extremeties_for_room( room_id, stream_ordering, From 20f40348d4ea55cc5b98528673e26bac7396a3cb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Mar 2018 19:59:24 +0000 Subject: [PATCH 178/200] Factor run_in_background out from preserve_fn It annoys me that we create temporary function objects when there's really no need for it. Let's factor the gubbins out of preserve_fn and start using it. --- docs/log_contexts.rst | 8 +++--- synapse/util/logcontext.py | 51 +++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/docs/log_contexts.rst b/docs/log_contexts.rst index b19b7fa1e..82ac4f91e 100644 --- a/docs/log_contexts.rst +++ b/docs/log_contexts.rst @@ -279,9 +279,9 @@ Obviously that option means that the operations done in that might be fixed by setting a different logcontext via a ``with LoggingContext(...)`` in ``background_operation``). 
-The second option is to use ``logcontext.preserve_fn``, which wraps a function -so that it doesn't reset the logcontext even when it returns an incomplete -deferred, and adds a callback to the returned deferred to reset the +The second option is to use ``logcontext.run_in_background``, which wraps a +function so that it doesn't reset the logcontext even when it returns an +incomplete deferred, and adds a callback to the returned deferred to reset the logcontext. In other words, it turns a function that follows the Synapse rules about logcontexts and Deferreds into one which behaves more like an external function — the opposite operation to that described in the previous section. @@ -293,7 +293,7 @@ It can be used like this: def do_request_handling(): yield foreground_operation() - logcontext.preserve_fn(background_operation)() + logcontext.run_in_background(background_operation) # this will now be logged against the request context logger.debug("Request handling complete") diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a8dea15c1..d660ec785 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -292,36 +292,41 @@ class PreserveLoggingContext(object): def preserve_fn(f): - """Wraps a function, to ensure that the current context is restored after + """Function decorator which wraps the function with run_in_background""" + def g(*args, **kwargs): + return run_in_background(f, *args, **kwargs) + return g + + +def run_in_background(f, *args, **kwargs): + """Calls a function, ensuring that the current context is restored after return from the function, and that the sentinel context is set once the deferred returned by the funtion completes. Useful for wrapping functions that return a deferred which you don't yield on. 
""" - def g(*args, **kwargs): - current = LoggingContext.current_context() - res = f(*args, **kwargs) - if isinstance(res, defer.Deferred) and not res.called: - # The function will have reset the context before returning, so - # we need to restore it now. - LoggingContext.set_current_context(current) + current = LoggingContext.current_context() + res = f(*args, **kwargs) + if isinstance(res, defer.Deferred) and not res.called: + # The function will have reset the context before returning, so + # we need to restore it now. + LoggingContext.set_current_context(current) - # The original context will be restored when the deferred - # completes, but there is nothing waiting for it, so it will - # get leaked into the reactor or some other function which - # wasn't expecting it. We therefore need to reset the context - # here. - # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) - res.addBoth(_set_context_cb, LoggingContext.sentinel) - return res - return g + # The original context will be restored when the deferred + # completes, but there is nothing waiting for it, so it will + # get leaked into the reactor or some other function which + # wasn't expecting it. We therefore need to reset the context + # here. + # + # (If this feels asymmetric, consider it this way: we are + # effectively forking a new thread of execution. We are + # probably currently within a ``with LoggingContext()`` block, + # which is supposed to have a single entry and exit point. But + # by spawning off another deferred, we are effectively + # adding a new exit point.) 
+ res.addBoth(_set_context_cb, LoggingContext.sentinel) + return res def make_deferred_yieldable(deferred): From dbe80a286b85f9427763344c260921745c2ca78d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 16:17:27 +0000 Subject: [PATCH 179/200] refactor JsonResource rephrase the OPTIONS and unrecognised request handling so that they look similar to the common flow. --- synapse/http/server.py | 84 ++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 165c684d0..d774476e5 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -276,49 +277,54 @@ class JsonResource(HttpServer, resource.Resource): This checks if anyone has registered a callback for that method and path. """ + callback, group_dict = self._get_handler_for_request(request) + + servlet_instance = getattr(callback, "__self__", None) + if servlet_instance is not None: + servlet_classname = servlet_instance.__class__.__name__ + else: + servlet_classname = "%r" % callback + + request_metrics.name = servlet_classname + + # Now trigger the callback. If it returns a response, we send it + # here. If it throws an exception, that is handled by the wrapper + # installed by @request_handler. 
+ + kwargs = intern_dict({ + name: urllib.unquote(value).decode("UTF-8") if value else value + for name, value in group_dict.items() + }) + + callback_return = yield callback(request, **kwargs) + if callback_return is not None: + code, response = callback_return + self._send_response(request, code, response) + + def _get_handler_for_request(self, request): + """Finds a callback method to handle the given request + + Args: + request (twisted.web.http.Request): + + Returns: + Tuple[Callable, dict[str, str]]: callback method, and the dict + mapping keys to path components as specified in the handler's + path match regexp + """ if request.method == "OPTIONS": - self._send_response(request, 200, {}) - return + return _options_handler, {} # Loop through all the registered callbacks to check if the method # and path regex match for path_entry in self.path_regexs.get(request.method, []): m = path_entry.pattern.match(request.path) - if not m: - continue - - # We found a match! First update the metrics object to indicate - # which servlet is handling the request. - - callback = path_entry.callback - - servlet_instance = getattr(callback, "__self__", None) - if servlet_instance is not None: - servlet_classname = servlet_instance.__class__.__name__ - else: - servlet_classname = "%r" % callback - - request_metrics.name = servlet_classname - - # Now trigger the callback. If it returns a response, we send it - # here. If it throws an exception, that is handled by the wrapper - # installed by @request_handler. - - kwargs = intern_dict({ - name: urllib.unquote(value).decode("UTF-8") if value else value - for name, value in m.groupdict().items() - }) - - callback_return = yield callback(request, **kwargs) - if callback_return is not None: - code, response = callback_return - self._send_response(request, code, response) - - return + if m: + # We found a match! + return path_entry.callback, m.groupdict() # Huh. No one wanted to handle that? Fiiiiiine. Send 400. 
- request_metrics.name = self.__class__.__name__ + ".UnrecognizedRequest" - raise UnrecognizedRequestError() + return _unrecognised_request_handler, {} def _send_response(self, request, code, response_json_object, response_code_message=None): @@ -335,6 +341,14 @@ class JsonResource(HttpServer, resource.Resource): ) +def _options_handler(request): + return {} + + +def _unrecognised_request_handler(request): + raise UnrecognizedRequestError() + + class RequestMetrics(object): def start(self, clock, name): self.start = clock.time_msec() From 88541f9009a7ca39c85cac7483d6a240ef497d33 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 16:19:18 +0000 Subject: [PATCH 180/200] Add a metric which increments when a request is received It's useful to know when there are peaks in incoming requests - which isn't quite the same as there being peaks in outgoing responses, due to the time taken to handle requests. --- synapse/http/server.py | 12 ++++++++++-- synapse/metrics/__init__.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index d774476e5..6c5d8bb55 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -60,6 +60,11 @@ response_count = metrics.register_counter( ) ) +requests_counter = metrics.register_counter( + "requests_received", + labels=["method", "servlet", ], +) + outgoing_responses_counter = metrics.register_counter( "responses", labels=["method", "code"], @@ -146,7 +151,8 @@ def wrap_request_handler(request_handler, include_metrics=False): # at the servlet name. For most requests that name will be # JsonResource (or a subclass), and JsonResource._async_render # will update it once it picks a servlet. 
- request_metrics.start(self.clock, name=self.__class__.__name__) + servlet_name = self.__class__.__name__ + request_metrics.start(self.clock, name=servlet_name) request_context.request = request_id with request.processing(): @@ -155,6 +161,7 @@ def wrap_request_handler(request_handler, include_metrics=False): if include_metrics: yield request_handler(self, request, request_metrics) else: + requests_counter.inc(request.method, servlet_name) yield request_handler(self, request) except CodeMessageException as e: code = e.code @@ -286,6 +293,7 @@ class JsonResource(HttpServer, resource.Resource): servlet_classname = "%r" % callback request_metrics.name = servlet_classname + requests_counter.inc(request.method, servlet_classname) # Now trigger the callback. If it returns a response, we send it # here. If it throws an exception, that is handled by the wrapper @@ -342,7 +350,7 @@ class JsonResource(HttpServer, resource.Resource): def _options_handler(request): - return {} + return 200, {} def _unrecognised_request_handler(request): diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e0cfb7d08..50d99d7a5 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -57,15 +57,31 @@ class Metrics(object): return metric def register_counter(self, *args, **kwargs): + """ + Returns: + CounterMetric + """ return self._register(CounterMetric, *args, **kwargs) def register_callback(self, *args, **kwargs): + """ + Returns: + CallbackMetric + """ return self._register(CallbackMetric, *args, **kwargs) def register_distribution(self, *args, **kwargs): + """ + Returns: + DistributionMetric + """ return self._register(DistributionMetric, *args, **kwargs) def register_cache(self, *args, **kwargs): + """ + Returns: + CacheMetric + """ return self._register(CacheMetric, *args, **kwargs) From 58dd148c4f67e1cb6b150bf43a437b33089cfe5e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 18:05:41 +0000 Subject: [PATCH 181/200] Add 
some docstrings to help figure this out --- synapse/http/server.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 6c5d8bb55..4b567215c 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -237,7 +237,7 @@ class JsonResource(HttpServer, resource.Resource): """ This implements the HttpServer interface and provides JSON support for Resources. - Register callbacks via register_path() + Register callbacks via register_paths() Callbacks can return a tuple of status code and a dict in which case the the dict will automatically be sent to the client as a JSON object. @@ -318,7 +318,11 @@ class JsonResource(HttpServer, resource.Resource): Returns: Tuple[Callable, dict[str, str]]: callback method, and the dict mapping keys to path components as specified in the handler's - path match regexp + path match regexp. + + The callback will normally be a method registered via + register_paths, so will return (possibly via Deferred) either + None, or a tuple of (http code, response body). """ if request.method == "OPTIONS": return _options_handler, {} @@ -350,10 +354,30 @@ class JsonResource(HttpServer, resource.Resource): def _options_handler(request): + """Request handler for OPTIONS requests + + This is a request handler suitable for return from + _get_handler_for_request. It returns a 200 and an empty body. + + Args: + request (twisted.web.http.Request): + + Returns: + Tuple[int, dict]: http code, response body. + """ return 200, {} def _unrecognised_request_handler(request): + """Request handler for unrecognised requests + + This is a request handler suitable for return from + _get_handler_for_request. It actually just raises an + UnrecognizedRequestError. 
+ + Args: + request (twisted.web.http.Request): + """ raise UnrecognizedRequestError() From 1708412f569dc28931a3704d679b41b92ac788b9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Mar 2018 17:32:46 +0000 Subject: [PATCH 182/200] Return an error when doing two purges on a room Queuing up purges doesn't sound like a good thing. --- synapse/handlers/message.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index dd00d8a86..6eb8d19dc 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -50,15 +50,26 @@ class MessageHandler(BaseHandler): self.clock = hs.get_clock() self.pagination_lock = ReadWriteLock() + self._purges_in_progress_by_room = set() @defer.inlineCallbacks def purge_history(self, room_id, topological_ordering, delete_local_events=False): - with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history( - room_id, topological_ordering, delete_local_events, + if room_id in self._purges_in_progress_by_room: + raise SynapseError( + 400, + "History purge already in progress for %s" % (room_id, ), ) + self._purges_in_progress_by_room.add(room_id) + try: + with (yield self.pagination_lock.write(room_id)): + yield self.store.purge_history( + room_id, topological_ordering, delete_local_events, + ) + finally: + self._purges_in_progress_by_room.discard(room_id) + @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, as_client_event=True, event_filter=None): From e48c7aac4d827b66182adf80ab9804f42db186c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Mar 2018 11:47:28 +0000 Subject: [PATCH 183/200] Add transactional API to history purge Make the purge request return quickly, and allow scripts to poll for updates. 
--- docs/admin_api/purge_history_api.rst | 27 +++++++ synapse/handlers/message.py | 104 +++++++++++++++++++++++++-- synapse/rest/client/v1/admin.py | 38 +++++++++- 3 files changed, 161 insertions(+), 8 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index acf1bc574..ea2922da5 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -32,3 +32,30 @@ specified by including an event_id in the URI, or by setting a id is given, that event (and others at the same graph depth) will be retained. If ``purge_up_to_ts`` is given, it should be a timestamp since the unix epoch, in milliseconds. + +The API starts the purge running, and returns immediately with a JSON body with +a purge id: + +.. code:: json + + { + "purge_id": "" + } + +Purge status query +------------------ + +It is possible to poll for updates on recent purges with a second API; + +``GET /_matrix/client/r0/admin/purge_history_status/`` + +(again, with a suitable ``access_token``). This API returns a JSON body like +the following: + +.. code:: json + + { + "status": "active" + } + +The status will be one of ``active``, ``complete``, or ``failed``. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6eb8d19dc..42aab91c5 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from twisted.internet import defer +from twisted.internet import defer, reactor +from twisted.python.failure import Failure from synapse.api.constants import EventTypes, Membership from synapse.api.errors import AuthError, Codes, SynapseError @@ -24,9 +25,10 @@ from synapse.types import ( UserID, RoomAlias, RoomStreamToken, ) from synapse.util.async import run_on_reactor, ReadWriteLock, Limiter -from synapse.util.logcontext import preserve_fn +from synapse.util.logcontext import preserve_fn, run_in_background from synapse.util.metrics import measure_func from synapse.util.frozenutils import unfreeze +from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client from synapse.replication.http.send_event import send_event_to_master @@ -41,6 +43,36 @@ import ujson logger = logging.getLogger(__name__) +class PurgeStatus(object): + """Object tracking the status of a purge request + + This class contains information on the progress of a purge request, for + return by get_purge_status. + + Attributes: + status (int): Tracks whether this request has completed. One of + STATUS_{ACTIVE,COMPLETE,FAILED} + """ + + STATUS_ACTIVE = 0 + STATUS_COMPLETE = 1 + STATUS_FAILED = 2 + + STATUS_TEXT = { + STATUS_ACTIVE: "active", + STATUS_COMPLETE: "complete", + STATUS_FAILED: "failed", + } + + def __init__(self): + self.status = PurgeStatus.STATUS_ACTIVE + + def asdict(self): + return { + "status": PurgeStatus.STATUS_TEXT[self.status] + } + + class MessageHandler(BaseHandler): def __init__(self, hs): @@ -51,25 +83,87 @@ class MessageHandler(BaseHandler): self.pagination_lock = ReadWriteLock() self._purges_in_progress_by_room = set() + # map from purge id to PurgeStatus + self._purges_by_id = {} - @defer.inlineCallbacks - def purge_history(self, room_id, topological_ordering, - delete_local_events=False): + def start_purge_history(self, room_id, topological_ordering, + delete_local_events=False): + """Start off a history purge on a room. 
+ + Args: + room_id (str): The room to purge from + + topological_ordering (int): minimum topo ordering to preserve + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + str: unique ID for this purge transaction. + """ if room_id in self._purges_in_progress_by_room: raise SynapseError( 400, "History purge already in progress for %s" % (room_id, ), ) + purge_id = random_string(16) + + # we log the purge_id here so that it can be tied back to the + # request id in the log lines. + logger.info("[purge] starting purge_id %s", purge_id) + + self._purges_by_id[purge_id] = PurgeStatus() + run_in_background( + self._purge_history, + purge_id, room_id, topological_ordering, delete_local_events, + ) + return purge_id + + @defer.inlineCallbacks + def _purge_history(self, purge_id, room_id, topological_ordering, + delete_local_events): + """Carry out a history purge on a room. + + Args: + purge_id (str): The id for this purge + room_id (str): The room to purge from + topological_ordering (int): minimum topo ordering to preserve + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + Deferred + """ self._purges_in_progress_by_room.add(room_id) try: with (yield self.pagination_lock.write(room_id)): yield self.store.purge_history( room_id, topological_ordering, delete_local_events, ) + logger.info("[purge] complete") + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE + except Exception: + logger.error("[purge] failed: %s", Failure().getTraceback().rstrip()) + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED finally: self._purges_in_progress_by_room.discard(room_id) + # remove the purge from the list 24 hours after it completes + def clear_purge(): + del self._purges_by_id[purge_id] + reactor.callLater(24 * 3600, clear_purge) + + def get_purge_status(self, purge_id): + """Get the current status of an active purge + + Args: + purge_id (str): purge_id returned by 
start_purge_history + + Returns: + PurgeStatus|None + """ + return self._purges_by_id.get(purge_id) + @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, as_client_event=True, event_filter=None): diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index dcf6215da..303419d28 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.api.constants import Membership -from synapse.api.errors import AuthError, SynapseError, Codes +from synapse.api.errors import AuthError, SynapseError, Codes, NotFoundError from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request @@ -185,12 +185,43 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): errcode=Codes.BAD_JSON, ) - yield self.handlers.message_handler.purge_history( + purge_id = yield self.handlers.message_handler.start_purge_history( room_id, depth, delete_local_events=delete_local_events, ) - defer.returnValue((200, {})) + defer.returnValue((200, { + "purge_id": purge_id, + })) + + +class PurgeHistoryStatusRestServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns( + "/admin/purge_history_status/(?P[^/]+)" + ) + + def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer) + """ + super(PurgeHistoryStatusRestServlet, self).__init__(hs) + self.handlers = hs.get_handlers() + + @defer.inlineCallbacks + def on_GET(self, request, purge_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + + if not is_admin: + raise AuthError(403, "You are not a server admin") + + purge_status = self.handlers.message_handler.get_purge_status(purge_id) + if purge_status is None: + raise NotFoundError("purge id '%s' not found" % purge_id) + + defer.returnValue((200, purge_status.asdict())) class DeactivateAccountRestServlet(ClientV1RestServlet): @@ 
-561,6 +592,7 @@ class SearchUsersRestServlet(ClientV1RestServlet): def register_servlets(hs, http_server): WhoisRestServlet(hs).register(http_server) PurgeMediaCacheRestServlet(hs).register(http_server) + PurgeHistoryStatusRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) PurgeHistoryRestServlet(hs).register(http_server) UsersRestServlet(hs).register(http_server) From 889a2a853a83057d4f218bb828bf686db786d590 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 13 Mar 2018 09:57:54 +0000 Subject: [PATCH 184/200] Add Measure block for persist_events This seems like a useful thing to measure. --- synapse/storage/events.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 826fad307..389087817 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -283,10 +283,11 @@ class EventsStore(EventsWorkerStore): def _maybe_start_persisting(self, room_id): @defer.inlineCallbacks def persisting_queue(item): - yield self._persist_events( - item.events_and_contexts, - backfilled=item.backfilled, - ) + with Measure(self._clock, "persist_events"): + yield self._persist_events( + item.events_and_contexts, + backfilled=item.backfilled, + ) self._event_persist_queue.handle_queue(room_id, persisting_queue) From c3f79c9da56931453ab86a4c726da5a02f18fe1e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 16:17:08 +0000 Subject: [PATCH 185/200] Split out edu/query registration to a separate class --- synapse/federation/federation_server.py | 117 ++++++++++++++---------- synapse/handlers/device.py | 6 +- synapse/handlers/devicemessage.py | 2 +- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/presence.py | 10 +- synapse/handlers/profile.py | 2 +- synapse/handlers/receipts.py | 2 +- synapse/handlers/typing.py | 2 +- synapse/server.py | 5 + 10 files changed, 90 insertions(+), 60 
deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9849953c9..5b1914f2f 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -17,7 +17,7 @@ import logging import simplejson as json from twisted.internet import defer -from synapse.api.errors import AuthError, FederationError, SynapseError +from synapse.api.errors import AuthError, FederationError, SynapseError, NotFoundError from synapse.crypto.event_signing import compute_event_signature from synapse.federation.federation_base import ( FederationBase, @@ -56,6 +56,8 @@ class FederationServer(FederationBase): self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") + self.registry = hs.get_federation_registry() + # We cache responses to state queries, as they take a while and often # come in waves. self._state_resp_cache = ResponseCache(hs, timeout_ms=30000) @@ -67,35 +69,6 @@ class FederationServer(FederationBase): """ self.handler = handler - def register_edu_handler(self, edu_type, handler): - if edu_type in self.edu_handlers: - raise KeyError("Already have an EDU handler for %s" % (edu_type,)) - - self.edu_handlers[edu_type] = handler - - def register_query_handler(self, query_type, handler): - """Sets the handler callable that will be used to handle an incoming - federation Query of the given type. - - Args: - query_type (str): Category name of the query, which should match - the string used by make_query. - handler (callable): Invoked to handle incoming queries of this type - - handler is invoked as: - result = handler(args) - - where 'args' is a dict mapping strings to strings of the query - arguments. It should return a Deferred that will eventually yield an - object to encode as JSON. 
- """ - if query_type in self.query_handlers: - raise KeyError( - "Already have a Query handler for %s" % (query_type,) - ) - - self.query_handlers[query_type] = handler - @defer.inlineCallbacks @log_function def on_backfill_request(self, origin, room_id, versions, limit): @@ -229,16 +202,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def received_edu(self, origin, edu_type, content): received_edus_counter.inc() - - if edu_type in self.edu_handlers: - try: - yield self.edu_handlers[edu_type](origin, content) - except SynapseError as e: - logger.info("Failed to handle edu %r: %r", edu_type, e) - except Exception as e: - logger.exception("Failed to handle edu %r", edu_type) - else: - logger.warn("Received EDU of type %s with no handler", edu_type) + yield self.registry.on_edu(edu_type, origin, content) @defer.inlineCallbacks @log_function @@ -328,14 +292,8 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_query_request(self, query_type, args): received_queries_counter.inc(query_type) - - if query_type in self.query_handlers: - response = yield self.query_handlers[query_type](args) - defer.returnValue((200, response)) - else: - defer.returnValue( - (404, "No handler for Query type '%s'" % (query_type,)) - ) + resp = yield self.registry.on_query(query_type, args) + defer.returnValue((200, resp)) @defer.inlineCallbacks def on_make_join_request(self, room_id, user_id): @@ -607,3 +565,66 @@ class FederationServer(FederationBase): origin, room_id, event_dict ) defer.returnValue(ret) + + +class FederationHandlerRegistry(object): + """Allows classes to register themselves as handlers for a given EDU or + query type for incoming federation traffic. + """ + def __init__(self): + self.edu_handlers = {} + self.query_handlers = {} + + def register_edu_handler(self, edu_type, handler): + """Sets the handler callable that will be used to handle an incoming + federation EDU of the given type. 
+ + Args: + edu_type (str): The type of the incoming EDU to register handler for + handler (Callable[str, dict]): A callable invoked on incoming EDU + of the given type. The arguments are the origin server name and + the EDU contents. + """ + if edu_type in self.edu_handlers: + raise KeyError("Already have an EDU handler for %s" % (edu_type,)) + + self.edu_handlers[edu_type] = handler + + def register_query_handler(self, query_type, handler): + """Sets the handler callable that will be used to handle an incoming + federation query of the given type. + + Args: + query_type (str): Category name of the query, which should match + the string used by make_query. + handler (Callable[dict] -> Deferred[dict]): Invoked to handle + incoming queries of this type. The return will be yielded + on and the result used as the response to the query request. + """ + if query_type in self.query_handlers: + raise KeyError( + "Already have a Query handler for %s" % (query_type,) + ) + + self.query_handlers[query_type] = handler + + @defer.inlineCallbacks + def on_edu(self, edu_type, origin, content): + handler = self.edu_handlers.get(edu_type) + if not handler: + logger.warn("No handler registered for EDU type %s", edu_type) + + try: + yield handler(origin, content) + except SynapseError as e: + logger.info("Failed to handle edu %r: %r", edu_type, e) + except Exception as e: + logger.exception("Failed to handle edu %r", edu_type) + + def on_query(self, query_type, args): + handler = self.query_handlers.get(query_type) + if not handler: + logger.warn("No handler registered for query type %s", query_type) + raise NotFoundError("No handler for Query type '%s'" % (query_type,)) + + return handler(args) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 0e8345385..9e58dbe64 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -41,10 +41,12 @@ class DeviceHandler(BaseHandler): self._edu_updater = DeviceListEduUpdater(hs, self) - 
self.federation.register_edu_handler( + federation_registry = hs.get_federation_registry() + + federation_registry.register_edu_handler( "m.device_list_update", self._edu_updater.incoming_device_list_update, ) - self.federation.register_query_handler( + federation_registry.register_query_handler( "user_devices", self.on_federation_query_user_devices, ) diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index d996aa90b..f147a20b7 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -37,7 +37,7 @@ class DeviceMessageHandler(object): self.is_mine = hs.is_mine self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler( + hs.get_federation_registry().register_edu_handler( "m.direct_to_device", self.on_direct_to_device_edu ) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8580ada60..e955cb1f3 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -37,7 +37,7 @@ class DirectoryHandler(BaseHandler): self.event_creation_handler = hs.get_event_creation_handler() self.federation = hs.get_replication_layer() - self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 9aa95f89e..57f50a4e2 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -40,7 +40,7 @@ class E2eKeysHandler(object): # doesn't really work as part of the generic query API, because the # query request requires an object POST, but we abuse the # "query handler" interface. 
- self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "client_keys", self.on_federation_query_client_keys ) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index cb158ba96..b11ae7835 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -98,24 +98,26 @@ class PresenceHandler(object): self.state = hs.get_state_handler() - self.replication.register_edu_handler( + federation_registry = hs.get_federation_registry() + + federation_registry.register_edu_handler( "m.presence", self.incoming_presence ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_invite", lambda origin, content: self.invite_presence( observed_user=UserID.from_string(content["observed_user"]), observer_user=UserID.from_string(content["observer_user"]), ) ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_accept", lambda origin, content: self.accept_presence( observed_user=UserID.from_string(content["observed_user"]), observer_user=UserID.from_string(content["observer_user"]), ) ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_deny", lambda origin, content: self.deny_presence( observed_user=UserID.from_string(content["observed_user"]), diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index c9c287903..c386c79bb 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -32,7 +32,7 @@ class ProfileHandler(BaseHandler): super(ProfileHandler, self).__init__(hs) self.federation = hs.get_replication_layer() - self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 052576527..3f215c2b4 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -35,7 +35,7 @@ class 
ReceiptsHandler(BaseHandler): self.store = hs.get_datastore() self.hs = hs self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler( + hs.get_federation_registry().register_edu_handler( "m.receipt", self._received_remote_receipt ) self.clock = self.hs.get_clock() diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 82dedbbc9..77c0cf146 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -56,7 +56,7 @@ class TypingHandler(object): self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler("m.typing", self._recv_edu) + hs.get_federation_registry().register_edu_handler("m.typing", self._recv_edu) hs.get_distributor().observe("user_left_room", self.user_left_room) diff --git a/synapse/server.py b/synapse/server.py index 5b6effbe3..1bc8d6f70 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -34,6 +34,7 @@ from synapse.events.builder import EventBuilderFactory from synapse.events.spamcheck import SpamChecker from synapse.federation import initialize_http_replication from synapse.federation.send_queue import FederationRemoteSendQueue +from synapse.federation.federation_server import FederationHandlerRegistry from synapse.federation.transport.client import TransportLayerClient from synapse.federation.transaction_queue import TransactionQueue from synapse.handlers import Handlers @@ -147,6 +148,7 @@ class HomeServer(object): 'groups_attestation_renewer', 'spam_checker', 'room_member_handler', + 'federation_registry', ] def __init__(self, hostname, **kwargs): @@ -387,6 +389,9 @@ class HomeServer(object): def build_room_member_handler(self): return RoomMemberHandler(self) + def build_federation_registry(self): + return FederationHandlerRegistry() + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) From 631a73f7ef7d89c43be47cf01c7c27a1d633052d Mon Sep 17 00:00:00 2001 From: Erik 
Johnston Date: Tue, 13 Mar 2018 10:39:19 +0000 Subject: [PATCH 186/200] Fix tests --- tests/handlers/test_directory.py | 9 ++++----- tests/handlers/test_profile.py | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index 571277390..b10392149 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -35,21 +35,20 @@ class DirectoryTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): - self.mock_federation = Mock(spec=[ - "make_query", - "register_edu_handler", - ]) + self.mock_federation = Mock() + self.mock_registry = Mock() self.query_handlers = {} def register_query_handler(query_type, handler): self.query_handlers[query_type] = handler - self.mock_federation.register_query_handler = register_query_handler + self.mock_registry.register_query_handler = register_query_handler hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), replication_layer=self.mock_federation, + federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index a5f47181d..73223ffbd 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -37,23 +37,22 @@ class ProfileTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): - self.mock_federation = Mock(spec=[ - "make_query", - "register_edu_handler", - ]) + self.mock_federation = Mock() + self.mock_registry = Mock() self.query_handlers = {} def register_query_handler(query_type, handler): self.query_handlers[query_type] = handler - self.mock_federation.register_query_handler = register_query_handler + self.mock_registry.register_query_handler = register_query_handler hs = yield setup_test_homeserver( http_client=None, handlers=None, resource_for_federation=Mock(), replication_layer=self.mock_federation, + 
federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", ]) From e05bf34117de19705b36a4803085ea93f7381928 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 14:07:39 +0000 Subject: [PATCH 187/200] Move property setting from ReplicationLayer to FederationBase --- synapse/federation/federation_base.py | 6 ++++++ synapse/federation/federation_client.py | 1 + synapse/federation/federation_server.py | 6 ++++++ synapse/federation/replication.py | 22 ---------------------- 4 files changed, 13 insertions(+), 22 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 7918d3e44..79eaa3103 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -27,7 +27,13 @@ logger = logging.getLogger(__name__) class FederationBase(object): def __init__(self, hs): + self.hs = hs + + self.server_name = hs.hostname + self.keyring = hs.get_keyring() self.spam_checker = hs.get_spam_checker() + self.store = hs.get_datastore() + self._clock = hs.get_clock() @defer.inlineCallbacks def _check_sigs_and_hash_and_fetch(self, origin, pdus, outlier=False, diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 813907f7f..38440da5b 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -58,6 +58,7 @@ class FederationClient(FederationBase): self._clear_tried_cache, 60 * 1000, ) self.state = hs.get_state_handler() + self.transport_layer = hs.get_federation_transport_client() def _clear_tried_cache(self): """Clear pdu_destination_tried cache""" diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5b1914f2f..dd73fc50b 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -23,6 +23,8 @@ from synapse.federation.federation_base import ( FederationBase, event_from_pdu_json, ) + 
+from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction import synapse.metrics from synapse.types import get_domain_from_id @@ -56,6 +58,10 @@ class FederationServer(FederationBase): self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") + self.transaction_actions = TransactionActions(self.store) + + self.handler = None + self.registry = hs.get_federation_registry() # We cache responses to state queries, as they take a while and often diff --git a/synapse/federation/replication.py b/synapse/federation/replication.py index 62d865ec4..b8b3a3f93 100644 --- a/synapse/federation/replication.py +++ b/synapse/federation/replication.py @@ -20,8 +20,6 @@ a given transport. from .federation_client import FederationClient from .federation_server import FederationServer -from .persistence import TransactionActions - import logging @@ -47,26 +45,6 @@ class ReplicationLayer(FederationClient, FederationServer): """ def __init__(self, hs, transport_layer): - self.server_name = hs.hostname - - self.keyring = hs.get_keyring() - - self.transport_layer = transport_layer - - self.federation_client = self - - self.store = hs.get_datastore() - - self.handler = None - self.edu_handlers = {} - self.query_handlers = {} - - self._clock = hs.get_clock() - - self.transaction_actions = TransactionActions(self.store) - - self.hs = hs - super(ReplicationLayer, self).__init__(hs) def __str__(self): From 265b993b8afd2501b2aa3a50670f39d6d97eddb7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 14:34:31 +0000 Subject: [PATCH 188/200] Split replication layer into two --- synapse/app/homeserver.py | 2 +- synapse/federation/federation_server.py | 10 +--------- synapse/federation/transport/server.py | 2 +- synapse/handlers/device.py | 3 +-- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/federation.py | 4 +--- 
synapse/handlers/presence.py | 1 - synapse/handlers/profile.py | 2 +- synapse/handlers/room_list.py | 2 +- synapse/handlers/room_member.py | 3 +-- synapse/server.py | 13 +++++++++---- 12 files changed, 19 insertions(+), 27 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e375f2bbc..503f461ab 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -348,7 +348,7 @@ def setup(config_options): hs.get_state_handler().start_caching() hs.get_datastore().start_profiling() hs.get_datastore().start_doing_background_updates() - hs.get_replication_layer().start_get_pdu_cache() + hs.get_replication_client().start_get_pdu_cache() register_memory_metrics(hs) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index dd73fc50b..740ef9628 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -54,27 +54,19 @@ class FederationServer(FederationBase): super(FederationServer, self).__init__(hs) self.auth = hs.get_auth() + self.handler = hs.get_handlers().federation_handler self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") self.transaction_actions = TransactionActions(self.store) - self.handler = None - self.registry = hs.get_federation_registry() # We cache responses to state queries, as they take a while and often # come in waves. self._state_resp_cache = ResponseCache(hs, timeout_ms=30000) - def set_handler(self, handler): - """Sets the handler that the replication layer will use to communicate - receipt of new PDUs from other home servers. The required methods are - documented on :py:class:`.ReplicationHandler`. 
- """ - self.handler = handler - @defer.inlineCallbacks @log_function def on_backfill_request(self, origin, room_id, versions, limit): diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 06c16ba4f..04b83e691 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -1190,7 +1190,7 @@ GROUP_ATTESTATION_SERVLET_CLASSES = ( def register_servlets(hs, resource, authenticator, ratelimiter): for servletclass in FEDERATION_SERVLET_CLASSES: servletclass( - handler=hs.get_replication_layer(), + handler=hs.get_replication_server(), authenticator=authenticator, ratelimiter=ratelimiter, server_name=hs.hostname, diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 9e58dbe64..fcf41630d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -37,7 +37,6 @@ class DeviceHandler(BaseHandler): self.state = hs.get_state_handler() self._auth_handler = hs.get_auth_handler() self.federation_sender = hs.get_federation_sender() - self.federation = hs.get_replication_layer() self._edu_updater = DeviceListEduUpdater(hs, self) @@ -432,7 +431,7 @@ class DeviceListEduUpdater(object): def __init__(self, hs, device_handler): self.store = hs.get_datastore() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() self.clock = hs.get_clock() self.device_handler = device_handler diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index e955cb1f3..dfe04eb1c 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -36,7 +36,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git 
a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 57f50a4e2..0ca8d036e 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) class E2eKeysHandler(object): def __init__(self, hs): self.store = hs.get_datastore() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() self.device_handler = hs.get_device_handler() self.is_mine = hs.is_mine self.clock = hs.get_clock() diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 520612683..cfd437916 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -68,7 +68,7 @@ class FederationHandler(BaseHandler): self.hs = hs self.store = hs.get_datastore() - self.replication_layer = hs.get_replication_layer() + self.replication_layer = hs.get_replication_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname self.keyring = hs.get_keyring() @@ -78,8 +78,6 @@ class FederationHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() self.event_creation_handler = hs.get_event_creation_handler() - self.replication_layer.set_handler(self) - # When joining a room we need to queue any events for that room up self.room_queues = {} self._room_pdu_linearizer = Linearizer("fed_room_pdu") diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index b11ae7835..a5e501897 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -93,7 +93,6 @@ class PresenceHandler(object): self.store = hs.get_datastore() self.wheel_timer = WheelTimer() self.notifier = hs.get_notifier() - self.replication = hs.get_replication_layer() self.federation = hs.get_federation_sender() self.state = hs.get_state_handler() diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index c386c79bb..0cfac60d7 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -31,7 +31,7 @@ 
class ProfileHandler(BaseHandler): def __init__(self, hs): super(ProfileHandler, self).__init__(hs) - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index dfa09141e..f79bd8902 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -409,7 +409,7 @@ class RoomListHandler(BaseHandler): def _get_remote_list_cached(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): - repl_layer = self.hs.get_replication_layer() + repl_layer = self.hs.get_replication_client() if search_filter: # We can't cache when asking for search return repl_layer.get_public_rooms( diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730..e2f052771 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -55,7 +55,6 @@ class RoomMemberHandler(object): self.registration_handler = hs.get_handlers().registration_handler self.profile_handler = hs.get_profile_handler() self.event_creation_hander = hs.get_event_creation_handler() - self.replication_layer = hs.get_replication_layer() self.member_linearizer = Linearizer(name="member") @@ -212,7 +211,7 @@ class RoomMemberHandler(object): # if this is a join with a 3pid signature, we may need to turn a 3pid # invite into a normal invite before we can handle the join. 
if third_party_signed is not None: - yield self.replication_layer.exchange_third_party_invite( + yield self.federation_handler.exchange_third_party_invite( third_party_signed["sender"], target.to_string(), room_id, diff --git a/synapse/server.py b/synapse/server.py index 1bc8d6f70..894e9c2ac 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -32,7 +32,8 @@ from synapse.appservice.scheduler import ApplicationServiceScheduler from synapse.crypto.keyring import Keyring from synapse.events.builder import EventBuilderFactory from synapse.events.spamcheck import SpamChecker -from synapse.federation import initialize_http_replication +from synapse.federation.federation_client import FederationClient +from synapse.federation.federation_server import FederationServer from synapse.federation.send_queue import FederationRemoteSendQueue from synapse.federation.federation_server import FederationHandlerRegistry from synapse.federation.transport.client import TransportLayerClient @@ -100,7 +101,8 @@ class HomeServer(object): DEPENDENCIES = [ 'http_client', 'db_pool', - 'replication_layer', + 'replication_client', + 'replication_server', 'handlers', 'v1auth', 'auth', @@ -197,8 +199,11 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter - def build_replication_layer(self): - return initialize_http_replication(self) + def build_replication_client(self): + return FederationClient(self) + + def build_replication_server(self): + return FederationServer(self) def build_handlers(self): return Handlers(self) From 6ea27fafad7c290b8f082fedfa8ff7948cf9f1fd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 10:15:49 +0000 Subject: [PATCH 189/200] Fix tests --- tests/handlers/test_directory.py | 2 +- tests/handlers/test_e2e_keys.py | 2 +- tests/handlers/test_profile.py | 3 ++- tests/handlers/test_typing.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_profile.py | 
2 +- tests/rest/client/v1/test_rooms.py | 16 ++++++++-------- tests/rest/client/v1/test_typing.py | 2 +- tests/storage/test_appservice.py | 10 +++++----- 10 files changed, 22 insertions(+), 21 deletions(-) diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index b10392149..b4f36b27a 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -47,7 +47,7 @@ class DirectoryTestCase(unittest.TestCase): hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), - replication_layer=self.mock_federation, + replication_client=self.mock_federation, federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index d92bf240b..fe73f2b96 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -34,7 +34,7 @@ class E2eKeysHandlerTestCase(unittest.TestCase): def setUp(self): self.hs = yield utils.setup_test_homeserver( handlers=None, - replication_layer=mock.Mock(), + replication_client=mock.Mock(), ) self.handler = synapse.handlers.e2e_keys.E2eKeysHandler(self.hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 73223ffbd..c69043768 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -51,7 +51,8 @@ class ProfileTestCase(unittest.TestCase): http_client=None, handlers=None, resource_for_federation=Mock(), - replication_layer=self.mock_federation, + replication_client=self.mock_federation, + replication_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index fcd380b03..a433bbfa8 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -81,7 +81,7 @@ class TypingNotificationsTestCase(unittest.TestCase): "get_current_state_deltas", ]), 
state_handler=self.state_handler, - handlers=None, + handlers=Mock(), notifier=mock_notifier, resource_for_client=Mock(), resource_for_federation=self.mock_federation_resource, diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 74f104e3b..ceffdaad5 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,7 +31,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.hs = yield setup_test_homeserver( "blue", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index e9698bfdc..f04bf7dfd 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -114,7 +114,7 @@ class EventStreamPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index dddcf51b6..feddcf024 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -45,7 +45,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, resource_for_client=self.mock_resource, federation=Mock(), - replication_layer=Mock(), + replication_client=Mock(), profile_handler=self.mock_handler ) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 9f3725538..2c0708b0d 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -46,7 +46,7 @@ class RoomPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = 
hs.get_ratelimiter() @@ -409,7 +409,7 @@ class RoomsMemberListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -493,7 +493,7 @@ class RoomsCreateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -582,7 +582,7 @@ class RoomTopicTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -697,7 +697,7 @@ class RoomMemberStateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -829,7 +829,7 @@ class RoomMessagesTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -929,7 +929,7 @@ class RoomInitialSyncTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), @@ -1003,7 +1003,7 @@ class RoomMessageListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() diff --git a/tests/rest/client/v1/test_typing.py 
b/tests/rest/client/v1/test_typing.py index e46534cd3..62639e3ad 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(RestTestCase): "red", clock=self.clock, http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 13d81f972..cc0df7f66 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -42,7 +42,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) self.as_token = "token1" @@ -119,7 +119,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) self.db_pool = hs.get_db_pool() @@ -455,7 +455,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) ApplicationServiceStore(None, hs) @@ -473,7 +473,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) with self.assertRaises(ConfigError) as cm: @@ -497,7 +497,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) with self.assertRaises(ConfigError) as cm: From ea7b3c4b1b829703a780418e6bb6b7860a5b5451 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:00:04 +0000 Subject: [PATCH 190/200] Remove unused ReplicationLayer --- 
synapse/federation/__init__.py | 8 ----- synapse/federation/replication.py | 51 ------------------------------- 2 files changed, 59 deletions(-) delete mode 100644 synapse/federation/replication.py diff --git a/synapse/federation/__init__.py b/synapse/federation/__init__.py index 2e32d245b..f5f0bdfca 100644 --- a/synapse/federation/__init__.py +++ b/synapse/federation/__init__.py @@ -15,11 +15,3 @@ """ This package includes all the federation specific logic. """ - -from .replication import ReplicationLayer - - -def initialize_http_replication(hs): - transport = hs.get_federation_transport_client() - - return ReplicationLayer(hs, transport) diff --git a/synapse/federation/replication.py b/synapse/federation/replication.py deleted file mode 100644 index b8b3a3f93..000000000 --- a/synapse/federation/replication.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This layer is responsible for replicating with remote home servers using -a given transport. -""" - -from .federation_client import FederationClient -from .federation_server import FederationServer - -import logging - - -logger = logging.getLogger(__name__) - - -class ReplicationLayer(FederationClient, FederationServer): - """This layer is responsible for replicating with remote home servers over - the given transport. I.e., does the sending and receiving of PDUs to - remote home servers. 
- - The layer communicates with the rest of the server via a registered - ReplicationHandler. - - In more detail, the layer: - * Receives incoming data and processes it into transactions and pdus. - * Fetches any PDUs it thinks it might have missed. - * Keeps the current state for contexts up to date by applying the - suitable conflict resolution. - * Sends outgoing pdus wrapped in transactions. - * Fills out the references to previous pdus/transactions appropriately - for outgoing data. - """ - - def __init__(self, hs, transport_layer): - super(ReplicationLayer, self).__init__(hs) - - def __str__(self): - return "<ReplicationLayer(%s)>" % self.server_name From d023ecb8109718cd95c4dd86e916a459fc610547 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:08:10 +0000 Subject: [PATCH 191/200] Don't build handlers on workers unnecessarily --- synapse/app/client_reader.py | 1 - synapse/app/event_creator.py | 1 - synapse/app/federation_reader.py | 1 - synapse/app/frontend_proxy.py | 1 - synapse/app/media_repository.py | 1 - 5 files changed, 5 deletions(-) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 3b3352798..0a8ce9bc6 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -156,7 +156,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index fc0b9e8c0..eb593c527 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -161,7 +161,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 4de43c41f..20d157911 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -144,7 +144,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers()
ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index e32ee8fe9..816c080d1 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -211,7 +211,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 1ed1ca877..84c5791b3 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -158,7 +158,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): From 31becf4ac3a1c8c675ecab481b07cffb9aa24fd8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:28:52 +0000 Subject: [PATCH 192/200] Make functions private that can be --- synapse/handlers/room_member.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730..2a6b7e9f8 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -138,7 +138,7 @@ class RoomMemberHandler(object): defer.returnValue(event) @defer.inlineCallbacks - def remote_join(self, remote_room_hosts, room_id, user, content): + def _remote_join(self, remote_room_hosts, room_id, user, content): if len(remote_room_hosts) == 0: raise SynapseError(404, "No known servers") @@ -292,7 +292,7 @@ class RoomMemberHandler(object): raise AuthError(403, "Guest access not allowed") if not is_host_in_room: - inviter = yield self.get_inviter(target.to_string(), room_id) + inviter = yield self._get_inviter(target.to_string(), room_id) if inviter and not self.hs.is_mine(inviter): remote_room_hosts.append(inviter.domain) @@ -306,7 +306,7 @@ class RoomMemberHandler(object): if requester.is_guest: content["kind"] = "guest" - ret = yield self.remote_join( + ret = yield 
self._remote_join( remote_room_hosts, room_id, target, content ) defer.returnValue(ret) @@ -314,7 +314,7 @@ class RoomMemberHandler(object): elif effective_membership_state == Membership.LEAVE: if not is_host_in_room: # perhaps we've been invited - inviter = yield self.get_inviter(target.to_string(), room_id) + inviter = yield self._get_inviter(target.to_string(), room_id) if not inviter: raise SynapseError(404, "Not a known room") @@ -496,7 +496,7 @@ class RoomMemberHandler(object): defer.returnValue((RoomID.from_string(room_id), servers)) @defer.inlineCallbacks - def get_inviter(self, user_id, room_id): + def _get_inviter(self, user_id, room_id): invite = yield self.store.get_invite_for_user_in_room( user_id=user_id, room_id=room_id, @@ -573,7 +573,7 @@ class RoomMemberHandler(object): if "mxid" in data: if "signatures" not in data: raise AuthError(401, "No signatures on 3pid binding") - yield self.verify_any_signature(data, id_server) + yield self._verify_any_signature(data, id_server) defer.returnValue(data["mxid"]) except IOError as e: @@ -581,7 +581,7 @@ class RoomMemberHandler(object): defer.returnValue(None) @defer.inlineCallbacks - def verify_any_signature(self, data, server_hostname): + def _verify_any_signature(self, data, server_hostname): if server_hostname not in data["signatures"]: raise AuthError(401, "No signature from server %s" % (server_hostname,)) for key_name, signature in data["signatures"][server_hostname].items(): From d0fcc48f9dfc09531619faf23d407807eec46df9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:39:58 +0000 Subject: [PATCH 193/200] extra_users is actually a list of UserIDs --- synapse/handlers/message.py | 2 +- synapse/replication/http/send_event.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 42aab91c5..4f97c8db7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -667,7 +667,7 @@ class 
EventCreationHandler(object): event (FrozenEvent) context (EventContext) ratelimit (bool) - extra_users (list(str)): Any extra users to notify about event + extra_users (list(UserID)): Any extra users to notify about event """ try: diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 70f2fe456..bbe2f967b 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -25,7 +25,7 @@ from synapse.util.async import sleep from synapse.util.caches.response_cache import ResponseCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.metrics import Measure -from synapse.types import Requester +from synapse.types import Requester, UserID import logging import re @@ -46,7 +46,7 @@ def send_event_to_master(client, host, port, requester, event, context, event (FrozenEvent) context (EventContext) ratelimit (bool) - extra_users (list(str)): Any extra users to notify about event + extra_users (list(UserID)): Any extra users to notify about event """ uri = "http://%s:%s/_synapse/replication/send_event/%s" % ( host, port, event.event_id, @@ -59,7 +59,7 @@ def send_event_to_master(client, host, port, requester, event, context, "context": context.serialize(event), "requester": requester.serialize(), "ratelimit": ratelimit, - "extra_users": extra_users, + "extra_users": [u.to_string() for u in extra_users], } try: @@ -143,7 +143,7 @@ class ReplicationSendEventRestServlet(RestServlet): context = yield EventContext.deserialize(self.store, content["context"]) ratelimit = content["ratelimit"] - extra_users = content["extra_users"] + extra_users = [UserID.from_string(u) for u in content["extra_users"]] if requester.user: request.authenticated_entity = requester.user.to_string() From 0f942f68c106b9d0fb89d0eaef9fa942b6d003ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:31:11 +0000 Subject: [PATCH 194/200] Factor out _remote_reject_invite in 
RoomMember --- synapse/handlers/room_member.py | 50 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730..6c8acfbf0 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -154,6 +154,30 @@ class RoomMemberHandler(object): ) yield user_joined_room(self.distributor, user, room_id) + @defer.inlineCallbacks + def _remote_reject_invite(self, remote_room_hosts, room_id, target): + fed_handler = self.federation_handler + try: + ret = yield fed_handler.do_remotely_reject_invite( + remote_room_hosts, + room_id, + target.to_string(), + ) + defer.returnValue(ret) + except Exception as e: + # if we were unable to reject the exception, just mark + # it as rejected on our end and plough ahead. + # + # The 'except' clause is very broad, but we need to + # capture everything from DNS failures upwards + # + logger.warn("Failed to reject invite: %s", e) + + yield self.store.locally_reject_invite( + target.to_string(), room_id + ) + defer.returnValue({}) + @defer.inlineCallbacks def update_membership( self, @@ -328,28 +352,10 @@ class RoomMemberHandler(object): else: # send the rejection to the inviter's HS. remote_room_hosts = remote_room_hosts + [inviter.domain] - fed_handler = self.federation_handler - try: - ret = yield fed_handler.do_remotely_reject_invite( - remote_room_hosts, - room_id, - target.to_string(), - ) - defer.returnValue(ret) - except Exception as e: - # if we were unable to reject the exception, just mark - # it as rejected on our end and plough ahead. 
- # - # The 'except' clause is very broad, but we need to - # capture everything from DNS failures upwards - # - logger.warn("Failed to reject invite: %s", e) - - yield self.store.locally_reject_invite( - target.to_string(), room_id - ) - - defer.returnValue({}) + res = yield self._remote_reject_invite( + remote_room_hosts, room_id, target, + ) + defer.returnValue(res) res = yield self._local_membership_update( requester=requester, From f43b6d6d9b7e6f6a94ba3e1886cec6ca864fad43 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:29:17 +0000 Subject: [PATCH 195/200] Fix docstring types --- synapse/federation/federation_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5b1914f2f..90302c953 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -581,7 +581,7 @@ class FederationHandlerRegistry(object): Args: edu_type (str): The type of the incoming EDU to register handler for - handler (Callable[str, dict]): A callable invoked on incoming EDU + handler (Callable[[str, dict]]): A callable invoked on incoming EDU of the given type. The arguments are the origin server name and the EDU contents. """ @@ -597,7 +597,7 @@ class FederationHandlerRegistry(object): Args: query_type (str): Category name of the query, which should match the string used by make_query. - handler (Callable[dict] -> Deferred[dict]): Invoked to handle + handler (Callable[[dict], Deferred[dict]]): Invoked to handle incoming queries of this type. The return will be yielded on and the result used as the response to the query request. 
""" From 8b3573a8b209c60b03d5ef7f4dfed9ccb9e9f7b3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 12:04:38 +0000 Subject: [PATCH 196/200] Refactor get_or_register_3pid_guest --- synapse/handlers/register.py | 26 ++++++++++++++++++++++---- synapse/handlers/room_member.py | 10 +++------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 9021d4d57..ed5939880 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -446,16 +446,34 @@ class RegistrationHandler(BaseHandler): return self.hs.get_auth_handler() @defer.inlineCallbacks - def guest_access_token_for(self, medium, address, inviter_user_id): + def get_or_register_3pid_guest(self, medium, address, inviter_user_id): + """Get a guest access token for a 3PID, creating a guest account if + one doesn't already exist. + + Args: + medium (str) + address (str) + inviter_user_id (str): The user ID who is trying to invite the + 3PID + + Returns: + Deferred[(str, str)]: A 2-tuple of `(user_id, access_token)` of the + 3PID guest account. 
+ """ access_token = yield self.store.get_3pid_guest_access_token(medium, address) if access_token: - defer.returnValue(access_token) + user_info = yield self.auth.get_user_by_access_token( + access_token + ) - _, access_token = yield self.register( + defer.returnValue((user_info["user"].to_string(), access_token)) + + user_id, access_token = yield self.register( generate_token=True, make_guest=True ) access_token = yield self.store.save_or_get_3pid_guest_access_token( medium, address, access_token, inviter_user_id ) - defer.returnValue(access_token) + + defer.returnValue((user_id, access_token)) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730..c3c720536 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -735,20 +735,16 @@ class RoomMemberHandler(object): } if self.config.invite_3pid_guest: - registration_handler = self.registration_handler - guest_access_token = yield registration_handler.guest_access_token_for( + rh = self.registration_handler + guest_user_id, guest_access_token = yield rh.get_or_register_3pid_guest( medium=medium, address=address, inviter_user_id=inviter_user_id, ) - guest_user_info = yield self.auth.get_user_by_access_token( - guest_access_token - ) - invite_config.update({ "guest_access_token": guest_access_token, - "guest_user_id": guest_user_info["user"].to_string(), + "guest_user_id": guest_user_id, }) data = yield self.simple_http_client.post_urlencoded_get_json( From f5160d4a3e559ba23f3e6002a8f9172dff4b3d60 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 12:12:55 +0000 Subject: [PATCH 197/200] RoomMembershipRestServlet doesn't handle /forget Due to the order we register the REST handlers `/forget` was handled by the correct handler. 
--- synapse/rest/client/v1/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 9d745174c..f8999d64d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -599,7 +599,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): def register(self, http_server): # /rooms/$roomid/[invite|join|leave] PATTERNS = ("/rooms/(?P<room_id>[^/]*)/" - "(?P<membership_action>join|invite|leave|ban|unban|kick|forget)") + "(?P<membership_action>join|invite|leave|ban|unban|kick)") register_txn_path(self, PATTERNS, http_server) @defer.inlineCallbacks From ea3442c15c32ba98c407c71722cb80821d99d160 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:16:21 +0000 Subject: [PATCH 198/200] Add docstring --- synapse/handlers/room_member.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6c8acfbf0..da35e604d 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -139,6 +139,19 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def remote_join(self, remote_room_hosts, room_id, user, content): + """Try and join a room that this server is not in + + Args: + remote_room_hosts (list[str]): List of servers that can be used + to join via. + room_id (str): Room that we are trying to join + user (UserID): User who is trying to join + content (dict): A dict that should be used as the content of the + join event. + + Returns: + Deferred + """ if len(remote_room_hosts) == 0: raise SynapseError(404, "No known servers") @@ -156,6 +169,19 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def _remote_reject_invite(self, remote_room_hosts, room_id, target): + """Attempt to reject an invite for a room this server is not in. If we + fail to do so we locally mark the invite as rejected.
+ + Args: + remote_room_hosts (list[str]): List of servers to use to try and + reject invite + room_id (str) + target (UserID): The user rejecting the invite + + Returns: + Deferred[dict]: A dictionary to be returned to the client, may + include event_id etc, or nothing if we locally rejected + """ fed_handler = self.federation_handler try: ret = yield fed_handler.do_remotely_reject_invite( From cea462e2857d3af2007521f131adb46e4f5ce6fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:22:21 +0000 Subject: [PATCH 199/200] s/replication_server/federation_server --- synapse/federation/transport/server.py | 2 +- synapse/server.py | 4 ++-- tests/handlers/test_profile.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 04b83e691..a66a6b069 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -1190,7 +1190,7 @@ GROUP_ATTESTATION_SERVLET_CLASSES = ( def register_servlets(hs, resource, authenticator, ratelimiter): for servletclass in FEDERATION_SERVLET_CLASSES: servletclass( - handler=hs.get_replication_server(), + handler=hs.get_federation_server(), authenticator=authenticator, ratelimiter=ratelimiter, server_name=hs.hostname, diff --git a/synapse/server.py b/synapse/server.py index 894e9c2ac..802a79384 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -102,7 +102,7 @@ class HomeServer(object): 'http_client', 'db_pool', 'replication_client', - 'replication_server', + 'federation_server', 'handlers', 'v1auth', 'auth', @@ -202,7 +202,7 @@ class HomeServer(object): def build_replication_client(self): return FederationClient(self) - def build_replication_server(self): + def build_federation_server(self): return FederationServer(self) def build_handlers(self): diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index c69043768..f9f828471 100644 --- 
a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -52,7 +52,7 @@ class ProfileTestCase(unittest.TestCase): handlers=None, resource_for_federation=Mock(), replication_client=self.mock_federation, - replication_server=Mock(), + federation_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", From cb9f8e527c09315eea05955ec970154ea2fb9729 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:26:52 +0000 Subject: [PATCH 200/200] s/replication_client/federation_client/ --- synapse/app/homeserver.py | 2 +- synapse/handlers/device.py | 2 +- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/federation.py | 2 +- synapse/handlers/profile.py | 2 +- synapse/handlers/room_list.py | 2 +- synapse/server.py | 4 ++-- tests/handlers/test_directory.py | 2 +- tests/handlers/test_e2e_keys.py | 2 +- tests/handlers/test_profile.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_profile.py | 2 +- tests/rest/client/v1/test_rooms.py | 16 ++++++++-------- tests/rest/client/v1/test_typing.py | 2 +- tests/storage/test_appservice.py | 10 +++++----- 17 files changed, 29 insertions(+), 29 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 503f461ab..e477c7ced 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -348,7 +348,7 @@ def setup(config_options): hs.get_state_handler().start_caching() hs.get_datastore().start_profiling() hs.get_datastore().start_doing_background_updates() - hs.get_replication_client().start_get_pdu_cache() + hs.get_federation_client().start_get_pdu_cache() register_memory_metrics(hs) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index fcf41630d..40f3d2467 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -431,7 +431,7 @@ class DeviceListEduUpdater(object): def 
__init__(self, hs, device_handler): self.store = hs.get_datastore() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() self.clock = hs.get_clock() self.device_handler = device_handler diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index dfe04eb1c..c5b6e75e0 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -36,7 +36,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 0ca8d036e..31b1ece13 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) class E2eKeysHandler(object): def __init__(self, hs): self.store = hs.get_datastore() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() self.device_handler = hs.get_device_handler() self.is_mine = hs.is_mine self.clock = hs.get_clock() diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cfd437916..080aca3d7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -68,7 +68,7 @@ class FederationHandler(BaseHandler): self.hs = hs self.store = hs.get_datastore() - self.replication_layer = hs.get_replication_client() + self.replication_layer = hs.get_federation_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname self.keyring = hs.get_keyring() diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 0cfac60d7..cb710fe79 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -31,7 +31,7 @@ class 
ProfileHandler(BaseHandler): def __init__(self, hs): super(ProfileHandler, self).__init__(hs) - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index f79bd8902..5d81f59b4 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -409,7 +409,7 @@ class RoomListHandler(BaseHandler): def _get_remote_list_cached(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): - repl_layer = self.hs.get_replication_client() + repl_layer = self.hs.get_federation_client() if search_filter: # We can't cache when asking for search return repl_layer.get_public_rooms( diff --git a/synapse/server.py b/synapse/server.py index 802a79384..43c6e0a6d 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -101,7 +101,7 @@ class HomeServer(object): DEPENDENCIES = [ 'http_client', 'db_pool', - 'replication_client', + 'federation_client', 'federation_server', 'handlers', 'v1auth', @@ -199,7 +199,7 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter - def build_replication_client(self): + def build_federation_client(self): return FederationClient(self) def build_federation_server(self): diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index b4f36b27a..7e5332e27 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -47,7 +47,7 @@ class DirectoryTestCase(unittest.TestCase): hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), - replication_client=self.mock_federation, + federation_client=self.mock_federation, federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 
fe73f2b96..d1bd87b89 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -34,7 +34,7 @@ class E2eKeysHandlerTestCase(unittest.TestCase): def setUp(self): self.hs = yield utils.setup_test_homeserver( handlers=None, - replication_client=mock.Mock(), + federation_client=mock.Mock(), ) self.handler = synapse.handlers.e2e_keys.E2eKeysHandler(self.hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index f9f828471..458296ee4 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -51,7 +51,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, handlers=None, resource_for_federation=Mock(), - replication_client=self.mock_federation, + federation_client=self.mock_federation, federation_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index ceffdaad5..64e07a8c9 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,7 +31,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.hs = yield setup_test_homeserver( "blue", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index f04bf7dfd..2b89c0a3c 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -114,7 +114,7 @@ class EventStreamPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index feddcf024..deac7f100 100644 --- a/tests/rest/client/v1/test_profile.py +++ 
b/tests/rest/client/v1/test_profile.py @@ -45,7 +45,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, resource_for_client=self.mock_resource, federation=Mock(), - replication_client=Mock(), + federation_client=Mock(), profile_handler=self.mock_handler ) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 2c0708b0d..7e8966a1a 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -46,7 +46,7 @@ class RoomPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -409,7 +409,7 @@ class RoomsMemberListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -493,7 +493,7 @@ class RoomsCreateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -582,7 +582,7 @@ class RoomTopicTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -697,7 +697,7 @@ class RoomMemberStateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -829,7 +829,7 @@ class RoomMessagesTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - 
replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -929,7 +929,7 @@ class RoomInitialSyncTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), @@ -1003,7 +1003,7 @@ class RoomMessageListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 62639e3ad..2ec4ecab5 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(RestTestCase): "red", clock=self.clock, http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index cc0df7f66..c2e39a728 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -42,7 +42,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) self.as_token = "token1" @@ -119,7 +119,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) self.db_pool = hs.get_db_pool() @@ -455,7 +455,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) 
ApplicationServiceStore(None, hs) @@ -473,7 +473,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) with self.assertRaises(ConfigError) as cm: @@ -497,7 +497,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) with self.assertRaises(ConfigError) as cm: