From 069ae2a5d62976eab50c27e6e746c3fb1eeb0761 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 13 Dec 2017 15:11:22 +0000 Subject: [PATCH 001/171] Bump changelog and version --- CHANGES.rst | 37 +++++++++++++++++++++++++++++++++++++ synapse/__init__.py | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index aff6688b9..4bbc7a753 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,40 @@ +Changes in synapse v0.26.0 (2017-12-13) +======================================= + +Features: + +* Add ability for ASes to publicise groups for their users (PR #2686) +* Add all local users to the user_directory and optionally search them (PR + #2723) +* Add support for custom login types for validating users (PR #2729) + + +Changes: + +* Update example Prometheus config to new format (PR #2648) Thanks to + @krombel! +* Rename redact_content option to include_content in Push API (PR #2650) +* Declare support for r0.3.0 (PR #2677) +* Improve upserts (PR #2684, #2688, #2689, #2713) +* Improve documentation of workers (PR #2700) +* Improve tracebacks on exceptions (PR #2705) +* Allow guest access to group APIs for reading (PR #2715) +* Support for posting content in federation_client script (PR #2716) +* Delete devices and pushers on logouts etc (PR #2722) + + +Bug fixes: + +* Fix database port script (PR #2673) +* Fix internal server error on login with ldap_auth_provider (PR #2678) Thanks + to @jkolo! +* Fix error on sqlite 3.7 (PR #2697) +* Fix OPTIONS on preview_url (PR #2707) +* Fix error handling on dns lookup (PR #2711) +* Fix wrong avatars when inviting multiple users when creating room (PR #2717) +* Fix 500 when joining matrix-dev (PR #2719) + + Changes in synapse v0.25.1 (2017-11-17) ======================================= diff --git a/synapse/__init__.py b/synapse/__init__.py index 8c3d7a210..fb2a88a3f 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.25.1" +__version__ = "0.26.0-rc1" From 80573e39005fbc9795cadf03b42f6e1a342b6623 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 13 Dec 2017 15:15:33 +0000 Subject: [PATCH 002/171] Fix rc version number --- CHANGES.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4bbc7a753..77b65ebe0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,5 +1,5 @@ -Changes in synapse v0.26.0 (2017-12-13) -======================================= +Changes in synapse v0.26.0-rc1 (2017-12-13) +=========================================== Features: From 9419bb5776d656bbc0a47e8123099794da6da40e Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Sat, 16 Dec 2017 22:09:53 +0200 Subject: [PATCH 003/171] mention federation tester more prominently in the readme --- README.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.rst b/README.rst index 6f146b63b..76fe2e413 100644 --- a/README.rst +++ b/README.rst @@ -632,6 +632,11 @@ largest boxes pause for thought.) Troubleshooting --------------- + +You can use the federation tester to check if your homeserver is all set: +``https://matrix.org/federationtester/api/report?server_name=`` +If any of the attributes under "checks" is false, federation won't work. + The typical failure mode with federation is that when you try to join a room, it is rejected with "401: Unauthorized". Generally this means that other servers in the room couldn't access yours. (Joining a room over federation is a From 75c1b8df01273aafbbe25afb47582021dbb82498 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 20 Dec 2017 19:31:00 +0000 Subject: [PATCH 004/171] Better logging when login can't find a 3pid --- synapse/rest/client/v1/login.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 5669ecb72..45844aa2d 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -191,19 +191,25 @@ class LoginRestServlet(ClientV1RestServlet): # convert threepid identifiers to user IDs if identifier["type"] == "m.id.thirdparty": - if 'medium' not in identifier or 'address' not in identifier: + address = identifier.get('address') + medium = identifier.get('medium') + + if medium is None or address is None: raise SynapseError(400, "Invalid thirdparty identifier") - address = identifier['address'] - if identifier['medium'] == 'email': + if medium == 'email': # For emails, transform the address to lowercase. # We store all email addreses as lowercase in the DB. # (See add_threepid in synapse/handlers/auth.py) address = address.lower() user_id = yield self.hs.get_datastore().get_user_id_by_threepid( - identifier['medium'], address + medium, address, ) if not user_id: + logger.warn( + "unknown 3pid identifier medium %s, address %r", + medium, address, + ) raise LoginError(403, "", errcode=Codes.FORBIDDEN) identifier = { From a7b726ad181935da16ede550e1202b77632823f3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 30 Dec 2017 18:40:19 +0000 Subject: [PATCH 005/171] federation_client: clean up imports --- synapse/federation/federation_client.py | 31 +++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index b8f02f539..0f754f933 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -14,28 +14,25 @@ # limitations under the License. -from twisted.internet import defer - -from .federation_base import FederationBase -from synapse.api.constants import Membership - -from synapse.api.errors import ( - CodeMessageException, HttpResponseException, SynapseError, -) -from synapse.util import unwrapFirstError, logcontext -from synapse.util.caches.expiringcache import ExpiringCache -from synapse.util.logutils import log_function -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn -from synapse.events import FrozenEvent, builder -import synapse.metrics - -from synapse.util.retryutils import NotRetryingDestination - import copy import itertools import logging import random +from twisted.internet import defer + +from synapse.api.constants import Membership +from synapse.api.errors import ( + CodeMessageException, HttpResponseException, SynapseError, +) +from synapse.events import FrozenEvent, builder +from synapse.federation.federation_base import FederationBase +import synapse.metrics +from synapse.util import logcontext, unwrapFirstError +from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn +from synapse.util.logutils import log_function +from synapse.util.retryutils import NotRetryingDestination logger = logging.getLogger(__name__) From 65abc90fb6f10a91d72bc4518a8d155251d23a6f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 30 Dec 2017 18:40:19 +0000 Subject: [PATCH 006/171] federation_server: clean up imports --- synapse/federation/federation_server.py | 31 +++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index a2327f24b..5fdfbbeee 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -12,25 +12,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer - -from .federation_base import FederationBase -from .units import Transaction, Edu - -from synapse.util import async -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn -from synapse.util.logutils import log_function -from synapse.util.caches.response_cache import ResponseCache -from synapse.events import FrozenEvent -from synapse.types import get_domain_from_id -import synapse.metrics - -from synapse.api.errors import AuthError, FederationError, SynapseError - -from synapse.crypto.event_signing import compute_event_signature +import logging import simplejson as json -import logging +from twisted.internet import defer + +from synapse.api.errors import AuthError, FederationError, SynapseError +from synapse.crypto.event_signing import compute_event_signature +from synapse.events import FrozenEvent +from synapse.federation.federation_base import FederationBase +from synapse.federation.units import Edu, Transaction +import synapse.metrics +from synapse.types import get_domain_from_id +from synapse.util import async +from synapse.util.caches.response_cache import ResponseCache +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn +from synapse.util.logutils import log_function # when processing incoming transactions, we try to handle multiple rooms in # parallel, up to this limit. From 3079f80d4a67714294660e27214cd2a55bb7ecf1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 30 Dec 2017 18:40:19 +0000 Subject: [PATCH 007/171] Factor out `event_from_pdu_json` turns out we have two copies of this, and neither needs to be an instance method --- synapse/federation/federation_base.py | 20 ++++++++++++++ synapse/federation/federation_client.py | 36 +++++++++++-------------- synapse/federation/federation_server.py | 25 +++++++---------- 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index a0f5d40eb..6476cea89 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -16,6 +16,7 @@ import logging from synapse.api.errors import SynapseError from synapse.crypto.event_signing import check_event_content_hash +from synapse.events import FrozenEvent from synapse.events.utils import prune_event from synapse.util import unwrapFirstError, logcontext from twisted.internet import defer @@ -169,3 +170,22 @@ class FederationBase(object): ) return deferreds + + +def event_from_pdu_json(pdu_json, outlier=False): + """Construct a FrozenEvent from an event json received over federation + + Args: + pdu_json (object): pdu as received over federation + outlier (bool): True to mark this event as an outlier + + Returns: + FrozenEvent + """ + event = FrozenEvent( + pdu_json + ) + + event.internal_metadata.outlier = outlier + + return event diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 0f754f933..b1fe03f70 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -25,8 +25,11 @@ from synapse.api.constants import Membership from synapse.api.errors import ( CodeMessageException, HttpResponseException, SynapseError, ) -from synapse.events import FrozenEvent, builder -from synapse.federation.federation_base import FederationBase +from synapse.events import builder +from synapse.federation.federation_base import ( + FederationBase, + event_from_pdu_json, +) import synapse.metrics from synapse.util import logcontext, unwrapFirstError from synapse.util.caches.expiringcache import ExpiringCache @@ -181,7 +184,7 @@ class FederationClient(FederationBase): logger.debug("backfill transaction_data=%s", repr(transaction_data)) pdus = [ - self.event_from_pdu_json(p, outlier=False) + event_from_pdu_json(p, outlier=False) for p in transaction_data["pdus"] ] @@ -241,7 +244,7 @@ class FederationClient(FederationBase): logger.debug("transaction_data %r", transaction_data) pdu_list = [ - self.event_from_pdu_json(p, outlier=outlier) + event_from_pdu_json(p, outlier=outlier) for p in transaction_data["pdus"] ] @@ -333,11 +336,11 @@ class FederationClient(FederationBase): ) pdus = [ - self.event_from_pdu_json(p, outlier=True) for p in result["pdus"] + event_from_pdu_json(p, outlier=True) for p in result["pdus"] ] auth_chain = [ - self.event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, outlier=True) for p in result.get("auth_chain", []) ] @@ -438,7 +441,7 @@ class FederationClient(FederationBase): ) auth_chain = [ - self.event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, outlier=True) for p in res["auth_chain"] ] @@ -567,12 +570,12 @@ class FederationClient(FederationBase): logger.debug("Got content: %s", content) state = [ - self.event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, outlier=True) for p in content.get("state", []) ] auth_chain = [ - self.event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, outlier=True) for p in content.get("auth_chain", []) ] @@ -647,7 +650,7 @@ class FederationClient(FederationBase): logger.debug("Got response to send_invite: %s", pdu_dict) - pdu = self.event_from_pdu_json(pdu_dict) + pdu = event_from_pdu_json(pdu_dict) # Check signatures are correct. pdu = yield self._check_sigs_and_hash(pdu) @@ -737,7 +740,7 @@ class FederationClient(FederationBase): ) auth_chain = [ - self.event_from_pdu_json(e) + event_from_pdu_json(e) for e in content["auth_chain"] ] @@ -785,7 +788,7 @@ class FederationClient(FederationBase): ) events = [ - self.event_from_pdu_json(e) + event_from_pdu_json(e) for e in content.get("events", []) ] @@ -802,15 +805,6 @@ class FederationClient(FederationBase): defer.returnValue(signed_events) - def event_from_pdu_json(self, pdu_json, outlier=False): - event = FrozenEvent( - pdu_json - ) - - event.internal_metadata.outlier = outlier - - return event - @defer.inlineCallbacks def forward_third_party_invite(self, destinations, room_id, event_dict): for destination in destinations: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5fdfbbeee..9849953c9 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -19,8 +19,10 @@ from twisted.internet import defer from synapse.api.errors import AuthError, FederationError, SynapseError from synapse.crypto.event_signing import compute_event_signature -from synapse.events import FrozenEvent -from synapse.federation.federation_base import FederationBase +from synapse.federation.federation_base import ( + FederationBase, + event_from_pdu_json, +) from synapse.federation.units import Edu, Transaction import synapse.metrics from synapse.types import get_domain_from_id @@ -169,7 +171,7 @@ class FederationServer(FederationBase): p["age_ts"] = request_time - int(p["age"]) del p["age"] - event = self.event_from_pdu_json(p) + event = event_from_pdu_json(p) room_id = event.room_id pdus_by_room.setdefault(room_id, []).append(event) @@ -343,7 +345,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_invite_request(self, origin, content): - pdu = self.event_from_pdu_json(content) + pdu = event_from_pdu_json(content) ret_pdu = yield self.handler.on_invite_request(origin, pdu) time_now = self._clock.time_msec() defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)})) @@ -351,7 +353,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_send_join_request(self, origin, content): logger.debug("on_send_join_request: content: %s", content) - pdu = self.event_from_pdu_json(content) + pdu = event_from_pdu_json(content) logger.debug("on_send_join_request: pdu sigs: %s", pdu.signatures) res_pdus = yield self.handler.on_send_join_request(origin, pdu) time_now = self._clock.time_msec() @@ -371,7 +373,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_send_leave_request(self, origin, content): logger.debug("on_send_leave_request: content: %s", content) - pdu = self.event_from_pdu_json(content) + pdu = event_from_pdu_json(content) logger.debug("on_send_leave_request: pdu sigs: %s", pdu.signatures) yield self.handler.on_send_leave_request(origin, pdu) defer.returnValue((200, {})) @@ -408,7 +410,7 @@ class FederationServer(FederationBase): """ with (yield self._server_linearizer.queue((origin, room_id))): auth_chain = [ - self.event_from_pdu_json(e) + event_from_pdu_json(e) for e in content["auth_chain"] ] @@ -583,15 +585,6 @@ class FederationServer(FederationBase): def __str__(self): return "" % self.server_name - def event_from_pdu_json(self, pdu_json, outlier=False): - event = FrozenEvent( - pdu_json - ) - - event.internal_metadata.outlier = outlier - - return event - @defer.inlineCallbacks def exchange_third_party_invite( self, From bd91857028e0b7adf046a379a0eee030a92c1249 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 30 Dec 2017 18:40:19 +0000 Subject: [PATCH 008/171] Check missing fields in event_from_pdu_json Return a 400 rather than a 500 when somebody messes up their send_join --- synapse/federation/federation_base.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 6476cea89..7918d3e44 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -18,6 +18,7 @@ from synapse.api.errors import SynapseError from synapse.crypto.event_signing import check_event_content_hash from synapse.events import FrozenEvent from synapse.events.utils import prune_event +from synapse.http.servlet import assert_params_in_request from synapse.util import unwrapFirstError, logcontext from twisted.internet import defer @@ -181,7 +182,13 @@ def event_from_pdu_json(pdu_json, outlier=False): Returns: FrozenEvent + + Raises: + SynapseError: if the pdu is missing required fields """ + # we could probably enforce a bunch of other fields here (room_id, sender, + # origin, etc etc) + assert_params_in_request(pdu_json, ('event_id', 'type')) event = FrozenEvent( pdu_json ) From 864a6d297767dd6b91e66a64f74d1ddefcd10ef1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 5 Jan 2018 10:54:01 +0000 Subject: [PATCH 009/171] Bump version and changelog --- CHANGES.rst | 6 ++++++ synapse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 77b65ebe0..dcf9adc95 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +Changes in synapse v0.26.0 (2018-01-05) +======================================= + +No changes since v0.26.0-rc1 + + Changes in synapse v0.26.0-rc1 (2017-12-13) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index fb2a88a3f..ef8853bd2 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.26.0-rc1" +__version__ = "0.26.0" From 840f72356e93971dedd7c66117c293866060c54f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 5 Jan 2018 12:30:28 +0000 Subject: [PATCH 010/171] Remove 'verbosity'/'log_file' from generated cfg ... because these only really exist to confuse people nowadays. Also bring log config more into line with the generated log config, by making `level_for_storage` apply to the `synapse.storage.SQL` logger rather than `synapse.storage`. --- synapse/config/logger.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index a1d6e4d4f..9e8003ad6 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -79,12 +79,6 @@ class LoggingConfig(Config): os.path.join(config_dir_path, server_name + ".log.config") ) return """ - # Logging verbosity level. Ignored if log_config is specified. - verbose: 0 - - # File to write logging to. Ignored if log_config is specified. - log_file: "%(log_file)s" - # A yaml python logging config file log_config: "%(log_config)s" """ % locals() @@ -150,6 +144,9 @@ def setup_logging(config, use_worker_options=False): ) if log_config is None: + # We don't have a logfile, so fall back to the 'verbosity' param from + # the config or cmdline. (Note that we generate a log config for new + # installs, so this will be an unusual case) level = logging.INFO level_for_storage = logging.INFO if config.verbosity: @@ -157,11 +154,10 @@ def setup_logging(config, use_worker_options=False): if config.verbosity > 1: level_for_storage = logging.DEBUG - # FIXME: we need a logging.WARN for a -q quiet option logger = logging.getLogger('') logger.setLevel(level) - logging.getLogger('synapse.storage').setLevel(level_for_storage) + logging.getLogger('synapse.storage.SQL').setLevel(level_for_storage) formatter = logging.Formatter(log_format) if log_file: From eccfc8e928020e7b009979a629278718d49f8e24 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sun, 7 Jan 2018 19:52:58 -0700 Subject: [PATCH 011/171] Fix templating error with unban permission message Fixes https://github.com/matrix-org/synapse/issues/2759 Signed-off-by: Travis Ralston --- synapse/event_auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 061ee86b1..cd5627e36 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -319,7 +319,7 @@ def _is_membership_change_allowed(event, auth_events): # TODO (erikj): Implement kicks. if target_banned and user_level < ban_level: raise AuthError( - 403, "You cannot unban user &s." % (target_user_id,) + 403, "You cannot unban user %s." % (target_user_id,) ) elif target_user_id != event.user_id: kick_level = _get_named_level(auth_events, "kick", 50) From 3a556f1ea024d93ba2c1a93a88137fe82f0b3298 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 11:27:19 +0000 Subject: [PATCH 012/171] Make indentation of generated log config consistent (we had a mix of 2- and 4-space indents) --- synapse/config/logger.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 9e8003ad6..9ee46c4de 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -28,27 +28,27 @@ DEFAULT_LOG_CONFIG = Template(""" version: 1 formatters: - precise: - format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s\ -- %(message)s' + precise: + format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - \ +%(request)s - %(message)s' filters: - context: - (): synapse.util.logcontext.LoggingContextFilter - request: "" + context: + (): synapse.util.logcontext.LoggingContextFilter + request: "" handlers: - file: - class: logging.handlers.RotatingFileHandler - formatter: precise - filename: ${log_file} - maxBytes: 104857600 - backupCount: 10 - filters: [context] - console: - class: logging.StreamHandler - formatter: precise - filters: [context] + file: + class: logging.handlers.RotatingFileHandler + formatter: precise + filename: ${log_file} + maxBytes: 104857600 + backupCount: 10 + filters: [context] + console: + class: logging.StreamHandler + formatter: precise + filters: [context] loggers: synapse: From 0211464ba2eb188ee519030ada2ed6b517b41513 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 11:28:33 +0000 Subject: [PATCH 013/171] Fix broken config UTs https://github.com/matrix-org/synapse/pull/2755 broke log-config generation, which in turn broke the unit tests. --- synapse/config/logger.py | 4 ++-- tests/config/test_generate.py | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 9e8003ad6..092212518 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -74,7 +74,6 @@ class LoggingConfig(Config): self.log_file = self.abspath(config.get("log_file")) def default_config(self, config_dir_path, server_name, **kwargs): - log_file = self.abspath("homeserver.log") log_config = self.abspath( os.path.join(config_dir_path, server_name + ".log.config") ) @@ -117,9 +116,10 @@ class LoggingConfig(Config): def generate_files(self, config): log_config = config.get("log_config") if log_config and not os.path.exists(log_config): + log_file = self.abspath("homeserver.log") with open(log_config, "wb") as log_config_file: log_config_file.write( - DEFAULT_LOG_CONFIG.substitute(log_file=config["log_file"]) + DEFAULT_LOG_CONFIG.substitute(log_file=log_file) ) diff --git a/tests/config/test_generate.py b/tests/config/test_generate.py index 8f57fbeb2..879159cce 100644 --- a/tests/config/test_generate.py +++ b/tests/config/test_generate.py @@ -12,9 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os.path +import re import shutil import tempfile + from synapse.config.homeserver import HomeServerConfig from tests import unittest @@ -23,7 +26,6 @@ class ConfigGenerationTestCase(unittest.TestCase): def setUp(self): self.dir = tempfile.mkdtemp() - print self.dir self.file = os.path.join(self.dir, "homeserver.yaml") def tearDown(self): @@ -48,3 +50,16 @@ class ConfigGenerationTestCase(unittest.TestCase): ]), set(os.listdir(self.dir)) ) + + self.assert_log_filename_is( + os.path.join(self.dir, "lemurs.win.log.config"), + os.path.join(os.getcwd(), "homeserver.log"), + ) + + def assert_log_filename_is(self, log_config_file, expected): + with open(log_config_file) as f: + config = f.read() + # find the 'filename' line + matches = re.findall("^\s*filename:\s*(.*)$", config, re.M) + self.assertEqual(1, len(matches)) + self.assertEqual(matches[0], expected) From 142fb0a7d40f17d834e1e4d13963839195c1040b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 12:03:12 +0000 Subject: [PATCH 014/171] Disable user_directory updates for UTs Fix flakiness in the UTs caused by the user_directory being updated in the background --- tests/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/utils.py b/tests/utils.py index ed8a7360f..44e5f7509 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -58,6 +58,10 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.email_enable_notifs = False config.block_non_admin_invites = False + # disable user directory updates, because they get done in the + # background, which upsets the test runner. + config.update_user_directory = False + config.use_frozen_dicts = True config.database_config = {"name": "sqlite3"} config.ldap_enabled = False From 374c4d4cedf2366f145934d750fd309209b0de62 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 12:04:52 +0000 Subject: [PATCH 015/171] Remove dead code pointless function is pointless --- tests/rest/client/v1/test_rooms.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index de376fb51..9f3725538 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -515,9 +515,6 @@ class RoomsCreateTestCase(RestTestCase): synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) - def tearDown(self): - pass - @defer.inlineCallbacks def test_post_room_no_keys(self): # POST with no config keys, expect new room id From f8fa5ae4af770cdf69e2d335f3aff0b823b34009 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 12:05:25 +0000 Subject: [PATCH 016/171] enable twisted delayedcall debugging in UTs --- tests/unittest.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/unittest.py b/tests/unittest.py index 38715972d..7b478c429 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import twisted from twisted.trial import unittest import logging @@ -65,6 +65,10 @@ class TestCase(unittest.TestCase): @around(self) def setUp(orig): + # enable debugging of delayed calls - this means that we get a + # traceback when a unit test exits leaving things on the reactor. + twisted.internet.base.DelayedCall.debug = True + old_level = logging.getLogger().level if old_level != level: From b30cd5b107acafce43fb63c471c086b8df4d981a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 11:31:00 +0000 Subject: [PATCH 017/171] Remove dead code related to default thumbnails --- synapse/rest/media/v1/thumbnail_resource.py | 76 +-------------------- synapse/storage/media_repository.py | 3 - 2 files changed, 3 insertions(+), 76 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 68d56b2b1..779fd3e9b 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,11 +85,6 @@ class ThumbnailResource(Resource): respond_404(request) return - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.local_media_filepath(media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) if thumbnail_infos: @@ -114,9 +109,7 @@ class ThumbnailResource(Resource): yield respond_with_file(request, t_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, width, height, method, m_type, - ) + respond_404(request) @defer.inlineCallbacks def _select_or_generate_local_thumbnail(self, request, media_id, desired_width, @@ -128,11 +121,6 @@ class ThumbnailResource(Resource): respond_404(request) return - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.local_media_filepath(media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) for info in thumbnail_infos: t_w = info["thumbnail_width"] == desired_width @@ -166,10 +154,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, desired_width, desired_height, - desired_method, desired_type, - ) + respond_404(request) @defer.inlineCallbacks def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, @@ -177,11 +162,6 @@ class ThumbnailResource(Resource): desired_method, desired_type): media_info = yield self.media_repo.get_remote_media(server_name, media_id) - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.remote_media_filepath(server_name, media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) @@ -213,23 +193,13 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, desired_width, desired_height, - desired_method, desired_type, - ) + respond_404(request) @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead. - media_info = yield self.media_repo.get_remote_media(server_name, media_id) - - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.remote_media_filepath(server_name, media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) @@ -250,47 +220,7 @@ class ThumbnailResource(Resource): ) yield respond_with_file(request, t_type, file_path, t_length) else: - yield self._respond_default_thumbnail( - request, media_info, width, height, method, m_type, - ) - - @defer.inlineCallbacks - def _respond_default_thumbnail(self, request, media_info, width, height, - method, m_type): - # XXX: how is this meant to work? store.get_default_thumbnails - # appears to always return [] so won't this always 404? - media_type = media_info["media_type"] - top_level_type = media_type.split("/")[0] - sub_type = media_type.split("/")[-1].split(";")[0] - thumbnail_infos = yield self.store.get_default_thumbnails( - top_level_type, sub_type, - ) - if not thumbnail_infos: - thumbnail_infos = yield self.store.get_default_thumbnails( - top_level_type, "_default", - ) - if not thumbnail_infos: - thumbnail_infos = yield self.store.get_default_thumbnails( - "_default", "_default", - ) - if not thumbnail_infos: respond_404(request) - return - - thumbnail_info = self._select_thumbnail( - width, height, "crop", m_type, thumbnail_infos - ) - - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - t_length = thumbnail_info["thumbnail_length"] - - file_path = self.filepaths.default_thumbnail( - top_level_type, sub_type, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path, t_length) def _select_thumbnail(self, desired_width, desired_height, desired_method, desired_type, thumbnail_infos): diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index a66ff7c1e..6ebc37249 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -29,9 +29,6 @@ class MediaRepositoryStore(BackgroundUpdateStore): where_clause='url_cache IS NOT NULL', ) - def get_default_thumbnails(self, top_level_type, sub_type): - return [] - def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: From 51c9d9ed65af58d4fb5657bad877dbf1f0dcaf39 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 14:39:12 +0000 Subject: [PATCH 018/171] Add /room/{id}/event/{id} to synapse Turns out that there is a valid usecase for retrieving event by id (notably having received a push), but event ids should be scoped to room, so /event/{id} is wrong. --- synapse/rest/client/v1/room.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 75b735b47..682a0af9f 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -487,13 +487,35 @@ class RoomInitialSyncRestServlet(ClientV1RestServlet): defer.returnValue((200, content)) -class RoomEventContext(ClientV1RestServlet): +class RoomEventServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns( + "/rooms/(?P[^/]*)/event/(?P[^/]*)$" + ) + + def __init__(self, hs): + super(RoomEventServlet, self).__init__(hs) + self.clock = hs.get_clock() + self.event_handler = hs.get_event_handler() + + @defer.inlineCallbacks + def on_GET(self, request, room_id, event_id): + requester = yield self.auth.get_user_by_req(request) + event = yield self.event_handler.get_event(requester.user, event_id) + + time_now = self.clock.time_msec() + if event: + defer.returnValue((200, serialize_event(event, time_now))) + else: + defer.returnValue((404, "Event not found.")) + + +class RoomEventContextServlet(ClientV1RestServlet): PATTERNS = client_path_patterns( "/rooms/(?P[^/]*)/context/(?P[^/]*)$" ) def __init__(self, hs): - super(RoomEventContext, self).__init__(hs) + super(RoomEventContextServlet, self).__init__(hs) self.clock = hs.get_clock() self.handlers = hs.get_handlers() @@ -803,4 +825,5 @@ def register_servlets(hs, http_server): RoomTypingRestServlet(hs).register(http_server) SearchRestServlet(hs).register(http_server) JoinedRoomsRestServlet(hs).register(http_server) - RoomEventContext(hs).register(http_server) + RoomEventServlet(hs).register(http_server) + RoomEventContextServlet(hs).register(http_server) From b6c9deffda94f230085d9974379643497749e7f5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 15:53:23 +0000 Subject: [PATCH 019/171] Remove dead TODO --- synapse/rest/media/v1/thumbnail_resource.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 779fd3e9b..70dbf7f5c 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -198,8 +198,6 @@ class ThumbnailResource(Resource): @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): - # TODO: Don't download the whole remote file - # We should proxy the thumbnail from the remote server instead. thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) From 47ca5eb8822ddc376b098a2747e1dbb85b2ce32b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 5 Jan 2018 15:09:43 +0000 Subject: [PATCH 020/171] Split out add_file_headers --- synapse/rest/media/v1/_base.py | 70 ++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 95fa95fce..57a450981 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -70,38 +70,11 @@ def respond_with_file(request, media_type, file_path, logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): - request.setHeader(b"Content-Type", media_type.encode("UTF-8")) - if upload_name: - if is_ascii(upload_name): - request.setHeader( - b"Content-Disposition", - b"inline; filename=%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) - else: - request.setHeader( - b"Content-Disposition", - b"inline; filename*=utf-8''%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our - # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) if file_size is None: stat = os.stat(file_path) file_size = stat.st_size - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) + add_file_headers(request, media_type, file_size, upload_name) with open(file_path, "rb") as f: yield logcontext.make_deferred_yieldable( @@ -111,3 +84,44 @@ def respond_with_file(request, media_type, file_path, finish_request(request) else: respond_404(request) + + +def add_file_headers(request, media_type, file_size, upload_name): + """Adds the correct response headers in preparation for responding with the + media. + + Args: + request (twisted.web.http.Request) + media_type (str): The media/content type. + file_size (int): Size in bytes of the media, if known. + upload_name (str): The name of the requested file, if any. + """ + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + if upload_name: + if is_ascii(upload_name): + request.setHeader( + b"Content-Disposition", + b"inline; filename=%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) + else: + request.setHeader( + b"Content-Disposition", + b"inline; filename*=utf-8''%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + + request.setHeader( + b"Content-Length", b"%d" % (file_size,) + ) From 1ee787912b1fda1f9255627b0ea5f69cf021679d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 16:58:09 +0000 Subject: [PATCH 021/171] Add some helper classes --- synapse/rest/media/v1/_base.py | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 57a450981..131082048 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -125,3 +125,76 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader( b"Content-Length", b"%d" % (file_size,) ) + + +@defer.inlineCallbacks +def respond_with_responder(request, responder, media_type, file_size, upload_name=None): + """Responds to the request with given responder. If responder is None then + returns 404. + + Args: + request (twisted.web.http.Request) + responder (Responder) + media_type (str): The media/content type. + file_size (int): Size in bytes of the media, if known. + upload_name (str): The name of the requested file, if any. + """ + if not responder: + respond_404(request) + return + + add_file_headers(request, media_type, file_size, upload_name) + yield responder.write_to_consumer(request) + finish_request(request) + + +class Responder(object): + """Represents a response that can be streamed to the requester. + + Either `write_to_consumer` or `cancel` must be called to clean up any open + resources. + """ + def write_to_consumer(self, consumer): + """Stream response into consumer + + Args: + consumer (IConsumer) + + Returns: + Deferred: Resolves once the response has finished being written + """ + pass + + def cancel(self): + """Called when the responder is not going to be used after all. + """ + pass + + +class FileInfo(object): + """Details about a requested/uploaded file. + + Attributes: + server_name (str): The server name where the media originated from, + or None if local. + file_id (str): The local ID of the file. For local files this is the + same as the media_id + media_type (str): Type of the file + url_cache (bool): If the file is for the url preview cache + thumbnail (bool): Whether the file is a thumbnail or not. + thumbnail_width (int) + thumbnail_height (int) + thumbnail_method (int) + thumbnail_type (str) + """ + def __init__(self, server_name, file_id, url_cache=False, + thumbnail=False, thumbnail_width=None, thumbnail_height=None, + thumbnail_method=None, thumbnail_type=None): + self.server_name = server_name + self.file_id = file_id + self.url_cache = url_cache + self.thumbnail = thumbnail + self.thumbnail_width = thumbnail_width + self.thumbnail_height = thumbnail_height + self.thumbnail_method = thumbnail_method + self.thumbnail_type = thumbnail_type From ada470bccbf08486ae9e48afbcc4ebfd8161e93b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:07:30 +0000 Subject: [PATCH 022/171] Add MediaStorage class --- synapse/rest/media/v1/media_storage.py | 198 +++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 synapse/rest/media/v1/media_storage.py diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py new file mode 100644 index 000000000..052745e6f --- /dev/null +++ b/synapse/rest/media/v1/media_storage.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vecotr Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, threads +from twisted.protocols.basic import FileSender + +from ._base import Responder + +from synapse.util.logcontext import make_deferred_yieldable + +import contextlib +import os +import logging +import shutil + +logger = logging.getLogger(__name__) + + +class MediaStorage(object): + """Responsible for storing/fetching files from local sources. + """ + + def __init__(self, local_media_directory, filepaths): + self.local_media_directory = local_media_directory + self.filepaths = filepaths + + @defer.inlineCallbacks + def store_file(self, source, file_info): + """Write `source` to the on disk media store, and also any other + configured storage providers + + Args: + source: A file like object that should be written + file_info (FileInfo): Info about the file to store + + Returns: + Deferred[str]: the file path written to in the primary media store + """ + path = self._file_info_to_path(file_info) + fname = os.path.join(self.local_media_directory, path) + + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + # Write to the main repository + yield make_deferred_yieldable(threads.deferToThread( + _write_file_synchronously, source, fname, + )) + + defer.returnValue(fname) + + @contextlib.contextmanager + def store_into_file(self, file_info): + """Context manager used to get a file like object to write into, as + described by file_info. + + Actually yields a 3-tuple (file, fname, finish_cb), where finish_cb is a + function that returns a Deferred that must be waited on after the file + has been successfully written to. + + Args: + file_info (FileInfo): Info about the file to store + + Example: + + with media_storage.store_into_file(info) as (f, fname, finish_cb): + # .. write into f ... + yield finish_cb() + """ + + path = self._file_info_to_path(file_info) + fname = os.path.join(self.local_media_directory, path) + + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + finished_called = [False] + + def finish(): + # This will be used later when we want to hit out to other storage + # places + finished_called[0] = True + return defer.succeed(None) + + try: + with open(fname, "wb") as f: + yield f, fname, finish + except Exception as e: + try: + os.remove(fname) + except Exception: + pass + raise e + + if not finished_called: + raise Exception("Fnished callback not called") + + @defer.inlineCallbacks + def fetch_media(self, file_info): + """Attempts to fetch media described by file_info from the local cache + and configured storage providers. + + Args: + file_info (FileInfo) + + Returns: + Deferred(Responder): Returns a Responder if the file was found, + otherwise None. + """ + + path = self._file_info_to_path(file_info) + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + defer.returnValue(FileResponder(open(local_path, "rb"))) + + defer.returnValue(None) + + def _file_info_to_path(self, file_info): + """Converts file_info into a relative path. + """ + if file_info.url_cache: + return self.filepaths.url_cache_filepath_rel(file_info.file_id) + + if file_info.server_name: + if file_info.thumbnail: + return self.filepaths.remote_media_thumbnail_rel( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method + ) + return self.filepaths.remote_media_filepath_rel( + file_info.server_name, file_info.file_id, + ) + + if file_info.thumbnail: + return self.filepaths.local_media_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method + ) + return self.filepaths.local_media_filepath_rel( + file_info.file_id, + ) + + +def _write_file_synchronously(source, fname): + """Write `source` to the path `fname` synchronously. Should be called + from a thread. + + Args: + source: A file like object to be written + fname (str): Path to write to + """ + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + source.seek(0) # Ensure we read from the start of the file + with open(fname, "wb") as f: + shutil.copyfileobj(source, f) + + +class FileResponder(Responder): + """Wraps an open file that can be sent to a request. + + Args: + open_file (file): A file like object to be streamed ot the client, + is closed when finished streaming. + """ + def __init__(self, open_file): + self.open_file = open_file + + @defer.inlineCallbacks + def write_to_consumer(self, consumer): + with self.open_file: + yield FileSender().beginFileTransfer(self.open_file, consumer) + + def cancel(self): + self.open_file.close() From dd3092c3a357be2453e25293b5590d627f3cfc48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:45:11 +0000 Subject: [PATCH 023/171] Use MediaStorage for local files --- synapse/rest/media/v1/download_resource.py | 24 +-- synapse/rest/media/v1/media_repository.py | 168 +++++++++------------ 2 files changed, 73 insertions(+), 119 deletions(-) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 6879249c8..2a5afa9ff 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -57,34 +57,12 @@ class DownloadResource(Resource): ) server_name, media_id, name = parse_media_id(request) if server_name == self.server_name: - yield self._respond_local_file(request, media_id, name) + yield self.media_repo.get_local_media(request, media_id, name) else: yield self._respond_remote_file( request, server_name, media_id, name ) - @defer.inlineCallbacks - def _respond_local_file(self, request, media_id, name): - media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - respond_404(request) - return - - media_type = media_info["media_type"] - media_length = media_info["media_length"] - upload_name = name if name else media_info["upload_name"] - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_filepath(media_id) - else: - file_path = self.filepaths.local_media_filepath(media_id) - - yield respond_with_file( - request, media_type, file_path, media_length, - upload_name=upload_name, - ) - @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id, name): # don't forward requests for remote media if allow_remote is false diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index eed9056a2..6ad9320b6 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vecotr Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,6 +19,7 @@ import twisted.internet.error import twisted.web.http from twisted.web.resource import Resource +from ._base import respond_404, RequestWriter, FileInfo, respond_with_responder from .upload_resource import UploadResource from .download_resource import DownloadResource from .thumbnail_resource import ThumbnailResource @@ -25,6 +27,7 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer +from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.util.stringutils import random_string @@ -33,7 +36,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn +from synapse.util.logcontext import make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination import os @@ -74,6 +77,8 @@ class MediaRepository(object): self.recently_accessed_remotes = set() + self.media_storage = MediaStorage(self.primary_base_path, self.filepaths) + self.clock.looping_call( self._update_recently_accessed_remotes, UPDATE_RECENTLY_ACCESSED_REMOTES_TS @@ -88,72 +93,6 @@ class MediaRepository(object): media, self.clock.time_msec() ) - @staticmethod - def _makedirs(filepath): - dirname = os.path.dirname(filepath) - if not os.path.exists(dirname): - os.makedirs(dirname) - - @staticmethod - def _write_file_synchronously(source, fname): - """Write `source` to the path `fname` synchronously. Should be called - from a thread. - - Args: - source: A file like object to be written - fname (str): Path to write to - """ - MediaRepository._makedirs(fname) - source.seek(0) # Ensure we read from the start of the file - with open(fname, "wb") as f: - shutil.copyfileobj(source, f) - - @defer.inlineCallbacks - def write_to_file_and_backup(self, source, path): - """Write `source` to the on disk media store, and also the backup store - if configured. - - Args: - source: A file like object that should be written - path (str): Relative path to write file to - - Returns: - Deferred[str]: the file path written to in the primary media store - """ - fname = os.path.join(self.primary_base_path, path) - - # Write to the main repository - yield make_deferred_yieldable(threads.deferToThread( - self._write_file_synchronously, source, fname, - )) - - # Write to backup repository - yield self.copy_to_backup(path) - - defer.returnValue(fname) - - @defer.inlineCallbacks - def copy_to_backup(self, path): - """Copy a file from the primary to backup media store, if configured. - - Args: - path(str): Relative path to write file to - """ - if self.backup_base_path: - primary_fname = os.path.join(self.primary_base_path, path) - backup_fname = os.path.join(self.backup_base_path, path) - - # We can either wait for successful writing to the backup repository - # or write in the background and immediately return - if self.synchronous_backup_media_store: - yield make_deferred_yieldable(threads.deferToThread( - shutil.copyfile, primary_fname, backup_fname, - )) - else: - preserve_fn(threads.deferToThread)( - shutil.copyfile, primary_fname, backup_fname, - ) - @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): @@ -171,10 +110,13 @@ class MediaRepository(object): """ media_id = random_string(24) - fname = yield self.write_to_file_and_backup( - content, self.filepaths.local_media_filepath_rel(media_id) + file_info = FileInfo( + server_name=None, + file_id=media_id, ) + fname = yield self.media_storage.store_file(content, file_info) + logger.info("Stored local media in file %r", fname) yield self.store.store_local_media( @@ -194,6 +136,30 @@ class MediaRepository(object): defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) + @defer.inlineCallbacks + def get_local_media(self, request, media_id, name): + """Responds to reqests for local media, if exists, or returns 404. + """ + media_info = yield self.store.get_local_media(media_id) + if not media_info or media_info["quarantined_by"]: + respond_404(request) + return + + media_type = media_info["media_type"] + media_length = media_info["media_length"] + upload_name = name if name else media_info["upload_name"] + url_cache = media_info["url_cache"] + + file_info = FileInfo( + None, media_id, + url_cache=url_cache, + ) + + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder( + request, responder, media_type, media_length, upload_name, + ) + @defer.inlineCallbacks def get_remote_media(self, server_name, media_id): key = (server_name, media_id) @@ -368,11 +334,18 @@ class MediaRepository(object): if t_byte_source: try: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) + file_info = FileInfo( + server_name=None, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() @@ -400,11 +373,18 @@ class MediaRepository(object): if t_byte_source: try: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) + file_info = FileInfo( + server_name=server_name, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() @@ -472,20 +452,6 @@ class MediaRepository(object): # Now we generate the thumbnails for each dimension, store it for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): - # Work out the correct file name for thumbnail - if server_name: - file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) - elif url_cache: - file_path = self.filepaths.url_cache_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) - else: - file_path = self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) - # Generate the thumbnail if t_method == "crop": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -505,9 +471,19 @@ class MediaRepository(object): continue try: - # Write to disk - output_path = yield self.write_to_file_and_backup( - t_byte_source, file_path, + file_info = FileInfo( + server_name=server_name, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + url_cache=url_cache, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() From 9e20840e0296eb9b814bb0e7130342c5c9a19e3d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:52:06 +0000 Subject: [PATCH 024/171] Use MediaStorage for remote media --- synapse/rest/media/v1/download_resource.py | 43 +--- synapse/rest/media/v1/media_repository.py | 245 ++++++++++++--------- 2 files changed, 154 insertions(+), 134 deletions(-) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 2a5afa9ff..5dc92972c 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -14,7 +14,7 @@ # limitations under the License. import synapse.http.servlet -from ._base import parse_media_id, respond_with_file, respond_404 +from ._base import parse_media_id, respond_404 from twisted.web.resource import Resource from synapse.http.server import request_handler, set_cors_headers @@ -59,35 +59,14 @@ class DownloadResource(Resource): if server_name == self.server_name: yield self.media_repo.get_local_media(request, media_id, name) else: - yield self._respond_remote_file( - request, server_name, media_id, name - ) + allow_remote = synapse.http.servlet.parse_boolean( + request, "allow_remote", default=True) + if not allow_remote: + logger.info( + "Rejecting request for remote media %s/%s due to allow_remote", + server_name, media_id, + ) + respond_404(request) + return - @defer.inlineCallbacks - def _respond_remote_file(self, request, server_name, media_id, name): - # don't forward requests for remote media if allow_remote is false - allow_remote = synapse.http.servlet.parse_boolean( - request, "allow_remote", default=True) - if not allow_remote: - logger.info( - "Rejecting request for remote media %s/%s due to allow_remote", - server_name, media_id, - ) - respond_404(request) - return - - media_info = yield self.media_repo.get_remote_media(server_name, media_id) - - media_type = media_info["media_type"] - media_length = media_info["media_length"] - filesystem_id = media_info["filesystem_id"] - upload_name = name if name else media_info["upload_name"] - - file_path = self.filepaths.remote_media_filepath( - server_name, filesystem_id - ) - - yield respond_with_file( - request, media_type, file_path, media_length, - upload_name=upload_name, - ) + yield self.media_repo.get_remote_media(request, server_name, media_id, name) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 6ad9320b6..07820fab6 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -19,7 +19,7 @@ import twisted.internet.error import twisted.web.http from twisted.web.resource import Resource -from ._base import respond_404, RequestWriter, FileInfo, respond_with_responder +from ._base import respond_404, FileInfo, respond_with_responder from .upload_resource import UploadResource from .download_resource import DownloadResource from .thumbnail_resource import ThumbnailResource @@ -161,124 +161,165 @@ class MediaRepository(object): ) @defer.inlineCallbacks - def get_remote_media(self, server_name, media_id): + def get_remote_media(self, request, server_name, media_id, name): + """Respond to requests for remote media. + """ + self.recently_accessed_remotes.add((server_name, media_id)) + + # We linearize here to ensure that we don't try and download remote + # media mutliple times concurrently key = (server_name, media_id) with (yield self.remote_media_linearizer.queue(key)): - media_info = yield self._get_remote_media_impl(server_name, media_id) - defer.returnValue(media_info) + responder, media_info = yield self._get_remote_media_impl( + server_name, media_id, + ) + + # We purposefully stream the file outside the lock + if responder: + media_type = media_info["media_type"] + media_length = media_info["media_length"] + upload_name = name if name else media_info["upload_name"] + yield respond_with_responder( + request, responder, media_type, media_length, upload_name, + ) + else: + respond_404(request) @defer.inlineCallbacks def _get_remote_media_impl(self, server_name, media_id): + """Looks for media in local cache, if not there then attempt to + download from remote server. + + Returns: + Deferred((Respodner, media_info)) + """ media_info = yield self.store.get_cached_remote_media( server_name, media_id ) - if not media_info: - media_info = yield self._download_remote_file( - server_name, media_id - ) - elif media_info["quarantined_by"]: - raise NotFoundError() + + # file_id is the ID we use to track the file locally. If we've already + # seen the file then reuse the existing ID, otherwise genereate a new + # one. + if media_info: + file_id = media_info["filesystem_id"] else: - self.recently_accessed_remotes.add((server_name, media_id)) - yield self.store.update_cached_last_access_time( - [(server_name, media_id)], self.clock.time_msec() - ) - defer.returnValue(media_info) + file_id = random_string(24) + + file_info = FileInfo(server_name, file_id) + + # If we have an entry in the DB, try and look for it + if media_info: + if media_info["quarantined_by"]: + raise NotFoundError() + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + defer.returnValue((responder, media_info)) + + # Failed to find the file anywhere, lets download it. + + media_info = yield self._download_remote_file( + server_name, media_id, file_id + ) + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + defer.returnValue((responder, media_info)) + + defer.returnValue((None, media_info)) @defer.inlineCallbacks - def _download_remote_file(self, server_name, media_id): - file_id = random_string(24) + def _download_remote_file(self, server_name, media_id, file_id): + """Attempt to download the remote file from the given server name, + using the given file_id as the local id. + """ - fpath = self.filepaths.remote_media_filepath_rel( - server_name, file_id + file_info = FileInfo( + server_name=server_name, + file_id=file_id, ) - fname = os.path.join(self.primary_base_path, fpath) - self._makedirs(fname) - try: - with open(fname, "wb") as f: - request_path = "/".join(( - "/_matrix/media/v1/download", server_name, media_id, - )) + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + request_path = "/".join(( + "/_matrix/media/v1/download", server_name, media_id, + )) + try: + length, headers = yield self.client.get_file( + server_name, request_path, output_stream=f, + max_size=self.max_upload_size, args={ + # tell the remote server to 404 if it doesn't + # recognise the server_name, to make sure we don't + # end up with a routing loop. + "allow_remote": "false", + } + ) + except twisted.internet.error.DNSLookupError as e: + logger.warn("HTTP error fetching remote media %s/%s: %r", + server_name, media_id, e) + raise NotFoundError() + + except HttpResponseException as e: + logger.warn("HTTP error fetching remote media %s/%s: %s", + server_name, media_id, e.response) + if e.code == twisted.web.http.NOT_FOUND: + raise SynapseError.from_http_response_exception(e) + raise SynapseError(502, "Failed to fetch remote media") + + except SynapseError: + logger.exception("Failed to fetch remote media %s/%s", + server_name, media_id) + raise + except NotRetryingDestination: + logger.warn("Not retrying destination %r", server_name) + raise SynapseError(502, "Failed to fetch remote media") + except Exception: + logger.exception("Failed to fetch remote media %s/%s", + server_name, media_id) + raise SynapseError(502, "Failed to fetch remote media") + + yield finish() + + media_type = headers["Content-Type"][0] + + time_now_ms = self.clock.time_msec() + + content_disposition = headers.get("Content-Disposition", None) + if content_disposition: + _, params = cgi.parse_header(content_disposition[0],) + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + upload_name = upload_name_ascii + + if upload_name: + upload_name = urlparse.unquote(upload_name) try: - length, headers = yield self.client.get_file( - server_name, request_path, output_stream=f, - max_size=self.max_upload_size, args={ - # tell the remote server to 404 if it doesn't - # recognise the server_name, to make sure we don't - # end up with a routing loop. - "allow_remote": "false", - } - ) - except twisted.internet.error.DNSLookupError as e: - logger.warn("HTTP error fetching remote media %s/%s: %r", - server_name, media_id, e) - raise NotFoundError() + upload_name = upload_name.decode("utf-8") + except UnicodeDecodeError: + upload_name = None + else: + upload_name = None - except HttpResponseException as e: - logger.warn("HTTP error fetching remote media %s/%s: %s", - server_name, media_id, e.response) - if e.code == twisted.web.http.NOT_FOUND: - raise SynapseError.from_http_response_exception(e) - raise SynapseError(502, "Failed to fetch remote media") + logger.info("Stored remote media in file %r", fname) - except SynapseError: - logger.exception("Failed to fetch remote media %s/%s", - server_name, media_id) - raise - except NotRetryingDestination: - logger.warn("Not retrying destination %r", server_name) - raise SynapseError(502, "Failed to fetch remote media") - except Exception: - logger.exception("Failed to fetch remote media %s/%s", - server_name, media_id) - raise SynapseError(502, "Failed to fetch remote media") - - yield self.copy_to_backup(fpath) - - media_type = headers["Content-Type"][0] - time_now_ms = self.clock.time_msec() - - content_disposition = headers.get("Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. - if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - upload_name = urlparse.unquote(upload_name) - try: - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - - logger.info("Stored remote media in file %r", fname) - - yield self.store.store_cached_remote_media( - origin=server_name, - media_id=media_id, - media_type=media_type, - time_now_ms=self.clock.time_msec(), - upload_name=upload_name, - media_length=length, - filesystem_id=file_id, - ) - except Exception: - os.remove(fname) - raise + yield self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=length, + filesystem_id=file_id, + ) media_info = { "media_type": media_type, From 9d30a7691c7a54bf2d299ea3cdbdf4af74dd0af5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 11:08:46 +0000 Subject: [PATCH 025/171] Make ThumbnailResource use MediaStorage --- synapse/rest/media/v1/media_repository.py | 4 +- synapse/rest/media/v1/thumbnail_resource.py | 110 ++++++++++++-------- 2 files changed, 67 insertions(+), 47 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 07820fab6..0c84f1be0 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -637,7 +637,9 @@ class MediaRepositoryResource(Resource): self.putChild("upload", UploadResource(hs, media_repo)) self.putChild("download", DownloadResource(hs, media_repo)) - self.putChild("thumbnail", ThumbnailResource(hs, media_repo)) + self.putChild("thumbnail", ThumbnailResource( + hs, media_repo, media_repo.media_storage, + )) self.putChild("identicon", IdenticonResource()) if hs.config.url_preview_enabled: self.putChild("preview_url", PreviewUrlResource(hs, media_repo)) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70dbf7f5c..f59f30066 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -14,7 +14,10 @@ # limitations under the License. -from ._base import parse_media_id, respond_404, respond_with_file +from ._base import ( + parse_media_id, respond_404, respond_with_file, FileInfo, + respond_with_responder, +) from twisted.web.resource import Resource from synapse.http.servlet import parse_string, parse_integer from synapse.http.server import request_handler, set_cors_headers @@ -30,12 +33,12 @@ logger = logging.getLogger(__name__) class ThumbnailResource(Resource): isLeaf = True - def __init__(self, hs, media_repo): + def __init__(self, hs, media_repo, media_storage): Resource.__init__(self) self.store = hs.get_datastore() - self.filepaths = media_repo.filepaths self.media_repo = media_repo + self.media_storage = media_storage self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.server_name = hs.hostname self.version_string = hs.version_string @@ -91,23 +94,22 @@ class ThumbnailResource(Resource): thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method, - ) - else: - file_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path) + file_info = FileInfo( + server_name=None, file_id=media_id, + url_cache=media_info["url_cache"], + thumbnail=True, + thumbnail_width=thumbnail_info["thumbnail_width"], + thumbnail_height=thumbnail_info["thumbnail_height"], + thumbnail_type=thumbnail_info["thumbnail_type"], + thumbnail_method=thumbnail_info["thumbnail_method"], + ) + t_type = file_info.thumbnail_type + t_length = thumbnail_info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder(request, responder, t_type, t_length) else: respond_404(request) @@ -129,20 +131,23 @@ class ThumbnailResource(Resource): t_type = info["thumbnail_type"] == desired_type if t_w and t_h and t_method and t_type: - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_thumbnail( - media_id, desired_width, desired_height, desired_type, - desired_method, - ) - else: - file_path = self.filepaths.local_media_thumbnail( - media_id, desired_width, desired_height, desired_type, - desired_method, - ) - yield respond_with_file(request, desired_type, file_path) - return + file_info = FileInfo( + server_name=None, file_id=media_id, + url_cache=media_info["url_cache"], + thumbnail=True, + thumbnail_width=info["thumbnail_width"], + thumbnail_height=info["thumbnail_height"], + thumbnail_type=info["thumbnail_type"], + thumbnail_method=info["thumbnail_method"], + ) + + t_type = file_info.thumbnail_type + t_length = info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + yield respond_with_responder(request, responder, t_type, t_length) + return logger.debug("We don't have a local thumbnail of that size. Generating") @@ -175,12 +180,22 @@ class ThumbnailResource(Resource): t_type = info["thumbnail_type"] == desired_type if t_w and t_h and t_method and t_type: - file_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, desired_width, desired_height, - desired_type, desired_method, + file_info = FileInfo( + server_name=None, file_id=media_id, + thumbnail=True, + thumbnail_width=info["thumbnail_width"], + thumbnail_height=info["thumbnail_height"], + thumbnail_type=info["thumbnail_type"], + thumbnail_method=info["thumbnail_method"], ) - yield respond_with_file(request, desired_type, file_path) - return + + t_type = file_info.thumbnail_type + t_length = info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + yield respond_with_responder(request, responder, t_type, t_length) + return logger.debug("We don't have a local thumbnail of that size. Generating") @@ -206,17 +221,20 @@ class ThumbnailResource(Resource): thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - file_id = thumbnail_info["filesystem_id"] + file_info = FileInfo( + server_name=None, file_id=media_id, + thumbnail=True, + thumbnail_width=thumbnail_info["thumbnail_width"], + thumbnail_height=thumbnail_info["thumbnail_height"], + thumbnail_type=thumbnail_info["thumbnail_type"], + thumbnail_method=thumbnail_info["thumbnail_method"], + ) + + t_type = file_info.thumbnail_type t_length = thumbnail_info["thumbnail_length"] - file_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path, t_length) + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder(request, responder, t_type, t_length) else: respond_404(request) From 2442e9876c2622e345ab62a414e509d4f5cecb4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 14:36:07 +0000 Subject: [PATCH 026/171] Make PreviewUrlResource use MediaStorage --- synapse/rest/media/v1/media_repository.py | 4 +++- synapse/rest/media/v1/preview_url_resource.py | 18 +++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0c84f1be0..9aba9f13f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -642,4 +642,6 @@ class MediaRepositoryResource(Resource): )) self.putChild("identicon", IdenticonResource()) if hs.config.url_preview_enabled: - self.putChild("preview_url", PreviewUrlResource(hs, media_repo)) + self.putChild("preview_url", PreviewUrlResource( + hs, media_repo, media_repo.media_storage, + )) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 40d2e664e..f3dbbb3fe 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -17,6 +17,8 @@ from twisted.web.server import NOT_DONE_YET from twisted.internet import defer from twisted.web.resource import Resource +from ._base import FileInfo + from synapse.api.errors import ( SynapseError, Codes, ) @@ -49,7 +51,7 @@ logger = logging.getLogger(__name__) class PreviewUrlResource(Resource): isLeaf = True - def __init__(self, hs, media_repo): + def __init__(self, hs, media_repo, media_storage): Resource.__init__(self) self.auth = hs.get_auth() @@ -62,6 +64,7 @@ class PreviewUrlResource(Resource): self.client = SpiderHttpClient(hs) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path + self.media_storage = media_storage self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist @@ -273,19 +276,21 @@ class PreviewUrlResource(Resource): file_id = datetime.date.today().isoformat() + '_' + random_string(16) - fpath = self.filepaths.url_cache_filepath_rel(file_id) - fname = os.path.join(self.primary_base_path, fpath) - self.media_repo._makedirs(fname) + file_info = FileInfo( + server_name=None, + file_id=file_id, + url_cache=True, + ) try: - with open(fname, "wb") as f: + with self.media_storage.store_into_file(file_info) as (f, fname, finish): logger.debug("Trying to get url '%s'" % url) length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) # FIXME: pass through 404s and other error messages nicely - yield self.media_repo.copy_to_backup(fpath) + yield finish() media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -327,7 +332,6 @@ class PreviewUrlResource(Resource): ) except Exception as e: - os.remove(fname) raise SynapseError( 500, ("Failed to download content: %s" % e), Codes.UNKNOWN From 8f03aa9f61e1d99dfcde972f4e5f0f52919db49f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:19:55 +0000 Subject: [PATCH 027/171] Add StorageProvider concept --- synapse/rest/media/v1/media_repository.py | 29 ++++- synapse/rest/media/v1/media_storage.py | 15 ++- synapse/rest/media/v1/storage_provider.py | 127 ++++++++++++++++++++++ 3 files changed, 162 insertions(+), 9 deletions(-) create mode 100644 synapse/rest/media/v1/storage_provider.py diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 9aba9f13f..7938fe7bc 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -27,6 +27,9 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer +from .storage_provider import ( + StorageProviderWrapper, FileStorageProviderBackend, +) from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient @@ -66,10 +69,6 @@ class MediaRepository(object): self.primary_base_path = hs.config.media_store_path self.filepaths = MediaFilePaths(self.primary_base_path) - self.backup_base_path = hs.config.backup_media_store_path - - self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store - self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -77,7 +76,27 @@ class MediaRepository(object): self.recently_accessed_remotes = set() - self.media_storage = MediaStorage(self.primary_base_path, self.filepaths) + # List of StorageProvider's where we should search for media and + # potentially upload to. + self.storage_providers = [] + + # TODO: Move this into config and allow other storage providers to be + # defined. + if hs.config.backup_media_store_path: + backend = FileStorageProviderBackend( + self.primary_base_path, hs.config.backup_media_store_path, + ) + provider = StorageProviderWrapper( + backend, + store=True, + store_synchronous=hs.config.synchronous_backup_media_store, + store_remote=True, + ) + self.storage_providers.append(provider) + + self.media_storage = MediaStorage( + self.primary_base_path, self.filepaths, self.storage_providers, + ) self.clock.looping_call( self._update_recently_accessed_remotes, diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 052745e6f..a1ec6cadb 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -32,9 +32,10 @@ class MediaStorage(object): """Responsible for storing/fetching files from local sources. """ - def __init__(self, local_media_directory, filepaths): + def __init__(self, local_media_directory, filepaths, storage_providers): self.local_media_directory = local_media_directory self.filepaths = filepaths + self.storage_providers = storage_providers @defer.inlineCallbacks def store_file(self, source, file_info): @@ -90,11 +91,12 @@ class MediaStorage(object): finished_called = [False] + @defer.inlineCallbacks def finish(): - # This will be used later when we want to hit out to other storage - # places + for provider in self.storage_providers: + yield provider.store_file(path, file_info) + finished_called[0] = True - return defer.succeed(None) try: with open(fname, "wb") as f: @@ -127,6 +129,11 @@ class MediaStorage(object): if os.path.exists(local_path): defer.returnValue(FileResponder(open(local_path, "rb"))) + for provider in self.storage_providers: + res = yield provider.fetch(path, file_info) + if res: + defer.returnValue(res) + defer.returnValue(None) def _file_info_to_path(self, file_info): diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py new file mode 100644 index 000000000..2ad602e10 --- /dev/null +++ b/synapse/rest/media/v1/storage_provider.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, threads + +from .media_storage import FileResponder + +from synapse.util.logcontext import preserve_fn + +import logging +import os +import shutil + + +logger = logging.getLogger(__name__) + + +class StorageProvider(object): + """A storage provider is a service that can store uploaded media and + retrieve them. + """ + def store_file(self, path, file_info): + """Store the file described by file_info. The actual contents can be + retrieved by reading the file in file_info.upload_path. + + Args: + path (str): Relative path of file in local cache + file_info (FileInfo) + + Returns: + Deferred + """ + pass + + def fetch(self, path, file_info): + """Attempt to fetch the file described by file_info and stream it + into writer. + + Args: + path (str): Relative path of file in local cache + file_info (FileInfo) + + Returns: + Deferred(Responder): Returns a Responder if the provider has the file, + otherwise returns None. + """ + pass + + +class StorageProviderWrapper(StorageProvider): + """Wraps a storage provider and provides various config options + + Args: + backend (StorageProvider) + store (bool): Whether to store new files or not. + store_synchronous (bool): Whether to wait for file to be successfully + uploaded, or todo the upload in the backgroud. + store_remote (bool): Whether remote media should be uploaded + """ + def __init__(self, backend, store, store_synchronous, store_remote): + self.backend = backend + self.store = store + self.store_synchronous = store_synchronous + self.store_remote = store_remote + + def store_file(self, path, file_info): + if not self.store: + return defer.succeed(None) + + if file_info.server_name and not self.store_remote: + return defer.succeed(None) + + if self.store_synchronous: + return self.backend.store_file(path, file_info) + else: + # TODO: Handle errors. + preserve_fn(self.backend.store_file)(path, file_info) + return defer.succeed(None) + + def fetch(self, path, file_info): + return self.backend.fetch(path, file_info) + + +class FileStorageProviderBackend(StorageProvider): + """A storage provider that stores files in a directory on a filesystem. + + Args: + cache_directory (str): Base path of the local media repository + base_directory (str): Base path to store new files + """ + + def __init__(self, cache_directory, base_directory): + self.cache_directory = cache_directory + self.base_directory = base_directory + + def store_file(self, path, file_info): + """See StorageProvider.store_file""" + + primary_fname = os.path.join(self.cache_directory, path) + backup_fname = os.path.join(self.base_directory, path) + + dirname = os.path.dirname(backup_fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + return threads.deferToThread( + shutil.copyfile, primary_fname, backup_fname, + ) + + def fetch(self, path, file_info): + """See StorageProvider.fetch""" + + backup_fname = os.path.join(self.base_directory, path) + if os.path.isfile(backup_fname): + return FileResponder(open(backup_fname, "rb")) From 19f9227643b5099666878de33453bbe361f216fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:25:04 +0000 Subject: [PATCH 028/171] avoid 80s GIN inserts by tweaking work_mem see https://github.com/matrix-org/synapse/issues/2753 for details --- synapse/storage/search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c63..7b1166f41 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -106,6 +106,7 @@ class SearchStore(BackgroundUpdateStore): event_search_rows.append((event_id, room_id, key, value)) if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256KB'") sql = ( "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" @@ -123,6 +124,9 @@ class SearchStore(BackgroundUpdateStore): clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] txn.executemany(sql, clump) + if isinstance(self.database_engine, PostgresEngine): + txn.execute("RESET work_mem") + progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, From e365ad329f3c7e12bb2126217acbc62bdf0b9aec Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:30:30 +0000 Subject: [PATCH 029/171] oops, tweak work_mem when actually storing --- synapse/storage/room.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b..9e2bf1ab4 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -310,6 +310,7 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256KB'") sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" @@ -323,6 +324,7 @@ class RoomStore(SQLBaseStore): event.origin_server_ts, ) ) + txn.execute("RESET work_mem") elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" From 174eacc8ba71015003a78594ebc89cbe45d8384a Mon Sep 17 00:00:00 2001 From: hera Date: Tue, 9 Jan 2018 18:06:30 +0000 Subject: [PATCH 030/171] oops --- synapse/storage/room.py | 2 +- synapse/storage/search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9e2bf1ab4..0604f8f27 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -310,7 +310,7 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256KB'") + txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 7b1166f41..f52f3c859 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -106,7 +106,7 @@ class SearchStore(BackgroundUpdateStore): event_search_rows.append((event_id, room_id, key, value)) if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256KB'") + txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" From d90e8ea444092f500c721567fe055d48eb110528 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 18:27:35 +0000 Subject: [PATCH 031/171] Update http request metrics before calling servlet Make sure that we set the servlet name in the metrics object *before* calling the servlet, in case the servlet throws an exception. --- synapse/http/server.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 25466cd29..6e8f4c9c5 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -107,6 +107,10 @@ def wrap_request_handler(request_handler, include_metrics=False): with LoggingContext(request_id) as request_context: with Measure(self.clock, "wrapped_request_handler"): request_metrics = RequestMetrics() + # we start the request metrics timer here with an initial stab + # at the servlet name. For most requests that name will be + # JsonResource (or a subclass), and JsonResource._async_render + # will update it once it picks a servlet. request_metrics.start(self.clock, name=self.__class__.__name__) request_context.request = request_id @@ -249,12 +253,23 @@ class JsonResource(HttpServer, resource.Resource): if not m: continue - # We found a match! Trigger callback and then return the - # returned response. We pass both the request and any - # matched groups from the regex to the callback. + # We found a match! First update the metrics object to indicate + # which servlet is handling the request. callback = path_entry.callback + servlet_instance = getattr(callback, "__self__", None) + if servlet_instance is not None: + servlet_classname = servlet_instance.__class__.__name__ + else: + servlet_classname = "%r" % callback + + request_metrics.name = servlet_classname + + # Now trigger the callback. If it returns a response, we send it + # here. If it throws an exception, that is handled by the wrapper + # installed by @request_handler. + kwargs = intern_dict({ name: urllib.unquote(value).decode("UTF-8") if value else value for name, value in m.groupdict().items() @@ -265,17 +280,10 @@ class JsonResource(HttpServer, resource.Resource): code, response = callback_return self._send_response(request, code, response) - servlet_instance = getattr(callback, "__self__", None) - if servlet_instance is not None: - servlet_classname = servlet_instance.__class__.__name__ - else: - servlet_classname = "%r" % callback - - request_metrics.name = servlet_classname - return # Huh. No one wanted to handle that? Fiiiiiine. Send 400. + request_metrics.name = self.__class__.__name__ + ".UnrecognizedRequest" raise UnrecognizedRequestError() def _send_response(self, request, code, response_json_object, From 8c9d5b4873dc5e54c1383e3d31060fd32b08fae1 Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 10 Jan 2018 15:04:02 +0000 Subject: [PATCH 032/171] Fix publicised groups API (singular) over federation which was missing its fed client API, since there is no other API it might as well reuse the bulk one and unwrap it Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> --- synapse/handlers/groups_local.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py index 7e5d3f148..0d3feacd4 100644 --- a/synapse/handlers/groups_local.py +++ b/synapse/handlers/groups_local.py @@ -383,11 +383,11 @@ class GroupsLocalHandler(object): defer.returnValue({"groups": result}) else: - result = yield self.transport_client.get_publicised_groups_for_user( - get_domain_from_id(user_id), user_id - ) + result = yield self.transport_client.bulk_get_publicised_groups( + get_domain_from_id(user_id), [user_id], + )["users"][user_id] # TODO: Verify attestations - defer.returnValue(result) + defer.returnValue({"groups": result}) @defer.inlineCallbacks def bulk_get_publicised_groups(self, user_ids, proxy=True): From 5f07f5694c9a0535e64262c3726007e966ba72ea Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 10 Jan 2018 15:11:35 +0000 Subject: [PATCH 033/171] fix order of operations derp and also use `.get` to default to {} Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> --- synapse/handlers/groups_local.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py index 0d3feacd4..e4d0cc8b0 100644 --- a/synapse/handlers/groups_local.py +++ b/synapse/handlers/groups_local.py @@ -383,9 +383,10 @@ class GroupsLocalHandler(object): defer.returnValue({"groups": result}) else: - result = yield self.transport_client.bulk_get_publicised_groups( + bulk_result = yield self.transport_client.bulk_get_publicised_groups( get_domain_from_id(user_id), [user_id], - )["users"][user_id] + ) + result = bulk_result.get("users", {}).get(user_id) # TODO: Verify attestations defer.returnValue({"groups": result}) From f8e1ab5fee708bea93c54b39dc802a08c7ed7e21 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 10 Jan 2018 18:01:28 +0000 Subject: [PATCH 034/171] Do bcrypt hashing in a background thread --- synapse/handlers/auth.py | 24 ++++++++++++++++-------- synapse/handlers/register.py | 2 +- synapse/handlers/set_password.py | 2 +- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 2f30f183c..af9060fdb 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer +from twisted.internet import defer, threads from ._base import BaseHandler from synapse.api.constants import LoginType @@ -22,6 +22,7 @@ from synapse.module_api import ModuleApi from synapse.types import UserID from synapse.util.async import run_on_reactor from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.logcontext import make_deferred_yieldable from twisted.web.client import PartialDownloadError @@ -626,7 +627,7 @@ class AuthHandler(BaseHandler): if not lookupres: defer.returnValue(None) (user_id, password_hash) = lookupres - result = self.validate_hash(password, password_hash) + result = yield self.validate_hash(password, password_hash) if not result: logger.warn("Failed password login for user %s", user_id) defer.returnValue(None) @@ -754,10 +755,13 @@ class AuthHandler(BaseHandler): password (str): Password to hash. Returns: - Hashed password (str). + Deferred(str): Hashed password. """ - return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, - bcrypt.gensalt(self.bcrypt_rounds)) + def _do_hash(): + return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, + bcrypt.gensalt(self.bcrypt_rounds)) + + return make_deferred_yieldable(threads.deferToThread(_do_hash)) def validate_hash(self, password, stored_hash): """Validates that self.hash(password) == stored_hash. @@ -767,13 +771,17 @@ class AuthHandler(BaseHandler): stored_hash (str): Expected hash value. Returns: - Whether self.hash(password) == stored_hash (bool). + Deferred(bool): Whether self.hash(password) == stored_hash. """ - if stored_hash: + + def _do_validate_hash(): return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, stored_hash.encode('utf8')) == stored_hash + + if stored_hash: + return make_deferred_yieldable(threads.deferToThread(_do_validate_hash)) else: - return False + return defer.succeed(False) class MacaroonGeneartor(object): diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index f6e7e5856..132e2052c 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -130,7 +130,7 @@ class RegistrationHandler(BaseHandler): yield run_on_reactor() password_hash = None if password: - password_hash = self.auth_handler().hash(password) + password_hash = yield self.auth_handler().hash(password) if localpart: yield self.check_username(localpart, guest_access_token=guest_access_token) diff --git a/synapse/handlers/set_password.py b/synapse/handlers/set_password.py index 44414e1dc..e057ae54c 100644 --- a/synapse/handlers/set_password.py +++ b/synapse/handlers/set_password.py @@ -31,7 +31,7 @@ class SetPasswordHandler(BaseHandler): @defer.inlineCallbacks def set_password(self, user_id, newpassword, requester=None): - password_hash = self._auth_handler.hash(newpassword) + password_hash = yield self._auth_handler.hash(newpassword) except_device_id = requester.device_id if requester else None except_access_token_id = requester.access_token_id if requester else None From 7cd34512d850fce57bdf1dcfb3f61d5315baf639 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 11 Jan 2018 11:15:20 +0000 Subject: [PATCH 035/171] When using synctl with workers, don't start the main synapse automatically --- CHANGES.rst | 7 +++++++ synapse/app/synctl.py | 34 ++++++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index dcf9adc95..24e4e7a38 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,10 @@ +Unreleased +========== + +synctl no longer starts the main synapse when using ``-a`` option with workers. +A new worker file should be added with ``worker_app: synapse.app.homeserver`` + + Changes in synapse v0.26.0 (2018-01-05) ======================================= diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 3bd7ef7bb..049956746 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -184,6 +184,9 @@ def main(): worker_configfiles.append(worker_configfile) if options.all_processes: + # To start the main synapse with -a you need to add a worker file + # with worker_app == "synapse.app.homeserver" + start_stop_synapse = False worker_configdir = options.all_processes if not os.path.isdir(worker_configdir): write( @@ -200,14 +203,29 @@ def main(): with open(worker_configfile) as stream: worker_config = yaml.load(stream) worker_app = worker_config["worker_app"] - worker_pidfile = worker_config["worker_pid_file"] - worker_daemonize = worker_config["worker_daemonize"] - assert worker_daemonize, "In config %r: expected '%s' to be True" % ( - worker_configfile, "worker_daemonize") - worker_cache_factor = worker_config.get("synctl_cache_factor") - workers.append(Worker( - worker_app, worker_configfile, worker_pidfile, worker_cache_factor, - )) + if worker_app == "synapse.app.homeserver": + # We need to special case all of this to pick up options that may + # be set in the main config file or in this worker config file. + worker_pidfile = ( + worker_config.get("worker_pid_file") + or worker_config("pid_file") + or pidfile + ) + workers.append(Worker( + "synapse.app.homeserver", + worker_configfile, + worker_pidfile, + worker_config.get("synctl_cache_factor") or cache_factor, + )) + else: + worker_pidfile = worker_config["worker_pid_file"] + worker_daemonize = worker_config["worker_daemonize"] + assert worker_daemonize, "In config %r: expected '%s' to be True" % ( + worker_configfile, "worker_daemonize") + worker_cache_factor = worker_config.get("synctl_cache_factor") + workers.append(Worker( + worker_app, worker_configfile, worker_pidfile, worker_cache_factor, + )) action = options.action From f68e4cf690df9bbc70540804fd8cc590d40c2149 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 10:11:12 +0000 Subject: [PATCH 036/171] Refactor --- synapse/app/synctl.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 049956746..4bd8f735f 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -211,21 +211,16 @@ def main(): or worker_config("pid_file") or pidfile ) - workers.append(Worker( - "synapse.app.homeserver", - worker_configfile, - worker_pidfile, - worker_config.get("synctl_cache_factor") or cache_factor, - )) + worker_cache_factor = worker_config.get("synctl_cache_factor") or cache_factor else: worker_pidfile = worker_config["worker_pid_file"] worker_daemonize = worker_config["worker_daemonize"] assert worker_daemonize, "In config %r: expected '%s' to be True" % ( worker_configfile, "worker_daemonize") worker_cache_factor = worker_config.get("synctl_cache_factor") - workers.append(Worker( - worker_app, worker_configfile, worker_pidfile, worker_cache_factor, - )) + workers.append(Worker( + worker_app, worker_configfile, worker_pidfile, worker_cache_factor, + )) action = options.action From f4d93ae424bec48f7d0d68d885942dc83d5780d7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 10:39:27 +0000 Subject: [PATCH 037/171] Actually make it work --- synapse/app/synctl.py | 12 ++++++++++-- synapse/config/workers.py | 5 +++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 4bd8f735f..0f0ddfa78 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -207,11 +207,19 @@ def main(): # We need to special case all of this to pick up options that may # be set in the main config file or in this worker config file. worker_pidfile = ( - worker_config.get("worker_pid_file") - or worker_config("pid_file") + worker_config.get("pid_file") or pidfile ) worker_cache_factor = worker_config.get("synctl_cache_factor") or cache_factor + daemonize = worker_config.get("daemonize") or config.get("daemonize") + assert daemonize, "Main process must have daemonize set to true" + + # The master process doesn't support using worker_* config. + for key in worker_config: + if key == "worker_app": # But we allow worker_app + continue + assert not key.startswith("worker_"), \ + "Main process cannot use worker_* config" else: worker_pidfile = worker_config["worker_pid_file"] worker_daemonize = worker_config["worker_daemonize"] diff --git a/synapse/config/workers.py b/synapse/config/workers.py index c5a5a8919..4b6884918 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -23,6 +23,11 @@ class WorkerConfig(Config): def read_config(self, config): self.worker_app = config.get("worker_app") + + # Canonicalise worker_app so that master always has None + if self.worker_app == "synapse.app.homeserver": + self.worker_app = None + self.worker_listeners = config.get("worker_listeners") self.worker_daemonize = config.get("worker_daemonize") self.worker_pid_file = config.get("worker_pid_file") From 227c491510e09bc201b835a7dfa84aaeafb3cdc6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:15:31 +0000 Subject: [PATCH 038/171] Comments --- synapse/rest/media/v1/_base.py | 9 +++--- synapse/rest/media/v1/media_repository.py | 39 ++++++++++++++++++++--- synapse/rest/media/v1/media_storage.py | 32 ++++++++++++++++--- 3 files changed, 65 insertions(+), 15 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 131082048..03df875b4 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -134,9 +134,9 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam Args: request (twisted.web.http.Request) - responder (Responder) + responder (Responder|None) media_type (str): The media/content type. - file_size (int): Size in bytes of the media, if known. + file_size (int): Size in bytes of the media. If not known it should be None upload_name (str): The name of the requested file, if any. """ if not responder: @@ -179,13 +179,12 @@ class FileInfo(object): or None if local. file_id (str): The local ID of the file. For local files this is the same as the media_id - media_type (str): Type of the file url_cache (bool): If the file is for the url preview cache thumbnail (bool): Whether the file is a thumbnail or not. thumbnail_width (int) thumbnail_height (int) - thumbnail_method (int) - thumbnail_type (str) + thumbnail_method (str) + thumbnail_type (str): Content type of thumbnail, e.g. image/png """ def __init__(self, server_name, file_id, url_cache=False, thumbnail=False, thumbnail_width=None, thumbnail_height=None, diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 7938fe7bc..6508dbf17 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vecotr Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ class MediaRepository(object): self.recently_accessed_remotes = set() - # List of StorageProvider's where we should search for media and + # List of StorageProviders where we should search for media and # potentially upload to. self.storage_providers = [] @@ -158,6 +158,16 @@ class MediaRepository(object): @defer.inlineCallbacks def get_local_media(self, request, media_id, name): """Responds to reqests for local media, if exists, or returns 404. + + Args: + request(twisted.web.http.Request) + media_id (str) + name (str|None): Optional name that, if specified, will be used as + the filename in the Content-Disposition header of the response. + + Retruns: + Deferred: Resolves once a response has successfully been written + to request """ media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: @@ -182,18 +192,29 @@ class MediaRepository(object): @defer.inlineCallbacks def get_remote_media(self, request, server_name, media_id, name): """Respond to requests for remote media. + + Args: + request(twisted.web.http.Request) + server_name (str): Remote server_name where the media originated. + media_id (str) + name (str|None): Optional name that, if specified, will be used as + the filename in the Content-Disposition header of the response. + + Retruns: + Deferred: Resolves once a response has successfully been written + to request """ self.recently_accessed_remotes.add((server_name, media_id)) # We linearize here to ensure that we don't try and download remote - # media mutliple times concurrently + # media multiple times concurrently key = (server_name, media_id) with (yield self.remote_media_linearizer.queue(key)): responder, media_info = yield self._get_remote_media_impl( server_name, media_id, ) - # We purposefully stream the file outside the lock + # We deliberately stream the file outside the lock if responder: media_type = media_info["media_type"] media_length = media_info["media_length"] @@ -210,7 +231,7 @@ class MediaRepository(object): download from remote server. Returns: - Deferred((Respodner, media_info)) + Deferred[(Responder, media_info)] """ media_info = yield self.store.get_cached_remote_media( server_name, media_id @@ -251,6 +272,14 @@ class MediaRepository(object): def _download_remote_file(self, server_name, media_id, file_id): """Attempt to download the remote file from the given server name, using the given file_id as the local id. + + Args: + server_name (str): Originating server + media_id (str) + file_id (str): Local file ID + + Returns: + Deferred[MediaInfo] """ file_info = FileInfo( diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index a1ec6cadb..49d2b7cd4 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -30,6 +30,12 @@ logger = logging.getLogger(__name__) class MediaStorage(object): """Responsible for storing/fetching files from local sources. + + Args: + local_media_directory (str): Base path where we store media on disk + filepaths (MediaFilePaths) + storage_providers ([StorageProvider]): List of StorageProvider that are + used to fetch and store files. """ def __init__(self, local_media_directory, filepaths, storage_providers): @@ -68,9 +74,16 @@ class MediaStorage(object): """Context manager used to get a file like object to write into, as described by file_info. - Actually yields a 3-tuple (file, fname, finish_cb), where finish_cb is a - function that returns a Deferred that must be waited on after the file - has been successfully written to. + Actually yields a 3-tuple (file, fname, finish_cb), where file is a file + like object that can be written to, fname is the absolute path of file + on disk, and finish_cb is a function that returns a Deferred. + + fname can be used to read the contents from after upload, e.g. to + generate thumbnails. + + finish_cb must be called and waited on after the file has been + successfully been written to. Should not be called if there was an + error. Args: file_info (FileInfo): Info about the file to store @@ -109,7 +122,7 @@ class MediaStorage(object): raise e if not finished_called: - raise Exception("Fnished callback not called") + raise Exception("Finished callback not called") @defer.inlineCallbacks def fetch_media(self, file_info): @@ -120,7 +133,7 @@ class MediaStorage(object): file_info (FileInfo) Returns: - Deferred(Responder): Returns a Responder if the file was found, + Deferred[Responder|None]: Returns a Responder if the file was found, otherwise None. """ @@ -138,6 +151,15 @@ class MediaStorage(object): def _file_info_to_path(self, file_info): """Converts file_info into a relative path. + + The path is suitable for storing files under a directory, e.g. used to + store files on local FS under the base media repository directory. + + Args: + file_info (FileInfo) + + Returns: + str """ if file_info.url_cache: return self.filepaths.url_cache_filepath_rel(file_info.file_id) From 4d88958cf6f9ed28ecd78990a8f51119eb294279 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:23:54 +0000 Subject: [PATCH 039/171] Make class var local --- synapse/rest/media/v1/media_repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 6508dbf17..65b16ce4c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -78,7 +78,7 @@ class MediaRepository(object): # List of StorageProviders where we should search for media and # potentially upload to. - self.storage_providers = [] + storage_providers = [] # TODO: Move this into config and allow other storage providers to be # defined. @@ -92,10 +92,10 @@ class MediaRepository(object): store_synchronous=hs.config.synchronous_backup_media_store, store_remote=True, ) - self.storage_providers.append(provider) + storage_providers.append(provider) self.media_storage = MediaStorage( - self.primary_base_path, self.filepaths, self.storage_providers, + self.primary_base_path, self.filepaths, storage_providers, ) self.clock.looping_call( From c6c009603cd72c2dfd777658b871dc66b08f7aa4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:24:05 +0000 Subject: [PATCH 040/171] Remove unused variables --- synapse/rest/media/v1/download_resource.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 5dc92972c..3443db91c 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -32,11 +32,8 @@ class DownloadResource(Resource): def __init__(self, hs, media_repo): Resource.__init__(self) - self.filepaths = media_repo.filepaths self.media_repo = media_repo self.server_name = hs.hostname - self.store = hs.get_datastore() - self.version_string = hs.version_string self.clock = hs.get_clock() def render_GET(self, request): From 1e4edd1717d1a3ef5ada210882d6798c520626eb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:28:32 +0000 Subject: [PATCH 041/171] Remove unnecessary condition --- synapse/rest/media/v1/media_repository.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 65b16ce4c..5c50646bc 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -263,10 +263,7 @@ class MediaRepository(object): ) responder = yield self.media_storage.fetch_media(file_info) - if responder: - defer.returnValue((responder, media_info)) - - defer.returnValue((None, media_info)) + defer.returnValue((responder, media_info)) @defer.inlineCallbacks def _download_remote_file(self, server_name, media_id, file_id): From dcc8eded4172670137b5dd9be1e4c70328cf85f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 13:16:27 +0000 Subject: [PATCH 042/171] Add missing class var --- synapse/rest/media/v1/download_resource.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 3443db91c..fe7e17596 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -34,7 +34,10 @@ class DownloadResource(Resource): self.media_repo = media_repo self.server_name = hs.hostname + + # Both of these are expected by @request_handler() self.clock = hs.get_clock() + self.version_string = hs.version_string def render_GET(self, request): self._async_render_GET(request) From 85a4d78213f6987c920043532bca428bb582a46b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 13:32:03 +0000 Subject: [PATCH 043/171] Make Responder a context manager --- synapse/rest/media/v1/_base.py | 14 ++++++++------ synapse/rest/media/v1/media_storage.py | 5 ++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 03df875b4..1145904ae 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -144,15 +144,16 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam return add_file_headers(request, media_type, file_size, upload_name) - yield responder.write_to_consumer(request) + with responder: + yield responder.write_to_consumer(request) finish_request(request) class Responder(object): """Represents a response that can be streamed to the requester. - Either `write_to_consumer` or `cancel` must be called to clean up any open - resources. + Responder is a context manager which *must* be used, so that any resources + held can be cleaned up. """ def write_to_consumer(self, consumer): """Stream response into consumer @@ -165,9 +166,10 @@ class Responder(object): """ pass - def cancel(self): - """Called when the responder is not going to be used after all. - """ + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): pass diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 49d2b7cd4..b6e7a19e1 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -220,8 +220,7 @@ class FileResponder(Responder): @defer.inlineCallbacks def write_to_consumer(self, consumer): - with self.open_file: - yield FileSender().beginFileTransfer(self.open_file, consumer) + yield FileSender().beginFileTransfer(self.open_file, consumer) - def cancel(self): + def __exit__(self, exc_type, exc_val, exc_tb): self.open_file.close() From e21370ba54607c9eb78869bc7ce5ab3d6f896fdd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 14:44:02 +0000 Subject: [PATCH 044/171] Correctly reraise exception --- synapse/rest/media/v1/media_storage.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index b6e7a19e1..001e84578 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -24,6 +24,7 @@ import contextlib import os import logging import shutil +import sys logger = logging.getLogger(__name__) @@ -114,12 +115,13 @@ class MediaStorage(object): try: with open(fname, "wb") as f: yield f, fname, finish - except Exception as e: + except Exception: + t, v, tb = sys.exc_info() try: os.remove(fname) except Exception: pass - raise e + raise t, v, tb if not finished_called: raise Exception("Finished callback not called") From 694f1c1b185a8431679d39a80b7567ae68605e17 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 15:02:46 +0000 Subject: [PATCH 045/171] Fix up comments --- synapse/rest/media/v1/_base.py | 4 ++-- synapse/rest/media/v1/media_repository.py | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 1145904ae..e7ac01da0 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -136,8 +136,8 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam request (twisted.web.http.Request) responder (Responder|None) media_type (str): The media/content type. - file_size (int): Size in bytes of the media. If not known it should be None - upload_name (str): The name of the requested file, if any. + file_size (int|None): Size in bytes of the media. If not known it should be None + upload_name (str|None): The name of the requested file, if any. """ if not responder: respond_404(request) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5c50646bc..45bc53420 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -161,11 +161,12 @@ class MediaRepository(object): Args: request(twisted.web.http.Request) - media_id (str) + media_id (str): The media ID of the content. (This is the same as + the file_id for local content.) name (str|None): Optional name that, if specified, will be used as the filename in the Content-Disposition header of the response. - Retruns: + Returns: Deferred: Resolves once a response has successfully been written to request """ @@ -196,11 +197,12 @@ class MediaRepository(object): Args: request(twisted.web.http.Request) server_name (str): Remote server_name where the media originated. - media_id (str) + media_id (str): The media ID of the content (as defined by the + remote server). name (str|None): Optional name that, if specified, will be used as the filename in the Content-Disposition header of the response. - Retruns: + Returns: Deferred: Resolves once a response has successfully been written to request """ @@ -230,6 +232,11 @@ class MediaRepository(object): """Looks for media in local cache, if not there then attempt to download from remote server. + Args: + server_name (str): Remote server_name where the media originated. + media_id (str): The media ID of the content (as defined by the + remote server). + Returns: Deferred[(Responder, media_info)] """ @@ -272,7 +279,9 @@ class MediaRepository(object): Args: server_name (str): Originating server - media_id (str) + media_id (str): The media ID of the content (as defined by the + remote server). This is different than the file_id, which is + locally generated. file_id (str): Local file ID Returns: From 21bf87a146e54d9c111abb6f39a1bcbdc0563df2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jan 2018 15:38:06 +0000 Subject: [PATCH 046/171] Reinstate media download on thumbnail request We need to actually download the remote media when we get a request for a thumbnail. --- synapse/rest/media/v1/thumbnail_resource.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70dbf7f5c..53e48aba2 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -198,6 +198,11 @@ class ThumbnailResource(Resource): @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): + # TODO: Don't download the whole remote file + # We should proxy the thumbnail from the remote server instead of + # downloading the remote file and generating our own thumbnails. + yield self.media_repo.get_remote_media(server_name, media_id) + thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) From 19d274085fa939c440667759d38a8a255216899b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jan 2018 23:21:32 +0000 Subject: [PATCH 047/171] Make Counter render floats Prometheus handles all metrics as floats, and sometimes we store non-integer values in them (notably, durations in seconds), so let's render them as floats too. (Note that the standard client libraries also treat Counters as floats.) --- synapse/metrics/metric.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index e87b2b80a..1d054dd55 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -50,7 +50,14 @@ class BaseMetric(object): class CounterMetric(BaseMetric): """The simplest kind of metric; one that stores a monotonically-increasing - integer that counts events.""" + value that counts events or running totals. + + Example use cases for Counters: + - Number of requests processed + - Number of items that were inserted into a queue + - Total amount of data that a system has processed + Counters can only go up (and be reset when the process restarts). + """ def __init__(self, *args, **kwargs): super(CounterMetric, self).__init__(*args, **kwargs) @@ -59,7 +66,7 @@ class CounterMetric(BaseMetric): # Scalar metrics are never empty if self.is_scalar(): - self.counts[()] = 0 + self.counts[()] = 0. def inc_by(self, incr, *values): if len(values) != self.dimension(): @@ -78,7 +85,7 @@ class CounterMetric(BaseMetric): self.inc_by(1, *values) def render_item(self, k): - return ["%s%s %d" % (self.name, self._render_key(k), self.counts[k])] + return ["%s%s %.12g" % (self.name, self._render_key(k), self.counts[k])] def render(self): return map_concat(self.render_item, sorted(self.counts.keys())) From 80fa610f9c8702d6b7256be9d97668de29ba2e06 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 16:52:52 +0000 Subject: [PATCH 048/171] Add some comments to metrics classes --- synapse/metrics/metric.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 1d054dd55..c5f0bcbc1 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -24,8 +24,16 @@ def map_concat(func, items): class BaseMetric(object): + """Base class for metrics which report a single value per label set + """ def __init__(self, name, labels=[]): + """ + Args: + name (str): principal name for this metric + labels (list(str)): names of the labels which will be reported + for this metric + """ self.name = name self.labels = labels # OK not to clone as we never write it @@ -36,7 +44,7 @@ class BaseMetric(object): return not len(self.labels) def _render_labelvalue(self, value): - # TODO: some kind of value escape + # TODO: escape backslashes, quotes and newlines return '"%s"' % (value) def _render_key(self, values): @@ -47,6 +55,20 @@ class BaseMetric(object): for k, v in zip(self.labels, values)]) ) + def render(self): + """Render this metric + + Each metric is rendered as: + + name{label1="val1",label2="val2"} value + + https://prometheus.io/docs/instrumenting/exposition_formats/#text-format-details + + Returns: + iterable[str]: rendered metrics + """ + raise NotImplementedError() + class CounterMetric(BaseMetric): """The simplest kind of metric; one that stores a monotonically-increasing @@ -62,6 +84,10 @@ class CounterMetric(BaseMetric): def __init__(self, *args, **kwargs): super(CounterMetric, self).__init__(*args, **kwargs) + # dict[list[str]]: value for each set of label values. the keys are the + # label values, in the same order as the labels in self.labels. + # + # (if the metric is a scalar, the (single) key is the empty list). self.counts = {} # Scalar metrics are never empty From 992018d1c07a727e54c4ad5b4079f5f5de8fec5d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 16:58:41 +0000 Subject: [PATCH 049/171] mechanism to render metrics with alternative names --- synapse/metrics/metric.py | 53 +++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index c5f0bcbc1..f480aae61 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -17,24 +17,33 @@ from itertools import chain -# TODO(paul): I can't believe Python doesn't have one of these -def map_concat(func, items): - # flatten a list-of-lists - return list(chain.from_iterable(map(func, items))) +def flatten(items): + """Flatten a list of lists + + Args: + items: iterable[iterable[X]] + + Returns: + list[X]: flattened list + """ + return list(chain.from_iterable(items)) class BaseMetric(object): """Base class for metrics which report a single value per label set """ - def __init__(self, name, labels=[]): + def __init__(self, name, labels=[], alternative_names=[]): """ Args: name (str): principal name for this metric labels (list(str)): names of the labels which will be reported for this metric + alternative_names (iterable(str)): list of alternative names for + this metric. This can be useful to provide a migration path + when renaming metrics. """ - self.name = name + self._names = [name] + list(alternative_names) self.labels = labels # OK not to clone as we never write it def dimension(self): @@ -55,6 +64,22 @@ class BaseMetric(object): for k, v in zip(self.labels, values)]) ) + def _render_for_labels(self, label_values, value): + """Render this metric for a single set of labels + + Args: + label_values (list[str]): values for each of the labels + value: value of the metric at with these labels + + Returns: + iterable[str]: rendered metric + """ + rendered_labels = self._render_key(label_values) + return ( + "%s%s %.12g" % (name, rendered_labels, value) + for name in self._names + ) + def render(self): """Render this metric @@ -110,11 +135,11 @@ class CounterMetric(BaseMetric): def inc(self, *values): self.inc_by(1, *values) - def render_item(self, k): - return ["%s%s %.12g" % (self.name, self._render_key(k), self.counts[k])] - def render(self): - return map_concat(self.render_item, sorted(self.counts.keys())) + return flatten( + self._render_for_labels(k, self.counts[k]) + for k in sorted(self.counts.keys()) + ) class CallbackMetric(BaseMetric): @@ -131,10 +156,12 @@ class CallbackMetric(BaseMetric): value = self.callback() if self.is_scalar(): - return ["%s %.12g" % (self.name, value)] + return list(self._render_for_labels([], value)) - return ["%s%s %.12g" % (self.name, self._render_key(k), value[k]) - for k in sorted(value.keys())] + return flatten( + self._render_for_labels(k, value[k]) + for k in sorted(value.keys()) + ) class DistributionMetric(object): From 39f4e29d0151b56a3c8528e3149cd5765b9f600d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 17:00:12 +0000 Subject: [PATCH 050/171] Reorganise request and block metrics In order to circumvent the number of duplicate foo:count metrics increasing without bounds, it's time for a rearrangement. The following are all deprecated, and replaced with synapse_util_metrics_block_count: synapse_util_metrics_block_timer:count synapse_util_metrics_block_ru_utime:count synapse_util_metrics_block_ru_stime:count synapse_util_metrics_block_db_txn_count:count synapse_util_metrics_block_db_txn_duration:count The following are all deprecated, and replaced with synapse_http_server_response_count: synapse_http_server_requests synapse_http_server_response_time:count synapse_http_server_response_ru_utime:count synapse_http_server_response_ru_stime:count synapse_http_server_response_db_txn_count:count synapse_http_server_response_db_txn_duration:count The following are renamed (the old metrics are kept for now, but deprecated): synapse_util_metrics_block_timer:total -> synapse_util_metrics_block_time_seconds synapse_util_metrics_block_ru_utime:total -> synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_stime:total -> synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_db_txn_count:total -> synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_duration:total -> synapse_util_metrics_block_db_txn_duration_seconds synapse_http_server_response_time:total -> synapse_http_server_response_time_seconds synapse_http_server_response_ru_utime:total -> synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_stime:total -> synapse_http_server_response_ru_stime_seconds synapse_http_server_response_db_txn_count:total -> synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_duration:total synapse_http_server_response_db_txn_duration_seconds --- synapse/http/server.py | 56 ++++++++++++++++++++++++++++++----------- synapse/util/metrics.py | 53 ++++++++++++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 6e8f4c9c5..269b65ca4 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -42,34 +42,62 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -incoming_requests_counter = metrics.register_counter( - "requests", +# total number of responses served, split by method/servlet/tag +response_count = metrics.register_counter( + "response_count", labels=["method", "servlet", "tag"], + alternative_names=( + # the following are all deprecated aliases for the same metric + metrics.name_prefix + x for x in ( + "_requests", + "_response_time:count", + "_response_ru_utime:count", + "_response_ru_stime:count", + "_response_db_txn_count:count", + "_response_db_txn_duration:count", + ) + ) ) + outgoing_responses_counter = metrics.register_counter( "responses", labels=["method", "code"], ) -response_timer = metrics.register_distribution( - "response_time", - labels=["method", "servlet", "tag"] +response_timer = metrics.register_counter( + "response_time_seconds", + labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_time:total", + ), ) -response_ru_utime = metrics.register_distribution( - "response_ru_utime", labels=["method", "servlet", "tag"] +response_ru_utime = metrics.register_counter( + "response_ru_utime_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_ru_utime:total", + ), ) -response_ru_stime = metrics.register_distribution( - "response_ru_stime", labels=["method", "servlet", "tag"] +response_ru_stime = metrics.register_counter( + "response_ru_stime_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_ru_stime:total", + ), ) -response_db_txn_count = metrics.register_distribution( - "response_db_txn_count", labels=["method", "servlet", "tag"] +response_db_txn_count = metrics.register_counter( + "response_db_txn_count", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_db_txn_count:total", + ), ) -response_db_txn_duration = metrics.register_distribution( - "response_db_txn_duration", labels=["method", "servlet", "tag"] +response_db_txn_duration = metrics.register_counter( + "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_db_txn_duration:total", + ), ) @@ -330,7 +358,7 @@ class RequestMetrics(object): ) return - incoming_requests_counter.inc(request.method, self.name, tag) + response_count.inc(request.method, self.name, tag) response_timer.inc_by( clock.time_msec() - self.start, request.method, diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 4ea930d3e..8d22ff306 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -27,25 +27,56 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -block_timer = metrics.register_distribution( - "block_timer", - labels=["block_name"] +# total number of times we have hit this block +response_count = metrics.register_counter( + "block_count", + labels=["block_name"], + alternative_names=( + # the following are all deprecated aliases for the same metric + metrics.name_prefix + x for x in ( + "_block_timer:count", + "_block_ru_utime:count", + "_block_ru_stime:count", + "_block_db_txn_count:count", + "_block_db_txn_duration:count", + ) + ) ) -block_ru_utime = metrics.register_distribution( - "block_ru_utime", labels=["block_name"] +block_timer = metrics.register_counter( + "block_time_seconds", + labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_timer:total", + ), ) -block_ru_stime = metrics.register_distribution( - "block_ru_stime", labels=["block_name"] +block_ru_utime = metrics.register_counter( + "block_ru_utime_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_ru_utime:total", + ), ) -block_db_txn_count = metrics.register_distribution( - "block_db_txn_count", labels=["block_name"] +block_ru_stime = metrics.register_counter( + "block_ru_stime_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_ru_stime:total", + ), ) -block_db_txn_duration = metrics.register_distribution( - "block_db_txn_duration", labels=["block_name"] +block_db_txn_count = metrics.register_counter( + "block_db_txn_count", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_db_txn_count:total", + ), +) + +block_db_txn_duration = metrics.register_counter( + "block_db_txn_duration_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_db_txn_count:total", + ), ) From 5c3c32f16f99b11b91b34c0829db98896373ea75 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 17:45:55 +0000 Subject: [PATCH 051/171] Metrics for number of RDATA commands received I found myself wishing we had this. --- synapse/replication/tcp/protocol.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index d59503b90..0a9a290af 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -517,25 +517,28 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.send_error("Wrong remote") def on_RDATA(self, cmd): + stream_name = cmd.stream_name + inbound_rdata_count.inc(stream_name) + try: - row = STREAMS_MAP[cmd.stream_name].ROW_TYPE(*cmd.row) + row = STREAMS_MAP[stream_name].ROW_TYPE(*cmd.row) except Exception: logger.exception( "[%s] Failed to parse RDATA: %r %r", - self.id(), cmd.stream_name, cmd.row + self.id(), stream_name, cmd.row ) raise if cmd.token is None: # I.e. this is part of a batch of updates for this stream. Batch # until we get an update for the stream with a non None token - self.pending_batches.setdefault(cmd.stream_name, []).append(row) + self.pending_batches.setdefault(stream_name, []).append(row) else: # Check if this is the last of a batch of updates - rows = self.pending_batches.pop(cmd.stream_name, []) + rows = self.pending_batches.pop(stream_name, []) rows.append(row) - self.handler.on_rdata(cmd.stream_name, cmd.token, rows) + self.handler.on_rdata(stream_name, cmd.token, rows) def on_POSITION(self, cmd): self.handler.on_position(cmd.stream_name, cmd.token) @@ -644,3 +647,9 @@ metrics.register_callback( }, labels=["command", "name", "conn_id"], ) + +# number of updates received for each RDATA stream +inbound_rdata_count = metrics.register_counter( + "inbound_rdata_count", + labels=["stream_name"], +) From a027c2af8d348554cad4855094a6f46ef21bfad7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 18:20:30 +0000 Subject: [PATCH 052/171] Metrics for events processed in appservice and fed sender More metrics I wished I'd had --- synapse/federation/transaction_queue.py | 4 ++++ synapse/handlers/appservice.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 3e7809b04..9d39f4658 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -42,6 +42,8 @@ sent_edus_counter = client_metrics.register_counter("sent_edus") sent_transactions_counter = client_metrics.register_counter("sent_transactions") +events_processed_counter = client_metrics.register_counter("events_processed") + class TransactionQueue(object): """This class makes sure we only have one transaction in flight at @@ -205,6 +207,8 @@ class TransactionQueue(object): self._send_pdu(event, destinations) + events_processed_counter.inc_by(len(events)) + yield self.store.update_federation_out_pos( "events", next_token ) diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index feca3e4c1..3dd3fa2a2 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -15,6 +15,7 @@ from twisted.internet import defer +import synapse from synapse.api.constants import EventTypes from synapse.util.metrics import Measure from synapse.util.logcontext import make_deferred_yieldable, preserve_fn @@ -23,6 +24,10 @@ import logging logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + +events_processed_counter = metrics.register_counter("events_processed") + def log_failure(failure): logger.error( @@ -103,6 +108,8 @@ class ApplicationServicesHandler(object): service, event ) + events_processed_counter.inc_by(len(events)) + yield self.store.set_appservice_last_pos(upper_bound) finally: self.is_processing = False From a4c5e4a6451dfc7b378c10b916635de6bdafa80a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 11:06:42 +0000 Subject: [PATCH 053/171] Fix thumbnailing remote files --- synapse/rest/media/v1/media_repository.py | 28 +++++++++++++++++++++ synapse/rest/media/v1/thumbnail_resource.py | 4 +-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 45bc53420..2608fab5d 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -227,6 +227,34 @@ class MediaRepository(object): else: respond_404(request) + @defer.inlineCallbacks + def get_remote_media_info(self, server_name, media_id): + """Gets the media info associated with the remote file, downloading + if necessary. + + Args: + server_name (str): Remote server_name where the media originated. + media_id (str): The media ID of the content (as defined by the + remote server). + + Returns: + Deferred[dict]: The media_info of the file + """ + # We linearize here to ensure that we don't try and download remote + # media multiple times concurrently + key = (server_name, media_id) + with (yield self.remote_media_linearizer.queue(key)): + responder, media_info = yield self._get_remote_media_impl( + server_name, media_id, + ) + + # Ensure we actually use the responder so that it releases resources + if responder: + with responder: + pass + + defer.returnValue(media_info) + @defer.inlineCallbacks def _get_remote_media_impl(self, server_name, media_id): """Looks for media in local cache, if not there then attempt to diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 835540c3d..70cea7782 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -165,7 +165,7 @@ class ThumbnailResource(Resource): def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, desired_width, desired_height, desired_method, desired_type): - media_info = yield self.media_repo.get_remote_media(server_name, media_id) + media_info = yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -216,7 +216,7 @@ class ThumbnailResource(Resource): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. - yield self.media_repo.get_remote_media(server_name, media_id) + yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, From 64ddec1bc0a1d23a285d560e34986441b3f8c854 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 11:47:36 +0000 Subject: [PATCH 054/171] Fix a logcontext leak in persist_events ObserveableDeferred expects its callbacks to be called without any logcontexts, whereas it turns out we were calling them with the logcontext of the request which initiated the persistence loop. It seems wrong that we are attributing work done in the persistence loop to the request that happened to initiate it, so let's solve this by dropping the logcontext for it. (I'm not sure this actually causes any real problems other than messages in the debug log, but let's clean it up anyway) --- synapse/storage/events.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d08f7571d..ad1d78270 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -146,6 +146,9 @@ class _EventPeristenceQueue(object): try: queue = self._get_drainining_queue(room_id) for item in queue: + # handle_queue_loop runs in the sentinel logcontext, so + # there is no need to preserve_fn when running the + # callbacks on the deferred. try: ret = yield per_item_callback(item) item.deferred.callback(ret) @@ -157,7 +160,11 @@ class _EventPeristenceQueue(object): self._event_persist_queues[room_id] = queue self._currently_persisting_rooms.discard(room_id) - preserve_fn(handle_queue_loop)() + # set handle_queue_loop off on the background. We don't want to + # attribute work done in it to the current request, so we drop the + # logcontext altogether. + with PreserveLoggingContext(): + handle_queue_loop() def _get_drainining_queue(self, room_id): queue = self._event_persist_queues.setdefault(room_id, deque()) From c5b589f2e8205ca0253534cf5826b807253bb8ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 12:01:40 +0000 Subject: [PATCH 055/171] Log when we respond with 404 --- synapse/rest/media/v1/media_repository.py | 1 + synapse/rest/media/v1/thumbnail_resource.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 2608fab5d..b12fabd94 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -285,6 +285,7 @@ class MediaRepository(object): # If we have an entry in the DB, try and look for it if media_info: if media_info["quarantined_by"]: + logger.info("Media is quarentined") raise NotFoundError() responder = yield self.media_storage.fetch_media(file_info) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70cea7782..b8c38eb31 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,6 +85,7 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -111,6 +112,7 @@ class ThumbnailResource(Resource): responder = yield self.media_storage.fetch_media(file_info) yield respond_with_responder(request, responder, t_type, t_length) else: + logger.info("Couldn't find any generated thumbnails") respond_404(request) @defer.inlineCallbacks @@ -120,6 +122,7 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -159,6 +162,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: + logger.warn("Failed to generate local thumbnail") respond_404(request) @defer.inlineCallbacks @@ -197,7 +201,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a local thumbnail of that size. Generating") + logger.debug("We don't have a remote thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_remote_exact_thumbnail( @@ -208,6 +212,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: + logger.warn("Failed to generate remote thumbnail") respond_404(request) @defer.inlineCallbacks @@ -241,6 +246,7 @@ class ThumbnailResource(Resource): responder = yield self.media_storage.fetch_media(file_info) yield respond_with_responder(request, responder, t_type, t_length) else: + logger.info("Failed to find any generated thumbnails") respond_404(request) def _select_thumbnail(self, desired_width, desired_height, desired_method, From 9795b9ebb11953747e6362fe123725326885667b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 12:02:06 +0000 Subject: [PATCH 056/171] Correctly use server_name/file_id when generating/fetching remote thumbnails --- synapse/rest/media/v1/media_repository.py | 7 +++++-- synapse/rest/media/v1/thumbnail_resource.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index b12fabd94..e82dcfdc2 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -539,7 +539,10 @@ class MediaRepository(object): Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - file_id = media_info.get("filesystem_id") + if server_name: + file_id = media_info["filesystem_id"] + else: + file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return @@ -597,7 +600,7 @@ class MediaRepository(object): try: file_info = FileInfo( server_name=server_name, - file_id=media_id, + file_id=file_id, thumbnail=True, thumbnail_width=t_width, thumbnail_height=t_height, diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index b8c38eb31..e20d6f10b 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -185,7 +185,7 @@ class ThumbnailResource(Resource): if t_w and t_h and t_method and t_type: file_info = FileInfo( - server_name=None, file_id=media_id, + server_name=server_name, file_id=media_info["filesystem_id"], thumbnail=True, thumbnail_width=info["thumbnail_width"], thumbnail_height=info["thumbnail_height"], @@ -221,7 +221,7 @@ class ThumbnailResource(Resource): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. - yield self.media_repo.get_remote_media_info(server_name, media_id) + media_info = yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -232,7 +232,7 @@ class ThumbnailResource(Resource): width, height, method, m_type, thumbnail_infos ) file_info = FileInfo( - server_name=None, file_id=media_id, + server_name=server_name, file_id=media_info["filesystem_id"], thumbnail=True, thumbnail_width=thumbnail_info["thumbnail_width"], thumbnail_height=thumbnail_info["thumbnail_height"], From ee7a1cabd8c6d218b838295fde6999dcbc23036b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 13:04:01 +0000 Subject: [PATCH 057/171] document metrics changes --- CHANGES.rst | 5 +++- docs/metrics-howto.rst | 61 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 24e4e7a38..a7ed49e10 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,7 +2,10 @@ Unreleased ========== synctl no longer starts the main synapse when using ``-a`` option with workers. -A new worker file should be added with ``worker_app: synapse.app.homeserver`` +A new worker file should be added with ``worker_app: synapse.app.homeserver``. + +This release also begins the process of renaming a number of the metrics +reported to prometheus. See `docs/metrics-howto.rst `_. Changes in synapse v0.26.0 (2018-01-05) diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst index 143cd0f42..8acc479bc 100644 --- a/docs/metrics-howto.rst +++ b/docs/metrics-howto.rst @@ -16,7 +16,7 @@ How to monitor Synapse metrics using Prometheus metrics_port: 9092 Also ensure that ``enable_metrics`` is set to ``True``. - + Restart synapse. 3. Add a prometheus target for synapse. @@ -28,11 +28,58 @@ How to monitor Synapse metrics using Prometheus static_configs: - targets: ["my.server.here:9092"] - If your prometheus is older than 1.5.2, you will need to replace + If your prometheus is older than 1.5.2, you will need to replace ``static_configs`` in the above with ``target_groups``. - + Restart prometheus. + +Block and response metrics renamed for 0.27.0 +--------------------------------------------- + +Synapse 0.27.0 begins the process of rationalising the duplicate ``*:count`` +metrics reported for the resource tracking for code blocks and HTTP requests. + +At the same time, the corresponding ``*:total`` metrics are being renamed, as +the ``:total`` suffix no longer makes sense in the absence of a corresponding +``:count`` metric. + +To enable a graceful migration path, this release just adds new names for the +metrics being renamed. A future release will remove the old ones. + +The following table shows the new metrics, and the old metrics which they are +replacing. + +==================================================== =================================================== +New name Old name +==================================================== =================================================== +synapse_util_metrics_block_count synapse_util_metrics_block_timer:count +synapse_util_metrics_block_count synapse_util_metrics_block_ru_utime:count +synapse_util_metrics_block_count synapse_util_metrics_block_ru_stime:count +synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_count:count +synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_duration:count + +synapse_util_metrics_block_time_seconds synapse_util_metrics_block_timer:total +synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_utime:total +synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_ru_stime:total +synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_count:total +synapse_util_metrics_block_db_txn_duration_seconds synapse_util_metrics_block_db_txn_duration:total + +synapse_http_server_response_count synapse_http_server_requests +synapse_http_server_response_count synapse_http_server_response_time:count +synapse_http_server_response_count synapse_http_server_response_ru_utime:count +synapse_http_server_response_count synapse_http_server_response_ru_stime:count +synapse_http_server_response_count synapse_http_server_response_db_txn_count:count +synapse_http_server_response_count synapse_http_server_response_db_txn_duration:count + +synapse_http_server_response_time_seconds synapse_http_server_response_time:total +synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_utime:total +synapse_http_server_response_ru_stime_seconds synapse_http_server_response_ru_stime:total +synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_count:total +synapse_http_server_response_db_txn_duration_seconds synapse_http_server_response_db_txn_duration:total +==================================================== =================================================== + + Standard Metric Names --------------------- @@ -42,7 +89,7 @@ have been changed to seconds, from miliseconds. ================================== ============================= New name Old name ----------------------------------- ----------------------------- +================================== ============================= process_cpu_user_seconds_total process_resource_utime / 1000 process_cpu_system_seconds_total process_resource_stime / 1000 process_open_fds (no 'type' label) process_fds @@ -52,8 +99,8 @@ The python-specific counts of garbage collector performance have been renamed. =========================== ====================== New name Old name ---------------------------- ---------------------- -python_gc_time reactor_gc_time +=========================== ====================== +python_gc_time reactor_gc_time python_gc_unreachable_total reactor_gc_unreachable python_gc_counts reactor_gc_counts =========================== ====================== @@ -62,7 +109,7 @@ The twisted-specific reactor metrics have been renamed. ==================================== ===================== New name Old name ------------------------------------- --------------------- +==================================== ===================== python_twisted_reactor_pending_calls reactor_pending_calls python_twisted_reactor_tick_time reactor_tick_time ==================================== ===================== From 307f88dfb6c157ee2bda6f9b8b3dee82cad490aa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 13:53:43 +0000 Subject: [PATCH 058/171] Fix up log lines --- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e82dcfdc2..d191f9d2f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -285,7 +285,7 @@ class MediaRepository(object): # If we have an entry in the DB, try and look for it if media_info: if media_info["quarantined_by"]: - logger.info("Media is quarentined") + logger.info("Media is quarantined") raise NotFoundError() responder = yield self.media_storage.fetch_media(file_info) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index e20d6f10b..1451f6f10 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,7 +85,9 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: - logger.info("Media is quarantined") + if media_info: + logger.info("Media is quarantined") + respond_404(request) return @@ -122,7 +124,8 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: - logger.info("Media is quarantined") + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -152,7 +155,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a local thumbnail of that size. Generating") + logger.debug("We don't have a thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_local_exact_thumbnail( @@ -162,7 +165,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - logger.warn("Failed to generate local thumbnail") + logger.warn("Failed to generate thumbnail") respond_404(request) @defer.inlineCallbacks @@ -201,7 +204,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a remote thumbnail of that size. Generating") + logger.debug("We don't have a thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_remote_exact_thumbnail( @@ -212,7 +215,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - logger.warn("Failed to generate remote thumbnail") + logger.warn("Failed to generate thumbnail") respond_404(request) @defer.inlineCallbacks From 5dfc83704b9f338c975a03bad7854218658b3a80 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 14:32:56 +0000 Subject: [PATCH 059/171] Fix typo --- synapse/rest/media/v1/thumbnail_resource.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 1451f6f10..8c9653843 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -84,10 +84,11 @@ class ThumbnailResource(Resource): method, m_type): media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - if media_info: - logger.info("Media is quarantined") - + if not media_info: + respond_404(request) + return + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -123,9 +124,11 @@ class ThumbnailResource(Resource): desired_type): media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - if media_info["quarantined_by"]: - logger.info("Media is quarantined") + if not media_info: + respond_404(request) + return + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return From 44a498418c62a835aae9bff8550f844888b3ab84 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 Jan 2018 22:40:51 +0000 Subject: [PATCH 060/171] Optimise LoggingContext creation and copying It turns out that the only thing we use the __dict__ of LoggingContext for is `request`, and given we create lots of LoggingContexts and then copy them every time we do a db transaction or log line, using the __dict__ seems a bit redundant. Let's try to optimise things by making the request attribute explicit. --- synapse/util/logcontext.py | 25 ++++++++++++++++++------- tests/crypto/test_keyring.py | 14 +++++++------- tests/util/test_logcontext.py | 16 ++++++++-------- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 48c9f6802..ca71a1fc2 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -52,13 +52,16 @@ except Exception: class LoggingContext(object): """Additional context for log formatting. Contexts are scoped within a "with" block. + Args: name (str): Name for the context for debugging. """ __slots__ = [ - "previous_context", "name", "usage_start", "usage_end", "main_thread", - "__dict__", "tag", "alive", + "previous_context", "name", "ru_stime", "ru_utime", + "db_txn_count", "db_txn_duration", "usage_start", "usage_end", + "main_thread", "alive", + "request", "tag", ] thread_local = threading.local() @@ -96,7 +99,9 @@ class LoggingContext(object): self.db_txn_count = 0 self.db_txn_duration = 0. self.usage_start = None + self.usage_end = None self.main_thread = threading.current_thread() + self.request = None self.tag = "" self.alive = True @@ -105,7 +110,11 @@ class LoggingContext(object): @classmethod def current_context(cls): - """Get the current logging context from thread local storage""" + """Get the current logging context from thread local storage + + Returns: + LoggingContext: the current logging context + """ return getattr(cls.thread_local, "current_context", cls.sentinel) @classmethod @@ -155,11 +164,13 @@ class LoggingContext(object): self.alive = False def copy_to(self, record): - """Copy fields from this context to the record""" - for key, value in self.__dict__.items(): - setattr(record, key, value) + """Copy logging fields from this context to a log record or + another LoggingContext + """ - record.ru_utime, record.ru_stime = self.get_resource_usage() + # 'request' is the only field we currently use in the logger, so that's + # all we need to copy + record.request = self.request def start(self): if threading.current_thread() is not self.main_thread: diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 570312da8..c899fecf5 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -68,7 +68,7 @@ class KeyringTestCase(unittest.TestCase): def check_context(self, _, expected): self.assertEquals( - getattr(LoggingContext.current_context(), "test_key", None), + getattr(LoggingContext.current_context(), "request", None), expected ) @@ -82,7 +82,7 @@ class KeyringTestCase(unittest.TestCase): lookup_2_deferred = defer.Deferred() with LoggingContext("one") as context_one: - context_one.test_key = "one" + context_one.request = "one" wait_1_deferred = kr.wait_for_previous_lookups( ["server1"], @@ -96,7 +96,7 @@ class KeyringTestCase(unittest.TestCase): wait_1_deferred.addBoth(self.check_context, "one") with LoggingContext("two") as context_two: - context_two.test_key = "two" + context_two.request = "two" # set off another wait. It should block because the first lookup # hasn't yet completed. @@ -137,7 +137,7 @@ class KeyringTestCase(unittest.TestCase): @defer.inlineCallbacks def get_perspectives(**kwargs): self.assertEquals( - LoggingContext.current_context().test_key, "11", + LoggingContext.current_context().request, "11", ) with logcontext.PreserveLoggingContext(): yield persp_deferred @@ -145,7 +145,7 @@ class KeyringTestCase(unittest.TestCase): self.http_client.post_json.side_effect = get_perspectives with LoggingContext("11") as context_11: - context_11.test_key = "11" + context_11.request = "11" # start off a first set of lookups res_deferreds = kr.verify_json_objects_for_server( @@ -173,7 +173,7 @@ class KeyringTestCase(unittest.TestCase): self.assertIs(LoggingContext.current_context(), context_11) context_12 = LoggingContext("12") - context_12.test_key = "12" + context_12.request = "12" with logcontext.PreserveLoggingContext(context_12): # a second request for a server with outstanding requests # should block rather than start a second call @@ -211,7 +211,7 @@ class KeyringTestCase(unittest.TestCase): sentinel_context = LoggingContext.current_context() with LoggingContext("one") as context_one: - context_one.test_key = "one" + context_one.request = "one" defer = kr.verify_json_for_server("server9", {}) try: diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index e2f7765f4..4850722bc 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -12,12 +12,12 @@ class LoggingContextTestCase(unittest.TestCase): def _check_test_key(self, value): self.assertEquals( - LoggingContext.current_context().test_key, value + LoggingContext.current_context().request, value ) def test_with_context(self): with LoggingContext() as context_one: - context_one.test_key = "test" + context_one.request = "test" self._check_test_key("test") @defer.inlineCallbacks @@ -25,14 +25,14 @@ class LoggingContextTestCase(unittest.TestCase): @defer.inlineCallbacks def competing_callback(): with LoggingContext() as competing_context: - competing_context.test_key = "competing" + competing_context.request = "competing" yield sleep(0) self._check_test_key("competing") reactor.callLater(0, competing_callback) with LoggingContext() as context_one: - context_one.test_key = "one" + context_one.request = "one" yield sleep(0) self._check_test_key("one") @@ -43,14 +43,14 @@ class LoggingContextTestCase(unittest.TestCase): @defer.inlineCallbacks def cb(): - context_one.test_key = "one" + context_one.request = "one" yield function() self._check_test_key("one") callback_completed[0] = True with LoggingContext() as context_one: - context_one.test_key = "one" + context_one.request = "one" # fire off function, but don't wait on it. logcontext.preserve_fn(cb)() @@ -107,7 +107,7 @@ class LoggingContextTestCase(unittest.TestCase): sentinel_context = LoggingContext.current_context() with LoggingContext() as context_one: - context_one.test_key = "one" + context_one.request = "one" d1 = logcontext.make_deferred_yieldable(blocking_function()) # make sure that the context was reset by make_deferred_yieldable @@ -124,7 +124,7 @@ class LoggingContextTestCase(unittest.TestCase): argument isn't actually a deferred""" with LoggingContext() as context_one: - context_one.test_key = "one" + context_one.request = "one" d1 = logcontext.make_deferred_yieldable("bum") self._check_test_key("one") From 6324b65f08b3f8dbfee6fef0079e2a87cb1c2c85 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 Jan 2018 18:17:54 +0000 Subject: [PATCH 061/171] Track db txn time in millisecs ... to reduce the amount of floating-point foo we do. --- synapse/http/server.py | 4 +++- synapse/http/site.py | 6 +++--- synapse/util/logcontext.py | 9 ++++++--- synapse/util/metrics.py | 8 +++++--- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 269b65ca4..0f30e6fd5 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -93,6 +93,8 @@ response_db_txn_count = metrics.register_counter( ), ) +# seconds spent waiting for db txns, excluding scheduling time, when processing +# this request response_db_txn_duration = metrics.register_counter( "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"], alternative_names=( @@ -377,7 +379,7 @@ class RequestMetrics(object): context.db_txn_count, request.method, self.name, tag ) response_db_txn_duration.inc_by( - context.db_txn_duration, request.method, self.name, tag + context.db_txn_duration_ms / 1000., request.method, self.name, tag ) diff --git a/synapse/http/site.py b/synapse/http/site.py index cd1492b1c..dc64f0f6f 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -66,10 +66,10 @@ class SynapseRequest(Request): context = LoggingContext.current_context() ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count - db_txn_duration = context.db_txn_duration + db_txn_duration_ms = context.db_txn_duration_ms except Exception: ru_utime, ru_stime = (0, 0) - db_txn_count, db_txn_duration = (0, 0) + db_txn_count, db_txn_duration_ms = (0, 0) self.site.access_logger.info( "%s - %s - {%s}" @@ -81,7 +81,7 @@ class SynapseRequest(Request): int(time.time() * 1000) - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), - int(db_txn_duration * 1000), + db_txn_duration_ms, int(db_txn_count), self.sentLength, self.code, diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index ca71a1fc2..a78e53812 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -59,7 +59,7 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration", "usage_start", "usage_end", + "db_txn_count", "db_txn_duration_ms", "usage_start", "usage_end", "main_thread", "alive", "request", "tag", ] @@ -97,7 +97,10 @@ class LoggingContext(object): self.ru_stime = 0. self.ru_utime = 0. self.db_txn_count = 0 - self.db_txn_duration = 0. + + # ms spent waiting for db txns, excluding scheduling time + self.db_txn_duration_ms = 0 + self.usage_start = None self.usage_end = None self.main_thread = threading.current_thread() @@ -205,7 +208,7 @@ class LoggingContext(object): def add_database_transaction(self, duration_ms): self.db_txn_count += 1 - self.db_txn_duration += duration_ms / 1000. + self.db_txn_duration_ms += duration_ms class LoggingContextFilter(logging.Filter): diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 8d22ff306..d25629cc5 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -72,6 +72,7 @@ block_db_txn_count = metrics.register_counter( ), ) +# seconds spent waiting for db txns, excluding scheduling time, in this block block_db_txn_duration = metrics.register_counter( "block_db_txn_duration_seconds", labels=["block_name"], alternative_names=( @@ -95,7 +96,7 @@ def measure_func(name): class Measure(object): __slots__ = [ "clock", "name", "start_context", "start", "new_context", "ru_utime", - "ru_stime", "db_txn_count", "db_txn_duration", "created_context" + "ru_stime", "db_txn_count", "db_txn_duration_ms", "created_context" ] def __init__(self, clock, name): @@ -115,7 +116,7 @@ class Measure(object): self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() self.db_txn_count = self.start_context.db_txn_count - self.db_txn_duration = self.start_context.db_txn_duration + self.db_txn_duration_ms = self.start_context.db_txn_duration_ms def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: @@ -145,7 +146,8 @@ class Measure(object): context.db_txn_count - self.db_txn_count, self.name ) block_db_txn_duration.inc_by( - context.db_txn_duration - self.db_txn_duration, self.name + (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., + self.name ) if self.created_context: From 0a90d9ede4a39d720afd131866f98b51aa591cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:03:05 +0000 Subject: [PATCH 062/171] Move setting of file_id up to caller --- synapse/rest/media/v1/media_repository.py | 21 +++++++++---------- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d191f9d2f..35c1dcbc9 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,7 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_info) + yield self._generate_thumbnails(None, media_id, media_id, media_info) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +413,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, media_info + server_name, media_id, file_id, media_info, ) defer.returnValue(media_info) @@ -525,24 +525,23 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False): + def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + url_cache=False): """Generate and store thumbnails for an image. Args: - server_name(str|None): The server name if remote media, else None if local - media_id(str) - media_info(dict) - url_cache(bool): If we are thumbnailing images downloaded for the URL cache, + server_name (str|None): The server name if remote media, else None if local + media_id (str): The media ID of the content. (This is the same as + the file_id for local content) + file_id (str): Local file ID + media_info (dict) + url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - if server_name: - file_id = media_info["filesystem_id"] - else: - file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f3dbbb3fe..5ddf581bd 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -185,8 +185,9 @@ class PreviewUrlResource(Resource): logger.debug("got media_info of '%s'" % media_info) if _is_media(media_info['media_type']): + file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, media_info['filesystem_id'], media_info, url_cache=True, + None, file_id, file_id, media_info, url_cache=True, ) og = { @@ -231,8 +232,9 @@ class PreviewUrlResource(Resource): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images + file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, image_info['filesystem_id'], image_info, url_cache=True, + None, file_id, file_id, image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] From 6368e5c0ab39e43ee6950ba94a826c44db7d43f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:17:38 +0000 Subject: [PATCH 063/171] Change _generate_thumbnails to take media_type --- synapse/rest/media/v1/media_repository.py | 11 ++++++----- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 35c1dcbc9..e08cf8af4 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,9 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_id, media_info) + yield self._generate_thumbnails( + None, media_id, media_id, media_info["media_type"], + ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +415,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info, + server_name, media_id, file_id, media_info["media_type"], ) defer.returnValue(media_info) @@ -525,7 +527,7 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + def _generate_thumbnails(self, server_name, media_id, file_id, media_type, url_cache=False): """Generate and store thumbnails for an image. @@ -534,14 +536,13 @@ class MediaRepository(object): media_id (str): The media ID of the content. (This is the same as the file_id for local content) file_id (str): Local file ID - media_info (dict) + media_type (str) url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ - media_type = media_info["media_type"] requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 5ddf581bd..981f01e41 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -187,7 +187,8 @@ class PreviewUrlResource(Resource): if _is_media(media_info['media_type']): file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, media_info, url_cache=True, + None, file_id, file_id, media_info["media_type"], + url_cache=True, ) og = { @@ -234,7 +235,8 @@ class PreviewUrlResource(Resource): # TODO: make sure we don't choke on white-on-transparent images file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, image_info, url_cache=True, + None, file_id, file_id, image_info["media_type"], + url_cache=True, ) if dims: og["og:image:width"] = dims['width'] From d863f68cab9665335b1657feeab5d00724dddd95 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:24:15 +0000 Subject: [PATCH 064/171] Use local vars --- synapse/rest/media/v1/media_repository.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e08cf8af4..22f86781f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -146,13 +146,9 @@ class MediaRepository(object): media_length=content_length, user_id=auth_user, ) - media_info = { - "media_type": media_type, - "media_length": content_length, - } yield self._generate_thumbnails( - None, media_id, media_id, media_info["media_type"], + None, media_id, media_id, media_type, ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -415,7 +411,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info["media_type"], + server_name, media_id, file_id, media_type, ) defer.returnValue(media_info) From 5e97ca7ee61761f6826fd1c4220877dc787d88dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 16 Jan 2018 16:52:31 +0000 Subject: [PATCH 065/171] fix typo --- synapse/config/tls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 4748f71c2..29eb012dd 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -96,7 +96,7 @@ class TlsConfig(Config): # certificates returned by this server match one of the fingerprints. # # Synapse automatically adds the fingerprint of its own certificate - # to the list. So if federation traffic is handle directly by synapse + # to the list. So if federation traffic is handled directly by synapse # then no modification to the list is required. # # If synapse is run behind a load balancer that handles the TLS then it From 8615f19d20f4a2048773b60ce840aab48f3e11b0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 16:17:24 +0000 Subject: [PATCH 066/171] rework runInteraction in terms of runConnection ... so that we can share the code --- synapse/storage/_base.py | 51 ++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index b971f0cb1..986617674 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -291,33 +291,33 @@ class SQLBaseStore(object): @defer.inlineCallbacks def runInteraction(self, desc, func, *args, **kwargs): - """Wraps the .runInteraction() method on the underlying db_pool.""" - current_context = LoggingContext.current_context() + """Starts a transaction on the database and runs a given function - start_time = time.time() * 1000 + Arguments: + desc (str): description of the transaction, for logging and metrics + func (func): callback function, which will be called with a + database transaction (twisted.enterprise.adbapi.Transaction) as + its first argument, followed by `args` and `kwargs`. + + args (list): positional args to pass to `func` + kwargs (dict): named args to pass to `func` + + Returns: + Deferred: The result of func + """ + current_context = LoggingContext.current_context() after_callbacks = [] final_callbacks = [] def inner_func(conn, *args, **kwargs): - with LoggingContext("runInteraction") as context: - sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) - - if self.database_engine.is_connection_closed(conn): - logger.debug("Reconnecting closed database connection") - conn.reconnect() - - current_context.copy_to(context) - return self._new_transaction( - conn, desc, after_callbacks, final_callbacks, current_context, - func, *args, **kwargs - ) + return self._new_transaction( + conn, desc, after_callbacks, final_callbacks, current_context, + func, *args, **kwargs + ) try: - with PreserveLoggingContext(): - result = yield self._db_pool.runWithConnection( - inner_func, *args, **kwargs - ) + result = yield self.runWithConnection(inner_func, *args, **kwargs) for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) @@ -329,7 +329,18 @@ class SQLBaseStore(object): @defer.inlineCallbacks def runWithConnection(self, func, *args, **kwargs): - """Wraps the .runInteraction() method on the underlying db_pool.""" + """Wraps the .runWithConnection() method on the underlying db_pool. + + Arguments: + func (func): callback function, which will be called with a + database connection (twisted.enterprise.adbapi.Connection) as + its first argument, followed by `args` and `kwargs`. + args (list): positional args to pass to `func` + kwargs (dict): named args to pass to `func` + + Returns: + Deferred: The result of func + """ current_context = LoggingContext.current_context() start_time = time.time() * 1000 From 3d12d97415ac6d6a4ab8188af31c7df12c5d19f8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jan 2018 00:27:14 +0000 Subject: [PATCH 067/171] Track DB scheduling delay per-request For each request, track the amount of time spent waiting for a db connection. This entails adding it to the LoggingContext and we may as well add metrics for it while we are passing. --- synapse/http/server.py | 7 +++++++ synapse/http/site.py | 4 +++- synapse/storage/_base.py | 4 +++- synapse/util/logcontext.py | 18 +++++++++++++++++- synapse/util/metrics.py | 14 +++++++++++++- 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 0f30e6fd5..7b6418bc2 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -102,6 +102,10 @@ response_db_txn_duration = metrics.register_counter( ), ) +# seconds spent waiting for a db connection, when processing this request +response_db_sched_duration = metrics.register_counter( + "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"] +) _next_request_id = 0 @@ -381,6 +385,9 @@ class RequestMetrics(object): response_db_txn_duration.inc_by( context.db_txn_duration_ms / 1000., request.method, self.name, tag ) + response_db_sched_duration.inc_by( + context.db_sched_duration_ms / 1000., request.method, self.name, tag + ) class RootRedirect(resource.Resource): diff --git a/synapse/http/site.py b/synapse/http/site.py index dc64f0f6f..e422c8dfa 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -67,13 +67,14 @@ class SynapseRequest(Request): ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count db_txn_duration_ms = context.db_txn_duration_ms + db_sched_duration_ms = context.db_sched_duration_ms except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_ms = (0, 0) self.site.access_logger.info( "%s - %s - {%s}" - " Processed request: %dms (%dms, %dms) (%dms/%d)" + " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, @@ -81,6 +82,7 @@ class SynapseRequest(Request): int(time.time() * 1000) - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), + db_sched_duration_ms, db_txn_duration_ms, int(db_txn_count), self.sentLength, diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 986617674..68125006e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -347,7 +347,9 @@ class SQLBaseStore(object): def inner_func(conn, *args, **kwargs): with LoggingContext("runWithConnection") as context: - sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) + sched_duration_ms = time.time() * 1000 - start_time + sql_scheduling_timer.inc_by(sched_duration_ms) + current_context.add_database_scheduled(sched_duration_ms) if self.database_engine.is_connection_closed(conn): logger.debug("Reconnecting closed database connection") diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a78e53812..94fa7cac9 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -59,7 +59,8 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration_ms", "usage_start", "usage_end", + "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "usage_start", "usage_end", "main_thread", "alive", "request", "tag", ] @@ -86,6 +87,9 @@ class LoggingContext(object): def add_database_transaction(self, duration_ms): pass + def add_database_scheduled(self, sched_ms): + pass + def __nonzero__(self): return False @@ -101,6 +105,9 @@ class LoggingContext(object): # ms spent waiting for db txns, excluding scheduling time self.db_txn_duration_ms = 0 + # ms spent waiting for db txns to be scheduled + self.db_sched_duration_ms = 0 + self.usage_start = None self.usage_end = None self.main_thread = threading.current_thread() @@ -210,6 +217,15 @@ class LoggingContext(object): self.db_txn_count += 1 self.db_txn_duration_ms += duration_ms + def add_database_scheduled(self, sched_ms): + """Record a use of the database pool + + Args: + sched_ms (int): number of milliseconds it took us to get a + connection + """ + self.db_sched_duration_ms += sched_ms + class LoggingContextFilter(logging.Filter): """Logging filter that adds values from the current logging context to each diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index d25629cc5..059bb7fed 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -80,6 +80,11 @@ block_db_txn_duration = metrics.register_counter( ), ) +# seconds spent waiting for a db connection, in this block +block_db_sched_duration = metrics.register_counter( + "block_db_sched_duration_seconds", labels=["block_name"], +) + def measure_func(name): def wrapper(func): @@ -96,7 +101,9 @@ def measure_func(name): class Measure(object): __slots__ = [ "clock", "name", "start_context", "start", "new_context", "ru_utime", - "ru_stime", "db_txn_count", "db_txn_duration_ms", "created_context" + "ru_stime", + "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "created_context", ] def __init__(self, clock, name): @@ -117,6 +124,7 @@ class Measure(object): self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() self.db_txn_count = self.start_context.db_txn_count self.db_txn_duration_ms = self.start_context.db_txn_duration_ms + self.db_sched_duration_ms = self.start_context.db_sched_duration_ms def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: @@ -149,6 +157,10 @@ class Measure(object): (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., self.name ) + block_db_sched_duration.inc_by( + (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000., + self.name + ) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) From 936482d507ffbf59d9aee58f851ec6ff2a120424 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 17:58:16 +0000 Subject: [PATCH 068/171] Fix 'NoneType' object has no attribute 'writeHeaders' Avoid throwing a (harmless) exception when we try to write an error response to an http request where the client has disconnected. This comes up as a CRITICAL error in the logs which tends to mislead people into thinking there's an actual problem --- synapse/http/server.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 269b65ca4..8077e2279 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -316,15 +316,6 @@ class JsonResource(HttpServer, resource.Resource): def _send_response(self, request, code, response_json_object, response_code_message=None): - # could alternatively use request.notifyFinish() and flip a flag when - # the Deferred fires, but since the flag is RIGHT THERE it seems like - # a waste. - if request._disconnected: - logger.warn( - "Not sending response to request %s, already disconnected.", - request) - return - outgoing_responses_counter.inc(request.method, str(code)) # TODO: Only enable CORS for the requests that need it. @@ -400,6 +391,15 @@ class RootRedirect(resource.Resource): def respond_with_json(request, code, json_object, send_cors=False, response_code_message=None, pretty_print=False, version_string="", canonical_json=True): + # could alternatively use request.notifyFinish() and flip a flag when + # the Deferred fires, but since the flag is RIGHT THERE it seems like + # a waste. + if request._disconnected: + logger.warn( + "Not sending response to request %s, already disconnected.", + request) + return + if pretty_print: json_bytes = encode_pretty_printed_json(json_object) + "\n" else: From d728c47142019da2896bba7a84ac92e9959fd7af Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 10:06:14 +0000 Subject: [PATCH 069/171] Add docstring --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 22f86781f..97c82c150 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -532,7 +532,7 @@ class MediaRepository(object): media_id (str): The media ID of the content. (This is the same as the file_id for local content) file_id (str): Local file ID - media_type (str) + media_type (str): The content type of the file url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer From 05f98a22249974ce40a461d12da93af0bc624319 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 16:42:43 +0000 Subject: [PATCH 070/171] Keep track of last access time for local media --- synapse/rest/media/v1/media_repository.py | 32 +++++++++++++++---- synapse/storage/media_repository.py | 23 +++++++++++-- synapse/storage/prepare_database.py | 2 +- .../schema/delta/47/last_access_media.sql | 19 +++++++++++ 4 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 synapse/storage/schema/delta/47/last_access_media.sql diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150..b2c76440b 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -53,7 +53,7 @@ import urlparse logger = logging.getLogger(__name__) -UPDATE_RECENTLY_ACCESSED_REMOTES_TS = 60 * 1000 +UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 class MediaRepository(object): @@ -75,6 +75,7 @@ class MediaRepository(object): self.remote_media_linearizer = Linearizer(name="media_remote") self.recently_accessed_remotes = set() + self.recently_accessed_locals = set() # List of StorageProviders where we should search for media and # potentially upload to. @@ -99,19 +100,34 @@ class MediaRepository(object): ) self.clock.looping_call( - self._update_recently_accessed_remotes, - UPDATE_RECENTLY_ACCESSED_REMOTES_TS + self._update_recently_accessed, + UPDATE_RECENTLY_ACCESSED_TS, ) @defer.inlineCallbacks - def _update_recently_accessed_remotes(self): - media = self.recently_accessed_remotes + def _update_recently_accessed(self): + remote_media = self.recently_accessed_remotes self.recently_accessed_remotes = set() + local_media = self.recently_accessed_locals + self.recently_accessed_locals = set() + yield self.store.update_cached_last_access_time( - media, self.clock.time_msec() + local_media, remote_media, self.clock.time_msec() ) + def mark_recently_accessed(self, server_name, media_id): + """Mark the given media as recently accessed. + + Args: + server_name (str|None): Origin server of media, or None if local + media_id (str): The media ID of the content + """ + if server_name: + self.recently_accessed_remotes.add((server_name, media_id)) + else: + self.recently_accessed_locals.add(media_id) + @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): @@ -173,6 +189,8 @@ class MediaRepository(object): respond_404(request) return + self.mark_recently_accessed(None, media_id) + media_type = media_info["media_type"] media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] @@ -204,7 +222,7 @@ class MediaRepository(object): Deferred: Resolves once a response has successfully been written to request """ - self.recently_accessed_remotes.add((server_name, media_id)) + self.mark_recently_accessed(server_name, media_id) # We linearize here to ensure that we don't try and download remote # media multiple times concurrently diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 6ebc37249..e6cdbb054 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -173,7 +173,14 @@ class MediaRepositoryStore(BackgroundUpdateStore): desc="store_cached_remote_media", ) - def update_cached_last_access_time(self, origin_id_tuples, time_ts): + def update_cached_last_access_time(self, local_media, remote_media, time_ms): + """Updates the last access time of the given media + + Args: + local_media (iterable[str]): Set of media_ids + remote_media (iterable[(str, str)]): Set of (server_name, media_id) + time_ms: Current time in milliseconds + """ def update_cache_txn(txn): sql = ( "UPDATE remote_media_cache SET last_access_ts = ?" @@ -181,8 +188,18 @@ class MediaRepositoryStore(BackgroundUpdateStore): ) txn.executemany(sql, ( - (time_ts, media_origin, media_id) - for media_origin, media_id in origin_id_tuples + (time_ms, media_origin, media_id) + for media_origin, media_id in remote_media + )) + + sql = ( + "UPDATE local_media_repository SET last_access_ts = ?" + " WHERE media_id = ?" + ) + + txn.executemany(sql, ( + (time_ms, media_id) + for media_id in local_media )) return self.runInteraction("update_cached_last_access_time", update_cache_txn) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index d1691bbac..c845a0cec 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 46 +SCHEMA_VERSION = 47 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/47/last_access_media.sql b/synapse/storage/schema/delta/47/last_access_media.sql new file mode 100644 index 000000000..bc754ac86 --- /dev/null +++ b/synapse/storage/schema/delta/47/last_access_media.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- drop the unique constraint on deleted_pushers so that we can just insert +-- into it rather than upserting. + +ALTER TABLE local_media_repository ADD COLUMN last_access_ts BIGINT; From 300edc23482fbc637a64a9e1cc1235a1fa7f9562 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 11:53:04 +0000 Subject: [PATCH 071/171] Update last access time when thumbnails are viewed --- synapse/rest/media/v1/thumbnail_resource.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 8c9653843..c09f2dec4 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -67,6 +67,7 @@ class ThumbnailResource(Resource): yield self._respond_local_thumbnail( request, media_id, width, height, method, m_type ) + self.media_repo.mark_recently_accessed(server_name, media_id) else: if self.dynamic_thumbnails: yield self._select_or_generate_remote_thumbnail( @@ -78,6 +79,7 @@ class ThumbnailResource(Resource): request, server_name, media_id, width, height, method, m_type ) + self.media_repo.mark_recently_accessed(None, media_id) @defer.inlineCallbacks def _respond_local_thumbnail(self, request, media_id, width, height, From a5213df1f7448c1810e6ae842448c81abe57ba69 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 18:25:28 +0000 Subject: [PATCH 072/171] Sanity checking for user ids Check the user_id passed to a couple of APIs for validity, to avoid "IndexError: list index out of range" exception which looks scary and results in a 500 rather than a more useful error. Fixes #1432, among other things --- synapse/handlers/devicemessage.py | 14 +++++++++++--- synapse/handlers/e2e_keys.py | 13 ++++++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index f7fad15c6..d996aa90b 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -17,7 +17,8 @@ import logging from twisted.internet import defer -from synapse.types import get_domain_from_id +from synapse.api.errors import SynapseError +from synapse.types import get_domain_from_id, UserID from synapse.util.stringutils import random_string @@ -33,7 +34,7 @@ class DeviceMessageHandler(object): """ self.store = hs.get_datastore() self.notifier = hs.get_notifier() - self.is_mine_id = hs.is_mine_id + self.is_mine = hs.is_mine self.federation = hs.get_federation_sender() hs.get_replication_layer().register_edu_handler( @@ -52,6 +53,12 @@ class DeviceMessageHandler(object): message_type = content["type"] message_id = content["message_id"] for user_id, by_device in content["messages"].items(): + # we use UserID.from_string to catch invalid user ids + if not self.is_mine(UserID.from_string(user_id)): + logger.warning("Request for keys for non-local user %s", + user_id) + raise SynapseError(400, "Not a user here") + messages_by_device = { device_id: { "content": message_content, @@ -77,7 +84,8 @@ class DeviceMessageHandler(object): local_messages = {} remote_messages = {} for user_id, by_device in messages.items(): - if self.is_mine_id(user_id): + # we use UserID.from_string to catch invalid user ids + if self.is_mine(UserID.from_string(user_id)): messages_by_device = { device_id: { "content": message_content, diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 668a90e49..5af8abf66 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -20,7 +20,7 @@ from canonicaljson import encode_canonical_json from twisted.internet import defer from synapse.api.errors import SynapseError, CodeMessageException -from synapse.types import get_domain_from_id +from synapse.types import get_domain_from_id, UserID from synapse.util.logcontext import preserve_fn, make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination @@ -32,7 +32,7 @@ class E2eKeysHandler(object): self.store = hs.get_datastore() self.federation = hs.get_replication_layer() self.device_handler = hs.get_device_handler() - self.is_mine_id = hs.is_mine_id + self.is_mine = hs.is_mine self.clock = hs.get_clock() # doesn't really work as part of the generic query API, because the @@ -70,7 +70,8 @@ class E2eKeysHandler(object): remote_queries = {} for user_id, device_ids in device_keys_query.items(): - if self.is_mine_id(user_id): + # we use UserID.from_string to catch invalid user ids + if self.is_mine(UserID.from_string(user_id)): local_query[user_id] = device_ids else: remote_queries[user_id] = device_ids @@ -170,7 +171,8 @@ class E2eKeysHandler(object): result_dict = {} for user_id, device_ids in query.items(): - if not self.is_mine_id(user_id): + # we use UserID.from_string to catch invalid user ids + if not self.is_mine(UserID.from_string(user_id)): logger.warning("Request for keys for non-local user %s", user_id) raise SynapseError(400, "Not a user here") @@ -213,7 +215,8 @@ class E2eKeysHandler(object): remote_queries = {} for user_id, device_keys in query.get("one_time_keys", {}).items(): - if self.is_mine_id(user_id): + # we use UserID.from_string to catch invalid user ids + if self.is_mine(UserID.from_string(user_id)): for device_id, algorithm in device_keys.items(): local_query.append((user_id, device_id, algorithm)) else: From 2fb3a28c9894f2cd1ed2ba3404a519d0bbd754cc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 14:59:44 +0000 Subject: [PATCH 073/171] Remove lost comment --- synapse/storage/schema/delta/47/last_access_media.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/schema/delta/47/last_access_media.sql b/synapse/storage/schema/delta/47/last_access_media.sql index bc754ac86..f505fb22b 100644 --- a/synapse/storage/schema/delta/47/last_access_media.sql +++ b/synapse/storage/schema/delta/47/last_access_media.sql @@ -13,7 +13,4 @@ * limitations under the License. */ --- drop the unique constraint on deleted_pushers so that we can just insert --- into it rather than upserting. - ALTER TABLE local_media_repository ADD COLUMN last_access_ts BIGINT; From 390093d45e1951b1a1d8a034667d2e84b3bf064d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 17 Jan 2018 15:44:31 +0000 Subject: [PATCH 074/171] Split resolve_events into two functions ... so that the return type doesn't depend on the arg types --- synapse/state.py | 45 +++++++++++++++++++++++---------------- synapse/storage/events.py | 4 ++-- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 9e624b493..1f9abf9d3 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -341,7 +341,7 @@ class StateHandler(object): if conflicted_state: logger.info("Resolving conflicted state for %r", room_id) with Measure(self.clock, "state._resolve_events"): - new_state = yield resolve_events( + new_state = yield resolve_events_with_factory( state_groups_ids.values(), state_map_factory=lambda ev_ids: self.store.get_events( ev_ids, get_prev_content=False, check_redacted=False, @@ -404,7 +404,7 @@ class StateHandler(object): } with Measure(self.clock, "state._resolve_events"): - new_state = resolve_events(state_set_ids, state_map) + new_state = resolve_events_with_state_map(state_set_ids, state_map) new_state = { key: state_map[ev_id] for key, ev_id in new_state.items() @@ -420,19 +420,17 @@ def _ordered_events(events): return sorted(events, key=key_func) -def resolve_events(state_sets, state_map_factory): +def resolve_events_with_state_map(state_sets, state_map): """ Args: state_sets(list): List of dicts of (type, state_key) -> event_id, which are the different state groups to resolve. - state_map_factory(dict|callable): If callable, then will be called - with a list of event_ids that are needed, and should return with - a Deferred of dict of event_id to event. Otherwise, should be - a dict from event_id to event of all events in state_sets. + state_map(dict): a dict from event_id to event, for all events in + state_sets. Returns - dict[(str, str), synapse.events.FrozenEvent] is a map from - (type, state_key) to event. + dict[(str, str), synapse.events.FrozenEvent]: + a map from (type, state_key) to event. """ if len(state_sets) == 1: return state_sets[0] @@ -441,13 +439,6 @@ def resolve_events(state_sets, state_map_factory): state_sets, ) - if callable(state_map_factory): - return _resolve_with_state_fac( - unconflicted_state, conflicted_state, state_map_factory - ) - - state_map = state_map_factory - auth_events = _create_auth_events_from_maps( unconflicted_state, conflicted_state, state_map ) @@ -491,8 +482,26 @@ def _seperate(state_sets): @defer.inlineCallbacks -def _resolve_with_state_fac(unconflicted_state, conflicted_state, - state_map_factory): +def resolve_events_with_factory(state_sets, state_map_factory): + """ + Args: + state_sets(list): List of dicts of (type, state_key) -> event_id, + which are the different state groups to resolve. + state_map_factory(func): will be called + with a list of event_ids that are needed, and should return with + a Deferred of dict of event_id to event. + + Returns + Deferred[dict[(str, str), synapse.events.FrozenEvent]]: + a map from (type, state_key) to event. + """ + if len(state_sets) == 1: + defer.returnValue(state_sets[0]) + + unconflicted_state, conflicted_state = _seperate( + state_sets, + ) + needed_events = set( event_id for event_ids in conflicted_state.itervalues() diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ad1d78270..c5292a531 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -27,7 +27,7 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.state import resolve_events +from synapse.state import resolve_events_with_factory from synapse.util.caches.descriptors import cached from synapse.types import get_domain_from_id @@ -557,7 +557,7 @@ class EventsStore(SQLBaseStore): to_return.update(evs) defer.returnValue(to_return) - current_state = yield resolve_events( + current_state = yield resolve_events_with_factory( state_sets, state_map_factory=get_events, ) From 2d9ab533f9faa3f98eea166b05d3a0fb7fc2f80c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 17 Jan 2018 15:58:52 +0000 Subject: [PATCH 075/171] fix SQL when searching all users --- synapse/storage/user_directory.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index c9bff408e..f150ef010 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,8 +641,13 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - join_clause = "" - where_clause = "?<>''" # naughty hack to keep the same number of binds + # dummy to keep the number of binds & aliases the same + join_clause = """ + LEFT JOIN ( + SELECT NULL as user_id WHERE NULL = ? + ) AS s USING (user_id)" + """ + where_clause = "" else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) From bc67e7d260631d3fa7bc78653376e15dc0771364 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:43:03 +0000 Subject: [PATCH 076/171] Add decent impl of a FileConsumer Twisted core doesn't have a general purpose one, so we need to write one ourselves. Features: - All writing happens in background thread - Supports both push and pull producers - Push producers get paused if the consumer falls behind --- synapse/util/file_consumer.py | 158 +++++++++++++++++++++++++++++++ tests/util/test_file_consumer.py | 138 +++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 synapse/util/file_consumer.py create mode 100644 tests/util/test_file_consumer.py diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py new file mode 100644 index 000000000..de478fcb3 --- /dev/null +++ b/synapse/util/file_consumer.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vecotr Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, threads, reactor + +from synapse.util.logcontext import make_deferred_yieldable + +import Queue + + +class BackgroundFileConsumer(object): + """A consumer that writes to a file like object. Supports both push + and pull producers + + Args: + file_obj (file): The file like object to write to. Closed when + finished. + """ + + # For PushProducers pause if we have this many unwritten slices + _PAUSE_ON_QUEUE_SIZE = 5 + # And resume once the size of the queue is less than this + _RESUME_ON_QUEUE_SIZE = 2 + + def __init__(self, file_obj): + self.file_obj = file_obj + + # Producer we're registered with + self.producer = None + + # True if PushProducer, false if PullProducer + self.streaming = False + + # Queue of slices of bytes to be written. When producer calls + # unregister a final None is sent. + self.bytes_queue = Queue.Queue() + + # Deferred that is resolved when finished writing + self.finished_deferred = None + + # If the _writer thread throws an exception it gets stored here. + self._write_exception = None + + # A deferred that gets resolved when the bytes_queue gets empty. + # Mainly used for tests. + self._notify_empty_deferred = None + + def registerProducer(self, producer, streaming): + """Part of IProducer interface + + Args: + producer (IProducer) + streaming (bool): True if push based producer, False if pull + based. + """ + self.producer = producer + self.streaming = streaming + self.finished_deferred = threads.deferToThread(self._writer) + if not streaming: + self.producer.resumeProducing() + + self.paused_producer = False + + def unregisterProducer(self): + """Part of IProducer interface + """ + self.producer = None + if not self.finished_deferred.called: + self.bytes_queue.put_nowait(None) + + def write(self, bytes): + """Part of IProducer interface + """ + if self._write_exception: + raise self._write_exception + + if self.finished_deferred.called: + raise Exception("consumer has closed") + + self.bytes_queue.put_nowait(bytes) + + # If this is a pushed based consumer and the queue is getting behind + # then we pause the producer. + if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: + self.paused_producer = True + self.producer.pauseProducing() + + def _writer(self): + """This is run in a background thread to write to the file. + """ + try: + while self.producer or not self.bytes_queue.empty(): + # If we've paused the producer check if we should resume the + # producer. + if self.producer and self.paused_producer: + if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: + reactor.callFromThread(self._resume_paused_producer) + + if self._notify_empty and self.bytes_queue.empty(): + reactor.callFromThread(self._notify_empty) + + bytes = self.bytes_queue.get() + + # If we get a None (or empty list) then that's a signal used + # to indicate we should check if we should stop. + if bytes: + self.file_obj.write(bytes) + + # If its a pull producer then we need to explicitly ask for + # more stuff. + if not self.streaming and self.producer: + reactor.callFromThread(self.producer.resumeProducing) + except Exception as e: + self._write_exception = e + raise + finally: + self.file_obj.close() + + def wait(self): + """Returns a deferred that resolves when finished writing to file + """ + return make_deferred_yieldable(self.finished_deferred) + + def _resume_paused_producer(self): + """Gets called if we should resume producing after being paused + """ + if self.paused_producer and self.producer: + self.paused_producer = False + self.producer.resumeProducing() + + def _notify_empty(self): + """Called when the _writer thread thinks the queue may be empty and + we should notify anything waiting on `wait_for_writes` + """ + if self._notify_empty_deferred and self.bytes_queue.empty(): + d = self._notify_empty_deferred + self._notify_empty_deferred = None + d.callback(None) + + def wait_for_writes(self): + """Wait for the write queue to be empty and for writes to have + finished. This is mainly useful for tests. + """ + if not self._notify_empty_deferred: + self._notify_empty_deferred = defer.Deferred() + return self._notify_empty_deferred diff --git a/tests/util/test_file_consumer.py b/tests/util/test_file_consumer.py new file mode 100644 index 000000000..8acb68f0c --- /dev/null +++ b/tests/util/test_file_consumer.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from twisted.internet import defer +from mock import NonCallableMock + +from synapse.util.file_consumer import BackgroundFileConsumer + +from tests import unittest +from StringIO import StringIO + +import threading + + +class FileConsumerTests(unittest.TestCase): + + @defer.inlineCallbacks + def test_pull_consumer(self): + string_file = StringIO() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = DummyPullProducer() + + yield producer.register_with_consumer(consumer) + + yield producer.write_and_wait("Foo") + + self.assertEqual(string_file.getvalue(), "Foo") + + yield producer.write_and_wait("Bar") + + self.assertEqual(string_file.getvalue(), "FooBar") + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + @defer.inlineCallbacks + def test_push_consumer(self): + string_file = StringIO() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = NonCallableMock(spec_set=[]) + + consumer.registerProducer(producer, True) + + consumer.write("Foo") + yield consumer.wait_for_writes() + + self.assertEqual(string_file.getvalue(), "Foo") + + consumer.write("Bar") + yield consumer.wait_for_writes() + + self.assertEqual(string_file.getvalue(), "FooBar") + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + @defer.inlineCallbacks + def test_push_producer_feedback(self): + string_file = BlockingStringWrite() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = NonCallableMock(spec_set=["pauseProducing", "resumeProducing"]) + + consumer.registerProducer(producer, True) + + with string_file.write_lock: + for _ in range(consumer._PAUSE_ON_QUEUE_SIZE): + consumer.write("Foo") + + producer.pauseProducing.assert_called_once() + + yield consumer.wait_for_writes() + producer.resumeProducing.assert_called_once() + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + +class DummyPullProducer(object): + def __init__(self): + self.consumer = None + self.deferred = defer.Deferred() + + def resumeProducing(self): + d = self.deferred + self.deferred = defer.Deferred() + d.callback(None) + + def write_and_wait(self, bytes): + d = self.deferred + self.consumer.write(bytes) + return d + + def register_with_consumer(self, consumer): + d = self.deferred + self.consumer = consumer + self.consumer.registerProducer(self, False) + return d + + +class BlockingStringWrite(object): + def __init__(self): + self.buffer = "" + self.closed = False + self.write_lock = threading.Lock() + + def write(self, bytes): + self.buffer += bytes + + def close(self): + self.closed = True From 1224612a798ce9f14f0d44e1246f87da15a959f1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 17 Jan 2018 16:01:59 +0000 Subject: [PATCH 077/171] Log room when doing state resolution Mostly because it helps figure out what is prompting the resolution --- synapse/storage/events.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d08f7571d..ba0da8364 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -528,6 +528,12 @@ class EventsStore(SQLBaseStore): # the events we have yet to persist, so we need a slightly more # complicated event lookup function than simply looking the events # up in the db. + + logger.info( + "Resolving state for %s with %i state sets", + room_id, len(state_sets), + ) + events_map = {ev.event_id: ev for ev, _ in events_context} @defer.inlineCallbacks From a177325b49be4793c8ed21147f8d301a0649a2b6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:02:43 +0000 Subject: [PATCH 078/171] Fix comments --- synapse/util/file_consumer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index de478fcb3..5284c7967 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2018 New Vecotr Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -58,7 +58,7 @@ class BackgroundFileConsumer(object): self._notify_empty_deferred = None def registerProducer(self, producer, streaming): - """Part of IProducer interface + """Part of IConsumer interface Args: producer (IProducer) @@ -91,7 +91,7 @@ class BackgroundFileConsumer(object): self.bytes_queue.put_nowait(bytes) - # If this is a pushed based consumer and the queue is getting behind + # If this is a PushProducer and the queue is getting behind # then we pause the producer. if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: self.paused_producer = True From 28b338ed9bafc2017a635848e14a2a25b78d0016 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:04:41 +0000 Subject: [PATCH 079/171] Move definition of paused_producer to __init__ --- synapse/util/file_consumer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 5284c7967..54c9da957 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -43,6 +43,10 @@ class BackgroundFileConsumer(object): # True if PushProducer, false if PullProducer self.streaming = False + # For PushProducers, indicates whether we've paused the producer and + # need to call resumeProducing before we get more data. + self.paused_producer = False + # Queue of slices of bytes to be written. When producer calls # unregister a final None is sent. self.bytes_queue = Queue.Queue() @@ -71,8 +75,6 @@ class BackgroundFileConsumer(object): if not streaming: self.producer.resumeProducing() - self.paused_producer = False - def unregisterProducer(self): """Part of IProducer interface """ From 17b54389feb3855a33406149a8a59f0327bb3ad1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:05:34 +0000 Subject: [PATCH 080/171] Fix _notify_empty typo --- synapse/util/file_consumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 54c9da957..479e48061 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -110,7 +110,7 @@ class BackgroundFileConsumer(object): if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty and self.bytes_queue.empty(): + if self._notify_empty_deferred and self.bytes_queue.empty(): reactor.callFromThread(self._notify_empty) bytes = self.bytes_queue.get() From dc519602ac0f35d39a70c91f0e6057e865a61dfc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:07:17 +0000 Subject: [PATCH 081/171] Ensure we registerProducer isn't called twice --- synapse/util/file_consumer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 479e48061..d7bbb0aeb 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -69,6 +69,9 @@ class BackgroundFileConsumer(object): streaming (bool): True if push based producer, False if pull based. """ + if self.producer: + raise Exception("registerProducer called twice") + self.producer = producer self.streaming = streaming self.finished_deferred = threads.deferToThread(self._writer) From ce236f8ac890842e105fee0df96c79f3d8ab8783 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 18 Jan 2018 11:30:49 +0000 Subject: [PATCH 082/171] better exception logging in callbackmetrics when we fail to render a metric, give a clue as to which metric it was --- synapse/metrics/metric.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index f480aae61..1e783e5ff 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -15,6 +15,9 @@ from itertools import chain +import logging + +logger = logging.getLogger(__name__) def flatten(items): @@ -153,7 +156,11 @@ class CallbackMetric(BaseMetric): self.callback = callback def render(self): - value = self.callback() + try: + value = self.callback() + except Exception: + logger.exception("Failed to render %s", self.name) + return ["# FAILED to render " + self.name] if self.is_scalar(): return list(self._render_for_labels([], value)) From 2f18a2647b6b9cc07c3cc5f2bec3e1bab67d0eea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:10:12 +0000 Subject: [PATCH 083/171] Make all fields private --- synapse/util/file_consumer.py | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index d7bbb0aeb..d19d48665 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -35,24 +35,24 @@ class BackgroundFileConsumer(object): _RESUME_ON_QUEUE_SIZE = 2 def __init__(self, file_obj): - self.file_obj = file_obj + self._file_obj = file_obj # Producer we're registered with - self.producer = None + self._producer = None # True if PushProducer, false if PullProducer self.streaming = False # For PushProducers, indicates whether we've paused the producer and # need to call resumeProducing before we get more data. - self.paused_producer = False + self._paused_producer = False # Queue of slices of bytes to be written. When producer calls # unregister a final None is sent. - self.bytes_queue = Queue.Queue() + self._bytes_queue = Queue.Queue() # Deferred that is resolved when finished writing - self.finished_deferred = None + self._finished_deferred = None # If the _writer thread throws an exception it gets stored here. self._write_exception = None @@ -69,21 +69,21 @@ class BackgroundFileConsumer(object): streaming (bool): True if push based producer, False if pull based. """ - if self.producer: + if self._producer: raise Exception("registerProducer called twice") - self.producer = producer + self._producer = producer self.streaming = streaming - self.finished_deferred = threads.deferToThread(self._writer) + self._finished_deferred = threads.deferToThread(self._writer) if not streaming: - self.producer.resumeProducing() + self._producer.resumeProducing() def unregisterProducer(self): """Part of IProducer interface """ - self.producer = None - if not self.finished_deferred.called: - self.bytes_queue.put_nowait(None) + self._producer = None + if not self._finished_deferred.called: + self._bytes_queue.put_nowait(None) def write(self, bytes): """Part of IProducer interface @@ -91,65 +91,65 @@ class BackgroundFileConsumer(object): if self._write_exception: raise self._write_exception - if self.finished_deferred.called: + if self._finished_deferred.called: raise Exception("consumer has closed") - self.bytes_queue.put_nowait(bytes) + self._bytes_queue.put_nowait(bytes) # If this is a PushProducer and the queue is getting behind # then we pause the producer. - if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: - self.paused_producer = True - self.producer.pauseProducing() + if self.streaming and self._bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: + self._paused_producer = True + self._producer.pauseProducing() def _writer(self): """This is run in a background thread to write to the file. """ try: - while self.producer or not self.bytes_queue.empty(): + while self._producer or not self._bytes_queue.empty(): # If we've paused the producer check if we should resume the # producer. - if self.producer and self.paused_producer: - if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: + if self._producer and self._paused_producer: + if self._bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty_deferred and self.bytes_queue.empty(): + if self._notify_empty_deferred and self._bytes_queue.empty(): reactor.callFromThread(self._notify_empty) - bytes = self.bytes_queue.get() + bytes = self._bytes_queue.get() # If we get a None (or empty list) then that's a signal used # to indicate we should check if we should stop. if bytes: - self.file_obj.write(bytes) + self._file_obj.write(bytes) # If its a pull producer then we need to explicitly ask for # more stuff. - if not self.streaming and self.producer: - reactor.callFromThread(self.producer.resumeProducing) + if not self.streaming and self._producer: + reactor.callFromThread(self._producer.resumeProducing) except Exception as e: self._write_exception = e raise finally: - self.file_obj.close() + self._file_obj.close() def wait(self): """Returns a deferred that resolves when finished writing to file """ - return make_deferred_yieldable(self.finished_deferred) + return make_deferred_yieldable(self._finished_deferred) def _resume_paused_producer(self): """Gets called if we should resume producing after being paused """ - if self.paused_producer and self.producer: - self.paused_producer = False - self.producer.resumeProducing() + if self._paused_producer and self._producer: + self._paused_producer = False + self._producer.resumeProducing() def _notify_empty(self): """Called when the _writer thread thinks the queue may be empty and we should notify anything waiting on `wait_for_writes` """ - if self._notify_empty_deferred and self.bytes_queue.empty(): + if self._notify_empty_deferred and self._bytes_queue.empty(): d = self._notify_empty_deferred self._notify_empty_deferred = None d.callback(None) From 1432f7ccd5a01e43d0c5417f3d2f4a6a0fbf5bfb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:53:21 +0000 Subject: [PATCH 084/171] Move test stuff to tests --- synapse/util/file_consumer.py | 26 +-------------- tests/util/test_file_consumer.py | 54 +++++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index d19d48665..324103524 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer, threads, reactor +from twisted.internet import threads, reactor from synapse.util.logcontext import make_deferred_yieldable @@ -57,10 +57,6 @@ class BackgroundFileConsumer(object): # If the _writer thread throws an exception it gets stored here. self._write_exception = None - # A deferred that gets resolved when the bytes_queue gets empty. - # Mainly used for tests. - self._notify_empty_deferred = None - def registerProducer(self, producer, streaming): """Part of IConsumer interface @@ -113,9 +109,6 @@ class BackgroundFileConsumer(object): if self._bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty_deferred and self._bytes_queue.empty(): - reactor.callFromThread(self._notify_empty) - bytes = self._bytes_queue.get() # If we get a None (or empty list) then that's a signal used @@ -144,20 +137,3 @@ class BackgroundFileConsumer(object): if self._paused_producer and self._producer: self._paused_producer = False self._producer.resumeProducing() - - def _notify_empty(self): - """Called when the _writer thread thinks the queue may be empty and - we should notify anything waiting on `wait_for_writes` - """ - if self._notify_empty_deferred and self._bytes_queue.empty(): - d = self._notify_empty_deferred - self._notify_empty_deferred = None - d.callback(None) - - def wait_for_writes(self): - """Wait for the write queue to be empty and for writes to have - finished. This is mainly useful for tests. - """ - if not self._notify_empty_deferred: - self._notify_empty_deferred = defer.Deferred() - return self._notify_empty_deferred diff --git a/tests/util/test_file_consumer.py b/tests/util/test_file_consumer.py index 8acb68f0c..76e223425 100644 --- a/tests/util/test_file_consumer.py +++ b/tests/util/test_file_consumer.py @@ -14,7 +14,7 @@ # limitations under the License. -from twisted.internet import defer +from twisted.internet import defer, reactor from mock import NonCallableMock from synapse.util.file_consumer import BackgroundFileConsumer @@ -53,7 +53,7 @@ class FileConsumerTests(unittest.TestCase): @defer.inlineCallbacks def test_push_consumer(self): - string_file = StringIO() + string_file = BlockingStringWrite() consumer = BackgroundFileConsumer(string_file) try: @@ -62,14 +62,14 @@ class FileConsumerTests(unittest.TestCase): consumer.registerProducer(producer, True) consumer.write("Foo") - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(1) - self.assertEqual(string_file.getvalue(), "Foo") + self.assertEqual(string_file.buffer, "Foo") consumer.write("Bar") - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(2) - self.assertEqual(string_file.getvalue(), "FooBar") + self.assertEqual(string_file.buffer, "FooBar") finally: consumer.unregisterProducer() @@ -85,15 +85,22 @@ class FileConsumerTests(unittest.TestCase): try: producer = NonCallableMock(spec_set=["pauseProducing", "resumeProducing"]) + resume_deferred = defer.Deferred() + producer.resumeProducing.side_effect = lambda: resume_deferred.callback(None) + consumer.registerProducer(producer, True) + number_writes = 0 with string_file.write_lock: for _ in range(consumer._PAUSE_ON_QUEUE_SIZE): consumer.write("Foo") + number_writes += 1 producer.pauseProducing.assert_called_once() - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(number_writes) + + yield resume_deferred producer.resumeProducing.assert_called_once() finally: consumer.unregisterProducer() @@ -131,8 +138,39 @@ class BlockingStringWrite(object): self.closed = False self.write_lock = threading.Lock() + self._notify_write_deferred = None + self._number_of_writes = 0 + def write(self, bytes): - self.buffer += bytes + with self.write_lock: + self.buffer += bytes + self._number_of_writes += 1 + + reactor.callFromThread(self._notify_write) def close(self): self.closed = True + + def _notify_write(self): + "Called by write to indicate a write happened" + with self.write_lock: + if not self._notify_write_deferred: + return + d = self._notify_write_deferred + self._notify_write_deferred = None + d.callback(None) + + @defer.inlineCallbacks + def wait_for_n_writes(self, n): + "Wait for n writes to have happened" + while True: + with self.write_lock: + if n <= self._number_of_writes: + return + + if not self._notify_write_deferred: + self._notify_write_deferred = defer.Deferred() + + d = self._notify_write_deferred + + yield d From be0dfcd4a29859f4c707c2b3cf1da38c5115d251 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:57:23 +0000 Subject: [PATCH 085/171] Do logcontexts correctly --- synapse/util/file_consumer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 324103524..90a2608d6 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -15,7 +15,7 @@ from twisted.internet import threads, reactor -from synapse.util.logcontext import make_deferred_yieldable +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn import Queue @@ -70,7 +70,7 @@ class BackgroundFileConsumer(object): self._producer = producer self.streaming = streaming - self._finished_deferred = threads.deferToThread(self._writer) + self._finished_deferred = preserve_fn(threads.deferToThread)(self._writer) if not streaming: self._producer.resumeProducing() From 4a53f3a3e8fb7fe9df052790858ca3f7dbff78f9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 10:52:32 +0000 Subject: [PATCH 086/171] Ensure media is in local cache before thumbnailing --- synapse/rest/media/v1/media_repository.py | 20 ++++++++------- synapse/rest/media/v1/media_storage.py | 27 +++++++++++++++++++++ synapse/rest/media/v1/thumbnail_resource.py | 3 ++- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150..578d7f07c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -445,8 +445,10 @@ class MediaRepository(object): @defer.inlineCallbacks def generate_local_exact_thumbnail(self, media_id, t_width, t_height, - t_method, t_type): - input_path = self.filepaths.local_media_filepath(media_id) + t_method, t_type, url_cache): + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + None, media_id, url_cache=url_cache, + )) thumbnailer = Thumbnailer(input_path) t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -459,6 +461,7 @@ class MediaRepository(object): file_info = FileInfo( server_name=None, file_id=media_id, + url_cache=url_cache, thumbnail=True, thumbnail_width=t_width, thumbnail_height=t_height, @@ -485,7 +488,9 @@ class MediaRepository(object): @defer.inlineCallbacks def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, t_width, t_height, t_method, t_type): - input_path = self.filepaths.remote_media_filepath(server_name, file_id) + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + server_name, file_id, url_cache=False, + )) thumbnailer = Thumbnailer(input_path) t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -543,12 +548,9 @@ class MediaRepository(object): if not requirements: return - if server_name: - input_path = self.filepaths.remote_media_filepath(server_name, file_id) - elif url_cache: - input_path = self.filepaths.url_cache_filepath(media_id) - else: - input_path = self.filepaths.local_media_filepath(media_id) + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + server_name, file_id, url_cache=url_cache, + )) thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 001e84578..d3f54594a 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -15,6 +15,7 @@ from twisted.internet import defer, threads from twisted.protocols.basic import FileSender +from twisted.protocols.ftp import FileConsumer # This isn't FTP specific from ._base import Responder @@ -151,6 +152,32 @@ class MediaStorage(object): defer.returnValue(None) + @defer.inlineCallbacks + def ensure_media_is_in_local_cache(self, file_info): + """Ensures that the given file is in the local cache. Attempts to + download it from storage providers if it isn't. + + Args: + file_info (FileInfo) + + Returns: + Deferred[str]: Full path to local file + """ + path = self._file_info_to_path(file_info) + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + defer.returnValue(local_path) + + for provider in self.storage_providers: + res = yield provider.fetch(path, file_info) + if res: + with res: + with open(local_path, "w") as f: + res.write_to_consumer(FileConsumer(f)) + defer.returnValue(local_path) + + raise Exception("file could not be found") + def _file_info_to_path(self, file_info): """Converts file_info into a relative path. diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 8c9653843..49e4af514 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -162,7 +162,8 @@ class ThumbnailResource(Resource): # Okay, so we generate one. file_path = yield self.media_repo.generate_local_exact_thumbnail( - media_id, desired_width, desired_height, desired_method, desired_type + media_id, desired_width, desired_height, desired_method, desired_type, + url_cache=media_info["url_cache"], ) if file_path: From 2cf6a7bc2068350d0701b156deb2bb3a6f0da88a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:56:23 +0000 Subject: [PATCH 087/171] Use better file consumer --- synapse/rest/media/v1/media_storage.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index d3f54594a..5c3e4e5a6 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -15,10 +15,10 @@ from twisted.internet import defer, threads from twisted.protocols.basic import FileSender -from twisted.protocols.ftp import FileConsumer # This isn't FTP specific from ._base import Responder +from synapse.util.file_consumer import BackgroundFileConsumer from synapse.util.logcontext import make_deferred_yieldable import contextlib @@ -27,6 +27,7 @@ import logging import shutil import sys + logger = logging.getLogger(__name__) @@ -168,12 +169,17 @@ class MediaStorage(object): if os.path.exists(local_path): defer.returnValue(local_path) + dirname = os.path.dirname(local_path) + if not os.path.exists(dirname): + os.makedirs(dirname) + for provider in self.storage_providers: res = yield provider.fetch(path, file_info) if res: with res: - with open(local_path, "w") as f: - res.write_to_consumer(FileConsumer(f)) + consumer = BackgroundFileConsumer(open(local_path, "w")) + yield res.write_to_consumer(consumer) + yield consumer.wait() defer.returnValue(local_path) raise Exception("file could not be found") @@ -247,9 +253,8 @@ class FileResponder(Responder): def __init__(self, open_file): self.open_file = open_file - @defer.inlineCallbacks def write_to_consumer(self, consumer): - yield FileSender().beginFileTransfer(self.open_file, consumer) + return FileSender().beginFileTransfer(self.open_file, consumer) def __exit__(self, exc_type, exc_val, exc_tb): self.open_file.close() From d57765fc8a1b54dae001bbb97b2b529991292fbc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 18 Jan 2018 12:23:04 +0000 Subject: [PATCH 088/171] Fix bugs in block metrics ... which I introduced in #2785 --- synapse/util/metrics.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 059bb7fed..e4b5687a4 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) # total number of times we have hit this block -response_count = metrics.register_counter( +block_counter = metrics.register_counter( "block_count", labels=["block_name"], alternative_names=( @@ -76,7 +76,7 @@ block_db_txn_count = metrics.register_counter( block_db_txn_duration = metrics.register_counter( "block_db_txn_duration_seconds", labels=["block_name"], alternative_names=( - metrics.name_prefix + "_block_db_txn_count:total", + metrics.name_prefix + "_block_db_txn_duration:total", ), ) @@ -131,6 +131,8 @@ class Measure(object): return duration = self.clock.time_msec() - self.start + + block_counter.inc(self.name) block_timer.inc_by(duration, self.name) context = LoggingContext.current_context() From 0af5dc63a8d180a2b610c14ce415fdf9be96e2ff Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 15:44:08 +0000 Subject: [PATCH 089/171] Make storage providers more configurable --- synapse/config/repository.py | 83 ++++++++++++++++++++--- synapse/rest/media/v1/media_repository.py | 18 ++--- synapse/rest/media/v1/storage_provider.py | 28 +++++--- 3 files changed, 98 insertions(+), 31 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 6baa47493..81db0193f 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -16,6 +16,8 @@ from ._base import Config, ConfigError from collections import namedtuple +from synapse.util.module_loader import load_module + MISSING_NETADDR = ( "Missing netaddr library. This is required for URL preview API." @@ -36,6 +38,10 @@ ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] ) +MediaStorageProviderConfig = namedtuple( + "MediaStorageProviderConfig", ("store_local", "store_remote", "store_synchronous",) +) + def parse_thumbnail_requirements(thumbnail_sizes): """ Takes a list of dictionaries with "width", "height", and "method" keys @@ -73,16 +79,65 @@ class ContentRepositoryConfig(Config): self.media_store_path = self.ensure_directory(config["media_store_path"]) - self.backup_media_store_path = config.get("backup_media_store_path") - if self.backup_media_store_path: - self.backup_media_store_path = self.ensure_directory( + backup_media_store_path = config.get("backup_media_store_path") + if backup_media_store_path: + backup_media_store_path = self.ensure_directory( self.backup_media_store_path ) - self.synchronous_backup_media_store = config.get( + synchronous_backup_media_store = config.get( "synchronous_backup_media_store", False ) + storage_providers = config.get("media_storage_providers", []) + + if backup_media_store_path: + if storage_providers: + raise ConfigError( + "Cannot use both 'backup_media_store_path' and 'storage_providers'" + ) + + storage_providers = [{ + "module": "file_system", + "store_local": True, + "store_synchronous": synchronous_backup_media_store, + "store_remote": True, + "config": { + "directory": backup_media_store_path, + } + }] + + # This is a list of config that can be used to create the storage + # providers. The entries are tuples of (Class, class_config, + # MediaStorageProviderConfig), where Class is the class of the provider, + # the class_config the config to pass to it, and + # MediaStorageProviderConfig are options for StorageProviderWrapper. + # + # We don't create the storage providers here as not all workers need + # them to be started. + self.media_storage_providers = [] + + for provider_config in storage_providers: + # We special case the module "file_system" so as not to need to + # expose FileStorageProviderBackend + if provider_config["module"] == "file_system": + provider_config["module"] = ( + "synapse.rest.media.v1.storage_provider" + ".FileStorageProviderBackend" + ) + + provider_class, parsed_config = load_module(provider_config) + + wrapper_config = MediaStorageProviderConfig( + provider_config.get("store_local", False), + provider_config.get("store_remote", False), + provider_config.get("store_synchronous", False), + ) + + self.media_storage_providers.append( + (provider_class, provider_config, wrapper_config,) + ) + self.uploads_path = self.ensure_directory(config["uploads_path"]) self.dynamic_thumbnails = config["dynamic_thumbnails"] self.thumbnail_requirements = parse_thumbnail_requirements( @@ -127,13 +182,19 @@ class ContentRepositoryConfig(Config): # Directory where uploaded images and attachments are stored. media_store_path: "%(media_store)s" - # A secondary directory where uploaded images and attachments are - # stored as a backup. - # backup_media_store_path: "%(media_store)s" - - # Whether to wait for successful write to backup media store before - # returning successfully. - # synchronous_backup_media_store: false + # Media storage providers allow media to be stored in different + # locations. + # media_storage_providers: + # - module: file_system + # # Whether to write new local files. + # store_local: false + # # Whether to write new remote media + # store_remote: false + # # Whether to block upload requests waiting for write to this + # # provider to complete + # store_synchronous: false + # config: + # directory: /mnt/some/other/directory # Directory where in-progress uploads are stored. uploads_path: "%(uploads_path)s" diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150..4163c9e41 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -27,9 +27,7 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer -from .storage_provider import ( - StorageProviderWrapper, FileStorageProviderBackend, -) +from .storage_provider import StorageProviderWrapper from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient @@ -80,17 +78,13 @@ class MediaRepository(object): # potentially upload to. storage_providers = [] - # TODO: Move this into config and allow other storage providers to be - # defined. - if hs.config.backup_media_store_path: - backend = FileStorageProviderBackend( - self.primary_base_path, hs.config.backup_media_store_path, - ) + for clz, provider_config, wrapper_config in hs.config.media_storage_providers: + backend = clz(hs, provider_config) provider = StorageProviderWrapper( backend, - store=True, - store_synchronous=hs.config.synchronous_backup_media_store, - store_remote=True, + store_local=wrapper_config.store_local, + store_remote=wrapper_config.store_remote, + store_synchronous=wrapper_config.store_synchronous, ) storage_providers.append(provider) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 2ad602e10..0074d2d42 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -17,6 +17,7 @@ from twisted.internet import defer, threads from .media_storage import FileResponder +from synapse.config._base import Config from synapse.util.logcontext import preserve_fn import logging @@ -64,14 +65,14 @@ class StorageProviderWrapper(StorageProvider): Args: backend (StorageProvider) - store (bool): Whether to store new files or not. + store_local (bool): Whether to store new local files or not. store_synchronous (bool): Whether to wait for file to be successfully uploaded, or todo the upload in the backgroud. store_remote (bool): Whether remote media should be uploaded """ - def __init__(self, backend, store, store_synchronous, store_remote): + def __init__(self, backend, store_local, store_synchronous, store_remote): self.backend = backend - self.store = store + self.store_local = store_local self.store_synchronous = store_synchronous self.store_remote = store_remote @@ -97,13 +98,13 @@ class FileStorageProviderBackend(StorageProvider): """A storage provider that stores files in a directory on a filesystem. Args: - cache_directory (str): Base path of the local media repository - base_directory (str): Base path to store new files + hs (HomeServer) + config: The config returned by `parse_config`, i """ - def __init__(self, cache_directory, base_directory): - self.cache_directory = cache_directory - self.base_directory = base_directory + def __init__(self, hs, config): + self.cache_directory = hs.config.media_store_path + self.base_directory = config def store_file(self, path, file_info): """See StorageProvider.store_file""" @@ -125,3 +126,14 @@ class FileStorageProviderBackend(StorageProvider): backup_fname = os.path.join(self.base_directory, path) if os.path.isfile(backup_fname): return FileResponder(open(backup_fname, "rb")) + + def parse_config(config): + """Called on startup to parse config supplied. This should parse + the config and raise if there is a problem. + + The returned value is passed into the constructor. + + In this case we only care about a single param, the directory, so lets + just pull that out. + """ + return Config.ensure_directory(config["directory"]) From 9a89dae8c53140c74f968538f72a4045b6aca90d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 15:06:24 +0000 Subject: [PATCH 090/171] Fix typo in thumbnail resource causing access times to be incorrect --- synapse/rest/media/v1/thumbnail_resource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index c09f2dec4..12e84a2b7 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -67,7 +67,7 @@ class ThumbnailResource(Resource): yield self._respond_local_thumbnail( request, media_id, width, height, method, m_type ) - self.media_repo.mark_recently_accessed(server_name, media_id) + self.media_repo.mark_recently_accessed(None, media_id) else: if self.dynamic_thumbnails: yield self._select_or_generate_remote_thumbnail( @@ -79,7 +79,7 @@ class ThumbnailResource(Resource): request, server_name, media_id, width, height, method, m_type ) - self.media_repo.mark_recently_accessed(None, media_id) + self.media_repo.mark_recently_accessed(server_name, media_id) @defer.inlineCallbacks def _respond_local_thumbnail(self, request, media_id, width, height, From ce4f66133e6d426dfd3323cef164703b2b79f6b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:56:35 +0000 Subject: [PATCH 091/171] Add unit tests --- tests/rest/media/__init__.py | 14 ++++ tests/rest/media/v1/__init__.py | 14 ++++ tests/rest/media/v1/test_media_storage.py | 81 +++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 tests/rest/media/__init__.py create mode 100644 tests/rest/media/v1/__init__.py create mode 100644 tests/rest/media/v1/test_media_storage.py diff --git a/tests/rest/media/__init__.py b/tests/rest/media/__init__.py new file mode 100644 index 000000000..a354d38ca --- /dev/null +++ b/tests/rest/media/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/rest/media/v1/__init__.py b/tests/rest/media/v1/__init__.py new file mode 100644 index 000000000..a354d38ca --- /dev/null +++ b/tests/rest/media/v1/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py new file mode 100644 index 000000000..c4de18157 --- /dev/null +++ b/tests/rest/media/v1/test_media_storage.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from twisted.internet import defer + +from synapse.rest.media.v1._base import FileInfo +from synapse.rest.media.v1.media_storage import MediaStorage +from synapse.rest.media.v1.filepath import MediaFilePaths +from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend + +from tests import unittest + +import os +import shutil +import tempfile + + +class MediaStorageTests(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-") + + self.primary_base_path = os.path.join(self.test_dir, "primary") + self.secondary_base_path = os.path.join(self.test_dir, "secondary") + + storage_providers = [FileStorageProviderBackend( + self.primary_base_path, self.secondary_base_path + )] + + self.filepaths = MediaFilePaths(self.primary_base_path) + self.media_storage = MediaStorage( + self.primary_base_path, self.filepaths, storage_providers, + ) + + def tearDown(self): + shutil.rmtree(self.test_dir) + + @defer.inlineCallbacks + def test_ensure_media_is_in_local_cache(self): + media_id = "some_media_id" + test_body = "Test\n" + + # First we create a file that is in a storage provider but not in the + # local primary media store + rel_path = self.filepaths.local_media_filepath_rel(media_id) + secondary_path = os.path.join(self.secondary_base_path, rel_path) + + os.makedirs(os.path.dirname(secondary_path)) + + with open(secondary_path, "w") as f: + f.write(test_body) + + # Now we run ensure_media_is_in_local_cache, which should copy the file + # to the local cache. + file_info = FileInfo(None, media_id) + local_path = yield self.media_storage.ensure_media_is_in_local_cache(file_info) + + self.assertTrue(os.path.exists(local_path)) + + # Asserts the file is under the expected local cache directory + self.assertEquals( + os.path.commonprefix([self.primary_base_path, local_path]), + self.primary_base_path, + ) + + with open(local_path) as f: + body = f.read() + + self.assertEqual(test_body, body) From aae77da73ffc89c31d0b17fa8ce5d8b58605de63 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:11:20 +0000 Subject: [PATCH 092/171] Fixup comments --- synapse/config/repository.py | 6 +++++- synapse/rest/media/v1/storage_provider.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 81db0193f..8bbc16ba4 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -39,7 +39,11 @@ ThumbnailRequirement = namedtuple( ) MediaStorageProviderConfig = namedtuple( - "MediaStorageProviderConfig", ("store_local", "store_remote", "store_synchronous",) + "MediaStorageProviderConfig", ( + "store_local", # Whether to store newly uploaded local files + "store_remote", # Whether to store newly downloaded remote files + "store_synchronous", # Whether to wait for successful storage for local uploads + ), ) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 0074d2d42..9bf88f01f 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -99,7 +99,7 @@ class FileStorageProviderBackend(StorageProvider): Args: hs (HomeServer) - config: The config returned by `parse_config`, i + config: The config returned by `parse_config`. """ def __init__(self, hs, config): @@ -133,7 +133,7 @@ class FileStorageProviderBackend(StorageProvider): The returned value is passed into the constructor. - In this case we only care about a single param, the directory, so lets + In this case we only care about a single param, the directory, so let's just pull that out. """ return Config.ensure_directory(config["directory"]) From 3fe2bae857cda58055c32329e15d3fe9828cf8f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:11:45 +0000 Subject: [PATCH 093/171] Missing staticmethod --- synapse/rest/media/v1/storage_provider.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 9bf88f01f..0a84aba86 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -127,6 +127,7 @@ class FileStorageProviderBackend(StorageProvider): if os.path.isfile(backup_fname): return FileResponder(open(backup_fname, "rb")) + @staticmethod def parse_config(config): """Called on startup to parse config supplied. This should parse the config and raise if there is a problem. From 8e85220373ee7a0396f36ffd3fddab8b1d6a7a12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:12:35 +0000 Subject: [PATCH 094/171] Remove duplicate directory test --- synapse/config/repository.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 8bbc16ba4..364e823cd 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -84,10 +84,6 @@ class ContentRepositoryConfig(Config): self.media_store_path = self.ensure_directory(config["media_store_path"]) backup_media_store_path = config.get("backup_media_store_path") - if backup_media_store_path: - backup_media_store_path = self.ensure_directory( - self.backup_media_store_path - ) synchronous_backup_media_store = config.get( "synchronous_backup_media_store", False From d69768348fc053dd6e243479acceabbdd6167238 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:14:05 +0000 Subject: [PATCH 095/171] Fix passing wrong config to provider constructor --- synapse/config/repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 364e823cd..25ea77738 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -135,7 +135,7 @@ class ContentRepositoryConfig(Config): ) self.media_storage_providers.append( - (provider_class, provider_config, wrapper_config,) + (provider_class, parsed_config, wrapper_config,) ) self.uploads_path = self.ensure_directory(config["uploads_path"]) From cd871a305708bca1acb5289f93cb62c15ba438da Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 18:37:59 +0000 Subject: [PATCH 096/171] Fix storage provider bug introduced when renamed to store_local --- synapse/rest/media/v1/storage_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 0a84aba86..c188192f2 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -77,7 +77,7 @@ class StorageProviderWrapper(StorageProvider): self.store_remote = store_remote def store_file(self, path, file_info): - if not self.store: + if not file_info.server_name and not self.store_local: return defer.succeed(None) if file_info.server_name and not self.store_remote: From 28a6ccb49c57cc686761b9e674b501b3b402e616 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 00:19:58 +0000 Subject: [PATCH 097/171] add registrations_require_3pid lets homeservers specify a whitelist for 3PIDs that users are allowed to associate with. Typically useful for stopping people from registering with non-work emails --- synapse/api/errors.py | 1 + synapse/config/registration.py | 13 +++++ synapse/rest/client/v2_alpha/_base.py | 22 +++++++ synapse/rest/client/v2_alpha/account.py | 14 ++++- synapse/rest/client/v2_alpha/register.py | 73 ++++++++++++++++++++---- 5 files changed, 110 insertions(+), 13 deletions(-) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 79b35b3e7..46b0d7b34 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -46,6 +46,7 @@ class Codes(object): THREEPID_AUTH_FAILED = "M_THREEPID_AUTH_FAILED" THREEPID_IN_USE = "M_THREEPID_IN_USE" THREEPID_NOT_FOUND = "M_THREEPID_NOT_FOUND" + THREEPID_DENIED = "M_THREEPID_DENIED" INVALID_USERNAME = "M_INVALID_USERNAME" SERVER_NOT_TRUSTED = "M_SERVER_NOT_TRUSTED" diff --git a/synapse/config/registration.py b/synapse/config/registration.py index ef917fc9f..e5e4f7787 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -31,6 +31,7 @@ class RegistrationConfig(Config): strtobool(str(config["disable_registration"])) ) + self.registrations_require_3pid = config.get("registrations_require_3pid", []) self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) @@ -52,6 +53,18 @@ class RegistrationConfig(Config): # Enable registration for new users. enable_registration: False + # Mandate that registrations require a 3PID which matches one or more + # of these 3PIDs. N.B. regexp escape backslashes are doubled (once for + # YAML and once for the regexp itself) + # + # registrations_require_3pid: + # - medium: email + # pattern: ".*@matrix\\.org" + # - medium: email + # pattern: ".*@vector\\.im" + # - medium: msisdn + # pattern: "\\+44" + # If set, allows registration by anyone who also has the shared # secret, even if registration is otherwise disabled. registration_shared_secret: "%(registration_shared_secret)s" diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py index 77434937f..7c46ef7ca 100644 --- a/synapse/rest/client/v2_alpha/_base.py +++ b/synapse/rest/client/v2_alpha/_base.py @@ -60,6 +60,28 @@ def set_timeline_upper_limit(filter_json, filter_timeline_limit): filter_timeline_limit) +def check_3pid_allowed(hs, medium, address): + # check whether the HS has whitelisted the given 3PID + + allow = False + if hs.config.registrations_require_3pid: + for constraint in hs.config.registrations_require_3pid: + logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( + address, medium, constraint['pattern'], constraint['medium'] + ) + ) + if ( + medium == constraint['medium'] and + re.match(constraint['pattern'], address) + ): + allow = True + break + else: + allow = True + + return allow + + def interactive_auth_handler(orig): """Wraps an on_POST method to handle InteractiveAuthIncompleteErrors diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 385a3ad2e..66221e8f0 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -26,7 +26,7 @@ from synapse.http.servlet import ( ) from synapse.util.async import run_on_reactor from synapse.util.msisdn import phone_number_to_msisdn -from ._base import client_v2_patterns, interactive_auth_handler +from ._base import client_v2_patterns, interactive_auth_handler, check_3pid_allowed logger = logging.getLogger(__name__) @@ -47,6 +47,9 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): 'id_server', 'client_secret', 'email', 'send_attempt' ]) + if not check_3pid_allowed(self.hs, "email", body['email']): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'email', body['email'] ) @@ -78,6 +81,9 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) + if not check_3pid_allowed(self.hs, "msisdn", msisdn): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.datastore.get_user_id_by_threepid( 'msisdn', msisdn ) @@ -217,6 +223,9 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): if absent: raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM) + if not check_3pid_allowed(self.hs, "email", body['email']): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.datastore.get_user_id_by_threepid( 'email', body['email'] ) @@ -255,6 +264,9 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) + if not check_3pid_allowed(self.hs, "msisdn", msisdn): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.datastore.get_user_id_by_threepid( 'msisdn', msisdn ) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index e9d88a889..762782c1f 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -27,9 +27,10 @@ from synapse.http.servlet import ( ) from synapse.util.msisdn import phone_number_to_msisdn -from ._base import client_v2_patterns, interactive_auth_handler +from ._base import client_v2_patterns, interactive_auth_handler, check_3pid_allowed import logging +import re import hmac from hashlib import sha1 from synapse.util.async import run_on_reactor @@ -70,6 +71,9 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): 'id_server', 'client_secret', 'email', 'send_attempt' ]) + if not check_3pid_allowed(self.hs, "email", body['email']): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'email', body['email'] ) @@ -105,6 +109,9 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) + if not check_3pid_allowed(self.hs, "msisdn", msisdn): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'msisdn', msisdn ) @@ -305,31 +312,73 @@ class RegisterRestServlet(RestServlet): if 'x_show_msisdn' in body and body['x_show_msisdn']: show_msisdn = True + require_email = False + require_msisdn = False + for constraint in self.hs.config.registrations_require_3pid: + if constraint['medium'] == 'email': + require_email = True + elif constraint['medium'] == 'msisdn': + require_msisdn = True + else: + logger.warn( + "Unrecognised 3PID medium %s in registrations_require_3pid" % + constraint['medium'] + ) + + flows = [] if self.hs.config.enable_registration_captcha: - flows = [ - [LoginType.RECAPTCHA], - [LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA], - ] + if not require_email and not require_msisdn: + flows.extend([[LoginType.RECAPTCHA]]) + if require_email or not require_msisdn: + flows.extend([[LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA]]) + if show_msisdn: + if not require_email or require_msisdn: + flows.extend([[LoginType.MSISDN, LoginType.RECAPTCHA]]) flows.extend([ - [LoginType.MSISDN, LoginType.RECAPTCHA], [LoginType.MSISDN, LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA], ]) else: - flows = [ - [LoginType.DUMMY], - [LoginType.EMAIL_IDENTITY], - ] + if not require_email and not require_msisdn: + flows.extend([[LoginType.DUMMY]]) + if require_email or not require_msisdn: + flows.extend([[LoginType.EMAIL_IDENTITY]]) + if show_msisdn: + if not require_email or require_msisdn: + flows.extend([[LoginType.MSISDN]]) flows.extend([ - [LoginType.MSISDN], - [LoginType.MSISDN, LoginType.EMAIL_IDENTITY], + [LoginType.MSISDN, LoginType.EMAIL_IDENTITY] ]) auth_result, params, session_id = yield self.auth_handler.check_auth( flows, body, self.hs.get_ip_from_request(request) ) + # doublecheck that we're not trying to register an denied 3pid. + # the user-facing checks should already have happened when we requested + # a 3PID token to validate them in /register/email/requestToken etc + + for constraint in self.hs.config.registrations_require_3pid: + if ( + constraint['medium'] == 'email' and + auth_result and LoginType.EMAIL_IDENTITY in auth_result and + re.match( + constraint['pattern'], + auth_result[LoginType.EMAIL_IDENTITY].threepid.address + ) + ): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + elif ( + constraint['medium'] == 'msisdn' and + auth_result and LoginType.MSISDN in auth_result and + re.match( + constraint['pattern'], + auth_result[LoginType.MSISDN].threepid.address + ) + ): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + if registered_user_id is not None: logger.info( "Already registered user ID %r for this session", From 81d037dbd8c6616b33339f198f70134f73bbff5f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 00:28:08 +0000 Subject: [PATCH 098/171] mock registrations_require_3pid --- tests/rest/client/v2_alpha/test_register.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 096f771be..8aba45651 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -49,6 +49,7 @@ class RegisterRestServletTestCase(unittest.TestCase): self.hs.get_auth_handler = Mock(return_value=self.auth_handler) self.hs.get_device_handler = Mock(return_value=self.device_handler) self.hs.config.enable_registration = True + self.hs.config.registrations_require_3pid = [] self.hs.config.auto_join_rooms = [] # init the thing we're testing From 0af58f14ee351e7d52d7139df9218ff692764f20 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 00:33:51 +0000 Subject: [PATCH 099/171] fix pep8 --- synapse/rest/client/v2_alpha/_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py index 7c46ef7ca..b286ff0d9 100644 --- a/synapse/rest/client/v2_alpha/_base.py +++ b/synapse/rest/client/v2_alpha/_base.py @@ -68,8 +68,7 @@ def check_3pid_allowed(hs, medium, address): for constraint in hs.config.registrations_require_3pid: logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( address, medium, constraint['pattern'], constraint['medium'] - ) - ) + )) if ( medium == constraint['medium'] and re.match(constraint['pattern'], address) From 9d332e0f797e4f302a08b3708df4ac8b42b08216 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 00:53:58 +0000 Subject: [PATCH 100/171] fix up v1, and improve errors --- synapse/handlers/register.py | 13 +++++++- synapse/rest/client/v1/register.py | 40 +++++++++++++++++------- synapse/rest/client/v2_alpha/account.py | 16 +++++++--- synapse/rest/client/v2_alpha/register.py | 16 +++++++--- 4 files changed, 65 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 5b808beac..157ebaf25 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -15,6 +15,7 @@ """Contains functions for registering clients.""" import logging +import re from twisted.internet import defer @@ -293,7 +294,7 @@ class RegistrationHandler(BaseHandler): """ for c in threepidCreds: - logger.info("validating theeepidcred sid %s on id server %s", + logger.info("validating threepidcred sid %s on id server %s", c['sid'], c['idServer']) try: identity_handler = self.hs.get_handlers().identity_handler @@ -307,6 +308,16 @@ class RegistrationHandler(BaseHandler): logger.info("got threepid with medium '%s' and address '%s'", threepid['medium'], threepid['address']) + for constraint in self.hs.config.registrations_require_3pid: + if ( + constraint['medium'] == 'email' and + threepid['medium'] == 'email' and + re.match(constraint['pattern'], threepid['address']) + ): + raise RegistrationError( + 403, "Third party identifier is not allowed" + ) + @defer.inlineCallbacks def bind_emails(self, user_id, threepidCreds): """Links emails with a user ID and informs an identity server. diff --git a/synapse/rest/client/v1/register.py b/synapse/rest/client/v1/register.py index 32ed1d3ab..f793542ad 100644 --- a/synapse/rest/client/v1/register.py +++ b/synapse/rest/client/v1/register.py @@ -70,10 +70,24 @@ class RegisterRestServlet(ClientV1RestServlet): self.handlers = hs.get_handlers() def on_GET(self, request): + + require_email = False + require_msisdn = False + for constraint in self.hs.config.registrations_require_3pid: + if constraint['medium'] == 'email': + require_email = True + elif constraint['medium'] == 'msisdn': + require_msisdn = True + else: + logger.warn( + "Unrecognised 3PID medium %s in registrations_require_3pid" % + constraint['medium'] + ) + + flows = [] if self.hs.config.enable_registration_captcha: - return ( - 200, - {"flows": [ + if require_email or not require_msisdn: + flows.extend([ { "type": LoginType.RECAPTCHA, "stages": [ @@ -82,27 +96,31 @@ class RegisterRestServlet(ClientV1RestServlet): LoginType.PASSWORD ] }, + ]) + if not require_email and not require_msisdn: + flows.extend([ { "type": LoginType.RECAPTCHA, "stages": [LoginType.RECAPTCHA, LoginType.PASSWORD] } - ]} - ) + ]) else: - return ( - 200, - {"flows": [ + if require_email or not require_msisdn: + flows.extend([ { "type": LoginType.EMAIL_IDENTITY, "stages": [ LoginType.EMAIL_IDENTITY, LoginType.PASSWORD ] - }, + } + ]) + if not require_email and not require_msisdn: + flows.extend([ { "type": LoginType.PASSWORD } - ]} - ) + ]) + return (200, {"flows": flows}) @defer.inlineCallbacks def on_POST(self, request): diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 66221e8f0..2977ad439 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -48,7 +48,9 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): ]) if not check_3pid_allowed(self.hs, "email", body['email']): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'email', body['email'] @@ -82,7 +84,9 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.datastore.get_user_id_by_threepid( 'msisdn', msisdn @@ -224,7 +228,9 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM) if not check_3pid_allowed(self.hs, "email", body['email']): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.datastore.get_user_id_by_threepid( 'email', body['email'] @@ -265,7 +271,9 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.datastore.get_user_id_by_threepid( 'msisdn', msisdn diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 762782c1f..898d8b133 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -72,7 +72,9 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): ]) if not check_3pid_allowed(self.hs, "email", body['email']): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'email', body['email'] @@ -110,7 +112,9 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'msisdn', msisdn @@ -368,7 +372,9 @@ class RegisterRestServlet(RestServlet): auth_result[LoginType.EMAIL_IDENTITY].threepid.address ) ): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) elif ( constraint['medium'] == 'msisdn' and auth_result and LoginType.MSISDN in auth_result and @@ -377,7 +383,9 @@ class RegisterRestServlet(RestServlet): auth_result[LoginType.MSISDN].threepid.address ) ): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) if registered_user_id is not None: logger.info( From 447f4f0d5f136dcadd5fdc286ded2d6e24a3f686 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 15:33:55 +0000 Subject: [PATCH 101/171] rewrite based on PR feedback: * [ ] split config options into allowed_local_3pids and registrations_require_3pid * [ ] simplify and comment logic for picking registration flows * [ ] fix docstring and move check_3pid_allowed into a new util module * [ ] use check_3pid_allowed everywhere @erikjohnston PTAL --- synapse/config/registration.py | 12 +++- synapse/handlers/register.py | 15 ++--- synapse/rest/client/v1/register.py | 20 +++---- synapse/rest/client/v2_alpha/_base.py | 21 ------- synapse/rest/client/v2_alpha/account.py | 3 +- synapse/rest/client/v2_alpha/register.py | 73 +++++++++++------------- synapse/util/threepids.py | 45 +++++++++++++++ 7 files changed, 101 insertions(+), 88 deletions(-) create mode 100644 synapse/util/threepids.py diff --git a/synapse/config/registration.py b/synapse/config/registration.py index e5e4f7787..336959094 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -32,6 +32,7 @@ class RegistrationConfig(Config): ) self.registrations_require_3pid = config.get("registrations_require_3pid", []) + self.allowed_local_3pids = config.get("allowed_local_3pids", []) self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) @@ -53,11 +54,16 @@ class RegistrationConfig(Config): # Enable registration for new users. enable_registration: False - # Mandate that registrations require a 3PID which matches one or more - # of these 3PIDs. N.B. regexp escape backslashes are doubled (once for - # YAML and once for the regexp itself) + # The user must provide all of the below types of 3PID when registering. # # registrations_require_3pid: + # - email + # - msisdn + + # Mandate that users are only allowed to associate certain formats of + # 3PIDs with accounts on this server. + # + # allowed_local_3pids: # - medium: email # pattern: ".*@matrix\\.org" # - medium: email diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 157ebaf25..9021d4d57 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -15,7 +15,6 @@ """Contains functions for registering clients.""" import logging -import re from twisted.internet import defer @@ -26,6 +25,7 @@ from synapse.http.client import CaptchaServerHttpClient from synapse import types from synapse.types import UserID from synapse.util.async import run_on_reactor +from synapse.util.threepids import check_3pid_allowed from ._base import BaseHandler logger = logging.getLogger(__name__) @@ -308,15 +308,10 @@ class RegistrationHandler(BaseHandler): logger.info("got threepid with medium '%s' and address '%s'", threepid['medium'], threepid['address']) - for constraint in self.hs.config.registrations_require_3pid: - if ( - constraint['medium'] == 'email' and - threepid['medium'] == 'email' and - re.match(constraint['pattern'], threepid['address']) - ): - raise RegistrationError( - 403, "Third party identifier is not allowed" - ) + if not check_3pid_allowed(self.hs, threepid['medium'], threepid['address']): + raise RegistrationError( + 403, "Third party identifier is not allowed" + ) @defer.inlineCallbacks def bind_emails(self, user_id, threepidCreds): diff --git a/synapse/rest/client/v1/register.py b/synapse/rest/client/v1/register.py index f793542ad..5c5fa8f7a 100644 --- a/synapse/rest/client/v1/register.py +++ b/synapse/rest/client/v1/register.py @@ -71,22 +71,13 @@ class RegisterRestServlet(ClientV1RestServlet): def on_GET(self, request): - require_email = False - require_msisdn = False - for constraint in self.hs.config.registrations_require_3pid: - if constraint['medium'] == 'email': - require_email = True - elif constraint['medium'] == 'msisdn': - require_msisdn = True - else: - logger.warn( - "Unrecognised 3PID medium %s in registrations_require_3pid" % - constraint['medium'] - ) + require_email = 'email' in self.hs.config.registrations_require_3pid + require_msisdn = 'msisdn' in self.hs.config.registrations_require_3pid flows = [] if self.hs.config.enable_registration_captcha: - if require_email or not require_msisdn: + # only support the email-only flow if we don't require MSISDN 3PIDs + if not require_msisdn: flows.extend([ { "type": LoginType.RECAPTCHA, @@ -97,6 +88,7 @@ class RegisterRestServlet(ClientV1RestServlet): ] }, ]) + # only support 3PIDless registration if no 3PIDs are required if not require_email and not require_msisdn: flows.extend([ { @@ -105,6 +97,7 @@ class RegisterRestServlet(ClientV1RestServlet): } ]) else: + # only support the email-only flow if we don't require MSISDN 3PIDs if require_email or not require_msisdn: flows.extend([ { @@ -114,6 +107,7 @@ class RegisterRestServlet(ClientV1RestServlet): ] } ]) + # only support 3PIDless registration if no 3PIDs are required if not require_email and not require_msisdn: flows.extend([ { diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py index b286ff0d9..77434937f 100644 --- a/synapse/rest/client/v2_alpha/_base.py +++ b/synapse/rest/client/v2_alpha/_base.py @@ -60,27 +60,6 @@ def set_timeline_upper_limit(filter_json, filter_timeline_limit): filter_timeline_limit) -def check_3pid_allowed(hs, medium, address): - # check whether the HS has whitelisted the given 3PID - - allow = False - if hs.config.registrations_require_3pid: - for constraint in hs.config.registrations_require_3pid: - logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( - address, medium, constraint['pattern'], constraint['medium'] - )) - if ( - medium == constraint['medium'] and - re.match(constraint['pattern'], address) - ): - allow = True - break - else: - allow = True - - return allow - - def interactive_auth_handler(orig): """Wraps an on_POST method to handle InteractiveAuthIncompleteErrors diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 2977ad439..514bb37da 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -26,7 +26,8 @@ from synapse.http.servlet import ( ) from synapse.util.async import run_on_reactor from synapse.util.msisdn import phone_number_to_msisdn -from ._base import client_v2_patterns, interactive_auth_handler, check_3pid_allowed +from synapse.util.threepids import check_3pid_allowed +from ._base import client_v2_patterns, interactive_auth_handler logger = logging.getLogger(__name__) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 898d8b133..c3479e29d 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -26,11 +26,11 @@ from synapse.http.servlet import ( RestServlet, parse_json_object_from_request, assert_params_in_request, parse_string ) from synapse.util.msisdn import phone_number_to_msisdn +from synapse.util.threepids import check_3pid_allowed -from ._base import client_v2_patterns, interactive_auth_handler, check_3pid_allowed +from ._base import client_v2_patterns, interactive_auth_handler import logging -import re import hmac from hashlib import sha1 from synapse.util.async import run_on_reactor @@ -316,41 +316,41 @@ class RegisterRestServlet(RestServlet): if 'x_show_msisdn' in body and body['x_show_msisdn']: show_msisdn = True - require_email = False - require_msisdn = False - for constraint in self.hs.config.registrations_require_3pid: - if constraint['medium'] == 'email': - require_email = True - elif constraint['medium'] == 'msisdn': - require_msisdn = True - else: - logger.warn( - "Unrecognised 3PID medium %s in registrations_require_3pid" % - constraint['medium'] - ) + # FIXME: need a better error than "no auth flow found" for scenarios + # where we required 3PID for registration but the user didn't give one + require_email = 'email' in self.hs.config.registrations_require_3pid + require_msisdn = 'msisdn' in self.hs.config.registrations_require_3pid flows = [] if self.hs.config.enable_registration_captcha: + # only support 3PIDless registration if no 3PIDs are required if not require_email and not require_msisdn: flows.extend([[LoginType.RECAPTCHA]]) - if require_email or not require_msisdn: + # only support the email-only flow if we don't require MSISDN 3PIDs + if not require_msisdn: flows.extend([[LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA]]) if show_msisdn: - if not require_email or require_msisdn: + # only support the MSISDN-only flow if we don't require email 3PIDs + if not require_email: flows.extend([[LoginType.MSISDN, LoginType.RECAPTCHA]]) + # always let users provide both MSISDN & email flows.extend([ [LoginType.MSISDN, LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA], ]) else: + # only support 3PIDless registration if no 3PIDs are required if not require_email and not require_msisdn: flows.extend([[LoginType.DUMMY]]) - if require_email or not require_msisdn: + # only support the email-only flow if we don't require MSISDN 3PIDs + if not require_msisdn: flows.extend([[LoginType.EMAIL_IDENTITY]]) if show_msisdn: + # only support the MSISDN-only flow if we don't require email 3PIDs if not require_email or require_msisdn: flows.extend([[LoginType.MSISDN]]) + # always let users provide both MSISDN & email flows.extend([ [LoginType.MSISDN, LoginType.EMAIL_IDENTITY] ]) @@ -359,30 +359,23 @@ class RegisterRestServlet(RestServlet): flows, body, self.hs.get_ip_from_request(request) ) - # doublecheck that we're not trying to register an denied 3pid. - # the user-facing checks should already have happened when we requested - # a 3PID token to validate them in /register/email/requestToken etc + # Check that we're not trying to register a denied 3pid. + # + # the user-facing checks will probably already have happened in + # /register/email/requestToken when we requested a 3pid, but that's not + # guaranteed. - for constraint in self.hs.config.registrations_require_3pid: - if ( - constraint['medium'] == 'email' and - auth_result and LoginType.EMAIL_IDENTITY in auth_result and - re.match( - constraint['pattern'], - auth_result[LoginType.EMAIL_IDENTITY].threepid.address - ) - ): - raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED - ) - elif ( - constraint['medium'] == 'msisdn' and - auth_result and LoginType.MSISDN in auth_result and - re.match( - constraint['pattern'], - auth_result[LoginType.MSISDN].threepid.address - ) - ): + if ( + auth_result and + ( + LoginType.EMAIL_IDENTITY in auth_result or + LoginType.EMAIL_MSISDN in auth_result + ) + ): + medium = auth_result[LoginType.EMAIL_IDENTITY].threepid['medium'] + address = auth_result[LoginType.EMAIL_IDENTITY].threepid['address'] + + if not check_3pid_allowed(self.hs, medium, address): raise SynapseError( 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED ) diff --git a/synapse/util/threepids.py b/synapse/util/threepids.py new file mode 100644 index 000000000..e921b9779 --- /dev/null +++ b/synapse/util/threepids.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import re + +logger = logging.getLogger(__name__) + + +def check_3pid_allowed(hs, medium, address): + """Checks whether a given format of 3PID is allowed to be used on this HS + + Args: + hs (synapse.server.HomeServer): server + medium (str): 3pid medium - e.g. email, msisdn + address (str): address within that medium (e.g. "wotan@matrix.org") + msisdns need to first have been canonicalised + """ + + if hs.config.allowed_local_3pids: + for constraint in hs.config.allowed_local_3pids: + logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( + address, medium, constraint['pattern'], constraint['medium'] + )) + if ( + medium == constraint['medium'] and + re.match(constraint['pattern'], address) + ): + return True + else: + return True + + return False From 293380bef761b67479a90d2837bbf6dfa5a70a90 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 15:38:53 +0000 Subject: [PATCH 102/171] trailing commas --- synapse/rest/client/v2_alpha/account.py | 8 ++++---- synapse/rest/client/v2_alpha/register.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 514bb37da..30523995a 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -50,7 +50,7 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "email", body['email']): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( @@ -86,7 +86,7 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.datastore.get_user_id_by_threepid( @@ -230,7 +230,7 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "email", body['email']): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.datastore.get_user_id_by_threepid( @@ -273,7 +273,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.datastore.get_user_id_by_threepid( diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index c3479e29d..bf68e34a5 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -73,7 +73,7 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "email", body['email']): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( @@ -113,7 +113,7 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( @@ -377,7 +377,7 @@ class RegisterRestServlet(RestServlet): if not check_3pid_allowed(self.hs, medium, address): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) if registered_user_id is not None: From 8fe253f19b1c61c38111948cce00a7d260d2925a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 18:23:45 +0000 Subject: [PATCH 103/171] fix PR nitpicking --- synapse/util/threepids.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/util/threepids.py b/synapse/util/threepids.py index e921b9779..75efa0117 100644 --- a/synapse/util/threepids.py +++ b/synapse/util/threepids.py @@ -27,13 +27,16 @@ def check_3pid_allowed(hs, medium, address): medium (str): 3pid medium - e.g. email, msisdn address (str): address within that medium (e.g. "wotan@matrix.org") msisdns need to first have been canonicalised + Returns: + bool: whether the 3PID medium/address is allowed to be added to this HS """ if hs.config.allowed_local_3pids: for constraint in hs.config.allowed_local_3pids: - logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( - address, medium, constraint['pattern'], constraint['medium'] - )) + logger.debug( + "Checking 3PID %s (%s) against %s (%s)", + address, medium, constraint['pattern'], constraint['medium'], + ) if ( medium == constraint['medium'] and re.match(constraint['pattern'], address) From 62d7d66ae5592175bb35a2a8d2f69b1924d6e1f2 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 18:23:56 +0000 Subject: [PATCH 104/171] oops, check all login types --- synapse/rest/client/v2_alpha/register.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index bf68e34a5..4e73f9a40 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -365,20 +365,17 @@ class RegisterRestServlet(RestServlet): # /register/email/requestToken when we requested a 3pid, but that's not # guaranteed. - if ( - auth_result and - ( - LoginType.EMAIL_IDENTITY in auth_result or - LoginType.EMAIL_MSISDN in auth_result - ) - ): - medium = auth_result[LoginType.EMAIL_IDENTITY].threepid['medium'] - address = auth_result[LoginType.EMAIL_IDENTITY].threepid['address'] + if auth_result: + for login_type in [LoginType.EMAIL_IDENTITY, LoginType.EMAIL_MSISDN]: + if login_type in auth_result: + medium = auth_result[login_type].threepid['medium'] + address = auth_result[login_type].threepid['address'] - if not check_3pid_allowed(self.hs, medium, address): - raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, - ) + if not check_3pid_allowed(self.hs, medium, address): + raise SynapseError( + 403, "Third party identifier is not allowed", + Codes.THREEPID_DENIED, + ) if registered_user_id is not None: logger.info( From ad7ec63d08c9d814766ea4764e187bc7ba5589d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 18:29:39 +0000 Subject: [PATCH 105/171] Use the right path for url_preview thumbnails This was introduced by #2627: we were overwriting the original media for url previews with the thumbnails :/ (fixes https://github.com/vector-im/riot-web/issues/6012, hopefully) --- synapse/rest/media/v1/media_storage.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 001e84578..041ae396c 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -164,6 +164,14 @@ class MediaStorage(object): str """ if file_info.url_cache: + if file_info.thumbnail: + return self.filepaths.url_cache_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method, + ) return self.filepaths.url_cache_filepath_rel(file_info.file_id) if file_info.server_name: From 49fce046243ae3e2c38ac6ac39001172667b8dfa Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 19:55:33 +0000 Subject: [PATCH 106/171] fix typo (thanks sytest) --- synapse/rest/client/v2_alpha/register.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 4e73f9a40..3abfe3547 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -366,7 +366,7 @@ class RegisterRestServlet(RestServlet): # guaranteed. if auth_result: - for login_type in [LoginType.EMAIL_IDENTITY, LoginType.EMAIL_MSISDN]: + for login_type in [LoginType.EMAIL_IDENTITY, LoginType.MSISDN]: if login_type in auth_result: medium = auth_result[login_type].threepid['medium'] address = auth_result[login_type].threepid['address'] From 87b7d727605c8e122adb768b7487dfcae830593f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 23:51:04 +0000 Subject: [PATCH 107/171] Add some comments about the reactor tick time metric --- synapse/metrics/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 2265e6e8d..e0cfb7d08 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -146,10 +146,15 @@ def runUntilCurrentTimer(func): num_pending += 1 num_pending += len(reactor.threadCallQueue) - start = time.time() * 1000 ret = func(*args, **kwargs) end = time.time() * 1000 + + # record the amount of wallclock time spent running pending calls. + # This is a proxy for the actual amount of time between reactor polls, + # since about 25% of time is actually spent running things triggered by + # I/O events, but that is harder to capture without rewriting half the + # reactor. tick_time.inc_by(end - start) pending_calls_metric.inc_by(num_pending) From 2c8526cac795fa2aa795e1a1aaae2ffb2558824d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 20 Jan 2018 00:55:44 +0000 Subject: [PATCH 108/171] Use a connection pool for the SimpleHttpClient In particular I hope this will help the pusher, which makes many requests to sygnal, and is currently negotiating SSL for each one. --- synapse/http/client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 4abb479ae..930d71301 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -30,6 +30,7 @@ from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web.client import ( BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent, readBody, PartialDownloadError, + HTTPConnectionPool, ) from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.http import PotentialDataLoss @@ -64,13 +65,19 @@ class SimpleHttpClient(object): """ def __init__(self, hs): self.hs = hs + + pool = HTTPConnectionPool(reactor) + pool.maxPersistentPerHost = 5 + pool.cachedConnectionTimeout = 2 * 60 + # The default context factory in Twisted 14.0.0 (which we require) is # BrowserLikePolicyForHTTPS which will do regular cert validation # 'like a browser' self.agent = Agent( reactor, connectTimeout=15, - contextFactory=hs.get_http_client_context_factory() + contextFactory=hs.get_http_client_context_factory(), + pool=pool, ) self.user_agent = hs.version_string self.clock = hs.get_clock() From 5552ed9a7fb1300142a7aebe7fc85b0bd2535bcf Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 20 Jan 2018 22:25:23 -0700 Subject: [PATCH 109/171] Add an admin route to get all the media in a room This is intended to be used by administrators to monitor the media that is passing through their server, if they wish. Signed-off-by: Travis Ralston --- synapse/rest/client/v1/admin.py | 22 ++++++ synapse/storage/room.py | 133 ++++++++++++++++++-------------- 2 files changed, 98 insertions(+), 57 deletions(-) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea..0615e5d80 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -289,6 +289,27 @@ class QuarantineMediaInRoom(ClientV1RestServlet): defer.returnValue((200, {"num_quarantined": num_quarantined})) +class ListMediaInRoom(ClientV1RestServlet): + """Lists all of the media in a given room. + """ + PATTERNS = client_path_patterns("/admin/room/(?P[^/]+)/media") + + def __init__(self, hs): + super(ListMediaInRoom, self).__init__(hs) + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + if not is_admin: + raise AuthError(403, "You are not a server admin") + + local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id) + + defer.returnValue((200, {"local": local_mxcs, "remote": remote_mxcs})) + + class ResetPasswordRestServlet(ClientV1RestServlet): """Post request to allow an administrator reset password for a user. This needs user to have administrator access in Synapse. @@ -487,3 +508,4 @@ def register_servlets(hs, http_server): SearchUsersRestServlet(hs).register(http_server) ShutdownRoomRestServlet(hs).register(http_server) QuarantineMediaInRoom(hs).register(http_server) + ListMediaInRoom(hs).register(http_server) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b..cd6899a4b 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -533,73 +533,92 @@ class RoomStore(SQLBaseStore): ) self.is_room_blocked.invalidate((room_id,)) + def get_media_mxcs_in_room(self, room_id): + def _get_media_ids_in_room(txn): + local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + local_media_mxcs = [] + remote_media_mxcs = [] + + # Convert the IDs to MXC URIs + for media_id in local_media_ids: + local_media_mxcs.append("mxc://%s/%s" % (self.hostname, media_id)) + for hostname, media_id in remote_media_ids: + remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) + + return local_media_mxcs, remote_media_mxcs + return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + def quarantine_media_ids_in_room(self, room_id, quarantined_by): """For a room loops through all events with media and quarantines the associated media """ - def _get_media_ids_in_room(txn): - mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") - - next_token = self.get_current_events_token() + 1 - + def _quarantine_media_in_room(txn): + local_media_mxcs, remote_media_mxcs = self._get_media_ids_in_room(txn, room_id) total_media_quarantined = 0 - while next_token: - sql = """ - SELECT stream_ordering, content FROM events - WHERE room_id = ? - AND stream_ordering < ? - AND contains_url = ? AND outlier = ? - ORDER BY stream_ordering DESC - LIMIT ? + # Now update all the tables to set the quarantined_by flag + + txn.executemany(""" + UPDATE local_media_repository + SET quarantined_by = ? + WHERE media_id = ? + """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + + txn.executemany( """ - txn.execute(sql, (room_id, next_token, True, False, 100)) - - next_token = None - local_media_mxcs = [] - remote_media_mxcs = [] - for stream_ordering, content_json in txn: - next_token = stream_ordering - content = json.loads(content_json) - - content_url = content.get("url") - thumbnail_url = content.get("info", {}).get("thumbnail_url") - - for url in (content_url, thumbnail_url): - if not url: - continue - matches = mxc_re.match(url) - if matches: - hostname = matches.group(1) - media_id = matches.group(2) - if hostname == self.hostname: - local_media_mxcs.append(media_id) - else: - remote_media_mxcs.append((hostname, media_id)) - - # Now update all the tables to set the quarantined_by flag - - txn.executemany(""" - UPDATE local_media_repository + UPDATE remote_media_cache SET quarantined_by = ? - WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) - - txn.executemany( - """ - UPDATE remote_media_cache - SET quarantined_by = ? - WHERE media_origin AND media_id = ? - """, - ( - (quarantined_by, origin, media_id) - for origin, media_id in remote_media_mxcs - ) + WHERE media_origin AND media_id = ? + """, + ( + (quarantined_by, origin, media_id) + for origin, media_id in remote_media_mxcs ) + ) - total_media_quarantined += len(local_media_mxcs) - total_media_quarantined += len(remote_media_mxcs) + total_media_quarantined += len(local_media_mxcs) + total_media_quarantined += len(remote_media_mxcs) return total_media_quarantined - return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room) + + def _get_media_ids_in_room(self, txn, room_id): + mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") + + next_token = self.get_current_events_token() + 1 + local_media_mxcs = [] + remote_media_mxcs = [] + + while next_token: + sql = """ + SELECT stream_ordering, content FROM events + WHERE room_id = ? + AND stream_ordering < ? + AND contains_url = ? AND outlier = ? + ORDER BY stream_ordering DESC + LIMIT ? + """ + txn.execute(sql, (room_id, next_token, True, False, 100)) + + next_token = None + for stream_ordering, content_json in txn: + next_token = stream_ordering + content = json.loads(content_json) + + content_url = content.get("url") + thumbnail_url = content.get("info", {}).get("thumbnail_url") + + for url in (content_url, thumbnail_url): + if not url: + continue + matches = mxc_re.match(url) + if matches: + hostname = matches.group(1) + media_id = matches.group(2) + if hostname == self.hostname: + local_media_mxcs.append(media_id) + else: + remote_media_mxcs.append((hostname, media_id)) + + return local_media_mxcs, remote_media_mxcs From a94d9b6b825c6b2db375460268567e637e10709a Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 20 Jan 2018 22:49:46 -0700 Subject: [PATCH 110/171] Appease the linter These are ids anyways, not mxc uris. Signed-off-by: Travis Ralston --- synapse/storage/room.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index cd6899a4b..d1d63f404 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -553,7 +553,7 @@ class RoomStore(SQLBaseStore): the associated media """ def _quarantine_media_in_room(txn): - local_media_mxcs, remote_media_mxcs = self._get_media_ids_in_room(txn, room_id) + local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -562,7 +562,7 @@ class RoomStore(SQLBaseStore): UPDATE local_media_repository SET quarantined_by = ? WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + """, ((quarantined_by, media_id) for media_id in local_media_ids)) txn.executemany( """ @@ -572,12 +572,12 @@ class RoomStore(SQLBaseStore): """, ( (quarantined_by, origin, media_id) - for origin, media_id in remote_media_mxcs + for origin, media_id in remote_media_ids ) ) - total_media_quarantined += len(local_media_mxcs) - total_media_quarantined += len(remote_media_mxcs) + total_media_quarantined += len(local_media_ids) + total_media_quarantined += len(remote_media_ids) return total_media_quarantined From 5c431f421c0edcdc4582da7e0d780355ebf647e1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Jan 2018 16:45:43 +0000 Subject: [PATCH 111/171] Matthew's fixes to the unit tests Extracted from https://github.com/matrix-org/synapse/pull/2820 --- tests/replication/slave/storage/_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 81063f19a..74f104e3b 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -15,6 +15,8 @@ from twisted.internet import defer, reactor from tests import unittest +import tempfile + from mock import Mock, NonCallableMock from tests.utils import setup_test_homeserver from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory @@ -41,7 +43,9 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.event_id = 0 server_factory = ReplicationStreamProtocolFactory(self.hs) - listener = reactor.listenUNIX("\0xxx", server_factory) + # XXX: mktemp is unsafe and should never be used. but we're just a test. + path = tempfile.mktemp(prefix="base_slaved_store_test_case_socket") + listener = reactor.listenUNIX(path, server_factory) self.addCleanup(listener.stopListening) self.streamer = server_factory.streamer @@ -49,7 +53,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): client_factory = ReplicationClientFactory( self.hs, "client_name", self.replication_handler ) - client_connector = reactor.connectUNIX("\0xxx", client_factory) + client_connector = reactor.connectUNIX(path, client_factory) self.addCleanup(client_factory.stopTrying) self.addCleanup(client_connector.disconnect) From ab9f844aaf3662a64dbc4c56077e9fa37bc7d5d0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 22 Jan 2018 19:11:18 +0100 Subject: [PATCH 112/171] Add federation_domain_whitelist option (#2820) Add federation_domain_whitelist gives a way to restrict which domains your HS is allowed to federate with. useful mainly for gracefully preventing a private but internet-connected HS from trying to federate to the wider public Matrix network --- synapse/api/errors.py | 26 ++++++++++++++++++++ synapse/config/server.py | 22 +++++++++++++++++ synapse/federation/federation_client.py | 5 +++- synapse/federation/transaction_queue.py | 4 +++- synapse/federation/transport/client.py | 3 +++ synapse/federation/transport/server.py | 9 ++++++- synapse/handlers/device.py | 4 ++++ synapse/handlers/e2e_keys.py | 8 ++++++- synapse/handlers/federation.py | 4 ++++ synapse/http/matrixfederationclient.py | 28 +++++++++++++++++++++- synapse/rest/key/v2/remote_key_resource.py | 8 +++++++ synapse/rest/media/v1/media_repository.py | 19 +++++++++++++-- synapse/util/retryutils.py | 12 ++++++++++ tests/utils.py | 1 + 14 files changed, 146 insertions(+), 7 deletions(-) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 46b0d7b34..aa15f73f3 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -141,6 +141,32 @@ class RegistrationError(SynapseError): pass +class FederationDeniedError(SynapseError): + """An error raised when the server tries to federate with a server which + is not on its federation whitelist. + + Attributes: + destination (str): The destination which has been denied + """ + + def __init__(self, destination): + """Raised by federation client or server to indicate that we are + are deliberately not attempting to contact a given server because it is + not on our federation whitelist. + + Args: + destination (str): the domain in question + """ + + self.destination = destination + + super(FederationDeniedError, self).__init__( + code=403, + msg="Federation denied with %s." % (self.destination,), + errcode=Codes.FORBIDDEN, + ) + + class InteractiveAuthIncompleteError(Exception): """An error raised when UI auth is not yet complete diff --git a/synapse/config/server.py b/synapse/config/server.py index 436dd8a6f..8f0b6d1f2 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -55,6 +55,17 @@ class ServerConfig(Config): "block_non_admin_invites", False, ) + # FIXME: federation_domain_whitelist needs sytests + self.federation_domain_whitelist = None + federation_domain_whitelist = config.get( + "federation_domain_whitelist", None + ) + # turn the whitelist into a hash for speed of lookup + if federation_domain_whitelist is not None: + self.federation_domain_whitelist = {} + for domain in federation_domain_whitelist: + self.federation_domain_whitelist[domain] = True + if self.public_baseurl is not None: if self.public_baseurl[-1] != '/': self.public_baseurl += '/' @@ -210,6 +221,17 @@ class ServerConfig(Config): # (except those sent by local server admins). The default is False. # block_non_admin_invites: True + # Restrict federation to the following whitelist of domains. + # N.B. we recommend also firewalling your federation listener to limit + # inbound federation traffic as early as possible, rather than relying + # purely on this application-layer restriction. If not specified, the + # default is to whitelist everything. + # + # federation_domain_whitelist: + # - lon.example.com + # - nyc.example.com + # - syd.example.com + # List of ports that Synapse should listen on, their purpose and their # configuration. listeners: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index b1fe03f70..813907f7f 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -23,7 +23,7 @@ from twisted.internet import defer from synapse.api.constants import Membership from synapse.api.errors import ( - CodeMessageException, HttpResponseException, SynapseError, + CodeMessageException, HttpResponseException, SynapseError, FederationDeniedError ) from synapse.events import builder from synapse.federation.federation_base import ( @@ -266,6 +266,9 @@ class FederationClient(FederationBase): except NotRetryingDestination as e: logger.info(e.message) continue + except FederationDeniedError as e: + logger.info(e.message) + continue except Exception as e: pdu_attempts[destination] = now diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 9d39f4658..a141ec995 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -19,7 +19,7 @@ from twisted.internet import defer from .persistence import TransactionActions from .units import Transaction, Edu -from synapse.api.errors import HttpResponseException +from synapse.api.errors import HttpResponseException, FederationDeniedError from synapse.util import logcontext, PreserveLoggingContext from synapse.util.async import run_on_reactor from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter @@ -490,6 +490,8 @@ class TransactionQueue(object): (e.retry_last_ts + e.retry_interval) / 1000.0 ), ) + except FederationDeniedError as e: + logger.info(e) except Exception as e: logger.warn( "TX [%s] Failed to send transaction: %s", diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 1f3ce238f..5488e8298 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -212,6 +212,9 @@ class TransportLayerClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if the remote destination + is not in our federation whitelist """ valid_memberships = {Membership.JOIN, Membership.LEAVE} if membership not in valid_memberships: diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 2b02b021e..06c16ba4f 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -16,7 +16,7 @@ from twisted.internet import defer from synapse.api.urls import FEDERATION_PREFIX as PREFIX -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, SynapseError, FederationDeniedError from synapse.http.server import JsonResource from synapse.http.servlet import ( parse_json_object_from_request, parse_integer_from_args, parse_string_from_args, @@ -81,6 +81,7 @@ class Authenticator(object): self.keyring = hs.get_keyring() self.server_name = hs.hostname self.store = hs.get_datastore() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist # A method just so we can pass 'self' as the authenticator to the Servlets @defer.inlineCallbacks @@ -92,6 +93,12 @@ class Authenticator(object): "signatures": {}, } + if ( + self.federation_domain_whitelist is not None and + self.server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(self.server_name) + if content is not None: json_request["content"] = content diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 2152efc69..0e8345385 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -14,6 +14,7 @@ # limitations under the License. from synapse.api import errors from synapse.api.constants import EventTypes +from synapse.api.errors import FederationDeniedError from synapse.util import stringutils from synapse.util.async import Linearizer from synapse.util.caches.expiringcache import ExpiringCache @@ -513,6 +514,9 @@ class DeviceListEduUpdater(object): # This makes it more likely that the device lists will # eventually become consistent. return + except FederationDeniedError as e: + logger.info(e) + return except Exception: # TODO: Remember that we are now out of sync and try again # later diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 5af8abf66..9aa95f89e 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -19,7 +19,9 @@ import logging from canonicaljson import encode_canonical_json from twisted.internet import defer -from synapse.api.errors import SynapseError, CodeMessageException +from synapse.api.errors import ( + SynapseError, CodeMessageException, FederationDeniedError, +) from synapse.types import get_domain_from_id, UserID from synapse.util.logcontext import preserve_fn, make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination @@ -140,6 +142,10 @@ class E2eKeysHandler(object): failures[destination] = { "status": 503, "message": "Not ready for retry", } + except FederationDeniedError as e: + failures[destination] = { + "status": 403, "message": "Federation Denied", + } except Exception as e: # include ConnectionRefused and other errors failures[destination] = { diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ac7073088..677532c87 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -22,6 +22,7 @@ from ._base import BaseHandler from synapse.api.errors import ( AuthError, FederationError, StoreError, CodeMessageException, SynapseError, + FederationDeniedError, ) from synapse.api.constants import EventTypes, Membership, RejectedReason from synapse.events.validator import EventValidator @@ -782,6 +783,9 @@ class FederationHandler(BaseHandler): except NotRetryingDestination as e: logger.info(e.message) continue + except FederationDeniedError as e: + logger.info(e) + continue except Exception as e: logger.exception( "Failed to backfill from %s because %s", diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 833496b72..9145405cb 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -27,7 +27,7 @@ import synapse.metrics from canonicaljson import encode_canonical_json from synapse.api.errors import ( - SynapseError, Codes, HttpResponseException, + SynapseError, Codes, HttpResponseException, FederationDeniedError, ) from signedjson.sign import sign_json @@ -123,11 +123,22 @@ class MatrixFederationHttpClient(object): Fails with ``HTTPRequestException``: if we get an HTTP response code >= 300. + Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist + (May also fail with plenty of other Exceptions for things like DNS failures, connection failures, SSL failures.) """ + if ( + self.hs.config.federation_domain_whitelist and + destination not in self.hs.config.federation_domain_whitelist + ): + raise FederationDeniedError(destination) + limiter = yield synapse.util.retryutils.get_retry_limiter( destination, self.clock, @@ -308,6 +319,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ if not json_data_callback: @@ -368,6 +382,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ def body_callback(method, url_bytes, headers_dict): @@ -422,6 +439,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ logger.debug("get_json args: %s", args) @@ -475,6 +495,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ response = yield self._request( @@ -518,6 +541,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ encoded_args = {} diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index cc2842aa7..17e6079cb 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -93,6 +93,7 @@ class RemoteKey(Resource): self.store = hs.get_datastore() self.version_string = hs.version_string self.clock = hs.get_clock() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist def render_GET(self, request): self.async_render_GET(request) @@ -137,6 +138,13 @@ class RemoteKey(Resource): logger.info("Handling query for keys %r", query) store_queries = [] for server_name, key_ids in query.items(): + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + logger.debug("Federation denied with %s", server_name) + continue + if not key_ids: key_ids = (None,) for key_id in key_ids: diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 4f56bcf57..485db8577 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -32,8 +32,9 @@ from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.util.stringutils import random_string -from synapse.api.errors import SynapseError, HttpResponseException, \ - NotFoundError +from synapse.api.errors import ( + SynapseError, HttpResponseException, NotFoundError, FederationDeniedError, +) from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii @@ -75,6 +76,8 @@ class MediaRepository(object): self.recently_accessed_remotes = set() self.recently_accessed_locals = set() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist + # List of StorageProviders where we should search for media and # potentially upload to. storage_providers = [] @@ -216,6 +219,12 @@ class MediaRepository(object): Deferred: Resolves once a response has successfully been written to request """ + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(server_name) + self.mark_recently_accessed(server_name, media_id) # We linearize here to ensure that we don't try and download remote @@ -250,6 +259,12 @@ class MediaRepository(object): Returns: Deferred[dict]: The media_info of the file """ + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(server_name) + # We linearize here to ensure that we don't try and download remote # media multiple times concurrently key = (server_name, media_id) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 1adedbb36..47b0bb5eb 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -26,6 +26,18 @@ logger = logging.getLogger(__name__) class NotRetryingDestination(Exception): def __init__(self, retry_last_ts, retry_interval, destination): + """Raised by the limiter (and federation client) to indicate that we are + are deliberately not attempting to contact a given server. + + Args: + retry_last_ts (int): the unix ts in milliseconds of our last attempt + to contact the server. 0 indicates that the last attempt was + successful or that we've never actually attempted to connect. + retry_interval (int): the time in milliseconds to wait until the next + attempt. + destination (str): the domain in question + """ + msg = "Not retrying server %s." % (destination,) super(NotRetryingDestination, self).__init__(msg) diff --git a/tests/utils.py b/tests/utils.py index 44e5f7509..311604789 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -57,6 +57,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.worker_app = None config.email_enable_notifs = False config.block_non_admin_invites = False + config.federation_domain_whitelist = None # disable user directory updates, because they get done in the # background, which upsets the test runner. From 93efd7eb04601ee103f176819283e0298c660adc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Jan 2018 18:14:10 +0000 Subject: [PATCH 113/171] logging and debug for http pusher --- synapse/push/httppusher.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index c16f61452..f2517f39a 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -13,21 +13,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from synapse.push import PusherConfigException +import logging from twisted.internet import defer, reactor from twisted.internet.error import AlreadyCalled, AlreadyCancelled -import logging import push_rule_evaluator import push_tools - +import synapse +from synapse.push import PusherConfigException from synapse.util.logcontext import LoggingContext from synapse.util.metrics import Measure logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + +http_push_processed_counter = metrics.register_counter( + "http_pushes_processed", +) + +http_push_failed_counter = metrics.register_counter( + "http_pushes_failed", +) + class HttpPusher(object): INITIAL_BACKOFF_SEC = 1 # in seconds because that's what Twisted takes @@ -152,9 +161,15 @@ class HttpPusher(object): self.user_id, self.last_stream_ordering, self.max_stream_ordering ) + logger.info( + "Processing %i unprocessed push actions starting at stream_ordering %i", + len(unprocessed), self.last_stream_ordering, + ) + for push_action in unprocessed: processed = yield self._process_one(push_action) if processed: + http_push_processed_counter.inc() self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC self.last_stream_ordering = push_action['stream_ordering'] yield self.store.update_pusher_last_stream_ordering_and_success( @@ -169,6 +184,7 @@ class HttpPusher(object): self.failing_since ) else: + http_push_failed_counter.inc() if not self.failing_since: self.failing_since = self.clock.time_msec() yield self.store.update_pusher_failing_since( From 4528dd2443f4dc9e737bf4eeccedfb8807a1ea2c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Jan 2018 20:15:42 +0000 Subject: [PATCH 114/171] Fix logging and add user_id --- synapse/push/httppusher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index f2517f39a..4a03af5b2 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -162,8 +162,9 @@ class HttpPusher(object): ) logger.info( - "Processing %i unprocessed push actions starting at stream_ordering %i", - len(unprocessed), self.last_stream_ordering, + "Processing %i unprocessed push actions for %s starting at " + "stream_ordering %s", + len(unprocessed), self.user_id, self.last_stream_ordering, ) for push_action in unprocessed: From d32385336f2edf2c98ae9eb560bad9402860d7cc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 23 Jan 2018 09:59:06 +0100 Subject: [PATCH 115/171] add ?ts massaging for ASes (#2754) blindly implement ?ts for AS. untested --- synapse/rest/client/v1/room.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 682a0af9f..867ec8602 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -195,15 +195,20 @@ class RoomSendEventRestServlet(ClientV1RestServlet): requester = yield self.auth.get_user_by_req(request, allow_guest=True) content = parse_json_object_from_request(request) + event_dict = { + "type": event_type, + "content": content, + "room_id": room_id, + "sender": requester.user.to_string(), + } + + if 'ts' in request.args and requester.app_service: + event_dict['origin_server_ts'] = parse_integer(request, "ts", 0) + msg_handler = self.handlers.message_handler event = yield msg_handler.create_and_send_nonmember_event( requester, - { - "type": event_type, - "content": content, - "room_id": room_id, - "sender": requester.user.to_string(), - }, + event_dict, txn_id=txn_id, ) From 9a72b70630e111639243c6ab7867c4d4b970e2df Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 24 Jan 2018 11:07:24 +0100 Subject: [PATCH 116/171] fix thinko on 3pid whitelisting --- synapse/rest/client/v2_alpha/register.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 3abfe3547..c6f4680a7 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -368,8 +368,8 @@ class RegisterRestServlet(RestServlet): if auth_result: for login_type in [LoginType.EMAIL_IDENTITY, LoginType.MSISDN]: if login_type in auth_result: - medium = auth_result[login_type].threepid['medium'] - address = auth_result[login_type].threepid['address'] + medium = auth_result[login_type]['medium'] + address = auth_result[login_type]['address'] if not check_3pid_allowed(self.hs, medium, address): raise SynapseError( From 349c7399663b5fce856995a3a901019f5d210cc4 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:28:44 +0000 Subject: [PATCH 117/171] synapse 500s on a call to publicRooms in the case where the number of public rooms is zero, the specific cause is due to xrange trying to use a step value of zero, but if the total room number really is zero then it makes sense to just bail and save the extra processing --- synapse/handlers/room_list.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index bb4007538..ae5db4d2c 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,6 +186,11 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) + #bail if no rooms to work on + if len(rooms_to_scan) == 0: + defer.returnValue([]) + + # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit # From d02e43b15f6b9b24ffe5e0c0d696f8fd71fc8af3 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:29:46 +0000 Subject: [PATCH 118/171] remove white space --- synapse/handlers/room_list.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index ae5db4d2c..9f8173644 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -190,7 +190,6 @@ class RoomListHandler(BaseHandler): if len(rooms_to_scan) == 0: defer.returnValue([]) - # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit # From 6c6e197b0a100f14fa69d8decba59e58c7c25b6c Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:47:46 +0000 Subject: [PATCH 119/171] fix PEP8 violation --- synapse/handlers/room_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 9f8173644..f466a64ed 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,7 +186,7 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - #bail if no rooms to work on + # bail if no rooms to work on if len(rooms_to_scan) == 0: defer.returnValue([]) From f6320835764dbb6cac058763737d67ca6359e3a9 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:52:17 +0000 Subject: [PATCH 120/171] fix return type, should be a dict --- synapse/handlers/room_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index f466a64ed..2ee63548c 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -188,7 +188,7 @@ class RoomListHandler(BaseHandler): # bail if no rooms to work on if len(rooms_to_scan) == 0: - defer.returnValue([]) + defer.returnValue({}) # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit From 86c4f49a31fe044a727c64e40009596050cdab95 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 26 Jan 2018 00:12:02 +0000 Subject: [PATCH 121/171] rather than try reconstruct the results object, better to guard against the xrange step argument being 0 --- synapse/handlers/room_list.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 2ee63548c..cf62ead81 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,10 +186,6 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - # bail if no rooms to work on - if len(rooms_to_scan) == 0: - defer.returnValue({}) - # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit # @@ -207,8 +203,8 @@ class RoomListHandler(BaseHandler): if limit: step = limit + 1 else: - step = len(rooms_to_scan) - + # step cannot be zero + step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 chunk = [] for i in xrange(0, len(rooms_to_scan), step): batch = rooms_to_scan[i:i + step] From 73560237d646835197e07e9e6c50674786a79a28 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 26 Jan 2018 00:15:10 +0000 Subject: [PATCH 122/171] add white space line --- synapse/handlers/room_list.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index cf62ead81..dfa09141e 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -205,6 +205,7 @@ class RoomListHandler(BaseHandler): else: # step cannot be zero step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 + chunk = [] for i in xrange(0, len(rooms_to_scan), step): batch = rooms_to_scan[i:i + step] From 2b918464979c958447e5405c26b0832adbb8913a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:12:31 +0000 Subject: [PATCH 123/171] Remove spurious unittest.DEBUG --- tests/handlers/test_e2e_keys.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 19f5ed6bc..d92bf240b 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -143,7 +143,6 @@ class E2eKeysHandlerTestCase(unittest.TestCase): except errors.SynapseError: pass - @unittest.DEBUG @defer.inlineCallbacks def test_claim_one_time_key(self): local_user = "@boris:" + self.hs.hostname From 6e9bf67f189c09a65eea4e348c5b9ac265ea096f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:35:21 +0000 Subject: [PATCH 124/171] Remove unused/bitrotted MemoryDataStore This isn't used, and looks thoroughly bitrotted. --- tests/utils.py | 164 ++++--------------------------------------------- 1 file changed, 13 insertions(+), 151 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 311604789..de33deb0b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -13,27 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.server import HttpServer -from synapse.api.errors import cs_error, CodeMessageException, StoreError -from synapse.api.constants import EventTypes -from synapse.storage.prepare_database import prepare_database -from synapse.storage.engines import create_engine -from synapse.server import HomeServer -from synapse.federation.transport import server -from synapse.util.ratelimitutils import FederationRateLimiter - -from synapse.util.logcontext import LoggingContext - -from twisted.internet import defer, reactor -from twisted.enterprise.adbapi import ConnectionPool - -from collections import namedtuple -from mock import patch, Mock import hashlib +from inspect import getcallargs import urllib import urlparse -from inspect import getcallargs +from mock import Mock, patch +from twisted.enterprise.adbapi import ConnectionPool +from twisted.internet import defer, reactor + +from synapse.api.errors import CodeMessageException, cs_error +from synapse.federation.transport import server +from synapse.http.server import HttpServer +from synapse.server import HomeServer +from synapse.storage.engines import create_engine +from synapse.storage.prepare_database import prepare_database +from synapse.util.logcontext import LoggingContext +from synapse.util.ratelimitutils import FederationRateLimiter @defer.inlineCallbacks @@ -334,140 +330,6 @@ class SQLiteMemoryDbPool(ConnectionPool, object): return create_engine(self.config.database_config) -class MemoryDataStore(object): - - Room = namedtuple( - "Room", - ["room_id", "is_public", "creator"] - ) - - def __init__(self): - self.tokens_to_users = {} - self.paths_to_content = {} - - self.members = {} - self.rooms = {} - - self.current_state = {} - self.events = [] - - class Snapshot(namedtuple("Snapshot", "room_id user_id membership_state")): - def fill_out_prev_events(self, event): - pass - - def snapshot_room(self, room_id, user_id, state_type=None, state_key=None): - return self.Snapshot( - room_id, user_id, self.get_room_member(user_id, room_id) - ) - - def register(self, user_id, token, password_hash): - if user_id in self.tokens_to_users.values(): - raise StoreError(400, "User in use.") - self.tokens_to_users[token] = user_id - - def get_user_by_access_token(self, token): - try: - return { - "name": self.tokens_to_users[token], - } - except Exception: - raise StoreError(400, "User does not exist.") - - def get_room(self, room_id): - try: - return self.rooms[room_id] - except Exception: - return None - - def store_room(self, room_id, room_creator_user_id, is_public): - if room_id in self.rooms: - raise StoreError(409, "Conflicting room!") - - room = MemoryDataStore.Room( - room_id=room_id, - is_public=is_public, - creator=room_creator_user_id - ) - self.rooms[room_id] = room - - def get_room_member(self, user_id, room_id): - return self.members.get(room_id, {}).get(user_id) - - def get_room_members(self, room_id, membership=None): - if membership: - return [ - v for k, v in self.members.get(room_id, {}).items() - if v.membership == membership - ] - else: - return self.members.get(room_id, {}).values() - - def get_rooms_for_user_where_membership_is(self, user_id, membership_list): - return [ - m[user_id] for m in self.members.values() - if user_id in m and m[user_id].membership in membership_list - ] - - def get_room_events_stream(self, user_id=None, from_key=None, to_key=None, - limit=0, with_feedback=False): - return ([], from_key) # TODO - - def get_joined_hosts_for_room(self, room_id): - return defer.succeed([]) - - def persist_event(self, event): - if event.type == EventTypes.Member: - room_id = event.room_id - user = event.state_key - self.members.setdefault(room_id, {})[user] = event - - if hasattr(event, "state_key"): - key = (event.room_id, event.type, event.state_key) - self.current_state[key] = event - - self.events.append(event) - - def get_current_state(self, room_id, event_type=None, state_key=""): - if event_type: - key = (room_id, event_type, state_key) - if self.current_state.get(key): - return [self.current_state.get(key)] - return None - else: - return [ - e for e in self.current_state - if e[0] == room_id - ] - - def set_presence_state(self, user_localpart, state): - return defer.succeed({"state": 0}) - - def get_presence_list(self, user_localpart, accepted): - return [] - - def get_room_events_max_id(self): - return "s0" # TODO (erikj) - - def get_send_event_level(self, room_id): - return defer.succeed(0) - - def get_power_level(self, room_id, user_id): - return defer.succeed(0) - - def get_add_state_level(self, room_id): - return defer.succeed(0) - - def get_room_join_rule(self, room_id): - # TODO (erikj): This should be configurable - return defer.succeed("invite") - - def get_ops_levels(self, room_id): - return defer.succeed((5, 5, 5)) - - def insert_client_ip(self, user, access_token, ip, user_agent): - return defer.succeed(None) - - def _format_call(args, kwargs): return ", ".join( ["%r" % (a) for a in args] + From 25adde9a04b82a2261404f9b6f9a25cec086f62f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 22:01:02 +0000 Subject: [PATCH 125/171] Factor out get_db_conn to HomeServer base class This function is identical to all subclasses, so we may as well push it up to the base class to reduce duplication (and make use of it in the tests) --- synapse/app/appservice.py | 13 ------------- synapse/app/client_reader.py | 13 ------------- synapse/app/federation_reader.py | 13 ------------- synapse/app/federation_sender.py | 13 ------------- synapse/app/frontend_proxy.py | 13 ------------- synapse/app/homeserver.py | 13 ------------- synapse/app/media_repository.py | 13 ------------- synapse/app/pusher.py | 13 ------------- synapse/app/synchrotron.py | 13 ------------- synapse/app/user_dir.py | 13 ------------- synapse/server.py | 17 +++++++++++++++++ 11 files changed, 17 insertions(+), 130 deletions(-) diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 7d0c2879a..c6fe4516d 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -49,19 +49,6 @@ class AppserviceSlaveStore( class AppserviceServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = AppserviceSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index dc3f6efd4..3b3352798 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -64,19 +64,6 @@ class ClientReaderSlavedStore( class ClientReaderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = ClientReaderSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index a072291e1..4de43c41f 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -58,19 +58,6 @@ class FederationReaderSlavedStore( class FederationReaderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FederationReaderSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 09e9488f0..f760826d2 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -76,19 +76,6 @@ class FederationSenderSlaveStore( class FederationSenderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FederationSenderSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index ae531c0aa..e32ee8fe9 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -118,19 +118,6 @@ class FrontendProxySlavedStore( class FrontendProxyServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FrontendProxySlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 92ab3b311..cb82a415a 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -266,19 +266,6 @@ class SynapseHomeServer(HomeServer): except IncorrectDatabaseSetup as e: quit_with_error(e.message) - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(config_options): """ diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index eab1597aa..1ed1ca877 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -60,19 +60,6 @@ class MediaRepositorySlavedStore( class MediaRepositoryServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = MediaRepositorySlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 7fbbb0b0e..32ccea3f1 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -81,19 +81,6 @@ class PusherSlaveStore( class PusherServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = PusherSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 0abba3016..f87531f1b 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -246,19 +246,6 @@ class SynchrotronApplicationService(object): class SynchrotronServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = SynchrotronSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index a48c4a2ae..494ccb702 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -92,19 +92,6 @@ class UserDirectorySlaveStore( class UserDirectoryServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = UserDirectorySlaveStore(self.get_db_conn(), self) diff --git a/synapse/server.py b/synapse/server.py index 99693071b..ff8a8fbc4 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -307,6 +307,23 @@ class HomeServer(object): **self.db_config.get("args", {}) ) + def get_db_conn(self, run_new_connection=True): + """Makes a new connection to the database, skipping the db pool + + Returns: + Connection: a connection object implementing the PEP-249 spec + """ + # Any param beginning with cp_ is a parameter for adbapi, and should + # not be passed to the database engine. + db_params = { + k: v for k, v in self.db_config.get("args", {}).items() + if not k.startswith("cp_") + } + db_conn = self.database_engine.module.connect(**db_params) + if run_new_connection: + self.database_engine.on_new_connection(db_conn) + return db_conn + def build_media_repository_resource(self): # build the media repo resource. This indirects through the HomeServer # to ensure that we only have a single instance of From b178eca2616d8210a212fbd04be253158fc7fc47 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:12:46 +0000 Subject: [PATCH 126/171] Run on_new_connection for unit tests Configure the connectionpool used for unit tests to run the `on_new_connection` function. --- tests/utils.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index de33deb0b..ab5e2341c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -66,13 +66,19 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): if "clock" not in kargs: kargs["clock"] = MockClock() + db_engine = create_engine(config.database_config) if datastore is None: - db_pool = SQLiteMemoryDbPool() + # we need to configure the connection pool to run the on_new_connection + # function, so that we can test code that uses custom sqlite functions + # (like rank). + db_pool = SQLiteMemoryDbPool( + cp_openfun=db_engine.on_new_connection, + ) yield db_pool.prepare() hs = HomeServer( name, db_pool=db_pool, config=config, version_string="Synapse/tests", - database_engine=create_engine(config.database_config), + database_engine=db_engine, get_db_conn=db_pool.get_db_conn, room_list_handler=object(), tls_server_context_factory=Mock(), @@ -83,7 +89,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): hs = HomeServer( name, db_pool=None, datastore=datastore, config=config, version_string="Synapse/tests", - database_engine=create_engine(config.database_config), + database_engine=db_engine, room_list_handler=object(), tls_server_context_factory=Mock(), **kargs @@ -303,11 +309,15 @@ class MockClock(object): class SQLiteMemoryDbPool(ConnectionPool, object): - def __init__(self): + def __init__(self, **kwargs): + connkw = { + "cp_min": 1, + "cp_max": 1, + } + connkw.update(kwargs) + super(SQLiteMemoryDbPool, self).__init__( - "sqlite3", ":memory:", - cp_min=1, - cp_max=1, + "sqlite3", ":memory:", **connkw ) self.config = Mock() From d7eacc4f87ca8353f612171fa7e070c14e3b35ee Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:14:24 +0000 Subject: [PATCH 127/171] Create dbpool as normal in tests ... instead of creating our own special SQLiteMemoryDbPool, whose purpose was a bit of a mystery. For some reason this makes one of the tests run slightly slower, so bump the sleep(). Sorry. --- tests/crypto/test_keyring.py | 4 +-- tests/utils.py | 60 ++++++++++-------------------------- 2 files changed, 19 insertions(+), 45 deletions(-) diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index c899fecf5..d4ec02ffc 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -167,7 +167,7 @@ class KeyringTestCase(unittest.TestCase): # wait a tick for it to send the request to the perspectives server # (it first tries the datastore) - yield async.sleep(0.005) + yield async.sleep(1) # XXX find out why this takes so long! self.http_client.post_json.assert_called_once() self.assertIs(LoggingContext.current_context(), context_11) @@ -183,7 +183,7 @@ class KeyringTestCase(unittest.TestCase): res_deferreds_2 = kr.verify_json_objects_for_server( [("server10", json1)], ) - yield async.sleep(0.005) + yield async.sleep(01) self.http_client.post_json.assert_not_called() res_deferreds_2[0].addBoth(self.check_context, None) diff --git a/tests/utils.py b/tests/utils.py index ab5e2341c..50de4199b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -19,7 +19,6 @@ import urllib import urlparse from mock import Mock, patch -from twisted.enterprise.adbapi import ConnectionPool from twisted.internet import defer, reactor from synapse.api.errors import CodeMessageException, cs_error @@ -60,30 +59,37 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.update_user_directory = False config.use_frozen_dicts = True - config.database_config = {"name": "sqlite3"} config.ldap_enabled = False if "clock" not in kargs: kargs["clock"] = MockClock() + config.database_config = { + "name": "sqlite3", + "args": { + "database": ":memory:", + "cp_min": 1, + "cp_max": 1, + }, + } db_engine = create_engine(config.database_config) + + # we need to configure the connection pool to run the on_new_connection + # function, so that we can test code that uses custom sqlite functions + # (like rank). + config.database_config["args"]["cp_openfun"] = db_engine.on_new_connection + if datastore is None: - # we need to configure the connection pool to run the on_new_connection - # function, so that we can test code that uses custom sqlite functions - # (like rank). - db_pool = SQLiteMemoryDbPool( - cp_openfun=db_engine.on_new_connection, - ) - yield db_pool.prepare() hs = HomeServer( - name, db_pool=db_pool, config=config, + name, config=config, + db_config=config.database_config, version_string="Synapse/tests", database_engine=db_engine, - get_db_conn=db_pool.get_db_conn, room_list_handler=object(), tls_server_context_factory=Mock(), **kargs ) + yield prepare_database(hs.get_db_conn(), db_engine, config) hs.setup() else: hs = HomeServer( @@ -308,38 +314,6 @@ class MockClock(object): return d -class SQLiteMemoryDbPool(ConnectionPool, object): - def __init__(self, **kwargs): - connkw = { - "cp_min": 1, - "cp_max": 1, - } - connkw.update(kwargs) - - super(SQLiteMemoryDbPool, self).__init__( - "sqlite3", ":memory:", **connkw - ) - - self.config = Mock() - self.config.password_providers = [] - self.config.database_config = {"name": "sqlite3"} - - def prepare(self): - engine = self.create_engine() - return self.runWithConnection( - lambda conn: prepare_database(conn, engine, self.config) - ) - - def get_db_conn(self): - conn = self.connect() - engine = self.create_engine() - prepare_database(conn, engine, self.config) - return conn - - def create_engine(self): - return create_engine(self.config.database_config) - - def _format_call(args, kwargs): return ", ".join( ["%r" % (a) for a in args] + From 88b9c5cbf0e571ba955bbd483a0169a0b79c78c7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:54:38 +0000 Subject: [PATCH 128/171] Make it possible to run tests against postgres --- tests/utils.py | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 50de4199b..d1f59551e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -25,11 +25,17 @@ from synapse.api.errors import CodeMessageException, cs_error from synapse.federation.transport import server from synapse.http.server import HttpServer from synapse.server import HomeServer +from synapse.storage import PostgresEngine from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database from synapse.util.logcontext import LoggingContext from synapse.util.ratelimitutils import FederationRateLimiter +# set this to True to run the tests against postgres instead of sqlite. +# It requires you to have a local postgres database called synapse_test, within +# which ALL TABLES WILL BE DROPPED +USE_POSTGRES_FOR_TESTS = False + @defer.inlineCallbacks def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): @@ -64,14 +70,25 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): if "clock" not in kargs: kargs["clock"] = MockClock() - config.database_config = { - "name": "sqlite3", - "args": { - "database": ":memory:", - "cp_min": 1, - "cp_max": 1, - }, - } + if USE_POSTGRES_FOR_TESTS: + config.database_config = { + "name": "psycopg2", + "args": { + "database": "synapse_test", + "cp_min": 1, + "cp_max": 5, + }, + } + else: + config.database_config = { + "name": "sqlite3", + "args": { + "database": ":memory:", + "cp_min": 1, + "cp_max": 1, + }, + } + db_engine = create_engine(config.database_config) # we need to configure the connection pool to run the on_new_connection @@ -89,7 +106,15 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): tls_server_context_factory=Mock(), **kargs ) - yield prepare_database(hs.get_db_conn(), db_engine, config) + db_conn = hs.get_db_conn() + # make sure that the database is empty + if isinstance(db_engine, PostgresEngine): + cur = db_conn.cursor() + cur.execute("SELECT tablename FROM pg_tables where schemaname='public'") + rows = cur.fetchall() + for r in rows: + cur.execute("DROP TABLE %s CASCADE" % r[0]) + yield prepare_database(db_conn, db_engine, config) hs.setup() else: hs = HomeServer( From 46022025ea35895af3cf8d15973fb94a3a6b4f38 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:20:28 +0000 Subject: [PATCH 129/171] Fix SQL for user search fix some syntax errors for user search when search_all_users is enabled fixes #2801, hopefully --- synapse/storage/user_directory.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index f150ef010..dfdcbb318 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,13 +641,12 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - # dummy to keep the number of binds & aliases the same + # make s.user_id null to keep the ordering algorithm happy join_clause = """ - LEFT JOIN ( - SELECT NULL as user_id WHERE NULL = ? - ) AS s USING (user_id)" + CROSS JOIN (SELECT NULL as user_id) AS s """ - where_clause = "" + join_args = () + where_clause = "1=1" else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) @@ -656,6 +655,7 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND share_private ) AS s USING (user_id) """ + join_args = (user_id,) where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" if isinstance(self.database_engine, PostgresEngine): @@ -697,7 +697,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) - args = (user_id, full_query, exact_query, prefix_query, limit + 1,) + args = join_args + (full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -715,7 +715,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) - args = (user_id, search_query, limit + 1) + args = join_args + (search_query, limit + 1) else: # This should be unreachable. raise Exception("Unrecognized database engine") From d1f3490e75df49bf417eeeaef115df16729244ce Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:25:03 +0000 Subject: [PATCH 130/171] Add tests for user directory search --- tests/storage/test_user_directory.py | 88 ++++++++++++++++++++++++++++ tests/utils.py | 1 + 2 files changed, 89 insertions(+) create mode 100644 tests/storage/test_user_directory.py diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py new file mode 100644 index 000000000..0891308f2 --- /dev/null +++ b/tests/storage/test_user_directory.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.storage import UserDirectoryStore +from synapse.storage.roommember import ProfileInfo +from tests import unittest +from tests.utils import setup_test_homeserver + +ALICE = "@alice:a" +BOB = "@bob:b" +BOBBY = "@bobby:a" + + +class UserDirectoryStoreTestCase(unittest.TestCase): + @defer.inlineCallbacks + def setUp(self): + self.hs = yield setup_test_homeserver() + self.store = UserDirectoryStore(None, self.hs) + + # alice and bob are both in !room_id. bobby is not but shares + # a homeserver with alice. + yield self.store.add_profiles_to_user_dir( + "!room:id", + { + ALICE: ProfileInfo(None, "alice"), + BOB: ProfileInfo(None, "bob"), + BOBBY: ProfileInfo(None, "bobby") + }, + ) + yield self.store.add_users_to_public_room( + "!room:id", + [ALICE, BOB], + ) + yield self.store.add_users_who_share_room( + "!room:id", + False, + ( + (ALICE, BOB), + (BOB, ALICE), + ), + ) + + @defer.inlineCallbacks + def test_search_user_dir(self): + # normally when alice searches the directory she should just find + # bob because bobby doesn't share a room with her. + r = yield self.store.search_user_dir(ALICE, "bob", 10) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual(r["results"][0], { + "user_id": BOB, + "display_name": "bob", + "avatar_url": None, + }) + + @defer.inlineCallbacks + def test_search_user_dir_all_users(self): + self.hs.config.user_directory_search_all_users = True + try: + r = yield self.store.search_user_dir(ALICE, "bob", 10) + self.assertFalse(r["limited"]) + self.assertEqual(2, len(r["results"])) + self.assertDictEqual(r["results"][0], { + "user_id": BOB, + "display_name": "bob", + "avatar_url": None, + }) + self.assertDictEqual(r["results"][1], { + "user_id": BOBBY, + "display_name": "bobby", + "avatar_url": None, + }) + finally: + self.hs.config.user_directory_search_all_users = False diff --git a/tests/utils.py b/tests/utils.py index d1f59551e..8efd3a347 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -59,6 +59,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.email_enable_notifs = False config.block_non_admin_invites = False config.federation_domain_whitelist = None + config.user_directory_search_all_users = False # disable user directory updates, because they get done in the # background, which upsets the test runner. From 02ba118f81009b1c3ae290a17c35e1b9d75e802b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 14:30:15 +0000 Subject: [PATCH 131/171] Increase http conn pool size --- synapse/http/client.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 930d71301..f3e4973c2 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -18,6 +18,7 @@ from OpenSSL.SSL import VERIFY_NONE from synapse.api.errors import ( CodeMessageException, MatrixCodeMessageException, SynapseError, Codes, ) +from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable from synapse.util import logcontext import synapse.metrics @@ -67,7 +68,11 @@ class SimpleHttpClient(object): self.hs = hs pool = HTTPConnectionPool(reactor) - pool.maxPersistentPerHost = 5 + + # the pusher makes lots of concurrent SSL connections to sygnal, and + # tends to do so in batches, so we need to allow the pool to keep lots + # of idle connections around. + pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5)) pool.cachedConnectionTimeout = 2 * 60 # The default context factory in Twisted 14.0.0 (which we require) is From e051abd20b1978ddc53723c8233bc54742243045 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Jan 2018 21:06:54 +0000 Subject: [PATCH 132/171] add appid/device_display_name to to pusher logging --- synapse/push/httppusher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 4a03af5b2..02bd013ca 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -164,7 +164,7 @@ class HttpPusher(object): logger.info( "Processing %i unprocessed push actions for %s starting at " "stream_ordering %s", - len(unprocessed), self.user_id, self.last_stream_ordering, + len(unprocessed), self.name, self.last_stream_ordering, ) for push_action in unprocessed: @@ -342,7 +342,7 @@ class HttpPusher(object): @defer.inlineCallbacks def _send_badge(self, badge): - logger.info("Sending updated badge count %d to %r", badge, self.user_id) + logger.info("Sending updated badge count %d to %s", badge, self.name) d = { 'notification': { 'id': '', From 03dd745fe28a00c8788a2147d4f5c2a852182429 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 15:49:06 +0000 Subject: [PATCH 133/171] Better logging when pushes fail --- synapse/push/httppusher.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 02bd013ca..2cbac571b 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -333,7 +333,10 @@ class HttpPusher(object): try: resp = yield self.http_client.post_json_get_json(self.url, notification_dict) except Exception: - logger.warn("Failed to push %s ", self.url) + logger.warn( + "Failed to push event %s to %s", + event.event_id, self.name, exc_info=True, + ) defer.returnValue(False) rejected = [] if 'rejected' in resp: @@ -364,7 +367,10 @@ class HttpPusher(object): try: resp = yield self.http_client.post_json_get_json(self.url, d) except Exception: - logger.exception("Failed to push %s ", self.url) + logger.warn( + "Failed to send badge count to %s", + self.name, exc_info=True, + ) defer.returnValue(False) rejected = [] if 'rejected' in resp: From b387ee17b68e4398a8fa26fdf122b773a046e429 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 14:00:11 +0000 Subject: [PATCH 134/171] Improve exception handling in persist_event 1. use `deferred.errback()` instead of `deferred.errback(e)`, which means that a Failure object will be constructed using the current exception state, *including* its stack trace - so the stack trace is saved in the Failure, leading to better exception reports. 2. Set `consumeErrors=True` on the ObservableDeferred, because we know that there will always be at least one observer - which avoids a spurious "CRITICAL: unhandled exception in Deferred" error in the logs --- synapse/storage/events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec9..33fccfa7a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -110,7 +110,7 @@ class _EventPeristenceQueue(object): end_item.events_and_contexts.extend(events_and_contexts) return end_item.deferred.observe() - deferred = ObservableDeferred(defer.Deferred()) + deferred = ObservableDeferred(defer.Deferred(), consumeErrors=True) queue.append(self._EventPersistQueueItem( events_and_contexts=events_and_contexts, @@ -152,8 +152,8 @@ class _EventPeristenceQueue(object): try: ret = yield per_item_callback(item) item.deferred.callback(ret) - except Exception as e: - item.deferred.errback(e) + except Exception: + item.deferred.errback() finally: queue = self._event_persist_queues.pop(room_id, None) if queue: From d413a2ba981b3e8d443d2ccdd0c8de7d43b62e73 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:10:26 +0000 Subject: [PATCH 135/171] Remove unused "event_type" param on state.get_current_state_ids this param doesn't seem to be used, and is a bit pointless anyway because it can easily be replicated by the caller. It is also horrible, because it changes the return type of the method. --- synapse/state.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 1f9abf9d3..18f1edef4 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -146,8 +146,7 @@ class StateHandler(object): defer.returnValue(state) @defer.inlineCallbacks - def get_current_state_ids(self, room_id, event_type=None, state_key="", - latest_event_ids=None): + def get_current_state_ids(self, room_id, latest_event_ids=None): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) @@ -155,10 +154,6 @@ class StateHandler(object): ret = yield self.resolve_state_groups(room_id, latest_event_ids) state = ret.state - if event_type: - defer.returnValue(state.get((event_type, state_key))) - return - defer.returnValue(state) @defer.inlineCallbacks From adfc0c95391bf43d48f7069823fc0f4902803402 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 17:39:55 +0000 Subject: [PATCH 136/171] docstring for get_current_state_ids --- synapse/state.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/synapse/state.py b/synapse/state.py index 18f1edef4..4c8247e7c 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -147,6 +147,19 @@ class StateHandler(object): @defer.inlineCallbacks def get_current_state_ids(self, room_id, latest_event_ids=None): + """Get the current state, or the state at a set of events, for a room + + Args: + room_id (str): + + latest_event_ids (iterable[str]|None): if given, the forward + extremities to resolve. If None, we look them up from the + database (via a cache) + + Returns: + Deferred[dict[(str, str), str)]]: the state dict, mapping from + (event_type, state_key) -> event_id + """ if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) From 773f0eed1efa114bb32f6e54e8edc038a04d3526 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 15:02:51 +0000 Subject: [PATCH 137/171] Fix sql error in quarantine_media --- synapse/storage/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b..d91c85307 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -589,7 +589,7 @@ class RoomStore(SQLBaseStore): """ UPDATE remote_media_cache SET quarantined_by = ? - WHERE media_origin AND media_id = ? + WHERE media_origin = ? AND media_id = ? """, ( (quarantined_by, origin, media_id) From af19f5e9aa47dd9875df3937d514ae7708eed539 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 17:52:03 +0000 Subject: [PATCH 138/171] Remove spurious log argument ... which would cause scary-looking and unhelpful errors in the log on dns fail --- synapse/http/endpoint.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index e2b99ef3b..87639b915 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -357,8 +357,7 @@ def _get_hosts_for_srv_record(dns_client, host): def eb(res, record_type): if res.check(DNSNameError): return [] - logger.warn("Error looking up %s for %s: %s", - record_type, host, res, res.value) + logger.warn("Error looking up %s for %s: %s", record_type, host, res) return res # no logcontexts here, so we can safely fire these off and gatherResults From 63c4383927cfb759046ccf576e0c7e35a70f6168 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 08:07:41 -0700 Subject: [PATCH 139/171] Documentation and naming Signed-off-by: Travis Ralston --- synapse/storage/room.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index d1d63f404..5dfb0e19f 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -534,8 +534,17 @@ class RoomStore(SQLBaseStore): self.is_room_blocked.invalidate((room_id,)) def get_media_mxcs_in_room(self, room_id): - def _get_media_ids_in_room(txn): - local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + """Retrieves all the local and remote media MXC URIs in a given room + + Args: + room_id (str) + + Returns: + The local and remote media as a lists of tuples where the key is + the hostname and the value is the media ID. + """ + def _get_media_mxcs_in_room_txn(txn): + local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) local_media_mxcs = [] remote_media_mxcs = [] @@ -546,14 +555,14 @@ class RoomStore(SQLBaseStore): remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) return local_media_mxcs, remote_media_mxcs - return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + return self.runInteraction("get_media_ids_in_room", _get_media_mxcs_in_room_txn) def quarantine_media_ids_in_room(self, room_id, quarantined_by): """For a room loops through all events with media and quarantines the associated media """ - def _quarantine_media_in_room(txn): - local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + def _quarantine_media_in_room_txn(txn): + local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -581,9 +590,19 @@ class RoomStore(SQLBaseStore): return total_media_quarantined - return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room) + return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room_txn) - def _get_media_ids_in_room(self, txn, room_id): + def _get_media_mxcs_in_room_txn(self, txn, room_id): + """Retrieves all the local and remote media MXC URIs in a given room + + Args: + txn (cursor) + room_id (str) + + Returns: + The local and remote media as a lists of tuples where the key is + the hostname and the value is the media ID. + """ mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") next_token = self.get_current_events_token() + 1 From 3af53c183a0ab5d30ce0fb40e9b8eee8da7ad75a Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 08:15:59 -0700 Subject: [PATCH 140/171] Add admin api documentation for list media endpoint Signed-off-by: Travis Ralston --- docs/admin_api/media_admin_api.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 docs/admin_api/media_admin_api.md diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md new file mode 100644 index 000000000..abdbc1ea8 --- /dev/null +++ b/docs/admin_api/media_admin_api.md @@ -0,0 +1,23 @@ +# List all media in a room + +This API gets a list of known media in a room. + +The API is: +``` +GET /_matrix/client/r0/admin/room//media +``` +including an `access_token` of a server admin. + +It returns a JSON body like the following: +``` +{ + "local": [ + "mxc://localhost/xwvutsrqponmlkjihgfedcba", + "mxc://localhost/abcdefghijklmnopqrstuvwx" + ], + "remote": [ + "mxc://matrix.org/xwvutsrqponmlkjihgfedcba", + "mxc://matrix.org/abcdefghijklmnopqrstuvwx" + ] +} +``` From 326189c25a14a2506831e74f6f111754ad656916 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 31 Jan 2018 18:43:54 +0000 Subject: [PATCH 141/171] Script to move remote media to another media store --- scripts/move_remote_media_to_new_store.py | 133 ++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100755 scripts/move_remote_media_to_new_store.py diff --git a/scripts/move_remote_media_to_new_store.py b/scripts/move_remote_media_to_new_store.py new file mode 100755 index 000000000..7914ead88 --- /dev/null +++ b/scripts/move_remote_media_to_new_store.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2017 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Moves a list of remote media from one media store to another. + +The input should be a list of media files to be moved, one per line. Each line +should be formatted:: + + | + +This can be extracted from postgres with:: + + psql --tuples-only -A -c "select media_origin, filesystem_id from + matrix.remote_media_cache where ..." + +To use, pipe the above into:: + + PYTHON_PATH=. ./scripts/move_remote_media_to_new_store.py +""" + +from __future__ import print_function + +import argparse +import logging + +import sys + +import os + +import shutil + +from synapse.rest.media.v1.filepath import MediaFilePaths + +logger = logging.getLogger() + + +def main(src_repo, dest_repo): + src_paths = MediaFilePaths(src_repo) + dest_paths = MediaFilePaths(dest_repo) + for line in sys.stdin: + line = line.strip() + parts = line.split('|') + if len(parts) != 2: + print("Unable to parse input line %s" % line, file=sys.stderr) + exit(1) + + move_media(parts[0], parts[1], src_paths, dest_paths) + + +def move_media(origin_server, file_id, src_paths, dest_paths): + """Move the given file, and any thumbnails, to the dest repo + + Args: + origin_server (str): + file_id (str): + src_paths (MediaFilePaths): + dest_paths (MediaFilePaths): + """ + logger.info("%s/%s", origin_server, file_id) + + # check that the original exists + original_file = src_paths.remote_media_filepath(origin_server, file_id) + if not os.path.exists(original_file): + logger.warn( + "Original for %s/%s (%s) does not exist", + origin_server, file_id, original_file, + ) + else: + mkdir_and_move( + original_file, + dest_paths.remote_media_filepath(origin_server, file_id), + ) + + # now look for thumbnails + original_thumb_dir = src_paths.remote_media_thumbnail_dir( + origin_server, file_id, + ) + if not os.path.exists(original_thumb_dir): + return + + mkdir_and_move( + original_thumb_dir, + dest_paths.remote_media_thumbnail_dir(origin_server, file_id) + ) + + +def mkdir_and_move(original_file, dest_file): + dirname = os.path.dirname(dest_file) + if not os.path.exists(dirname): + logger.debug("mkdir %s", dirname) + os.makedirs(dirname) + logger.debug("mv %s %s", original_file, dest_file) + shutil.move(original_file, dest_file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class = argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "-v", action='store_true', help='enable debug logging') + parser.add_argument( + "src_repo", + help="Path to source content repo", + ) + parser.add_argument( + "dest_repo", + help="Path to source content repo", + ) + args = parser.parse_args() + + logging_config = { + "level": logging.DEBUG if args.v else logging.INFO, + "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" + } + logging.basicConfig(**logging_config) + + main(args.src_repo, args.dest_repo) From e1e4ec9f9d6570e7f5a3f519113516f47ec872e4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:43:40 +0000 Subject: [PATCH 142/171] factor _get_new_state_after_events out of _calculate_state_delta This reduces the scope of a bunch of variables --- synapse/storage/events.py | 57 ++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 33fccfa7a..dd28c2efe 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -386,11 +386,18 @@ class EventsStore(SQLBaseStore): if all_single_prev_not_state: continue - state = yield self._calculate_state_delta( - room_id, ev_ctx_rm, new_latest_event_ids + logger.info( + "Calculating state delta for room %s", room_id, ) - if state: - current_state_for_room[room_id] = state + current_state = yield self._get_new_state_after_events( + ev_ctx_rm, new_latest_event_ids, + ) + if current_state is not None: + delta = yield self._calculate_state_delta( + room_id, current_state, + ) + if delta is not None: + current_state_for_room[room_id] = delta yield self.runInteraction( "persist_events", @@ -467,20 +474,22 @@ class EventsStore(SQLBaseStore): defer.returnValue(new_latest_event_ids) @defer.inlineCallbacks - def _calculate_state_delta(self, room_id, events_context, new_latest_event_ids): - """Calculate the new state deltas for a room. + def _get_new_state_after_events(self, events_context, new_latest_event_ids): + """Calculate the current state dict after adding some new events to + a room - Assumes that we are only persisting events for one room at a time. + Args: + events_context (list[(EventBase, EventContext)]): + events and contexts which are being added to the room + + new_latest_event_ids (iterable[str]): + the new forward extremities for the room. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, - i.e. (type, state_key) -> event_id. `to_delete` are the entries to - first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. - May return None if there are no changes to be applied. + Deferred[dict[(str,str), str]|None]: + None if there are no changes to the room state, or + a dict of (type, state_key) -> event_id]. """ - # Now we need to work out the different state sets for - # each state extremities state_sets = [] state_groups = set() missing_event_ids = [] @@ -523,12 +532,12 @@ class EventsStore(SQLBaseStore): state_sets.extend(group_to_state.itervalues()) if not new_latest_event_ids: - current_state = {} + defer.returnValue({}) elif was_updated: if len(state_sets) == 1: # If there is only one state set, then we know what the current # state is. - current_state = state_sets[0] + defer.returnValue(state_sets[0]) else: # We work out the current state by passing the state sets to the # state resolution algorithm. It may ask for some events, including @@ -537,8 +546,7 @@ class EventsStore(SQLBaseStore): # up in the db. logger.info( - "Resolving state for %s with %i state sets", - room_id, len(state_sets), + "Resolving state with %i state sets", len(state_sets), ) events_map = {ev.event_id: ev for ev, _ in events_context} @@ -567,9 +575,22 @@ class EventsStore(SQLBaseStore): state_sets, state_map_factory=get_events, ) + defer.returnValue(current_state) else: return + @defer.inlineCallbacks + def _calculate_state_delta(self, room_id, current_state): + """Calculate the new state deltas for a room. + + Assumes that we are only persisting events for one room at a time. + + Returns: + 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + i.e. (type, state_key) -> event_id. `to_delete` are the entries to + first be deleted from current_state_events, `to_insert` are entries + to insert. `new_state` is the full set of state. + """ existing_state = yield self.get_current_state_ids(room_id) existing_events = set(existing_state.itervalues()) From e16e45b1b44c9b9f7d44e6b50406268869759397 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 15:30:38 -0700 Subject: [PATCH 143/171] pep8 Signed-off-by: Travis Ralston --- synapse/storage/room.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5dfb0e19f..961ad5abc 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -544,14 +544,14 @@ class RoomStore(SQLBaseStore): the hostname and the value is the media ID. """ def _get_media_mxcs_in_room_txn(txn): - local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) + local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id) local_media_mxcs = [] remote_media_mxcs = [] # Convert the IDs to MXC URIs - for media_id in local_media_ids: + for media_id in local_mxcs: local_media_mxcs.append("mxc://%s/%s" % (self.hostname, media_id)) - for hostname, media_id in remote_media_ids: + for hostname, media_id in remote_mxcs: remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) return local_media_mxcs, remote_media_mxcs @@ -562,7 +562,7 @@ class RoomStore(SQLBaseStore): the associated media """ def _quarantine_media_in_room_txn(txn): - local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) + local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -571,7 +571,7 @@ class RoomStore(SQLBaseStore): UPDATE local_media_repository SET quarantined_by = ? WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_ids)) + """, ((quarantined_by, media_id) for media_id in local_mxcs)) txn.executemany( """ @@ -581,16 +581,19 @@ class RoomStore(SQLBaseStore): """, ( (quarantined_by, origin, media_id) - for origin, media_id in remote_media_ids + for origin, media_id in remote_mxcs ) ) - total_media_quarantined += len(local_media_ids) - total_media_quarantined += len(remote_media_ids) + total_media_quarantined += len(local_mxcs) + total_media_quarantined += len(remote_mxcs) return total_media_quarantined - return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room_txn) + return self.runInteraction( + "quarantine_media_in_room", + _quarantine_media_in_room_txn, + ) def _get_media_mxcs_in_room_txn(self, txn, room_id): """Retrieves all the local and remote media MXC URIs in a given room From 0cbda53819dd66df05c872ea021767336b457769 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 08:48:41 +0000 Subject: [PATCH 144/171] Rename resolve_state_groups -> resolve_state_groups_for_events (to make way for a method that actually just does the state group resolution) --- synapse/handlers/federation.py | 11 +++++------ synapse/state.py | 12 ++++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 677532c87..8ee9434c9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -808,13 +808,12 @@ class FederationHandler(BaseHandler): event_ids = list(extremities.keys()) logger.debug("calling resolve_state_groups in _maybe_backfill") + resolve = logcontext.preserve_fn( + self.state_handler.resolve_state_groups_for_events + ) states = yield logcontext.make_deferred_yieldable(defer.gatherResults( - [ - logcontext.preserve_fn(self.state_handler.resolve_state_groups)( - room_id, [e] - ) - for e in event_ids - ], consumeErrors=True, + [resolve(room_id, [e]) for e in event_ids], + consumeErrors=True, )) states = dict(zip(event_ids, [s.state for s in states])) diff --git a/synapse/state.py b/synapse/state.py index 4c8247e7c..8daf90013 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -127,7 +127,7 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_state") - ret = yield self.resolve_state_groups(room_id, latest_event_ids) + ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) state = ret.state if event_type: @@ -164,7 +164,7 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_state_ids") - ret = yield self.resolve_state_groups(room_id, latest_event_ids) + ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) state = ret.state defer.returnValue(state) @@ -174,7 +174,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_user_in_room") - entry = yield self.resolve_state_groups(room_id, latest_event_ids) + entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) joined_users = yield self.store.get_joined_users_from_state(room_id, entry) defer.returnValue(joined_users) @@ -183,7 +183,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_hosts_in_room") - entry = yield self.resolve_state_groups(room_id, latest_event_ids) + entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) joined_hosts = yield self.store.get_joined_hosts(room_id, entry) defer.returnValue(joined_hosts) @@ -241,7 +241,7 @@ class StateHandler(object): defer.returnValue(context) logger.debug("calling resolve_state_groups from compute_event_context") - entry = yield self.resolve_state_groups( + entry = yield self.resolve_state_groups_for_events( event.room_id, [e for e, _ in event.prev_events], ) @@ -284,7 +284,7 @@ class StateHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, room_id, event_ids): + def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. From 6da4c4d3bdceb99f20bc208dc4809d3d76ba9a72 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 09:15:45 +0000 Subject: [PATCH 145/171] Factor out resolve_state_groups to a separate handler We extract the storage-independent bits of the state group resolution out to a separate functiom, and stick it in a new handler, in preparation for its use from the storage layer. --- synapse/server.py | 6 +- synapse/server.pyi | 3 + synapse/state.py | 149 ++++++++++++++++++++++++++++---------------- tests/test_state.py | 4 +- 4 files changed, 108 insertions(+), 54 deletions(-) diff --git a/synapse/server.py b/synapse/server.py index ff8a8fbc4..3173aed1d 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -66,7 +66,7 @@ from synapse.rest.media.v1.media_repository import ( MediaRepository, MediaRepositoryResource, ) -from synapse.state import StateHandler +from synapse.state import StateHandler, StateResolutionHandler from synapse.storage import DataStore from synapse.streams.events import EventSources from synapse.util import Clock @@ -102,6 +102,7 @@ class HomeServer(object): 'v1auth', 'auth', 'state_handler', + 'state_resolution_handler', 'presence_handler', 'sync_handler', 'typing_handler', @@ -224,6 +225,9 @@ class HomeServer(object): def build_state_handler(self): return StateHandler(self) + def build_state_resolution_handler(self): + return StateResolutionHandler(self) + def build_presence_handler(self): return PresenceHandler(self) diff --git a/synapse/server.pyi b/synapse/server.pyi index 41416ef25..c3a9a3847 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -34,6 +34,9 @@ class HomeServer(object): def get_state_handler(self) -> synapse.state.StateHandler: pass + def get_state_resolution_handler(self) -> synapse.state.StateResolutionHandler: + pass + def get_deactivate_account_handler(self) -> synapse.handlers.deactivate_account.DeactivateAccountHandler: pass diff --git a/synapse/state.py b/synapse/state.py index 8daf90013..5004e0f91 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -81,31 +81,19 @@ class _StateCacheEntry(object): class StateHandler(object): - """ Responsible for doing state conflict resolution. + """Fetches bits of state from the stores, and does state resolution + where necessary """ def __init__(self, hs): self.clock = hs.get_clock() self.store = hs.get_datastore() self.hs = hs - - # dict of set of event_ids -> _StateCacheEntry. - self._state_cache = None - self.resolve_linearizer = Linearizer(name="state_resolve_lock") + self._state_resolution_handler = hs.get_state_resolution_handler() def start_caching(self): - logger.debug("start_caching") - - self._state_cache = ExpiringCache( - cache_name="state_cache", - clock=self.clock, - max_len=SIZE_OF_CACHE, - expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, - iterable=True, - reset_expiry_on_get=True, - ) - - self._state_cache.start() + # TODO: remove this shim + self._state_resolution_handler.start_caching() @defer.inlineCallbacks def get_current_state(self, room_id, event_type=None, state_key="", @@ -283,7 +271,6 @@ class StateHandler(object): defer.returnValue(context) @defer.inlineCallbacks - @log_function def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. @@ -303,13 +290,7 @@ class StateHandler(object): room_id, event_ids ) - logger.debug( - "resolve_state_groups state_groups %s", - state_groups_ids.keys() - ) - - group_names = frozenset(state_groups_ids.keys()) - if len(group_names) == 1: + if len(state_groups_ids) == 1: name, state_list = state_groups_ids.items().pop() prev_group, delta_ids = yield self.store.get_state_group_delta(name) @@ -321,6 +302,92 @@ class StateHandler(object): delta_ids=delta_ids, )) + result = yield self._state_resolution_handler.resolve_state_groups( + room_id, state_groups_ids, self._state_map_factory, + ) + defer.returnValue(result) + + def _state_map_factory(self, ev_ids): + return self.store.get_events( + ev_ids, get_prev_content=False, check_redacted=False, + ) + + def resolve_events(self, state_sets, event): + logger.info( + "Resolving state for %s with %d groups", event.room_id, len(state_sets) + ) + state_set_ids = [{ + (ev.type, ev.state_key): ev.event_id + for ev in st + } for st in state_sets] + + state_map = { + ev.event_id: ev + for st in state_sets + for ev in st + } + + with Measure(self.clock, "state._resolve_events"): + new_state = resolve_events_with_state_map(state_set_ids, state_map) + + new_state = { + key: state_map[ev_id] for key, ev_id in new_state.items() + } + + return new_state + + +class StateResolutionHandler(object): + """Responsible for doing state conflict resolution. + + Note that the storage layer depends on this handler, so all functions must + be storage-independent. + """ + def __init__(self, hs): + self.clock = hs.get_clock() + + # dict of set of event_ids -> _StateCacheEntry. + self._state_cache = None + self.resolve_linearizer = Linearizer(name="state_resolve_lock") + + def start_caching(self): + logger.debug("start_caching") + + self._state_cache = ExpiringCache( + cache_name="state_cache", + clock=self.clock, + max_len=SIZE_OF_CACHE, + expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, + iterable=True, + reset_expiry_on_get=True, + ) + + self._state_cache.start() + + @defer.inlineCallbacks + @log_function + def resolve_state_groups(self, room_id, state_groups_ids, state_map_factory): + """Resolves conflicts between a set of state groups + + Always generates a new state group (unless we hit the cache), so should + not be called for a single state group + + Args: + room_id (str): room we are resolving for (used for logging) + state_groups_ids (dict[int, dict[(str, str), str]]): + map from state group id to the state in that state group + (where 'state' is a map from state key to event id) + + Returns: + Deferred[_StateCacheEntry]: resolved state + """ + logger.debug( + "resolve_state_groups state_groups %s", + state_groups_ids.keys() + ) + + group_names = frozenset(state_groups_ids.keys()) + with (yield self.resolve_linearizer.queue(group_names)): if self._state_cache is not None: cache = self._state_cache.get(group_names, None) @@ -351,15 +418,17 @@ class StateHandler(object): with Measure(self.clock, "state._resolve_events"): new_state = yield resolve_events_with_factory( state_groups_ids.values(), - state_map_factory=lambda ev_ids: self.store.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ), + state_map_factory=state_map_factory, ) else: new_state = { key: e_ids.pop() for key, e_ids in state.items() } + # if the new state matches any of the input state groups, we can + # use that state group again. Otherwise we will generate a state_id + # which will be used as a cache key for future resolutions, but + # not get persisted. state_group = None new_state_event_ids = frozenset(new_state.values()) for sg, events in state_groups_ids.items(): @@ -396,30 +465,6 @@ class StateHandler(object): defer.returnValue(cache) - def resolve_events(self, state_sets, event): - logger.info( - "Resolving state for %s with %d groups", event.room_id, len(state_sets) - ) - state_set_ids = [{ - (ev.type, ev.state_key): ev.event_id - for ev in st - } for st in state_sets] - - state_map = { - ev.event_id: ev - for st in state_sets - for ev in st - } - - with Measure(self.clock, "state._resolve_events"): - new_state = resolve_events_with_state_map(state_set_ids, state_map) - - new_state = { - key: state_map[ev_id] for key, ev_id in new_state.items() - } - - return new_state - def _ordered_events(events): def key_func(e): diff --git a/tests/test_state.py b/tests/test_state.py index feb84f3d4..d16e1b3b8 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -19,7 +19,7 @@ from twisted.internet import defer from synapse.events import FrozenEvent from synapse.api.auth import Auth from synapse.api.constants import EventTypes, Membership -from synapse.state import StateHandler +from synapse.state import StateHandler, StateResolutionHandler from .utils import MockClock @@ -148,11 +148,13 @@ class StateTestCase(unittest.TestCase): ) hs = Mock(spec_set=[ "get_datastore", "get_auth", "get_state_handler", "get_clock", + "get_state_resolution_handler", ]) hs.get_datastore.return_value = self.store hs.get_state_handler.return_value = None hs.get_clock.return_value = MockClock() hs.get_auth.return_value = Auth(hs) + hs.get_state_resolution_handler = lambda: StateResolutionHandler(hs) self.store.get_next_state_group.side_effect = Mock self.store.get_state_group_delta.return_value = (None, None) From a18828c12961ce254660e7641335428ec0655868 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:33:22 +0000 Subject: [PATCH 146/171] Fix docstring for StateHandler.resolve_state_groups The return type was a complete lie, so fix it --- synapse/state.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 1f9abf9d3..2bc7e584c 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -58,7 +58,11 @@ class _StateCacheEntry(object): __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"] def __init__(self, state, state_group, prev_group=None, delta_ids=None): + # dict[(str, str), str] map from (type, state_key) to event_id self.state = frozendict(state) + + # the ID of a state group if one and only one is involved. + # otherwise, None otherwise? self.state_group = state_group self.prev_group = prev_group @@ -280,11 +284,12 @@ class StateHandler(object): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. + Args: + room_id (str): + event_ids (list[str]): + Returns: - a Deferred tuple of (`state_group`, `state`, `prev_state`). - `state_group` is the name of a state group if one and only one is - involved. `state` is a map from (type, state_key) to event, and - `prev_state` is a list of event ids. + Deferred[_StateCacheEntry]: resolved state """ logger.debug("resolve_state_groups event_ids %s", event_ids) From e15d4ea2482827035e31945f514b9d4c67086c37 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:36:45 +0000 Subject: [PATCH 147/171] More docstring fixes Fix a couple of errors in docstrings --- synapse/state.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 2bc7e584c..1071635ac 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -434,8 +434,8 @@ def resolve_events_with_state_map(state_sets, state_map): state_sets. Returns - dict[(str, str), synapse.events.FrozenEvent]: - a map from (type, state_key) to event. + dict[(str, str), str]: + a map from (type, state_key) to event_id. """ if len(state_sets) == 1: return state_sets[0] @@ -497,8 +497,8 @@ def resolve_events_with_factory(state_sets, state_map_factory): a Deferred of dict of event_id to event. Returns - Deferred[dict[(str, str), synapse.events.FrozenEvent]]: - a map from (type, state_key) to event. + Deferred[dict[(str, str), str]]: + a map from (type, state_key) to event_id. """ if len(state_sets) == 1: defer.returnValue(state_sets[0]) From 14737ba495d914daf350a80c9b34dfefb0ae5a86 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 23:55:01 +0000 Subject: [PATCH 148/171] doc arg types for _seperate --- synapse/state.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/synapse/state.py b/synapse/state.py index 1071635ac..30b16e201 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -457,6 +457,21 @@ def _seperate(state_sets): """Takes the state_sets and figures out which keys are conflicted and which aren't. i.e., which have multiple different event_ids associated with them in different state sets. + + Args: + state_sets(list[dict[(str, str), str]]): + List of dicts of (type, state_key) -> event_id, which are the + different state groups to resolve. + + Returns: + (dict[(str, str), str], dict[(str, str), set[str]]): + A tuple of (unconflicted_state, conflicted_state), where: + + unconflicted_state is a dict mapping (type, state_key)->event_id + for unconflicted state keys. + + conflicted_state is a dict mapping (type, state_key) to a set of + event ids for conflicted state keys. """ unconflicted_state = dict(state_sets[0]) conflicted_state = {} From d5352cbba80005fb226563211c9e7179edef2d65 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 2 Feb 2018 00:35:18 +0000 Subject: [PATCH 149/171] Handle url_previews with no content-type avoid failing with an exception if the remote server doesn't give us a Content-Type header. Also, clean up the exception handling a bit. --- synapse/rest/media/v1/preview_url_resource.py | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 981f01e41..31fe7aa75 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -12,6 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import cgi +import datetime +import errno +import fnmatch +import itertools +import logging +import os +import re +import shutil +import sys +import traceback +import ujson as json +import urlparse from twisted.web.server import NOT_DONE_YET from twisted.internet import defer @@ -33,18 +46,6 @@ from synapse.http.server import ( from synapse.util.async import ObservableDeferred from synapse.util.stringutils import is_ascii -import os -import re -import fnmatch -import cgi -import ujson as json -import urlparse -import itertools -import datetime -import errno -import shutil - -import logging logger = logging.getLogger(__name__) @@ -286,17 +287,28 @@ class PreviewUrlResource(Resource): url_cache=True, ) - try: - with self.media_storage.store_into_file(file_info) as (f, fname, finish): + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + try: logger.debug("Trying to get url '%s'" % url) length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) + except Exception as e: # FIXME: pass through 404s and other error messages nicely + logger.warn("Error downloading %s: %r", url, e) + raise SynapseError( + 500, "Failed to download content: %s" % ( + traceback.format_exception_only(sys.exc_type, e), + ), + Codes.UNKNOWN, + ) + yield finish() - yield finish() - - media_type = headers["Content-Type"][0] + try: + if "Content-Type" in headers: + media_type = headers["Content-Type"][0] + else: + media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() content_disposition = headers.get("Content-Disposition", None) @@ -336,10 +348,11 @@ class PreviewUrlResource(Resource): ) except Exception as e: - raise SynapseError( - 500, ("Failed to download content: %s" % e), - Codes.UNKNOWN - ) + logger.error("Error handling downloaded %s: %r", url, e) + # TODO: we really ought to delete the downloaded file in this + # case, since we won't have recorded it in the db, and will + # therefore not expire it. + raise defer.returnValue({ "media_type": media_type, From 4eeae7ad657729eb8c2765da6fb40fc983c740f7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 22:57:33 +0000 Subject: [PATCH 150/171] Move store_event_search_txn to SearchStore ... as a precursor to making event storing and doing the bg update share some code. --- synapse/storage/room.py | 45 +++++++-------------------------------- synapse/storage/search.py | 35 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 37 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9f373b47e..0fcfb7f86 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore -from .engines import PostgresEngine, Sqlite3Engine - import collections import logging import ujson as json @@ -40,7 +38,7 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SQLBaseStore): +class RoomStore(SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -263,8 +261,8 @@ class RoomStore(SQLBaseStore): }, ) - self._store_event_search_txn( - txn, event, "content.topic", event.content["topic"] + self.store_event_search_txn( + txn, event, "content.topic", event.content["topic"], ) def _store_room_name_txn(self, txn, event): @@ -279,14 +277,14 @@ class RoomStore(SQLBaseStore): } ) - self._store_event_search_txn( - txn, event, "content.name", event.content["name"] + self.store_event_search_txn( + txn, event, "content.name", event.content["name"], ) def _store_room_message_txn(self, txn, event): if hasattr(event, "content") and "body" in event.content: - self._store_event_search_txn( - txn, event, "content.body", event.content["body"] + self.store_event_search_txn( + txn, event, "content.body", event.content["body"], ) def _store_history_visibility_txn(self, txn, event): @@ -308,33 +306,6 @@ class RoomStore(SQLBaseStore): event.content[key] )) - def _store_event_search_txn(self, txn, event, key, value): - if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") - sql = ( - "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" - " VALUES (?,?,?,to_tsvector('english', ?),?,?)" - ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) - txn.execute("RESET work_mem") - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - def add_event_report(self, room_id, event_id, user_id, reason, content, received_ts): next_id = self._event_reports_id_gen.get_next() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index f52f3c859..205e8d001 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -246,6 +246,41 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(num_rows) + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256kB'") + sql = ( + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) + ) + txn.execute("RESET work_mem") + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. From bd25f9cf36ff86d1616853d88cebd2a4a83fa552 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:05:41 +0000 Subject: [PATCH 151/171] Clean up work_mem handling Add some comments and improve exception handling when twiddling work_mem for the search update --- synapse/storage/search.py | 52 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 205e8d001..190751bad 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import sys from twisted.internet import defer from .background_updates import BackgroundUpdateStore @@ -256,21 +256,51 @@ class SearchStore(BackgroundUpdateStore): value (str): """ if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " (event_id, room_id, key, vector, stream_ordering, " + " origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, + + # inserts to a GIN index are normally batched up into a pending + # list, and then all committed together once the list gets to a + # certain size. The trouble with that is that postgres (pre-9.5) + # uses work_mem to determine the length of the list, and work_mem + # is typically very large. + # + # We therefore reduce work_mem while we do the insert. + # + # (postgres 9.5 uses the separate gin_pending_list_limit setting, + # so doesn't suffer the same problem, but changing work_mem will + # be harmless) + + txn.execute("SET work_mem='256kB'") + try: + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) ) - ) - txn.execute("RESET work_mem") + except Exception: + # we need to reset work_mem, but doing so may throw a new + # exception and we want to preserve the original + t, v, tb = sys.exc_info() + try: + txn.execute("RESET work_mem") + except Exception as e: + logger.warn( + "exception resetting work_mem during exception " + "handling: %r", + e, + ) + raise t, v, tb + else: + txn.execute("RESET work_mem") + elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" From 80b8a28100e29e34bdc6226513575789310aa41f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:07:13 +0000 Subject: [PATCH 152/171] Factor out common code for search insert we can reuse the same code as is used for event insert, for doing the background index population. --- synapse/storage/search.py | 89 ++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 190751bad..eecf77851 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from collections import namedtuple import sys from twisted.internet import defer @@ -26,6 +27,11 @@ import ujson as json logger = logging.getLogger(__name__) +SearchEntry = namedtuple('SearchEntry', [ + 'key', 'value', 'event_id', 'room_id', 'stream_ordering', + 'origin_server_ts', +]) + class SearchStore(BackgroundUpdateStore): @@ -49,16 +55,17 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): + # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) - INSERT_CLUMP_SIZE = 1000 TYPES = ["m.room.name", "m.room.message", "m.room.topic"] def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, event_id, room_id, type, content FROM events" + "SELECT stream_ordering, event_id, room_id, type, content, " + " origin_server_ts FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" @@ -67,6 +74,10 @@ class SearchStore(BackgroundUpdateStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + # we could stream straight from the results into + # store_search_entries_txn with a generator function, but that + # would mean having two cursors open on the database at once. + # Instead we just build a list of results. rows = self.cursor_to_dict(txn) if not rows: return 0 @@ -79,6 +90,8 @@ class SearchStore(BackgroundUpdateStore): event_id = row["event_id"] room_id = row["room_id"] etype = row["type"] + stream_ordering = row["stream_ordering"] + origin_server_ts = row["origin_server_ts"] try: content = json.loads(row["content"]) except Exception: @@ -93,6 +106,8 @@ class SearchStore(BackgroundUpdateStore): elif etype == "m.room.name": key = "content.name" value = content["name"] + else: + raise Exception("unexpected event type %s" % etype) except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. @@ -103,29 +118,16 @@ class SearchStore(BackgroundUpdateStore): # then skip over it continue - event_search_rows.append((event_id, room_id, key, value)) + event_search_rows.append(SearchEntry( + key=key, + value=value, + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + origin_server_ts=origin_server_ts, + )) - if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - - for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - if isinstance(self.database_engine, PostgresEngine): - txn.execute("RESET work_mem") + self.store_search_entries_txn(txn, event_search_rows) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -255,6 +257,26 @@ class SearchStore(BackgroundUpdateStore): key (str): value (str): """ + self.store_search_entries_txn( + txn, + (SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ),), + ) + + def store_search_entries_txn(self, txn, entries): + """Add entries to the search table + + Args: + txn (cursor): + entries (iterable[SearchEntry]): + entries to be added to the table + """ if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" @@ -262,6 +284,10 @@ class SearchStore(BackgroundUpdateStore): " origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + entry.stream_ordering, entry.origin_server_ts, + ) for entry in entries) # inserts to a GIN index are normally batched up into a pending # list, and then all committed together once the list gets to a @@ -277,14 +303,7 @@ class SearchStore(BackgroundUpdateStore): txn.execute("SET work_mem='256kB'") try: - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) + txn.executemany(sql, args) except Exception: # we need to reset work_mem, but doing so may throw a new # exception and we want to preserve the original @@ -306,7 +325,11 @@ class SearchStore(BackgroundUpdateStore): "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + ) for entry in entries) + + txn.executemany(sql, args) else: # This should be unreachable. raise Exception("Unrecognized database engine") From b5049d2e5cab5780c08c61b9d8ac70c6a9298fb1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Feb 2018 12:06:46 +0000 Subject: [PATCH 153/171] Add .vscode to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 491047c35..c8901eb20 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,5 @@ static/client/register/register_config.js env/ *.config + +.vscode/ From a1beca0e254104ad87220d276fb56d26e5ca5809 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Feb 2018 12:44:03 +0000 Subject: [PATCH 154/171] Fix broken unit test for media storage --- tests/rest/media/v1/test_media_storage.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index c4de18157..eef38b678 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -21,6 +21,8 @@ from synapse.rest.media.v1.media_storage import MediaStorage from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend +from mock import Mock + from tests import unittest import os @@ -35,8 +37,11 @@ class MediaStorageTests(unittest.TestCase): self.primary_base_path = os.path.join(self.test_dir, "primary") self.secondary_base_path = os.path.join(self.test_dir, "secondary") + hs = Mock() + hs.config.media_store_path = self.primary_base_path + storage_providers = [FileStorageProviderBackend( - self.primary_base_path, self.secondary_base_path + hs, self.secondary_base_path )] self.filepaths = MediaFilePaths(self.primary_base_path) From bc496df192fa20dee933590d5f21a3425388c0d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Feb 2018 17:57:51 +0000 Subject: [PATCH 155/171] report metrics on number of cache evictions --- synapse/metrics/metric.py | 11 ++++++++++- synapse/util/caches/descriptors.py | 4 ++++ synapse/util/caches/expiringcache.py | 6 +++++- synapse/util/caches/lrucache.py | 28 +++++++++++++++++++++++++--- tests/metrics/test_metric.py | 12 ++++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 1e783e5ff..ff5aa8c0e 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -193,7 +193,9 @@ class DistributionMetric(object): class CacheMetric(object): - __slots__ = ("name", "cache_name", "hits", "misses", "size_callback") + __slots__ = ( + "name", "cache_name", "hits", "misses", "evicted_size", "size_callback", + ) def __init__(self, name, size_callback, cache_name): self.name = name @@ -201,6 +203,7 @@ class CacheMetric(object): self.hits = 0 self.misses = 0 + self.evicted_size = 0 self.size_callback = size_callback @@ -210,6 +213,9 @@ class CacheMetric(object): def inc_misses(self): self.misses += 1 + def inc_evictions(self, size=1): + self.evicted_size += size + def render(self): size = self.size_callback() hits = self.hits @@ -219,6 +225,9 @@ class CacheMetric(object): """%s:hits{name="%s"} %d""" % (self.name, self.cache_name, hits), """%s:total{name="%s"} %d""" % (self.name, self.cache_name, total), """%s:size{name="%s"} %d""" % (self.name, self.cache_name, size), + """%s:evicted_size{name="%s"} %d""" % ( + self.name, self.cache_name, self.evicted_size + ), ] diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index af65bfe7b..bf3a66eae 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -75,6 +75,7 @@ class Cache(object): self.cache = LruCache( max_size=max_entries, keylen=keylen, cache_type=cache_type, size_callback=(lambda d: len(d)) if iterable else None, + evicted_callback=self._on_evicted, ) self.name = name @@ -83,6 +84,9 @@ class Cache(object): self.thread = None self.metrics = register_cache(name, self.cache) + def _on_evicted(self, evicted_count): + self.metrics.inc_evictions(evicted_count) + def check_thread(self): expected_thread = self.thread if expected_thread is None: diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 6ad53a639..0aa103eec 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -79,7 +79,11 @@ class ExpiringCache(object): while self._max_len and len(self) > self._max_len: _key, value = self._cache.popitem(last=False) if self.iterable: - self._size_estimate -= len(value.value) + removed_len = len(value.value) + self.metrics.inc_evictions(removed_len) + self._size_estimate -= removed_len + else: + self.metrics.inc_evictions() def __getitem__(self, key): try: diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index cf5fbb679..f088dd430 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -49,7 +49,24 @@ class LruCache(object): Can also set callbacks on objects when getting/setting which are fired when that key gets invalidated/evicted. """ - def __init__(self, max_size, keylen=1, cache_type=dict, size_callback=None): + def __init__(self, max_size, keylen=1, cache_type=dict, size_callback=None, + evicted_callback=None): + """ + Args: + max_size (int): + + keylen (int): + + cache_type (type): + type of underlying cache to be used. Typically one of dict + or TreeCache. + + size_callback (func(V) -> int | None): + + evicted_callback (func(int)|None): + if not None, called on eviction with the size of the evicted + entry + """ cache = cache_type() self.cache = cache # Used for introspection. list_root = _Node(None, None, None, None) @@ -61,8 +78,10 @@ class LruCache(object): def evict(): while cache_len() > max_size: todelete = list_root.prev_node - delete_node(todelete) + evicted_len = delete_node(todelete) cache.pop(todelete.key, None) + if evicted_callback: + evicted_callback(evicted_len) def synchronized(f): @wraps(f) @@ -111,12 +130,15 @@ class LruCache(object): prev_node.next_node = next_node next_node.prev_node = prev_node + deleted_len = 1 if size_callback: - cached_cache_len[0] -= size_callback(node.value) + deleted_len = size_callback(node.value) + cached_cache_len[0] -= deleted_len for cb in node.callbacks: cb() node.callbacks.clear() + return deleted_len @synchronized def cache_get(key, default=None, callbacks=[]): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index f85455a5a..39bde6e3f 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -141,6 +141,7 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 0', 'cache:total{name="cache_name"} 0', 'cache:size{name="cache_name"} 0', + 'cache:evicted_size{name="cache_name"} 0', ]) metric.inc_misses() @@ -150,6 +151,7 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 0', 'cache:total{name="cache_name"} 1', 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 0', ]) metric.inc_hits() @@ -158,4 +160,14 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 1', 'cache:total{name="cache_name"} 2', 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 0', + ]) + + metric.inc_evictions(2) + + self.assertEquals(metric.render(), [ + 'cache:hits{name="cache_name"} 1', + 'cache:total{name="cache_name"} 2', + 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 2', ]) From c46e75d3d8311f378f234e3de4719d6fa5d380c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 22:57:33 +0000 Subject: [PATCH 156/171] Move store_event_search_txn to SearchStore ... as a precursor to making event storing and doing the bg update share some code. --- synapse/storage/room.py | 43 ++++++++------------------------------- synapse/storage/search.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index cf2c4dae3..fff6652e0 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore -from .engines import PostgresEngine, Sqlite3Engine - import collections import logging import ujson as json @@ -40,7 +38,7 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SQLBaseStore): +class RoomStore(SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -263,8 +261,8 @@ class RoomStore(SQLBaseStore): }, ) - self._store_event_search_txn( - txn, event, "content.topic", event.content["topic"] + self.store_event_search_txn( + txn, event, "content.topic", event.content["topic"], ) def _store_room_name_txn(self, txn, event): @@ -279,14 +277,14 @@ class RoomStore(SQLBaseStore): } ) - self._store_event_search_txn( - txn, event, "content.name", event.content["name"] + self.store_event_search_txn( + txn, event, "content.name", event.content["name"], ) def _store_room_message_txn(self, txn, event): if hasattr(event, "content") and "body" in event.content: - self._store_event_search_txn( - txn, event, "content.body", event.content["body"] + self.store_event_search_txn( + txn, event, "content.body", event.content["body"], ) def _store_history_visibility_txn(self, txn, event): @@ -308,31 +306,6 @@ class RoomStore(SQLBaseStore): event.content[key] )) - def _store_event_search_txn(self, txn, event, key, value): - if isinstance(self.database_engine, PostgresEngine): - sql = ( - "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" - " VALUES (?,?,?,to_tsvector('english', ?),?,?)" - ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - def add_event_report(self, room_id, event_id, user_id, reason, content, received_ts): next_id = self._event_reports_id_gen.get_next() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c63..4f38a587c 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -242,6 +242,39 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(num_rows) + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. From 5ff3d23564f41e3ae82398a2f0726d8914d060a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:51:53 +0000 Subject: [PATCH 157/171] Split event creation into a separate handler --- synapse/handlers/message.py | 368 +++++++++++++++++++----------------- 1 file changed, 191 insertions(+), 177 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd..afa19bf65 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -47,21 +47,9 @@ class MessageHandler(BaseHandler): self.hs = hs self.state = hs.get_state_handler() self.clock = hs.get_clock() - self.validator = EventValidator() - self.profile_handler = hs.get_profile_handler() self.pagination_lock = ReadWriteLock() - self.pusher_pool = hs.get_pusherpool() - - # We arbitrarily limit concurrent event creation for a room to 5. - # This is to stop us from diverging history *too* much. - self.limiter = Limiter(max_count=5) - - self.action_generator = hs.get_action_generator() - - self.spam_checker = hs.get_spam_checker() - @defer.inlineCallbacks def purge_history(self, room_id, event_id): event = yield self.store.get_event(event_id) @@ -182,166 +170,6 @@ class MessageHandler(BaseHandler): defer.returnValue(chunk) - @defer.inlineCallbacks - def create_event(self, requester, event_dict, token_id=None, txn_id=None, - prev_event_ids=None): - """ - Given a dict from a client, create a new event. - - Creates an FrozenEvent object, filling out auth_events, prev_events, - etc. - - Adds display names to Join membership events. - - Args: - requester - event_dict (dict): An entire event - token_id (str) - txn_id (str) - prev_event_ids (list): The prev event ids to use when creating the event - - Returns: - Tuple of created event (FrozenEvent), Context - """ - builder = self.event_builder_factory.new(event_dict) - - with (yield self.limiter.queue(builder.room_id)): - self.validator.validate_new(builder) - - if builder.type == EventTypes.Member: - membership = builder.content.get("membership", None) - target = UserID.from_string(builder.state_key) - - if membership in {Membership.JOIN, Membership.INVITE}: - # If event doesn't include a display name, add one. - profile = self.profile_handler - content = builder.content - - try: - if "displayname" not in content: - content["displayname"] = yield profile.get_displayname(target) - if "avatar_url" not in content: - content["avatar_url"] = yield profile.get_avatar_url(target) - except Exception as e: - logger.info( - "Failed to get profile information for %r: %s", - target, e - ) - - if token_id is not None: - builder.internal_metadata.token_id = token_id - - if txn_id is not None: - builder.internal_metadata.txn_id = txn_id - - event, context = yield self._create_new_client_event( - builder=builder, - requester=requester, - prev_event_ids=prev_event_ids, - ) - - defer.returnValue((event, context)) - - @defer.inlineCallbacks - def send_nonmember_event(self, requester, event, context, ratelimit=True): - """ - Persists and notifies local clients and federation of an event. - - Args: - event (FrozenEvent) the event to send. - context (Context) the context of the event. - ratelimit (bool): Whether to rate limit this send. - is_guest (bool): Whether the sender is a guest. - """ - if event.type == EventTypes.Member: - raise SynapseError( - 500, - "Tried to send member event through non-member codepath" - ) - - # We check here if we are currently being rate limited, so that we - # don't do unnecessary work. We check again just before we actually - # send the event. - yield self.ratelimit(requester, update=False) - - user = UserID.from_string(event.sender) - - assert self.hs.is_mine(user), "User must be our own: %s" % (user,) - - if event.is_state(): - prev_state = yield self.deduplicate_state_event(event, context) - if prev_state is not None: - defer.returnValue(prev_state) - - yield self.handle_new_client_event( - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - ) - - if event.type == EventTypes.Message: - presence = self.hs.get_presence_handler() - # We don't want to block sending messages on any presence code. This - # matters as sometimes presence code can take a while. - preserve_fn(presence.bump_presence_active_time)(user) - - @defer.inlineCallbacks - def deduplicate_state_event(self, event, context): - """ - Checks whether event is in the latest resolved state in context. - - If so, returns the version of the event in context. - Otherwise, returns None. - """ - prev_event_id = context.prev_state_ids.get((event.type, event.state_key)) - prev_event = yield self.store.get_event(prev_event_id, allow_none=True) - if not prev_event: - return - - if prev_event and event.user_id == prev_event.user_id: - prev_content = encode_canonical_json(prev_event.content) - next_content = encode_canonical_json(event.content) - if prev_content == next_content: - defer.returnValue(prev_event) - return - - @defer.inlineCallbacks - def create_and_send_nonmember_event( - self, - requester, - event_dict, - ratelimit=True, - txn_id=None - ): - """ - Creates an event, then sends it. - - See self.create_event and self.send_nonmember_event. - """ - event, context = yield self.create_event( - requester, - event_dict, - token_id=requester.access_token_id, - txn_id=txn_id - ) - - spam_error = self.spam_checker.check_event_for_spam(event) - if spam_error: - if not isinstance(spam_error, basestring): - spam_error = "Spam is not permitted here" - raise SynapseError( - 403, spam_error, Codes.FORBIDDEN - ) - - yield self.send_nonmember_event( - requester, - event, - context, - ratelimit=ratelimit, - ) - defer.returnValue(event) - @defer.inlineCallbacks def get_room_data(self, user_id=None, room_id=None, event_type=None, state_key="", is_guest=False): @@ -470,6 +298,194 @@ class MessageHandler(BaseHandler): for user_id, profile in users_with_profile.iteritems() }) + +class EventCreationHandler(object): + def __init__(self, hs): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + self.clock = hs.get_clock() + self.validator = EventValidator() + self.profile_handler = hs.get_profile_handler() + self.event_builder_factory = hs.get_event_builder_factory() + self.server_name = hs.hostname + self.ratelimiter = hs.get_ratelimiter() + self.notifier = hs.get_notifier() + + # This is only used to get at ratelimit function, and maybe_kick_guest_users + self.base_handler = BaseHandler(hs) + + self.pusher_pool = hs.get_pusherpool() + + # We arbitrarily limit concurrent event creation for a room to 5. + # This is to stop us from diverging history *too* much. + self.limiter = Limiter(max_count=5) + + self.action_generator = hs.get_action_generator() + + self.spam_checker = hs.get_spam_checker() + + @defer.inlineCallbacks + def create_event(self, requester, event_dict, token_id=None, txn_id=None, + prev_event_ids=None): + """ + Given a dict from a client, create a new event. + + Creates an FrozenEvent object, filling out auth_events, prev_events, + etc. + + Adds display names to Join membership events. + + Args: + requester + event_dict (dict): An entire event + token_id (str) + txn_id (str) + prev_event_ids (list): The prev event ids to use when creating the event + + Returns: + Tuple of created event (FrozenEvent), Context + """ + builder = self.event_builder_factory.new(event_dict) + + with (yield self.limiter.queue(builder.room_id)): + self.validator.validate_new(builder) + + if builder.type == EventTypes.Member: + membership = builder.content.get("membership", None) + target = UserID.from_string(builder.state_key) + + if membership in {Membership.JOIN, Membership.INVITE}: + # If event doesn't include a display name, add one. + profile = self.profile_handler + content = builder.content + + try: + if "displayname" not in content: + content["displayname"] = yield profile.get_displayname(target) + if "avatar_url" not in content: + content["avatar_url"] = yield profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information for %r: %s", + target, e + ) + + if token_id is not None: + builder.internal_metadata.token_id = token_id + + if txn_id is not None: + builder.internal_metadata.txn_id = txn_id + + event, context = yield self._create_new_client_event( + builder=builder, + requester=requester, + prev_event_ids=prev_event_ids, + ) + + defer.returnValue((event, context)) + + @defer.inlineCallbacks + def send_nonmember_event(self, requester, event, context, ratelimit=True): + """ + Persists and notifies local clients and federation of an event. + + Args: + event (FrozenEvent) the event to send. + context (Context) the context of the event. + ratelimit (bool): Whether to rate limit this send. + is_guest (bool): Whether the sender is a guest. + """ + if event.type == EventTypes.Member: + raise SynapseError( + 500, + "Tried to send member event through non-member codepath" + ) + + # We check here if we are currently being rate limited, so that we + # don't do unnecessary work. We check again just before we actually + # send the event. + yield self.base_handler.ratelimit(requester, update=False) + + user = UserID.from_string(event.sender) + + assert self.hs.is_mine(user), "User must be our own: %s" % (user,) + + if event.is_state(): + prev_state = yield self.deduplicate_state_event(event, context) + if prev_state is not None: + defer.returnValue(prev_state) + + yield self.handle_new_client_event( + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + ) + + if event.type == EventTypes.Message: + presence = self.hs.get_presence_handler() + # We don't want to block sending messages on any presence code. This + # matters as sometimes presence code can take a while. + preserve_fn(presence.bump_presence_active_time)(user) + + @defer.inlineCallbacks + def deduplicate_state_event(self, event, context): + """ + Checks whether event is in the latest resolved state in context. + + If so, returns the version of the event in context. + Otherwise, returns None. + """ + prev_event_id = context.prev_state_ids.get((event.type, event.state_key)) + prev_event = yield self.store.get_event(prev_event_id, allow_none=True) + if not prev_event: + return + + if prev_event and event.user_id == prev_event.user_id: + prev_content = encode_canonical_json(prev_event.content) + next_content = encode_canonical_json(event.content) + if prev_content == next_content: + defer.returnValue(prev_event) + return + + @defer.inlineCallbacks + def create_and_send_nonmember_event( + self, + requester, + event_dict, + ratelimit=True, + txn_id=None + ): + """ + Creates an event, then sends it. + + See self.create_event and self.send_nonmember_event. + """ + event, context = yield self.create_event( + requester, + event_dict, + token_id=requester.access_token_id, + txn_id=txn_id + ) + + spam_error = self.spam_checker.check_event_for_spam(event) + if spam_error: + if not isinstance(spam_error, basestring): + spam_error = "Spam is not permitted here" + raise SynapseError( + 403, spam_error, Codes.FORBIDDEN + ) + + yield self.send_nonmember_event( + requester, + event, + context, + ratelimit=ratelimit, + ) + defer.returnValue(event) + @measure_func("_create_new_client_event") @defer.inlineCallbacks def _create_new_client_event(self, builder, requester=None, prev_event_ids=None): @@ -509,9 +525,7 @@ class MessageHandler(BaseHandler): builder.prev_events = prev_events builder.depth = depth - state_handler = self.state_handler - - context = yield state_handler.compute_event_context(builder) + context = yield self.state.compute_event_context(builder) if requester: context.app_service = requester.app_service @@ -551,7 +565,7 @@ class MessageHandler(BaseHandler): # We now need to go and hit out to wherever we need to hit out to. if ratelimit: - yield self.ratelimit(requester) + yield self.base_handler.ratelimit(requester) try: yield self.auth.check_from_context(event, context) @@ -567,7 +581,7 @@ class MessageHandler(BaseHandler): logger.exception("Failed to encode content: %r", event.content) raise - yield self.maybe_kick_guest_users(event, context) + yield self.base_handler.maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: # Check the alias is acually valid (at this time at least) From 3fa362502cc6c509bac65753954c313d307035e6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:52:07 +0000 Subject: [PATCH 158/171] Update places where we create events --- synapse/handlers/directory.py | 7 +++---- synapse/handlers/federation.py | 18 ++++++++---------- synapse/handlers/room.py | 10 ++++------ synapse/handlers/room_member.py | 20 +++++++++++--------- synapse/rest/client/v1/admin.py | 4 ++-- synapse/rest/client/v1/room.py | 16 +++++++++------- synapse/server.py | 5 +++++ 7 files changed, 42 insertions(+), 38 deletions(-) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index a0464ae5c..8580ada60 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -34,6 +34,7 @@ class DirectoryHandler(BaseHandler): self.state = hs.get_state_handler() self.appservice_handler = hs.get_application_service_handler() + self.event_creation_handler = hs.get_event_creation_handler() self.federation = hs.get_replication_layer() self.federation.register_query_handler( @@ -249,8 +250,7 @@ class DirectoryHandler(BaseHandler): def send_room_alias_update_event(self, requester, user_id, room_id): aliases = yield self.store.get_aliases_for_room(room_id) - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Aliases, @@ -272,8 +272,7 @@ class DirectoryHandler(BaseHandler): if not alias_event or alias_event.content.get("alias", "") != alias_str: return - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.CanonicalAlias, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8ee9434c9..e6b9f5cf5 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -75,6 +75,7 @@ class FederationHandler(BaseHandler): self.is_mine_id = hs.is_mine_id self.pusher_pool = hs.get_pusherpool() self.spam_checker = hs.get_spam_checker() + self.event_creation_handler = hs.get_event_creation_handler() self.replication_layer.set_handler(self) @@ -1007,8 +1008,7 @@ class FederationHandler(BaseHandler): }) try: - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) except AuthError as e: @@ -1248,8 +1248,7 @@ class FederationHandler(BaseHandler): "state_key": user_id, }) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) @@ -2120,8 +2119,7 @@ class FederationHandler(BaseHandler): if (yield self.auth.check_host_in_room(room_id, self.hs.hostname)): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder ) @@ -2159,8 +2157,7 @@ class FederationHandler(BaseHandler): """ builder = self.event_builder_factory.new(event_dict) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) @@ -2210,8 +2207,9 @@ class FederationHandler(BaseHandler): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event(builder=builder) + event, context = yield self.event_creation_handler._create_new_client_event( + builder=builder, + ) defer.returnValue((event, context)) @defer.inlineCallbacks diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index d1cc87a01..4ea5bf1bc 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -64,6 +64,7 @@ class RoomCreationHandler(BaseHandler): super(RoomCreationHandler, self).__init__(hs) self.spam_checker = hs.get_spam_checker() + self.event_creation_handler = hs.get_event_creation_handler() @defer.inlineCallbacks def create_room(self, requester, config, ratelimit=True): @@ -163,13 +164,11 @@ class RoomCreationHandler(BaseHandler): creation_content = config.get("creation_content", {}) - msg_handler = self.hs.get_handlers().message_handler room_member_handler = self.hs.get_handlers().room_member_handler yield self._send_events_for_new_room( requester, room_id, - msg_handler, room_member_handler, preset_config=preset_config, invite_list=invite_list, @@ -181,7 +180,7 @@ class RoomCreationHandler(BaseHandler): if "name" in config: name = config["name"] - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Name, @@ -194,7 +193,7 @@ class RoomCreationHandler(BaseHandler): if "topic" in config: topic = config["topic"] - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Topic, @@ -249,7 +248,6 @@ class RoomCreationHandler(BaseHandler): self, creator, # A Requester object. room_id, - msg_handler, room_member_handler, preset_config, invite_list, @@ -272,7 +270,7 @@ class RoomCreationHandler(BaseHandler): @defer.inlineCallbacks def send(etype, content, **kwargs): event = create(etype, content, **kwargs) - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( creator, event, ratelimit=False diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7e6467cd1..ab58beb0f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -46,6 +46,7 @@ class RoomMemberHandler(BaseHandler): super(RoomMemberHandler, self).__init__(hs) self.profile_handler = hs.get_profile_handler() + self.event_creation_hander = hs.get_event_creation_handler() self.member_linearizer = Linearizer(name="member") @@ -66,13 +67,12 @@ class RoomMemberHandler(BaseHandler): ): if content is None: content = {} - msg_handler = self.hs.get_handlers().message_handler content["membership"] = membership if requester.is_guest: content["kind"] = "guest" - event, context = yield msg_handler.create_event( + event, context = yield self.event_creation_hander.create_event( requester, { "type": EventTypes.Member, @@ -90,12 +90,14 @@ class RoomMemberHandler(BaseHandler): ) # Check if this event matches the previous membership event for the user. - duplicate = yield msg_handler.deduplicate_state_event(event, context) + duplicate = yield self.event_creation_hander.deduplicate_state_event( + event, context, + ) if duplicate is not None: # Discard the new event since this membership change is a no-op. defer.returnValue(duplicate) - yield msg_handler.handle_new_client_event( + yield self.event_creation_hander.handle_new_client_event( requester, event, context, @@ -394,8 +396,9 @@ class RoomMemberHandler(BaseHandler): else: requester = synapse.types.create_requester(target_user) - message_handler = self.hs.get_handlers().message_handler - prev_event = yield message_handler.deduplicate_state_event(event, context) + prev_event = yield self.event_creation_hander.deduplicate_state_event( + event, context, + ) if prev_event is not None: return @@ -412,7 +415,7 @@ class RoomMemberHandler(BaseHandler): if is_blocked: raise SynapseError(403, "This room has been blocked on this server") - yield message_handler.handle_new_client_event( + yield self.event_creation_hander.handle_new_client_event( requester, event, context, @@ -644,8 +647,7 @@ class RoomMemberHandler(BaseHandler): ) ) - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_hander.create_and_send_nonmember_event( requester, { "type": EventTypes.ThirdPartyInvite, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 0615e5d80..f77f64667 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -171,6 +171,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): self.store = hs.get_datastore() self.handlers = hs.get_handlers() self.state = hs.get_state_handler() + self.event_creation_handler = hs.get_event_creation_handler() @defer.inlineCallbacks def on_POST(self, request, room_id): @@ -203,8 +204,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ) new_room_id = info["room_id"] - msg_handler = self.handlers.message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( room_creator_requester, { "type": "m.room.message", diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 867ec8602..ad6534537 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -82,6 +82,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomStateEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): # /room/$roomid/state/$eventtype @@ -162,15 +163,16 @@ class RoomStateEventRestServlet(ClientV1RestServlet): content=content, ) else: - msg_handler = self.handlers.message_handler - event, context = yield msg_handler.create_event( + event, context = yield self.event_creation_hander.create_event( requester, event_dict, token_id=requester.access_token_id, txn_id=txn_id, ) - yield msg_handler.send_nonmember_event(requester, event, context) + yield self.event_creation_hander.send_nonmember_event( + requester, event, context, + ) ret = {} if event: @@ -184,6 +186,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomSendEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): # /rooms/$roomid/send/$event_type[/$txn_id] @@ -205,8 +208,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): if 'ts' in request.args and requester.app_service: event_dict['origin_server_ts'] = parse_integer(request, "ts", 0) - msg_handler = self.handlers.message_handler - event = yield msg_handler.create_and_send_nonmember_event( + event = yield self.event_creation_hander.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id, @@ -670,6 +672,7 @@ class RoomRedactEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomRedactEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_handler = hs.get_event_creation_handler() def register(self, http_server): PATTERNS = ("/rooms/(?P[^/]*)/redact/(?P[^/]*)") @@ -680,8 +683,7 @@ class RoomRedactEventRestServlet(ClientV1RestServlet): requester = yield self.auth.get_user_by_req(request) content = parse_json_object_from_request(request) - msg_handler = self.handlers.message_handler - event = yield msg_handler.create_and_send_nonmember_event( + event = yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Redaction, diff --git a/synapse/server.py b/synapse/server.py index 3173aed1d..fbd602d40 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -55,6 +55,7 @@ from synapse.handlers.read_marker import ReadMarkerHandler from synapse.handlers.user_directory import UserDirectoryHandler from synapse.handlers.groups_local import GroupsLocalHandler from synapse.handlers.profile import ProfileHandler +from synapse.handlers.message import EventCreationHandler from synapse.groups.groups_server import GroupsServerHandler from synapse.groups.attestations import GroupAttestionRenewer, GroupAttestationSigning from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory @@ -118,6 +119,7 @@ class HomeServer(object): 'application_service_handler', 'device_message_handler', 'profile_handler', + 'event_creation_handler', 'deactivate_account_handler', 'set_password_handler', 'notifier', @@ -276,6 +278,9 @@ class HomeServer(object): def build_profile_handler(self): return ProfileHandler(self) + def build_event_creation_handler(self): + return EventCreationHandler(self) + def build_deactivate_account_handler(self): return DeactivateAccountHandler(self) From 25c0a020f430de2781041c8104a5cf92321a60a2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:52:12 +0000 Subject: [PATCH 159/171] Updates tests --- tests/storage/test_redaction.py | 9 ++++----- tests/storage/test_roommember.py | 5 ++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 6afaca3a6..de6d7904e 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -36,8 +36,7 @@ class RedactionTestCase(unittest.TestCase): self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() - self.handlers = hs.get_handlers() - self.message_handler = self.handlers.message_handler + self.event_creation_handler = hs.get_event_creation_handler() self.u_alice = UserID.from_string("@alice:test") self.u_bob = UserID.from_string("@bob:test") @@ -59,7 +58,7 @@ class RedactionTestCase(unittest.TestCase): "content": content, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) @@ -79,7 +78,7 @@ class RedactionTestCase(unittest.TestCase): "content": {"body": body, "msgtype": u"message"}, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) @@ -98,7 +97,7 @@ class RedactionTestCase(unittest.TestCase): "redacts": event_id, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 1be7d932f..4aff38bd5 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -37,8 +37,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): # storage logic self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() - self.handlers = hs.get_handlers() - self.message_handler = self.handlers.message_handler + self.event_creation_handler = hs.get_event_creation_handler() self.u_alice = UserID.from_string("@alice:test") self.u_bob = UserID.from_string("@bob:test") @@ -58,7 +57,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): "content": {"membership": membership}, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) From 3c7b480ba33c68bfc4e98de57b6874c32011c8f4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:07:13 +0000 Subject: [PATCH 160/171] Factor out common code for search insert we can reuse the same code as is used for event insert, for doing the background index population. --- synapse/storage/search.py | 95 +++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 4f38a587c..f1ac9ba0f 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -13,19 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import namedtuple +import logging +import re +import ujson as json + from twisted.internet import defer from .background_updates import BackgroundUpdateStore from synapse.api.errors import SynapseError from synapse.storage.engines import PostgresEngine, Sqlite3Engine -import logging -import re -import ujson as json - logger = logging.getLogger(__name__) +SearchEntry = namedtuple('SearchEntry', [ + 'key', 'value', 'event_id', 'room_id', 'stream_ordering', + 'origin_server_ts', +]) + class SearchStore(BackgroundUpdateStore): @@ -49,16 +55,17 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): + # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) - INSERT_CLUMP_SIZE = 1000 TYPES = ["m.room.name", "m.room.message", "m.room.topic"] def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, event_id, room_id, type, content FROM events" + "SELECT stream_ordering, event_id, room_id, type, content, " + " origin_server_ts FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" @@ -67,6 +74,10 @@ class SearchStore(BackgroundUpdateStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + # we could stream straight from the results into + # store_search_entries_txn with a generator function, but that + # would mean having two cursors open on the database at once. + # Instead we just build a list of results. rows = self.cursor_to_dict(txn) if not rows: return 0 @@ -79,6 +90,8 @@ class SearchStore(BackgroundUpdateStore): event_id = row["event_id"] room_id = row["room_id"] etype = row["type"] + stream_ordering = row["stream_ordering"] + origin_server_ts = row["origin_server_ts"] try: content = json.loads(row["content"]) except Exception: @@ -93,6 +106,8 @@ class SearchStore(BackgroundUpdateStore): elif etype == "m.room.name": key = "content.name" value = content["name"] + else: + raise Exception("unexpected event type %s" % etype) except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. @@ -103,25 +118,16 @@ class SearchStore(BackgroundUpdateStore): # then skip over it continue - event_search_rows.append((event_id, room_id, key, value)) + event_search_rows.append(SearchEntry( + key=key, + value=value, + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + origin_server_ts=origin_server_ts, + )) - if isinstance(self.database_engine, PostgresEngine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - - for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) + self.store_search_entries_txn(txn, event_search_rows) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -251,26 +257,49 @@ class SearchStore(BackgroundUpdateStore): key (str): value (str): """ + self.store_search_entries_txn( + txn, + (SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ),), + ) + + def store_search_entries_txn(self, txn, entries): + """Add entries to the search table + + Args: + txn (cursor): + entries (iterable[SearchEntry]): + entries to be added to the table + """ if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) + + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + entry.stream_ordering, entry.origin_server_ts, + ) for entry in entries) + + txn.executemany(sql, args) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + ) for entry in entries) + + txn.executemany(sql, args) else: # This should be unreachable. raise Exception("Unrecognized database engine") From ee6fb4cf8560534a9acc61b075c09dceeca83e85 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 20 Jan 2018 00:23:36 +0000 Subject: [PATCH 161/171] Remove redundant return value from _calculate_state_delta we already have the state from _get_new_state_after_events, so returning it from _calculate_state_delta is just confusing. --- synapse/storage/events.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dd28c2efe..2fead9eb0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -342,8 +342,20 @@ class EventsStore(SQLBaseStore): # NB: Assumes that we are only persisting events for one room # at a time. + + # map room_id->list[event_ids] giving the new forward + # extremities in each room new_forward_extremeties = {} + + # map room_id->(type,state_key)->event_id tracking the full + # state in each room after adding these events current_state_for_room = {} + + # map room_id->(to_delete, to_insert) where each entry is + # a map (type,key)->event_id giving the state delta in each + # room + state_delta_for_room = {} + if not backfilled: with Measure(self._clock, "_calculate_state_and_extrem"): # Work out the new "current state" for each room. @@ -393,11 +405,12 @@ class EventsStore(SQLBaseStore): ev_ctx_rm, new_latest_event_ids, ) if current_state is not None: + current_state_for_room[room_id] = current_state delta = yield self._calculate_state_delta( room_id, current_state, ) if delta is not None: - current_state_for_room[room_id] = delta + state_delta_for_room[room_id] = delta yield self.runInteraction( "persist_events", @@ -405,7 +418,7 @@ class EventsStore(SQLBaseStore): events_and_contexts=chunk, backfilled=backfilled, delete_existing=delete_existing, - current_state_for_room=current_state_for_room, + state_delta_for_room=state_delta_for_room, new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) @@ -422,7 +435,7 @@ class EventsStore(SQLBaseStore): event_counter.inc(event.type, origin_type, origin_entity) - for room_id, (_, _, new_state) in current_state_for_room.iteritems(): + for room_id, new_state in current_state_for_room.iteritems(): self.get_current_state_ids.prefill( (room_id, ), new_state ) @@ -586,10 +599,10 @@ class EventsStore(SQLBaseStore): Assumes that we are only persisting events for one room at a time. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + 2-tuple (to_delete, to_insert) where both are state dicts, i.e. (type, state_key) -> event_id. `to_delete` are the entries to first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. + to insert. """ existing_state = yield self.get_current_state_ids(room_id) @@ -610,7 +623,7 @@ class EventsStore(SQLBaseStore): if ev_id in events_to_insert } - defer.returnValue((to_delete, to_insert, current_state)) + defer.returnValue((to_delete, to_insert)) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -670,7 +683,7 @@ class EventsStore(SQLBaseStore): @log_function def _persist_events_txn(self, txn, events_and_contexts, backfilled, - delete_existing=False, current_state_for_room={}, + delete_existing=False, state_delta_for_room={}, new_forward_extremeties={}): """Insert some number of room events into the necessary database tables. @@ -686,7 +699,7 @@ class EventsStore(SQLBaseStore): delete_existing (bool): True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. - current_state_for_room (dict[str, (list[str], list[str])]): + state_delta_for_room (dict[str, (list[str], list[str])]): The current-state delta for each room. For each room, a tuple (to_delete, to_insert), being a list of event ids to be removed from the current state, and a list of event ids to be added to @@ -698,7 +711,7 @@ class EventsStore(SQLBaseStore): """ max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering - self._update_current_state_txn(txn, current_state_for_room, max_stream_order) + self._update_current_state_txn(txn, state_delta_for_room, max_stream_order) self._update_forward_extremities_txn( txn, @@ -764,7 +777,7 @@ class EventsStore(SQLBaseStore): def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in state_delta_by_room.iteritems(): - to_delete, to_insert, _ = current_state_tuple + to_delete, to_insert = current_state_tuple txn.executemany( "DELETE FROM current_state_events WHERE event_id = ?", [(ev_id,) for ev_id in to_delete.itervalues()], From 3d33eef6fcbba474664a9bccdcb8822c6f72ee8c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 14:31:24 +0000 Subject: [PATCH 162/171] Store state groups separately from events (#2784) * Split state group persist into seperate storage func * Add per database engine code for state group id gen * Move store_state_group to StateReadStore This allows other workers to use it, and so resolve state. * Hook up store_state_group * Fix tests * Rename _store_mult_state_groups_txn * Rename StateGroupReadStore * Remove redundant _have_persisted_state_group_txn * Update comments * Comment compute_event_context * Set start val for state_group_id_seq ... otherwise we try to recreate old state groups * Update comments * Don't store state for outliers * Update comment * Update docstring as state groups are ints --- synapse/events/snapshot.py | 4 +- synapse/handlers/federation.py | 24 +- synapse/replication/slave/storage/events.py | 4 +- synapse/state.py | 58 ++++- synapse/storage/__init__.py | 1 - synapse/storage/engines/postgres.py | 6 + synapse/storage/engines/sqlite3.py | 19 ++ synapse/storage/events.py | 10 +- .../schema/delta/47/state_group_seq.py | 37 +++ synapse/storage/state.py | 224 +++++++++--------- .../replication/slave/storage/test_events.py | 4 +- tests/test_state.py | 154 ++++++------ 12 files changed, 341 insertions(+), 204 deletions(-) create mode 100644 synapse/storage/schema/delta/47/state_group_seq.py diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index e9a732ff0..87e3fe7b9 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -25,7 +25,9 @@ class EventContext(object): The current state map excluding the current event. (type, state_key) -> event_id - state_group (int): state group id + state_group (int|None): state group id, if the state has been stored + as a state group. This is usually only None if e.g. the event is + an outlier. rejected (bool|str): A rejection reason if the event was rejected, else False diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8ee9434c9..643e813b1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1831,8 +1831,8 @@ class FederationHandler(BaseHandler): current_state = set(e.event_id for e in auth_events.values()) different_auth = event_auth_events - current_state - self._update_context_for_auth_events( - context, auth_events, event_key, + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, ) if different_auth and not event.internal_metadata.is_outlier(): @@ -1913,8 +1913,8 @@ class FederationHandler(BaseHandler): # 4. Look at rejects and their proofs. # TODO. - self._update_context_for_auth_events( - context, auth_events, event_key, + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, ) try: @@ -1923,11 +1923,15 @@ class FederationHandler(BaseHandler): logger.warn("Failed auth resolution for %r because %s", event, e) raise e - def _update_context_for_auth_events(self, context, auth_events, + @defer.inlineCallbacks + def _update_context_for_auth_events(self, event, context, auth_events, event_key): - """Update the state_ids in an event context after auth event resolution + """Update the state_ids in an event context after auth event resolution, + storing the changes as a new state group. Args: + event (Event): The event we're handling the context for + context (synapse.events.snapshot.EventContext): event context to be updated @@ -1950,7 +1954,13 @@ class FederationHandler(BaseHandler): context.prev_state_ids.update({ k: a.event_id for k, a in auth_events.iteritems() }) - context.state_group = self.store.get_next_state_group() + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=context.prev_group, + delta_ids=context.delta_ids, + current_state_ids=context.current_state_ids, + ) @defer.inlineCallbacks def construct_auth_difference(self, local_auth, remote_auth): diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 29d7296b4..8acb5df0f 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.roommember import RoomMemberStore -from synapse.storage.state import StateGroupReadStore +from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore @@ -37,7 +37,7 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(StateGroupReadStore, BaseSlavedStore): +class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) diff --git a/synapse/state.py b/synapse/state.py index 273f9911c..cc93bbcb6 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -183,8 +183,15 @@ class StateHandler(object): def compute_event_context(self, event, old_state=None): """Build an EventContext structure for the event. + This works out what the current state should be for the event, and + generates a new state group if necessary. + Args: event (synapse.events.EventBase): + old_state (dict|None): The state at the event if it can't be + calculated from existing events. This is normally only specified + when receiving an event from federation where we don't have the + prev events for, e.g. when backfilling. Returns: synapse.events.snapshot.EventContext: """ @@ -208,15 +215,22 @@ class StateHandler(object): context.current_state_ids = {} context.prev_state_ids = {} context.prev_state_events = [] - context.state_group = self.store.get_next_state_group() + + # We don't store state for outliers, so we don't generate a state + # froup for it. + context.state_group = None + defer.returnValue(context) if old_state: + # We already have the state, so we don't need to calculate it. + # Let's just correctly fill out the context and create a + # new state group for it. + context = EventContext() context.prev_state_ids = { (s.type, s.state_key): s.event_id for s in old_state } - context.state_group = self.store.get_next_state_group() if event.is_state(): key = (event.type, event.state_key) @@ -229,6 +243,14 @@ class StateHandler(object): else: context.current_state_ids = context.prev_state_ids + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=None, + delta_ids=None, + current_state_ids=context.current_state_ids, + ) + context.prev_state_events = [] defer.returnValue(context) @@ -242,7 +264,8 @@ class StateHandler(object): context = EventContext() context.prev_state_ids = curr_state if event.is_state(): - context.state_group = self.store.get_next_state_group() + # If this is a state event then we need to create a new state + # group for the state after this event. key = (event.type, event.state_key) if key in context.prev_state_ids: @@ -253,24 +276,43 @@ class StateHandler(object): context.current_state_ids[key] = event.event_id if entry.state_group: + # If the state at the event has a state group assigned then + # we can use that as the prev group context.prev_group = entry.state_group context.delta_ids = { key: event.event_id } elif entry.prev_group: + # If the state at the event only has a prev group, then we can + # use that as a prev group too. context.prev_group = entry.prev_group context.delta_ids = dict(entry.delta_ids) context.delta_ids[key] = event.event_id - else: - if entry.state_group is None: - entry.state_group = self.store.get_next_state_group() - entry.state_id = entry.state_group - context.state_group = entry.state_group + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=context.prev_group, + delta_ids=context.delta_ids, + current_state_ids=context.current_state_ids, + ) + else: context.current_state_ids = context.prev_state_ids context.prev_group = entry.prev_group context.delta_ids = entry.delta_ids + if entry.state_group is None: + entry.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=entry.prev_group, + delta_ids=entry.delta_ids, + current_state_ids=context.current_state_ids, + ) + entry.state_id = entry.state_group + + context.state_group = entry.state_group + context.prev_state_events = [] defer.returnValue(context) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index d01d46338..f8fbd02ce 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -124,7 +124,6 @@ class DataStore(RoomMemberStore, RoomStore, ) self._transaction_id_gen = IdGenerator(db_conn, "sent_transactions", "id") - self._state_groups_id_gen = IdGenerator(db_conn, "state_groups", "id") self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id") self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index a6ae79dfa..8a0386c1a 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -62,3 +62,9 @@ class PostgresEngine(object): def lock_table(self, txn, table): txn.execute("LOCK TABLE %s in EXCLUSIVE MODE" % (table,)) + + def get_next_state_group_id(self, txn): + """Returns an int that can be used as a new state_group ID + """ + txn.execute("SELECT nextval('state_group_id_seq')") + return txn.fetchone()[0] diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite3.py index 755c9a1f0..60f0fa7fb 100644 --- a/synapse/storage/engines/sqlite3.py +++ b/synapse/storage/engines/sqlite3.py @@ -16,6 +16,7 @@ from synapse.storage.prepare_database import prepare_database import struct +import threading class Sqlite3Engine(object): @@ -24,6 +25,11 @@ class Sqlite3Engine(object): def __init__(self, database_module, database_config): self.module = database_module + # The current max state_group, or None if we haven't looked + # in the DB yet. + self._current_state_group_id = None + self._current_state_group_id_lock = threading.Lock() + def check_database(self, txn): pass @@ -43,6 +49,19 @@ class Sqlite3Engine(object): def lock_table(self, txn, table): return + def get_next_state_group_id(self, txn): + """Returns an int that can be used as a new state_group ID + """ + # We do application locking here since if we're using sqlite then + # we are a single process synapse. + with self._current_state_group_id_lock: + if self._current_state_group_id is None: + txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups") + self._current_state_group_id = txn.fetchone()[0] + + self._current_state_group_id += 1 + return self._current_state_group_id + # Following functions taken from: https://github.com/coleifer/peewee diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2fead9eb0..af56f1ee5 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -755,9 +755,8 @@ class EventsStore(SQLBaseStore): events_and_contexts=events_and_contexts, ) - # Insert into the state_groups, state_groups_state, and - # event_to_state_groups tables. - self._store_mult_state_groups_txn(txn, events_and_contexts) + # Insert into event_to_state_groups. + self._store_event_state_mappings_txn(txn, events_and_contexts) # _store_rejected_events_txn filters out any events which were # rejected, and returns the filtered list. @@ -992,10 +991,9 @@ class EventsStore(SQLBaseStore): # an outlier in the database. We now have some state at that # so we need to update the state_groups table with that state. - # insert into the state_group, state_groups_state and - # event_to_state_groups tables. + # insert into event_to_state_groups. try: - self._store_mult_state_groups_txn(txn, ((event, context),)) + self._store_event_state_mappings_txn(txn, ((event, context),)) except Exception: logger.exception("") raise diff --git a/synapse/storage/schema/delta/47/state_group_seq.py b/synapse/storage/schema/delta/47/state_group_seq.py new file mode 100644 index 000000000..f6766501d --- /dev/null +++ b/synapse/storage/schema/delta/47/state_group_seq.py @@ -0,0 +1,37 @@ +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.storage.engines import PostgresEngine + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + # if we already have some state groups, we want to start making new + # ones with a higher id. + cur.execute("SELECT max(id) FROM state_groups") + row = cur.fetchone() + + if row[0] is None: + start_val = 1 + else: + start_val = row[0] + 1 + + cur.execute( + "CREATE SEQUENCE state_group_id_seq START WITH %s", + (start_val, ), + ) + + +def run_upgrade(*args, **kwargs): + pass diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 360e3e435..adb48df73 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -42,11 +42,8 @@ class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delt return len(self.delta_ids) if self.delta_ids else 0 -class StateGroupReadStore(SQLBaseStore): - """The read-only parts of StateGroupStore - - None of these functions write to the state tables, so are suitable for - including in the SlavedStores. +class StateGroupWorkerStore(SQLBaseStore): + """The parts of StateGroupStore that can be called from workers. """ STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" @@ -54,7 +51,7 @@ class StateGroupReadStore(SQLBaseStore): CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" def __init__(self, db_conn, hs): - super(StateGroupReadStore, self).__init__(db_conn, hs) + super(StateGroupWorkerStore, self).__init__(db_conn, hs) self._state_group_cache = DictionaryCache( "*stateGroupCache*", 100000 * CACHE_SIZE_FACTOR @@ -549,8 +546,117 @@ class StateGroupReadStore(SQLBaseStore): defer.returnValue(results) + def store_state_group(self, event_id, room_id, prev_group, delta_ids, + current_state_ids): + """Store a new set of state, returning a newly assigned state group. -class StateStore(StateGroupReadStore, BackgroundUpdateStore): + Args: + event_id (str): The event ID for which the state was calculated + room_id (str) + prev_group (int|None): A previous state group for the room, optional. + delta_ids (dict|None): The delta between state at `prev_group` and + `current_state_ids`, if `prev_group` was given. Same format as + `current_state_ids`. + current_state_ids (dict): The state to store. Map of (type, state_key) + to event_id. + + Returns: + Deferred[int]: The state group ID + """ + def _store_state_group_txn(txn): + if current_state_ids is None: + # AFAIK, this can never happen + raise Exception("current_state_ids cannot be None") + + state_group = self.database_engine.get_next_state_group_id(txn) + + self._simple_insert_txn( + txn, + table="state_groups", + values={ + "id": state_group, + "room_id": room_id, + "event_id": event_id, + }, + ) + + # We persist as a delta if we can, while also ensuring the chain + # of deltas isn't tooo long, as otherwise read performance degrades. + if prev_group: + is_in_db = self._simple_select_one_onecol_txn( + txn, + table="state_groups", + keyvalues={"id": prev_group}, + retcol="id", + allow_none=True, + ) + if not is_in_db: + raise Exception( + "Trying to persist state with unpersisted prev_group: %r" + % (prev_group,) + ) + + potential_hops = self._count_state_group_hops_txn( + txn, prev_group + ) + if prev_group and potential_hops < MAX_STATE_DELTA_HOPS: + self._simple_insert_txn( + txn, + table="state_group_edges", + values={ + "state_group": state_group, + "prev_state_group": prev_group, + }, + ) + + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": state_group, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in delta_ids.iteritems() + ], + ) + else: + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": state_group, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in current_state_ids.iteritems() + ], + ) + + # Prefill the state group cache with this group. + # It's fine to use the sequence like this as the state group map + # is immutable. (If the map wasn't immutable then this prefill could + # race with another update) + txn.call_after( + self._state_group_cache.update, + self._state_group_cache.sequence, + key=state_group, + value=dict(current_state_ids), + full=True, + ) + + return state_group + + return self.runInteraction("store_state_group", _store_state_group_txn) + + +class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): """ Keeps track of the state at a given event. This is done by the concept of `state groups`. Every event is a assigned @@ -591,27 +697,12 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): where_clause="type='m.room.member'", ) - def _have_persisted_state_group_txn(self, txn, state_group): - txn.execute( - "SELECT count(*) FROM state_groups WHERE id = ?", - (state_group,) - ) - row = txn.fetchone() - return row and row[0] - - def _store_mult_state_groups_txn(self, txn, events_and_contexts): + def _store_event_state_mappings_txn(self, txn, events_and_contexts): state_groups = {} for event, context in events_and_contexts: if event.internal_metadata.is_outlier(): continue - if context.current_state_ids is None: - # AFAIK, this can never happen - logger.error( - "Non-outlier event %s had current_state_ids==None", - event.event_id) - continue - # if the event was rejected, just give it the same state as its # predecessor. if context.rejected: @@ -620,90 +711,6 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): state_groups[event.event_id] = context.state_group - if self._have_persisted_state_group_txn(txn, context.state_group): - continue - - self._simple_insert_txn( - txn, - table="state_groups", - values={ - "id": context.state_group, - "room_id": event.room_id, - "event_id": event.event_id, - }, - ) - - # We persist as a delta if we can, while also ensuring the chain - # of deltas isn't tooo long, as otherwise read performance degrades. - if context.prev_group: - is_in_db = self._simple_select_one_onecol_txn( - txn, - table="state_groups", - keyvalues={"id": context.prev_group}, - retcol="id", - allow_none=True, - ) - if not is_in_db: - raise Exception( - "Trying to persist state with unpersisted prev_group: %r" - % (context.prev_group,) - ) - - potential_hops = self._count_state_group_hops_txn( - txn, context.prev_group - ) - if context.prev_group and potential_hops < MAX_STATE_DELTA_HOPS: - self._simple_insert_txn( - txn, - table="state_group_edges", - values={ - "state_group": context.state_group, - "prev_state_group": context.prev_group, - }, - ) - - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.delta_ids.iteritems() - ], - ) - else: - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.current_state_ids.iteritems() - ], - ) - - # Prefill the state group cache with this group. - # It's fine to use the sequence like this as the state group map - # is immutable. (If the map wasn't immutable then this prefill could - # race with another update) - txn.call_after( - self._state_group_cache.update, - self._state_group_cache.sequence, - key=context.state_group, - value=dict(context.current_state_ids), - full=True, - ) - self._simple_insert_many_txn( txn, table="event_to_state_groups", @@ -763,9 +770,6 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): return count - def get_next_state_group(self): - return self._state_groups_id_gen.get_next() - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 105e1228b..f430cce93 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -226,11 +226,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): context = EventContext() context.current_state_ids = state_ids context.prev_state_ids = state_ids - elif not backfill: + else: state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - else: - context = EventContext() context.push_actions = push_actions diff --git a/tests/test_state.py b/tests/test_state.py index d16e1b3b8..a5c5e5595 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -80,14 +80,14 @@ class StateGroupStore(object): return defer.succeed(groups) - def store_state_groups(self, event, context): - if context.current_state_ids is None: - return + def store_state_group(self, event_id, room_id, prev_group, delta_ids, + current_state_ids): + state_group = self._next_group + self._next_group += 1 - state_events = dict(context.current_state_ids) + self._group_to_state[state_group] = dict(current_state_ids) - self._group_to_state[context.state_group] = state_events - self._event_to_state_group[event.event_id] = context.state_group + return state_group def get_events(self, event_ids, **kwargs): return { @@ -95,10 +95,19 @@ class StateGroupStore(object): if e_id in self._event_id_to_event } + def get_state_group_delta(self, name): + return (None, None) + def register_events(self, events): for e in events: self._event_id_to_event[e.event_id] = e + def register_event_context(self, event, context): + self._event_to_state_group[event.event_id] = context.state_group + + def register_event_id_state_group(self, event_id, state_group): + self._event_to_state_group[event_id] = state_group + class DictObj(dict): def __init__(self, **kwargs): @@ -137,15 +146,7 @@ class Graph(object): class StateTestCase(unittest.TestCase): def setUp(self): - self.store = Mock( - spec_set=[ - "get_state_groups_ids", - "add_event_hashes", - "get_events", - "get_next_state_group", - "get_state_group_delta", - ] - ) + self.store = StateGroupStore() hs = Mock(spec_set=[ "get_datastore", "get_auth", "get_state_handler", "get_clock", "get_state_resolution_handler", @@ -156,9 +157,6 @@ class StateTestCase(unittest.TestCase): hs.get_auth.return_value = Auth(hs) hs.get_state_resolution_handler = lambda: StateResolutionHandler(hs) - self.store.get_next_state_group.side_effect = Mock - self.store.get_state_group_delta.return_value = (None, None) - self.state = StateHandler(hs) self.event_id = 0 @@ -197,14 +195,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertEqual(2, len(context_store["D"].prev_state_ids)) @@ -249,16 +246,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -315,16 +309,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -398,16 +389,13 @@ class StateTestCase(unittest.TestCase): self._add_depths(nodes, edges) graph = Graph(nodes, edges) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -467,7 +455,11 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_trivial_annotate_message(self): - event = create_event(type="test_message", name="event") + prev_event_id = "prev_event_id" + event = create_event( + type="test_message", name="event2", + prev_events=[(prev_event_id, {})], + ) old_state = [ create_event(type="test1", state_key="1"), @@ -475,11 +467,11 @@ class StateTestCase(unittest.TestCase): create_event(type="test2", state_key=""), ] - group_name = "group_name_1" - - self.store.get_state_groups_ids.return_value = { - group_name: {(e.type, e.state_key): e.event_id for e in old_state}, - } + group_name = self.store.store_state_group( + prev_event_id, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state}, + ) + self.store.register_event_id_state_group(prev_event_id, group_name) context = yield self.state.compute_event_context(event) @@ -492,7 +484,11 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_trivial_annotate_state(self): - event = create_event(type="state", state_key="", name="event") + prev_event_id = "prev_event_id" + event = create_event( + type="state", state_key="", name="event2", + prev_events=[(prev_event_id, {})], + ) old_state = [ create_event(type="test1", state_key="1"), @@ -500,11 +496,11 @@ class StateTestCase(unittest.TestCase): create_event(type="test2", state_key=""), ] - group_name = "group_name_1" - - self.store.get_state_groups_ids.return_value = { - group_name: {(e.type, e.state_key): e.event_id for e in old_state}, - } + group_name = self.store.store_state_group( + prev_event_id, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state}, + ) + self.store.register_event_id_state_group(prev_event_id, group_name) context = yield self.state.compute_event_context(event) @@ -517,7 +513,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_resolve_message_conflict(self): - event = create_event(type="test_message", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test_message", name="event3", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) creation = create_event( type=EventTypes.Create, state_key="" @@ -537,12 +538,12 @@ class StateTestCase(unittest.TestCase): create_event(type="test4", state_key=""), ] - store = StateGroupStore() - store.register_events(old_state_1) - store.register_events(old_state_2) - self.store.get_events = store.get_events + self.store.register_events(old_state_1) + self.store.register_events(old_state_2) - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual(len(context.current_state_ids), 6) @@ -550,7 +551,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_resolve_state_conflict(self): - event = create_event(type="test4", state_key="", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test4", state_key="", name="event", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) creation = create_event( type=EventTypes.Create, state_key="" @@ -575,7 +581,9 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_2) self.store.get_events = store.get_events - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual(len(context.current_state_ids), 6) @@ -583,7 +591,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_standard_depth_conflict(self): - event = create_event(type="test4", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test4", name="event", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) member_event = create_event( type=EventTypes.Member, @@ -615,7 +628,9 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_2) self.store.get_events = store.get_events - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual( old_state_2[2].event_id, context.current_state_ids[("test1", "1")] @@ -639,19 +654,26 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_1) store.register_events(old_state_2) - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual( old_state_1[2].event_id, context.current_state_ids[("test1", "1")] ) - def _get_context(self, event, old_state_1, old_state_2): - group_name_1 = "group_name_1" - group_name_2 = "group_name_2" + def _get_context(self, event, prev_event_id_1, old_state_1, prev_event_id_2, + old_state_2): + sg1 = self.store.store_state_group( + prev_event_id_1, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state_1}, + ) + self.store.register_event_id_state_group(prev_event_id_1, sg1) - self.store.get_state_groups_ids.return_value = { - group_name_1: {(e.type, e.state_key): e.event_id for e in old_state_1}, - group_name_2: {(e.type, e.state_key): e.event_id for e in old_state_2}, - } + sg2 = self.store.store_state_group( + prev_event_id_2, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state_2}, + ) + self.store.register_event_id_state_group(prev_event_id_2, sg2) return self.state.compute_event_context(event) From 770b2252ca9b5a74c3e767bfe0c5e7bb2e84ebd3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 16:31:50 +0000 Subject: [PATCH 163/171] s/_create_new_client_event/create_new_client_event/ --- synapse/handlers/federation.py | 10 +++++----- synapse/handlers/message.py | 6 +++--- tests/storage/test_redaction.py | 6 +++--- tests/storage/test_roommember.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e6b9f5cf5..06d6c8425 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1008,7 +1008,7 @@ class FederationHandler(BaseHandler): }) try: - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) except AuthError as e: @@ -1248,7 +1248,7 @@ class FederationHandler(BaseHandler): "state_key": user_id, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) @@ -2119,7 +2119,7 @@ class FederationHandler(BaseHandler): if (yield self.auth.check_host_in_room(room_id, self.hs.hostname)): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder ) @@ -2157,7 +2157,7 @@ class FederationHandler(BaseHandler): """ builder = self.event_builder_factory.new(event_dict) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) @@ -2207,7 +2207,7 @@ class FederationHandler(BaseHandler): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) defer.returnValue((event, context)) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index afa19bf65..e8e6a89a3 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -378,7 +378,7 @@ class EventCreationHandler(object): if txn_id is not None: builder.internal_metadata.txn_id = txn_id - event, context = yield self._create_new_client_event( + event, context = yield self.create_new_client_event( builder=builder, requester=requester, prev_event_ids=prev_event_ids, @@ -486,9 +486,9 @@ class EventCreationHandler(object): ) defer.returnValue(event) - @measure_func("_create_new_client_event") + @measure_func("create_new_client_event") @defer.inlineCallbacks - def _create_new_client_event(self, builder, requester=None, prev_event_ids=None): + def create_new_client_event(self, builder, requester=None, prev_event_ids=None): if prev_event_ids: prev_events = yield self.store.add_event_hashes(prev_event_ids) prev_max_depth = yield self.store.get_max_depth_of_events(prev_event_ids) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index de6d7904e..888ddfadd 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -58,7 +58,7 @@ class RedactionTestCase(unittest.TestCase): "content": content, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) @@ -78,7 +78,7 @@ class RedactionTestCase(unittest.TestCase): "content": {"body": body, "msgtype": u"message"}, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) @@ -97,7 +97,7 @@ class RedactionTestCase(unittest.TestCase): "redacts": event_id, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 4aff38bd5..657b279e5 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -57,7 +57,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): "content": {"membership": membership}, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) From 3e1e69ccafbfdf8aa7c0cd06bc4eaf948a6bafdf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 16:40:38 +0000 Subject: [PATCH 164/171] Update copyright --- synapse/handlers/federation.py | 1 + synapse/handlers/message.py | 2 +- synapse/handlers/room.py | 1 + synapse/handlers/room_member.py | 1 + synapse/rest/client/v1/admin.py | 1 + synapse/rest/client/v1/room.py | 1 + 6 files changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 06d6c8425..cba96111d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e8e6a89a3..154072181 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014 - 2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd +# Copyright 2017 - 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 4ea5bf1bc..6ab020bf4 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014 - 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ab58beb0f..37dc5e99a 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index f77f64667..20c5c6663 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index ad6534537..fbb2fc36e 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From e3624fad5f0dfd3cffcbb7c996a8d29bb2c79dbd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Feb 2018 10:30:25 +0000 Subject: [PATCH 165/171] Remove pointless ratelimit check The intention was for the check to be called as early as possible in the request, but actually was called just before the main ratelimit check, so was fairly pointless. --- synapse/handlers/message.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 154072181..a58fc37ff 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -403,11 +403,6 @@ class EventCreationHandler(object): "Tried to send member event through non-member codepath" ) - # We check here if we are currently being rate limited, so that we - # don't do unnecessary work. We check again just before we actually - # send the event. - yield self.base_handler.ratelimit(requester, update=False) - user = UserID.from_string(event.sender) assert self.hs.is_mine(user), "User must be our own: %s" % (user,) From 5fa571a91b851d372e07217c091b7e3a9ef3d116 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Feb 2018 13:35:08 +0000 Subject: [PATCH 166/171] Tell storage providers about new file so they can upload --- synapse/rest/media/v1/media_storage.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index e8e8b3986..3f8d4b9c2 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -70,6 +70,12 @@ class MediaStorage(object): _write_file_synchronously, source, fname, )) + # Tell the storage providers about the new file. They'll decide + # if they should upload it and whether to do so synchronously + # or not. + for provider in self.storage_providers: + yield provider.store_file(path, file_info) + defer.returnValue(fname) @contextlib.contextmanager From 671540dccf3996620ffe65705904fb911e21fb68 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:27:08 +0000 Subject: [PATCH 167/171] rename delete_old_state -> purge_history (beacause it deletes more than state) --- synapse/handlers/message.py | 2 +- synapse/storage/events.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd..1c7860bb0 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.delete_old_state(room_id, depth) + yield self.store.purge_history(room_id, depth) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec9..21533970d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,16 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def delete_old_state(self, room_id, topological_ordering): - return self.runInteraction( - "delete_old_state", - self._delete_old_state_txn, room_id, topological_ordering - ) - - def _delete_old_state_txn(self, txn, room_id, topological_ordering): - """Deletes old room state + def purge_history(self, room_id, topological_ordering): + """Deletes room history before a certain point """ + return self.runInteraction( + "purge_history", + self._purge_history_txn, room_id, topological_ordering + ) + + def _purge_history_txn(self, txn, room_id, topological_ordering): # Tables that should be pruned: # event_auth # event_backward_extremities From 61ffaa8137ac962f84a077bb53c4a1b06b21b49b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:34:35 +0000 Subject: [PATCH 168/171] bump purge logging to info this thing takes ages and the only sign of any progress is the logs, so having some logs is useful. --- synapse/storage/events.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 21533970d..803a4e247 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2081,7 +2081,7 @@ class EventsStore(SQLBaseStore): 400, "topological_ordering is greater than forward extremeties" ) - logger.debug("[purge] looking for events to delete") + logger.info("[purge] looking for events to delete") txn.execute( "SELECT event_id, state_key FROM events" @@ -2102,7 +2102,7 @@ class EventsStore(SQLBaseStore): for event_id, state_key in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.debug("[purge] Finding new backward extremities") + logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged @@ -2116,7 +2116,7 @@ class EventsStore(SQLBaseStore): ) new_backwards_extrems = txn.fetchall() - logger.debug("[purge] replacing backward extremities: %r", new_backwards_extrems) + logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems) txn.execute( "DELETE FROM event_backward_extremities WHERE room_id = ?", @@ -2132,7 +2132,7 @@ class EventsStore(SQLBaseStore): ] ) - logger.debug("[purge] finding redundant state groups") + logger.info("[purge] finding redundant state groups") # Get all state groups that are only referenced by events that are # to be deleted. @@ -2149,15 +2149,15 @@ class EventsStore(SQLBaseStore): ) state_rows = txn.fetchall() - logger.debug("[purge] found %i redundant state groups", len(state_rows)) + logger.info("[purge] found %i redundant state groups", len(state_rows)) # make a set of the redundant state groups, so that we can look them up # efficiently state_groups_to_delete = set([sg for sg, in state_rows]) # Now we get all the state groups that rely on these state groups - logger.debug("[purge] finding state groups which depend on redundant" - " state groups") + logger.info("[purge] finding state groups which depend on redundant" + " state groups") remaining_state_groups = [] for i in xrange(0, len(state_rows), 100): chunk = [sg for sg, in state_rows[i:i + 100]] @@ -2182,7 +2182,7 @@ class EventsStore(SQLBaseStore): # Now we turn the state groups that reference to-be-deleted state # groups to non delta versions. for sg in remaining_state_groups: - logger.debug("[purge] de-delta-ing remaining state group %s", sg) + logger.info("[purge] de-delta-ing remaining state group %s", sg) curr_state = self._get_state_groups_from_groups_txn( txn, [sg], types=None ) @@ -2219,7 +2219,7 @@ class EventsStore(SQLBaseStore): ], ) - logger.debug("[purge] removing redundant state groups") + logger.info("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", state_rows @@ -2230,13 +2230,13 @@ class EventsStore(SQLBaseStore): ) # Delete all non-state - logger.debug("[purge] removing events from event_to_state_groups") + logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) - logger.debug("[purge] updating room_depth") + logger.info("[purge] updating room_depth") txn.execute( "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", (topological_ordering, room_id,) @@ -2258,7 +2258,8 @@ class EventsStore(SQLBaseStore): "event_signatures", "rejections", ): - logger.debug("[purge] removing remote non-state events from %s", table) + logger.info("[purge] removing remote non-state events from %s", + table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), @@ -2266,7 +2267,7 @@ class EventsStore(SQLBaseStore): ) # Mark all state and own events as outliers - logger.debug("[purge] marking remaining events as outliers") + logger.info("[purge] marking remaining events as outliers") txn.executemany( "UPDATE events SET outlier = ?" " WHERE event_id = ?", From e571aef06d3b1af3946e790841f4b8a3a4cfdebf Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:40:29 +0000 Subject: [PATCH 169/171] purge: Move cache invalidation to more appropriate place it was a bit of a non-sequitur there --- synapse/storage/events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 803a4e247..24d997830 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2099,9 +2099,6 @@ class EventsStore(SQLBaseStore): "[purge] found %i events before cutoff, of which %i are remote" " non-state events to delete", len(event_rows), len(to_delete)) - for event_id, state_key in event_rows: - txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding @@ -2229,12 +2226,15 @@ class EventsStore(SQLBaseStore): state_rows ) - # Delete all non-state logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) + for event_id, _ in event_rows: + txn.call_after(self._get_state_group_for_event.invalidate, ( + event_id, + )) logger.info("[purge] updating room_depth") txn.execute( From 74fcbf741b3a7b95b5cc44478050e8a40fb7dc46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Feb 2018 18:44:52 +0000 Subject: [PATCH 170/171] delete_local_events for purge_history Add a flag which makes the purger delete local events --- docs/admin_api/purge_history_api.rst | 14 +++++++++-- synapse/handlers/message.py | 4 ++-- synapse/http/servlet.py | 18 +++++++++++--- synapse/rest/client/v1/admin.py | 11 ++++++++- synapse/storage/events.py | 35 ++++++++++++++++++++++------ 5 files changed, 67 insertions(+), 15 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index 08b330636..b4e5bd9d7 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -4,8 +4,6 @@ Purge History API The purge history API allows server admins to purge historic events from their database, reclaiming disk space. -**NB!** This will not delete local events (locally sent messages content etc) from the database, but will remove lots of the metadata about them and does dramatically reduce the on disk space usage - Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. @@ -15,3 +13,15 @@ The API is simply: ``POST /_matrix/client/r0/admin/purge_history//`` including an ``access_token`` of a server admin. + +By default, events sent by local users are not deleted, as they may represent +the only copies of this content in existence. (Events sent by remote users are +deleted, and room state data before the cutoff is always removed). + +To delete local events as well, set ``delete_local_events`` in the body: + +.. code:: json + + { + "delete_local_events": True, + } diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1c7860bb0..276d1a772 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -63,7 +63,7 @@ class MessageHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() @defer.inlineCallbacks - def purge_history(self, room_id, event_id): + def purge_history(self, room_id, event_id, delete_local_events=False): event = yield self.store.get_event(event_id) if event.room_id != room_id: @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, depth) + yield self.store.purge_history(room_id, depth, delete_local_events) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 71420e54d..ef8e62901 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -148,11 +148,13 @@ def parse_string_from_args(args, name, default=None, required=False, return default -def parse_json_value_from_request(request): +def parse_json_value_from_request(request, allow_empty_body=False): """Parse a JSON value from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into None Returns: The JSON value. @@ -165,6 +167,9 @@ def parse_json_value_from_request(request): except Exception: raise SynapseError(400, "Error reading JSON content.") + if not content_bytes and allow_empty_body: + return None + try: content = simplejson.loads(content_bytes) except Exception as e: @@ -174,17 +179,24 @@ def parse_json_value_from_request(request): return content -def parse_json_object_from_request(request): +def parse_json_object_from_request(request, allow_empty_body=False): """Parse a JSON object from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into an empty dict. Raises: SynapseError if the request body couldn't be decoded as JSON or if it wasn't a JSON object. """ - content = parse_json_value_from_request(request) + content = parse_json_value_from_request( + request, allow_empty_body=allow_empty_body, + ) + + if allow_empty_body and content is None: + return {} if type(content) != dict: message = "Content must be a JSON object." diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea..f954d2ea6 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -128,7 +128,16 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): if not is_admin: raise AuthError(403, "You are not a server admin") - yield self.handlers.message_handler.purge_history(room_id, event_id) + body = parse_json_object_from_request(request, allow_empty_body=True) + + delete_local_events = bool( + body.get("delete_local_history", False) + ) + + yield self.handlers.message_handler.purge_history( + room_id, event_id, + delete_local_events=delete_local_events, + ) defer.returnValue((200, {})) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 24d997830..11a2ff2d8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,32 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def purge_history(self, room_id, topological_ordering): + def purge_history( + self, room_id, topological_ordering, delete_local_events, + ): """Deletes room history before a certain point + + Args: + room_id (str): + + topological_ordering (int): + minimum topo ordering to preserve + + delete_local_events (bool): + if True, we will delete local events as well as remote ones + (instead of just marking them as outliers and deleting their + state groups). """ return self.runInteraction( "purge_history", - self._purge_history_txn, room_id, topological_ordering + self._purge_history_txn, room_id, topological_ordering, + delete_local_events, ) - def _purge_history_txn(self, txn, room_id, topological_ordering): + def _purge_history_txn( + self, txn, room_id, topological_ordering, delete_local_events, + ): # Tables that should be pruned: # event_auth # event_backward_extremities @@ -2093,11 +2109,14 @@ class EventsStore(SQLBaseStore): to_delete = [ (event_id,) for event_id, state_key in event_rows - if state_key is None and not self.hs.is_mine_id(event_id) + if state_key is None and ( + delete_local_events or not self.hs.is_mine_id(event_id) + ) ] logger.info( - "[purge] found %i events before cutoff, of which %i are remote" - " non-state events to delete", len(event_rows), len(to_delete)) + "[purge] found %i events before cutoff, of which %i can be deleted", + len(event_rows), len(to_delete), + ) logger.info("[purge] Finding new backward extremities") @@ -2273,7 +2292,9 @@ class EventsStore(SQLBaseStore): " WHERE event_id = ?", [ (True, event_id,) for event_id, state_key in event_rows - if state_key is not None or self.hs.is_mine_id(event_id) + if state_key is not None or ( + not delete_local_events and self.hs.is_mine_id(event_id) + ) ] ) From 39a6b3549638c70e3aaf51b361576fbd729eb655 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Feb 2018 12:13:34 +0000 Subject: [PATCH 171/171] purge: move room_depth update to end ... to avoid locking the table for too long --- synapse/storage/events.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 11a2ff2d8..238a2006b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2255,12 +2255,6 @@ class EventsStore(SQLBaseStore): event_id, )) - logger.info("[purge] updating room_depth") - txn.execute( - "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", - (topological_ordering, room_id,) - ) - # Delete all remote non-state events for table in ( "events", @@ -2298,6 +2292,18 @@ class EventsStore(SQLBaseStore): ] ) + # synapse tries to take out an exclusive lock on room_depth whenever it + # persists events (because upsert), and once we run this update, we + # will block that for the rest of our transaction. + # + # So, let's stick it at the end so that we don't block event + # persistence. + logger.info("[purge] updating room_depth") + txn.execute( + "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", + (topological_ordering, room_id,) + ) + logger.info("[purge] done") @defer.inlineCallbacks