From 220a733d7379be88514f7681ec37388755d4e612 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 3 Jun 2019 09:56:45 +0100 Subject: [PATCH 1/9] Fix handling of failures when calling /event_auth. When processing an incoming event over federation, we may try and resolve any unexpected differences in auth events. This is a non-essential process and so should not stop the processing of the event if it fails (e.g. due to the remote disappearing or not implementing the necessary endpoints). Fixes #3330 --- synapse/handlers/federation.py | 50 ++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cf4fad7de..fa735efed 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( CodeMessageException, FederationDeniedError, FederationError, + RequestSendFailed, StoreError, SynapseError, ) @@ -2027,9 +2028,15 @@ class FederationHandler(BaseHandler): """ room_version = yield self.store.get_room_version(event.room_id) - yield self._update_auth_events_and_context_for_auth( - origin, event, context, auth_events - ) + try: + yield self._update_auth_events_and_context_for_auth( + origin, event, context, auth_events + ) + except Exception: + # We don't really mind if the above fails, so lets not fail + # processing if it does. + logger.exception("Failed to call _update_auth_events_and_context_for_auth") + try: self.auth.check(room_version, event, auth_events=auth_events) except AuthError as e: @@ -2042,6 +2049,15 @@ class FederationHandler(BaseHandler): ): """Helper for do_auth. See there for docs. + Checks whether a given event has the expected auth events. If it + doesn't then we talk to the remote server to compare state to see if + we can come to a consensus (e.g. if one server missed some valid + state). 
+ + This attempts to resolve any potential divergence of state between + servers, but is not essential and so failures should not block further + processing of the event. + Args: origin (str): event (synapse.events.EventBase): @@ -2088,9 +2104,14 @@ class FederationHandler(BaseHandler): missing_auth, ) try: - remote_auth_chain = yield self.federation_client.get_event_auth( - origin, event.room_id, event.event_id - ) + try: + remote_auth_chain = yield self.federation_client.get_event_auth( + origin, event.room_id, event.event_id + ) + except RequestSendFailed: + # The other side isn't around or doesn't implement the + # endpoint, so lets just bail out. + return seen_remotes = yield self.store.have_seen_events( [e.event_id for e in remote_auth_chain] @@ -2236,12 +2257,17 @@ class FederationHandler(BaseHandler): try: # 2. Get remote difference. - result = yield self.federation_client.query_auth( - origin, - event.room_id, - event.event_id, - local_auth_chain, - ) + try: + result = yield self.federation_client.query_auth( + origin, + event.room_id, + event.event_id, + local_auth_chain, + ) + except RequestSendFailed: + # The other side isn't around or doesn't implement the + # endpoint, so lets just bail out. + return seen_remotes = yield self.store.have_seen_events( [e.event_id for e in result["auth_chain"]] From fde37e4e98163c269a2b82e4892a70b2e37c619c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 3 Jun 2019 10:22:03 +0100 Subject: [PATCH 2/9] Newsfile --- changelog.d/5317.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5317.bugfix diff --git a/changelog.d/5317.bugfix b/changelog.d/5317.bugfix new file mode 100644 index 000000000..270937521 --- /dev/null +++ b/changelog.d/5317.bugfix @@ -0,0 +1 @@ +Fix handling of failures when processing incoming events where calling `/event_auth` on remote server fails. 
From bc3d6b918b62c3dd6ce96eba638cf4601126e2f9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 Jun 2019 11:31:27 +0100 Subject: [PATCH 3/9] Add logging when request fails and clarify we ignore errors. --- synapse/handlers/federation.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index fa735efed..ac5ca7914 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2034,8 +2034,14 @@ class FederationHandler(BaseHandler): ) except Exception: # We don't really mind if the above fails, so lets not fail - # processing if it does. - logger.exception("Failed to call _update_auth_events_and_context_for_auth") + # processing if it does. However, it really shouldn't fail so + # let's still log as an exception since we'll still want to fix + # any bugs. + logger.exception( + "Failed to double check auth events for %s with remote. " + "Ignoring failure and continuing processing of event.", + event.event_id, + ) try: self.auth.check(room_version, event, auth_events=auth_events) @@ -2108,9 +2114,10 @@ class FederationHandler(BaseHandler): remote_auth_chain = yield self.federation_client.get_event_auth( origin, event.room_id, event.event_id ) - except RequestSendFailed: + except RequestSendFailed as e: # The other side isn't around or doesn't implement the # endpoint, so lets just bail out. + logger.info("Failed to get event auth from remote: %s", e) return seen_remotes = yield self.store.have_seen_events( @@ -2264,9 +2271,10 @@ class FederationHandler(BaseHandler): event.event_id, local_auth_chain, ) - except RequestSendFailed: + except RequestSendFailed as e: # The other side isn't around or doesn't implement the # endpoint, so lets just bail out. 
+ logger.info("Failed to query auth from remote: %s", e) return seen_remotes = yield self.store.have_seen_events( From 26713515de97c98dda99a9b06325781fe09b1cbe Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 5 Jun 2019 13:16:23 +0100 Subject: [PATCH 4/9] Neilj/mau tracking config explainer (#5284) Improve documentation of monthly active user blocking and mau_trial_days --- changelog.d/5284.misc | 1 + docs/sample_config.yaml | 16 ++++++++++++++++ synapse/config/server.py | 16 ++++++++++++++++ 3 files changed, 33 insertions(+) create mode 100644 changelog.d/5284.misc diff --git a/changelog.d/5284.misc b/changelog.d/5284.misc new file mode 100644 index 000000000..c4d42ca3d --- /dev/null +++ b/changelog.d/5284.misc @@ -0,0 +1 @@ +Improve sample config for monthly active user blocking. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 493ea9ee9..0960b9b5e 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -261,6 +261,22 @@ listeners: # Monthly Active User Blocking # +# Used in cases where the admin or server owner wants to limit the +# number of monthly active users. +# +# 'limit_usage_by_mau' disables/enables monthly active user blocking. When +# enabled and a limit is reached the server returns a 'ResourceLimitError' +# with error type Codes.RESOURCE_LIMIT_EXCEEDED +# +# 'max_mau_value' is the hard limit of monthly active users above which +# the server will start blocking user actions. +# +# 'mau_trial_days' is a means to add a grace period for active users. It +# means that users must be active for this number of days before they +# can be considered active and guards against the case where lots of users +# sign up in a short space of time never to return after their initial +# session. 
+# #limit_usage_by_mau: False #max_mau_value: 50 #mau_trial_days: 2 diff --git a/synapse/config/server.py b/synapse/config/server.py index e763e19e1..334921d42 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -585,6 +585,22 @@ class ServerConfig(Config): # Monthly Active User Blocking # + # Used in cases where the admin or server owner wants to limit the + # number of monthly active users. + # + # 'limit_usage_by_mau' disables/enables monthly active user blocking. When + # enabled and a limit is reached the server returns a 'ResourceLimitError' + # with error type Codes.RESOURCE_LIMIT_EXCEEDED + # + # 'max_mau_value' is the hard limit of monthly active users above which + # the server will start blocking user actions. + # + # 'mau_trial_days' is a means to add a grace period for active users. It + # means that users must be active for this number of days before they + # can be considered active and guards against the case where lots of users + # sign up in a short space of time never to return after their initial + # session. 
+ # #limit_usage_by_mau: False #max_mau_value: 50 #mau_trial_days: 2 From 4650526b5ebf699920ebf9ecfdf13797c189a922 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 5 Jun 2019 13:47:03 +0100 Subject: [PATCH 5/9] Neilj/changelog clean up (#5356) * group together key validity refactors --- changelog.d/5232.misc | 2 +- changelog.d/5234.misc | 2 +- changelog.d/5235.misc | 2 +- changelog.d/5236.misc | 2 +- changelog.d/5237.misc | 2 +- changelog.d/5244.misc | 2 +- changelog.d/5250.misc | 2 +- changelog.d/5296.misc | 2 +- changelog.d/5299.misc | 2 +- changelog.d/5343.misc | 2 +- changelog.d/5347.misc | 3 +-- changelog.d/5356.misc | 1 + 12 files changed, 12 insertions(+), 12 deletions(-) create mode 100644 changelog.d/5356.misc diff --git a/changelog.d/5232.misc b/changelog.d/5232.misc index 1cdc71f09..8336bc55d 100644 --- a/changelog.d/5232.misc +++ b/changelog.d/5232.misc @@ -1 +1 @@ -Run black on synapse.crypto.keyring. +Preparatory work for key-validity features. diff --git a/changelog.d/5234.misc b/changelog.d/5234.misc index 43fbd6f67..8336bc55d 100644 --- a/changelog.d/5234.misc +++ b/changelog.d/5234.misc @@ -1 +1 @@ -Rewrite store_server_verify_key to store several keys at once. +Preparatory work for key-validity features. diff --git a/changelog.d/5235.misc b/changelog.d/5235.misc index 2296ad2a4..8336bc55d 100644 --- a/changelog.d/5235.misc +++ b/changelog.d/5235.misc @@ -1 +1 @@ -Remove unused VerifyKey.expired and .time_added fields. +Preparatory work for key-validity features. diff --git a/changelog.d/5236.misc b/changelog.d/5236.misc index cb4417a9f..8336bc55d 100644 --- a/changelog.d/5236.misc +++ b/changelog.d/5236.misc @@ -1 +1 @@ -Simplify Keyring.process_v2_response. \ No newline at end of file +Preparatory work for key-validity features. diff --git a/changelog.d/5237.misc b/changelog.d/5237.misc index f4fe3b821..8336bc55d 100644 --- a/changelog.d/5237.misc +++ b/changelog.d/5237.misc @@ -1 +1 @@ -Store key validity time in the storage layer. 
+Preparatory work for key-validity features. diff --git a/changelog.d/5244.misc b/changelog.d/5244.misc index 9cc1fb869..8336bc55d 100644 --- a/changelog.d/5244.misc +++ b/changelog.d/5244.misc @@ -1 +1 @@ -Refactor synapse.crypto.keyring to use a KeyFetcher interface. +Preparatory work for key-validity features. diff --git a/changelog.d/5250.misc b/changelog.d/5250.misc index 575a299a8..8336bc55d 100644 --- a/changelog.d/5250.misc +++ b/changelog.d/5250.misc @@ -1 +1 @@ -Simplification to Keyring.wait_for_previous_lookups. +Preparatory work for key-validity features. diff --git a/changelog.d/5296.misc b/changelog.d/5296.misc index a038a6f7f..8336bc55d 100644 --- a/changelog.d/5296.misc +++ b/changelog.d/5296.misc @@ -1 +1 @@ -Refactor keyring.VerifyKeyRequest to use attr.s. +Preparatory work for key-validity features. diff --git a/changelog.d/5299.misc b/changelog.d/5299.misc index 53297c768..8336bc55d 100644 --- a/changelog.d/5299.misc +++ b/changelog.d/5299.misc @@ -1 +1 @@ -Rewrite get_server_verify_keys, again. +Preparatory work for key-validity features. diff --git a/changelog.d/5343.misc b/changelog.d/5343.misc index dbee0f71b..8336bc55d 100644 --- a/changelog.d/5343.misc +++ b/changelog.d/5343.misc @@ -1 +1 @@ -Rename VerifyKeyRequest.deferred field. +Preparatory work for key-validity features. diff --git a/changelog.d/5347.misc b/changelog.d/5347.misc index 436245fb1..8336bc55d 100644 --- a/changelog.d/5347.misc +++ b/changelog.d/5347.misc @@ -1,2 +1 @@ -Various improvements to debug logging. - +Preparatory work for key-validity features. diff --git a/changelog.d/5356.misc b/changelog.d/5356.misc new file mode 100644 index 000000000..8336bc55d --- /dev/null +++ b/changelog.d/5356.misc @@ -0,0 +1 @@ +Preparatory work for key-validity features. 
From 95ab2eb4a1e9757bfe881abacce6ff81b3dbc371 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 5 Jun 2019 15:12:33 +0100 Subject: [PATCH 6/9] Fix notes about well-known and acme (#5357) fixes #4951 --- changelog.d/5357.doc | 1 + docs/MSC1711_certificates_FAQ.md | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 changelog.d/5357.doc diff --git a/changelog.d/5357.doc b/changelog.d/5357.doc new file mode 100644 index 000000000..27cba4964 --- /dev/null +++ b/changelog.d/5357.doc @@ -0,0 +1 @@ +Fix notes about ACME in the MSC1711 faq. diff --git a/docs/MSC1711_certificates_FAQ.md b/docs/MSC1711_certificates_FAQ.md index ebfb20f5c..37f7f669c 100644 --- a/docs/MSC1711_certificates_FAQ.md +++ b/docs/MSC1711_certificates_FAQ.md @@ -145,12 +145,11 @@ You can do this with a `.well-known` file as follows: 1. Keep the SRV record in place - it is needed for backwards compatibility with Synapse 0.34 and earlier. - 2. Give synapse a certificate corresponding to the target domain - (`customer.example.net` in the above example). Currently Synapse's ACME - support [does not support - this](https://github.com/matrix-org/synapse/issues/4552), so you will have - to acquire a certificate yourself and give it to Synapse via - `tls_certificate_path` and `tls_private_key_path`. + 2. Give Synapse a certificate corresponding to the target domain + (`customer.example.net` in the above example). You can either use Synapse's + built-in [ACME support](./ACME.md) for this (via the `domain` parameter in + the `acme` section), or acquire a certificate yourself and give it to + Synapse via `tls_certificate_path` and `tls_private_key_path`. 3. Restart Synapse to ensure the new certificate is loaded. 
From 75538813fcd0403ec8915484a813b99e6eb256c6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 Jun 2019 15:45:46 +0100 Subject: [PATCH 7/9] Fix background updates to handle redactions/rejections (#5352) * Fix background updates to handle redactions/rejections In background updates based on current state delta stream we need to handle that we may not have all the events (or at least that `get_events` may raise an exception). --- changelog.d/5352.bugfix | 1 + synapse/handlers/presence.py | 11 +++--- synapse/handlers/stats.py | 18 +++++++--- synapse/storage/events_worker.py | 37 +++++++++++++++++++ tests/handlers/test_stats.py | 62 ++++++++++++++++++++++++++++++-- 5 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 changelog.d/5352.bugfix diff --git a/changelog.d/5352.bugfix b/changelog.d/5352.bugfix new file mode 100644 index 000000000..2ffefe5a6 --- /dev/null +++ b/changelog.d/5352.bugfix @@ -0,0 +1 @@ +Fix room stats and presence background updates to correctly handle missing events. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 6209858bb..e49c8203e 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -828,14 +828,17 @@ class PresenceHandler(object): # joins. continue - event = yield self.store.get_event(event_id) - if event.content.get("membership") != Membership.JOIN: + event = yield self.store.get_event(event_id, allow_none=True) + if not event or event.content.get("membership") != Membership.JOIN: # We only care about joins continue if prev_event_id: - prev_event = yield self.store.get_event(prev_event_id) - if prev_event.content.get("membership") == Membership.JOIN: + prev_event = yield self.store.get_event(prev_event_id, allow_none=True) + if ( + prev_event + and prev_event.content.get("membership") == Membership.JOIN + ): # Ignore changes to join events. 
continue diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 0e92b405b..7ad16c856 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -115,6 +115,7 @@ class StatsHandler(StateDeltasHandler): event_id = delta["event_id"] stream_id = delta["stream_id"] prev_event_id = delta["prev_event_id"] + stream_pos = delta["stream_id"] logger.debug("Handling: %r %r, %s", typ, state_key, event_id) @@ -136,10 +137,15 @@ class StatsHandler(StateDeltasHandler): event_content = {} if event_id is not None: - event_content = (yield self.store.get_event(event_id)).content or {} + event = yield self.store.get_event(event_id, allow_none=True) + if event: + event_content = event.content or {} + + # We use stream_pos here rather than fetch by event_id as event_id + # may be None + now = yield self.store.get_received_ts_by_stream_pos(stream_pos) # quantise time to the nearest bucket - now = yield self.store.get_received_ts(event_id) now = (now // 1000 // self.stats_bucket_size) * self.stats_bucket_size if typ == EventTypes.Member: @@ -149,9 +155,11 @@ class StatsHandler(StateDeltasHandler): # compare them. prev_event_content = {} if prev_event_id is not None: - prev_event_content = ( - yield self.store.get_event(prev_event_id) - ).content + prev_event = yield self.store.get_event( + prev_event_id, allow_none=True, + ) + if prev_event: + prev_event_content = prev_event.content membership = event_content.get("membership", Membership.LEAVE) prev_membership = prev_event_content.get("membership", Membership.LEAVE) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 178242804..cc7df5cf1 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -78,6 +78,43 @@ class EventsWorkerStore(SQLBaseStore): desc="get_received_ts", ) + def get_received_ts_by_stream_pos(self, stream_ordering): + """Given a stream ordering get an approximate timestamp of when it + happened. 
+ + This is done by simply taking the received ts of the first event that + has a stream ordering greater than or equal to the given stream pos. + If none exists returns the current time, on the assumption that it must + have happened recently. + + Args: + stream_ordering (int) + + Returns: + Deferred[int] + """ + + def _get_approximate_received_ts_txn(txn): + sql = """ + SELECT received_ts FROM events + WHERE stream_ordering >= ? + LIMIT 1 + """ + + txn.execute(sql, (stream_ordering,)) + row = txn.fetchone() + if row and row[0]: + ts = row[0] + else: + ts = self.clock.time_msec() + + return ts + + return self.runInteraction( + "get_approximate_received_ts", + _get_approximate_received_ts_txn, + ) + @defer.inlineCallbacks def get_event( self, diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index 249aba3d5..2710c991c 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -204,7 +204,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): "a2": {"membership": "not a real thing"}, } - def get_event(event_id): + def get_event(event_id, allow_none=True): m = Mock() m.content = events[event_id] d = defer.Deferred() @@ -224,7 +224,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): "room_id": "room", "event_id": "a1", "prev_event_id": "a2", - "stream_id": "bleb", + "stream_id": 60, } ] @@ -241,7 +241,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): "room_id": "room", "event_id": "a2", "prev_event_id": "a1", - "stream_id": "bleb", + "stream_id": 100, } ] @@ -249,3 +249,59 @@ class StatsRoomTests(unittest.HomeserverTestCase): self.assertEqual( f.value.args[0], "'not a real thing' is not a valid membership" ) + + def test_redacted_prev_event(self): + """ + If the prev_event does not exist, then it is assumed to be a LEAVE. 
+ """ + u1 = self.register_user("u1", "pass") + u1_token = self.login("u1", "pass") + + room_1 = self.helper.create_room_as(u1, tok=u1_token) + + # Do the initial population of the user directory via the background update + self._add_background_updates() + + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + events = { + "a1": None, + "a2": {"membership": Membership.JOIN}, + } + + def get_event(event_id, allow_none=True): + if events.get(event_id): + m = Mock() + m.content = events[event_id] + else: + m = None + d = defer.Deferred() + self.reactor.callLater(0.0, d.callback, m) + return d + + def get_received_ts(event_id): + return defer.succeed(1) + + self.store.get_received_ts = get_received_ts + self.store.get_event = get_event + + deltas = [ + { + "type": EventTypes.Member, + "state_key": "some_user:test", + "room_id": room_1, + "event_id": "a2", + "prev_event_id": "a1", + "stream_id": 100, + } + ] + + # Handle our fake deltas, which has a user going from LEAVE -> JOIN. + self.get_success(self.handler._handle_deltas(deltas)) + + # One delta, with two joined members -- the room creator, and our fake + # user. 
+ r = self.get_success(self.store.get_deltas_for_room(room_1, 0)) + self.assertEqual(len(r), 1) + self.assertEqual(r[0]["joined_members"], 2) From 94f6c674df8035d44e7219193377f77afdfa6669 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 5 Jun 2019 16:11:31 +0100 Subject: [PATCH 8/9] Neilj/add r0.5 to versions (#5360) * Update _matrix/client/versions to reference support for r0.5.0 --- changelog.d/5360.feature | 1 + synapse/rest/client/versions.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/5360.feature diff --git a/changelog.d/5360.feature b/changelog.d/5360.feature new file mode 100644 index 000000000..01fbb3b06 --- /dev/null +++ b/changelog.d/5360.feature @@ -0,0 +1 @@ +Update /_matrix/client/versions to reference support for r0.5.0. diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 27e7cbf3c..babbf6a23 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -39,6 +39,7 @@ class VersionsRestServlet(RestServlet): "r0.2.0", "r0.3.0", "r0.4.0", + "r0.5.0", ], # as per MSC1497: "unstable_features": { From f8a45302c9ce147d7797ceb9e3757bd3b2af6b99 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 5 Jun 2019 16:16:33 +0100 Subject: [PATCH 9/9] Fix `federation_custom_ca_list` configuration option. Previously, setting this option would cause an exception at startup. --- changelog.d/5362.bugfix | 1 + synapse/config/tls.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5362.bugfix diff --git a/changelog.d/5362.bugfix b/changelog.d/5362.bugfix new file mode 100644 index 000000000..1c8b19182 --- /dev/null +++ b/changelog.d/5362.bugfix @@ -0,0 +1 @@ +Fix `federation_custom_ca_list` configuration option. 
diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 72dd5926f..94a53d05f 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -107,7 +107,7 @@ class TlsConfig(Config): certs = [] for ca_file in custom_ca_list: logger.debug("Reading custom CA certificate file: %s", ca_file) - content = self.read_file(ca_file) + content = self.read_file(ca_file, "federation_custom_ca_list") # Parse the CA certificates try: