From 806964b5de3e5036a897d4b5d477e7616779d019 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 6 Sep 2018 18:51:06 +0100 Subject: [PATCH 1/3] add some logging for the keyring queue why is it so damn slow? --- synapse/crypto/keyring.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 30e274210..a6b31e8b5 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -40,6 +40,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.crypto.keyclient import fetch_server_key from synapse.util import logcontext, unwrapFirstError from synapse.util.logcontext import ( + LoggingContext, PreserveLoggingContext, preserve_fn, run_in_background, @@ -216,23 +217,34 @@ class Keyring(object): servers have completed. Follows the synapse rules of logcontext preservation. """ + loop_count = 1 while True: wait_on = [ - self.key_downloads[server_name] + (server_name, self.key_downloads[server_name]) for server_name in server_names if server_name in self.key_downloads ] - if wait_on: - with PreserveLoggingContext(): - yield defer.DeferredList(wait_on) - else: + if not wait_on: break + logger.info( + "Waiting for existing lookups for %s to complete [loop %i]", + [w[0] for w in wait_on], loop_count, + ) + with PreserveLoggingContext(): + yield defer.DeferredList((w[1] for w in wait_on)) + + loop_count += 1 + + ctx = LoggingContext.current_context() def rm(r, server_name_): - self.key_downloads.pop(server_name_, None) + with PreserveLoggingContext(ctx): + logger.debug("Releasing key lookup lock on %s", server_name_) + self.key_downloads.pop(server_name_, None) return r for server_name, deferred in server_to_deferred.items(): + logger.debug("Got key lookup lock on %s", server_name) self.key_downloads[server_name] = deferred deferred.addBoth(rm, server_name) From cd7ef4387205eaeb64b77860a82f782f91e41e93 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 6 Sep 2018 23:56:47 +0100 Subject: [PATCH 2/3] clearer logging when things fail, too --- synapse/federation/federation_base.py | 34 +++++++++++++++++++++------ synapse/storage/keys.py | 1 + 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 5be8e66fb..61782ae1c 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -143,11 +143,31 @@ class FederationBase(object): def callback(_, pdu): with logcontext.PreserveLoggingContext(ctx): if not check_event_content_hash(pdu): - logger.warn( - "Event content has been tampered, redacting %s: %s", - pdu.event_id, pdu.get_pdu_json() - ) - return prune_event(pdu) + # let's try to distinguish between failures because the event was + # redacted (which are somewhat expected) vs actual ball-tampering + # incidents. + # + # This is just a heuristic, so we just assume that if the keys are + # about the same between the redacted and received events, then the + # received event was probably a redacted copy (but we then use our + # *actual* redacted copy to be on the safe side.) + redacted_event = prune_event(pdu) + if ( + set(six.iterkeys(redacted_event)) == set(six.iterkeys(pdu)) and + set(six.iterkeys(redacted_event.content)) + == set(six.iterkeys(pdu.content)) + ): + logger.info( + "Event %s seems to have been redacted; using our redacted " + "copy", + pdu.event_id, + ) + else: + logger.warning( + "Event %s content has been tampered, redacting", + pdu.event_id, pdu.get_pdu_json(), + ) + return redacted_event if self.spam_checker.check_event_for_spam(pdu): logger.warn( @@ -162,8 +182,8 @@ class FederationBase(object): failure.trap(SynapseError) with logcontext.PreserveLoggingContext(ctx): logger.warn( - "Signature check failed for %s", - pdu.event_id, + "Signature check failed for %s: %s", + pdu.event_id, failure.getErrorMessage(), ) return failure diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index f54797760..a1331c1a6 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -134,6 +134,7 @@ class KeyStore(SQLBaseStore): """ key_id = "%s:%s" % (verify_key.alg, verify_key.version) + # XXX fix this to not need a lock (#3819) def _txn(txn): self._simple_upsert_txn( txn, From 1e4c7fff5f30d195071e83190bb480f18cd94142 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 7 Sep 2018 16:37:30 +0100 Subject: [PATCH 3/3] changelog --- changelog.d/3826.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3826.misc diff --git a/changelog.d/3826.misc b/changelog.d/3826.misc new file mode 100644 index 000000000..a4d9a012f --- /dev/null +++ b/changelog.d/3826.misc @@ -0,0 +1 @@ +add some logging for the keyring queue