diff --git a/changelog.d/3826.misc b/changelog.d/3826.misc new file mode 100644 index 000000000..a4d9a012f --- /dev/null +++ b/changelog.d/3826.misc @@ -0,0 +1 @@ +add some logging for the keyring queue diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 9d497abf1..d89f94c21 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -41,6 +41,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.crypto.keyclient import fetch_server_key from synapse.util import logcontext, unwrapFirstError from synapse.util.logcontext import ( + LoggingContext, PreserveLoggingContext, preserve_fn, run_in_background, @@ -217,23 +218,34 @@ class Keyring(object): servers have completed. Follows the synapse rules of logcontext preservation. """ + loop_count = 1 while True: wait_on = [ - self.key_downloads[server_name] + (server_name, self.key_downloads[server_name]) for server_name in server_names if server_name in self.key_downloads ] - if wait_on: - with PreserveLoggingContext(): - yield defer.DeferredList(wait_on) - else: + if not wait_on: break + logger.info( + "Waiting for existing lookups for %s to complete [loop %i]", + [w[0] for w in wait_on], loop_count, + ) + with PreserveLoggingContext(): + yield defer.DeferredList((w[1] for w in wait_on)) + + loop_count += 1 + + ctx = LoggingContext.current_context() def rm(r, server_name_): - self.key_downloads.pop(server_name_, None) + with PreserveLoggingContext(ctx): + logger.debug("Releasing key lookup lock on %s", server_name_) + self.key_downloads.pop(server_name_, None) return r for server_name, deferred in server_to_deferred.items(): + logger.debug("Got key lookup lock on %s", server_name) self.key_downloads[server_name] = deferred deferred.addBoth(rm, server_name) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 5be8e66fb..61782ae1c 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -143,11 +143,31 @@ class FederationBase(object): def callback(_, pdu): with logcontext.PreserveLoggingContext(ctx): if not check_event_content_hash(pdu): - logger.warn( - "Event content has been tampered, redacting %s: %s", - pdu.event_id, pdu.get_pdu_json() - ) - return prune_event(pdu) + # let's try to distinguish between failures because the event was + # redacted (which are somewhat expected) vs actual ball-tampering + # incidents. + # + # This is just a heuristic, so we just assume that if the keys are + # about the same between the redacted and received events, then the + # received event was probably a redacted copy (but we then use our + # *actual* redacted copy to be on the safe side.) + redacted_event = prune_event(pdu) + if ( + set(six.iterkeys(redacted_event)) == set(six.iterkeys(pdu)) and + set(six.iterkeys(redacted_event.content)) + == set(six.iterkeys(pdu.content)) + ): + logger.info( + "Event %s seems to have been redacted; using our redacted " + "copy", + pdu.event_id, + ) + else: + logger.warning( + "Event %s content has been tampered, redacting", + pdu.event_id, pdu.get_pdu_json(), + ) + return redacted_event if self.spam_checker.check_event_for_spam(pdu): logger.warn( @@ -162,8 +182,8 @@ class FederationBase(object): failure.trap(SynapseError) with logcontext.PreserveLoggingContext(ctx): logger.warn( - "Signature check failed for %s", - pdu.event_id, + "Signature check failed for %s: %s", + pdu.event_id, failure.getErrorMessage(), ) return failure diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py index f54797760..a1331c1a6 100644 --- a/synapse/storage/keys.py +++ b/synapse/storage/keys.py @@ -134,6 +134,7 @@ class KeyStore(SQLBaseStore): """ key_id = "%s:%s" % (verify_key.alg, verify_key.version) + # XXX fix this to not need a lock (#3819) def _txn(txn): self._simple_upsert_txn( txn,