Merge pull request #3826 from matrix-org/rav/logging_for_keyring

add some logging for the keyring queue
This commit is contained in:
Amber Brown 2018-09-12 20:43:47 +10:00 committed by GitHub
commit 33716c4aea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 13 deletions

1
changelog.d/3826.misc Normal file
View File

@ -0,0 +1 @@
add some logging for the keyring queue

View File

@ -41,6 +41,7 @@ from synapse.api.errors import Codes, SynapseError
from synapse.crypto.keyclient import fetch_server_key
from synapse.util import logcontext, unwrapFirstError
from synapse.util.logcontext import (
LoggingContext,
PreserveLoggingContext,
preserve_fn,
run_in_background,
@ -217,23 +218,34 @@ class Keyring(object):
servers have completed. Follows the synapse rules of logcontext
preservation.
"""
loop_count = 1
while True:
wait_on = [
self.key_downloads[server_name]
(server_name, self.key_downloads[server_name])
for server_name in server_names
if server_name in self.key_downloads
]
if wait_on:
with PreserveLoggingContext():
yield defer.DeferredList(wait_on)
else:
if not wait_on:
break
logger.info(
"Waiting for existing lookups for %s to complete [loop %i]",
[w[0] for w in wait_on], loop_count,
)
with PreserveLoggingContext():
yield defer.DeferredList((w[1] for w in wait_on))
loop_count += 1
ctx = LoggingContext.current_context()
def rm(r, server_name_):
with PreserveLoggingContext(ctx):
logger.debug("Releasing key lookup lock on %s", server_name_)
self.key_downloads.pop(server_name_, None)
return r
for server_name, deferred in server_to_deferred.items():
logger.debug("Got key lookup lock on %s", server_name)
self.key_downloads[server_name] = deferred
deferred.addBoth(rm, server_name)

View File

@ -143,11 +143,31 @@ class FederationBase(object):
def callback(_, pdu):
with logcontext.PreserveLoggingContext(ctx):
if not check_event_content_hash(pdu):
logger.warn(
"Event content has been tampered, redacting %s: %s",
pdu.event_id, pdu.get_pdu_json()
# let's try to distinguish between failures because the event was
# redacted (which are somewhat expected) vs actual ball-tampering
# incidents.
#
# This is just a heuristic, so we just assume that if the keys are
# about the same between the redacted and received events, then the
# received event was probably a redacted copy (but we then use our
# *actual* redacted copy to be on the safe side.)
redacted_event = prune_event(pdu)
if (
set(six.iterkeys(redacted_event)) == set(six.iterkeys(pdu)) and
set(six.iterkeys(redacted_event.content))
== set(six.iterkeys(pdu.content))
):
logger.info(
"Event %s seems to have been redacted; using our redacted "
"copy",
pdu.event_id,
)
return prune_event(pdu)
else:
logger.warning(
"Event %s content has been tampered, redacting",
pdu.event_id, pdu.get_pdu_json(),
)
return redacted_event
if self.spam_checker.check_event_for_spam(pdu):
logger.warn(
@ -162,8 +182,8 @@ class FederationBase(object):
failure.trap(SynapseError)
with logcontext.PreserveLoggingContext(ctx):
logger.warn(
"Signature check failed for %s",
pdu.event_id,
"Signature check failed for %s: %s",
pdu.event_id, failure.getErrorMessage(),
)
return failure

View File

@ -134,6 +134,7 @@ class KeyStore(SQLBaseStore):
"""
key_id = "%s:%s" % (verify_key.alg, verify_key.version)
# XXX fix this to not need a lock (#3819)
def _txn(txn):
self._simple_upsert_txn(
txn,