From b088aafcae71fa61433f92ce0b139802a9cbd304 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 21 Jun 2018 06:15:03 +0100 Subject: [PATCH 1/2] Log number of events fetched from DB When we finish processing a request, log the number of events we fetched from the database to handle it. [I'm trying to figure out which requests are responsible for large amounts of event cache churn. It may turn out to be more helpful to add counts to the prometheus per-request/block metrics, but that is an extension to this code anyway.] --- synapse/http/site.py | 5 ++++- synapse/storage/events_worker.py | 4 ++++ synapse/util/logcontext.py | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/synapse/http/site.py b/synapse/http/site.py index 2664006f8..74a752d6c 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -99,16 +99,18 @@ class SynapseRequest(Request): db_txn_count = context.db_txn_count db_txn_duration_sec = context.db_txn_duration_sec db_sched_duration_sec = context.db_sched_duration_sec + evt_db_fetch_count = context.evt_db_fetch_count except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_sec = (0, 0) + evt_db_fetch_count = 0 end_time = time.time() self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" - " %sB %s \"%s %s %s\" \"%s\"", + " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]", self.getClientIP(), self.site.site_tag, self.authenticated_entity, @@ -124,6 +126,7 @@ class SynapseRequest(Request): self.get_redacted_uri(), self.clientproto, self.get_user_agent(), + evt_db_fetch_count, ) try: diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 32d9d00ff..cf4efa9d1 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -21,6 +21,7 @@ from synapse.events.utils import prune_event from synapse.util.logcontext import ( PreserveLoggingContext, make_deferred_yieldable, run_in_background, + LoggingContext, ) from synapse.util.metrics import Measure from synapse.api.errors import SynapseError @@ -145,6 +146,9 @@ class EventsWorkerStore(SQLBaseStore): missing_events_ids = [e for e in event_ids if e not in event_entry_map] if missing_events_ids: + log_ctx = LoggingContext.current_context() + log_ctx.evt_db_fetch_count += len(missing_events_ids) + missing_events = yield self._enqueue_events( missing_events_ids, check_redacted=check_redacted, diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a58c72340..2bfd4bee9 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -60,6 +60,7 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", + "evt_db_fetch_count", "usage_start", "main_thread", "alive", "request", "tag", @@ -109,6 +110,9 @@ class LoggingContext(object): # sec spent waiting for db txns to be scheduled self.db_sched_duration_sec = 0 + # number of events this thread has fetched from the db + self.evt_db_fetch_count = 0 + # If alive has the thread resource usage when the logcontext last # became active. self.usage_start = None From 0495fe00350a3610cbfd9b0097ff1a8a1c31f5f4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 22 Jun 2018 10:42:28 +0100 Subject: [PATCH 2/2] Indirect evt_count updates via method call so that we can stub it for the sentinel and not have a billion failing UTs --- synapse/storage/events_worker.py | 2 +- synapse/util/logcontext.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index cf4efa9d1..3a634842c 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -147,7 +147,7 @@ class EventsWorkerStore(SQLBaseStore): if missing_events_ids: log_ctx = LoggingContext.current_context() - log_ctx.evt_db_fetch_count += len(missing_events_ids) + log_ctx.record_event_fetch(len(missing_events_ids)) missing_events = yield self._enqueue_events( missing_events_ids, diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 2bfd4bee9..df2b71b79 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -91,6 +91,9 @@ class LoggingContext(object): def add_database_scheduled(self, sched_sec): pass + def record_event_fetch(self, event_count): + pass + def __nonzero__(self): return False __bool__ = __nonzero__ # python3 @@ -247,6 +250,14 @@ class LoggingContext(object): """ self.db_sched_duration_sec += sched_sec + def record_event_fetch(self, event_count): + """Record a number of events being fetched from the db + + Args: + event_count (int): number of events being fetched + """ + self.evt_db_fetch_count += event_count + class LoggingContextFilter(logging.Filter): """Logging filter that adds values from the current logging context to each