Correctly account for cpu usage by background threads (#4074)

Wrap calls to deferToThread() in a helper which runs the function in a child logcontext, so that its CPU usage is attributed to the right request. While we're in the area, remove the logcontext_tracer code, which is never used and, as far as I know, doesn't work. Fixes #4064
2025-11-24 03:43:13 -05:00 · 2018-10-23 13:12:32 +01:00 · 2018-10-23 13:12:32 +01:00 · 5c445114d3
commit 5c445114d3
parent 1fe6bbb555
6 changed files with 94 additions and 77 deletions
--- a/synapse/util/logcontext.py
+++ b/synapse/util/logcontext.py
@@ -25,7 +25,7 @@ See doc/log_contexts.rst for details on how this works.
 import logging
 import threading

-from twisted.internet import defer
+from twisted.internet import defer, threads

 logger = logging.getLogger(__name__)

@@ -562,58 +562,76 @@ def _set_context_cb(result, context):
    return result


-# modules to ignore in `logcontext_tracer`
-_to_ignore = [
-    "synapse.util.logcontext",
-    "synapse.http.server",
-    "synapse.storage._base",
-    "synapse.util.async_helpers",
-]
-
-
-def logcontext_tracer(frame, event, arg):
-    """A tracer that logs whenever a logcontext "unexpectedly" changes within
-    a function. Probably inaccurate.
-
-    Use by calling `sys.settrace(logcontext_tracer)` in the main thread.
+def defer_to_thread(reactor, f, *args, **kwargs):
    """
-    if event == 'call':
-        name = frame.f_globals["__name__"]
-        if name.startswith("synapse"):
-            if name == "synapse.util.logcontext":
-                if frame.f_code.co_name in ["__enter__", "__exit__"]:
-                    tracer = frame.f_back.f_trace
-                    if tracer:
-                        tracer.just_changed = True
+    Calls the function `f` using a thread from the reactor's default threadpool and
+    returns the result as a Deferred.

-            tracer = frame.f_trace
-            if tracer:
-                return tracer
+    Creates a new logcontext for `f`, which is created as a child of the current
+    logcontext (so its CPU usage metrics will get attributed to the current
+    logcontext). `f` should preserve the logcontext it is given.

-            if not any(name.startswith(ig) for ig in _to_ignore):
-                return LineTracer()
+    The result deferred follows the Synapse logcontext rules: you should `yield`
+    on it.
+
+    Args:
+        reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
+            the Deferred will be invoked, and whose threadpool we should use for the
+            function.
+
+            Normally this will be hs.get_reactor().
+
+        f (callable): The function to call.
+
+        args: positional arguments to pass to f.
+
+        kwargs: keyword arguments to pass to f.
+
+    Returns:
+        Deferred: A Deferred which fires a callback with the result of `f`, or an
+            errback if `f` throws an exception.
+    """
+    return defer_to_threadpool(reactor, reactor.getThreadPool(), f, *args, **kwargs)


-class LineTracer(object):
-    __slots__ = ["context", "just_changed"]
+def defer_to_threadpool(reactor, threadpool, f, *args, **kwargs):
+    """
+    A wrapper for twisted.internet.threads.deferToThreadPool, which handles
+    logcontexts correctly.

-    def __init__(self):
-        self.context = LoggingContext.current_context()
-        self.just_changed = False
+    Calls the function `f` using a thread from the given threadpool and returns
+    the result as a Deferred.

-    def __call__(self, frame, event, arg):
-        if event in 'line':
-            if self.just_changed:
-                self.context = LoggingContext.current_context()
-                self.just_changed = False
-            else:
-                c = LoggingContext.current_context()
-                if c != self.context:
-                    logger.info(
-                        "Context changed! %s -> %s, %s, %s",
-                        self.context, c,
-                        frame.f_code.co_filename, frame.f_lineno
-                    )
-                    self.context = c
+    Creates a new logcontext for `f`, which is created as a child of the current
+    logcontext (so its CPU usage metrics will get attributed to the current
+    logcontext). `f` should preserve the logcontext it is given.

-        return self
+    The result deferred follows the Synapse logcontext rules: you should `yield`
+    on it.
+
+    Args:
+        reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
+            the Deferred will be invoked. Normally this will be hs.get_reactor().
+
+        threadpool (twisted.python.threadpool.ThreadPool): The threadpool to use for
+            running `f`. Normally this will be hs.get_reactor().getThreadPool().
+
+        f (callable): The function to call.
+
+        args: positional arguments to pass to f.
+
+        kwargs: keyword arguments to pass to f.
+
+    Returns:
+        Deferred: A Deferred which fires a callback with the result of `f`, or an
+            errback if `f` throws an exception.
+    """
+    logcontext = LoggingContext.current_context()
+
+    def g():
+        with LoggingContext(parent_context=logcontext):
+            return f(*args, **kwargs)
+
+    return make_deferred_yieldable(
+        threads.deferToThreadPool(reactor, threadpool, g)
+    )