diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst index 7aa4757a3..ca10799b0 100644 --- a/docs/metrics-howto.rst +++ b/docs/metrics-howto.rst @@ -51,9 +51,9 @@ python_gc_counts reactor_gc_counts The twisted-specific reactor metrics have been renamed. -==================================== ================= +==================================== ===================== New name Old name ------------------------------------- ----------------- -python_twisted_reactor_pending_calls reactor_tick_time +------------------------------------ --------------------- +python_twisted_reactor_pending_calls reactor_pending_calls python_twisted_reactor_tick_time reactor_tick_time -==================================== ================= +==================================== ===================== diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 7041da25c..2265e6e8d 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -111,18 +111,20 @@ def render_all(): return "\n".join(strs) -reactor_metrics = get_metrics_for("reactor") -tick_time = reactor_metrics.register_distribution("tick_time") -pending_calls_metric = reactor_metrics.register_distribution("pending_calls") +register_process_collector(get_metrics_for("process")) -gc_time = reactor_metrics.register_distribution("gc_time", labels=["gen"]) -gc_unreachable = reactor_metrics.register_counter("gc_unreachable", labels=["gen"]) -reactor_metrics.register_callback( +python_metrics = get_metrics_for("python") + +gc_time = python_metrics.register_distribution("gc_time", labels=["gen"]) +gc_unreachable = python_metrics.register_counter("gc_unreachable_total", labels=["gen"]) +python_metrics.register_callback( "gc_counts", lambda: {(i,): v for i, v in enumerate(gc.get_count())}, labels=["gen"] ) -register_process_collector(get_metrics_for("process")) +reactor_metrics = get_metrics_for("python.twisted.reactor") +tick_time = reactor_metrics.register_distribution("tick_time") +pending_calls_metric = reactor_metrics.register_distribution("pending_calls") def runUntilCurrentTimer(func): diff --git a/synapse/metrics/process_collector.py b/synapse/metrics/process_collector.py index 0e9558236..6fec3de39 100644 --- a/synapse/metrics/process_collector.py +++ b/synapse/metrics/process_collector.py @@ -13,12 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Because otherwise 'resource' collides with synapse.metrics.resource -from __future__ import absolute_import - import os -import stat -from resource import getrusage, RUSAGE_SELF TICKS_PER_SEC = 100 @@ -29,16 +24,6 @@ HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits") HAVE_PROC_SELF_FD = os.path.exists("/proc/self/fd") -TYPES = { - stat.S_IFSOCK: "SOCK", - stat.S_IFLNK: "LNK", - stat.S_IFREG: "REG", - stat.S_IFBLK: "BLK", - stat.S_IFDIR: "DIR", - stat.S_IFCHR: "CHR", - stat.S_IFIFO: "FIFO", -} - # Field indexes from /proc/self/stat, taken from the proc(5) manpage STAT_FIELDS = { "utime": 14, @@ -49,9 +34,7 @@ STAT_FIELDS = { } -rusage = None stats = {} -fd_counts = None # In order to report process_start_time_seconds we need to know the # machine's boot time, because the value in /proc/self/stat is relative to @@ -65,9 +48,6 @@ if HAVE_PROC_STAT: def update_resource_metrics(): - global rusage - rusage = getrusage(RUSAGE_SELF) - if HAVE_PROC_SELF_STAT: global stats with open("/proc/self/stat") as s: @@ -80,52 +60,17 @@ def update_resource_metrics(): # we've lost the first two fields in PID and COMMAND above stats[name] = int(raw_stats[index - 3]) - global fd_counts - fd_counts = _process_fds() - - -def _process_fds(): - counts = {(k,): 0 for k in TYPES.values()} - counts[("other",)] = 0 +def _count_fds(): # Not every OS will have a /proc/self/fd directory if not HAVE_PROC_SELF_FD: - return counts + return 0 - for fd in os.listdir("/proc/self/fd"): - try: - s = os.stat("/proc/self/fd/%s" % (fd)) - fmt = stat.S_IFMT(s.st_mode) - if fmt in TYPES: - t = TYPES[fmt] - else: - t = "other" - - counts[(t,)] += 1 - except OSError: - # the dirh itself used by listdir() is usually missing by now - pass - - return counts + return len(os.listdir("/proc/self/fd")) def register_process_collector(process_metrics): - # Legacy synapse-invented metric names - - resource_metrics = process_metrics.make_subspace("resource") - - resource_metrics.register_collector(update_resource_metrics) - - # msecs - resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000) - resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000) - - # kilobytes - resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024) - - process_metrics.register_callback("fds", _process_fds, labels=["type"]) - - # New prometheus-standard metric names + process_metrics.register_collector(update_resource_metrics) if HAVE_PROC_SELF_STAT: process_metrics.register_callback( @@ -158,7 +103,7 @@ def register_process_collector(process_metrics): if HAVE_PROC_SELF_FD: process_metrics.register_callback( "open_fds", - lambda: sum(fd_counts.values()) + lambda: _count_fds() ) if HAVE_PROC_SELF_LIMITS: