From b21b9dbc378ae336f70d832f19ce123b3f82f3be Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 13:53:55 +0100 Subject: [PATCH 01/15] Callback metric values might not just be integers - allow floats --- synapse/metrics/metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index e81af2989..e87b2b80a 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -98,9 +98,9 @@ class CallbackMetric(BaseMetric): value = self.callback() if self.is_scalar(): - return ["%s %d" % (self.name, value)] + return ["%s %.12g" % (self.name, value)] - return ["%s%s %d" % (self.name, self._render_key(k), value[k]) + return ["%s%s %.12g" % (self.name, self._render_key(k), value[k]) for k in sorted(value.keys())] From 03c2720940ed730dad11996020c180e592fd64a9 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 13:56:06 +0100 Subject: [PATCH 02/15] Export CPU usage metrics also under prometheus-standard metric name --- synapse/metrics/__init__.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 76d5998d7..e4dd4c61e 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -119,6 +119,8 @@ def update_resource_metrics(): global rusage rusage = getrusage(RUSAGE_SELF) +## Legacy synapse-invented metric names + resource_metrics = get_metrics_for("process.resource") # msecs @@ -165,6 +167,19 @@ def _process_fds(): get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"]) +## New prometheus-standard metric names +process_metrics = get_metrics_for("process"); + +process_metrics.register_callback( + "cpu_user_seconds_total", lambda: rusage.ru_utime +) +process_metrics.register_callback( + "cpu_system_seconds_total", lambda: rusage.ru_stime +) +process_metrics.register_callback( + "cpu_seconds_total", lambda: rusage.ru_utime + 
rusage.ru_stime +) + reactor_metrics = get_metrics_for("reactor") tick_time = reactor_metrics.register_distribution("tick_time") pending_calls_metric = reactor_metrics.register_distribution("pending_calls") From 9b0316c75a50de6ce85a952f09247221b25b65a9 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 14:10:03 +0100 Subject: [PATCH 03/15] Use /proc/self/stat to generate the new process_cpu_*_seconds_total metrics --- synapse/metrics/__init__.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e4dd4c61e..434e7535c 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -112,13 +112,21 @@ def render_all(): # Now register some standard process-wide state metrics, to give indications of # process resource usage -rusage = None +TICKS_PER_SEC = 100 +rusage = None +stats = None def update_resource_metrics(): global rusage rusage = getrusage(RUSAGE_SELF) + global stats + with open("/proc/self/stat") as s: + line = s.read() + # line is PID (command) more stats go here ... 
+ stats = line.split(") ", 1)[1].split(" ") + ## Legacy synapse-invented metric names resource_metrics = get_metrics_for("process.resource") @@ -171,13 +179,13 @@ get_metrics_for("process").register_callback("fds", _process_fds, labels=["type" process_metrics = get_metrics_for("process"); process_metrics.register_callback( - "cpu_user_seconds_total", lambda: rusage.ru_utime + "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC ) process_metrics.register_callback( - "cpu_system_seconds_total", lambda: rusage.ru_stime + "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC ) process_metrics.register_callback( - "cpu_seconds_total", lambda: rusage.ru_utime + rusage.ru_stime + "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC ) reactor_metrics = get_metrics_for("reactor") From 95fc70216d7c9f2142466c298b0b8fa456afd399 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 14:23:40 +0100 Subject: [PATCH 04/15] Add standard process_*_memory_bytes metrics --- synapse/metrics/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 434e7535c..106458e10 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -113,6 +113,7 @@ def render_all(): # process resource usage TICKS_PER_SEC = 100 +BYTES_PER_PAGE = 4096 rusage = None stats = None @@ -188,6 +189,13 @@ process_metrics.register_callback( "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC ) +process_metrics.register_callback( + "virtual_memory_bytes", lambda: int(stats[20]) +) +process_metrics.register_callback( + "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE +) + reactor_metrics = get_metrics_for("reactor") tick_time = reactor_metrics.register_distribution("tick_time") pending_calls_metric = reactor_metrics.register_distribution("pending_calls") From 
06f1ad16250c5f0666fb5fdb26bf1c67cb6b494b Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 14:45:08 +0100 Subject: [PATCH 05/15] Add standard process_open_fds metric --- synapse/metrics/__init__.py | 49 ++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 106458e10..dfbb634af 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -117,6 +117,17 @@ BYTES_PER_PAGE = 4096 rusage = None stats = None +fd_counts = None + +TYPES = { + stat.S_IFSOCK: "SOCK", + stat.S_IFLNK: "LNK", + stat.S_IFREG: "REG", + stat.S_IFBLK: "BLK", + stat.S_IFDIR: "DIR", + stat.S_IFCHR: "CHR", + stat.S_IFIFO: "FIFO", +} def update_resource_metrics(): global rusage @@ -128,26 +139,8 @@ def update_resource_metrics(): # line is PID (command) more stats go here ... stats = line.split(") ", 1)[1].split(" ") -## Legacy synapse-invented metric names - -resource_metrics = get_metrics_for("process.resource") - -# msecs -resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000) -resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000) - -# kilobytes -resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024) - -TYPES = { - stat.S_IFSOCK: "SOCK", - stat.S_IFLNK: "LNK", - stat.S_IFREG: "REG", - stat.S_IFBLK: "BLK", - stat.S_IFDIR: "DIR", - stat.S_IFCHR: "CHR", - stat.S_IFIFO: "FIFO", -} + global fd_counts + fd_counts = _process_fds() def _process_fds(): @@ -174,6 +167,18 @@ def _process_fds(): return counts + +## Legacy synapse-invented metric names + +resource_metrics = get_metrics_for("process.resource") + +# msecs +resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000) +resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000) + +# kilobytes +resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024) + 
get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"]) ## New prometheus-standard metric names @@ -196,6 +201,10 @@ process_metrics.register_callback( "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE ) +process_metrics.register_callback( + "open_fds", lambda: sum(fd_counts.values()) +) + reactor_metrics = get_metrics_for("reactor") tick_time = reactor_metrics.register_distribution("tick_time") pending_calls_metric = reactor_metrics.register_distribution("pending_calls") From def63649df49528113f5e84d23576956baaf851e Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 14:50:57 +0100 Subject: [PATCH 06/15] Add standard process_max_fds metric --- synapse/metrics/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index dfbb634af..75ba248c5 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -205,6 +205,19 @@ process_metrics.register_callback( "open_fds", lambda: sum(fd_counts.values()) ) +def _get_max_fds(): + with open("/proc/self/limits") as limits: + for line in limits: + if not line.startswith("Max open files "): + continue + # Line is Max open files $SOFT $HARD + return int(line.split()[3]) + return None + +process_metrics.register_callback( + "max_fds", lambda: _get_max_fds() +) + reactor_metrics = get_metrics_for("reactor") tick_time = reactor_metrics.register_distribution("tick_time") pending_calls_metric = reactor_metrics.register_distribution("pending_calls") From 981f852d540f20ff58c2ab0c2b59f42f89f01e61 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 15:04:52 +0100 Subject: [PATCH 07/15] Add standard process_start_time_seconds metric --- synapse/metrics/__init__.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 75ba248c5..a79fb026d 100644 --- 
a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -119,6 +119,17 @@ rusage = None stats = None fd_counts = None +# In order to report process_start_time_seconds we need to know the machine's +# boot time, because the value in /proc/self/stat is relative to this +boot_time = None +try: + with open("/proc/stat") as _procstat: + for line in _procstat: + if line.startswith("btime "): + boot_time = int(line.split()[1]) +except IOError: + pass + TYPES = { stat.S_IFSOCK: "SOCK", stat.S_IFLNK: "LNK", @@ -218,6 +229,10 @@ process_metrics.register_callback( "max_fds", lambda: _get_max_fds() ) +process_metrics.register_callback( + "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC +) + reactor_metrics = get_metrics_for("reactor") tick_time = reactor_metrics.register_distribution("tick_time") pending_calls_metric = reactor_metrics.register_distribution("pending_calls") From 1b179455fce9aaf8ce8d1f28f0e552efdee2b7f9 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 15:34:38 +0100 Subject: [PATCH 08/15] Guard registration of process-wide metrics by existence of the requisite /proc entries --- synapse/metrics/__init__.py | 85 ++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index a79fb026d..323328061 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -115,6 +115,10 @@ def render_all(): TICKS_PER_SEC = 100 BYTES_PER_PAGE = 4096 +HAVE_PROC_STAT = os.path.exists("/proc/stat") +HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") +HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits") + rusage = None stats = None fd_counts = None @@ -122,13 +126,11 @@ fd_counts = None # In order to report process_start_time_seconds we need to know the machine's # boot time, because the value in /proc/self/stat is relative to this boot_time = None -try: +if HAVE_PROC_STAT: with 
open("/proc/stat") as _procstat: for line in _procstat: if line.startswith("btime "): boot_time = int(line.split()[1]) -except IOError: - pass TYPES = { stat.S_IFSOCK: "SOCK", @@ -144,11 +146,12 @@ def update_resource_metrics(): global rusage rusage = getrusage(RUSAGE_SELF) - global stats - with open("/proc/self/stat") as s: - line = s.read() - # line is PID (command) more stats go here ... - stats = line.split(") ", 1)[1].split(" ") + if HAVE_PROC_SELF_STAT: + global stats + with open("/proc/self/stat") as s: + line = s.read() + # line is PID (command) more stats go here ... + stats = line.split(") ", 1)[1].split(" ") global fd_counts fd_counts = _process_fds() @@ -195,43 +198,45 @@ get_metrics_for("process").register_callback("fds", _process_fds, labels=["type" ## New prometheus-standard metric names process_metrics = get_metrics_for("process"); -process_metrics.register_callback( - "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC -) -process_metrics.register_callback( - "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC -) -process_metrics.register_callback( - "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC -) +if HAVE_PROC_SELF_STAT: + process_metrics.register_callback( + "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC + ) + process_metrics.register_callback( + "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC + ) + process_metrics.register_callback( + "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC + ) -process_metrics.register_callback( - "virtual_memory_bytes", lambda: int(stats[20]) -) -process_metrics.register_callback( - "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE -) + process_metrics.register_callback( + "virtual_memory_bytes", lambda: int(stats[20]) + ) + process_metrics.register_callback( + "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE + ) 
-process_metrics.register_callback( - "open_fds", lambda: sum(fd_counts.values()) -) + process_metrics.register_callback( + "open_fds", lambda: sum(fd_counts.values()) + ) -def _get_max_fds(): - with open("/proc/self/limits") as limits: - for line in limits: - if not line.startswith("Max open files "): - continue - # Line is Max open files $SOFT $HARD - return int(line.split()[3]) - return None + process_metrics.register_callback( + "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC + ) -process_metrics.register_callback( - "max_fds", lambda: _get_max_fds() -) +if HAVE_PROC_SELF_LIMITS: + def _get_max_fds(): + with open("/proc/self/limits") as limits: + for line in limits: + if not line.startswith("Max open files "): + continue + # Line is Max open files $SOFT $HARD + return int(line.split()[3]) + return None -process_metrics.register_callback( - "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC -) + process_metrics.register_callback( + "max_fds", lambda: _get_max_fds() + ) reactor_metrics = get_metrics_for("reactor") tick_time = reactor_metrics.register_distribution("tick_time") pending_calls_metric = reactor_metrics.register_distribution("pending_calls") From b202531be6d8bfcdb7ce555b164a1578bcdf33f5 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 15:37:41 +0100 Subject: [PATCH 09/15] Also guard /proc/self/fds-related code with a suitable pseudoconstant --- synapse/metrics/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 323328061..081f3aa3d 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -118,6 +118,7 @@ BYTES_PER_PAGE = 4096 HAVE_PROC_STAT = os.path.exists("/proc/stat") HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits") +HAVE_PROC_SELF_FDS = os.path.exists("/proc/self/fds") rusage = None stats = None @@ -162,7 +163,7 @@ def _process_fds(): counts[("other",)] = 0 # Not 
every OS will have a /proc/self/fd directory - if not os.path.exists("/proc/self/fd"): + if not HAVE_PROC_SELF_FDS: return counts for fd in os.listdir("/proc/self/fd"): @@ -217,11 +218,12 @@ if HAVE_PROC_SELF_STAT: ) process_metrics.register_callback( - "open_fds", lambda: sum(fd_counts.values()) + "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC ) +if HAVE_PROC_SELF_FDS: process_metrics.register_callback( - "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC + "open_fds", lambda: sum(fd_counts.values()) ) if HAVE_PROC_SELF_LIMITS: From 5663137e0323a295585ac4843cd86f422457df07 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 16:09:42 +0100 Subject: [PATCH 10/15] appease pep8 --- synapse/metrics/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 081f3aa3d..ea8219a77 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -143,6 +143,7 @@ TYPES = { stat.S_IFIFO: "FIFO", } + def update_resource_metrics(): global rusage rusage = getrusage(RUSAGE_SELF) @@ -183,7 +184,7 @@ def _process_fds(): return counts -## Legacy synapse-invented metric names +# Legacy synapse-invented metric names resource_metrics = get_metrics_for("process.resource") @@ -196,8 +197,9 @@ resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024) get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"]) -## New prometheus-standard metric names -process_metrics = get_metrics_for("process"); +# New prometheus-standard metric names + +process_metrics = get_metrics_for("process") if HAVE_PROC_SELF_STAT: process_metrics.register_callback( From 4cedd532246278361f31f93216103d769b40650c Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 17:54:09 +0100 Subject: [PATCH 11/15] A slightly neater way to manage metric collector functions --- 
synapse/metrics/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index ea8219a77..9d18b0fbf 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -36,6 +36,7 @@ logger = logging.getLogger(__name__) all_metrics = [] +all_collectors = [] class Metrics(object): @@ -46,6 +47,9 @@ class Metrics(object): def __init__(self, name): self.name_prefix = name + def register_collector(self, func): + all_collectors.append(func) + def _register(self, metric_class, name, *args, **kwargs): full_name = "%s_%s" % (self.name_prefix, name) @@ -94,8 +98,8 @@ def get_metrics_for(pkg_name): def render_all(): strs = [] - # TODO(paul): Internal hack - update_resource_metrics() + for collector in all_collectors: + collector() for metric in all_metrics: try: @@ -188,6 +192,8 @@ def _process_fds(): resource_metrics = get_metrics_for("process.resource") +resource_metrics.register_collector(update_resource_metrics) + # msecs resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000) resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000) From 3ae48a1f991a98292df326d56b545d08ed54b135 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 18:10:24 +0100 Subject: [PATCH 12/15] Move the process metrics collector code into its own file --- synapse/app/homeserver.py | 2 + synapse/metrics/__init__.py | 141 ------------------------ synapse/metrics/process_collector.py | 159 +++++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 141 deletions(-) create mode 100644 synapse/metrics/process_collector.py diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 54f35900f..f27150d41 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -52,6 +52,7 @@ from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory from synapse.util.logcontext import 
LoggingContext from synapse.metrics import register_memory_metrics, get_metrics_for +from synapse.metrics.process_collector import register_process_collector from synapse.metrics.resource import MetricsResource, METRICS_PREFIX from synapse.replication.resource import ReplicationResource, REPLICATION_PREFIX from synapse.federation.transport.server import TransportLayerServer @@ -337,6 +338,7 @@ def setup(config_options): hs.get_replication_layer().start_get_pdu_cache() register_memory_metrics(hs) + register_process_collector() reactor.callWhenRunning(start) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 9d18b0fbf..a6b868775 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -13,14 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Because otherwise 'resource' collides with synapse.metrics.resource -from __future__ import absolute_import - import logging -from resource import getrusage, RUSAGE_SELF import functools -import os -import stat import time import gc @@ -113,141 +107,6 @@ def render_all(): return "\n".join(strs) -# Now register some standard process-wide state metrics, to give indications of -# process resource usage - -TICKS_PER_SEC = 100 -BYTES_PER_PAGE = 4096 - -HAVE_PROC_STAT = os.path.exists("/proc/stat") -HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") -HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits") -HAVE_PROC_SELF_FDS = os.path.exists("/proc/self/fds") - -rusage = None -stats = None -fd_counts = None - -# In order to report process_start_time_seconds we need to know the machine's -# boot time, because the value in /proc/self/stat is relative to this -boot_time = None -if HAVE_PROC_STAT: - with open("/proc/stat") as _procstat: - for line in _procstat: - if line.startswith("btime "): - boot_time = int(line.split()[1]) - -TYPES = { - stat.S_IFSOCK: "SOCK", - stat.S_IFLNK: "LNK", - stat.S_IFREG: "REG", - 
stat.S_IFBLK: "BLK", - stat.S_IFDIR: "DIR", - stat.S_IFCHR: "CHR", - stat.S_IFIFO: "FIFO", -} - - -def update_resource_metrics(): - global rusage - rusage = getrusage(RUSAGE_SELF) - - if HAVE_PROC_SELF_STAT: - global stats - with open("/proc/self/stat") as s: - line = s.read() - # line is PID (command) more stats go here ... - stats = line.split(") ", 1)[1].split(" ") - - global fd_counts - fd_counts = _process_fds() - - -def _process_fds(): - counts = {(k,): 0 for k in TYPES.values()} - counts[("other",)] = 0 - - # Not every OS will have a /proc/self/fd directory - if not HAVE_PROC_SELF_FDS: - return counts - - for fd in os.listdir("/proc/self/fd"): - try: - s = os.stat("/proc/self/fd/%s" % (fd)) - fmt = stat.S_IFMT(s.st_mode) - if fmt in TYPES: - t = TYPES[fmt] - else: - t = "other" - - counts[(t,)] += 1 - except OSError: - # the dirh itself used by listdir() is usually missing by now - pass - - return counts - - -# Legacy synapse-invented metric names - -resource_metrics = get_metrics_for("process.resource") - -resource_metrics.register_collector(update_resource_metrics) - -# msecs -resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000) -resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000) - -# kilobytes -resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024) - -get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"]) - -# New prometheus-standard metric names - -process_metrics = get_metrics_for("process") - -if HAVE_PROC_SELF_STAT: - process_metrics.register_callback( - "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC - ) - process_metrics.register_callback( - "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC - ) - process_metrics.register_callback( - "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC - ) - - process_metrics.register_callback( - "virtual_memory_bytes", lambda: int(stats[20]) - ) - 
process_metrics.register_callback( - "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE - ) - - process_metrics.register_callback( - "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC - ) - -if HAVE_PROC_SELF_FDS: - process_metrics.register_callback( - "open_fds", lambda: sum(fd_counts.values()) - ) - -if HAVE_PROC_SELF_LIMITS: - def _get_max_fds(): - with open("/proc/self/limits") as limits: - for line in limits: - if not line.startswith("Max open files "): - continue - # Line is Max open files $SOFT $HARD - return int(line.split()[3]) - return None - - process_metrics.register_callback( - "max_fds", lambda: _get_max_fds() - ) - reactor_metrics = get_metrics_for("reactor") tick_time = reactor_metrics.register_distribution("tick_time") pending_calls_metric = reactor_metrics.register_distribution("pending_calls") diff --git a/synapse/metrics/process_collector.py b/synapse/metrics/process_collector.py new file mode 100644 index 000000000..4145694c5 --- /dev/null +++ b/synapse/metrics/process_collector.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +# Copyright 2015, 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Because otherwise 'resource' collides with synapse.metrics.resource +from __future__ import absolute_import + +import os +import stat +from resource import getrusage, RUSAGE_SELF + +from synapse.metrics import get_metrics_for + + +TICKS_PER_SEC = 100 +BYTES_PER_PAGE = 4096 + +HAVE_PROC_STAT = os.path.exists("/proc/stat") +HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") +HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits") +HAVE_PROC_SELF_FD = os.path.exists("/proc/self/fd") + +TYPES = { + stat.S_IFSOCK: "SOCK", + stat.S_IFLNK: "LNK", + stat.S_IFREG: "REG", + stat.S_IFBLK: "BLK", + stat.S_IFDIR: "DIR", + stat.S_IFCHR: "CHR", + stat.S_IFIFO: "FIFO", +} + + +rusage = None +stats = None +fd_counts = None + +# In order to report process_start_time_seconds we need to know the +# machine's boot time, because the value in /proc/self/stat is relative to +# this +boot_time = None +if HAVE_PROC_STAT: + with open("/proc/stat") as _procstat: + for line in _procstat: + if line.startswith("btime "): + boot_time = int(line.split()[1]) + + +def update_resource_metrics(): + global rusage + rusage = getrusage(RUSAGE_SELF) + + if HAVE_PROC_SELF_STAT: + global stats + with open("/proc/self/stat") as s: + line = s.read() + # line is PID (command) more stats go here ... 
+ stats = line.split(") ", 1)[1].split(" ") + + global fd_counts + fd_counts = _process_fds() + + +def _process_fds(): + counts = {(k,): 0 for k in TYPES.values()} + counts[("other",)] = 0 + + # Not every OS will have a /proc/self/fd directory + if not HAVE_PROC_SELF_FD: + return counts + + for fd in os.listdir("/proc/self/fd"): + try: + s = os.stat("/proc/self/fd/%s" % (fd)) + fmt = stat.S_IFMT(s.st_mode) + if fmt in TYPES: + t = TYPES[fmt] + else: + t = "other" + + counts[(t,)] += 1 + except OSError: + # the dirh itself used by listdir() is usually missing by now + pass + + return counts + + +def register_process_collector(): + # Legacy synapse-invented metric names + + resource_metrics = get_metrics_for("process.resource") + + resource_metrics.register_collector(update_resource_metrics) + + # msecs + resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000) + resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000) + + # kilobytes + resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024) + + get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"]) + + # New prometheus-standard metric names + + process_metrics = get_metrics_for("process") + + if HAVE_PROC_SELF_STAT: + process_metrics.register_callback( + "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC + ) + process_metrics.register_callback( + "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC + ) + process_metrics.register_callback( + "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC + ) + + process_metrics.register_callback( + "virtual_memory_bytes", lambda: int(stats[20]) + ) + process_metrics.register_callback( + "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE + ) + + process_metrics.register_callback( + "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC + ) + + if HAVE_PROC_SELF_FD: + 
process_metrics.register_callback( + "open_fds", lambda: sum(fd_counts.values()) + ) + + if HAVE_PROC_SELF_LIMITS: + def _get_max_fds(): + with open("/proc/self/limits") as limits: + for line in limits: + if not line.startswith("Max open files "): + continue + # Line is Max open files $SOFT $HARD + return int(line.split()[3]) + return None + + process_metrics.register_callback( + "max_fds", lambda: _get_max_fds() + ) From 6453d03edd0765b4f403bf324588df59328623e5 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 18:21:40 +0100 Subject: [PATCH 13/15] Cut the raw /proc/self/stat line up into named fields at collection time --- synapse/metrics/process_collector.py | 30 ++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/synapse/metrics/process_collector.py b/synapse/metrics/process_collector.py index 4145694c5..f31aa79c5 100644 --- a/synapse/metrics/process_collector.py +++ b/synapse/metrics/process_collector.py @@ -41,9 +41,18 @@ TYPES = { stat.S_IFIFO: "FIFO", } +# Field indexes from /proc/self/stat, taken from the proc(5) manpage +STAT_FIELDS = { + "utime": 14, + "stime": 15, + "starttime": 22, + "vsize": 23, + "rss": 24, +} + rusage = None -stats = None +stats = {} fd_counts = None # In order to report process_start_time_seconds we need to know the @@ -66,7 +75,12 @@ def update_resource_metrics(): with open("/proc/self/stat") as s: line = s.read() # line is PID (command) more stats go here ... 
- stats = line.split(") ", 1)[1].split(" ") + raw_stats = line.split(") ", 1)[1].split(" ") + + for (name, index) in STAT_FIELDS.iteritems(): + # subtract 3 from the index, because proc(5) is 1-based, and + # we've lost the first two fields in PID and COMMAND above + stats[name] = int(raw_stats[index - 3]) global fd_counts fd_counts = _process_fds() @@ -119,24 +133,24 @@ def register_process_collector(): if HAVE_PROC_SELF_STAT: process_metrics.register_callback( - "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC + "cpu_user_seconds_total", lambda: float(stats["utime"]) / TICKS_PER_SEC ) process_metrics.register_callback( - "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC + "cpu_system_seconds_total", lambda: float(stats["stime"]) / TICKS_PER_SEC ) process_metrics.register_callback( - "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC + "cpu_seconds_total", lambda: (float(stats["utime"]) + float(stats["stime"])) / TICKS_PER_SEC ) process_metrics.register_callback( - "virtual_memory_bytes", lambda: int(stats[20]) + "virtual_memory_bytes", lambda: int(stats["vsize"]) ) process_metrics.register_callback( - "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE + "resident_memory_bytes", lambda: int(stats["rss"]) * BYTES_PER_PAGE ) process_metrics.register_callback( - "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC + "start_time_seconds", lambda: boot_time + int(stats["starttime"]) / TICKS_PER_SEC ) if HAVE_PROC_SELF_FD: From 1071c7d9634ac25aff657cee5a8dbfbb80128c71 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 18:23:25 +0100 Subject: [PATCH 14/15] Adjust code for <100 char line limit --- synapse/metrics/process_collector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/metrics/process_collector.py b/synapse/metrics/process_collector.py index f31aa79c5..f8542affd 100644 --- 
a/synapse/metrics/process_collector.py +++ b/synapse/metrics/process_collector.py @@ -139,7 +139,7 @@ def register_process_collector(): "cpu_system_seconds_total", lambda: float(stats["stime"]) / TICKS_PER_SEC ) process_metrics.register_callback( - "cpu_seconds_total", lambda: (float(stats["utime"]) + float(stats["stime"])) / TICKS_PER_SEC + "cpu_seconds_total", lambda: (float(stats["utime"] + stats["stime"])) / TICKS_PER_SEC ) process_metrics.register_callback( From b01aaadd4892d0edabbd241d1314921d8b47cec0 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 18:26:13 +0100 Subject: [PATCH 15/15] Split callback metric lambda functions down onto their own lines to keep line lengths under 90 --- synapse/metrics/process_collector.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/synapse/metrics/process_collector.py b/synapse/metrics/process_collector.py index f8542affd..1c851d923 100644 --- a/synapse/metrics/process_collector.py +++ b/synapse/metrics/process_collector.py @@ -133,29 +133,36 @@ def register_process_collector(): if HAVE_PROC_SELF_STAT: process_metrics.register_callback( - "cpu_user_seconds_total", lambda: float(stats["utime"]) / TICKS_PER_SEC + "cpu_user_seconds_total", + lambda: float(stats["utime"]) / TICKS_PER_SEC ) process_metrics.register_callback( - "cpu_system_seconds_total", lambda: float(stats["stime"]) / TICKS_PER_SEC + "cpu_system_seconds_total", + lambda: float(stats["stime"]) / TICKS_PER_SEC ) process_metrics.register_callback( - "cpu_seconds_total", lambda: (float(stats["utime"] + stats["stime"])) / TICKS_PER_SEC + "cpu_seconds_total", + lambda: (float(stats["utime"] + stats["stime"])) / TICKS_PER_SEC ) process_metrics.register_callback( - "virtual_memory_bytes", lambda: int(stats["vsize"]) + "virtual_memory_bytes", + lambda: int(stats["vsize"]) ) process_metrics.register_callback( - "resident_memory_bytes", lambda: int(stats["rss"]) * BYTES_PER_PAGE + 
"resident_memory_bytes", + lambda: int(stats["rss"]) * BYTES_PER_PAGE ) process_metrics.register_callback( - "start_time_seconds", lambda: boot_time + int(stats["starttime"]) / TICKS_PER_SEC + "start_time_seconds", + lambda: boot_time + int(stats["starttime"]) / TICKS_PER_SEC ) if HAVE_PROC_SELF_FD: process_metrics.register_callback( - "open_fds", lambda: sum(fd_counts.values()) + "open_fds", + lambda: sum(fd_counts.values()) ) if HAVE_PROC_SELF_LIMITS: @@ -169,5 +176,6 @@ def register_process_collector(): return None process_metrics.register_callback( - "max_fds", lambda: _get_max_fds() + "max_fds", + lambda: _get_max_fds() )