From 80fa610f9c8702d6b7256be9d97668de29ba2e06 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 16:52:52 +0000 Subject: [PATCH 1/4] Add some comments to metrics classes --- synapse/metrics/metric.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 1d054dd55..c5f0bcbc1 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -24,8 +24,16 @@ def map_concat(func, items): class BaseMetric(object): + """Base class for metrics which report a single value per label set + """ def __init__(self, name, labels=[]): + """ + Args: + name (str): principal name for this metric + labels (list(str)): names of the labels which will be reported + for this metric + """ self.name = name self.labels = labels # OK not to clone as we never write it @@ -36,7 +44,7 @@ class BaseMetric(object): return not len(self.labels) def _render_labelvalue(self, value): - # TODO: some kind of value escape + # TODO: escape backslashes, quotes and newlines return '"%s"' % (value) def _render_key(self, values): @@ -47,6 +55,20 @@ class BaseMetric(object): for k, v in zip(self.labels, values)]) ) + def render(self): + """Render this metric + + Each metric is rendered as: + + name{label1="val1",label2="val2"} value + + https://prometheus.io/docs/instrumenting/exposition_formats/#text-format-details + + Returns: + iterable[str]: rendered metrics + """ + raise NotImplementedError() + class CounterMetric(BaseMetric): """The simplest kind of metric; one that stores a monotonically-increasing @@ -62,6 +84,10 @@ class CounterMetric(BaseMetric): def __init__(self, *args, **kwargs): super(CounterMetric, self).__init__(*args, **kwargs) + # dict[list[str]]: value for each set of label values. the keys are the + # label values, in the same order as the labels in self.labels. + # + # (if the metric is a scalar, the (single) key is the empty list). self.counts = {} # Scalar metrics are never empty From 992018d1c07a727e54c4ad5b4079f5f5de8fec5d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 16:58:41 +0000 Subject: [PATCH 2/4] mechanism to render metrics with alternative names --- synapse/metrics/metric.py | 53 +++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index c5f0bcbc1..f480aae61 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -17,24 +17,33 @@ from itertools import chain -# TODO(paul): I can't believe Python doesn't have one of these -def map_concat(func, items): - # flatten a list-of-lists - return list(chain.from_iterable(map(func, items))) +def flatten(items): + """Flatten a list of lists + + Args: + items: iterable[iterable[X]] + + Returns: + list[X]: flattened list + """ + return list(chain.from_iterable(items)) class BaseMetric(object): """Base class for metrics which report a single value per label set """ - def __init__(self, name, labels=[]): + def __init__(self, name, labels=[], alternative_names=[]): """ Args: name (str): principal name for this metric labels (list(str)): names of the labels which will be reported for this metric + alternative_names (iterable(str)): list of alternative names for + this metric. This can be useful to provide a migration path + when renaming metrics. """ - self.name = name + self._names = [name] + list(alternative_names) self.labels = labels # OK not to clone as we never write it def dimension(self): @@ -55,6 +64,22 @@ class BaseMetric(object): for k, v in zip(self.labels, values)]) ) + def _render_for_labels(self, label_values, value): + """Render this metric for a single set of labels + + Args: + label_values (list[str]): values for each of the labels + value: value of the metric at with these labels + + Returns: + iterable[str]: rendered metric + """ + rendered_labels = self._render_key(label_values) + return ( + "%s%s %.12g" % (name, rendered_labels, value) + for name in self._names + ) + def render(self): """Render this metric @@ -110,11 +135,11 @@ class CounterMetric(BaseMetric): def inc(self, *values): self.inc_by(1, *values) - def render_item(self, k): - return ["%s%s %.12g" % (self.name, self._render_key(k), self.counts[k])] - def render(self): - return map_concat(self.render_item, sorted(self.counts.keys())) + return flatten( + self._render_for_labels(k, self.counts[k]) + for k in sorted(self.counts.keys()) + ) class CallbackMetric(BaseMetric): @@ -131,10 +156,12 @@ class CallbackMetric(BaseMetric): value = self.callback() if self.is_scalar(): - return ["%s %.12g" % (self.name, value)] + return list(self._render_for_labels([], value)) - return ["%s%s %.12g" % (self.name, self._render_key(k), value[k]) - for k in sorted(value.keys())] + return flatten( + self._render_for_labels(k, value[k]) + for k in sorted(value.keys()) + ) class DistributionMetric(object): From 39f4e29d0151b56a3c8528e3149cd5765b9f600d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 17:00:12 +0000 Subject: [PATCH 3/4] Reorganise request and block metrics In order to circumvent the number of duplicate foo:count metrics increasing without bounds, it's time for a rearrangement. The following are all deprecated, and replaced with synapse_util_metrics_block_count: synapse_util_metrics_block_timer:count synapse_util_metrics_block_ru_utime:count synapse_util_metrics_block_ru_stime:count synapse_util_metrics_block_db_txn_count:count synapse_util_metrics_block_db_txn_duration:count The following are all deprecated, and replaced with synapse_http_server_response_count: synapse_http_server_requests synapse_http_server_response_time:count synapse_http_server_response_ru_utime:count synapse_http_server_response_ru_stime:count synapse_http_server_response_db_txn_count:count synapse_http_server_response_db_txn_duration:count The following are renamed (the old metrics are kept for now, but deprecated): synapse_util_metrics_block_timer:total -> synapse_util_metrics_block_time_seconds synapse_util_metrics_block_ru_utime:total -> synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_stime:total -> synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_db_txn_count:total -> synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_duration:total -> synapse_util_metrics_block_db_txn_duration_seconds synapse_http_server_response_time:total -> synapse_http_server_response_time_seconds synapse_http_server_response_ru_utime:total -> synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_stime:total -> synapse_http_server_response_ru_stime_seconds synapse_http_server_response_db_txn_count:total -> synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_duration:total synapse_http_server_response_db_txn_duration_seconds --- synapse/http/server.py | 56 ++++++++++++++++++++++++++++++----------- synapse/util/metrics.py | 53 ++++++++++++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 6e8f4c9c5..269b65ca4 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -42,34 +42,62 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -incoming_requests_counter = metrics.register_counter( - "requests", +# total number of responses served, split by method/servlet/tag +response_count = metrics.register_counter( + "response_count", labels=["method", "servlet", "tag"], + alternative_names=( + # the following are all deprecated aliases for the same metric + metrics.name_prefix + x for x in ( + "_requests", + "_response_time:count", + "_response_ru_utime:count", + "_response_ru_stime:count", + "_response_db_txn_count:count", + "_response_db_txn_duration:count", + ) + ) ) + outgoing_responses_counter = metrics.register_counter( "responses", labels=["method", "code"], ) -response_timer = metrics.register_distribution( - "response_time", - labels=["method", "servlet", "tag"] +response_timer = metrics.register_counter( + "response_time_seconds", + labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_time:total", + ), ) -response_ru_utime = metrics.register_distribution( - "response_ru_utime", labels=["method", "servlet", "tag"] +response_ru_utime = metrics.register_counter( + "response_ru_utime_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_ru_utime:total", + ), ) -response_ru_stime = metrics.register_distribution( - "response_ru_stime", labels=["method", "servlet", "tag"] +response_ru_stime = metrics.register_counter( + "response_ru_stime_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_ru_stime:total", + ), ) -response_db_txn_count = metrics.register_distribution( - "response_db_txn_count", labels=["method", "servlet", "tag"] +response_db_txn_count = metrics.register_counter( + "response_db_txn_count", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_db_txn_count:total", + ), ) -response_db_txn_duration = metrics.register_distribution( - "response_db_txn_duration", labels=["method", "servlet", "tag"] +response_db_txn_duration = metrics.register_counter( + "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_db_txn_duration:total", + ), ) @@ -330,7 +358,7 @@ class RequestMetrics(object): ) return - incoming_requests_counter.inc(request.method, self.name, tag) + response_count.inc(request.method, self.name, tag) response_timer.inc_by( clock.time_msec() - self.start, request.method, diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 4ea930d3e..8d22ff306 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -27,25 +27,56 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -block_timer = metrics.register_distribution( - "block_timer", - labels=["block_name"] +# total number of times we have hit this block +response_count = metrics.register_counter( + "block_count", + labels=["block_name"], + alternative_names=( + # the following are all deprecated aliases for the same metric + metrics.name_prefix + x for x in ( + "_block_timer:count", + "_block_ru_utime:count", + "_block_ru_stime:count", + "_block_db_txn_count:count", + "_block_db_txn_duration:count", + ) + ) ) -block_ru_utime = metrics.register_distribution( - "block_ru_utime", labels=["block_name"] +block_timer = metrics.register_counter( + "block_time_seconds", + labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_timer:total", + ), ) -block_ru_stime = metrics.register_distribution( - "block_ru_stime", labels=["block_name"] +block_ru_utime = metrics.register_counter( + "block_ru_utime_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_ru_utime:total", + ), ) -block_db_txn_count = metrics.register_distribution( - "block_db_txn_count", labels=["block_name"] +block_ru_stime = metrics.register_counter( + "block_ru_stime_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_ru_stime:total", + ), ) -block_db_txn_duration = metrics.register_distribution( - "block_db_txn_duration", labels=["block_name"] +block_db_txn_count = metrics.register_counter( + "block_db_txn_count", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_db_txn_count:total", + ), +) + +block_db_txn_duration = metrics.register_counter( + "block_db_txn_duration_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_db_txn_count:total", + ), ) From ee7a1cabd8c6d218b838295fde6999dcbc23036b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 13:04:01 +0000 Subject: [PATCH 4/4] document metrics changes --- CHANGES.rst | 5 +++- docs/metrics-howto.rst | 61 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 24e4e7a38..a7ed49e10 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,7 +2,10 @@ Unreleased ========== synctl no longer starts the main synapse when using ``-a`` option with workers. -A new worker file should be added with ``worker_app: synapse.app.homeserver`` +A new worker file should be added with ``worker_app: synapse.app.homeserver``. + +This release also begins the process of renaming a number of the metrics +reported to prometheus. See `docs/metrics-howto.rst `_. Changes in synapse v0.26.0 (2018-01-05) diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst index 143cd0f42..8acc479bc 100644 --- a/docs/metrics-howto.rst +++ b/docs/metrics-howto.rst @@ -16,7 +16,7 @@ How to monitor Synapse metrics using Prometheus metrics_port: 9092 Also ensure that ``enable_metrics`` is set to ``True``. - + Restart synapse. 3. Add a prometheus target for synapse. @@ -28,11 +28,58 @@ How to monitor Synapse metrics using Prometheus static_configs: - targets: ["my.server.here:9092"] - If your prometheus is older than 1.5.2, you will need to replace + If your prometheus is older than 1.5.2, you will need to replace ``static_configs`` in the above with ``target_groups``. - + Restart prometheus. + +Block and response metrics renamed for 0.27.0 +--------------------------------------------- + +Synapse 0.27.0 begins the process of rationalising the duplicate ``*:count`` +metrics reported for the resource tracking for code blocks and HTTP requests. + +At the same time, the corresponding ``*:total`` metrics are being renamed, as +the ``:total`` suffix no longer makes sense in the absence of a corresponding +``:count`` metric. + +To enable a graceful migration path, this release just adds new names for the +metrics being renamed. A future release will remove the old ones. + +The following table shows the new metrics, and the old metrics which they are +replacing. + +==================================================== =================================================== +New name Old name +==================================================== =================================================== +synapse_util_metrics_block_count synapse_util_metrics_block_timer:count +synapse_util_metrics_block_count synapse_util_metrics_block_ru_utime:count +synapse_util_metrics_block_count synapse_util_metrics_block_ru_stime:count +synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_count:count +synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_duration:count + +synapse_util_metrics_block_time_seconds synapse_util_metrics_block_timer:total +synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_utime:total +synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_ru_stime:total +synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_count:total +synapse_util_metrics_block_db_txn_duration_seconds synapse_util_metrics_block_db_txn_duration:total + +synapse_http_server_response_count synapse_http_server_requests +synapse_http_server_response_count synapse_http_server_response_time:count +synapse_http_server_response_count synapse_http_server_response_ru_utime:count +synapse_http_server_response_count synapse_http_server_response_ru_stime:count +synapse_http_server_response_count synapse_http_server_response_db_txn_count:count +synapse_http_server_response_count synapse_http_server_response_db_txn_duration:count + +synapse_http_server_response_time_seconds synapse_http_server_response_time:total +synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_utime:total +synapse_http_server_response_ru_stime_seconds synapse_http_server_response_ru_stime:total +synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_count:total +synapse_http_server_response_db_txn_duration_seconds synapse_http_server_response_db_txn_duration:total +==================================================== =================================================== + + Standard Metric Names --------------------- @@ -42,7 +89,7 @@ have been changed to seconds, from miliseconds. ================================== ============================= New name Old name ----------------------------------- ----------------------------- +================================== ============================= process_cpu_user_seconds_total process_resource_utime / 1000 process_cpu_system_seconds_total process_resource_stime / 1000 process_open_fds (no 'type' label) process_fds @@ -52,8 +99,8 @@ The python-specific counts of garbage collector performance have been renamed. =========================== ====================== New name Old name ---------------------------- ---------------------- -python_gc_time reactor_gc_time +=========================== ====================== +python_gc_time reactor_gc_time python_gc_unreachable_total reactor_gc_unreachable python_gc_counts reactor_gc_counts =========================== ====================== @@ -62,7 +109,7 @@ The twisted-specific reactor metrics have been renamed. ==================================== ===================== New name Old name ------------------------------------- --------------------- +==================================== ===================== python_twisted_reactor_pending_calls reactor_pending_calls python_twisted_reactor_tick_time reactor_tick_time ==================================== =====================