From ad5701f50f1893ceb04160d36facf9e10bd52a0d Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 14:23:26 +0000 Subject: [PATCH 01/53] Expose 'config' as a real HomeServer dependency key --- synapse/server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/server.py b/synapse/server.py index cb8610a1b..e3eefda4f 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -56,6 +56,7 @@ class BaseHomeServer(object): """ DEPENDENCIES = [ + 'config', 'clock', 'http_client', 'db_name', From 211c14c3910ca0cd86a55ac27ef575127e73e721 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 14:23:50 +0000 Subject: [PATCH 02/53] No need to explicitly pass 'web_client' in to create_resource_tree as it can be found via config --- synapse/app/homeserver.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 3649406ef..5df4ba23a 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -110,7 +110,7 @@ class SynapseHomeServer(HomeServer): # so that :memory: sqlite works ) - def create_resource_tree(self, web_client, redirect_root_to_web_client): + def create_resource_tree(self, redirect_root_to_web_client): """Create the resource tree for this Home Server. This in unduly complicated because Twisted does not support putting @@ -122,6 +122,8 @@ class SynapseHomeServer(HomeServer): location of the web client. This does nothing if web_client is not True. 
""" + web_client = self.get_config().webclient + # list containing (path_str, Resource) e.g: # [ ("/aaa/bbb/cc", Resource1), ("/aaa/dummy", Resource2) ] desired_tree = [ @@ -340,7 +342,6 @@ def setup(config_options): ) hs.create_resource_tree( - web_client=config.webclient, redirect_root_to_web_client=True, ) From 5b6e11d560e274e8690417b21300ca1a79a24d92 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 14:30:14 +0000 Subject: [PATCH 03/53] Commandline option to enable metrics system --- synapse/config/homeserver.py | 4 +++- synapse/config/metrics.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 synapse/config/metrics.py diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index c024535f5..241afdf87 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -23,11 +23,13 @@ from .captcha import CaptchaConfig from .email import EmailConfig from .voip import VoipConfig from .registration import RegistrationConfig +from .metrics import MetricsConfig class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, RatelimitConfig, ContentRepositoryConfig, CaptchaConfig, - EmailConfig, VoipConfig, RegistrationConfig,): + EmailConfig, VoipConfig, RegistrationConfig, + MetricsConfig,): pass diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py new file mode 100644 index 000000000..90aba10bb --- /dev/null +++ b/synapse/config/metrics.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._base import Config + + +class MetricsConfig(Config): + def __init__(self, args): + super(MetricsConfig, self).__init__(args) + self.enable_metrics = args.enable_metrics + + @classmethod + def add_arguments(cls, parser): + super(MetricsConfig, cls).add_arguments(parser) + metrics_group = parser.add_argument_group("metrics") + metrics_group.add_argument( + '--enable-metrics', dest="enable_metrics", action="store_true", + help="Enable collection and rendering of performance metrics" + ) From e07fc62833dbbfc9dda7c344f194fc070cfc587f Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 15:49:14 +0000 Subject: [PATCH 04/53] A trivial 'hello world'-style resource on /_synapse/metrics, with optional commandline flag --- synapse/app/homeserver.py | 7 +++++++ synapse/metrics/resource.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 synapse/metrics/resource.py diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 5df4ba23a..9747f7a01 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -147,6 +147,13 @@ class SynapseHomeServer(HomeServer): else: self.root_resource = Resource() + if self.get_config().enable_metrics: + from synapse.metrics.resource import ( + MetricsResource, METRICS_PREFIX + ) + + desired_tree.append((METRICS_PREFIX, MetricsResource(self))) + # ideally we'd just use getChild and putChild but getChild doesn't work # unless you give it a Request object IN ADDITION to the name :/ So # instead, we'll store a copy of this mapping so 
we can actually add diff --git a/synapse/metrics/resource.py b/synapse/metrics/resource.py new file mode 100644 index 000000000..b48048462 --- /dev/null +++ b/synapse/metrics/resource.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET + + +METRICS_PREFIX = "/_synapse/metrics" + + +class MetricsResource(Resource): + isLeaf = True + + def __init__(self, hs): + Resource.__init__(self) # Resource is old-style, so no super() + + self.hs = hs + + def render_GET(self, request): + response = "Hello, world!" 
+ + request.setHeader("Content-Type", "text/plain") + request.setHeader("Content-Length", str(len(response))) + + return response From e7420a3bef308e12d2b202c7a2c256d15eee0983 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 16:58:26 +0000 Subject: [PATCH 05/53] Initial tiny attempt at (vectorable) counter metrics --- synapse/metrics/metric.py | 54 +++++++++++++++++++++++++++++++ tests/metrics/__init__.py | 0 tests/metrics/test_metric.py | 61 ++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 synapse/metrics/metric.py create mode 100644 tests/metrics/__init__.py create mode 100644 tests/metrics/test_metric.py diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py new file mode 100644 index 000000000..f5a98763c --- /dev/null +++ b/synapse/metrics/metric.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +class CounterMetric(object): + + def __init__(self, name, keys=[]): + self.name = name + self.keys = keys # OK not to clone as we never write it + + self.counts = {} + + # Scalar metrics are never empty + if not len(keys): + self.counts[()] = 0 + + def inc(self, *values): + if len(values) != len(self.keys): + raise ValueError("Expected as many values to inc() as keys (%d)" % + (len(self.keys)) + ) + + # TODO: should assert that the tag values are all strings + + if values not in self.counts: + self.counts[values] = 1 + else: + self.counts[values] += 1 + + def fetch(self): + return dict(self.counts) + + def _render_key(self, values): + # TODO: some kind of value escape + return ",".join(["%s=%s" % kv for kv in zip(self.keys, values)]) + + def render(self): + if not len(self.keys): + return ["%s %d" % (self.name, self.counts[()])] + + return ["%s{%s} %d" % (self.name, self._render_key(k), self.counts[k]) + for k in sorted(self.counts.keys())] diff --git a/tests/metrics/__init__.py b/tests/metrics/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py new file mode 100644 index 000000000..a4fd52a9d --- /dev/null +++ b/tests/metrics/test_metric.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from tests import unittest + +from synapse.metrics.metric import CounterMetric + + +class CounterMetricTestCase(unittest.TestCase): + + def test_scalar(self): + counter = CounterMetric("scalar") + + self.assertEquals(counter.render(), [ + "scalar 0", + ]) + + counter.inc() + + self.assertEquals(counter.render(), [ + "scalar 1", + ]) + + counter.inc() + counter.inc() + + self.assertEquals(counter.render(), [ + "scalar 3" + ]) + + def test_vector(self): + counter = CounterMetric("vector", keys=["method"]) + + # Empty counter doesn't yet know what values it has + self.assertEquals(counter.render(), []) + + counter.inc("GET") + + self.assertEquals(counter.render(), [ + "vector{method=GET} 1", + ]) + + counter.inc("GET") + counter.inc("PUT") + + self.assertEquals(counter.render(), [ + "vector{method=GET} 2", + "vector{method=PUT} 1", + ]) From 120b6892840bae0e791348da4b1b35761e841b55 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 17:20:14 +0000 Subject: [PATCH 06/53] Delete pointless (and unreachable) __init__ method from FederationClient --- synapse/federation/federation_client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index f131941f4..2284fc1d9 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -37,8 +37,6 @@ logger = logging.getLogger(__name__) class FederationClient(FederationBase): - def __init__(self): - self._get_pdu_cache = None def start_get_pdu_cache(self): self._get_pdu_cache = ExpiringCache( From 74bc42cfddb9bbaf364794857ab902b046c893dc Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 18:03:39 +0000 Subject: [PATCH 07/53] An initial implementation of a 'metrics' instance, similar to a 'logger' for keeping counter stats on method calls --- synapse/metrics/__init__.py | 69 +++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 
100644 synapse/metrics/__init__.py diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py new file mode 100644 index 000000000..125845eb3 --- /dev/null +++ b/synapse/metrics/__init__.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .metric import CounterMetric + + +# We'll keep all the available metrics in a single toplevel dict, one shared +# for the entire process. We don't currently support per-HomeServer instances +# of metrics, because in practice any one python VM will host only one +# HomeServer anyway. This makes a lot of implementation neater +all_metrics = {} + + +class Metrics(object): + """ A single Metrics object gives a (mutable) slice view of the all_metrics + dict, allowing callers to easily register new metrics that are namespaced + nicely.""" + + def __init__(self, name): + self.name_prefix = name + + def _register(self, metric): + all_metrics[metric.name] = metric + + def register_counter(self, name, *args, **kwargs): + full_name = "%s.%s" % (self.name_prefix, name) + + metric = CounterMetric(full_name, *args, **kwargs) + + self._register(metric) + + return metric + + def counted(self, func): + """ A method decorator that registers a counter, to count invocations + of this method. 
""" + counter = self.register_counter(func.__name__) + + def wrapped(*args, **kwargs): + counter.inc() + return func(*args, **kwargs) + return wrapped + + +def get_metrics_for(name): + """ Returns a Metrics instance for conveniently creating metrics + namespaced with the given name prefix. """ + return Metrics(name) + + +def render_all(): + strs = [] + + for name in sorted(all_metrics.keys()): + strs += all_metrics[name].render() + + return "\n".join(strs) From a594087f06795ae19f7eaf22df314be83dd7286f Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 18:04:01 +0000 Subject: [PATCH 08/53] Have the MetricsResource actually render metric counters --- synapse/metrics/resource.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/resource.py b/synapse/metrics/resource.py index b48048462..2b9a1ee0a 100644 --- a/synapse/metrics/resource.py +++ b/synapse/metrics/resource.py @@ -16,6 +16,8 @@ from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET +import synapse.metrics + METRICS_PREFIX = "/_synapse/metrics" @@ -29,7 +31,7 @@ class MetricsResource(Resource): self.hs = hs def render_GET(self, request): - response = "Hello, world!" 
+ response = synapse.metrics.render_all() request.setHeader("Content-Type", "text/plain") request.setHeader("Content-Length", str(len(response))) From 9470412316dee5c782b0815383fff1ba10002f15 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 18:10:44 +0000 Subject: [PATCH 09/53] Initial attempt at sprinkling some @metrics.counted decorations around the federation code --- synapse/federation/federation_client.py | 14 ++++++++++++++ synapse/federation/federation_server.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 2284fc1d9..ef177b79c 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -25,6 +25,7 @@ from synapse.api.errors import ( from synapse.util.expiringcache import ExpiringCache from synapse.util.logutils import log_function from synapse.events import FrozenEvent +import synapse.metrics from synapse.util.retryutils import get_retry_limiter, NotRetryingDestination @@ -35,6 +36,8 @@ import random logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + class FederationClient(FederationBase): @@ -50,6 +53,7 @@ class FederationClient(FederationBase): self._get_pdu_cache.start() @log_function + @metrics.counted def send_pdu(self, pdu, destinations): """Informs the replication layer about a new PDU generated within the home server that should be transmitted to others. 
@@ -77,6 +81,7 @@ class FederationClient(FederationBase): ) @log_function + @metrics.counted def send_edu(self, destination, edu_type, content): edu = Edu( origin=self.server_name, @@ -90,11 +95,13 @@ class FederationClient(FederationBase): return defer.succeed(None) @log_function + @metrics.counted def send_failure(self, failure, destination): self._transaction_queue.enqueue_failure(failure, destination) return defer.succeed(None) @log_function + @metrics.counted def make_query(self, destination, query_type, args, retry_on_dns_fail=True): """Sends a federation Query to a remote homeserver of the given type @@ -156,6 +163,7 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function + @metrics.counted def get_pdu(self, destinations, event_id, outlier=False): """Requests the PDU with given origin and ID from the remote home servers. @@ -245,6 +253,7 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function + @metrics.counted def get_state_for_room(self, destination, room_id, event_id): """Requests all of the `current` state PDUs for a given room from a remote home server. 
@@ -285,6 +294,7 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function + @metrics.counted def get_event_auth(self, destination, room_id, event_id): res = yield self.transport_layer.get_event_auth( destination, room_id, event_id, @@ -304,6 +314,7 @@ class FederationClient(FederationBase): defer.returnValue(signed_auth) @defer.inlineCallbacks + @metrics.counted def make_join(self, destinations, room_id, user_id): for destination in destinations: try: @@ -330,6 +341,7 @@ class FederationClient(FederationBase): raise RuntimeError("Failed to send to any server.") @defer.inlineCallbacks + @metrics.counted def send_join(self, destinations, pdu): for destination in destinations: try: @@ -379,6 +391,7 @@ class FederationClient(FederationBase): raise RuntimeError("Failed to send to any server.") @defer.inlineCallbacks + @metrics.counted def send_invite(self, destination, room_id, event_id, pdu): time_now = self._clock.time_msec() code, content = yield self.transport_layer.send_invite( @@ -402,6 +415,7 @@ class FederationClient(FederationBase): defer.returnValue(pdu) @defer.inlineCallbacks + @metrics.counted def query_auth(self, destination, room_id, event_id, local_auth): """ Params: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9c7dcdba9..3216fca95 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -22,6 +22,7 @@ from .units import Transaction, Edu from synapse.util.logutils import log_function from synapse.util.logcontext import PreserveLoggingContext from synapse.events import FrozenEvent +import synapse.metrics from synapse.api.errors import FederationError, SynapseError @@ -32,6 +33,8 @@ import logging logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + class FederationServer(FederationBase): def set_handler(self, handler): @@ -72,6 +75,7 @@ class FederationServer(FederationBase): 
@defer.inlineCallbacks @log_function + @metrics.counted def on_backfill_request(self, origin, room_id, versions, limit): pdus = yield self.handler.on_backfill_request( origin, room_id, versions, limit @@ -81,6 +85,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function + @metrics.counted def on_incoming_transaction(self, transaction_data): transaction = Transaction(**transaction_data) @@ -160,6 +165,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function + @metrics.counted def on_context_state_request(self, origin, room_id, event_id): if event_id: pdus = yield self.handler.get_state_for_pdu( @@ -187,6 +193,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function + @metrics.counted def on_pdu_request(self, origin, event_id): pdu = yield self._get_persisted_pdu(origin, event_id) @@ -199,10 +206,12 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function + @metrics.counted def on_pull_request(self, origin, versions): raise NotImplementedError("Pull transactions not implemented") @defer.inlineCallbacks + @metrics.counted def on_query_request(self, query_type, args): if query_type in self.query_handlers: response = yield self.query_handlers[query_type](args) @@ -213,12 +222,14 @@ class FederationServer(FederationBase): ) @defer.inlineCallbacks + @metrics.counted def on_make_join_request(self, room_id, user_id): pdu = yield self.handler.on_make_join_request(room_id, user_id) time_now = self._clock.time_msec() defer.returnValue({"event": pdu.get_pdu_json(time_now)}) @defer.inlineCallbacks + @metrics.counted def on_invite_request(self, origin, content): pdu = self.event_from_pdu_json(content) ret_pdu = yield self.handler.on_invite_request(origin, pdu) @@ -226,6 +237,7 @@ class FederationServer(FederationBase): defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)})) @defer.inlineCallbacks + @metrics.counted def on_send_join_request(self, origin, content): 
logger.debug("on_send_join_request: content: %s", content) pdu = self.event_from_pdu_json(content) @@ -240,6 +252,7 @@ class FederationServer(FederationBase): })) @defer.inlineCallbacks + @metrics.counted def on_event_auth(self, origin, room_id, event_id): time_now = self._clock.time_msec() auth_pdus = yield self.handler.on_event_auth(event_id) @@ -248,6 +261,7 @@ class FederationServer(FederationBase): })) @defer.inlineCallbacks + @metrics.counted def on_query_auth_request(self, origin, content, event_id): """ Content is a dict with keys:: From c53ec53d80a02a1feea5efab18f0f84a0bdf4be8 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 19:05:20 +0000 Subject: [PATCH 10/53] Pull out all uses of the underlying HTTP user agent .request() method into a single wrapper function, to make adding metrics easier --- synapse/http/client.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index b53a07aa2..d61e53bd9 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -45,12 +45,17 @@ class SimpleHttpClient(object): self.agent = Agent(reactor) self.version_string = hs.version_string + def request(self, method, *args, **kwargs): + # A small wrapper around self.agent.request() so we can easily attach + # counters to it + return self.agent.request(method, *args, **kwargs) + @defer.inlineCallbacks def post_urlencoded_get_json(self, uri, args={}): logger.debug("post_urlencoded_get_json args: %s", args) query_bytes = urllib.urlencode(args, True) - response = yield self.agent.request( + response = yield self.request( "POST", uri.encode("ascii"), headers=Headers({ @@ -70,7 +75,7 @@ class SimpleHttpClient(object): logger.info("HTTP POST %s -> %s", json_str, uri) - response = yield self.agent.request( + response = yield self.request( "POST", uri.encode("ascii"), headers=Headers({ @@ -104,7 +109,7 @@ class SimpleHttpClient(object): query_bytes = 
urllib.urlencode(args, True) uri = "%s?%s" % (uri, query_bytes) - response = yield self.agent.request( + response = yield self.request( "GET", uri.encode("ascii"), headers=Headers({ @@ -145,7 +150,7 @@ class SimpleHttpClient(object): json_str = encode_canonical_json(json_body) - response = yield self.agent.request( + response = yield self.request( "PUT", uri.encode("ascii"), headers=Headers({ @@ -176,7 +181,7 @@ class CaptchaServerHttpClient(SimpleHttpClient): def post_urlencoded_get_raw(self, url, args={}): query_bytes = urllib.urlencode(args, True) - response = yield self.agent.request( + response = yield self.request( "POST", url.encode("ascii"), bodyProducer=FileBodyProducer(StringIO(query_bytes)), From 7d72e44eb9660eaeab017897853cf3ec18fd06c6 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 24 Feb 2015 19:51:21 +0000 Subject: [PATCH 11/53] Add vector counters to HTTP clients and servers; count the requests by method and responses by method and response code --- synapse/http/client.py | 23 ++++++++++++++++++++- synapse/http/matrixfederationclient.py | 28 +++++++++++++++++++++++--- synapse/http/server.py | 14 +++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index d61e53bd9..e40e82e80 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -15,6 +15,7 @@ from synapse.api.errors import CodeMessageException from syutil.jsonutil import encode_canonical_json +import synapse.metrics from twisted.internet import defer, reactor from twisted.web.client import ( @@ -31,6 +32,15 @@ import urllib logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + +outgoing_requests_counter = metrics.register_counter("outgoing_requests", + keys=["method"], +) +incoming_responses_counter = metrics.register_counter("incoming_responses", + keys=["method","code"], +) + class SimpleHttpClient(object): """ @@ -48,7 +58,18 @@ class 
SimpleHttpClient(object): def request(self, method, *args, **kwargs): # A small wrapper around self.agent.request() so we can easily attach # counters to it - return self.agent.request(method, *args, **kwargs) + outgoing_requests_counter.inc(method) + d = self.agent.request(method, *args, **kwargs) + + def _cb(response): + incoming_responses_counter.inc(method, response.code) + return response + def _eb(failure): + incoming_responses_counter.inc(method, "ERR") + return failure + d.addCallbacks(_cb, _eb) + + return d @defer.inlineCallbacks def post_urlencoded_get_json(self, uri, args={}): diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 7db001cc6..009152769 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -23,6 +23,7 @@ from twisted.web._newclient import ResponseDone from synapse.http.endpoint import matrix_federation_endpoint from synapse.util.async import sleep from synapse.util.logcontext import PreserveLoggingContext +import synapse.metrics from syutil.jsonutil import encode_canonical_json @@ -40,6 +41,15 @@ import urlparse logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + +outgoing_requests_counter = metrics.register_counter("outgoing_requests", + keys=["method"], +) +incoming_responses_counter = metrics.register_counter("incoming_responses", + keys=["method","code"], +) + class MatrixFederationHttpAgent(_AgentBase): @@ -49,6 +59,8 @@ class MatrixFederationHttpAgent(_AgentBase): def request(self, destination, endpoint, method, path, params, query, headers, body_producer): + outgoing_requests_counter.inc(method) + host = b"" port = 0 fragment = b"" @@ -59,9 +71,19 @@ class MatrixFederationHttpAgent(_AgentBase): # Set the connection pool key to be the destination. 
key = destination - return self._requestWithEndpoint(key, endpoint, method, parsed_URI, - headers, body_producer, - parsed_URI.originForm) + d = self._requestWithEndpoint(key, endpoint, method, parsed_URI, + headers, body_producer, + parsed_URI.originForm) + + def _cb(response): + incoming_responses_counter.inc(method, response.code) + return response + def _eb(failure): + incoming_responses_counter.inc(method, "ERR") + return failure + d.addCallbacks(_cb, _eb) + + return d class MatrixFederationHttpClient(object): diff --git a/synapse/http/server.py b/synapse/http/server.py index 767c3ef79..ac893bb40 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -18,6 +18,7 @@ from synapse.api.errors import ( cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError ) from synapse.util.logcontext import LoggingContext +import synapse.metrics from syutil.jsonutil import ( encode_canonical_json, encode_pretty_printed_json @@ -34,6 +35,15 @@ import urllib logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + +incoming_requests_counter = metrics.register_counter("incoming_requests", + keys=["method"], +) +outgoing_responses_counter = metrics.register_counter("outgoing_responses", + keys=["method","code"], +) + class HttpServer(object): """ Interface for registering callbacks on a HTTP server @@ -112,6 +122,8 @@ class JsonResource(HttpServer, resource.Resource): This checks if anyone has registered a callback for that method and path. """ + incoming_requests_counter.inc(request.method) + code = None start = self.clock.time_msec() try: @@ -190,6 +202,8 @@ class JsonResource(HttpServer, resource.Resource): request) return + outgoing_responses_counter.inc(request.method, str(code)) + # TODO: Only enable CORS for the requests that need it. 
respond_with_json( request, code, response_json_object, From ce8b5769f7e08515edf8988281d17df7b0ddfdaa Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 15:47:23 +0000 Subject: [PATCH 12/53] Create the concept of a cachecounter metric; generating two counters specific to caches --- synapse/metrics/__init__.py | 11 ++++++++- synapse/metrics/metric.py | 43 +++++++++++++++++++++++++++++++----- tests/metrics/test_metric.py | 27 +++++++++++++++++++++- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 125845eb3..d5c30bbe4 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .metric import CounterMetric +from .metric import CounterMetric, CacheCounterMetric # We'll keep all the available metrics in a single toplevel dict, one shared @@ -43,6 +43,15 @@ class Metrics(object): return metric + def register_cachecounter(self, name, *args, **kwargs): + full_name = "%s.%s" % (self.name_prefix, name) + + metric = CacheCounterMetric(full_name, *args, **kwargs) + + self._register(metric) + + return metric + def counted(self, func): """ A method decorator that registers a counter, to count invocations of this method. """ diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index f5a98763c..00b149f6f 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -14,16 +14,28 @@ # limitations under the License. 
-class CounterMetric(object): +class BaseMetric(object): def __init__(self, name, keys=[]): self.name = name self.keys = keys # OK not to clone as we never write it + def _render_key(self, values): + # TODO: some kind of value escape + return ",".join(["%s=%s" % kv for kv in zip(self.keys, values)]) + + +class CounterMetric(BaseMetric): + """The simplest kind of metric; one that stores a monotonically-increasing + integer that counts events.""" + + def __init__(self, *args, **kwargs): + super(CounterMetric, self).__init__(*args, **kwargs) + self.counts = {} # Scalar metrics are never empty - if not len(keys): + if not len(self.keys): self.counts[()] = 0 def inc(self, *values): @@ -42,13 +54,32 @@ class CounterMetric(object): def fetch(self): return dict(self.counts) - def _render_key(self, values): - # TODO: some kind of value escape - return ",".join(["%s=%s" % kv for kv in zip(self.keys, values)]) - def render(self): if not len(self.keys): return ["%s %d" % (self.name, self.counts[()])] return ["%s{%s} %d" % (self.name, self._render_key(k), self.counts[k]) for k in sorted(self.counts.keys())] + + +class CacheCounterMetric(object): + """A combination of two CounterMetrics, one to count cache hits and one to + count misses. 
+ + This metric generates standard metric name pairs, so that monitoring rules + can easily be applied to measure hit ratio.""" + + def __init__(self, name, keys=[]): + self.name = name + + self.hits = CounterMetric(name + ":hits", keys=keys) + self.misses = CounterMetric(name + ":misses", keys=keys) + + def inc_hits(self, *values): + self.hits.inc(*values) + + def inc_misses(self, *values): + self.misses.inc(*values) + + def render(self): + return self.hits.render() + self.misses.render() diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index a4fd52a9d..93e8e27e4 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -15,7 +15,7 @@ from tests import unittest -from synapse.metrics.metric import CounterMetric +from synapse.metrics.metric import CounterMetric, CacheCounterMetric class CounterMetricTestCase(unittest.TestCase): @@ -59,3 +59,28 @@ class CounterMetricTestCase(unittest.TestCase): "vector{method=GET} 2", "vector{method=PUT} 1", ]) + + +class CacheCounterMetricTestCase(unittest.TestCase): + + def test_cachecounter(self): + counter = CacheCounterMetric("cache") + + self.assertEquals(counter.render(), [ + "cache:hits 0", + "cache:misses 0", + ]) + + counter.inc_misses() + + self.assertEquals(counter.render(), [ + "cache:hits 0", + "cache:misses 1", + ]) + + counter.inc_hits() + + self.assertEquals(counter.render(), [ + "cache:hits 1", + "cache:misses 1", + ]) From b0cdf097f4393fc288d6d31bb9b37bf921a35a1c Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 16:04:46 +0000 Subject: [PATCH 13/53] Sprinkle some CacheCounterMetrics around the synapse.storage layer --- synapse/storage/_base.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 3ea738276..804655e34 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -20,6 +20,7 @@ from synapse.events.utils import prune_event from 
synapse.util.logutils import log_function from synapse.util.logcontext import PreserveLoggingContext, LoggingContext from synapse.util.lrucache import LruCache +import synapse.metrics from twisted.internet import defer @@ -31,6 +32,8 @@ import time logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for("synapse.storage") + sql_logger = logging.getLogger("synapse.storage.SQL") transaction_logger = logging.getLogger("synapse.storage.txn") @@ -56,6 +59,8 @@ def cached(max_entries=1000): def wrap(orig): cache = OrderedDict() + counter = metrics.register_cachecounter(orig.__name__) + def prefill(key, value): while len(cache) > max_entries: cache.popitem(last=False) @@ -65,8 +70,10 @@ def cached(max_entries=1000): @defer.inlineCallbacks def wrapped(self, key): if key in cache: + counter.inc_hits() defer.returnValue(cache[key]) + counter.inc_misses() ret = yield orig(self, key) prefill(key, ret) defer.returnValue(ret) @@ -176,6 +183,9 @@ class SQLBaseStore(object): self._get_event_counters = PerformanceCounters() self._get_event_cache = LruCache(hs.config.event_cache_size) + self._get_event_cache_counter = metrics.register_cachecounter( + "get_event" + ) def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() @@ -644,8 +654,12 @@ class SQLBaseStore(object): try: # Separate cache entries for each way to invoke _get_event_txn - return cache[(check_redacted, get_prev_content, allow_rejected)] + ret = cache[(check_redacted, get_prev_content, allow_rejected)] + + self._get_event_cache_counter.inc_hits() + return ret except KeyError: + self._get_event_cache_counter.inc_misses() pass finally: start_time = update_counter("event_cache", start_time) From d8caa5454d781a76a65fa4ce75336541b973f624 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 16:46:44 +0000 Subject: [PATCH 14/53] Initial attempt at a scalar callback-based metric to give instantaneous snapshot gauges --- synapse/metrics/__init__.py | 11 
++++++++++- synapse/metrics/metric.py | 14 ++++++++++++++ tests/metrics/test_metric.py | 22 +++++++++++++++++++++- 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index d5c30bbe4..d7584fc0b 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .metric import CounterMetric, CacheCounterMetric +from .metric import CounterMetric, CallbackMetric, CacheCounterMetric # We'll keep all the available metrics in a single toplevel dict, one shared @@ -43,6 +43,15 @@ class Metrics(object): return metric + def register_callback(self, name, callback, *args, **kwargs): + full_name = "%s.%s" % (self.name_prefix, name) + + metric = CallbackMetric(full_name, *args, callback=callback, **kwargs) + + self._register(metric) + + return metric + def register_cachecounter(self, name, *args, **kwargs): full_name = "%s.%s" % (self.name_prefix, name) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 00b149f6f..8a497fc15 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -62,6 +62,20 @@ class CounterMetric(BaseMetric): for k in sorted(self.counts.keys())] +class CallbackMetric(BaseMetric): + """A metric that returns the numeric value returned by a callback whenever + it is rendered. Typically this is used to implement gauges that yield the + size or other state of some in-memory object by actively querying it.""" + + def __init__(self, name, callback, keys=[]): + super(CallbackMetric, self).__init__(name, keys=keys) + + self.callback = callback + + def render(self): + # TODO(paul): work out something we can do with keys and vectors + return ["%s %d" % (self.name, self.callback())] + class CacheCounterMetric(object): """A combination of two CounterMetrics, one to count cache hits and one to count misses. 
diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index 93e8e27e4..b7bb375ce 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -15,7 +15,9 @@ from tests import unittest -from synapse.metrics.metric import CounterMetric, CacheCounterMetric +from synapse.metrics.metric import ( + CounterMetric, CallbackMetric, CacheCounterMetric +) class CounterMetricTestCase(unittest.TestCase): @@ -61,6 +63,24 @@ class CounterMetricTestCase(unittest.TestCase): ]) +class CallbackMetricTestCase(unittest.TestCase): + + def test_callback(self): + d = dict() + + metric = CallbackMetric("size", lambda: len(d)) + + self.assertEquals(metric.render(), [ + "size 0", + ]) + + d["key"] = "value" + + self.assertEquals(metric.render(), [ + "size 1", + ]) + + class CacheCounterMetricTestCase(unittest.TestCase): def test_cachecounter(self): From 59c448f074439f1c6f5f51765e24ecfcff8e4101 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 16:50:23 +0000 Subject: [PATCH 15/53] Add a scalar gauge metric on the size of the presence user cachemap --- synapse/handlers/presence.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 28e922f79..698946a48 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -21,6 +21,7 @@ from synapse.api.constants import PresenceState from synapse.util.logutils import log_function from synapse.util.logcontext import PreserveLoggingContext from synapse.types import UserID +import synapse.metrics from ._base import BaseHandler @@ -29,6 +30,8 @@ import logging logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + # TODO(paul): Maybe there's one of these I can steal from somewhere def partition(l, func): @@ -133,6 +136,10 @@ class PresenceHandler(BaseHandler): self._user_cachemap = {} self._user_cachemap_latest_serial = 0 + 
metrics.register_callback("user_cachemap:size", + lambda: len(self._user_cachemap) + ) + def _get_or_make_usercache(self, user): """If the cache entry doesn't exist, initialise a new one.""" if user not in self._user_cachemap: From e02cc249da6ff71dcc1e5560232b302246a11c9d Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 17:13:09 +0000 Subject: [PATCH 16/53] Ensure that exceptions while rendering individual metrics don't stop others from being rendered anyway - especially useful for CallbackMetric --- synapse/metrics/__init__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index d7584fc0b..d967b04ee 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -13,9 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging + from .metric import CounterMetric, CallbackMetric, CacheCounterMetric +logger = logging.getLogger(__name__) + + # We'll keep all the available metrics in a single toplevel dict, one shared # for the entire process. 
We don't currently support per-HomeServer instances # of metrics, because in practice any one python VM will host only one @@ -82,6 +87,10 @@ def render_all(): strs = [] for name in sorted(all_metrics.keys()): - strs += all_metrics[name].render() + try: + strs += all_metrics[name].render() + except Exception as e: + strs += ["# FAILED to render %s" % name] + logger.exception("Failed to render %s metric", name) return "\n".join(strs) From 8664599af77ba0ed6268b3112174dc8e0c91101b Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 17:34:23 +0000 Subject: [PATCH 17/53] Rename CacheCounterMetric to just CacheMetric; add a CallbackMetric component to give the size of the cache --- synapse/metrics/__init__.py | 6 +++--- synapse/metrics/metric.py | 13 +++++++++---- synapse/storage/_base.py | 6 +++--- tests/metrics/test_metric.py | 24 +++++++++++++++--------- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index d967b04ee..442fd70cd 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -15,7 +15,7 @@ import logging -from .metric import CounterMetric, CallbackMetric, CacheCounterMetric +from .metric import CounterMetric, CallbackMetric, CacheMetric logger = logging.getLogger(__name__) @@ -57,10 +57,10 @@ class Metrics(object): return metric - def register_cachecounter(self, name, *args, **kwargs): + def register_cache(self, name, *args, **kwargs): full_name = "%s.%s" % (self.name_prefix, name) - metric = CacheCounterMetric(full_name, *args, **kwargs) + metric = CacheMetric(full_name, *args, **kwargs) self._register(metric) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 8a497fc15..7e47f8615 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -76,19 +76,24 @@ class CallbackMetric(BaseMetric): # TODO(paul): work out something we can do with keys and vectors return ["%s %d" % (self.name, self.callback())] 
-class CacheCounterMetric(object): +class CacheMetric(object): """A combination of two CounterMetrics, one to count cache hits and one to - count misses. + count misses, and a callback metric to yield the current size. This metric generates standard metric name pairs, so that monitoring rules can easily be applied to measure hit ratio.""" - def __init__(self, name, keys=[]): + def __init__(self, name, size_callback, keys=[]): self.name = name self.hits = CounterMetric(name + ":hits", keys=keys) self.misses = CounterMetric(name + ":misses", keys=keys) + self.size = CallbackMetric(name + ":size", + callback=size_callback, + keys=keys, + ) + def inc_hits(self, *values): self.hits.inc(*values) @@ -96,4 +101,4 @@ class CacheCounterMetric(object): self.misses.inc(*values) def render(self): - return self.hits.render() + self.misses.render() + return self.hits.render() + self.misses.render() + self.size.render() diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 804655e34..d3c2bc7bf 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -59,7 +59,7 @@ def cached(max_entries=1000): def wrap(orig): cache = OrderedDict() - counter = metrics.register_cachecounter(orig.__name__) + counter = metrics.register_cache(orig.__name__, lambda: len(cache)) def prefill(key, value): while len(cache) > max_entries: @@ -183,8 +183,8 @@ class SQLBaseStore(object): self._get_event_counters = PerformanceCounters() self._get_event_cache = LruCache(hs.config.event_cache_size) - self._get_event_cache_counter = metrics.register_cachecounter( - "get_event" + self._get_event_cache_counter = metrics.register_cache("get_event", + size_callback=lambda: len(self._get_event_cache), ) def start_profiling(self): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index b7bb375ce..32fd178ed 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -16,7 +16,7 @@ from tests import unittest from synapse.metrics.metric import ( - 
CounterMetric, CallbackMetric, CacheCounterMetric + CounterMetric, CallbackMetric, CacheMetric ) @@ -81,26 +81,32 @@ class CallbackMetricTestCase(unittest.TestCase): ]) -class CacheCounterMetricTestCase(unittest.TestCase): +class CacheMetricTestCase(unittest.TestCase): - def test_cachecounter(self): - counter = CacheCounterMetric("cache") + def test_cache(self): + d = dict() - self.assertEquals(counter.render(), [ + metric = CacheMetric("cache", lambda: len(d)) + + self.assertEquals(metric.render(), [ "cache:hits 0", "cache:misses 0", + "cache:size 0", ]) - counter.inc_misses() + metric.inc_misses() + d["key"] = "value" - self.assertEquals(counter.render(), [ + self.assertEquals(metric.render(), [ "cache:hits 0", "cache:misses 1", + "cache:size 1", ]) - counter.inc_hits() + metric.inc_hits() - self.assertEquals(counter.render(), [ + self.assertEquals(metric.render(), [ "cache:hits 1", "cache:misses 1", + "cache:size 1", ]) From 849300bc736b82a6bcefdae110f5bf9111141afb Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 17:53:51 +0000 Subject: [PATCH 18/53] Neater introspection methods on BaseMetric so that subclasses don't need to touch self.keys directly --- synapse/metrics/metric.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 7e47f8615..e8c15a60e 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -20,6 +20,12 @@ class BaseMetric(object): self.name = name self.keys = keys # OK not to clone as we never write it + def dimension(self): + return len(self.keys) + + def is_scalar(self): + return not len(self.keys) + def _render_key(self, values): # TODO: some kind of value escape return ",".join(["%s=%s" % kv for kv in zip(self.keys, values)]) @@ -35,13 +41,13 @@ class CounterMetric(BaseMetric): self.counts = {} # Scalar metrics are never empty - if not len(self.keys): + if self.is_scalar(): self.counts[()] = 0 def inc(self, 
*values): - if len(values) != len(self.keys): + if len(values) != self.dimension(): raise ValueError("Expected as many values to inc() as keys (%d)" % - (len(self.keys)) + (self.dimension()) ) # TODO: should assert that the tag values are all strings @@ -55,7 +61,7 @@ class CounterMetric(BaseMetric): return dict(self.counts) def render(self): - if not len(self.keys): + if self.is_scalar(): return ["%s %d" % (self.name, self.counts[()])] return ["%s{%s} %d" % (self.name, self._render_key(k), self.counts[k]) @@ -76,6 +82,7 @@ class CallbackMetric(BaseMetric): # TODO(paul): work out something we can do with keys and vectors return ["%s %d" % (self.name, self.callback())] + class CacheMetric(object): """A combination of two CounterMetrics, one to count cache hits and one to count misses, and a callback metric to yield the current size. From 23ab0c68c28e60e0f8774ee4099b2abe876374d0 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 17:58:10 +0000 Subject: [PATCH 19/53] Implement vector CallbackMetrics --- synapse/metrics/metric.py | 8 ++++++-- tests/metrics/test_metric.py | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index e8c15a60e..4df5ebfda 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -79,9 +79,13 @@ class CallbackMetric(BaseMetric): self.callback = callback def render(self): - # TODO(paul): work out something we can do with keys and vectors - return ["%s %d" % (self.name, self.callback())] + value = self.callback() + if self.is_scalar(): + return ["%s %d" % (self.name, value)] + + return ["%s{%s} %d" % (self.name, self._render_key(k), value[k]) + for k in sorted(value.keys())] class CacheMetric(object): """A combination of two CounterMetrics, one to count cache hits and one to diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index 32fd178ed..b7facb858 100644 --- a/tests/metrics/test_metric.py 
+++ b/tests/metrics/test_metric.py @@ -65,7 +65,7 @@ class CounterMetricTestCase(unittest.TestCase): class CallbackMetricTestCase(unittest.TestCase): - def test_callback(self): + def test_scalar(self): d = dict() metric = CallbackMetric("size", lambda: len(d)) @@ -80,6 +80,22 @@ class CallbackMetricTestCase(unittest.TestCase): "size 1", ]) + def test_vector(self): + vals = dict() + + metric = CallbackMetric("values", lambda: vals, keys=["type"]) + + self.assertEquals(metric.render(), []) + + # Keys have to be tuples, even if they're 1-element + vals[("foo",)] = 1 + vals[("bar",)] = 2 + + self.assertEquals(metric.render(), [ + "values{type=bar} 2", + "values{type=foo} 1", + ]) + class CacheMetricTestCase(unittest.TestCase): From e9c4b0d1789fca9cdefb53c75defa19a8ea62473 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 18:15:07 +0000 Subject: [PATCH 20/53] Ensure that /_synapse/metrics response is UTF-8 encoded --- synapse/metrics/resource.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/resource.py b/synapse/metrics/resource.py index 2b9a1ee0a..ff7baab01 100644 --- a/synapse/metrics/resource.py +++ b/synapse/metrics/resource.py @@ -36,4 +36,5 @@ class MetricsResource(Resource): request.setHeader("Content-Type", "text/plain") request.setHeader("Content-Length", str(len(response))) - return response + # Encode as UTF-8 (default) + return response.encode() From 094803cf82cc748dd32fe4b03c6db016aeb90075 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 18:15:34 +0000 Subject: [PATCH 21/53] Put vector gauges on transaction queue pending PDU and EDU dicts --- synapse/federation/transaction_queue.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 9dc7849b1..b9d3f8932 100644 --- a/synapse/federation/transaction_queue.py +++ 
b/synapse/federation/transaction_queue.py @@ -25,12 +25,15 @@ from synapse.util.logcontext import PreserveLoggingContext from synapse.util.retryutils import ( get_retry_limiter, NotRetryingDestination, ) +import synapse.metrics import logging logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + class TransactionQueue(object): """This class makes sure we only have one transaction in flight at @@ -56,9 +59,9 @@ class TransactionQueue(object): # Is a mapping from destination -> list of # tuple(pending pdus, deferred, order) - self.pending_pdus_by_dest = {} + self.pending_pdus_by_dest = pdus = {} # destination -> list of tuple(edu, deferred) - self.pending_edus_by_dest = {} + self.pending_edus_by_dest = edus = {} # destination -> list of tuple(failure, deferred) self.pending_failures_by_dest = {} @@ -66,6 +69,15 @@ class TransactionQueue(object): # HACK to get unique tx id self._next_txn_id = int(self._clock.time_msec()) + metrics.register_callback("pending_pdus", + lambda: {(dest,): len(pdus[dest]) for dest in pdus.keys()}, + keys=["dest"], + ) + metrics.register_callback("pending_edus", + lambda: {(dest,): len(edus[dest]) for dest in edus.keys()}, + keys=["dest"], + ) + def can_send_to(self, destination): """Can we send messages to the given server? From e1a7e3564fa1e20b52c7294a8b0f28e5ecbaf672 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 18:16:54 +0000 Subject: [PATCH 22/53] Delete a couple of TODO markers of monitoring stats now done --- synapse/storage/_base.py | 1 - synapse/util/lrucache.py | 1 - 2 files changed, 2 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d3c2bc7bf..d6cf88c35 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -40,7 +40,6 @@ transaction_logger = logging.getLogger("synapse.storage.txn") # TODO(paul): # * more generic key management -# * export monitoring stats # * consider other eviction strategies - LRU? 
def cached(max_entries=1000): """ A method decorator that applies a memoizing cache around the function. diff --git a/synapse/util/lrucache.py b/synapse/util/lrucache.py index f115f50e5..65d579290 100644 --- a/synapse/util/lrucache.py +++ b/synapse/util/lrucache.py @@ -16,7 +16,6 @@ class LruCache(object): """Least-recently-used cache.""" - # TODO(mjark) Add hit/miss counters # TODO(mjark) Add mutex for linked list for thread safety. def __init__(self, max_size): cache = {} From 72625f2f4d633e9fe59e61bb371a118927e5c66c Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 19:22:14 +0000 Subject: [PATCH 23/53] Initial hack at a TimerMetric; for storing counts + duration accumulators --- synapse/metrics/metric.py | 48 ++++++++++++++++++++++++++++++++++++ tests/metrics/test_metric.py | 36 ++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 4df5ebfda..717588194 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -14,6 +14,15 @@ # limitations under the License. +from itertools import chain + + +# TODO(paul): I can't believe Python doesn't have one of these +def map_concat(func, items): + # flatten a list-of-lists + return list(chain.from_iterable(map(func, items))) + + class BaseMetric(object): def __init__(self, name, keys=[]): @@ -87,6 +96,45 @@ class CallbackMetric(BaseMetric): return ["%s{%s} %d" % (self.name, self._render_key(k), value[k]) for k in sorted(value.keys())] + +class TimerMetric(CounterMetric): + """A combination of an event counter and a time accumulator, which counts + both the number of events and how long each one takes. + + TODO(paul): Try to export some heatmap-style stats? 
+ """ + + def __init__(self, *args, **kwargs): + super(TimerMetric, self).__init__(*args, **kwargs) + + self.times = {} + + # Scalar metrics are never empty + if self.is_scalar(): + self.times[()] = 0 + + def inc_time(self, msec, *values): + self.inc(*values) + + if values not in self.times: + self.times[values] = msec + else: + self.times[values] += msec + + def render(self): + if self.is_scalar(): + return ["%s:count %d" % (self.name, self.counts[()]), + "%s:msec %d" % (self.name, self.times[()])] + + def render_item(k): + keystr = self._render_key(k) + + return ["%s{%s}:count %d" % (self.name, keystr, self.counts[k]), + "%s{%s}:msec %d" % (self.name, keystr, self.times[k])] + + return map_concat(render_item, sorted(self.counts.keys())) + + class CacheMetric(object): """A combination of two CounterMetrics, one to count cache hits and one to count misses, and a callback metric to yield the current size. diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index b7facb858..b25520821 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -16,7 +16,7 @@ from tests import unittest from synapse.metrics.metric import ( - CounterMetric, CallbackMetric, CacheMetric + CounterMetric, CallbackMetric, TimerMetric, CacheMetric ) @@ -97,6 +97,40 @@ class CallbackMetricTestCase(unittest.TestCase): ]) +class TimerMetricTestCase(unittest.TestCase): + + def test_scalar(self): + metric = TimerMetric("thing") + + self.assertEquals(metric.render(), [ + "thing:count 0", + "thing:msec 0", + ]) + + metric.inc_time(500) + + self.assertEquals(metric.render(), [ + "thing:count 1", + "thing:msec 500", + ]) + + def test_vector(self): + metric = TimerMetric("queries", keys=["verb"]) + + self.assertEquals(metric.render(), []) + + metric.inc_time(300, "SELECT") + metric.inc_time(200, "SELECT") + metric.inc_time(800, "INSERT") + + self.assertEquals(metric.render(), [ + "queries{verb=INSERT}:count 1", + "queries{verb=INSERT}:msec 800", + 
"queries{verb=SELECT}:count 2", + "queries{verb=SELECT}:msec 500", + ]) + + class CacheMetricTestCase(unittest.TestCase): def test_cache(self): From a99d6edc05c60cdb473adbe5fc783532e0ff9ea3 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 19:28:17 +0000 Subject: [PATCH 24/53] Neater implementation of metric render methods by pulling out 'render' as a base method that calls self.render_item --- synapse/metrics/metric.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 717588194..4a6ab9cd7 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -36,8 +36,15 @@ class BaseMetric(object): return not len(self.keys) def _render_key(self, values): + if self.is_scalar(): + return "" # TODO: some kind of value escape - return ",".join(["%s=%s" % kv for kv in zip(self.keys, values)]) + return "{%s}" % ( + ",".join(["%s=%s" % kv for kv in zip(self.keys, values)]) + ) + + def render(self): + return map_concat(self.render_item, sorted(self.counts.keys())) class CounterMetric(BaseMetric): @@ -69,12 +76,8 @@ class CounterMetric(BaseMetric): def fetch(self): return dict(self.counts) - def render(self): - if self.is_scalar(): - return ["%s %d" % (self.name, self.counts[()])] - - return ["%s{%s} %d" % (self.name, self._render_key(k), self.counts[k]) - for k in sorted(self.counts.keys())] + def render_item(self, k): + return ["%s%s %d" % (self.name, self._render_key(k), self.counts[k])] class CallbackMetric(BaseMetric): @@ -93,7 +96,7 @@ class CallbackMetric(BaseMetric): if self.is_scalar(): return ["%s %d" % (self.name, value)] - return ["%s{%s} %d" % (self.name, self._render_key(k), value[k]) + return ["%s%s %d" % (self.name, self._render_key(k), value[k]) for k in sorted(value.keys())] @@ -121,18 +124,12 @@ class TimerMetric(CounterMetric): else: self.times[values] += msec - def render(self): - if self.is_scalar(): - return 
["%s:count %d" % (self.name, self.counts[()]), - "%s:msec %d" % (self.name, self.times[()])] + def render_item(self, k): + keystr = self._render_key(k) - def render_item(k): - keystr = self._render_key(k) + return ["%s%s:count %d" % (self.name, keystr, self.counts[k]), + "%s%s:msec %d" % (self.name, keystr, self.times[k])] - return ["%s{%s}:count %d" % (self.name, keystr, self.counts[k]), - "%s{%s}:msec %d" % (self.name, keystr, self.times[k])] - - return map_concat(render_item, sorted(self.counts.keys())) class CacheMetric(object): From f52acf3b129864f400876f9316e1324e62e75eb0 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 19:43:46 +0000 Subject: [PATCH 25/53] Neater register_* methods on overall Metrics container --- synapse/metrics/__init__.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 442fd70cd..e1818ce39 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -15,7 +15,7 @@ import logging -from .metric import CounterMetric, CallbackMetric, CacheMetric +from .metric import CounterMetric, CallbackMetric, TimerMetric, CacheMetric logger = logging.getLogger(__name__) @@ -36,35 +36,25 @@ class Metrics(object): def __init__(self, name): self.name_prefix = name - def _register(self, metric): - all_metrics[metric.name] = metric - - def register_counter(self, name, *args, **kwargs): + def _register(self, metric_class, name, *args, **kwargs): full_name = "%s.%s" % (self.name_prefix, name) - metric = CounterMetric(full_name, *args, **kwargs) - - self._register(metric) + metric = metric_class(full_name, *args, **kwargs) + all_metrics[full_name] = metric return metric - def register_callback(self, name, callback, *args, **kwargs): - full_name = "%s.%s" % (self.name_prefix, name) + def register_counter(self, *args, **kwargs): + return self._register(CounterMetric, *args, **kwargs) - metric = 
CallbackMetric(full_name, *args, callback=callback, **kwargs) + def register_callback(self, *args, **kwargs): + return self._register(CallbackMetric, *args, **kwargs) - self._register(metric) + def register_timer(self, *args, **kwargs): + return self._register(TimerMetric, *args, **kwargs) - return metric - - def register_cache(self, name, *args, **kwargs): - full_name = "%s.%s" % (self.name_prefix, name) - - metric = CacheMetric(full_name, *args, **kwargs) - - self._register(metric) - - return metric + def register_cache(self, *args, **kwargs): + return self._register(CacheMetric, *args, **kwargs) def counted(self, func): """ A method decorator that registers a counter, to count invocations From 25187ab67426e206de30ebf5c8b5f34c995d3faf Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 19:45:16 +0000 Subject: [PATCH 26/53] Collect per-SQL-verb timer stats on query execution time --- synapse/storage/_base.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d6cf88c35..92e90ac57 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -32,12 +32,15 @@ import time logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for("synapse.storage") - sql_logger = logging.getLogger("synapse.storage.SQL") transaction_logger = logging.getLogger("synapse.storage.txn") +metrics = synapse.metrics.get_metrics_for("synapse.storage") + +sql_query_timer = metrics.register_timer("queries", keys=["verb"]) + + # TODO(paul): # * more generic key management # * consider other eviction strategies - LRU? @@ -89,7 +92,8 @@ def cached(max_entries=1000): class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object - passed to the constructor. Adds logging to the .execute() method.""" + passed to the constructor. 
Adds logging and metrics to the .execute() + method.""" __slots__ = ["txn", "name"] def __init__(self, txn, name): @@ -105,6 +109,7 @@ class LoggingTransaction(object): def execute(self, sql, *args, **kwargs): # TODO(paul): Maybe use 'info' and 'debug' for values? sql_logger.debug("[SQL] {%s} %s", self.name, sql) + try: if args and args[0]: values = args[0] @@ -126,8 +131,9 @@ class LoggingTransaction(object): logger.exception("[SQL FAIL] {%s}", self.name) raise finally: - end = time.time() * 1000 - sql_logger.debug("[SQL time] {%s} %f", self.name, end - start) + msecs = (time.time() * 1000) - start + sql_logger.debug("[SQL time] {%s} %f", self.name, msecs) + sql_query_timer.inc_time(msecs, sql.split()[0]) class PerformanceCounters(object): From 6d146e15df645a7958c59e4d1e4f48e4822be9f9 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 5 Mar 2015 15:12:39 +0000 Subject: [PATCH 27/53] Put some gauge metrics on the number of notifier listeners, and notified-on objects (users, rooms, appservices) --- synapse/notifier.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/synapse/notifier.py b/synapse/notifier.py index df13e8ddb..1f7cad624 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -19,12 +19,25 @@ from synapse.util.logutils import log_function from synapse.util.logcontext import PreserveLoggingContext from synapse.util.async import run_on_reactor from synapse.types import StreamToken +import synapse.metrics import logging logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + + +# TODO(paul): Should be shared somewhere +def count(func, l): + """Return the number of items in l for which func returns true.""" + n = 0 + for x in l: + if func(x): + n += 1 + return n + class _NotificationListener(object): """ This represents a single client connection to the events stream. 
@@ -95,6 +108,32 @@ class Notifier(object): "user_joined_room", self._user_joined_room ) + # This is not a very cheap test to perform, but it's only executed + # when rendering the metrics page, which is likely once per minute at + # most when scraping it. + def count_listeners(): + all_listeners = set() + + for x in self.room_to_listeners.values(): + all_listeners |= x + for x in self.user_to_listeners.values(): + all_listeners |= x + for x in self.appservice_to_listeners.values(): + all_listeners |= x + + return len(all_listeners) + metrics.register_callback("all_listeners", count_listeners) + + metrics.register_callback("rooms", + lambda: count(bool, self.room_to_listeners.values()) + ) + metrics.register_callback("users", + lambda: count(bool, self.user_to_listeners.values()) + ) + metrics.register_callback("appservices", + lambda: count(bool, self.appservice_to_listeners.values()) + ) + @log_function @defer.inlineCallbacks def on_new_room_event(self, event, extra_users=[]): From fa319a57860af460add565d7801711ec53ab6799 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 5 Mar 2015 15:58:03 +0000 Subject: [PATCH 28/53] Add TimerMetrics to shadow the PerformanceCounters in synapse.storage; with the view to eventually replacing them entirely --- synapse/storage/_base.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 92e90ac57..d8c5a60c7 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -39,6 +39,8 @@ transaction_logger = logging.getLogger("synapse.storage.txn") metrics = synapse.metrics.get_metrics_for("synapse.storage") sql_query_timer = metrics.register_timer("queries", keys=["verb"]) +sql_txn_timer = metrics.register_timer("transactions", keys=["desc"]) +sql_getevents_timer = metrics.register_timer("get_events", keys=["desc"]) # TODO(paul): @@ -184,11 +186,16 @@ class SQLBaseStore(object): self._previous_txn_total_time = 0 
self._current_txn_total_time = 0 self._previous_loop_ts = 0 + + # TODO(paul): These can eventually be removed once the metrics code + # is running in mainline, and we have some nice monitoring frontends + # to watch it self._txn_perf_counters = PerformanceCounters() self._get_event_counters = PerformanceCounters() self._get_event_cache = LruCache(hs.config.event_cache_size) - self._get_event_cache_counter = metrics.register_cache("get_event", + self._get_event_cache_counter = metrics.register_cache( + "get_event_cache", size_callback=lambda: len(self._get_event_cache), ) @@ -254,6 +261,8 @@ class SQLBaseStore(object): self._current_txn_total_time += end - start self._txn_perf_counters.update(desc, start, end) + sql_txn_timer.inc_time(self._current_txn_total_time, desc) + with PreserveLoggingContext(): result = yield self._db_pool.runInteraction( inner_func, *args, **kwargs @@ -653,7 +662,11 @@ class SQLBaseStore(object): get_prev_content=False, allow_rejected=False): start_time = time.time() * 1000 - update_counter = self._get_event_counters.update + + def update_counter(desc, last_time): + curr_time = self._get_event_counters.update(desc, last_time) + sql_getevents_timer.inc_time(curr_time - last_time, desc) + return curr_time cache = self._get_event_cache.setdefault(event_id, {}) @@ -704,7 +717,11 @@ class SQLBaseStore(object): check_redacted=True, get_prev_content=False): start_time = time.time() * 1000 - update_counter = self._get_event_counters.update + + def update_counter(desc, last_time): + curr_time = self._get_event_counters.update(desc, last_time) + sql_getevents_timer.inc_time(curr_time - last_time, desc) + return curr_time d = json.loads(js) start_time = update_counter("decode_json", start_time) From 399689dcc79e73de1ec6bae8aa18bd83f5618f38 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 5 Mar 2015 16:15:21 +0000 Subject: [PATCH 29/53] Provide some process resource usage metrics --- synapse/metrics/__init__.py | 27 
+++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e1818ce39..c00f088ff 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -13,7 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Because otherwise 'resource' collides with synapse.metrics.resource +from __future__ import absolute_import + import logging +from resource import getrusage, getpagesize, RUSAGE_SELF from .metric import CounterMetric, CallbackMetric, TimerMetric, CacheMetric @@ -76,6 +80,9 @@ def get_metrics_for(name): def render_all(): strs = [] + # TODO(paul): Internal hack + update_resource_metrics() + for name in sorted(all_metrics.keys()): try: strs += all_metrics[name].render() @@ -84,3 +91,23 @@ def render_all(): logger.exception("Failed to render %s metric", name) return "\n".join(strs) + + +# Now register some standard process-wide state metrics, to give indications of +# process resource usage + +rusage = None +PAGE_SIZE = getpagesize() + +def update_resource_metrics(): + global rusage + rusage = getrusage(RUSAGE_SELF) + +resource_metrics = get_metrics_for("process.resource") + +# msecs +resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000) +resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000) + +# pages +resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * PAGE_SIZE) From f9478e475bf645038b4f1f163240d7fd0ec02af0 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 15:28:06 +0000 Subject: [PATCH 30/53] Rename Metrics' "keys" to "labels" --- synapse/federation/transaction_queue.py | 4 ++-- synapse/http/client.py | 4 ++-- synapse/http/matrixfederationclient.py | 4 ++-- synapse/http/server.py | 4 ++-- synapse/metrics/metric.py | 24 ++++++++++++------------ synapse/storage/_base.py | 6 +++--- tests/metrics/test_metric.py | 6 +++--- 7 
files changed, 26 insertions(+), 26 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index b9d3f8932..ae62c69fc 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -71,11 +71,11 @@ class TransactionQueue(object): metrics.register_callback("pending_pdus", lambda: {(dest,): len(pdus[dest]) for dest in pdus.keys()}, - keys=["dest"], + labels=["dest"], ) metrics.register_callback("pending_edus", lambda: {(dest,): len(edus[dest]) for dest in edus.keys()}, - keys=["dest"], + labels=["dest"], ) def can_send_to(self, destination): diff --git a/synapse/http/client.py b/synapse/http/client.py index e40e82e80..ad2c9c05e 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -35,10 +35,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) outgoing_requests_counter = metrics.register_counter("outgoing_requests", - keys=["method"], + labels=["method"], ) incoming_responses_counter = metrics.register_counter("incoming_responses", - keys=["method","code"], + labels=["method","code"], ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 009152769..6b6d79a04 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -44,10 +44,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) outgoing_requests_counter = metrics.register_counter("outgoing_requests", - keys=["method"], + labels=["method"], ) incoming_responses_counter = metrics.register_counter("incoming_responses", - keys=["method","code"], + labels=["method","code"], ) diff --git a/synapse/http/server.py b/synapse/http/server.py index ac893bb40..35bd3a00b 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,10 +38,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) 
incoming_requests_counter = metrics.register_counter("incoming_requests", - keys=["method"], + labels=["method"], ) outgoing_responses_counter = metrics.register_counter("outgoing_responses", - keys=["method","code"], + labels=["method","code"], ) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 4a6ab9cd7..8ba13075f 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -25,22 +25,22 @@ def map_concat(func, items): class BaseMetric(object): - def __init__(self, name, keys=[]): + def __init__(self, name, labels=[]): self.name = name - self.keys = keys # OK not to clone as we never write it + self.labels = labels # OK not to clone as we never write it def dimension(self): - return len(self.keys) + return len(self.labels) def is_scalar(self): - return not len(self.keys) + return not len(self.labels) def _render_key(self, values): if self.is_scalar(): return "" # TODO: some kind of value escape return "{%s}" % ( - ",".join(["%s=%s" % kv for kv in zip(self.keys, values)]) + ",".join(["%s=%s" % kv for kv in zip(self.labels, values)]) ) def render(self): @@ -62,7 +62,7 @@ class CounterMetric(BaseMetric): def inc(self, *values): if len(values) != self.dimension(): - raise ValueError("Expected as many values to inc() as keys (%d)" % + raise ValueError("Expected as many values to inc() as labels (%d)" % (self.dimension()) ) @@ -85,8 +85,8 @@ class CallbackMetric(BaseMetric): it is rendered. 
Typically this is used to implement gauges that yield the size or other state of some in-memory object by actively querying it.""" - def __init__(self, name, callback, keys=[]): - super(CallbackMetric, self).__init__(name, keys=keys) + def __init__(self, name, callback, labels=[]): + super(CallbackMetric, self).__init__(name, labels=labels) self.callback = callback @@ -139,15 +139,15 @@ class CacheMetric(object): This metric generates standard metric name pairs, so that monitoring rules can easily be applied to measure hit ratio.""" - def __init__(self, name, size_callback, keys=[]): + def __init__(self, name, size_callback, labels=[]): self.name = name - self.hits = CounterMetric(name + ":hits", keys=keys) - self.misses = CounterMetric(name + ":misses", keys=keys) + self.hits = CounterMetric(name + ":hits", labels=labels) + self.misses = CounterMetric(name + ":misses", labels=labels) self.size = CallbackMetric(name + ":size", callback=size_callback, - keys=keys, + labels=labels, ) def inc_hits(self, *values): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d8c5a60c7..a38b60358 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -38,9 +38,9 @@ transaction_logger = logging.getLogger("synapse.storage.txn") metrics = synapse.metrics.get_metrics_for("synapse.storage") -sql_query_timer = metrics.register_timer("queries", keys=["verb"]) -sql_txn_timer = metrics.register_timer("transactions", keys=["desc"]) -sql_getevents_timer = metrics.register_timer("get_events", keys=["desc"]) +sql_query_timer = metrics.register_timer("queries", labels=["verb"]) +sql_txn_timer = metrics.register_timer("transactions", labels=["desc"]) +sql_getevents_timer = metrics.register_timer("get_events", labels=["desc"]) # TODO(paul): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index b25520821..fefe1a586 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -43,7 +43,7 @@ class 
CounterMetricTestCase(unittest.TestCase): ]) def test_vector(self): - counter = CounterMetric("vector", keys=["method"]) + counter = CounterMetric("vector", labels=["method"]) # Empty counter doesn't yet know what values it has self.assertEquals(counter.render(), []) @@ -83,7 +83,7 @@ class CallbackMetricTestCase(unittest.TestCase): def test_vector(self): vals = dict() - metric = CallbackMetric("values", lambda: vals, keys=["type"]) + metric = CallbackMetric("values", lambda: vals, labels=["type"]) self.assertEquals(metric.render(), []) @@ -115,7 +115,7 @@ class TimerMetricTestCase(unittest.TestCase): ]) def test_vector(self): - metric = TimerMetric("queries", keys=["verb"]) + metric = TimerMetric("queries", labels=["verb"]) self.assertEquals(metric.render(), []) From b3a0179d64c2c3b4f57688bdcceb818d0124c858 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 15:35:23 +0000 Subject: [PATCH 31/53] Bugfix to rendering output of vectored TimerMetrics --- synapse/metrics/metric.py | 5 ++--- tests/metrics/test_metric.py | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 8ba13075f..17cd67389 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -127,9 +127,8 @@ class TimerMetric(CounterMetric): def render_item(self, k): keystr = self._render_key(k) - return ["%s%s:count %d" % (self.name, keystr, self.counts[k]), - "%s%s:msec %d" % (self.name, keystr, self.times[k])] - + return ["%s:count%s %d" % (self.name, keystr, self.counts[k]), + "%s:msec%s %d" % (self.name, keystr, self.times[k])] class CacheMetric(object): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index fefe1a586..75b6cbc92 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -124,10 +124,10 @@ class TimerMetricTestCase(unittest.TestCase): metric.inc_time(800, "INSERT") self.assertEquals(metric.render(), [ - 
"queries{verb=INSERT}:count 1", - "queries{verb=INSERT}:msec 800", - "queries{verb=SELECT}:count 2", - "queries{verb=SELECT}:msec 500", + "queries:count{verb=INSERT} 1", + "queries:msec{verb=INSERT} 800", + "queries:count{verb=SELECT} 2", + "queries:msec{verb=SELECT} 500", ]) From 0b96bb793e7e5d3935804b8f0ccaf415006388a9 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 15:39:14 +0000 Subject: [PATCH 32/53] Have all @metrics.counted use a single metric name vectored on the method name, rather than a brand new scalar counter per counted method --- synapse/metrics/__init__.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index c00f088ff..443d67f41 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -63,10 +63,17 @@ class Metrics(object): def counted(self, func): """ A method decorator that registers a counter, to count invocations of this method. """ - counter = self.register_counter(func.__name__) + if not hasattr(self, "method_counter"): + self.method_counter = self.register_counter( + "calls", + labels=["method"] + ) + + counter = self.method_counter + name = func.__name__ def wrapped(*args, **kwargs): - counter.inc() + counter.inc(name) return func(*args, **kwargs) return wrapped From b0cf86731957876ca877c35bf30c6f695f1a544c Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 16:18:21 +0000 Subject: [PATCH 33/53] Use _ instead of . 
as a metric namespacing separator, for Prometheus --- synapse/federation/transaction_queue.py | 4 ++-- synapse/handlers/presence.py | 2 +- synapse/http/client.py | 4 ++-- synapse/http/matrixfederationclient.py | 4 ++-- synapse/http/server.py | 4 ++-- synapse/metrics/__init__.py | 14 +++++++++++--- synapse/notifier.py | 2 +- synapse/storage/_base.py | 18 +++++++++++++----- 8 files changed, 34 insertions(+), 18 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index ae62c69fc..ca5bcf21c 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -69,11 +69,11 @@ class TransactionQueue(object): # HACK to get unique tx id self._next_txn_id = int(self._clock.time_msec()) - metrics.register_callback("pending_pdus", + metrics.register_callback("pendingPdus", lambda: {(dest,): len(pdus[dest]) for dest in pdus.keys()}, labels=["dest"], ) - metrics.register_callback("pending_edus", + metrics.register_callback("pendingEdus", lambda: {(dest,): len(edus[dest]) for dest in edus.keys()}, labels=["dest"], ) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 698946a48..c6d6aef53 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -136,7 +136,7 @@ class PresenceHandler(BaseHandler): self._user_cachemap = {} self._user_cachemap_latest_serial = 0 - metrics.register_callback("user_cachemap:size", + metrics.register_callback("userCachemap:size", lambda: len(self._user_cachemap) ) diff --git a/synapse/http/client.py b/synapse/http/client.py index ad2c9c05e..01737a718 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -34,10 +34,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -outgoing_requests_counter = metrics.register_counter("outgoing_requests", +outgoing_requests_counter = metrics.register_counter("requests", labels=["method"], ) -incoming_responses_counter = 
metrics.register_counter("incoming_responses", +incoming_responses_counter = metrics.register_counter("responses", labels=["method","code"], ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 6b6d79a04..11883d385 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -43,10 +43,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -outgoing_requests_counter = metrics.register_counter("outgoing_requests", +outgoing_requests_counter = metrics.register_counter("requests", labels=["method"], ) -incoming_responses_counter = metrics.register_counter("incoming_responses", +incoming_responses_counter = metrics.register_counter("responses", labels=["method","code"], ) diff --git a/synapse/http/server.py b/synapse/http/server.py index 35bd3a00b..23708c08c 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -37,10 +37,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -incoming_requests_counter = metrics.register_counter("incoming_requests", +incoming_requests_counter = metrics.register_counter("requests", labels=["method"], ) -outgoing_responses_counter = metrics.register_counter("outgoing_responses", +outgoing_responses_counter = metrics.register_counter("responses", labels=["method","code"], ) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 443d67f41..47e475acd 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -41,7 +41,12 @@ class Metrics(object): self.name_prefix = name def _register(self, metric_class, name, *args, **kwargs): - full_name = "%s.%s" % (self.name_prefix, name) + if "_" in name: + raise ValueError("Metric names %s is invalid as it cannot contain an underscore" + % (name) + ) + + full_name = "%s_%s" % (self.name_prefix, name) metric = metric_class(full_name, *args, **kwargs) @@ -78,10 +83,13 @@ class 
Metrics(object): return wrapped -def get_metrics_for(name): +def get_metrics_for(pkg_name): """ Returns a Metrics instance for conveniently creating metrics namespaced with the given name prefix. """ - return Metrics(name) + + # Convert a "package.name" to "package_name" because Prometheus doesn't + # let us use . in metric names + return Metrics(pkg_name.replace(".", "_")) def render_all(): diff --git a/synapse/notifier.py b/synapse/notifier.py index 1f7cad624..75e8152d0 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -122,7 +122,7 @@ class Notifier(object): all_listeners |= x return len(all_listeners) - metrics.register_callback("all_listeners", count_listeners) + metrics.register_callback("listeners", count_listeners) metrics.register_callback("rooms", lambda: count(bool, self.room_to_listeners.values()) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a38b60358..35d118c58 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -40,7 +40,14 @@ metrics = synapse.metrics.get_metrics_for("synapse.storage") sql_query_timer = metrics.register_timer("queries", labels=["verb"]) sql_txn_timer = metrics.register_timer("transactions", labels=["desc"]) -sql_getevents_timer = metrics.register_timer("get_events", labels=["desc"]) +sql_getevents_timer = metrics.register_timer("getEvents", labels=["desc"]) + +caches_by_name = {} +cache_counter = metrics.register_cache( + "cache", + lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, + labels=["name"], +) # TODO(paul): @@ -62,8 +69,9 @@ def cached(max_entries=1000): """ def wrap(orig): cache = OrderedDict() + name = orig.__name__ - counter = metrics.register_cache(orig.__name__, lambda: len(cache)) + caches_by_name[name] = cache def prefill(key, value): while len(cache) > max_entries: @@ -74,10 +82,10 @@ def cached(max_entries=1000): @defer.inlineCallbacks def wrapped(self, key): if key in cache: - counter.inc_hits() + cache_counter.inc_hits(name) 
defer.returnValue(cache[key]) - counter.inc_misses() + cache_counter.inc_misses(name) ret = yield orig(self, key) prefill(key, ret) defer.returnValue(ret) @@ -195,7 +203,7 @@ class SQLBaseStore(object): self._get_event_cache = LruCache(hs.config.event_cache_size) self._get_event_cache_counter = metrics.register_cache( - "get_event_cache", + "getEventCache", size_callback=lambda: len(self._get_event_cache), ) From 22b37b75dbfa69fd90705d7aa3e5650eebb89b5f Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 17:08:25 +0000 Subject: [PATCH 34/53] Kill unused CounterMetric.fetch() method --- synapse/metrics/metric.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 17cd67389..93508eeac 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -73,9 +73,6 @@ class CounterMetric(BaseMetric): else: self.counts[values] += 1 - def fetch(self): - return dict(self.counts) - def render_item(self, k): return ["%s%s %d" % (self.name, self._render_key(k), self.counts[k])] From 0e847540c3aa1c471a00b3200f7f18e48004b48d Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 18:40:20 +0000 Subject: [PATCH 35/53] Prometheus needs "escaped" label values --- synapse/metrics/metric.py | 8 ++++-- tests/metrics/test_metric.py | 54 ++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 93508eeac..922cb5a6f 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -35,12 +35,16 @@ class BaseMetric(object): def is_scalar(self): return not len(self.labels) + def _render_labelvalue(self, value): + # TODO: some kind of value escape + return '"%s"' % (value) + def _render_key(self, values): if self.is_scalar(): return "" - # TODO: some kind of value escape return "{%s}" % ( - ",".join(["%s=%s" % kv for kv in zip(self.labels, values)]) + 
",".join(["%s=%s" % (k, self._render_labelvalue(v)) + for k, v in zip(self.labels, values)]) ) def render(self): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index 75b6cbc92..1919630fe 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -26,20 +26,20 @@ class CounterMetricTestCase(unittest.TestCase): counter = CounterMetric("scalar") self.assertEquals(counter.render(), [ - "scalar 0", + 'scalar 0', ]) counter.inc() self.assertEquals(counter.render(), [ - "scalar 1", + 'scalar 1', ]) counter.inc() counter.inc() self.assertEquals(counter.render(), [ - "scalar 3" + 'scalar 3' ]) def test_vector(self): @@ -51,15 +51,15 @@ class CounterMetricTestCase(unittest.TestCase): counter.inc("GET") self.assertEquals(counter.render(), [ - "vector{method=GET} 1", + 'vector{method="GET"} 1', ]) counter.inc("GET") counter.inc("PUT") self.assertEquals(counter.render(), [ - "vector{method=GET} 2", - "vector{method=PUT} 1", + 'vector{method="GET"} 2', + 'vector{method="PUT"} 1', ]) @@ -71,13 +71,13 @@ class CallbackMetricTestCase(unittest.TestCase): metric = CallbackMetric("size", lambda: len(d)) self.assertEquals(metric.render(), [ - "size 0", + 'size 0', ]) d["key"] = "value" self.assertEquals(metric.render(), [ - "size 1", + 'size 1', ]) def test_vector(self): @@ -92,8 +92,8 @@ class CallbackMetricTestCase(unittest.TestCase): vals[("bar",)] = 2 self.assertEquals(metric.render(), [ - "values{type=bar} 2", - "values{type=foo} 1", + 'values{type="bar"} 2', + 'values{type="foo"} 1', ]) @@ -103,15 +103,15 @@ class TimerMetricTestCase(unittest.TestCase): metric = TimerMetric("thing") self.assertEquals(metric.render(), [ - "thing:count 0", - "thing:msec 0", + 'thing:count 0', + 'thing:msec 0', ]) metric.inc_time(500) self.assertEquals(metric.render(), [ - "thing:count 1", - "thing:msec 500", + 'thing:count 1', + 'thing:msec 500', ]) def test_vector(self): @@ -124,10 +124,10 @@ class TimerMetricTestCase(unittest.TestCase): 
metric.inc_time(800, "INSERT") self.assertEquals(metric.render(), [ - "queries:count{verb=INSERT} 1", - "queries:msec{verb=INSERT} 800", - "queries:count{verb=SELECT} 2", - "queries:msec{verb=SELECT} 500", + 'queries:count{verb="INSERT"} 1', + 'queries:msec{verb="INSERT"} 800', + 'queries:count{verb="SELECT"} 2', + 'queries:msec{verb="SELECT"} 500', ]) @@ -139,24 +139,24 @@ class CacheMetricTestCase(unittest.TestCase): metric = CacheMetric("cache", lambda: len(d)) self.assertEquals(metric.render(), [ - "cache:hits 0", - "cache:misses 0", - "cache:size 0", + 'cache:hits 0', + 'cache:misses 0', + 'cache:size 0', ]) metric.inc_misses() d["key"] = "value" self.assertEquals(metric.render(), [ - "cache:hits 0", - "cache:misses 1", - "cache:size 1", + 'cache:hits 0', + 'cache:misses 1', + 'cache:size 1', ]) metric.inc_hits() self.assertEquals(metric.render(), [ - "cache:hits 1", - "cache:misses 1", - "cache:size 1", + 'cache:hits 1', + 'cache:misses 1', + 'cache:size 1', ]) From 4d661ec0f3ccced9cb6a0b1441bfb845f70f1270 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 19:08:47 +0000 Subject: [PATCH 36/53] Remember to emit final linefeed from /metrics page, or Prometheus gets upset --- synapse/metrics/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 47e475acd..1acaa3fd0 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -105,6 +105,8 @@ def render_all(): strs += ["# FAILED to render %s" % name] logger.exception("Failed to render %s metric", name) + strs.append("") # to generate a final CRLF + return "\n".join(strs) From 1748605c5d69cb93cbe6bb4d93060124cdc9282f Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Mon, 9 Mar 2015 18:34:20 +0000 Subject: [PATCH 37/53] Count incoming HTTP requests per servlet that responds --- synapse/federation/transport/server.py | 4 ++++ synapse/http/server.py | 18 +++++++++++------- 2 files changed, 
15 insertions(+), 7 deletions(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 6c624977d..7838a8136 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -148,6 +148,10 @@ class BaseFederationServlet(object): logger.exception("authenticate_request failed") raise defer.returnValue(response) + + # Extra logic that functools.wraps() doesn't finish + new_code.__self__ = code.__self__ + return new_code def register(self, server): diff --git a/synapse/http/server.py b/synapse/http/server.py index 23708c08c..a0d190ff7 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,7 +38,7 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) incoming_requests_counter = metrics.register_counter("requests", - labels=["method"], + labels=["method", "servlet"], ) outgoing_responses_counter = metrics.register_counter("responses", labels=["method","code"], @@ -122,8 +122,6 @@ class JsonResource(HttpServer, resource.Resource): This checks if anyone has registered a callback for that method and path. """ - incoming_requests_counter.inc(request.method) - code = None start = self.clock.time_msec() try: @@ -143,6 +141,15 @@ class JsonResource(HttpServer, resource.Resource): # returned response. We pass both the request and any # matched groups from the regex to the callback. 
+ callback = path_entry.callback + + servlet_instance = getattr(callback, "__self__", None) + if servlet_instance is not None: + servlet_classname = servlet_instance.__class__.__name__ + else: + servlet_classname = "%r" % callback + incoming_requests_counter.inc(request.method, servlet_classname) + args = [ urllib.unquote(u).decode("UTF-8") for u in m.groups() ] @@ -152,10 +159,7 @@ class JsonResource(HttpServer, resource.Resource): request.method, request.path ) - code, response = yield path_entry.callback( - request, - *args - ) + code, response = yield callback(request, *args) self._send_response(request, code, response) return From cbc0406be844894cac08c457544f1eb0c28435bb Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Mon, 9 Mar 2015 20:35:33 +0000 Subject: [PATCH 38/53] Export CacheMetric as hits+total, rather than hits+misses, as it's easier to derive hit ratio from that --- synapse/metrics/metric.py | 11 ++++++----- tests/metrics/test_metric.py | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 922cb5a6f..6b7d3358b 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -134,7 +134,7 @@ class TimerMetric(CounterMetric): class CacheMetric(object): """A combination of two CounterMetrics, one to count cache hits and one to - count misses, and a callback metric to yield the current size. + count a total, and a callback metric to yield the current size. 
This metric generates standard metric name pairs, so that monitoring rules can easily be applied to measure hit ratio.""" @@ -142,8 +142,8 @@ class CacheMetric(object): def __init__(self, name, size_callback, labels=[]): self.name = name - self.hits = CounterMetric(name + ":hits", labels=labels) - self.misses = CounterMetric(name + ":misses", labels=labels) + self.hits = CounterMetric(name + ":hits", labels=labels) + self.total = CounterMetric(name + ":total", labels=labels) self.size = CallbackMetric(name + ":size", callback=size_callback, @@ -152,9 +152,10 @@ class CacheMetric(object): def inc_hits(self, *values): self.hits.inc(*values) + self.total.inc(*values) def inc_misses(self, *values): - self.misses.inc(*values) + self.total.inc(*values) def render(self): - return self.hits.render() + self.misses.render() + self.size.render() + return self.hits.render() + self.total.render() + self.size.render() diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index 1919630fe..193908b44 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -140,7 +140,7 @@ class CacheMetricTestCase(unittest.TestCase): self.assertEquals(metric.render(), [ 'cache:hits 0', - 'cache:misses 0', + 'cache:total 0', 'cache:size 0', ]) @@ -149,7 +149,7 @@ class CacheMetricTestCase(unittest.TestCase): self.assertEquals(metric.render(), [ 'cache:hits 0', - 'cache:misses 1', + 'cache:total 1', 'cache:size 1', ]) @@ -157,6 +157,6 @@ class CacheMetricTestCase(unittest.TestCase): self.assertEquals(metric.render(), [ 'cache:hits 1', - 'cache:misses 1', + 'cache:total 2', 'cache:size 1', ]) From 642f725fd74df0a921912c5284a93a81dc0d448d Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Mon, 9 Mar 2015 20:39:17 +0000 Subject: [PATCH 39/53] Pretend the 'getEvent' cache is just another cache in the set of all the others for metric --- synapse/storage/_base.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git 
a/synapse/storage/_base.py b/synapse/storage/_base.py index 35d118c58..2708d3c5b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -202,10 +202,9 @@ class SQLBaseStore(object): self._get_event_counters = PerformanceCounters() self._get_event_cache = LruCache(hs.config.event_cache_size) - self._get_event_cache_counter = metrics.register_cache( - "getEventCache", - size_callback=lambda: len(self._get_event_cache), - ) + + # Pretend the getEventCache is just another named cache + caches_by_name["*getEvent*"] = self._get_event_cache def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() @@ -682,10 +681,10 @@ class SQLBaseStore(object): # Separate cache entries for each way to invoke _get_event_txn ret = cache[(check_redacted, get_prev_content, allow_rejected)] - self._get_event_cache_counter.inc_hits() + cache_counter.inc_hits("*getEvent*") return ret except KeyError: - self._get_event_cache_counter.inc_misses() + cache_counter.inc_misses("*getEvent*") pass finally: start_time = update_counter("event_cache", start_time) From f1fbe3e09f5573ac7ea9159635b02cc579e19720 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 15:21:03 +0000 Subject: [PATCH 40/53] Rename TimerMetric to DistributionMetric; as it could count more than just time --- synapse/metrics/__init__.py | 8 +++++--- synapse/metrics/metric.py | 24 +++++++++++++----------- synapse/storage/_base.py | 14 +++++++------- tests/metrics/test_metric.py | 24 ++++++++++++------------ 4 files changed, 37 insertions(+), 33 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 1acaa3fd0..c161c17e9 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -19,7 +19,9 @@ from __future__ import absolute_import import logging from resource import getrusage, getpagesize, RUSAGE_SELF -from .metric import CounterMetric, CallbackMetric, TimerMetric, CacheMetric +from .metric import ( + CounterMetric, 
CallbackMetric, DistributionMetric, CacheMetric +) logger = logging.getLogger(__name__) @@ -59,8 +61,8 @@ class Metrics(object): def register_callback(self, *args, **kwargs): return self._register(CallbackMetric, *args, **kwargs) - def register_timer(self, *args, **kwargs): - return self._register(TimerMetric, *args, **kwargs) + def register_distribution(self, *args, **kwargs): + return self._register(DistributionMetric, *args, **kwargs) def register_cache(self, *args, **kwargs): return self._register(CacheMetric, *args, **kwargs) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 6b7d3358b..45d2752a2 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -101,35 +101,37 @@ class CallbackMetric(BaseMetric): for k in sorted(value.keys())] -class TimerMetric(CounterMetric): - """A combination of an event counter and a time accumulator, which counts - both the number of events and how long each one takes. +class DistributionMetric(CounterMetric): + """A combination of an event counter and an accumulator, which counts + both the number of events and accumulates the total value. Typically this + could be used to keep track of method-running times, or other distributions + of values that occur in discrete occurances. TODO(paul): Try to export some heatmap-style stats? 
""" def __init__(self, *args, **kwargs): - super(TimerMetric, self).__init__(*args, **kwargs) + super(DistributionMetric, self).__init__(*args, **kwargs) - self.times = {} + self.totals = {} # Scalar metrics are never empty if self.is_scalar(): - self.times[()] = 0 + self.totals[()] = 0 - def inc_time(self, msec, *values): + def inc_by(self, inc, *values): self.inc(*values) - if values not in self.times: - self.times[values] = msec + if values not in self.totals: + self.totals[values] = inc else: - self.times[values] += msec + self.totals[values] += inc def render_item(self, k): keystr = self._render_key(k) return ["%s:count%s %d" % (self.name, keystr, self.counts[k]), - "%s:msec%s %d" % (self.name, keystr, self.times[k])] + "%s:total%s %d" % (self.name, keystr, self.totals[k])] class CacheMetric(object): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 2708d3c5b..104e8e3cf 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -38,9 +38,9 @@ transaction_logger = logging.getLogger("synapse.storage.txn") metrics = synapse.metrics.get_metrics_for("synapse.storage") -sql_query_timer = metrics.register_timer("queries", labels=["verb"]) -sql_txn_timer = metrics.register_timer("transactions", labels=["desc"]) -sql_getevents_timer = metrics.register_timer("getEvents", labels=["desc"]) +sql_query_timer = metrics.register_distribution("queries", labels=["verb"]) +sql_txn_timer = metrics.register_distribution("transactions", labels=["desc"]) +sql_getevents_timer = metrics.register_distribution("getEvents", labels=["desc"]) caches_by_name = {} cache_counter = metrics.register_cache( @@ -143,7 +143,7 @@ class LoggingTransaction(object): finally: msecs = (time.time() * 1000) - start sql_logger.debug("[SQL time] {%s} %f", self.name, msecs) - sql_query_timer.inc_time(msecs, sql.split()[0]) + sql_query_timer.inc_by(msecs, sql.split()[0]) class PerformanceCounters(object): @@ -268,7 +268,7 @@ class SQLBaseStore(object): 
self._current_txn_total_time += end - start self._txn_perf_counters.update(desc, start, end) - sql_txn_timer.inc_time(self._current_txn_total_time, desc) + sql_txn_timer.inc_by(self._current_txn_total_time, desc) with PreserveLoggingContext(): result = yield self._db_pool.runInteraction( @@ -672,7 +672,7 @@ class SQLBaseStore(object): def update_counter(desc, last_time): curr_time = self._get_event_counters.update(desc, last_time) - sql_getevents_timer.inc_time(curr_time - last_time, desc) + sql_getevents_timer.inc_by(curr_time - last_time, desc) return curr_time cache = self._get_event_cache.setdefault(event_id, {}) @@ -727,7 +727,7 @@ class SQLBaseStore(object): def update_counter(desc, last_time): curr_time = self._get_event_counters.update(desc, last_time) - sql_getevents_timer.inc_time(curr_time - last_time, desc) + sql_getevents_timer.inc_by(curr_time - last_time, desc) return curr_time d = json.loads(js) diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index 193908b44..1ca3e45a2 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -16,7 +16,7 @@ from tests import unittest from synapse.metrics.metric import ( - CounterMetric, CallbackMetric, TimerMetric, CacheMetric + CounterMetric, CallbackMetric, DistributionMetric, CacheMetric ) @@ -97,37 +97,37 @@ class CallbackMetricTestCase(unittest.TestCase): ]) -class TimerMetricTestCase(unittest.TestCase): +class DistributionMetricTestCase(unittest.TestCase): def test_scalar(self): - metric = TimerMetric("thing") + metric = DistributionMetric("thing") self.assertEquals(metric.render(), [ 'thing:count 0', - 'thing:msec 0', + 'thing:total 0', ]) - metric.inc_time(500) + metric.inc_by(500) self.assertEquals(metric.render(), [ 'thing:count 1', - 'thing:msec 500', + 'thing:total 500', ]) def test_vector(self): - metric = TimerMetric("queries", labels=["verb"]) + metric = DistributionMetric("queries", labels=["verb"]) self.assertEquals(metric.render(), []) - 
metric.inc_time(300, "SELECT") - metric.inc_time(200, "SELECT") - metric.inc_time(800, "INSERT") + metric.inc_by(300, "SELECT") + metric.inc_by(200, "SELECT") + metric.inc_by(800, "INSERT") self.assertEquals(metric.render(), [ 'queries:count{verb="INSERT"} 1', - 'queries:msec{verb="INSERT"} 800', + 'queries:total{verb="INSERT"} 800', 'queries:count{verb="SELECT"} 2', - 'queries:msec{verb="SELECT"} 500', + 'queries:total{verb="SELECT"} 500', ]) From 493e3fa0ca81b6070648e0a2c00c6c229cec92fe Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 15:23:33 +0000 Subject: [PATCH 41/53] Don't forbid '_' in metric basenames any more, to allow things like foo_time --- synapse/metrics/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index c161c17e9..f85c6418e 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -43,11 +43,6 @@ class Metrics(object): self.name_prefix = name def _register(self, metric_class, name, *args, **kwargs): - if "_" in name: - raise ValueError("Metric names %s is invalid as it cannot contain an underscore" - % (name) - ) - full_name = "%s_%s" % (self.name_prefix, name) metric = metric_class(full_name, *args, **kwargs) From 63cb7ece62962a0004bc45cf866b87e107bc0bc9 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 15:24:02 +0000 Subject: [PATCH 42/53] Rename the timer metrics exported by synapse.storage to append _time, so the meaning of ':total' is clearer --- synapse/storage/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 104e8e3cf..f742fe15d 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -38,9 +38,9 @@ transaction_logger = logging.getLogger("synapse.storage.txn") metrics = synapse.metrics.get_metrics_for("synapse.storage") -sql_query_timer = metrics.register_distribution("queries", 
labels=["verb"]) -sql_txn_timer = metrics.register_distribution("transactions", labels=["desc"]) -sql_getevents_timer = metrics.register_distribution("getEvents", labels=["desc"]) +sql_query_timer = metrics.register_distribution("query_time", labels=["verb"]) +sql_txn_timer = metrics.register_distribution("transaction_time", labels=["desc"]) +sql_getevents_timer = metrics.register_distribution("getEvents_time", labels=["desc"]) caches_by_name = {} cache_counter = metrics.register_cache( From c1cdd7954d7cc411a5ec926148b9060e59b8a7bd Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 15:54:16 +0000 Subject: [PATCH 43/53] Add an .inc_by() method to CounterMetric; implement DistributionMetric a neater way --- synapse/metrics/metric.py | 37 ++++++++++++++---------------------- tests/metrics/test_metric.py | 5 ++--- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 45d2752a2..12460c99c 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -64,7 +64,7 @@ class CounterMetric(BaseMetric): if self.is_scalar(): self.counts[()] = 0 - def inc(self, *values): + def inc_by(self, incr, *values): if len(values) != self.dimension(): raise ValueError("Expected as many values to inc() as labels (%d)" % (self.dimension()) @@ -73,9 +73,12 @@ class CounterMetric(BaseMetric): # TODO: should assert that the tag values are all strings if values not in self.counts: - self.counts[values] = 1 + self.counts[values] = incr else: - self.counts[values] += 1 + self.counts[values] += incr + + def inc(self, *values): + self.inc_by(1, *values) def render_item(self, k): return ["%s%s %d" % (self.name, self._render_key(k), self.counts[k])] @@ -101,7 +104,7 @@ class CallbackMetric(BaseMetric): for k in sorted(value.keys())] -class DistributionMetric(CounterMetric): +class DistributionMetric(object): """A combination of an event counter and an accumulator, which counts both the number 
of events and accumulates the total value. Typically this could be used to keep track of method-running times, or other distributions @@ -110,28 +113,16 @@ class DistributionMetric(CounterMetric): TODO(paul): Try to export some heatmap-style stats? """ - def __init__(self, *args, **kwargs): - super(DistributionMetric, self).__init__(*args, **kwargs) - - self.totals = {} - - # Scalar metrics are never empty - if self.is_scalar(): - self.totals[()] = 0 + def __init__(self, name, *args, **kwargs): + self.counts = CounterMetric(name + ":count", **kwargs) + self.totals = CounterMetric(name + ":total", **kwargs) def inc_by(self, inc, *values): - self.inc(*values) + self.counts.inc(*values) + self.totals.inc_by(inc, *values) - if values not in self.totals: - self.totals[values] = inc - else: - self.totals[values] += inc - - def render_item(self, k): - keystr = self._render_key(k) - - return ["%s:count%s %d" % (self.name, keystr, self.counts[k]), - "%s:total%s %d" % (self.name, keystr, self.totals[k])] + def render(self): + return self.counts.render() + self.totals.render() class CacheMetric(object): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index 1ca3e45a2..600901429 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -35,8 +35,7 @@ class CounterMetricTestCase(unittest.TestCase): 'scalar 1', ]) - counter.inc() - counter.inc() + counter.inc_by(2) self.assertEquals(counter.render(), [ 'scalar 3' @@ -125,8 +124,8 @@ class DistributionMetricTestCase(unittest.TestCase): self.assertEquals(metric.render(), [ 'queries:count{verb="INSERT"} 1', - 'queries:total{verb="INSERT"} 800', 'queries:count{verb="SELECT"} 2', + 'queries:total{verb="INSERT"} 800', 'queries:total{verb="SELECT"} 500', ]) From 2e4f0b2bd736fd70040d936145948b65b4e00b12 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 15:29:22 +0000 Subject: [PATCH 44/53] Replace the @metrics.counted annotations in federation with 
specifically-written counters and distributions --- synapse/federation/federation_client.py | 27 ++++++++++++++----------- synapse/federation/federation_server.py | 26 +++++++++++++----------- synapse/metrics/__init__.py | 17 ---------------- 3 files changed, 29 insertions(+), 41 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index ef177b79c..6811a0e3d 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -36,7 +36,15 @@ import random logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) + +# synapse.federation.federation_client is a silly name +metrics = synapse.metrics.get_metrics_for("synapse.federation.client") + +sent_pdus_destination_dist = metrics.register_distribution("sent_pdu_destinations") + +sent_edus_counter = metrics.register_counter("sent_edus") + +sent_queries_counter = metrics.register_counter("sent_queries", labels=["type"]) class FederationClient(FederationBase): @@ -53,7 +61,6 @@ class FederationClient(FederationBase): self._get_pdu_cache.start() @log_function - @metrics.counted def send_pdu(self, pdu, destinations): """Informs the replication layer about a new PDU generated within the home server that should be transmitted to others. @@ -70,6 +77,8 @@ class FederationClient(FederationBase): order = self._order self._order += 1 + sent_pdus_destination_dist.inc_by(len(destinations)) + logger.debug("[%s] transaction_layer.enqueue_pdu... ", pdu.event_id) # TODO, add errback, etc. @@ -81,7 +90,6 @@ class FederationClient(FederationBase): ) @log_function - @metrics.counted def send_edu(self, destination, edu_type, content): edu = Edu( origin=self.server_name, @@ -90,18 +98,18 @@ class FederationClient(FederationBase): content=content, ) + sent_edus_counter.inc() + # TODO, add errback, etc. 
self._transaction_queue.enqueue_edu(edu) return defer.succeed(None) @log_function - @metrics.counted def send_failure(self, failure, destination): self._transaction_queue.enqueue_failure(failure, destination) return defer.succeed(None) @log_function - @metrics.counted def make_query(self, destination, query_type, args, retry_on_dns_fail=True): """Sends a federation Query to a remote homeserver of the given type @@ -118,6 +126,8 @@ class FederationClient(FederationBase): a Deferred which will eventually yield a JSON object from the response """ + sent_queries_counter.inc(query_type) + return self.transport_layer.make_query( destination, query_type, args, retry_on_dns_fail=retry_on_dns_fail ) @@ -163,7 +173,6 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function - @metrics.counted def get_pdu(self, destinations, event_id, outlier=False): """Requests the PDU with given origin and ID from the remote home servers. @@ -253,7 +262,6 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function - @metrics.counted def get_state_for_room(self, destination, room_id, event_id): """Requests all of the `current` state PDUs for a given room from a remote home server. 
@@ -294,7 +302,6 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function - @metrics.counted def get_event_auth(self, destination, room_id, event_id): res = yield self.transport_layer.get_event_auth( destination, room_id, event_id, @@ -314,7 +321,6 @@ class FederationClient(FederationBase): defer.returnValue(signed_auth) @defer.inlineCallbacks - @metrics.counted def make_join(self, destinations, room_id, user_id): for destination in destinations: try: @@ -341,7 +347,6 @@ class FederationClient(FederationBase): raise RuntimeError("Failed to send to any server.") @defer.inlineCallbacks - @metrics.counted def send_join(self, destinations, pdu): for destination in destinations: try: @@ -391,7 +396,6 @@ class FederationClient(FederationBase): raise RuntimeError("Failed to send to any server.") @defer.inlineCallbacks - @metrics.counted def send_invite(self, destination, room_id, event_id, pdu): time_now = self._clock.time_msec() code, content = yield self.transport_layer.send_invite( @@ -415,7 +419,6 @@ class FederationClient(FederationBase): defer.returnValue(pdu) @defer.inlineCallbacks - @metrics.counted def query_auth(self, destination, room_id, event_id, local_auth): """ Params: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 3216fca95..25c0014f9 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -33,7 +33,14 @@ import logging logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) +# synapse.federation.federation_server is a silly name +metrics = synapse.metrics.get_metrics_for("synapse.federation.server") + +received_pdus_counter = metrics.register_counter("received_pdus") + +received_edus_counter = metrics.register_counter("received_edus") + +received_queries_counter = metrics.register_counter("received_queries", labels=["type"]) class FederationServer(FederationBase): @@ -75,7 +82,6 @@ class 
FederationServer(FederationBase): @defer.inlineCallbacks @log_function - @metrics.counted def on_backfill_request(self, origin, room_id, versions, limit): pdus = yield self.handler.on_backfill_request( origin, room_id, versions, limit @@ -85,10 +91,11 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function - @metrics.counted def on_incoming_transaction(self, transaction_data): transaction = Transaction(**transaction_data) + received_pdus_counter.inc_by(len(transaction.pdus)) + for p in transaction.pdus: if "unsigned" in p: unsigned = p["unsigned"] @@ -158,6 +165,8 @@ class FederationServer(FederationBase): defer.returnValue((200, response)) def received_edu(self, origin, edu_type, content): + received_edus_counter.inc() + if edu_type in self.edu_handlers: self.edu_handlers[edu_type](origin, content) else: @@ -165,7 +174,6 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function - @metrics.counted def on_context_state_request(self, origin, room_id, event_id): if event_id: pdus = yield self.handler.get_state_for_pdu( @@ -193,7 +201,6 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function - @metrics.counted def on_pdu_request(self, origin, event_id): pdu = yield self._get_persisted_pdu(origin, event_id) @@ -206,13 +213,13 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function - @metrics.counted def on_pull_request(self, origin, versions): raise NotImplementedError("Pull transactions not implemented") @defer.inlineCallbacks - @metrics.counted def on_query_request(self, query_type, args): + received_queries_counter.inc(query_type) + if query_type in self.query_handlers: response = yield self.query_handlers[query_type](args) defer.returnValue((200, response)) @@ -222,14 +229,12 @@ class FederationServer(FederationBase): ) @defer.inlineCallbacks - @metrics.counted def on_make_join_request(self, room_id, user_id): pdu = yield self.handler.on_make_join_request(room_id, user_id) 
time_now = self._clock.time_msec() defer.returnValue({"event": pdu.get_pdu_json(time_now)}) @defer.inlineCallbacks - @metrics.counted def on_invite_request(self, origin, content): pdu = self.event_from_pdu_json(content) ret_pdu = yield self.handler.on_invite_request(origin, pdu) @@ -237,7 +242,6 @@ class FederationServer(FederationBase): defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)})) @defer.inlineCallbacks - @metrics.counted def on_send_join_request(self, origin, content): logger.debug("on_send_join_request: content: %s", content) pdu = self.event_from_pdu_json(content) @@ -252,7 +256,6 @@ class FederationServer(FederationBase): })) @defer.inlineCallbacks - @metrics.counted def on_event_auth(self, origin, room_id, event_id): time_now = self._clock.time_msec() auth_pdus = yield self.handler.on_event_auth(event_id) @@ -261,7 +264,6 @@ class FederationServer(FederationBase): })) @defer.inlineCallbacks - @metrics.counted def on_query_auth_request(self, origin, content, event_id): """ Content is a dict with keys:: diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index f85c6418e..94164974f 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -62,23 +62,6 @@ class Metrics(object): def register_cache(self, *args, **kwargs): return self._register(CacheMetric, *args, **kwargs) - def counted(self, func): - """ A method decorator that registers a counter, to count invocations - of this method. 
""" - if not hasattr(self, "method_counter"): - self.method_counter = self.register_counter( - "calls", - labels=["method"] - ) - - counter = self.method_counter - name = func.__name__ - - def wrapped(*args, **kwargs): - counter.inc(name) - return func(*args, **kwargs) - return wrapped - def get_metrics_for(pkg_name): """ Returns a Metrics instance for conveniently creating metrics From 89ac1fa8ba55c6cbb4f1888e0542d106209d9c2a Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 18:06:24 +0000 Subject: [PATCH 45/53] Add a counter to track total number of events served by the notifier --- synapse/notifier.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/notifier.py b/synapse/notifier.py index 75e8152d0..88873d453 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -28,6 +28,8 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) +notified_events_counter = metrics.register_counter("notified_events") + # TODO(paul): Should be shared somewhere def count(func, l): @@ -72,6 +74,7 @@ class _NotificationListener(object): try: self.deferred.callback(result) + notified_events_counter.inc_by(len(events)) except defer.AlreadyCalledError: pass From c782e893ec5ceaf7e8136f45c9e6cfa8b11ec653 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 18:24:52 +0000 Subject: [PATCH 46/53] Neater metrics from TransactionQueue --- synapse/federation/transaction_queue.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index ca5bcf21c..99e386fa5 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -57,27 +57,29 @@ class TransactionQueue(object): # done self.pending_transactions = {} + metrics.register_callback("pending_destinations", + lambda: len(self.pending_transactions), + ) + # Is a mapping from destination 
-> list of # tuple(pending pdus, deferred, order) self.pending_pdus_by_dest = pdus = {} # destination -> list of tuple(edu, deferred) self.pending_edus_by_dest = edus = {} + metrics.register_callback("pending_pdus", + lambda: sum(map(len, pdus.values())), + ) + metrics.register_callback("pending_edus", + lambda: sum(map(len, edus.values())), + ) + # destination -> list of tuple(failure, deferred) self.pending_failures_by_dest = {} # HACK to get unique tx id self._next_txn_id = int(self._clock.time_msec()) - metrics.register_callback("pendingPdus", - lambda: {(dest,): len(pdus[dest]) for dest in pdus.keys()}, - labels=["dest"], - ) - metrics.register_callback("pendingEdus", - lambda: {(dest,): len(edus[dest]) for dest in edus.keys()}, - labels=["dest"], - ) - def can_send_to(self, destination): """Can we send messages to the given server? From e75fa8bbbf1215a89e0239f96789317b96383734 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 11 Mar 2015 18:57:35 +0000 Subject: [PATCH 47/53] Bugfix to sql_txn_timer increment - add only the per-TXN duration, not the total time ever spent since boot --- synapse/storage/_base.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index f742fe15d..40f2fc6d7 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -260,15 +260,13 @@ class SQLBaseStore(object): raise finally: end = time.time() * 1000 - transaction_logger.debug( - "[TXN END] {%s} %f", - name, end - start - ) + duration = end - start - self._current_txn_total_time += end - start + transaction_logger.debug("[TXN END] {%s} %f", name, duration) + + self._current_txn_total_time += duration self._txn_perf_counters.update(desc, start, end) - - sql_txn_timer.inc_by(self._current_txn_total_time, desc) + sql_txn_timer.inc_by(duration, desc) with PreserveLoggingContext(): result = yield self._db_pool.runInteraction( From e0214a263b610f21d59d16cf84b5a3eee8485b28 Mon 
Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 12 Mar 2015 15:33:53 +0000 Subject: [PATCH 48/53] Build MetricsResource as a specific HomeServer dependency --- synapse/app/homeserver.py | 16 ++++++++++------ synapse/server.py | 1 + 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 9747f7a01..3801302c6 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -47,6 +47,7 @@ from synapse.crypto import context_factory from synapse.util.logcontext import LoggingContext from synapse.rest.client.v1 import ClientV1RestResource from synapse.rest.client.v2_alpha import ClientV2AlphaRestResource +from synapse.metrics.resource import MetricsResource, METRICS_PREFIX from daemonize import Daemonize import twisted.manhole.telnet @@ -100,6 +101,12 @@ class SynapseHomeServer(HomeServer): def build_resource_for_server_key(self): return LocalKey(self) + def build_resource_for_metrics(self): + if self.get_config().enable_metrics: + return MetricsResource(self) + else: + return None + def build_db_pool(self): return adbapi.ConnectionPool( "sqlite3", self.get_db_name(), @@ -147,12 +154,9 @@ class SynapseHomeServer(HomeServer): else: self.root_resource = Resource() - if self.get_config().enable_metrics: - from synapse.metrics.resource import ( - MetricsResource, METRICS_PREFIX - ) - - desired_tree.append((METRICS_PREFIX, MetricsResource(self))) + metrics_resource = self.get_resource_for_metrics() + if metrics_resource is not None: + desired_tree.append((METRICS_PREFIX, metrics_resource)) # ideally we'd just use getChild and putChild but getChild doesn't work # unless you give it a Request object IN ADDITION to the name :/ So diff --git a/synapse/server.py b/synapse/server.py index e3eefda4f..c7772244b 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -80,6 +80,7 @@ class BaseHomeServer(object): 'resource_for_server_key', 'resource_for_media_repository', 
'resource_for_app_services', + 'resource_for_metrics', 'event_sources', 'ratelimiter', 'keyring', From a2cdd11d4a05f76b0be96d7e2de9294a9ff2cd57 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 12 Mar 2015 15:51:33 +0000 Subject: [PATCH 49/53] Fold the slightly-odd bind_port/secure_port/etc.. logic into SynapseHomeServer.start_listening() --- synapse/app/homeserver.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 3801302c6..2ba701f53 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -219,17 +219,20 @@ class SynapseHomeServer(HomeServer): """ return "%s-%s" % (resource, path_seg) - def start_listening(self, secure_port, unsecure_port): - if secure_port is not None: + def start_listening(self): + config = self.get_config() + + if not config.no_tls and config.bind_port is not None: reactor.listenSSL( - secure_port, Site(self.root_resource), self.tls_context_factory + config.bind_port, Site(self.root_resource), self.tls_context_factory ) - logger.info("Synapse now listening on port %d", secure_port) - if unsecure_port is not None: + logger.info("Synapse now listening on port %d", config.bind_port) + + if config.unsecure_port is not None: reactor.listenTCP( - unsecure_port, Site(self.root_resource) + config.unsecure_port, Site(self.root_resource) ) - logger.info("Synapse now listening on port %d", unsecure_port) + logger.info("Synapse now listening on port %d", config.unsecure_port) def get_version_string(): @@ -381,11 +384,7 @@ def setup(config_options): f.namespace['hs'] = hs reactor.listenTCP(config.manhole, f, interface='127.0.0.1') - bind_port = config.bind_port - if config.no_tls: - bind_port = None - - hs.start_listening(bind_port, config.unsecure_port) + hs.start_listening() hs.get_pusherpool().start() hs.get_state_handler().start_caching() From b98b4c135d9738d5cf701712fc244209651cddf7 Mon Sep 17 00:00:00 2001 From: 
"Paul \"LeoNerd\" Evans" Date: Thu, 12 Mar 2015 16:05:46 +0000 Subject: [PATCH 50/53] Option to serve metrics from their own localhost-only TCP port instead of muxed on the main listener --- synapse/app/homeserver.py | 12 ++++++++++-- synapse/config/metrics.py | 5 +++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 2ba701f53..aa7c722ef 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -129,7 +129,8 @@ class SynapseHomeServer(HomeServer): location of the web client. This does nothing if web_client is not True. """ - web_client = self.get_config().webclient + config = self.get_config() + web_client = config.webclient # list containing (path_str, Resource) e.g: # [ ("/aaa/bbb/cc", Resource1), ("/aaa/dummy", Resource2) ] @@ -155,7 +156,7 @@ class SynapseHomeServer(HomeServer): self.root_resource = Resource() metrics_resource = self.get_resource_for_metrics() - if metrics_resource is not None: + if config.metrics_port is None and metrics_resource is not None: desired_tree.append((METRICS_PREFIX, metrics_resource)) # ideally we'd just use getChild and putChild but getChild doesn't work @@ -234,6 +235,13 @@ class SynapseHomeServer(HomeServer): ) logger.info("Synapse now listening on port %d", config.unsecure_port) + metrics_resource = self.get_resource_for_metrics() + if metrics_resource and config.metrics_port is not None: + reactor.listenTCP( + config.metrics_port, Site(metrics_resource), interface="127.0.0.1", + ) + logger.info("Metrics now running on 127.0.0.1 port %d", config.metrics_port) + def get_version_string(): try: diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 90aba10bb..901a429c7 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -20,6 +20,7 @@ class MetricsConfig(Config): def __init__(self, args): super(MetricsConfig, self).__init__(args) self.enable_metrics = args.enable_metrics + self.metrics_port = 
args.metrics_port @classmethod def add_arguments(cls, parser): @@ -29,3 +30,7 @@ class MetricsConfig(Config): '--enable-metrics', dest="enable_metrics", action="store_true", help="Enable collection and rendering of performance metrics" ) + metrics_group.add_argument( + '--metrics-port', metavar="PORT", type=int, + help="Separate port to accept metrics requests on (on localhost)" + ) From 128cf2daf76e5b05a4e577b60ea406fdbb6986bf Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 12 Mar 2015 16:24:38 +0000 Subject: [PATCH 51/53] Appease pep8 --- synapse/federation/transaction_queue.py | 9 ++++++--- synapse/handlers/presence.py | 5 +++-- synapse/http/client.py | 10 +++++++--- synapse/http/matrixfederationclient.py | 10 +++++++--- synapse/http/server.py | 8 +++++--- synapse/metrics/__init__.py | 3 ++- synapse/metrics/metric.py | 11 ++++++----- synapse/metrics/resource.py | 2 +- synapse/notifier.py | 15 +++++++++------ 9 files changed, 46 insertions(+), 27 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 99e386fa5..4dccd93d0 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -57,7 +57,8 @@ class TransactionQueue(object): # done self.pending_transactions = {} - metrics.register_callback("pending_destinations", + metrics.register_callback( + "pending_destinations", lambda: len(self.pending_transactions), ) @@ -67,10 +68,12 @@ class TransactionQueue(object): # destination -> list of tuple(edu, deferred) self.pending_edus_by_dest = edus = {} - metrics.register_callback("pending_pdus", + metrics.register_callback( + "pending_pdus", lambda: sum(map(len, pdus.values())), ) - metrics.register_callback("pending_edus", + metrics.register_callback( + "pending_edus", lambda: sum(map(len, edus.values())), ) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index c6d6aef53..731df0064 100644 --- a/synapse/handlers/presence.py +++ 
b/synapse/handlers/presence.py @@ -136,8 +136,9 @@ class PresenceHandler(BaseHandler): self._user_cachemap = {} self._user_cachemap_latest_serial = 0 - metrics.register_callback("userCachemap:size", - lambda: len(self._user_cachemap) + metrics.register_callback( + "userCachemap:size", + lambda: len(self._user_cachemap), ) def _get_or_make_usercache(self, user): diff --git a/synapse/http/client.py b/synapse/http/client.py index 01737a718..2ae1c4d3a 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -34,11 +34,13 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -outgoing_requests_counter = metrics.register_counter("requests", +outgoing_requests_counter = metrics.register_counter( + "requests", labels=["method"], ) -incoming_responses_counter = metrics.register_counter("responses", - labels=["method","code"], +incoming_responses_counter = metrics.register_counter( + "responses", + labels=["method", "code"], ) @@ -64,9 +66,11 @@ class SimpleHttpClient(object): def _cb(response): incoming_responses_counter.inc(method, response.code) return response + def _eb(failure): incoming_responses_counter.inc(method, "ERR") return failure + d.addCallbacks(_cb, _eb) return d diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 11883d385..7fa295cad 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -43,11 +43,13 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -outgoing_requests_counter = metrics.register_counter("requests", +outgoing_requests_counter = metrics.register_counter( + "requests", labels=["method"], ) -incoming_responses_counter = metrics.register_counter("responses", - labels=["method","code"], +incoming_responses_counter = metrics.register_counter( + "responses", + labels=["method", "code"], ) @@ -78,9 +80,11 @@ class MatrixFederationHttpAgent(_AgentBase): def _cb(response): 
incoming_responses_counter.inc(method, response.code) return response + def _eb(failure): incoming_responses_counter.inc(method, "ERR") return failure + d.addCallbacks(_cb, _eb) return d diff --git a/synapse/http/server.py b/synapse/http/server.py index a0d190ff7..d77cb7779 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -37,11 +37,13 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -incoming_requests_counter = metrics.register_counter("requests", +incoming_requests_counter = metrics.register_counter( + "requests", labels=["method", "servlet"], ) -outgoing_responses_counter = metrics.register_counter("responses", - labels=["method","code"], +outgoing_responses_counter = metrics.register_counter( + "responses", + labels=["method", "code"], ) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 94164974f..7b9c9c8ba 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -85,7 +85,7 @@ def render_all(): strs += ["# FAILED to render %s" % name] logger.exception("Failed to render %s metric", name) - strs.append("") # to generate a final CRLF + strs.append("") # to generate a final CRLF return "\n".join(strs) @@ -96,6 +96,7 @@ def render_all(): rusage = None PAGE_SIZE = getpagesize() + def update_resource_metrics(): global rusage rusage = getrusage(RUSAGE_SELF) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 12460c99c..21b37748f 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -27,7 +27,7 @@ class BaseMetric(object): def __init__(self, name, labels=[]): self.name = name - self.labels = labels # OK not to clone as we never write it + self.labels = labels # OK not to clone as we never write it def dimension(self): return len(self.labels) @@ -66,8 +66,8 @@ class CounterMetric(BaseMetric): def inc_by(self, incr, *values): if len(values) != self.dimension(): - raise ValueError("Expected as many values to inc() as labels 
(%d)" % - (self.dimension()) + raise ValueError( + "Expected as many values to inc() as labels (%d)" % (self.dimension()) ) # TODO: should assert that the tag values are all strings @@ -135,10 +135,11 @@ class CacheMetric(object): def __init__(self, name, size_callback, labels=[]): self.name = name - self.hits = CounterMetric(name + ":hits", labels=labels) + self.hits = CounterMetric(name + ":hits", labels=labels) self.total = CounterMetric(name + ":total", labels=labels) - self.size = CallbackMetric(name + ":size", + self.size = CallbackMetric( + name + ":size", callback=size_callback, labels=labels, ) diff --git a/synapse/metrics/resource.py b/synapse/metrics/resource.py index ff7baab01..97ea797bf 100644 --- a/synapse/metrics/resource.py +++ b/synapse/metrics/resource.py @@ -26,7 +26,7 @@ class MetricsResource(Resource): isLeaf = True def __init__(self, hs): - Resource.__init__(self) # Resource is old-style, so no super() + Resource.__init__(self) # Resource is old-style, so no super() self.hs = hs diff --git a/synapse/notifier.py b/synapse/notifier.py index 88873d453..7121d659d 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -127,14 +127,17 @@ class Notifier(object): return len(all_listeners) metrics.register_callback("listeners", count_listeners) - metrics.register_callback("rooms", - lambda: count(bool, self.room_to_listeners.values()) + metrics.register_callback( + "rooms", + lambda: count(bool, self.room_to_listeners.values()), ) - metrics.register_callback("users", - lambda: count(bool, self.user_to_listeners.values()) + metrics.register_callback( + "users", + lambda: count(bool, self.user_to_listeners.values()), ) - metrics.register_callback("appservices", - lambda: count(bool, self.appservice_to_listeners.values()) + metrics.register_callback( + "appservices", + lambda: count(bool, self.appservice_to_listeners.values()), ) @log_function From 0eb7e6b9a8e4e38793b1e045ab5f0f0a4d4e6777 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" 
Date: Thu, 12 Mar 2015 16:39:52 +0000 Subject: [PATCH 52/53] Delete unused import of NOT_DONE_YET --- synapse/metrics/resource.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/metrics/resource.py b/synapse/metrics/resource.py index 97ea797bf..0af4b3eb5 100644 --- a/synapse/metrics/resource.py +++ b/synapse/metrics/resource.py @@ -14,7 +14,6 @@ # limitations under the License. from twisted.web.resource import Resource -from twisted.web.server import NOT_DONE_YET import synapse.metrics From 05a056a409042d233972c608a8e526e8fd0af262 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 12 Mar 2015 16:45:05 +0000 Subject: [PATCH 53/53] Appease pyflakes --- synapse/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 7b9c9c8ba..dffb8a486 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -81,7 +81,7 @@ def render_all(): for name in sorted(all_metrics.keys()): try: strs += all_metrics[name].render() - except Exception as e: + except Exception: strs += ["# FAILED to render %s" % name] logger.exception("Failed to render %s metric", name)