From 55d5b3af8863167432017f23cd8a04a0c14c9d23 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 7 Sep 2019 01:45:51 +1000 Subject: [PATCH] Servers-known-about statistic (#5981) --- changelog.d/5981.feature | 1 + docs/sample_config.yaml | 10 +++ synapse/config/metrics.py | 31 ++++++++ synapse/storage/roommember.py | 59 +++++++++++++++ tests/config/test_generate.py | 25 ++++--- tests/config/test_load.py | 34 ++++++--- tests/storage/test_roommember.py | 124 ++++++++++++++++++++++--------- 7 files changed, 225 insertions(+), 59 deletions(-) create mode 100644 changelog.d/5981.feature diff --git a/changelog.d/5981.feature b/changelog.d/5981.feature new file mode 100644 index 000000000..e39514273 --- /dev/null +++ b/changelog.d/5981.feature @@ -0,0 +1 @@ +Setting metrics_flags.known_servers to True in the configuration will publish the synapse_federation_known_servers metric over Prometheus. This represents the total number of servers your server knows about (i.e. is in rooms with), including itself. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 186cdbedd..93c0edd8c 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -958,6 +958,16 @@ account_threepid_delegates: #sentry: # dsn: "..." +# Flags to enable Prometheus metrics which are not suitable to be +# enabled by default, either for performance reasons or limited use. +# +metrics_flags: + # Publish synapse_federation_known_servers, a g auge of the number of + # servers this homeserver knows about, including itself. May cause + # performance problems on large homeservers. + # + #known_servers: true + # Whether or not to report anonymized homeserver usage statistics. # report_stats: true|false diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 369844196..653b990e6 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import attr + from ._base import Config, ConfigError MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentry @@ -20,6 +23,18 @@ MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentr """ +@attr.s +class MetricsFlags(object): + known_servers = attr.ib(default=False, validator=attr.validators.instance_of(bool)) + + @classmethod + def all_off(cls): + """ + Instantiate the flags with all options set to off. + """ + return cls(**{x.name: False for x in attr.fields(cls)}) + + class MetricsConfig(Config): def read_config(self, config, **kwargs): self.enable_metrics = config.get("enable_metrics", False) @@ -27,6 +42,12 @@ class MetricsConfig(Config): self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") + if self.enable_metrics: + _metrics_config = config.get("metrics_flags") or {} + self.metrics_flags = MetricsFlags(**_metrics_config) + else: + self.metrics_flags = MetricsFlags.all_off() + self.sentry_enabled = "sentry" in config if self.sentry_enabled: try: @@ -58,6 +79,16 @@ class MetricsConfig(Config): #sentry: # dsn: "..." + # Flags to enable Prometheus metrics which are not suitable to be + # enabled by default, either for performance reasons or limited use. + # + metrics_flags: + # Publish synapse_federation_known_servers, a g auge of the number of + # servers this homeserver knows about, including itself. May cause + # performance problems on large homeservers. + # + #known_servers: true + # Whether or not to report anonymized homeserver usage statistics. """ diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index f8b682ebd..4df8ebdac 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -24,8 +24,10 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.constants import EventTypes, Membership +from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import LoggingTransaction +from synapse.storage.engines import Sqlite3Engine from synapse.storage.events_worker import EventsWorkerStore from synapse.types import get_domain_from_id from synapse.util.async_helpers import Linearizer @@ -74,6 +76,63 @@ class RoomMemberWorkerStore(EventsWorkerStore): self._check_safe_current_state_events_membership_updated_txn(txn) txn.close() + if self.hs.config.metrics_flags.known_servers: + self._known_servers_count = 1 + self.hs.get_clock().looping_call( + run_as_background_process, + 60 * 1000, + "_count_known_servers", + self._count_known_servers, + ) + self.hs.get_clock().call_later( + 1000, + run_as_background_process, + "_count_known_servers", + self._count_known_servers, + ) + LaterGauge( + "synapse_federation_known_servers", + "", + [], + lambda: self._known_servers_count, + ) + + @defer.inlineCallbacks + def _count_known_servers(self): + """ + Count the servers that this server knows about. + + The statistic is stored on the class for the + `synapse_federation_known_servers` LaterGauge to collect. + """ + + def _transact(txn): + if isinstance(self.database_engine, Sqlite3Engine): + query = """ + SELECT COUNT(DISTINCT substr(out.user_id, pos+1)) + FROM ( + SELECT rm.user_id as user_id, instr(rm.user_id, ':') + AS pos FROM room_memberships as rm + INNER JOIN current_state_events as c ON rm.event_id = c.event_id + WHERE c.type = 'm.room.member' + ) as out + """ + else: + query = """ + SELECT COUNT(DISTINCT split_part(state_key, ':', 2)) + FROM current_state_events + WHERE type = 'm.room.member' AND membership = 'join'; + """ + txn.execute(query) + return list(txn)[0][0] + + count = yield self.runInteraction("get_known_servers", _transact) + + # We always know about ourselves, even if we have nothing in + # room_memberships (for example, the server is new). + self._known_servers_count = max([count, 1]) + return self._known_servers_count + def _check_safe_current_state_events_membership_updated_txn(self, txn): """Checks if it is safe to assume the new current_state_events membership column is up to date diff --git a/tests/config/test_generate.py b/tests/config/test_generate.py index 5017cbce8..2684e662d 100644 --- a/tests/config/test_generate.py +++ b/tests/config/test_generate.py @@ -17,6 +17,8 @@ import os.path import re import shutil import tempfile +from contextlib import redirect_stdout +from io import StringIO from synapse.config.homeserver import HomeServerConfig @@ -32,17 +34,18 @@ class ConfigGenerationTestCase(unittest.TestCase): shutil.rmtree(self.dir) def test_generate_config_generates_files(self): - HomeServerConfig.load_or_generate_config( - "", - [ - "--generate-config", - "-c", - self.file, - "--report-stats=yes", - "-H", - "lemurs.win", - ], - ) + with redirect_stdout(StringIO()): + HomeServerConfig.load_or_generate_config( + "", + [ + "--generate-config", + "-c", + self.file, + "--report-stats=yes", + "-H", + "lemurs.win", + ], + ) self.assertSetEqual( set(["homeserver.yaml", "lemurs.win.log.config", "lemurs.win.signing.key"]), diff --git a/tests/config/test_load.py b/tests/config/test_load.py index 6bfc1970a..b3e557bd6 100644 --- a/tests/config/test_load.py +++ b/tests/config/test_load.py @@ -15,6 +15,8 @@ import os.path import shutil import tempfile +from contextlib import redirect_stdout +from io import StringIO import yaml @@ -26,7 +28,6 @@ from tests import unittest class ConfigLoadingTestCase(unittest.TestCase): def setUp(self): self.dir = tempfile.mkdtemp() - print(self.dir) self.file = os.path.join(self.dir, "homeserver.yaml") def tearDown(self): @@ -94,18 +95,27 @@ class ConfigLoadingTestCase(unittest.TestCase): ) self.assertTrue(config.enable_registration) + def test_stats_enabled(self): + self.generate_config_and_remove_lines_containing("enable_metrics") + self.add_lines_to_config(["enable_metrics: true"]) + + # The default Metrics Flags are off by default. + config = HomeServerConfig.load_config("", ["-c", self.file]) + self.assertFalse(config.metrics_flags.known_servers) + def generate_config(self): - HomeServerConfig.load_or_generate_config( - "", - [ - "--generate-config", - "-c", - self.file, - "--report-stats=yes", - "-H", - "lemurs.win", - ], - ) + with redirect_stdout(StringIO()): + HomeServerConfig.load_or_generate_config( + "", + [ + "--generate-config", + "-c", + self.file, + "--report-stats=yes", + "-H", + "lemurs.win", + ], + ) def generate_config_and_remove_lines_containing(self, needle): self.generate_config() diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 64cb294c3..447a3c6ff 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,78 +14,129 @@ # See the License for the specific language governing permissions and # limitations under the License. - -from mock import Mock - -from twisted.internet import defer +from unittest.mock import Mock from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions -from synapse.types import Requester, RoomID, UserID +from synapse.rest.admin import register_servlets_for_client_rest_resource +from synapse.rest.client.v1 import login, room +from synapse.types import Requester, UserID from tests import unittest -from tests.utils import create_room, setup_test_homeserver -class RoomMemberStoreTestCase(unittest.TestCase): - @defer.inlineCallbacks - def setUp(self): - hs = yield setup_test_homeserver( - self.addCleanup, resource_for_federation=Mock(), http_client=None +class RoomMemberStoreTestCase(unittest.HomeserverTestCase): + + servlets = [ + login.register_servlets, + register_servlets_for_client_rest_resource, + room.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver( + resource_for_federation=Mock(), http_client=None ) + return hs + + def prepare(self, reactor, clock, hs): + # We can't test the RoomMemberStore on its own without the other event # storage logic self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() self.event_creation_handler = hs.get_event_creation_handler() - self.u_alice = UserID.from_string("@alice:test") - self.u_bob = UserID.from_string("@bob:test") + self.u_alice = self.register_user("alice", "pass") + self.t_alice = self.login("alice", "pass") + self.u_bob = self.register_user("bob", "pass") # User elsewhere on another host self.u_charlie = UserID.from_string("@charlie:elsewhere") - self.room = RoomID.from_string("!abc123:test") - - yield create_room(hs, self.room.to_string(), self.u_alice.to_string()) - - @defer.inlineCallbacks def inject_room_member(self, room, user, membership, replaces_state=None): builder = self.event_builder_factory.for_room_version( RoomVersions.V1, { "type": EventTypes.Member, - "sender": user.to_string(), - "state_key": user.to_string(), - "room_id": room.to_string(), + "sender": user, + "state_key": user, + "room_id": room, "content": {"membership": membership}, }, ) - event, context = yield self.event_creation_handler.create_new_client_event( - builder + event, context = self.get_success( + self.event_creation_handler.create_new_client_event(builder) ) - yield self.store.persist_event(event, context) + self.get_success(self.store.persist_event(event, context)) return event - @defer.inlineCallbacks def test_one_member(self): - yield self.inject_room_member(self.room, self.u_alice, Membership.JOIN) - self.assertEquals( - [self.room.to_string()], - [ - m.room_id - for m in ( - yield self.store.get_rooms_for_user_where_membership_is( - self.u_alice.to_string(), [Membership.JOIN] - ) - ) - ], + # Alice creates the room, and is automatically joined + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + + rooms_for_user = self.get_success( + self.store.get_rooms_for_user_where_membership_is( + self.u_alice, [Membership.JOIN] + ) ) + self.assertEquals([self.room], [m.room_id for m in rooms_for_user]) + + def test_count_known_servers(self): + """ + _count_known_servers will calculate how many servers are in a room. + """ + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.inject_room_member(self.room, self.u_bob, Membership.JOIN) + self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN) + + servers = self.get_success(self.store._count_known_servers()) + self.assertEqual(servers, 2) + + def test_count_known_servers_stat_counter_disabled(self): + """ + If enabled, the metrics for how many servers are known will be counted. + """ + self.assertTrue("_known_servers_count" not in self.store.__dict__.keys()) + + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.inject_room_member(self.room, self.u_bob, Membership.JOIN) + self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN) + + self.pump(20) + + self.assertTrue("_known_servers_count" not in self.store.__dict__.keys()) + + @unittest.override_config( + {"enable_metrics": True, "metrics_flags": {"known_servers": True}} + ) + def test_count_known_servers_stat_counter_enabled(self): + """ + If enabled, the metrics for how many servers are known will be counted. + """ + # Initialises to 1 -- itself + self.assertEqual(self.store._known_servers_count, 1) + + self.pump(20) + + # No rooms have been joined, so technically the SQL returns 0, but it + # will still say it knows about itself. + self.assertEqual(self.store._known_servers_count, 1) + + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.inject_room_member(self.room, self.u_bob, Membership.JOIN) + self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN) + + self.pump(20) + + # It now knows about Charlie's server. + self.assertEqual(self.store._known_servers_count, 2) + class CurrentStateMembershipUpdateTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, homeserver):