replacing portions

Commit df9f72d9e5 (parent c60e0d5e02)
Author: Amber Brown
Date: 2018-05-21 19:47:37 -05:00

23 changed files with 270 additions and 416 deletions
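
Note on the recurring pattern in this diff: the old synapse.metrics wrappers (metrics.register_counter(...), counter.inc(label), counter.inc_by(n)) are replaced with the prometheus_client API, where label values are bound with .labels(...) before incrementing and bulk increments are passed directly to .inc(). A minimal sketch of that pattern, using illustrative metric names that are not part of this commit:

    from prometheus_client import Counter

    # Hypothetical metrics, for illustration only.
    typed_counter = Counter("example_sent_queries", "Queries sent, by type", ["type"])
    plain_counter = Counter("example_received_pdus", "PDUs received")

    typed_counter.labels("profile").inc()   # old API: typed_counter.inc("profile")
    plain_counter.inc(5)                    # old API: plain_counter.inc_by(5)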

View File

@@ -57,7 +57,7 @@ class Auth(object):
         self.TOKEN_NOT_FOUND_HTTP_STATUS = 401
 
         self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000)
-        register_cache("token_cache", self.token_cache)
+        register_cache("cache", "token_cache", self.token_cache)
 
     @defer.inlineCallbacks
     def check_from_context(self, event, context, do_sig_check=True):

View File

@@ -32,20 +32,17 @@ from synapse.federation.federation_base import (
     FederationBase,
     event_from_pdu_json,
 )
-import synapse.metrics
 from synapse.util import logcontext, unwrapFirstError
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.logcontext import make_deferred_yieldable, run_in_background
 from synapse.util.logutils import log_function
 from synapse.util.retryutils import NotRetryingDestination
+from prometheus_client import Counter
 
 logger = logging.getLogger(__name__)
 
-# synapse.federation.federation_client is a silly name
-metrics = synapse.metrics.get_metrics_for("synapse.federation.client")
-
-sent_queries_counter = metrics.register_counter("sent_queries", labels=["type"])
+sent_queries_counter = Counter("synapse_federation_client_sent_queries", "", ["type"])
 
 PDU_RETRY_TIME_MS = 1 * 60 * 1000

@@ -108,7 +105,7 @@ class FederationClient(FederationBase):
             a Deferred which will eventually yield a JSON object from the
             response
         """
-        sent_queries_counter.inc(query_type)
+        sent_queries_counter.labels(query_type).inc()
 
         return self.transport_layer.make_query(
             destination, query_type, args, retry_on_dns_fail=retry_on_dns_fail,

@@ -127,7 +124,7 @@ class FederationClient(FederationBase):
             a Deferred which will eventually yield a JSON object from the
             response
         """
-        sent_queries_counter.inc("client_device_keys")
+        sent_queries_counter.labels("client_device_keys").inc()
 
         return self.transport_layer.query_client_keys(
             destination, content, timeout
         )

@@ -137,7 +134,7 @@ class FederationClient(FederationBase):
         """Query the device keys for a list of user ids hosted on a remote
         server.
         """
-        sent_queries_counter.inc("user_devices")
+        sent_queries_counter.labels("user_devices").inc()
 
         return self.transport_layer.query_user_devices(
             destination, user_id, timeout
         )

@@ -154,7 +151,7 @@ class FederationClient(FederationBase):
             a Deferred which will eventually yield a JSON object from the
             response
         """
-        sent_queries_counter.inc("client_one_time_keys")
+        sent_queries_counter.labels("client_one_time_keys").inc()
 
         return self.transport_layer.claim_client_keys(
             destination, content, timeout
         )

View File

@@ -27,12 +27,13 @@ from synapse.federation.federation_base import (
 from synapse.federation.persistence import TransactionActions
 from synapse.federation.units import Edu, Transaction
-import synapse.metrics
 from synapse.types import get_domain_from_id
 from synapse.util import async
 from synapse.util.caches.response_cache import ResponseCache
 from synapse.util.logutils import log_function
+from prometheus_client import Counter
+
 from six import iteritems
 
 # when processing incoming transactions, we try to handle multiple rooms in

@@ -41,14 +42,11 @@ TRANSACTION_CONCURRENCY_LIMIT = 10
 logger = logging.getLogger(__name__)
 
-# synapse.federation.federation_server is a silly name
-metrics = synapse.metrics.get_metrics_for("synapse.federation.server")
-
-received_pdus_counter = metrics.register_counter("received_pdus")
-
-received_edus_counter = metrics.register_counter("received_edus")
-
-received_queries_counter = metrics.register_counter("received_queries", labels=["type"])
+received_pdus_counter = Counter("synapse_federation_server_received_pdus", "")
+
+received_edus_counter = Counter("synapse_federation_server_received_edus", "")
+
+received_queries_counter = Counter("synapse_federation_server_received_queries", "", ["type"])
 
 
 class FederationServer(FederationBase):

@@ -131,7 +129,7 @@ class FederationServer(FederationBase):
         logger.debug("[%s] Transaction is new", transaction.transaction_id)
 
-        received_pdus_counter.inc_by(len(transaction.pdus))
+        received_pdus_counter.inc(len(transaction.pdus))
 
         pdus_by_room = {}

@@ -292,7 +290,7 @@ class FederationServer(FederationBase):
     @defer.inlineCallbacks
     def on_query_request(self, query_type, args):
-        received_queries_counter.inc(query_type)
+        received_queries_counter.labels(query_type).inc()
         resp = yield self.registry.on_query(query_type, args)
         defer.returnValue((200, resp))

View File

@@ -33,7 +33,7 @@ from .units import Edu
 from synapse.storage.presence import UserPresenceState
 from synapse.util.metrics import Measure
-import synapse.metrics
+from synapse.metrics import LaterGauge
 
 from blist import sorteddict
 from collections import namedtuple

@@ -45,9 +45,6 @@ from six import itervalues, iteritems
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
 
 class FederationRemoteSendQueue(object):
     """A drop in replacement for TransactionQueue"""

@@ -77,8 +74,7 @@ class FederationRemoteSendQueue(object):
         # lambda binds to the queue rather than to the name of the queue which
         # changes. ARGH.
         def register(name, queue):
-            metrics.register_callback(
-                queue_name + "_size",
+            LaterGauge("synapse_federation_send_queue_%s_size" % (queue_name,), "",
                 lambda: len(queue),
             )

View File

@@ -26,23 +26,18 @@ from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
 from synapse.util.metrics import measure_func
 from synapse.handlers.presence import format_user_presence_state, get_interested_remotes
 import synapse.metrics
+from synapse.metrics import LaterGauge
+from synapse.metrics import (
+    sent_edus_counter, sent_transactions_counter, events_processed_counter)
+
+from prometheus_client import Counter
 
 import logging
 
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-client_metrics = synapse.metrics.get_metrics_for("synapse.federation.client")
-sent_pdus_destination_dist = client_metrics.register_distribution(
-    "sent_pdu_destinations"
-)
-sent_edus_counter = client_metrics.register_counter("sent_edus")
-
-sent_transactions_counter = client_metrics.register_counter("sent_transactions")
-
-events_processed_counter = client_metrics.register_counter("events_processed")
+sent_pdus_destination_dist = Counter("synapse_federation_client_sent_pdu_destinations", "")
 
 
 class TransactionQueue(object):

@@ -69,8 +64,7 @@ class TransactionQueue(object):
         # done
         self.pending_transactions = {}
 
-        metrics.register_callback(
-            "pending_destinations",
+        LaterGauge("pending_destinations", "", [],
             lambda: len(self.pending_transactions),
         )

@@ -94,12 +88,12 @@ class TransactionQueue(object):
         # Map of destination -> (edu_type, key) -> Edu
         self.pending_edus_keyed_by_dest = edus_keyed = {}
 
-        metrics.register_callback(
-            "pending_pdus",
+        LaterGauge(
+            "pending_pdus", "", [],
             lambda: sum(map(len, pdus.values())),
         )
-        metrics.register_callback(
-            "pending_edus",
+        LaterGauge(
+            "pending_edus", "", [],
             lambda: (
                 sum(map(len, edus.values()))
                 + sum(map(len, presence.values()))

@@ -241,18 +235,15 @@ class TransactionQueue(object):
                 now = self.clock.time_msec()
                 ts = yield self.store.get_received_ts(events[-1].event_id)
 
-                synapse.metrics.event_processing_lag.set(
-                    now - ts, "federation_sender",
-                )
-                synapse.metrics.event_processing_last_ts.set(
-                    ts, "federation_sender",
-                )
+                synapse.metrics.event_processing_lag.labels(
+                    "federation_sender").set(now - ts)
+                synapse.metrics.event_processing_last_ts.labels(
+                    "federation_sender").set(ts)
 
-                events_processed_counter.inc_by(len(events))
+                events_processed_counter.inc(len(events))
 
-                synapse.metrics.event_processing_positions.set(
-                    next_token, "federation_sender",
-                )
+                synapse.metrics.event_processing_positions.labels(
+                    "federation_sender").set(next_token)
 
             finally:
                 self._is_processing = False

@@ -275,7 +266,7 @@ class TransactionQueue(object):
         if not destinations:
             return
 
-        sent_pdus_destination_dist.inc_by(len(destinations))
+        sent_pdus_destination_dist.inc(len(destinations))
 
         for destination in destinations:
             self.pending_pdus_by_dest.setdefault(destination, []).append(

View File

@@ -21,14 +21,13 @@ from synapse.util.metrics import Measure
 from synapse.util.logcontext import (
     make_deferred_yieldable, run_in_background,
 )
+from prometheus_client import Counter
 
 import logging
 
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-events_processed_counter = metrics.register_counter("events_processed")
+events_processed_counter = Counter("synapse_handlers_appservice_events_processed", "")
 
 
 def log_failure(failure):

@@ -128,18 +127,12 @@ class ApplicationServicesHandler(object):
                 now = self.clock.time_msec()
                 ts = yield self.store.get_received_ts(events[-1].event_id)
 
-                synapse.metrics.event_processing_positions.set(
-                    upper_bound, "appservice_sender",
-                )
+                synapse.metrics.event_processing_positions.labels("appservice_sender").set(upper_bound)
 
-                events_processed_counter.inc_by(len(events))
+                events_processed_counter.inc(len(events))
 
-                synapse.metrics.event_processing_lag.set(
-                    now - ts, "appservice_sender",
-                )
-                synapse.metrics.event_processing_last_ts.set(
-                    ts, "appservice_sender",
-                )
+                synapse.metrics.event_processing_lag.labels("appservice_sender").set(now - ts)
+                synapse.metrics.event_processing_last_ts.labels("appservice_sender").set(ts)
             finally:
                 self.is_processing = False

View File

@@ -36,27 +36,27 @@ from synapse.util.logutils import log_function
 from synapse.util.metrics import Measure
 from synapse.util.wheel_timer import WheelTimer
 from synapse.types import UserID, get_domain_from_id
-import synapse.metrics
+from synapse.metrics import LaterGauge
 
 import logging
 
+from prometheus_client import Counter
+
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-notified_presence_counter = metrics.register_counter("notified_presence")
-federation_presence_out_counter = metrics.register_counter("federation_presence_out")
-presence_updates_counter = metrics.register_counter("presence_updates")
-timers_fired_counter = metrics.register_counter("timers_fired")
-federation_presence_counter = metrics.register_counter("federation_presence")
-bump_active_time_counter = metrics.register_counter("bump_active_time")
-get_updates_counter = metrics.register_counter("get_updates", labels=["type"])
-notify_reason_counter = metrics.register_counter("notify_reason", labels=["reason"])
-state_transition_counter = metrics.register_counter(
-    "state_transition", labels=["from", "to"]
+notified_presence_counter = Counter("synapse_handler_presence_notified_presence", "")
+federation_presence_out_counter = Counter("synapse_handler_presence_federation_presence_out", "")
+presence_updates_counter = Counter("synapse_handler_presence_presence_updates", "")
+timers_fired_counter = Counter("synapse_handler_presence_timers_fired", "")
+federation_presence_counter = Counter("synapse_handler_presence_federation_presence", "")
+bump_active_time_counter = Counter("synapse_handler_presence_bump_active_time", "")
+get_updates_counter = Counter("synapse_handler_presence_get_updates", "", ["type"])
+notify_reason_counter = Counter("synapse_handler_presence_notify_reason", "", ["reason"])
+state_transition_counter = Counter("synapse_handler_presence_state_transition", "", ["from", "to"]
 )

@@ -137,9 +137,9 @@ class PresenceHandler(object):
             for state in active_presence
         }
 
-        metrics.register_callback(
-            "user_to_current_state_size", lambda: len(self.user_to_current_state)
-        )
+        LaterGauge(
+            "user_to_current_state_size", "", [], lambda: len(self.user_to_current_state)
+        ).register()
 
         now = self.clock.time_msec()
         for state in active_presence:

@@ -208,7 +208,7 @@ class PresenceHandler(object):
             60 * 1000,
         )
 
-        metrics.register_callback("wheel_timer_size", lambda: len(self.wheel_timer))
+        LaterGauge("wheel_timer_size", "", [], lambda: len(self.wheel_timer)).register()
 
     @defer.inlineCallbacks
     def _on_shutdown(self):

@@ -311,10 +311,10 @@ class PresenceHandler(object):
             # TODO: We should probably ensure there are no races hereafter
 
-            presence_updates_counter.inc_by(len(new_states))
+            presence_updates_counter.inc(len(new_states))
 
             if to_notify:
-                notified_presence_counter.inc_by(len(to_notify))
+                notified_presence_counter.inc(len(to_notify))
                 yield self._persist_and_notify(to_notify.values())
 
             self.unpersisted_users_changes |= set(s.user_id for s in new_states)

@@ -325,7 +325,7 @@ class PresenceHandler(object):
                 if user_id not in to_notify
             }
             if to_federation_ping:
-                federation_presence_out_counter.inc_by(len(to_federation_ping))
+                federation_presence_out_counter.inc(len(to_federation_ping))
 
                 self._push_to_remotes(to_federation_ping.values())

@@ -363,7 +363,7 @@ class PresenceHandler(object):
                 for user_id in users_to_check
             ]
 
-            timers_fired_counter.inc_by(len(states))
+            timers_fired_counter.inc(len(states))
 
             changes = handle_timeouts(
                 states,

@@ -707,7 +707,7 @@ class PresenceHandler(object):
             updates.append(prev_state.copy_and_replace(**new_fields))
 
         if updates:
-            federation_presence_counter.inc_by(len(updates))
+            federation_presence_counter.inc(len(updates))
             yield self._update_states(updates)
 
     @defer.inlineCallbacks

@@ -982,28 +982,28 @@ def should_notify(old_state, new_state):
         return False
 
     if old_state.status_msg != new_state.status_msg:
-        notify_reason_counter.inc("status_msg_change")
+        notify_reason_counter.labels("status_msg_change").inc()
         return True
 
     if old_state.state != new_state.state:
-        notify_reason_counter.inc("state_change")
-        state_transition_counter.inc(old_state.state, new_state.state)
+        notify_reason_counter.labels("state_change").inc()
+        state_transition_counter.labels(old_state.state, new_state.state).inc()
         return True
 
     if old_state.state == PresenceState.ONLINE:
         if new_state.currently_active != old_state.currently_active:
-            notify_reason_counter.inc("current_active_change")
+            notify_reason_counter.labels("current_active_change").inc()
             return True
 
         if new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
            # Only notify about last active bumps if we're not currently acive
            if not new_state.currently_active:
-                notify_reason_counter.inc("last_active_change_online")
+                notify_reason_counter.labels("last_active_change_online").inc()
                 return True
 
     elif new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
         # Always notify for a transition where last active gets bumped.
-        notify_reason_counter.inc("last_active_change_not_online")
+        notify_reason_counter.labels("last_active_change_not_online").inc()
         return True
 
     return False

@@ -1077,14 +1077,14 @@ class PresenceEventSource(object):
             if changed is not None and len(changed) < 500:
                 # For small deltas, its quicker to get all changes and then
                 # work out if we share a room or they're in our presence list
-                get_updates_counter.inc("stream")
+                get_updates_counter.labels("stream").inc()
                 for other_user_id in changed:
                     if other_user_id in users_interested_in:
                         user_ids_changed.add(other_user_id)
             else:
                 # Too many possible updates. Find all users we can see and check
                 # if any of them have changed.
-                get_updates_counter.inc("full")
+                get_updates_counter.labels("full").inc()
 
                 if from_key:
                     user_ids_changed = stream_change_cache.get_entities_changed(

View File

@@ -23,7 +23,6 @@ from synapse.http import cancelled_to_request_timed_out_error
 from synapse.util.async import add_timeout_to_deferred
 from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.logcontext import make_deferred_yieldable
-import synapse.metrics
 from synapse.http.endpoint import SpiderEndpoint
 
 from canonicaljson import encode_canonical_json

@@ -42,6 +41,7 @@ from twisted.web._newclient import ResponseDone
 from six import StringIO
 
+from prometheus_client import Counter
 import simplejson as json
 import logging
 import urllib

@@ -49,16 +49,8 @@ import urllib
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-outgoing_requests_counter = metrics.register_counter(
-    "requests",
-    labels=["method"],
-)
-incoming_responses_counter = metrics.register_counter(
-    "responses",
-    labels=["method", "code"],
-)
+outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"])
+incoming_responses_counter = Counter("synapse_http_client_responses", "", ["method", "code"])
 
 
 class SimpleHttpClient(object):

@@ -95,7 +87,7 @@ class SimpleHttpClient(object):
     def request(self, method, uri, *args, **kwargs):
         # A small wrapper around self.agent.request() so we can easily attach
         # counters to it
-        outgoing_requests_counter.inc(method)
+        outgoing_requests_counter.labels(method).inc()
 
         logger.info("Sending request %s %s", method, uri)

@@ -109,14 +101,14 @@ class SimpleHttpClient(object):
             )
             response = yield make_deferred_yieldable(request_deferred)
 
-            incoming_responses_counter.inc(method, response.code)
+            incoming_responses_counter.labels(method, response.code).inc()
             logger.info(
                 "Received response to %s %s: %s",
                 method, uri, response.code
             )
             defer.returnValue(response)
         except Exception as e:
-            incoming_responses_counter.inc(method, "ERR")
+            incoming_responses_counter.labels(method, "ERR").inc()
             logger.info(
                 "Error sending request to %s %s: %s %s",
                 method, uri, type(e).__name__, e.message

View File

@@ -43,19 +43,13 @@ import sys
 import urllib
 from six.moves.urllib import parse as urlparse
 
+from prometheus_client import Counter
+
 logger = logging.getLogger(__name__)
 outbound_logger = logging.getLogger("synapse.http.outbound")
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-outgoing_requests_counter = metrics.register_counter(
-    "requests",
-    labels=["method"],
-)
-incoming_responses_counter = metrics.register_counter(
-    "responses",
-    labels=["method", "code"],
-)
+outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests", "", ["method"])
+incoming_responses_counter = Counter("synapse_http_matrixfederationclient_responses", "", ["method", "code"])
 
 MAX_LONG_RETRIES = 10

View File

@@ -18,15 +18,14 @@ import functools
 import time
 import gc
 import platform
+import attr
+
+from prometheus_client import Gauge, Histogram, Counter
+from prometheus_client.core import (
+    GaugeMetricFamily, CounterMetricFamily, REGISTRY)
 
 from twisted.internet import reactor
 
-from .metric import (
-    CounterMetric, CallbackMetric, DistributionMetric, CacheMetric,
-    MemoryUsageMetric, GaugeMetric,
-)
-from .process_collector import register_process_collector
 
 logger = logging.getLogger(__name__)

@@ -34,149 +33,94 @@ logger = logging.getLogger(__name__)
 running_on_pypy = platform.python_implementation() == 'PyPy'
 all_metrics = []
 all_collectors = []
+all_gauges = {}
 
-class Metrics(object):
-    """ A single Metrics object gives a (mutable) slice view of the all_metrics
-    dict, allowing callers to easily register new metrics that are namespaced
-    nicely."""
-
-    def __init__(self, name):
-        self.name_prefix = name
-
-    def make_subspace(self, name):
-        return Metrics("%s_%s" % (self.name_prefix, name))
-
-    def register_collector(self, func):
-        all_collectors.append(func)
-
-    def _register(self, metric_class, name, *args, **kwargs):
-        full_name = "%s_%s" % (self.name_prefix, name)
-
-        metric = metric_class(full_name, *args, **kwargs)
-
-        all_metrics.append(metric)
-        return metric
-
-    def register_counter(self, *args, **kwargs):
-        """
-        Returns:
-            CounterMetric
-        """
-        return self._register(CounterMetric, *args, **kwargs)
-
-    def register_gauge(self, *args, **kwargs):
-        """
-        Returns:
-            GaugeMetric
-        """
-        return self._register(GaugeMetric, *args, **kwargs)
-
-    def register_callback(self, *args, **kwargs):
-        """
-        Returns:
-            CallbackMetric
-        """
-        return self._register(CallbackMetric, *args, **kwargs)
-
-    def register_distribution(self, *args, **kwargs):
-        """
-        Returns:
-            DistributionMetric
-        """
-        return self._register(DistributionMetric, *args, **kwargs)
-
-    def register_cache(self, *args, **kwargs):
-        """
-        Returns:
-            CacheMetric
-        """
-        return self._register(CacheMetric, *args, **kwargs)
-
-
-def register_memory_metrics(hs):
-    try:
-        import psutil
-        process = psutil.Process()
-        process.memory_info().rss
-    except (ImportError, AttributeError):
-        logger.warn(
-            "psutil is not installed or incorrect version."
-            " Disabling memory metrics."
-        )
-        return
-    metric = MemoryUsageMetric(hs, psutil)
-    all_metrics.append(metric)
-
-
-def get_metrics_for(pkg_name):
-    """ Returns a Metrics instance for conveniently creating metrics
-    namespaced with the given name prefix. """
-
-    # Convert a "package.name" to "package_name" because Prometheus doesn't
-    # let us use . in metric names
-    return Metrics(pkg_name.replace(".", "_"))
-
-
-def render_all():
-    strs = []
-
-    for collector in all_collectors:
-        collector()
-
-    for metric in all_metrics:
-        try:
-            strs += metric.render()
-        except Exception:
-            strs += ["# FAILED to render"]
-            logger.exception("Failed to render metric")
-
-    strs.append("")  # to generate a final CRLF
-
-    return "\n".join(strs)
-
-
-register_process_collector(get_metrics_for("process"))
-
-
-python_metrics = get_metrics_for("python")
-
-gc_time = python_metrics.register_distribution("gc_time", labels=["gen"])
-gc_unreachable = python_metrics.register_counter("gc_unreachable_total", labels=["gen"])
-python_metrics.register_callback(
-    "gc_counts", lambda: {(i,): v for i, v in enumerate(gc.get_count())}, labels=["gen"]
-)
-
-reactor_metrics = get_metrics_for("python.twisted.reactor")
-tick_time = reactor_metrics.register_distribution("tick_time")
-pending_calls_metric = reactor_metrics.register_distribution("pending_calls")
-
-synapse_metrics = get_metrics_for("synapse")
+
+@attr.s(hash=True)
+class LaterGauge(object):
+
+    name = attr.ib()
+    desc = attr.ib()
+    labels = attr.ib(hash=False)
+    caller = attr.ib()
+
+    def collect(self):
+
+        g = GaugeMetricFamily(self.name, self.desc, self.labels)
+
+        try:
+            calls = self.caller()
+        except Exception as e:
+            print(e)
+            logger.err()
+            yield g
+
+        if isinstance(calls, dict):
+            for k, v in calls.items():
+                g.add_metric(k, v)
+        else:
+            g.add_metric([], calls)
+
+        yield g
+
+    def register(self):
+        if self.name in all_gauges.keys():
+            REGISTRY.unregister(all_gauges.pop(self.name))
+
+        REGISTRY.register(self)
+        all_gauges[self.name] = self
+
+
+#
+# Python GC metrics
+#
+
+gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
+gc_time = Histogram("python_gc_time", "Time taken to GC (ms)", ["gen"], buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000])
+
+
+class GCCounts(object):
+    def collect(self):
+        gc_counts = gc.get_count()
+
+        cm = GaugeMetricFamily("python_gc_counts", "GC cycle counts", labels=["gen"])
+        for n, m in enumerate(gc.get_count()):
+            cm.add_metric([str(n)], m)
+
+        yield cm
+
+REGISTRY.register(GCCounts())
+
+#
+# Twisted reactor metrics
+#
+
+tick_time = Histogram("python_twisted_reactor_tick_time", "Tick time of the Twisted reactor (ms)", buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000])
+pending_calls_metric = Histogram("python_twisted_reactor_pending_calls", "Pending calls", buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000])
+
+#
+# Federation Metrics
+#
+
+sent_edus_counter = Counter("synapse_federation_client_sent_edus", "")
+
+sent_transactions_counter = Counter("synapse_federation_client_sent_transactions", "")
+
+events_processed_counter = Counter("synapse_federation_client_events_processed", "")
 
 # Used to track where various components have processed in the event stream,
 # e.g. federation sending, appservice sending, etc.
-event_processing_positions = synapse_metrics.register_gauge(
-    "event_processing_positions", labels=["name"],
-)
+event_processing_positions = Gauge("synapse_event_processing_positions", "", ["name"])
 
 # Used to track the current max events stream position
-event_persisted_position = synapse_metrics.register_gauge(
-    "event_persisted_position",
-)
+event_persisted_position = Gauge("synapse_event_persisted_position", "")
 
 # Used to track the received_ts of the last event processed by various
 # components
-event_processing_last_ts = synapse_metrics.register_gauge(
-    "event_processing_last_ts", labels=["name"],
-)
+event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name"])
 
 # Used to track the lag processing events. This is the time difference
 # between the last processed event's received_ts and the time it was
 # finished being processed.
-event_processing_lag = synapse_metrics.register_gauge(
-    "event_processing_lag", labels=["name"],
-)
+event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"])
 
 
 def runUntilCurrentTimer(func):

@@ -206,8 +150,8 @@ def runUntilCurrentTimer(func):
         # since about 25% of time is actually spent running things triggered by
         # I/O events, but that is harder to capture without rewriting half the
         # reactor.
-        tick_time.inc_by(end - start)
-        pending_calls_metric.inc_by(num_pending)
+        tick_time.observe(end - start)
+        pending_calls_metric.observe(num_pending)
 
         if running_on_pypy:
             return ret

@@ -224,8 +168,8 @@ def runUntilCurrentTimer(func):
             unreachable = gc.collect(i)
             end = time.time() * 1000
 
-            gc_time.inc_by(end - start, i)
-            gc_unreachable.inc_by(unreachable, i)
+            gc_time.labels(i).observe(end - start)
+            gc_unreachable.labels(i).set(unreachable)
 
         return ret
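
Note: the LaterGauge collector added above replaces the old metrics.register_callback() pattern; the callback is evaluated at scrape time and exported as a gauge. A minimal usage sketch, assuming the class as defined in this commit (the metric name below is illustrative, not from the diff):

    from synapse.metrics import LaterGauge

    pending_items = {}

    # The callback may return a plain number, or a dict mapping tuples of
    # label values to numbers (see collect() above).
    LaterGauge(
        "example_pending_items", "Number of pending items", [],
        lambda: len(pending_items),
    ).register()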

View File

@@ -28,22 +28,19 @@ from synapse.util.logcontext import PreserveLoggingContext, run_in_background
 from synapse.util.metrics import Measure
 from synapse.types import StreamToken
 from synapse.visibility import filter_events_for_client
-import synapse.metrics
+from synapse.metrics import LaterGauge
 
 from collections import namedtuple
+from prometheus_client import Counter
 
 import logging
 
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-notified_events_counter = metrics.register_counter("notified_events")
-
-users_woken_by_stream_counter = metrics.register_counter(
-    "users_woken_by_stream", labels=["stream"]
-)
+notified_events_counter = Counter("synapse_notifier_notified_events", "")
+
+users_woken_by_stream_counter = Counter("synapse_notifier_users_woken_by_stream", "", ["stream"])
 
 
 # TODO(paul): Should be shared somewhere

@@ -108,7 +105,7 @@ class _NotifierUserStream(object):
         self.last_notified_ms = time_now_ms
         noify_deferred = self.notify_deferred
 
-        users_woken_by_stream_counter.inc(stream_key)
+        users_woken_by_stream_counter.labels(stream_key).inc()
 
         with PreserveLoggingContext():
             self.notify_deferred = ObservableDeferred(defer.Deferred())

@@ -197,14 +194,14 @@ class Notifier(object):
                 all_user_streams.add(x)
 
             return sum(stream.count_listeners() for stream in all_user_streams)
-        metrics.register_callback("listeners", count_listeners)
+        LaterGauge("listeners", "", [], count_listeners)
 
-        metrics.register_callback(
-            "rooms",
+        LaterGauge(
+            "rooms", "", [],
             lambda: count(bool, self.room_to_user_streams.values()),
         )
-        metrics.register_callback(
-            "users",
+        LaterGauge(
+            "users", "", [],
             lambda: len(self.user_to_user_stream),
         )

View File

@@ -20,22 +20,17 @@ from twisted.internet.error import AlreadyCalled, AlreadyCancelled
 from . import push_rule_evaluator
 from . import push_tools
-import synapse
 from synapse.push import PusherConfigException
 from synapse.util.logcontext import LoggingContext
 from synapse.util.metrics import Measure
 
+from prometheus_client import Counter
+
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-http_push_processed_counter = metrics.register_counter(
-    "http_pushes_processed",
-)
-
-http_push_failed_counter = metrics.register_counter(
-    "http_pushes_failed",
-)
+http_push_processed_counter = Counter("http_pushes_processed", "")
+
+http_push_failed_counter = Counter("http_pushes_failed", "")
 
 
 class HttpPusher(object):

View File

@@ -150,7 +150,7 @@ class PushRuleEvaluatorForEvent(object):
 
 # Caches (glob, word_boundary) -> regex for push. See _glob_matches
 regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
-register_cache("regex_push_cache", regex_cache)
+register_cache("cache", "regex_push_cache", regex_cache)
 
 
 def _glob_matches(glob, value, word_boundary=False):

View File

@@ -56,6 +56,7 @@ REQUIREMENTS = {
     "msgpack-python>=0.3.0": ["msgpack"],
     "phonenumbers>=8.2.0": ["phonenumbers"],
     "six": ["six"],
+    "prometheus_client": ["prometheus_client"],
 }
 
 CONDITIONAL_REQUIREMENTS = {
     "web_client": {

View File

@@ -60,19 +60,19 @@ from .commands import (
 )
 from .streams import STREAMS_MAP
 
+from synapse.metrics import LaterGauge
 from synapse.util.stringutils import random_string
-from synapse.metrics.metric import CounterMetric
+
+from prometheus_client import Counter
+
+from collections import defaultdict
 
 import logging
-import synapse.metrics
 import struct
 import fcntl
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-connection_close_counter = metrics.register_counter(
-    "close_reason", labels=["reason_type"],
+connection_close_counter = Counter(
+    "synapse_replication_tcp_protocol_close_reason", "", ["reason_type"],
 )

@@ -136,12 +136,8 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         # The LoopingCall for sending pings.
         self._send_ping_loop = None
 
-        self.inbound_commands_counter = CounterMetric(
-            "inbound_commands", labels=["command"],
-        )
-        self.outbound_commands_counter = CounterMetric(
-            "outbound_commands", labels=["command"],
-        )
+        self.inbound_commands_counter = defaultdict(int)
+        self.outbound_commands_counter = defaultdict(int)
 
     def connectionMade(self):
         logger.info("[%s] Connection established", self.id())

@@ -201,7 +197,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         self.last_received_command = self.clock.time_msec()
 
-        self.inbound_commands_counter.inc(cmd_name)
+        self.inbound_commands_counter[cmd_name] = self.inbound_commands_counter[cmd_name] + 1
 
         cmd_cls = COMMAND_MAP[cmd_name]
         try:

@@ -251,8 +247,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
             self._queue_command(cmd)
             return
 
-        self.outbound_commands_counter.inc(cmd.NAME)
-
+        self.outbound_commands_counter[cmd.NAME] = self.outbound_commands_counter[cmd.NAME] + 1
         string = "%s %s" % (cmd.NAME, cmd.to_line(),)
         if "\n" in string:
             raise Exception("Unexpected newline in command: %r", string)

@@ -317,9 +312,9 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
     def connectionLost(self, reason):
         logger.info("[%s] Replication connection closed: %r", self.id(), reason)
         if isinstance(reason, Failure):
-            connection_close_counter.inc(reason.type.__name__)
+            connection_close_counter.labels(reason.type.__name__).inc()
         else:
-            connection_close_counter.inc(reason.__class__.__name__)
+            connection_close_counter.labels(reason.__class__.__name__).inc()
 
         try:
             # Remove us from list of connections to be monitored

@@ -566,14 +561,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
 # The following simply registers metrics for the replication connections
 
-metrics.register_callback(
-    "pending_commands",
+pending_commands = LaterGauge(
+    "pending_commands", "", ["name", "conn_id"],
     lambda: {
         (p.name, p.conn_id): len(p.pending_commands)
         for p in connected_connections
-    },
-    labels=["name", "conn_id"],
-)
+    })
 
 
 def transport_buffer_size(protocol):

@@ -583,14 +576,12 @@ def transport_buffer_size(protocol):
         return 0
 
-metrics.register_callback(
-    "transport_send_buffer",
+transport_send_buffer = LaterGauge(
+    "synapse_replication_tcp_transport_send_buffer", "", ["name", "conn_id"],
     lambda: {
         (p.name, p.conn_id): transport_buffer_size(p)
         for p in connected_connections
-    },
-    labels=["name", "conn_id"],
-)
+    })
 
 
 def transport_kernel_read_buffer_size(protocol, read=True):

@@ -608,48 +599,37 @@ def transport_kernel_read_buffer_size(protocol, read=True):
     return 0
 
-metrics.register_callback(
-    "transport_kernel_send_buffer",
+tcp_transport_kernel_send_buffer = LaterGauge(
+    "synapse_replication_tcp_transport_kernel_send_buffer", "", ["name", "conn_id"],
     lambda: {
         (p.name, p.conn_id): transport_kernel_read_buffer_size(p, False)
        for p in connected_connections
-    },
-    labels=["name", "conn_id"],
-)
+    })
 
-metrics.register_callback(
-    "transport_kernel_read_buffer",
+tcp_transport_kernel_read_buffer = LaterGauge(
+    "synapse_replication_tcp_transport_kernel_read_buffer", "", ["name", "conn_id"],
     lambda: {
         (p.name, p.conn_id): transport_kernel_read_buffer_size(p, True)
         for p in connected_connections
-    },
-    labels=["name", "conn_id"],
-)
+    })
 
-metrics.register_callback(
-    "inbound_commands",
+tcp_inbound_commands = LaterGauge(
+    "synapse_replication_tcp_inbound_commands", "", ["command", "name", "conn_id"],
     lambda: {
         (k[0], p.name, p.conn_id): count
         for p in connected_connections
-        for k, count in p.inbound_commands_counter.counts.iteritems()
-    },
-    labels=["command", "name", "conn_id"],
-)
+        for k, count in p.inbound_commands_counter.items()
+    })
 
-metrics.register_callback(
-    "outbound_commands",
+tcp_outbound_commands = LaterGauge(
+    "synapse_replication_tcp_outbound_commands", "", ["command", "name", "conn_id"],
     lambda: {
         (k[0], p.name, p.conn_id): count
         for p in connected_connections
-        for k, count in p.outbound_commands_counter.counts.iteritems()
-    },
-    labels=["command", "name", "conn_id"],
-)
+        for k, count in p.outbound_commands_counter.items()
+    })
 
 # number of updates received for each RDATA stream
-inbound_rdata_count = metrics.register_counter(
-    "inbound_rdata_count",
-    labels=["stream_name"],
-)
+inbound_rdata_count = Counter("synapse_replication_tcp_inbound_rdata_count", "", ["stream_name"])
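
Note: the LaterGauge callbacks above use the dict-returning form, where keys are tuples of label values (connection name and conn_id) and values are the readings; collect() turns each entry into one gauge sample per label set. A minimal sketch of that shape, with illustrative names:

    from synapse.metrics import LaterGauge

    connections = []  # stand-in for connected_connections

    LaterGauge(
        "example_pending_commands", "", ["name", "conn_id"],
        lambda: {
            (p.name, p.conn_id): len(p.pending_commands)
            for p in connections
        },
    ).register()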

View File

@@ -13,29 +13,52 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import synapse.metrics
+from prometheus_client.core import GaugeMetricFamily, REGISTRY
+
 import os
 
 CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5))
 
-metrics = synapse.metrics.get_metrics_for("synapse.util.caches")
-
 caches_by_name = {}
-# cache_counter = metrics.register_cache(
-#     "cache",
-#     lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()},
-#     labels=["name"],
-# )
-
-
-def register_cache(name, cache):
-    caches_by_name[name] = cache
-    return metrics.register_cache(
-        "cache",
-        lambda: len(cache),
-        name,
-    )
+collectors_by_name = {}
+
+
+def register_cache(name, cache_name, cache):
+
+    # Check if the metric is already registered. Unregister it, if so.
+    metric_name = "synapse_util_caches_%s:%s" % (name, cache_name,)
+    if metric_name in collectors_by_name.keys():
+        REGISTRY.unregister(collectors_by_name[metric_name])
+
+    class CacheMetric(object):
+
+        hits = 0
+        misses = 0
+        evicted_size = 0
+
+        def inc_hits(self):
+            self.hits += 1
+
+        def inc_misses(self):
+            self.misses += 1
+
+        def inc_evictions(self, size=1):
+            self.evicted_size += size
+
+        def collect(self):
+            cache_size = len(cache)
+
+            gm = GaugeMetricFamily(metric_name, "", labels=["size", "hits", "misses", "total"])
+            gm.add_metric(["size"], cache_size)
+            gm.add_metric(["hits"], self.hits)
+            gm.add_metric(["misses"], self.misses)
+            gm.add_metric(["total"], self.hits + self.misses)
+            yield gm
+
+    metric = CacheMetric()
+    REGISTRY.register(metric)
+    caches_by_name[cache_name] = cache
+    collectors_by_name[metric_name] = metric
+    return metric
 
 
 KNOWN_KEYS = {
     key: key for key in
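
Note: callers of the new register_cache() pass a cache type, a cache name, and the cache object, and get back a CacheMetric whose inc_hits/inc_misses/inc_evictions methods feed the exported gauge (the cache size is read with len() at scrape time). A rough usage sketch, assuming synapse's LruCache; the import path, cache name, and size below are assumptions for illustration:

    from synapse.util.caches import register_cache
    from synapse.util.caches.lrucache import LruCache  # assumed import path

    my_cache = LruCache(1000)
    metrics = register_cache("cache", "my_cache", my_cache)

    metrics.inc_hits()        # record a cache hit
    metrics.inc_misses()      # record a cache miss
    metrics.inc_evictions(3)  # record evictions, as the Cache class does below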

View File

@@ -80,7 +80,7 @@ class Cache(object):
         self.name = name
         self.keylen = keylen
         self.thread = None
-        self.metrics = register_cache(name, self.cache)
+        self.metrics = register_cache("descriptor", name, self.cache)
 
     def _on_evicted(self, evicted_count):
         self.metrics.inc_evictions(evicted_count)

View File

@@ -55,7 +55,7 @@ class DictionaryCache(object):
             __slots__ = []
 
         self.sentinel = Sentinel()
-        self.metrics = register_cache(name, self.cache)
+        self.metrics = register_cache("dictionary", name, self.cache)
 
     def check_thread(self):
         expected_thread = self.thread

View File

@@ -52,12 +52,12 @@ class ExpiringCache(object):
         self._cache = OrderedDict()
 
-        self.metrics = register_cache(cache_name, self)
-
         self.iterable = iterable
 
         self._size_estimate = 0
 
+        self.metrics = register_cache("expiring", cache_name, self)
+
     def start(self):
         if not self._expiry_ms:
             # Don't bother starting the loop if things never expire

View File

@@ -17,7 +17,7 @@ import logging
 from twisted.internet import defer
 
 from synapse.util.async import ObservableDeferred
-from synapse.util.caches import metrics as cache_metrics
+from synapse.util.caches import register_cache
 from synapse.util.logcontext import make_deferred_yieldable, run_in_background
 
 logger = logging.getLogger(__name__)

@@ -38,15 +38,16 @@ class ResponseCache(object):
         self.timeout_sec = timeout_ms / 1000.
 
         self._name = name
-        self._metrics = cache_metrics.register_cache(
-            "response_cache",
-            size_callback=lambda: self.size(),
-            cache_name=name,
+        self._metrics = register_cache(
+            "response_cache", name, self
         )
 
     def size(self):
         return len(self.pending_result_cache)
 
+    def __len__(self):
+        return self.size()
+
     def get(self, key):
         """Look up the given key.

View File

@@ -38,7 +38,7 @@ class StreamChangeCache(object):
         self._cache = sorteddict()
         self._earliest_known_stream_pos = current_stream_pos
         self.name = name
-        self.metrics = register_cache(self.name, self._cache)
+        self.metrics = register_cache("cache", self.name, self._cache)
 
         for entity, stream_pos in prefilled_cache.items():
             self.entity_has_changed(entity, stream_pos)

View File

@@ -15,8 +15,8 @@
 from twisted.internet import defer
 
+from prometheus_client import Counter
+
 from synapse.util.logcontext import LoggingContext
-import synapse.metrics
 
 from functools import wraps
 import logging

@@ -24,66 +24,21 @@ import logging
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-# total number of times we have hit this block
-block_counter = metrics.register_counter(
-    "block_count",
-    labels=["block_name"],
-    alternative_names=(
-        # the following are all deprecated aliases for the same metric
-        metrics.name_prefix + x for x in (
-            "_block_timer:count",
-            "_block_ru_utime:count",
-            "_block_ru_stime:count",
-            "_block_db_txn_count:count",
-            "_block_db_txn_duration:count",
-        )
-    )
-)
-
-block_timer = metrics.register_counter(
-    "block_time_seconds",
-    labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_timer:total",
-    ),
-)
-
-block_ru_utime = metrics.register_counter(
-    "block_ru_utime_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_ru_utime:total",
-    ),
-)
-
-block_ru_stime = metrics.register_counter(
-    "block_ru_stime_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_ru_stime:total",
-    ),
-)
-
-block_db_txn_count = metrics.register_counter(
-    "block_db_txn_count", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_db_txn_count:total",
-    ),
-)
+block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"])
+
+block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"])
+
+block_ru_utime = Counter("synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"])
+
+block_ru_stime = Counter("synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"])
+
+block_db_txn_count = Counter("synapse_util_metrics_block_db_txn_count", "", ["block_name"])
 
 # seconds spent waiting for db txns, excluding scheduling time, in this block
-block_db_txn_duration = metrics.register_counter(
-    "block_db_txn_duration_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_db_txn_duration:total",
-    ),
-)
+block_db_txn_duration = Counter("synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"])
 
 # seconds spent waiting for a db connection, in this block
-block_db_sched_duration = metrics.register_counter(
-    "block_db_sched_duration_seconds", labels=["block_name"],
-)
+block_db_sched_duration = Counter("synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"])
 
 
 def measure_func(name):

@@ -132,8 +87,8 @@ class Measure(object):
         duration = self.clock.time_msec() - self.start
 
-        block_counter.inc(self.name)
-        block_timer.inc_by(duration, self.name)
+        block_counter.labels(self.name).inc()
+        block_timer.labels(self.name).inc(duration)
 
         context = LoggingContext.current_context()

@@ -150,19 +105,13 @@ class Measure(object):
         ru_utime, ru_stime = context.get_resource_usage()
 
-        block_ru_utime.inc_by(ru_utime - self.ru_utime, self.name)
-        block_ru_stime.inc_by(ru_stime - self.ru_stime, self.name)
-        block_db_txn_count.inc_by(
-            context.db_txn_count - self.db_txn_count, self.name
-        )
-        block_db_txn_duration.inc_by(
-            (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.,
-            self.name
-        )
-        block_db_sched_duration.inc_by(
-            (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.,
-            self.name
-        )
+        block_ru_utime.labels(self.name).inc(ru_utime - self.ru_utime)
+        block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime)
+        block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count)
+        block_db_txn_duration.labels(self.name).inc(
+            (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.)
+        block_db_sched_duration.labels(self.name).inc(
+            (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.)
 
         if self.created_context:
             self.start_context.__exit__(exc_type, exc_val, exc_tb)

View File

@@ -12,3 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from twisted.trial import util
+util.DEFAULT_TIMEOUT_DURATION = 10