mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-05-02 12:56:02 -04:00
Add prometheus metrics to track federation delays (#8430)
Add a pair of federation metrics to track the delays in sending PDUs to/from particular servers.
This commit is contained in:
parent
7941372ec8
commit
c1ef579b63
8 changed files with 88 additions and 6 deletions
|
@ -28,7 +28,7 @@ from typing import (
|
|||
Union,
|
||||
)
|
||||
|
||||
from prometheus_client import Counter, Histogram
|
||||
from prometheus_client import Counter, Gauge, Histogram
|
||||
|
||||
from twisted.internet import defer
|
||||
from twisted.internet.abstract import isIPAddress
|
||||
|
@ -88,6 +88,13 @@ pdu_process_time = Histogram(
|
|||
)
|
||||
|
||||
|
||||
last_pdu_age_metric = Gauge(
|
||||
"synapse_federation_last_received_pdu_age",
|
||||
"The age (in seconds) of the last PDU successfully received from the given domain",
|
||||
labelnames=("server_name",),
|
||||
)
|
||||
|
||||
|
||||
class FederationServer(FederationBase):
|
||||
def __init__(self, hs):
|
||||
super().__init__(hs)
|
||||
|
@ -118,6 +125,10 @@ class FederationServer(FederationBase):
|
|||
hs, "state_ids_resp", timeout_ms=30000
|
||||
)
|
||||
|
||||
self._federation_metrics_domains = (
|
||||
hs.get_config().federation.federation_metrics_domains
|
||||
)
|
||||
|
||||
async def on_backfill_request(
|
||||
self, origin: str, room_id: str, versions: List[str], limit: int
|
||||
) -> Tuple[int, Dict[str, Any]]:
|
||||
|
@ -262,7 +273,11 @@ class FederationServer(FederationBase):
|
|||
|
||||
pdus_by_room = {} # type: Dict[str, List[EventBase]]
|
||||
|
||||
newest_pdu_ts = 0
|
||||
|
||||
for p in transaction.pdus: # type: ignore
|
||||
# FIXME (richardv): I don't think this works:
|
||||
# https://github.com/matrix-org/synapse/issues/8429
|
||||
if "unsigned" in p:
|
||||
unsigned = p["unsigned"]
|
||||
if "age" in unsigned:
|
||||
|
@ -300,6 +315,9 @@ class FederationServer(FederationBase):
|
|||
event = event_from_pdu_json(p, room_version)
|
||||
pdus_by_room.setdefault(room_id, []).append(event)
|
||||
|
||||
if event.origin_server_ts > newest_pdu_ts:
|
||||
newest_pdu_ts = event.origin_server_ts
|
||||
|
||||
pdu_results = {}
|
||||
|
||||
# we can process different rooms in parallel (which is useful if they
|
||||
|
@ -340,6 +358,10 @@ class FederationServer(FederationBase):
|
|||
process_pdus_for_room, pdus_by_room.keys(), TRANSACTION_CONCURRENCY_LIMIT
|
||||
)
|
||||
|
||||
if newest_pdu_ts and origin in self._federation_metrics_domains:
|
||||
newest_pdu_age = self._clock.time_msec() - newest_pdu_ts
|
||||
last_pdu_age_metric.labels(server_name=origin).set(newest_pdu_age / 1000)
|
||||
|
||||
return pdu_results
|
||||
|
||||
async def _handle_edus_in_txn(self, origin: str, transaction: Transaction):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue