mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-01-12 14:39:27 -05:00
Track number of hosts affected by the rate limiter (#13541)
Track the number of hosts affected by the rate limiter so we can differentiate one really noisy homeserver from a general ratelimit tuning problem across the federation. Follow-up to https://github.com/matrix-org/synapse/pull/13534. Part of https://github.com/matrix-org/synapse/issues/13356.
This commit is contained in:
parent
22ea51faf9
commit
d64653d062
1
changelog.d/13541.misc
Normal file
1
changelog.d/13541.misc
Normal file
@ -0,0 +1 @@
|
|||||||
|
Add metrics to track how the rate limiter is affecting requests (sleep/reject).
|
@ -30,7 +30,7 @@ from synapse.logging.context import (
|
|||||||
run_in_background,
|
run_in_background,
|
||||||
)
|
)
|
||||||
from synapse.logging.opentracing import start_active_span
|
from synapse.logging.opentracing import start_active_span
|
||||||
from synapse.metrics import Histogram
|
from synapse.metrics import Histogram, LaterGauge
|
||||||
from synapse.util import Clock
|
from synapse.util import Clock
|
||||||
|
|
||||||
if typing.TYPE_CHECKING:
|
if typing.TYPE_CHECKING:
|
||||||
@ -74,6 +74,27 @@ class FederationRateLimiter:
|
|||||||
str, "_PerHostRatelimiter"
|
str, "_PerHostRatelimiter"
|
||||||
] = collections.defaultdict(new_limiter)
|
] = collections.defaultdict(new_limiter)
|
||||||
|
|
||||||
|
# We track the number of affected hosts per time-period so we can
|
||||||
|
# differentiate one really noisy homeserver from a general
|
||||||
|
# ratelimit tuning problem across the federation.
|
||||||
|
LaterGauge(
|
||||||
|
"synapse_rate_limit_sleep_affected_hosts",
|
||||||
|
"Number of hosts that had requests put to sleep",
|
||||||
|
[],
|
||||||
|
lambda: sum(
|
||||||
|
ratelimiter.should_sleep() for ratelimiter in self.ratelimiters.values()
|
||||||
|
),
|
||||||
|
)
|
||||||
|
LaterGauge(
|
||||||
|
"synapse_rate_limit_reject_affected_hosts",
|
||||||
|
"Number of hosts that had requests rejected",
|
||||||
|
[],
|
||||||
|
lambda: sum(
|
||||||
|
ratelimiter.should_reject()
|
||||||
|
for ratelimiter in self.ratelimiters.values()
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
def ratelimit(self, host: str) -> "_GeneratorContextManager[defer.Deferred[None]]":
|
def ratelimit(self, host: str) -> "_GeneratorContextManager[defer.Deferred[None]]":
|
||||||
"""Used to ratelimit an incoming request from a given host
|
"""Used to ratelimit an incoming request from a given host
|
||||||
|
|
||||||
@ -139,6 +160,21 @@ class _PerHostRatelimiter:
|
|||||||
finally:
|
finally:
|
||||||
self._on_exit(request_id)
|
self._on_exit(request_id)
|
||||||
|
|
||||||
|
def should_reject(self) -> bool:
    """
    Whether to reject the request if we already have too many queued up
    (either sleeping or in the ready queue).

    Returns:
        True if the combined number of entries in `ready_request_queue` and
        `sleeping_requests` is strictly greater than `reject_limit`.
    """
    # Both collections count towards the limit: a request is "queued"
    # whether it is currently sleeping or waiting in the ready queue.
    queue_size = len(self.ready_request_queue) + len(self.sleeping_requests)
    return queue_size > self.reject_limit
|
||||||
|
|
||||||
|
def should_sleep(self) -> bool:
    """
    Whether to sleep the request if we already have too many requests coming
    through within the window.

    Returns:
        True if the number of entries in `request_times` is strictly greater
        than `sleep_limit`.
    """
    return len(self.request_times) > self.sleep_limit
|
||||||
|
|
||||||
def _on_enter(self, request_id: object) -> "defer.Deferred[None]":
|
def _on_enter(self, request_id: object) -> "defer.Deferred[None]":
|
||||||
time_now = self.clock.time_msec()
|
time_now = self.clock.time_msec()
|
||||||
|
|
||||||
@ -149,8 +185,7 @@ class _PerHostRatelimiter:
|
|||||||
|
|
||||||
# reject the request if we already have too many queued up (either
|
# reject the request if we already have too many queued up (either
|
||||||
# sleeping or in the ready queue).
|
# sleeping or in the ready queue).
|
||||||
queue_size = len(self.ready_request_queue) + len(self.sleeping_requests)
|
if self.should_reject():
|
||||||
if queue_size > self.reject_limit:
|
|
||||||
logger.debug("Ratelimiter(%s): rejecting request", self.host)
|
logger.debug("Ratelimiter(%s): rejecting request", self.host)
|
||||||
rate_limit_reject_counter.inc()
|
rate_limit_reject_counter.inc()
|
||||||
raise LimitExceededError(
|
raise LimitExceededError(
|
||||||
@ -180,7 +215,7 @@ class _PerHostRatelimiter:
|
|||||||
len(self.request_times),
|
len(self.request_times),
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(self.request_times) > self.sleep_limit:
|
if self.should_sleep():
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Ratelimiter(%s) [%s]: sleeping request for %f sec",
|
"Ratelimiter(%s) [%s]: sleeping request for %f sec",
|
||||||
self.host,
|
self.host,
|
||||||
|
Loading…
Reference in New Issue
Block a user