diff --git a/CHANGES.md b/CHANGES.md index 7713328f1..7188f9444 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,11 +1,24 @@ +Synapse 1.32.1 (2021-04-21) +=========================== + +This release fixes [a regression](https://github.com/matrix-org/synapse/issues/9853) +in Synapse 1.32.0 that caused connected Prometheus instances to become unstable. If you +ran Synapse 1.32.0 with Prometheus metrics, first upgrade to Synapse 1.32.1 and follow +[these instructions](https://github.com/matrix-org/synapse/pull/9854#issuecomment-823472183) +to clean up any excess writeahead logs. + +Bugfixes +-------- + +- Fix a regression in Synapse 1.32.0 which caused Synapse to report large numbers of Prometheus time series, potentially overwhelming Prometheus instances. ([\#9854](https://github.com/matrix-org/synapse/issues/9854)) + + Synapse 1.32.0 (2021-04-20) =========================== -**Note:** This release introduces [a regression](https://githubcom/matrix-org/synapse/issues/9853) +**Note:** This release introduces [a regression](https://github.com/matrix-org/synapse/issues/9853) that can overwhelm connected Prometheus instances. This issue was not present in -Synapse v1.32.0rc1. It is recommended not to update to this release. If you have -upgraded to v1.32.0 already, please downgrade to v1.31.0. This issue will be -resolved in a subsequent release version shortly. +1.32.0rc1, and is fixed in 1.32.1. See the changelog for 1.32.1 above for more information. **Note:** This release requires Python 3.6+ and Postgres 9.6+ or SQLite 3.22+. diff --git a/UPGRADE.rst b/UPGRADE.rst index c8dce6222..76d2ee394 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -91,11 +91,14 @@ Upgrading to v1.32.0 Regression causing connected Prometheus instances to become overwhelmed ----------------------------------------------------------------------- -This release introduces `a regression `_ -that can overwhelm connected Prometheus instances. This issue was not present in -Synapse v1.32.0rc1. 
It is recommended not to update to this release. If you have -upgraded to v1.32.0 already, please downgrade to v1.31.0. This issue will be -resolved in a subsequent release version shortly. +This release introduces `a regression <https://github.com/matrix-org/synapse/issues/9853>`_ +that can overwhelm connected Prometheus instances. This issue is not present in +Synapse v1.32.0rc1, and is fixed in Synapse v1.32.1. + +If you have been affected, please first upgrade to a more recent Synapse version. +You then may need to remove excess writeahead logs in order for Prometheus to recover. +Instructions for doing so are provided +`here <https://github.com/matrix-org/synapse/pull/9854#issuecomment-823472183>`_. Dropping support for old Python, Postgres and SQLite versions ------------------------------------------------------------- diff --git a/debian/changelog b/debian/changelog index 83be4497e..b8cf2cac5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.32.1) stable; urgency=medium + + * New synapse release 1.32.1. + + -- Synapse Packaging team Wed, 21 Apr 2021 14:00:55 +0100 + matrix-synapse-py3 (1.32.0) stable; urgency=medium [ Dan Callahan ] diff --git a/synapse/__init__.py b/synapse/__init__.py index 79232c4de..a0332d602 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -48,7 +48,7 @@ try: except ImportError: pass -__version__ = "1.32.0" +__version__ = "1.32.1" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 78e9cfbc2..3f621539f 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -16,7 +16,7 @@ import logging import threading from functools import wraps -from typing import TYPE_CHECKING, Dict, Optional, Set +from typing import TYPE_CHECKING, Dict, Optional, Set, Union from prometheus_client.core import REGISTRY, Counter, Gauge @@ -199,7 +199,7 @@ def 
run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar _background_process_start_count.labels(desc).inc() _background_process_in_flight_count.labels(desc).inc() - with BackgroundProcessLoggingContext("%s-%s" % (desc, count)) as context: + with BackgroundProcessLoggingContext(desc, count) as context: try: ctx = noop_context_manager() if bg_start_span: @@ -244,8 +244,20 @@ class BackgroundProcessLoggingContext(LoggingContext): __slots__ = ["_proc"] - def __init__(self, name: str): - super().__init__(name) + def __init__(self, name: str, instance_id: Optional[Union[int, str]] = None): + """ + + Args: + name: The name of the background process. Each distinct `name` gets a + separate prometheus time series. + + instance_id: an identifier to add to `name` to distinguish this instance of + the named background process in the logs. If this is `None`, one is + made up based on id(self). + """ + if instance_id is None: + instance_id = id(self) + super().__init__("%s-%s" % (name, instance_id)) self._proc = _BackgroundProcess(name, self) def start(self, rusage: "Optional[resource._RUsage]"): diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index ba753318b..d10d57424 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -185,7 +185,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. self._logging_context = BackgroundProcessLoggingContext( - "replication-conn-%s" % (self.conn_id,) + "replication-conn", self.conn_id ) def connectionMade(self):