From 959550b645a0752f6a8592d90fff44683b189523 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 21 May 2019 16:51:49 +0100 Subject: [PATCH 01/20] 0.99.5rc1 --- CHANGES.md | 39 +++++++++++++++++++++++++++++++++++++++ changelog.d/3484.misc | 1 - changelog.d/5039.bugfix | 1 - changelog.d/5043.feature | 1 - changelog.d/5146.bugfix | 1 - changelog.d/5171.misc | 1 - changelog.d/5174.bugfix | 1 - changelog.d/5177.bugfix | 1 - changelog.d/5181.feature | 1 - changelog.d/5183.misc | 1 - changelog.d/5184.misc | 1 - changelog.d/5185.misc | 1 - changelog.d/5187.bugfix | 1 - changelog.d/5190.feature | 1 - changelog.d/5191.misc | 1 - changelog.d/5196.feature | 1 - changelog.d/5197.misc | 1 - changelog.d/5198.bugfix | 1 - changelog.d/5204.feature | 1 - changelog.d/5209.feature | 1 - changelog.d/5210.feature | 1 - changelog.d/5211.feature | 1 - changelog.d/5217.feature | 1 - changelog.d/5218.bugfix | 1 - changelog.d/5219.bugfix | 1 - synapse/__init__.py | 2 +- 26 files changed, 40 insertions(+), 25 deletions(-) delete mode 100644 changelog.d/3484.misc delete mode 100644 changelog.d/5039.bugfix delete mode 100644 changelog.d/5043.feature delete mode 100644 changelog.d/5146.bugfix delete mode 100644 changelog.d/5171.misc delete mode 100644 changelog.d/5174.bugfix delete mode 100644 changelog.d/5177.bugfix delete mode 100644 changelog.d/5181.feature delete mode 100644 changelog.d/5183.misc delete mode 100644 changelog.d/5184.misc delete mode 100644 changelog.d/5185.misc delete mode 100644 changelog.d/5187.bugfix delete mode 100644 changelog.d/5190.feature delete mode 100644 changelog.d/5191.misc delete mode 100644 changelog.d/5196.feature delete mode 100644 changelog.d/5197.misc delete mode 100644 changelog.d/5198.bugfix delete mode 100644 changelog.d/5204.feature delete mode 100644 changelog.d/5209.feature delete mode 100644 changelog.d/5210.feature delete mode 100644 changelog.d/5211.feature delete mode 100644 changelog.d/5217.feature delete mode 100644 changelog.d/5218.bugfix delete mode 100644 changelog.d/5219.bugfix diff --git a/CHANGES.md b/CHANGES.md index 1e9c3cf95..d23166caa 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,42 @@ +Synapse 0.99.5rc1 (2019-05-21) +============================== + +Features +-------- + +- Add ability to blacklist IP ranges for the federation client. ([\#5043](https://github.com/matrix-org/synapse/issues/5043)) +- Ratelimiting configuration for clients sending messages and the federation server has been altered to match login ratelimiting. The old configuration names will continue working. Check the sample config for details of the new names. ([\#5181](https://github.com/matrix-org/synapse/issues/5181)) +- Drop support for the undocumented /_matrix/client/v2_alpha API prefix. ([\#5190](https://github.com/matrix-org/synapse/issues/5190)) +- Add an option to disable per-room profiles. ([\#5196](https://github.com/matrix-org/synapse/issues/5196)) +- Stick an expiration date to any registered user missing one at startup if account validity is enabled. ([\#5204](https://github.com/matrix-org/synapse/issues/5204)) +- Add experimental support for relations (aka reactions and edits). ([\#5209](https://github.com/matrix-org/synapse/issues/5209), [\#5211](https://github.com/matrix-org/synapse/issues/5211)) +- Add a room version 4 which uses a new event ID format, as per [MSC2002](https://github.com/matrix-org/matrix-doc/pull/2002). ([\#5210](https://github.com/matrix-org/synapse/issues/5210), [\#5217](https://github.com/matrix-org/synapse/issues/5217)) + + +Bugfixes +-------- + +- Fix image orientation when generating thumbnails (needs pillow>=4.3.0). Contributed by Pau Rodriguez-Estivill. ([\#5039](https://github.com/matrix-org/synapse/issues/5039)) +- Exclude soft-failed events from forward-extremity candidates: fixes "No forward extremities left!" error. ([\#5146](https://github.com/matrix-org/synapse/issues/5146)) +- Re-order stages in registration flows such that msisdn and email verification are done last. ([\#5174](https://github.com/matrix-org/synapse/issues/5174)) +- Fix 3pid guest invites. ([\#5177](https://github.com/matrix-org/synapse/issues/5177)) +- Fix a bug where the register endpoint would fail with M_THREEPID_IN_USE instead of returning an account previously registered in the same session. ([\#5187](https://github.com/matrix-org/synapse/issues/5187)) +- Prevent registration for user ids that are to long to fit into a state key. Contributed by Reid Anderson. ([\#5198](https://github.com/matrix-org/synapse/issues/5198)) +- Fix incompatibility between ACME support and Python 3.5.2. ([\#5218](https://github.com/matrix-org/synapse/issues/5218)) +- Fix error handling for rooms whose versions are unknown. ([\#5219](https://github.com/matrix-org/synapse/issues/5219)) + + +Internal Changes +---------------- + +- Make /sync attempt to return device updates for both joined and invited users. Note that this doesn't currently work correctly due to other bugs. ([\#3484](https://github.com/matrix-org/synapse/issues/3484)) +- Update tests to consistently be configured via the same code that is used when loading from configuration files. ([\#5171](https://github.com/matrix-org/synapse/issues/5171), [\#5185](https://github.com/matrix-org/synapse/issues/5185)) +- Allow client event serialization to be async. ([\#5183](https://github.com/matrix-org/synapse/issues/5183)) +- Expose DataStore._get_events as get_events_as_list. ([\#5184](https://github.com/matrix-org/synapse/issues/5184)) +- Make generating SQL bounds for pagination generic. ([\#5191](https://github.com/matrix-org/synapse/issues/5191)) +- Stop telling people to install the optional dependencies by default. ([\#5197](https://github.com/matrix-org/synapse/issues/5197)) + + Synapse 0.99.4 (2019-05-15) =========================== diff --git a/changelog.d/3484.misc b/changelog.d/3484.misc deleted file mode 100644 index 364584984..000000000 --- a/changelog.d/3484.misc +++ /dev/null @@ -1 +0,0 @@ -Make /sync attempt to return device updates for both joined and invited users. Note that this doesn't currently work correctly due to other bugs. diff --git a/changelog.d/5039.bugfix b/changelog.d/5039.bugfix deleted file mode 100644 index 212cff7ae..000000000 --- a/changelog.d/5039.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix image orientation when generating thumbnails (needs pillow>=4.3.0). Contributed by Pau Rodriguez-Estivill. diff --git a/changelog.d/5043.feature b/changelog.d/5043.feature deleted file mode 100644 index 0f1e0ee30..000000000 --- a/changelog.d/5043.feature +++ /dev/null @@ -1 +0,0 @@ -Add ability to blacklist IP ranges for the federation client. diff --git a/changelog.d/5146.bugfix b/changelog.d/5146.bugfix deleted file mode 100644 index a54abed92..000000000 --- a/changelog.d/5146.bugfix +++ /dev/null @@ -1 +0,0 @@ -Exclude soft-failed events from forward-extremity candidates: fixes "No forward extremities left!" error. diff --git a/changelog.d/5171.misc b/changelog.d/5171.misc deleted file mode 100644 index d148b03b5..000000000 --- a/changelog.d/5171.misc +++ /dev/null @@ -1 +0,0 @@ -Update tests to consistently be configured via the same code that is used when loading from configuration files. diff --git a/changelog.d/5174.bugfix b/changelog.d/5174.bugfix deleted file mode 100644 index 0f26d46b2..000000000 --- a/changelog.d/5174.bugfix +++ /dev/null @@ -1 +0,0 @@ -Re-order stages in registration flows such that msisdn and email verification are done last. diff --git a/changelog.d/5177.bugfix b/changelog.d/5177.bugfix deleted file mode 100644 index c2f1644ae..000000000 --- a/changelog.d/5177.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix 3pid guest invites. diff --git a/changelog.d/5181.feature b/changelog.d/5181.feature deleted file mode 100644 index 5ce13aa2e..000000000 --- a/changelog.d/5181.feature +++ /dev/null @@ -1 +0,0 @@ -Ratelimiting configuration for clients sending messages and the federation server has been altered to match login ratelimiting. The old configuration names will continue working. Check the sample config for details of the new names. diff --git a/changelog.d/5183.misc b/changelog.d/5183.misc deleted file mode 100644 index a8970f29e..000000000 --- a/changelog.d/5183.misc +++ /dev/null @@ -1 +0,0 @@ -Allow client event serialization to be async. diff --git a/changelog.d/5184.misc b/changelog.d/5184.misc deleted file mode 100644 index 1588bdef6..000000000 --- a/changelog.d/5184.misc +++ /dev/null @@ -1 +0,0 @@ -Expose DataStore._get_events as get_events_as_list. diff --git a/changelog.d/5185.misc b/changelog.d/5185.misc deleted file mode 100644 index d148b03b5..000000000 --- a/changelog.d/5185.misc +++ /dev/null @@ -1 +0,0 @@ -Update tests to consistently be configured via the same code that is used when loading from configuration files. diff --git a/changelog.d/5187.bugfix b/changelog.d/5187.bugfix deleted file mode 100644 index df176cf5b..000000000 --- a/changelog.d/5187.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug where the register endpoint would fail with M_THREEPID_IN_USE instead of returning an account previously registered in the same session. diff --git a/changelog.d/5190.feature b/changelog.d/5190.feature deleted file mode 100644 index 34904aa7a..000000000 --- a/changelog.d/5190.feature +++ /dev/null @@ -1 +0,0 @@ -Drop support for the undocumented /_matrix/client/v2_alpha API prefix. diff --git a/changelog.d/5191.misc b/changelog.d/5191.misc deleted file mode 100644 index e0615fec9..000000000 --- a/changelog.d/5191.misc +++ /dev/null @@ -1 +0,0 @@ -Make generating SQL bounds for pagination generic. diff --git a/changelog.d/5196.feature b/changelog.d/5196.feature deleted file mode 100644 index 1ffb928f6..000000000 --- a/changelog.d/5196.feature +++ /dev/null @@ -1 +0,0 @@ -Add an option to disable per-room profiles. diff --git a/changelog.d/5197.misc b/changelog.d/5197.misc deleted file mode 100644 index fca1d86b2..000000000 --- a/changelog.d/5197.misc +++ /dev/null @@ -1 +0,0 @@ -Stop telling people to install the optional dependencies by default. diff --git a/changelog.d/5198.bugfix b/changelog.d/5198.bugfix deleted file mode 100644 index c6b156f17..000000000 --- a/changelog.d/5198.bugfix +++ /dev/null @@ -1 +0,0 @@ -Prevent registration for user ids that are to long to fit into a state key. Contributed by Reid Anderson. \ No newline at end of file diff --git a/changelog.d/5204.feature b/changelog.d/5204.feature deleted file mode 100644 index 2a7212ca1..000000000 --- a/changelog.d/5204.feature +++ /dev/null @@ -1 +0,0 @@ -Stick an expiration date to any registered user missing one at startup if account validity is enabled. diff --git a/changelog.d/5209.feature b/changelog.d/5209.feature deleted file mode 100644 index 747098c16..000000000 --- a/changelog.d/5209.feature +++ /dev/null @@ -1 +0,0 @@ -Add experimental support for relations (aka reactions and edits). diff --git a/changelog.d/5210.feature b/changelog.d/5210.feature deleted file mode 100644 index c78325a6a..000000000 --- a/changelog.d/5210.feature +++ /dev/null @@ -1 +0,0 @@ -Add a room version 4 which uses a new event ID format, as per [MSC2002](https://github.com/matrix-org/matrix-doc/pull/2002). diff --git a/changelog.d/5211.feature b/changelog.d/5211.feature deleted file mode 100644 index 747098c16..000000000 --- a/changelog.d/5211.feature +++ /dev/null @@ -1 +0,0 @@ -Add experimental support for relations (aka reactions and edits). diff --git a/changelog.d/5217.feature b/changelog.d/5217.feature deleted file mode 100644 index c78325a6a..000000000 --- a/changelog.d/5217.feature +++ /dev/null @@ -1 +0,0 @@ -Add a room version 4 which uses a new event ID format, as per [MSC2002](https://github.com/matrix-org/matrix-doc/pull/2002). diff --git a/changelog.d/5218.bugfix b/changelog.d/5218.bugfix deleted file mode 100644 index cd624ecfd..000000000 --- a/changelog.d/5218.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix incompatibility between ACME support and Python 3.5.2. diff --git a/changelog.d/5219.bugfix b/changelog.d/5219.bugfix deleted file mode 100644 index c1e17adc5..000000000 --- a/changelog.d/5219.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix error handling for rooms whose versions are unknown. diff --git a/synapse/__init__.py b/synapse/__init__.py index bf9e810da..42af03b53 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.99.4" +__version__ = "0.99.5rc1" From 8aed6d87ffe9a16b2c2809177fa819ddba966bd9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 21 May 2019 16:57:56 +0100 Subject: [PATCH 02/20] Fix spelling in changelog --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index d23166caa..7eab35be3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -21,7 +21,7 @@ Bugfixes - Re-order stages in registration flows such that msisdn and email verification are done last. ([\#5174](https://github.com/matrix-org/synapse/issues/5174)) - Fix 3pid guest invites. ([\#5177](https://github.com/matrix-org/synapse/issues/5177)) - Fix a bug where the register endpoint would fail with M_THREEPID_IN_USE instead of returning an account previously registered in the same session. ([\#5187](https://github.com/matrix-org/synapse/issues/5187)) -- Prevent registration for user ids that are to long to fit into a state key. Contributed by Reid Anderson. ([\#5198](https://github.com/matrix-org/synapse/issues/5198)) +- Prevent registration for user ids that are too long to fit into a state key. Contributed by Reid Anderson. ([\#5198](https://github.com/matrix-org/synapse/issues/5198)) - Fix incompatibility between ACME support and Python 3.5.2. ([\#5218](https://github.com/matrix-org/synapse/issues/5218)) - Fix error handling for rooms whose versions are unknown. ([\#5219](https://github.com/matrix-org/synapse/issues/5219)) From e26e6b3230f0b55376f0f3bf823dd789ac7064d0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 21 May 2019 17:37:19 +0100 Subject: [PATCH 03/20] update changelog --- CHANGES.md | 2 +- changelog.d/5203.feature | 1 - changelog.d/5212.feature | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 changelog.d/5203.feature delete mode 100644 changelog.d/5212.feature diff --git a/CHANGES.md b/CHANGES.md index 7eab35be3..25ceec8b4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,7 +9,7 @@ Features - Drop support for the undocumented /_matrix/client/v2_alpha API prefix. ([\#5190](https://github.com/matrix-org/synapse/issues/5190)) - Add an option to disable per-room profiles. ([\#5196](https://github.com/matrix-org/synapse/issues/5196)) - Stick an expiration date to any registered user missing one at startup if account validity is enabled. ([\#5204](https://github.com/matrix-org/synapse/issues/5204)) -- Add experimental support for relations (aka reactions and edits). ([\#5209](https://github.com/matrix-org/synapse/issues/5209), [\#5211](https://github.com/matrix-org/synapse/issues/5211)) +- Add experimental support for relations (aka reactions and edits). ([\#5209](https://github.com/matrix-org/synapse/issues/5209), [\#5211](https://github.com/matrix-org/synapse/issues/5211), [\#5203](https://github.com/matrix-org/synapse/issues/5203), [\#5212](https://github.com/matrix-org/synapse/issues/5212)) - Add a room version 4 which uses a new event ID format, as per [MSC2002](https://github.com/matrix-org/matrix-doc/pull/2002). ([\#5210](https://github.com/matrix-org/synapse/issues/5210), [\#5217](https://github.com/matrix-org/synapse/issues/5217)) diff --git a/changelog.d/5203.feature b/changelog.d/5203.feature deleted file mode 100644 index 747098c16..000000000 --- a/changelog.d/5203.feature +++ /dev/null @@ -1 +0,0 @@ -Add experimental support for relations (aka reactions and edits). diff --git a/changelog.d/5212.feature b/changelog.d/5212.feature deleted file mode 100644 index 747098c16..000000000 --- a/changelog.d/5212.feature +++ /dev/null @@ -1 +0,0 @@ -Add experimental support for relations (aka reactions and edits). From 8031a6f3d50f0d0bc80dbe38e354db19f75fdf09 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 22 May 2019 15:40:28 +0100 Subject: [PATCH 04/20] 0.99.5 --- CHANGES.md | 6 ++++++ debian/changelog | 4 ++++ synapse/__init__.py | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 25ceec8b4..d1c9ea0e1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +Synapse 0.99.5 (2019-05-22) +=========================== + +No significant changes. + + Synapse 0.99.5rc1 (2019-05-21) ============================== diff --git a/debian/changelog b/debian/changelog index 35cf8ffb2..e148c3272 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,7 @@ +matrix-synapse-py3 (0.99.5) stable; urgency=medium + + * New synapse release 0.99.5. + matrix-synapse-py3 (0.99.4) stable; urgency=medium [ Christoph Müller ] diff --git a/synapse/__init__.py b/synapse/__init__.py index 42af03b53..d66d2411a 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.99.5rc1" +__version__ = "0.99.5" From c31e375ade1b59a7fe38628337e9e1aa3de91feb Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 22 May 2019 17:45:44 +0100 Subject: [PATCH 05/20] 0.99.5 --- CHANGES.md | 6 ++++++ debian/changelog | 6 ++++-- synapse/__init__.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d1c9ea0e1..6bdfdd6d7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +Synapse 0.99.5.1 (2019-05-22) +============================= + +No significant changes. + + Synapse 0.99.5 (2019-05-22) =========================== diff --git a/debian/changelog b/debian/changelog index e148c3272..90c6b86c5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,8 @@ -matrix-synapse-py3 (0.99.5) stable; urgency=medium +matrix-synapse-py3 (0.99.5.1) stable; urgency=medium - * New synapse release 0.99.5. + * New synapse release 0.99.5.1. + + -- Synapse Packaging team Wed, 22 May 2019 16:22:24 +0000 matrix-synapse-py3 (0.99.4) stable; urgency=medium diff --git a/synapse/__init__.py b/synapse/__init__.py index d66d2411a..4f95778ee 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.99.5" +__version__ = "0.99.5.1" From 006bd8f4f6e231ac1fafef1a83a68c3efb0b1bbf Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 22 May 2019 17:49:53 +0100 Subject: [PATCH 06/20] Revert "0.99.5" This reverts commit c31e375ade1b59a7fe38628337e9e1aa3de91feb. --- CHANGES.md | 6 ------ debian/changelog | 6 ++---- synapse/__init__.py | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6bdfdd6d7..d1c9ea0e1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,9 +1,3 @@ -Synapse 0.99.5.1 (2019-05-22) -============================= - -No significant changes. - - Synapse 0.99.5 (2019-05-22) =========================== diff --git a/debian/changelog b/debian/changelog index 90c6b86c5..e148c3272 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,6 @@ -matrix-synapse-py3 (0.99.5.1) stable; urgency=medium +matrix-synapse-py3 (0.99.5) stable; urgency=medium - * New synapse release 0.99.5.1. - - -- Synapse Packaging team Wed, 22 May 2019 16:22:24 +0000 + * New synapse release 0.99.5. matrix-synapse-py3 (0.99.4) stable; urgency=medium diff --git a/synapse/__init__.py b/synapse/__init__.py index 4f95778ee..d66d2411a 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.99.5.1" +__version__ = "0.99.5" From 3d5bba581ba0fc442ce64e3e0d68ad755869db82 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 22 May 2019 17:52:44 +0100 Subject: [PATCH 07/20] 0.99.5.1 --- CHANGES.md | 6 ++++++ debian/changelog | 6 ++++-- synapse/__init__.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d1c9ea0e1..6bdfdd6d7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +Synapse 0.99.5.1 (2019-05-22) +============================= + +No significant changes. + + Synapse 0.99.5 (2019-05-22) =========================== diff --git a/debian/changelog b/debian/changelog index e148c3272..90c6b86c5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,8 @@ -matrix-synapse-py3 (0.99.5) stable; urgency=medium +matrix-synapse-py3 (0.99.5.1) stable; urgency=medium - * New synapse release 0.99.5. + * New synapse release 0.99.5.1. + + -- Synapse Packaging team Wed, 22 May 2019 16:22:24 +0000 matrix-synapse-py3 (0.99.4) stable; urgency=medium diff --git a/synapse/__init__.py b/synapse/__init__.py index d66d2411a..4f95778ee 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.99.5" +__version__ = "0.99.5.1" From df9d9005448d837c5e1a2b75edb5730e2062b0f2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 11:56:24 +0100 Subject: [PATCH 08/20] Correctly filter out extremities with soft failed prevs (#5274) When we receive a soft failed event we, correctly, *do not* update the forward extremity table with the event. However, if we later receive an event that references the soft failed event we then need to remove the soft failed events prev events from the forward extremities table, otherwise we just build up forward extremities. Fixes #5269 --- changelog.d/5274.bugfix | 1 + synapse/storage/events.py | 82 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 changelog.d/5274.bugfix diff --git a/changelog.d/5274.bugfix b/changelog.d/5274.bugfix new file mode 100644 index 000000000..9e14d2028 --- /dev/null +++ b/changelog.d/5274.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2ffc27ff4..6e9f3d1dc 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -554,10 +554,18 @@ class EventsStore( e_id for event in new_events for e_id in event.prev_event_ids() ) - # Finally, remove any events which are prev_events of any existing events. + # Remove any events which are prev_events of any existing events. existing_prevs = yield self._get_events_which_are_prevs(result) result.difference_update(existing_prevs) + # Finally handle the case where the new events have soft-failed prev + # events. If they do we need to remove them and their prev events, + # otherwise we end up with dangling extremities. + existing_prevs = yield self._get_prevs_before_rejected( + e_id for event in new_events for e_id in event.prev_event_ids() + ) + result.difference_update(existing_prevs) + defer.returnValue(result) @defer.inlineCallbacks @@ -573,7 +581,7 @@ class EventsStore( """ results = [] - def _get_events(txn, batch): + def _get_events_which_are_prevs_txn(txn, batch): sql = """ SELECT prev_event_id, internal_metadata FROM event_edges @@ -596,10 +604,78 @@ class EventsStore( ) for chunk in batch_iter(event_ids, 100): - yield self.runInteraction("_get_events_which_are_prevs", _get_events, chunk) + yield self.runInteraction( + "_get_events_which_are_prevs", + _get_events_which_are_prevs_txn, + chunk, + ) defer.returnValue(results) + @defer.inlineCallbacks + def _get_prevs_before_rejected(self, event_ids): + """Get soft-failed ancestors to remove from the extremities. + + Given a set of events, find all those that have been soft-failed or + rejected. Returns those soft failed/rejected events and their prev + events (whether soft-failed/rejected or not), and recurses up the + prev-event graph until it finds no more soft-failed/rejected events. + + This is used to find extremities that are ancestors of new events, but + are separated by soft failed events. + + Args: + event_ids (Iterable[str]): Events to find prev events for. Note + that these must have already been persisted. + + Returns: + Deferred[set[str]] + """ + + # The set of event_ids to return. This includes all soft-failed events + # and their prev events. + existing_prevs = set() + + def _get_prevs_before_rejected_txn(txn, batch): + to_recursively_check = batch + + while to_recursively_check: + sql = """ + SELECT + event_id, prev_event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + LEFT JOIN rejections USING (event_id) + LEFT JOIN event_json USING (event_id) + WHERE + event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_recursively_check), + ) + + txn.execute(sql, to_recursively_check) + to_recursively_check = [] + + for event_id, prev_event_id, metadata, rejected in txn: + if prev_event_id in existing_prevs: + continue + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + to_recursively_check.append(prev_event_id) + existing_prevs.add(prev_event_id) + + for chunk in batch_iter(event_ids, 100): + yield self.runInteraction( + "_get_prevs_before_rejected", + _get_prevs_before_rejected_txn, + chunk, + ) + + defer.returnValue(existing_prevs) + @defer.inlineCallbacks def _get_new_state_after_events( self, room_id, events_context, old_latest_event_ids, new_latest_event_ids From 6ebc08c09d4ced251750cb087aa4689f90cdd4b6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 18:52:41 +0100 Subject: [PATCH 09/20] Add DB bg update to cleanup extremities. Due to #5269 we may have extremities in our DB that we shouldn't have, so lets add a cleanup task such to remove those. --- synapse/storage/events.py | 186 ++++++++++++++++++ .../delta/54/delete_forward_extremities.sql | 19 ++ 2 files changed, 205 insertions(+) create mode 100644 synapse/storage/schema/delta/54/delete_forward_extremities.sql diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 6e9f3d1dc..a9be143bd 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -221,6 +221,7 @@ class EventsStore( ): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" + EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) @@ -252,6 +253,11 @@ class EventsStore( psql_only=True, ) + self.register_background_update_handler( + self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self._cleanup_extremities_bg_update, + ) + self._event_persist_queue = _EventPeristenceQueue() self._state_resolution_handler = hs.get_state_resolution_handler() @@ -2341,6 +2347,186 @@ class EventsStore( get_all_updated_current_state_deltas_txn, ) + @defer.inlineCallbacks + def _cleanup_extremities_bg_update(self, progress, batch_size): + """Background update to clean out extremities that should have been + deleted previously. + + Mainly used to deal with the aftermath of #5269. + """ + + # This works by first copying all existing forward extremities into the + # `_extremities_to_check` table at start up, and then checking each + # event in that table whether we have any descendants that are not + # soft-failed/rejected. If that is the case then we delete that event + # from the forward extremities table. + # + # For efficiency, we do this in batches by recursively pulling out all + # descendants of a batch until we find the non soft-failed/rejected + # events, i.e. the set of descendants whose chain of prev events back + # to the batch of extremities are all soft-failed or rejected. + # Typically, we won't find any such events as extremities will rarely + # have any descendants, but if they do then we should delete those + # extremities. + + def _cleanup_extremities_bg_update_txn(txn): + # The set of extremity event IDs that we're checking this round + original_set = set() + + # A dict[str, set[str]] of event ID to their prev events. + graph = {} + + # The set of descendants of the original set that are not rejected + # nor soft-failed. Ancestors of these events should be removed + # from the forward extremities table. + non_rejected_leaves = set() + + # Set of event IDs that have been soft failed, and for which we + # should check if they have descendants which haven't been soft + # failed. + soft_failed_events_to_lookup = set() + + # First, we get `batch_size` events from the table, pulling out + # their prev events, if any, and their prev events rejection status. + txn.execute( + """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL, events.outlier + FROM ( + SELECT event_id AS prev_event_id + FROM _extremities_to_check + LIMIT ? + ) AS f + LEFT JOIN event_edges USING (prev_event_id) + LEFT JOIN events USING (event_id) + LEFT JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + """, (batch_size,) + ) + + for prev_event_id, event_id, metadata, rejected, outlier in txn: + original_set.add(prev_event_id) + + if not event_id or outlier: + # Common case where the forward extremity doesn't have any + # descendants. + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = False + if metadata: + soft_failed = json.loads(metadata).get("soft_failed") + + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # Now we recursively check all the soft-failed descendants we + # found above in the same way, until we have nothing left to + # check. + while soft_failed_events_to_lookup: + # We only want to do 100 at a time, so we split given list + # into two. + batch = list(soft_failed_events_to_lookup) + to_check, to_defer = batch[:100], batch[100:] + soft_failed_events_to_lookup = set(to_defer) + + sql = """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + INNER JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + WHERE + prev_event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_check), + ) + txn.execute(sql, to_check) + + for prev_event_id, event_id, metadata, rejected in txn: + if event_id in graph: + # Already handled this event previously, but we still + # want to record the edge. + graph.setdefault(event_id, set()).add(prev_event_id) + logger.info("Already handled") + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # We have a set of non-soft-failed descendants, so we recurse up + # the graph to find all ancestors and add them to the set of event + # IDs that we can delete from forward extremities table. + to_delete = set() + while non_rejected_leaves: + event_id = non_rejected_leaves.pop() + prev_event_ids = graph.get(event_id, set()) + non_rejected_leaves.update(prev_event_ids) + to_delete.update(prev_event_ids) + + to_delete.intersection_update(original_set) + + logger.info("Deleting up to %d forward extremities", len(to_delete)) + + self._simple_delete_many_txn( + txn=txn, + table="event_forward_extremities", + column="event_id", + iterable=to_delete, + keyvalues={}, + ) + + if to_delete: + # We now need to invalidate the caches of these rooms + rows = self._simple_select_many_txn( + txn, + table="events", + column="event_id", + iterable=to_delete, + keyvalues={}, + retcols=("room_id",) + ) + for row in rows: + txn.call_after( + self.get_latest_event_ids_in_room.invalidate, + (row["room_id"],) + ) + + self._simple_delete_many_txn( + txn=txn, + table="_extremities_to_check", + column="event_id", + iterable=original_set, + keyvalues={}, + ) + + return len(original_set) + + num_handled = yield self.runInteraction( + "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, + ) + + if not num_handled: + yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + + def _drop_table_txn(txn): + txn.execute("DROP TABLE _extremities_to_check") + + yield self.runInteraction( + "_cleanup_extremities_bg_update_drop_table", + _drop_table_txn, + ) + + defer.returnValue(num_handled) + AllNewEventsResult = namedtuple( "AllNewEventsResult", diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql new file mode 100644 index 000000000..7056bd1d0 --- /dev/null +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -0,0 +1,19 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('delete_soft_failed_extremities', '{}'); + +CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; From 1d818fde14595299de9e13008c67239ca677014f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 11:58:32 +0100 Subject: [PATCH 10/20] Log actual number of entries deleted --- synapse/storage/_base.py | 12 +++++++++--- synapse/storage/events.py | 6 ++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index fa6839cec..3fe827cd4 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1261,7 +1261,8 @@ class SQLBaseStore(object): " AND ".join("%s = ?" % (k,) for k in keyvalues), ) - return txn.execute(sql, list(keyvalues.values())) + txn.execute(sql, list(keyvalues.values())) + return txn.rowcount def _simple_delete_many(self, table, column, iterable, keyvalues, desc): return self.runInteraction( @@ -1280,9 +1281,12 @@ class SQLBaseStore(object): column : column name to test for inclusion against `iterable` iterable : list keyvalues : dict of column names and values to select the rows with + + Returns: + int: Number rows deleted """ if not iterable: - return + return 0 sql = "DELETE FROM %s" % table @@ -1297,7 +1301,9 @@ class SQLBaseStore(object): if clauses: sql = "%s WHERE %s" % (sql, " AND ".join(clauses)) - return txn.execute(sql, values) + txn.execute(sql, values) + + return txn.rowcount def _get_cache_dict( self, db_conn, table, entity_column, stream_column, max_value, limit=100000 diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a9be143bd..a9664928c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2476,7 +2476,7 @@ class EventsStore( logger.info("Deleting up to %d forward extremities", len(to_delete)) - self._simple_delete_many_txn( + deleted = self._simple_delete_many_txn( txn=txn, table="event_forward_extremities", column="event_id", @@ -2484,7 +2484,9 @@ class EventsStore( keyvalues={}, ) - if to_delete: + logger.info("Deleted %d forward extremities", deleted) + + if deleted: # We now need to invalidate the caches of these rooms rows = self._simple_select_many_txn( txn, From 6574d4ad0a95e1d5a709e5d1d61555561dc180f2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 14:19:11 +0100 Subject: [PATCH 11/20] Add test --- tests/storage/test_cleanup_extrems.py | 248 ++++++++++++++++++++++++++ 1 file changed, 248 insertions(+) create mode 100644 tests/storage/test_cleanup_extrems.py diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py new file mode 100644 index 000000000..6dda66ecd --- /dev/null +++ b/tests/storage/test_cleanup_extrems.py @@ -0,0 +1,248 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path + +from synapse.api.constants import EventTypes +from synapse.storage import prepare_database +from synapse.types import Requester, UserID + +from tests.unittest import HomeserverTestCase + + +class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): + """Test the background update to clean forward extremities table. + """ + + def prepare(self, reactor, clock, homeserver): + self.store = homeserver.get_datastore() + self.event_creator = homeserver.get_event_creation_handler() + self.room_creator = homeserver.get_room_creation_handler() + + # Create a test user and room + self.user = UserID("alice", "test") + self.requester = Requester(self.user, None, False, None, None) + info = self.get_success(self.room_creator.create_room(self.requester, {})) + self.room_id = info["room_id"] + + def create_and_send_event(self, soft_failed=False, prev_event_ids=None): + """Create and send an event. + + Args: + soft_failed (bool): Whether to create a soft failed event or not + prev_event_ids (list[str]|None): Explicitly set the prev events, + or if None just use the default + + Returns: + str: The new event's ID. + """ + prev_events_and_hashes = None + if prev_event_ids: + prev_events_and_hashes = [[p, {}, 0] for p in prev_event_ids] + + event, context = self.get_success( + self.event_creator.create_event( + self.requester, + { + "type": EventTypes.Message, + "room_id": self.room_id, + "sender": self.user.to_string(), + "content": {"body": "", "msgtype": "m.text"}, + }, + prev_events_and_hashes=prev_events_and_hashes, + ) + ) + + if soft_failed: + event.internal_metadata.soft_failed = True + + self.get_success( + self.event_creator.send_nonmember_event(self.requester, event, context) + ) + + return event.event_id + + def add_extremity(self, event_id): + """Add the given event as an extremity to the room. + """ + self.get_success( + self.store._simple_insert( + table="event_forward_extremities", + values={"room_id": self.room_id, "event_id": event_id}, + desc="test_add_extremity", + ) + ) + + self.store.get_latest_event_ids_in_room.invalidate((self.room_id,)) + + def run_background_update(self): + """Re run the background update to clean up the extremities. + """ + # Make sure we don't clash with in progress updates. + self.assertTrue(self.store._all_done, "Background updates are still ongoing") + + schema_path = os.path.join( + prepare_database.dir_path, + "schema", + "delta", + "54", + "delete_forward_extremities.sql", + ) + + def run_delta_file(txn): + prepare_database.executescript(txn, schema_path) + + self.get_success( + self.store.runInteraction("test_delete_forward_extremities", run_delta_file) + ) + + # Ugh, have to reset this flag + self.store._all_done = False + + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + def test_soft_failed_extremities_handled_correctly(self): + """Test that extremities are correctly calculated in the presence of + soft failed events. + + Tests a graph like: + + A <- SF1 <- SF2 <- B + + Where SF* are soft failed. + """ + + # Create the room graph + event_id_1 = self.create_and_send_event() + event_id_2 = self.create_and_send_event(True, [event_id_1]) + event_id_3 = self.create_and_send_event(True, [event_id_2]) + event_id_4 = self.create_and_send_event(False, [event_id_3]) + + # Check the latest events are as expected + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + + self.assertEqual(latest_event_ids, [event_id_4]) + + def test_basic_cleanup(self): + """Test that extremities are correctly calculated in the presence of + soft failed events. + + Tests a graph like: + + A <- SF1 <- B + + Where SF* are soft failed, and with extremities of A and B + """ + # Create the room graph + event_id_a = self.create_and_send_event() + event_id_sf1 = self.create_and_send_event(True, [event_id_a]) + event_id_b = self.create_and_send_event(False, [event_id_sf1]) + + # Add the new extremity and check the latest events are as expected + self.add_extremity(event_id_a) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(set(latest_event_ids), set((event_id_a, event_id_b))) + + # Run the background update and check it did the right thing + self.run_background_update() + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(latest_event_ids, [event_id_b]) + + def test_chain_of_fail_cleanup(self): + """Test that extremities are correctly calculated in the presence of + soft failed events. + + Tests a graph like: + + A <- SF1 <- SF2 <- B + + Where SF* are soft failed, and with extremities of A and B + """ + # Create the room graph + event_id_a = self.create_and_send_event() + event_id_sf1 = self.create_and_send_event(True, [event_id_a]) + event_id_sf2 = self.create_and_send_event(True, [event_id_sf1]) + event_id_b = self.create_and_send_event(False, [event_id_sf2]) + + # Add the new extremity and check the latest events are as expected + self.add_extremity(event_id_a) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(set(latest_event_ids), set((event_id_a, event_id_b))) + + # Run the background update and check it did the right thing + self.run_background_update() + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(latest_event_ids, [event_id_b]) + + def test_forked_graph_cleanup(self): + r"""Test that extremities are correctly calculated in the presence of + soft failed events. + + Tests a graph like, where time flows down the page: + + A B + / \ / + / \ / + SF1 SF2 + | | + SF3 | + / \ | + | \ | + C SF4 + + Where SF* are soft failed, and with them A, B and C marked as + extremities. This should resolve to B and C being marked as extremity. + """ + # Create the room graph + event_id_a = self.create_and_send_event() + event_id_b = self.create_and_send_event() + event_id_sf1 = self.create_and_send_event(True, [event_id_a]) + event_id_sf2 = self.create_and_send_event(True, [event_id_a, event_id_b]) + event_id_sf3 = self.create_and_send_event(True, [event_id_sf1]) + self.create_and_send_event(True, [event_id_sf2, event_id_sf3]) # SF4 + event_id_c = self.create_and_send_event(False, [event_id_sf3]) + + # Add the new extremity and check the latest events are as expected + self.add_extremity(event_id_a) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual( + set(latest_event_ids), set((event_id_a, event_id_b, event_id_c)) + ) + + # Run the background update and check it did the right thing + self.run_background_update() + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(set(latest_event_ids), set([event_id_b, event_id_c])) From 9f5268388abb266260c30e9855da35a0b6b11bcb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 18:56:02 +0100 Subject: [PATCH 12/20] Newsfile --- changelog.d/5278.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5278.bugfix diff --git a/changelog.d/5278.bugfix b/changelog.d/5278.bugfix new file mode 100644 index 000000000..9e14d2028 --- /dev/null +++ b/changelog.d/5278.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. From 9b8cd66524304f76209a59e12f4eca561b1a43d3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 10:55:55 +0100 Subject: [PATCH 13/20] Fixup comments and logging --- synapse/storage/events.py | 21 +++++++++++-------- .../delta/54/delete_forward_extremities.sql | 3 +++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a9664928c..418d88b8d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2387,7 +2387,8 @@ class EventsStore( soft_failed_events_to_lookup = set() # First, we get `batch_size` events from the table, pulling out - # their prev events, if any, and their prev events rejection status. + # their successor events, if any, and their successor events + # rejection status. txn.execute( """SELECT prev_event_id, event_id, internal_metadata, rejections.event_id IS NOT NULL, events.outlier @@ -2450,11 +2451,10 @@ class EventsStore( if event_id in graph: # Already handled this event previously, but we still # want to record the edge. - graph.setdefault(event_id, set()).add(prev_event_id) - logger.info("Already handled") + graph[event_id].add(prev_event_id) continue - graph.setdefault(event_id, set()).add(prev_event_id) + graph[event_id] = {prev_event_id} soft_failed = json.loads(metadata).get("soft_failed") if soft_failed or rejected: @@ -2474,8 +2474,6 @@ class EventsStore( to_delete.intersection_update(original_set) - logger.info("Deleting up to %d forward extremities", len(to_delete)) - deleted = self._simple_delete_many_txn( txn=txn, table="event_forward_extremities", @@ -2484,7 +2482,11 @@ class EventsStore( keyvalues={}, ) - logger.info("Deleted %d forward extremities", deleted) + logger.info( + "Deleted %d forward extremities of %d checked, to clean up #5269", + deleted, + len(original_set), + ) if deleted: # We now need to invalidate the caches of these rooms @@ -2496,10 +2498,11 @@ class EventsStore( keyvalues={}, retcols=("room_id",) ) - for row in rows: + room_ids = set(row["room_id"] for row in rows) + for room_id in room_ids: txn.call_after( self.get_latest_event_ids_in_room.invalidate, - (row["room_id"],) + (room_id,) ) self._simple_delete_many_txn( diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql index 7056bd1d0..aa40f13da 100644 --- a/synapse/storage/schema/delta/54/delete_forward_extremities.sql +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -13,7 +13,10 @@ * limitations under the License. */ +-- Start a background job to cleanup extremities that were incorrectly added +-- by bug #5269. INSERT INTO background_updates (update_name, progress_json) VALUES ('delete_soft_failed_extremities', '{}'); +DROP TABLE IF EXISTS _extremities_to_check; -- To make this delta schema file idempotent. CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; From 98f438b52a93b1ce9d1f3e93fa57db0f870f9101 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 11:22:59 +0100 Subject: [PATCH 14/20] Move event background updates to a separate file --- synapse/storage/__init__.py | 2 + synapse/storage/events.py | 371 +------------------------ synapse/storage/events_bg_updates.py | 401 +++++++++++++++++++++++++++ 3 files changed, 405 insertions(+), 369 deletions(-) create mode 100644 synapse/storage/events_bg_updates.py diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 7522d3fd5..56c434d4e 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -36,6 +36,7 @@ from .engines import PostgresEngine from .event_federation import EventFederationStore from .event_push_actions import EventPushActionsStore from .events import EventsStore +from .events_bg_updates import EventsBackgroundUpdatesStore from .filtering import FilteringStore from .group_server import GroupServerStore from .keys import KeyStore @@ -65,6 +66,7 @@ logger = logging.getLogger(__name__) class DataStore( + EventsBackgroundUpdatesStore, RoomMemberStore, RoomStore, RegistrationStore, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 418d88b8d..f9162be9b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2018-2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -219,47 +220,11 @@ class EventsStore( EventsWorkerStore, BackgroundUpdateStore, ): - EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" - EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" - EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) - self.register_background_update_handler( - self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts - ) - self.register_background_update_handler( - self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, - self._background_reindex_fields_sender, - ) - - self.register_background_index_update( - "event_contains_url_index", - index_name="event_contains_url_index", - table="events", - columns=["room_id", "topological_ordering", "stream_ordering"], - where_clause="contains_url = true AND outlier = false", - ) - - # an event_id index on event_search is useful for the purge_history - # api. Plus it means we get to enforce some integrity with a UNIQUE - # clause - self.register_background_index_update( - "event_search_event_id_idx", - index_name="event_search_event_id_idx", - table="event_search", - columns=["event_id"], - unique=True, - psql_only=True, - ) - - self.register_background_update_handler( - self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, - self._cleanup_extremities_bg_update, - ) self._event_persist_queue = _EventPeristenceQueue() - self._state_resolution_handler = hs.get_state_resolution_handler() @defer.inlineCallbacks @@ -1585,153 +1550,6 @@ class EventsStore( ret = yield self.runInteraction("count_daily_active_rooms", _count) defer.returnValue(ret) - @defer.inlineCallbacks - def _background_reindex_fields_sender(self, progress, batch_size): - target_min_stream_id = progress["target_min_stream_id_inclusive"] - max_stream_id = progress["max_stream_id_exclusive"] - rows_inserted = progress.get("rows_inserted", 0) - - INSERT_CLUMP_SIZE = 1000 - - def reindex_txn(txn): - sql = ( - "SELECT stream_ordering, event_id, json FROM events" - " INNER JOIN event_json USING (event_id)" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" - " LIMIT ?" - ) - - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - - rows = txn.fetchall() - if not rows: - return 0 - - min_stream_id = rows[-1][0] - - update_rows = [] - for row in rows: - try: - event_id = row[1] - event_json = json.loads(row[2]) - sender = event_json["sender"] - content = event_json["content"] - - contains_url = "url" in content - if contains_url: - contains_url &= isinstance(content["url"], text_type) - except (KeyError, AttributeError): - # If the event is missing a necessary field then - # skip over it. - continue - - update_rows.append((sender, contains_url, event_id)) - - sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?" - - for index in range(0, len(update_rows), INSERT_CLUMP_SIZE): - clump = update_rows[index : index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - progress = { - "target_min_stream_id_inclusive": target_min_stream_id, - "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows), - } - - self._background_update_progress_txn( - txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress - ) - - return len(rows) - - result = yield self.runInteraction( - self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn - ) - - if not result: - yield self._end_background_update(self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME) - - defer.returnValue(result) - - @defer.inlineCallbacks - def _background_reindex_origin_server_ts(self, progress, batch_size): - target_min_stream_id = progress["target_min_stream_id_inclusive"] - max_stream_id = progress["max_stream_id_exclusive"] - rows_inserted = progress.get("rows_inserted", 0) - - INSERT_CLUMP_SIZE = 1000 - - def reindex_search_txn(txn): - sql = ( - "SELECT stream_ordering, event_id FROM events" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" - " LIMIT ?" - ) - - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - - rows = txn.fetchall() - if not rows: - return 0 - - min_stream_id = rows[-1][0] - event_ids = [row[1] for row in rows] - - rows_to_update = [] - - chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)] - for chunk in chunks: - ev_rows = self._simple_select_many_txn( - txn, - table="event_json", - column="event_id", - iterable=chunk, - retcols=["event_id", "json"], - keyvalues={}, - ) - - for row in ev_rows: - event_id = row["event_id"] - event_json = json.loads(row["json"]) - try: - origin_server_ts = event_json["origin_server_ts"] - except (KeyError, AttributeError): - # If the event is missing a necessary field then - # skip over it. - continue - - rows_to_update.append((origin_server_ts, event_id)) - - sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?" - - for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE): - clump = rows_to_update[index : index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - progress = { - "target_min_stream_id_inclusive": target_min_stream_id, - "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows_to_update), - } - - self._background_update_progress_txn( - txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress - ) - - return len(rows_to_update) - - result = yield self.runInteraction( - self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn - ) - - if not result: - yield self._end_background_update(self.EVENT_ORIGIN_SERVER_TS_NAME) - - defer.returnValue(result) - def get_current_backfill_token(self): """The current minimum token that backfilled events have reached""" return -self._backfill_id_gen.get_current_token() @@ -2347,191 +2165,6 @@ class EventsStore( get_all_updated_current_state_deltas_txn, ) - @defer.inlineCallbacks - def _cleanup_extremities_bg_update(self, progress, batch_size): - """Background update to clean out extremities that should have been - deleted previously. - - Mainly used to deal with the aftermath of #5269. - """ - - # This works by first copying all existing forward extremities into the - # `_extremities_to_check` table at start up, and then checking each - # event in that table whether we have any descendants that are not - # soft-failed/rejected. If that is the case then we delete that event - # from the forward extremities table. - # - # For efficiency, we do this in batches by recursively pulling out all - # descendants of a batch until we find the non soft-failed/rejected - # events, i.e. the set of descendants whose chain of prev events back - # to the batch of extremities are all soft-failed or rejected. - # Typically, we won't find any such events as extremities will rarely - # have any descendants, but if they do then we should delete those - # extremities. - - def _cleanup_extremities_bg_update_txn(txn): - # The set of extremity event IDs that we're checking this round - original_set = set() - - # A dict[str, set[str]] of event ID to their prev events. - graph = {} - - # The set of descendants of the original set that are not rejected - # nor soft-failed. Ancestors of these events should be removed - # from the forward extremities table. - non_rejected_leaves = set() - - # Set of event IDs that have been soft failed, and for which we - # should check if they have descendants which haven't been soft - # failed. - soft_failed_events_to_lookup = set() - - # First, we get `batch_size` events from the table, pulling out - # their successor events, if any, and their successor events - # rejection status. - txn.execute( - """SELECT prev_event_id, event_id, internal_metadata, - rejections.event_id IS NOT NULL, events.outlier - FROM ( - SELECT event_id AS prev_event_id - FROM _extremities_to_check - LIMIT ? - ) AS f - LEFT JOIN event_edges USING (prev_event_id) - LEFT JOIN events USING (event_id) - LEFT JOIN event_json USING (event_id) - LEFT JOIN rejections USING (event_id) - """, (batch_size,) - ) - - for prev_event_id, event_id, metadata, rejected, outlier in txn: - original_set.add(prev_event_id) - - if not event_id or outlier: - # Common case where the forward extremity doesn't have any - # descendants. - continue - - graph.setdefault(event_id, set()).add(prev_event_id) - - soft_failed = False - if metadata: - soft_failed = json.loads(metadata).get("soft_failed") - - if soft_failed or rejected: - soft_failed_events_to_lookup.add(event_id) - else: - non_rejected_leaves.add(event_id) - - # Now we recursively check all the soft-failed descendants we - # found above in the same way, until we have nothing left to - # check. - while soft_failed_events_to_lookup: - # We only want to do 100 at a time, so we split given list - # into two. - batch = list(soft_failed_events_to_lookup) - to_check, to_defer = batch[:100], batch[100:] - soft_failed_events_to_lookup = set(to_defer) - - sql = """SELECT prev_event_id, event_id, internal_metadata, - rejections.event_id IS NOT NULL - FROM event_edges - INNER JOIN events USING (event_id) - INNER JOIN event_json USING (event_id) - LEFT JOIN rejections USING (event_id) - WHERE - prev_event_id IN (%s) - AND NOT events.outlier - """ % ( - ",".join("?" for _ in to_check), - ) - txn.execute(sql, to_check) - - for prev_event_id, event_id, metadata, rejected in txn: - if event_id in graph: - # Already handled this event previously, but we still - # want to record the edge. - graph[event_id].add(prev_event_id) - continue - - graph[event_id] = {prev_event_id} - - soft_failed = json.loads(metadata).get("soft_failed") - if soft_failed or rejected: - soft_failed_events_to_lookup.add(event_id) - else: - non_rejected_leaves.add(event_id) - - # We have a set of non-soft-failed descendants, so we recurse up - # the graph to find all ancestors and add them to the set of event - # IDs that we can delete from forward extremities table. - to_delete = set() - while non_rejected_leaves: - event_id = non_rejected_leaves.pop() - prev_event_ids = graph.get(event_id, set()) - non_rejected_leaves.update(prev_event_ids) - to_delete.update(prev_event_ids) - - to_delete.intersection_update(original_set) - - deleted = self._simple_delete_many_txn( - txn=txn, - table="event_forward_extremities", - column="event_id", - iterable=to_delete, - keyvalues={}, - ) - - logger.info( - "Deleted %d forward extremities of %d checked, to clean up #5269", - deleted, - len(original_set), - ) - - if deleted: - # We now need to invalidate the caches of these rooms - rows = self._simple_select_many_txn( - txn, - table="events", - column="event_id", - iterable=to_delete, - keyvalues={}, - retcols=("room_id",) - ) - room_ids = set(row["room_id"] for row in rows) - for room_id in room_ids: - txn.call_after( - self.get_latest_event_ids_in_room.invalidate, - (room_id,) - ) - - self._simple_delete_many_txn( - txn=txn, - table="_extremities_to_check", - column="event_id", - iterable=original_set, - keyvalues={}, - ) - - return len(original_set) - - num_handled = yield self.runInteraction( - "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, - ) - - if not num_handled: - yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) - - def _drop_table_txn(txn): - txn.execute("DROP TABLE _extremities_to_check") - - yield self.runInteraction( - "_cleanup_extremities_bg_update_drop_table", - _drop_table_txn, - ) - - defer.returnValue(num_handled) - AllNewEventsResult = namedtuple( "AllNewEventsResult", diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py new file mode 100644 index 000000000..2eba106ab --- /dev/null +++ b/synapse/storage/events_bg_updates.py @@ -0,0 +1,401 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from six import text_type + +from canonicaljson import json + +from twisted.internet import defer + +from synapse.storage.background_updates import BackgroundUpdateStore + +logger = logging.getLogger(__name__) + + +class EventsBackgroundUpdatesStore(BackgroundUpdateStore): + + EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" + EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" + EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" + + def __init__(self, db_conn, hs): + super(EventsBackgroundUpdatesStore, self).__init__(db_conn, hs) + + self.register_background_update_handler( + self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts + ) + self.register_background_update_handler( + self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, + self._background_reindex_fields_sender, + ) + + self.register_background_index_update( + "event_contains_url_index", + index_name="event_contains_url_index", + table="events", + columns=["room_id", "topological_ordering", "stream_ordering"], + where_clause="contains_url = true AND outlier = false", + ) + + # an event_id index on event_search is useful for the purge_history + # api. Plus it means we get to enforce some integrity with a UNIQUE + # clause + self.register_background_index_update( + "event_search_event_id_idx", + index_name="event_search_event_id_idx", + table="event_search", + columns=["event_id"], + unique=True, + psql_only=True, + ) + + self.register_background_update_handler( + self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self._cleanup_extremities_bg_update, + ) + + @defer.inlineCallbacks + def _background_reindex_fields_sender(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + + def reindex_txn(txn): + sql = ( + "SELECT stream_ordering, event_id, json FROM events" + " INNER JOIN event_json USING (event_id)" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + + update_rows = [] + for row in rows: + try: + event_id = row[1] + event_json = json.loads(row[2]) + sender = event_json["sender"] + content = event_json["content"] + + contains_url = "url" in content + if contains_url: + contains_url &= isinstance(content["url"], text_type) + except (KeyError, AttributeError): + # If the event is missing a necessary field then + # skip over it. + continue + + update_rows.append((sender, contains_url, event_id)) + + sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?" + + for index in range(0, len(update_rows), INSERT_CLUMP_SIZE): + clump = update_rows[index : index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(rows), + } + + self._background_update_progress_txn( + txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress + ) + + return len(rows) + + result = yield self.runInteraction( + self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME) + + defer.returnValue(result) + + @defer.inlineCallbacks + def _background_reindex_origin_server_ts(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + + def reindex_search_txn(txn): + sql = ( + "SELECT stream_ordering, event_id FROM events" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + event_ids = [row[1] for row in rows] + + rows_to_update = [] + + chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)] + for chunk in chunks: + ev_rows = self._simple_select_many_txn( + txn, + table="event_json", + column="event_id", + iterable=chunk, + retcols=["event_id", "json"], + keyvalues={}, + ) + + for row in ev_rows: + event_id = row["event_id"] + event_json = json.loads(row["json"]) + try: + origin_server_ts = event_json["origin_server_ts"] + except (KeyError, AttributeError): + # If the event is missing a necessary field then + # skip over it. + continue + + rows_to_update.append((origin_server_ts, event_id)) + + sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?" + + for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE): + clump = rows_to_update[index : index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(rows_to_update), + } + + self._background_update_progress_txn( + txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress + ) + + return len(rows_to_update) + + result = yield self.runInteraction( + self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_ORIGIN_SERVER_TS_NAME) + + defer.returnValue(result) + + @defer.inlineCallbacks + def _cleanup_extremities_bg_update(self, progress, batch_size): + """Background update to clean out extremities that should have been + deleted previously. + + Mainly used to deal with the aftermath of #5269. + """ + + # This works by first copying all existing forward extremities into the + # `_extremities_to_check` table at start up, and then checking each + # event in that table whether we have any descendants that are not + # soft-failed/rejected. If that is the case then we delete that event + # from the forward extremities table. + # + # For efficiency, we do this in batches by recursively pulling out all + # descendants of a batch until we find the non soft-failed/rejected + # events, i.e. the set of descendants whose chain of prev events back + # to the batch of extremities are all soft-failed or rejected. + # Typically, we won't find any such events as extremities will rarely + # have any descendants, but if they do then we should delete those + # extremities. + + def _cleanup_extremities_bg_update_txn(txn): + # The set of extremity event IDs that we're checking this round + original_set = set() + + # A dict[str, set[str]] of event ID to their prev events. + graph = {} + + # The set of descendants of the original set that are not rejected + # nor soft-failed. Ancestors of these events should be removed + # from the forward extremities table. + non_rejected_leaves = set() + + # Set of event IDs that have been soft failed, and for which we + # should check if they have descendants which haven't been soft + # failed. + soft_failed_events_to_lookup = set() + + # First, we get `batch_size` events from the table, pulling out + # their successor events, if any, and their successor events + # rejection status. + txn.execute( + """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL, events.outlier + FROM ( + SELECT event_id AS prev_event_id + FROM _extremities_to_check + LIMIT ? + ) AS f + LEFT JOIN event_edges USING (prev_event_id) + LEFT JOIN events USING (event_id) + LEFT JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + """, (batch_size,) + ) + + for prev_event_id, event_id, metadata, rejected, outlier in txn: + original_set.add(prev_event_id) + + if not event_id or outlier: + # Common case where the forward extremity doesn't have any + # descendants. + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = False + if metadata: + soft_failed = json.loads(metadata).get("soft_failed") + + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # Now we recursively check all the soft-failed descendants we + # found above in the same way, until we have nothing left to + # check. + while soft_failed_events_to_lookup: + # We only want to do 100 at a time, so we split given list + # into two. + batch = list(soft_failed_events_to_lookup) + to_check, to_defer = batch[:100], batch[100:] + soft_failed_events_to_lookup = set(to_defer) + + sql = """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + INNER JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + WHERE + prev_event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_check), + ) + txn.execute(sql, to_check) + + for prev_event_id, event_id, metadata, rejected in txn: + if event_id in graph: + # Already handled this event previously, but we still + # want to record the edge. + graph[event_id].add(prev_event_id) + continue + + graph[event_id] = {prev_event_id} + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # We have a set of non-soft-failed descendants, so we recurse up + # the graph to find all ancestors and add them to the set of event + # IDs that we can delete from forward extremities table. + to_delete = set() + while non_rejected_leaves: + event_id = non_rejected_leaves.pop() + prev_event_ids = graph.get(event_id, set()) + non_rejected_leaves.update(prev_event_ids) + to_delete.update(prev_event_ids) + + to_delete.intersection_update(original_set) + + deleted = self._simple_delete_many_txn( + txn=txn, + table="event_forward_extremities", + column="event_id", + iterable=to_delete, + keyvalues={}, + ) + + logger.info( + "Deleted %d forward extremities of %d checked, to clean up #5269", + deleted, + len(original_set), + ) + + if deleted: + # We now need to invalidate the caches of these rooms + rows = self._simple_select_many_txn( + txn, + table="events", + column="event_id", + iterable=to_delete, + keyvalues={}, + retcols=("room_id",) + ) + room_ids = set(row["room_id"] for row in rows) + for room_id in room_ids: + txn.call_after( + self.get_latest_event_ids_in_room.invalidate, + (room_id,) + ) + + self._simple_delete_many_txn( + txn=txn, + table="_extremities_to_check", + column="event_id", + iterable=original_set, + keyvalues={}, + ) + + return len(original_set) + + num_handled = yield self.runInteraction( + "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, + ) + + if not num_handled: + yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + + def _drop_table_txn(txn): + txn.execute("DROP TABLE _extremities_to_check") + + yield self.runInteraction( + "_cleanup_extremities_bg_update_drop_table", + _drop_table_txn, + ) + + defer.returnValue(num_handled) From 7386c35f58f360269df1410b2d1ec6d179081b32 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 11:24:42 +0100 Subject: [PATCH 15/20] Rename constant --- synapse/storage/events_bg_updates.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 2eba106ab..22aac1393 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -30,7 +30,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" - EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" + DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsBackgroundUpdatesStore, self).__init__(db_conn, hs) @@ -64,7 +64,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) self.register_background_update_handler( - self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self.DELETE_SOFT_FAILED_EXTREMITIES, self._cleanup_extremities_bg_update, ) @@ -388,7 +388,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) if not num_handled: - yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + yield self._end_background_update(self.DELETE_SOFT_FAILED_EXTREMITIES) def _drop_table_txn(txn): txn.execute("DROP TABLE _extremities_to_check") From 06eb408da5c4ab52a3072dc6d76fe5ac3b9b1e83 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 14:06:42 +0100 Subject: [PATCH 16/20] Update synapse/storage/events_bg_updates.py Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 22aac1393..75c1935bf 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -255,7 +255,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): soft_failed_events_to_lookup = set() # First, we get `batch_size` events from the table, pulling out - # their successor events, if any, and their successor events + # their successor events, if any, and the successor events' # rejection status. txn.execute( """SELECT prev_event_id, event_id, internal_metadata, From e2c3660a0ffb13d4198893e91a90ae1abcad8915 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 14:54:56 +0100 Subject: [PATCH 17/20] Add index to temp table --- synapse/storage/schema/delta/54/delete_forward_extremities.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql index aa40f13da..b062ec840 100644 --- a/synapse/storage/schema/delta/54/delete_forward_extremities.sql +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -20,3 +20,4 @@ INSERT INTO background_updates (update_name, progress_json) VALUES DROP TABLE IF EXISTS _extremities_to_check; -- To make this delta schema file idempotent. CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; +CREATE INDEX _extremities_to_check_id ON _extremities_to_check(event_id); From f5c7f90d7278f199523457ade9587d284b3ce39b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:05:26 +0100 Subject: [PATCH 18/20] Newsfile --- changelog.d/5291.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5291.bugfix diff --git a/changelog.d/5291.bugfix b/changelog.d/5291.bugfix new file mode 100644 index 000000000..9e14d2028 --- /dev/null +++ b/changelog.d/5291.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. From 9315802221ede86def56986d5c5303f649cd2fa9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 30 May 2019 16:28:02 +0100 Subject: [PATCH 19/20] fix changelog for 0.99.5.1 (#5270) --- CHANGES.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6bdfdd6d7..350151b62 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,7 @@ Synapse 0.99.5.1 (2019-05-22) ============================= -No significant changes. - +0.99.5.1 supersedes 0.99.5 due to malformed debian changelog - no functional changes. Synapse 0.99.5 (2019-05-22) =========================== From c831748f4d243d74e9a3fd2042bc2b35cc30f961 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 16:28:52 +0100 Subject: [PATCH 20/20] 0.99.5.2 --- CHANGES.md | 9 +++++++++ changelog.d/5274.bugfix | 1 - changelog.d/5278.bugfix | 1 - changelog.d/5291.bugfix | 1 - debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 6 files changed, 16 insertions(+), 4 deletions(-) delete mode 100644 changelog.d/5274.bugfix delete mode 100644 changelog.d/5278.bugfix delete mode 100644 changelog.d/5291.bugfix diff --git a/CHANGES.md b/CHANGES.md index 350151b62..0ffdf1aae 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 0.99.5.2 (2019-05-30) +============================= + +Bugfixes +-------- + +- Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. ([\#5274](https://github.com/matrix-org/synapse/issues/5274), [\#5278](https://github.com/matrix-org/synapse/issues/5278), [\#5291](https://github.com/matrix-org/synapse/issues/5291)) + + Synapse 0.99.5.1 (2019-05-22) ============================= diff --git a/changelog.d/5274.bugfix b/changelog.d/5274.bugfix deleted file mode 100644 index 9e14d2028..000000000 --- a/changelog.d/5274.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/changelog.d/5278.bugfix b/changelog.d/5278.bugfix deleted file mode 100644 index 9e14d2028..000000000 --- a/changelog.d/5278.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/changelog.d/5291.bugfix b/changelog.d/5291.bugfix deleted file mode 100644 index 9e14d2028..000000000 --- a/changelog.d/5291.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/debian/changelog b/debian/changelog index 90c6b86c5..6a1a72c0e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (0.99.5.2) stable; urgency=medium + + * New synapse release 0.99.5.2. + + -- Synapse Packaging team Thu, 30 May 2019 16:28:07 +0100 + matrix-synapse-py3 (0.99.5.1) stable; urgency=medium * New synapse release 0.99.5.1. diff --git a/synapse/__init__.py b/synapse/__init__.py index 4f95778ee..d0e8d7c21 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.99.5.1" +__version__ = "0.99.5.2"