From 483ba85c7a1a8ee9b7eebcc5c07d522c71229c9f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 11:55:27 +0000 Subject: [PATCH 1/3] Analyze user_ips before running deduplication Due to the table locks taken out by the naive upsert, the table statistics may be out of date. During deduplication it is important that the correct index is used as otherwise a full table scan may be incorrectly used, which can end up thrashing the database badly. --- synapse/storage/client_ips.py | 24 +++++++++++++++++++ .../schema/delta/53/user_ips_index.sql | 10 +++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 091d7116c..6f8140626 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -65,6 +65,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): columns=["last_seen"], ) + self.register_background_update_handler( + "user_ips_analyze", + self._analyze_user_ip, + ) + self.register_background_update_handler( "user_ips_remove_dupes", self._remove_user_ip_dupes, @@ -108,6 +113,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): yield self._end_background_update("user_ips_drop_nonunique_index") defer.returnValue(1) + @defer.inlineCallbacks + def _analyze_user_ip(self, progress, batch_size): + # Background update to analyze user_ips table before we run the + # deduplication background update. The table may not have been analyzed + # for ages due to the table locks. + # + # This will lock out the naive upserts to user_ips while it happens, but + # the analyze should be quick (28GB table takes ~10s) + def user_ips_analyze(txn): + txn.execute("ANALYZE user_ips") + + end_last_seen = yield self.runInteraction( + "user_ips_analyze", user_ips_analyze + ) + + yield self._end_background_update("user_ips_analyze") + + defer.returnValue(1) + @defer.inlineCallbacks def _remove_user_ip_dupes(self, progress, batch_size): # This works function works by scanning the user_ips table in batches diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql index 4ca346c11..b812c5794 100644 --- a/synapse/storage/schema/delta/53/user_ips_index.sql +++ b/synapse/storage/schema/delta/53/user_ips_index.sql @@ -13,9 +13,13 @@ * limitations under the License. */ --- delete duplicates + -- analyze user_ips, to help ensure the correct indices are used INSERT INTO background_updates (update_name, progress_json) VALUES - ('user_ips_remove_dupes', '{}'); + ('user_ips_analyze', '{}'); + +-- delete duplicates +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('user_ips_remove_dupes', '{}', 'user_ips_analyze'); -- add a new unique index to user_ips table INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES @@ -23,4 +27,4 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES -- drop the old original index INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); \ No newline at end of file + ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); From b2327eb9cb2c03037acd58b320063ec34968ea81 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 11:58:36 +0000 Subject: [PATCH 2/3] Newsfile --- changelog.d/4627.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/4627.misc diff --git a/changelog.d/4627.misc b/changelog.d/4627.misc new file mode 100644 index 000000000..f1a57dcf9 --- /dev/null +++ b/changelog.d/4627.misc @@ -0,0 +1 @@ +Improve 'user_ips' table deduplication background update From 495ea92350c4a3f6bd92cded5da939326b858d9b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 12:40:42 +0000 Subject: [PATCH 3/3] Fix pep8 --- synapse/storage/client_ips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 6f8140626..cc23d7cdb 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -124,7 +124,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): def user_ips_analyze(txn): txn.execute("ANALYZE user_ips") - end_last_seen = yield self.runInteraction( + yield self.runInteraction( "user_ips_analyze", user_ips_analyze )