Analyze user_ips before running deduplication

Because of the table locks taken out by the naive upsert, the table
statistics for user_ips may be out of date. During deduplication it is
important that the correct index is used, as otherwise the query planner
may incorrectly choose a full table scan, which can end up thrashing the
database badly.
This commit is contained in:
Erik Johnston 2019-02-12 11:55:27 +00:00
parent 46b8a79b3a
commit 483ba85c7a
2 changed files with 31 additions and 3 deletions

View File

@ -65,6 +65,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
columns=["last_seen"], columns=["last_seen"],
) )
self.register_background_update_handler(
"user_ips_analyze",
self._analyze_user_ip,
)
self.register_background_update_handler( self.register_background_update_handler(
"user_ips_remove_dupes", "user_ips_remove_dupes",
self._remove_user_ip_dupes, self._remove_user_ip_dupes,
@ -108,6 +113,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
yield self._end_background_update("user_ips_drop_nonunique_index") yield self._end_background_update("user_ips_drop_nonunique_index")
defer.returnValue(1) defer.returnValue(1)
@defer.inlineCallbacks
def _analyze_user_ip(self, progress, batch_size):
    """Background update that runs ``ANALYZE`` on the ``user_ips`` table.

    This is intended to run before the deduplication background update.
    The table may not have been analyzed for ages due to the table locks,
    so its statistics can be stale.

    This will lock out the naive upserts to ``user_ips`` while it happens,
    but the analyze should be quick (28GB table takes ~10s).

    Args:
        progress: Not used by this update.
        batch_size: Not used by this update.

    Returns:
        Deferred that resolves to 1 once the analyze has run and the
        "user_ips_analyze" background update has been marked as ended.
    """
    def user_ips_analyze(txn):
        txn.execute("ANALYZE user_ips")

    # The interaction callback returns nothing, so its result is not bound
    # to a local.
    yield self.runInteraction("user_ips_analyze", user_ips_analyze)

    yield self._end_background_update("user_ips_analyze")

    defer.returnValue(1)
@defer.inlineCallbacks @defer.inlineCallbacks
def _remove_user_ip_dupes(self, progress, batch_size): def _remove_user_ip_dupes(self, progress, batch_size):
# This function works by scanning the user_ips table in batches # This function works by scanning the user_ips table in batches

View File

@ -13,9 +13,13 @@
* limitations under the License. * limitations under the License.
*/ */
-- delete duplicates -- analyze user_ips, to help ensure the correct indices are used
INSERT INTO background_updates (update_name, progress_json) VALUES INSERT INTO background_updates (update_name, progress_json) VALUES
('user_ips_remove_dupes', '{}'); ('user_ips_analyze', '{}');
-- delete duplicates
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('user_ips_remove_dupes', '{}', 'user_ips_analyze');
-- add a new unique index to user_ips table -- add a new unique index to user_ips table
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
@ -23,4 +27,4 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
-- drop the old original index -- drop the old original index
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');