Use execute_batch in more places (#9188)

* Use execute_batch in more places

* Newsfile
This commit is contained in:
Erik Johnston 2021-01-21 14:44:12 +00:00 committed by GitHub
parent c55e62548c
commit 7a43482f19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 26 additions and 31 deletions

1
changelog.d/9188.misc Normal file
View File

@ -0,0 +1 @@
Speed up batch insertion when using PostgreSQL.

View File

@ -262,6 +262,12 @@ class LoggingTransaction:
return self.txn.description return self.txn.description
def execute_batch(self, sql: str, args: Iterable[Iterable[Any]]) -> None: def execute_batch(self, sql: str, args: Iterable[Iterable[Any]]) -> None:
"""Similar to `executemany`, except `txn.rowcount` will not be correct
afterwards.
More efficient than `executemany` on PostgreSQL
"""
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
from psycopg2.extras import execute_batch # type: ignore from psycopg2.extras import execute_batch # type: ignore

View File

@ -897,7 +897,7 @@ class DeviceWorkerStore(SQLBaseStore):
DELETE FROM device_lists_outbound_last_success DELETE FROM device_lists_outbound_last_success
WHERE destination = ? AND user_id = ? WHERE destination = ? AND user_id = ?
""" """
txn.executemany(sql, ((row[0], row[1]) for row in rows)) txn.execute_batch(sql, ((row[0], row[1]) for row in rows))
logger.info("Pruned %d device list outbound pokes", count) logger.info("Pruned %d device list outbound pokes", count)
@ -1343,7 +1343,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
# Delete older entries in the table, as we really only care about # Delete older entries in the table, as we really only care about
# when the latest change happened. # when the latest change happened.
txn.executemany( txn.execute_batch(
""" """
DELETE FROM device_lists_stream DELETE FROM device_lists_stream
WHERE user_id = ? AND device_id = ? AND stream_id < ? WHERE user_id = ? AND device_id = ? AND stream_id < ?

View File

@ -487,7 +487,7 @@ class EventPushActionsWorkerStore(SQLBaseStore):
VALUES (?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?)
""" """
txn.executemany( txn.execute_batch(
sql, sql,
( (
_gen_entry(user_id, actions) _gen_entry(user_id, actions)
@ -803,7 +803,7 @@ class EventPushActionsWorkerStore(SQLBaseStore):
], ],
) )
txn.executemany( txn.execute_batch(
""" """
UPDATE event_push_summary UPDATE event_push_summary
SET notif_count = ?, unread_count = ?, stream_ordering = ? SET notif_count = ?, unread_count = ?, stream_ordering = ?

View File

@ -139,8 +139,6 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
max_stream_id = progress["max_stream_id_exclusive"] max_stream_id = progress["max_stream_id_exclusive"]
rows_inserted = progress.get("rows_inserted", 0) rows_inserted = progress.get("rows_inserted", 0)
INSERT_CLUMP_SIZE = 1000
def reindex_txn(txn): def reindex_txn(txn):
sql = ( sql = (
"SELECT stream_ordering, event_id, json FROM events" "SELECT stream_ordering, event_id, json FROM events"
@ -178,9 +176,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?" sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?"
for index in range(0, len(update_rows), INSERT_CLUMP_SIZE): txn.execute_batch(sql, update_rows)
clump = update_rows[index : index + INSERT_CLUMP_SIZE]
txn.executemany(sql, clump)
progress = { progress = {
"target_min_stream_id_inclusive": target_min_stream_id, "target_min_stream_id_inclusive": target_min_stream_id,
@ -210,8 +206,6 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
max_stream_id = progress["max_stream_id_exclusive"] max_stream_id = progress["max_stream_id_exclusive"]
rows_inserted = progress.get("rows_inserted", 0) rows_inserted = progress.get("rows_inserted", 0)
INSERT_CLUMP_SIZE = 1000
def reindex_search_txn(txn): def reindex_search_txn(txn):
sql = ( sql = (
"SELECT stream_ordering, event_id FROM events" "SELECT stream_ordering, event_id FROM events"
@ -256,9 +250,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?" sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?"
for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE): txn.execute_batch(sql, rows_to_update)
clump = rows_to_update[index : index + INSERT_CLUMP_SIZE]
txn.executemany(sql, clump)
progress = { progress = {
"target_min_stream_id_inclusive": target_min_stream_id, "target_min_stream_id_inclusive": target_min_stream_id,

View File

@ -417,7 +417,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
" WHERE media_origin = ? AND media_id = ?" " WHERE media_origin = ? AND media_id = ?"
) )
txn.executemany( txn.execute_batch(
sql, sql,
( (
(time_ms, media_origin, media_id) (time_ms, media_origin, media_id)
@ -430,7 +430,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
" WHERE media_id = ?" " WHERE media_id = ?"
) )
txn.executemany(sql, ((time_ms, media_id) for media_id in local_media)) txn.execute_batch(sql, ((time_ms, media_id) for media_id in local_media))
return await self.db_pool.runInteraction( return await self.db_pool.runInteraction(
"update_cached_last_access_time", update_cache_txn "update_cached_last_access_time", update_cache_txn
@ -557,7 +557,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
sql = "DELETE FROM local_media_repository_url_cache WHERE media_id = ?" sql = "DELETE FROM local_media_repository_url_cache WHERE media_id = ?"
def _delete_url_cache_txn(txn): def _delete_url_cache_txn(txn):
txn.executemany(sql, [(media_id,) for media_id in media_ids]) txn.execute_batch(sql, [(media_id,) for media_id in media_ids])
return await self.db_pool.runInteraction( return await self.db_pool.runInteraction(
"delete_url_cache", _delete_url_cache_txn "delete_url_cache", _delete_url_cache_txn
@ -586,11 +586,11 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
def _delete_url_cache_media_txn(txn): def _delete_url_cache_media_txn(txn):
sql = "DELETE FROM local_media_repository WHERE media_id = ?" sql = "DELETE FROM local_media_repository WHERE media_id = ?"
txn.executemany(sql, [(media_id,) for media_id in media_ids]) txn.execute_batch(sql, [(media_id,) for media_id in media_ids])
sql = "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?" sql = "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?"
txn.executemany(sql, [(media_id,) for media_id in media_ids]) txn.execute_batch(sql, [(media_id,) for media_id in media_ids])
return await self.db_pool.runInteraction( return await self.db_pool.runInteraction(
"delete_url_cache_media", _delete_url_cache_media_txn "delete_url_cache_media", _delete_url_cache_media_txn

View File

@ -172,7 +172,7 @@ class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
) )
# Update backward extremeties # Update backward extremeties
txn.executemany( txn.execute_batch(
"INSERT INTO event_backward_extremities (room_id, event_id)" "INSERT INTO event_backward_extremities (room_id, event_id)"
" VALUES (?, ?)", " VALUES (?, ?)",
[(room_id, event_id) for event_id, in new_backwards_extrems], [(room_id, event_id) for event_id, in new_backwards_extrems],

View File

@ -1104,7 +1104,7 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore):
FROM user_threepids FROM user_threepids
""" """
txn.executemany(sql, [(id_server,) for id_server in id_servers]) txn.execute_batch(sql, [(id_server,) for id_server in id_servers])
if id_servers: if id_servers:
await self.db_pool.runInteraction( await self.db_pool.runInteraction(

View File

@ -873,8 +873,6 @@ class RoomMemberBackgroundUpdateStore(SQLBaseStore):
"max_stream_id_exclusive", self._stream_order_on_start + 1 "max_stream_id_exclusive", self._stream_order_on_start + 1
) )
INSERT_CLUMP_SIZE = 1000
def add_membership_profile_txn(txn): def add_membership_profile_txn(txn):
sql = """ sql = """
SELECT stream_ordering, event_id, events.room_id, event_json.json SELECT stream_ordering, event_id, events.room_id, event_json.json
@ -915,9 +913,7 @@ class RoomMemberBackgroundUpdateStore(SQLBaseStore):
UPDATE room_memberships SET display_name = ?, avatar_url = ? UPDATE room_memberships SET display_name = ?, avatar_url = ?
WHERE event_id = ? AND room_id = ? WHERE event_id = ? AND room_id = ?
""" """
for index in range(0, len(to_update), INSERT_CLUMP_SIZE): txn.execute_batch(to_update_sql, to_update)
clump = to_update[index : index + INSERT_CLUMP_SIZE]
txn.executemany(to_update_sql, clump)
progress = { progress = {
"target_min_stream_id_inclusive": target_min_stream_id, "target_min_stream_id_inclusive": target_min_stream_id,

View File

@ -55,7 +55,7 @@ def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs
# { "ignored_users": "@someone:example.org": {} } # { "ignored_users": "@someone:example.org": {} }
ignored_users = content.get("ignored_users", {}) ignored_users = content.get("ignored_users", {})
if isinstance(ignored_users, dict) and ignored_users: if isinstance(ignored_users, dict) and ignored_users:
cur.executemany(insert_sql, [(user_id, u) for u in ignored_users]) cur.execute_batch(insert_sql, [(user_id, u) for u in ignored_users])
# Add indexes after inserting data for efficiency. # Add indexes after inserting data for efficiency.
logger.info("Adding constraints to ignored_users table") logger.info("Adding constraints to ignored_users table")

View File

@ -63,7 +63,7 @@ class SearchWorkerStore(SQLBaseStore):
for entry in entries for entry in entries
) )
txn.executemany(sql, args) txn.execute_batch(sql, args)
elif isinstance(self.database_engine, Sqlite3Engine): elif isinstance(self.database_engine, Sqlite3Engine):
sql = ( sql = (
@ -75,7 +75,7 @@ class SearchWorkerStore(SQLBaseStore):
for entry in entries for entry in entries
) )
txn.executemany(sql, args) txn.execute_batch(sql, args)
else: else:
# This should be unreachable. # This should be unreachable.
raise Exception("Unrecognized database engine") raise Exception("Unrecognized database engine")

View File

@ -565,11 +565,11 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
) )
logger.info("[purge] removing redundant state groups") logger.info("[purge] removing redundant state groups")
txn.executemany( txn.execute_batch(
"DELETE FROM state_groups_state WHERE state_group = ?", "DELETE FROM state_groups_state WHERE state_group = ?",
((sg,) for sg in state_groups_to_delete), ((sg,) for sg in state_groups_to_delete),
) )
txn.executemany( txn.execute_batch(
"DELETE FROM state_groups WHERE id = ?", "DELETE FROM state_groups WHERE id = ?",
((sg,) for sg in state_groups_to_delete), ((sg,) for sg in state_groups_to_delete),
) )