mirror of
https://mau.dev/maunium/synapse.git
synced 2024-10-01 05:36:05 +00:00
Speed up rebuilding of the user directory for local users (#15529)
The idea here is to batch up the work.
This commit is contained in:
parent
9890f23469
commit
fc3a878220
1
changelog.d/15529.misc
Normal file
1
changelog.d/15529.misc
Normal file
@ -0,0 +1 @@
|
|||||||
|
Speed up rebuilding of the user directory for local users.
|
@ -386,13 +386,20 @@ class LoggingTransaction:
|
|||||||
self.executemany(sql, args)
|
self.executemany(sql, args)
|
||||||
|
|
||||||
def execute_values(
|
def execute_values(
|
||||||
self, sql: str, values: Iterable[Iterable[Any]], fetch: bool = True
|
self,
|
||||||
|
sql: str,
|
||||||
|
values: Iterable[Iterable[Any]],
|
||||||
|
template: Optional[str] = None,
|
||||||
|
fetch: bool = True,
|
||||||
) -> List[Tuple]:
|
) -> List[Tuple]:
|
||||||
"""Corresponds to psycopg2.extras.execute_values. Only available when
|
"""Corresponds to psycopg2.extras.execute_values. Only available when
|
||||||
using postgres.
|
using postgres.
|
||||||
|
|
||||||
The `fetch` parameter must be set to False if the query does not return
|
The `fetch` parameter must be set to False if the query does not return
|
||||||
rows (e.g. INSERTs).
|
rows (e.g. INSERTs).
|
||||||
|
|
||||||
|
The `template` is the snippet to merge to every item in `values` to
|
||||||
|
compose the query.
|
||||||
"""
|
"""
|
||||||
assert isinstance(self.database_engine, PostgresEngine)
|
assert isinstance(self.database_engine, PostgresEngine)
|
||||||
from psycopg2.extras import execute_values
|
from psycopg2.extras import execute_values
|
||||||
@ -400,7 +407,9 @@ class LoggingTransaction:
|
|||||||
return self._do_execute(
|
return self._do_execute(
|
||||||
# TODO: is it safe for values to be Iterable[Iterable[Any]] here?
|
# TODO: is it safe for values to be Iterable[Iterable[Any]] here?
|
||||||
# https://www.psycopg.org/docs/extras.html?highlight=execute_batch#psycopg2.extras.execute_values says values should be Sequence[Sequence]
|
# https://www.psycopg.org/docs/extras.html?highlight=execute_batch#psycopg2.extras.execute_values says values should be Sequence[Sequence]
|
||||||
lambda the_sql: execute_values(self.txn, the_sql, values, fetch=fetch),
|
lambda the_sql: execute_values(
|
||||||
|
self.txn, the_sql, values, template=template, fetch=fetch
|
||||||
|
),
|
||||||
sql,
|
sql,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -27,6 +27,8 @@ from typing import (
|
|||||||
cast,
|
cast,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
import attr
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Figure out if ICU support is available for searching users.
|
# Figure out if ICU support is available for searching users.
|
||||||
import icu
|
import icu
|
||||||
@ -66,6 +68,19 @@ logger = logging.getLogger(__name__)
|
|||||||
TEMP_TABLE = "_temp_populate_user_directory"
|
TEMP_TABLE = "_temp_populate_user_directory"
|
||||||
|
|
||||||
|
|
||||||
|
@attr.s(auto_attribs=True, frozen=True)
|
||||||
|
class _UserDirProfile:
|
||||||
|
"""Helper type for the user directory code for an entry to be inserted into
|
||||||
|
the directory.
|
||||||
|
"""
|
||||||
|
|
||||||
|
user_id: str
|
||||||
|
|
||||||
|
# If the display name or avatar URL are unexpected types, replace with None
|
||||||
|
display_name: Optional[str] = attr.ib(default=None, converter=non_null_str_or_none)
|
||||||
|
avatar_url: Optional[str] = attr.ib(default=None, converter=non_null_str_or_none)
|
||||||
|
|
||||||
|
|
||||||
class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
# How many records do we calculate before sending it to
|
# How many records do we calculate before sending it to
|
||||||
# add_users_who_share_private_rooms?
|
# add_users_who_share_private_rooms?
|
||||||
@ -381,25 +396,65 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
|||||||
% (len(users_to_work_on), progress["remaining"])
|
% (len(users_to_work_on), progress["remaining"])
|
||||||
)
|
)
|
||||||
|
|
||||||
for user_id in users_to_work_on:
|
# First filter down to users we want to insert into the user directory.
|
||||||
if await self.should_include_local_user_in_dir(user_id):
|
users_to_insert = [
|
||||||
profile = await self.get_profileinfo(get_localpart_from_id(user_id)) # type: ignore[attr-defined]
|
user_id
|
||||||
await self.update_profile_in_user_dir(
|
for user_id in users_to_work_on
|
||||||
user_id, profile.display_name, profile.avatar_url
|
if await self.should_include_local_user_in_dir(user_id)
|
||||||
)
|
]
|
||||||
|
|
||||||
# We've finished processing a user. Delete it from the table.
|
# Next fetch their profiles. Note that the `user_id` here is the
|
||||||
await self.db_pool.simple_delete_one(
|
# *localpart*, and that not all users have profiles.
|
||||||
TEMP_TABLE + "_users", {"user_id": user_id}
|
profile_rows = await self.db_pool.simple_select_many_batch(
|
||||||
)
|
table="profiles",
|
||||||
# Update the remaining counter.
|
column="user_id",
|
||||||
progress["remaining"] -= 1
|
iterable=[get_localpart_from_id(u) for u in users_to_insert],
|
||||||
await self.db_pool.runInteraction(
|
retcols=(
|
||||||
"populate_user_directory",
|
"user_id",
|
||||||
self.db_pool.updates._background_update_progress_txn,
|
"displayname",
|
||||||
"populate_user_directory_process_users",
|
"avatar_url",
|
||||||
progress,
|
),
|
||||||
|
keyvalues={},
|
||||||
|
desc="populate_user_directory_process_users_get_profiles",
|
||||||
|
)
|
||||||
|
profiles = {
|
||||||
|
f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
|
||||||
|
f"@{row['user_id']}:{self.server_name}",
|
||||||
|
row["displayname"],
|
||||||
|
row["avatar_url"],
|
||||||
)
|
)
|
||||||
|
for row in profile_rows
|
||||||
|
}
|
||||||
|
|
||||||
|
profiles_to_insert = [
|
||||||
|
profiles.get(user_id) or _UserDirProfile(user_id)
|
||||||
|
for user_id in users_to_insert
|
||||||
|
]
|
||||||
|
|
||||||
|
# Actually insert the users with their profiles into the directory.
|
||||||
|
await self.db_pool.runInteraction(
|
||||||
|
"populate_user_directory_process_users_insertion",
|
||||||
|
self._update_profiles_in_user_dir_txn,
|
||||||
|
profiles_to_insert,
|
||||||
|
)
|
||||||
|
|
||||||
|
# We've finished processing the users. Delete them from the table.
|
||||||
|
await self.db_pool.simple_delete_many(
|
||||||
|
table=TEMP_TABLE + "_users",
|
||||||
|
column="user_id",
|
||||||
|
iterable=users_to_work_on,
|
||||||
|
keyvalues={},
|
||||||
|
desc="populate_user_directory_process_users_delete",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update the remaining counter.
|
||||||
|
progress["remaining"] -= len(users_to_work_on)
|
||||||
|
await self.db_pool.runInteraction(
|
||||||
|
"populate_user_directory",
|
||||||
|
self.db_pool.updates._background_update_progress_txn,
|
||||||
|
"populate_user_directory_process_users",
|
||||||
|
progress,
|
||||||
|
)
|
||||||
|
|
||||||
return len(users_to_work_on)
|
return len(users_to_work_on)
|
||||||
|
|
||||||
@ -584,72 +639,102 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
|||||||
Update or add a user's profile in the user directory.
|
Update or add a user's profile in the user directory.
|
||||||
If the user is remote, the profile will be marked as not stale.
|
If the user is remote, the profile will be marked as not stale.
|
||||||
"""
|
"""
|
||||||
# If the display name or avatar URL are unexpected types, replace with None.
|
await self.db_pool.runInteraction(
|
||||||
display_name = non_null_str_or_none(display_name)
|
"update_profiles_in_user_dir",
|
||||||
avatar_url = non_null_str_or_none(avatar_url)
|
self._update_profiles_in_user_dir_txn,
|
||||||
|
[_UserDirProfile(user_id, display_name, avatar_url)],
|
||||||
|
)
|
||||||
|
|
||||||
def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None:
|
def _update_profiles_in_user_dir_txn(
|
||||||
self.db_pool.simple_upsert_txn(
|
self,
|
||||||
|
txn: LoggingTransaction,
|
||||||
|
profiles: Sequence[_UserDirProfile],
|
||||||
|
) -> None:
|
||||||
|
self.db_pool.simple_upsert_many_txn(
|
||||||
|
txn,
|
||||||
|
table="user_directory",
|
||||||
|
key_names=("user_id",),
|
||||||
|
key_values=[(p.user_id,) for p in profiles],
|
||||||
|
value_names=("display_name", "avatar_url"),
|
||||||
|
value_values=[
|
||||||
|
(
|
||||||
|
p.display_name,
|
||||||
|
p.avatar_url,
|
||||||
|
)
|
||||||
|
for p in profiles
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Remote users: Make sure the profile is not marked as stale anymore.
|
||||||
|
remote_users = [
|
||||||
|
p.user_id for p in profiles if not self.hs.is_mine_id(p.user_id)
|
||||||
|
]
|
||||||
|
if remote_users:
|
||||||
|
self.db_pool.simple_delete_many_txn(
|
||||||
txn,
|
txn,
|
||||||
table="user_directory",
|
table="user_directory_stale_remote_users",
|
||||||
keyvalues={"user_id": user_id},
|
column="user_id",
|
||||||
values={"display_name": display_name, "avatar_url": avatar_url},
|
values=remote_users,
|
||||||
|
keyvalues={},
|
||||||
)
|
)
|
||||||
|
|
||||||
if not self.hs.is_mine_id(user_id):
|
if isinstance(self.database_engine, PostgresEngine):
|
||||||
# Remote users: Make sure the profile is not marked as stale anymore.
|
# We weight the localpart most highly, then display name and finally
|
||||||
self.db_pool.simple_delete_txn(
|
# server name
|
||||||
txn,
|
template = """
|
||||||
table="user_directory_stale_remote_users",
|
(
|
||||||
keyvalues={"user_id": user_id},
|
%s,
|
||||||
|
setweight(to_tsvector('simple', %s), 'A')
|
||||||
|
|| setweight(to_tsvector('simple', %s), 'D')
|
||||||
|
|| setweight(to_tsvector('simple', COALESCE(%s, '')), 'B')
|
||||||
)
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
# The display name that goes into the database index.
|
sql = """
|
||||||
index_display_name = display_name
|
INSERT INTO user_directory_search(user_id, vector)
|
||||||
if index_display_name is not None:
|
VALUES ? ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
|
||||||
index_display_name = _filter_text_for_index(index_display_name)
|
"""
|
||||||
|
txn.execute_values(
|
||||||
if isinstance(self.database_engine, PostgresEngine):
|
sql,
|
||||||
# We weight the localpart most highly, then display name and finally
|
[
|
||||||
# server name
|
|
||||||
sql = """
|
|
||||||
INSERT INTO user_directory_search(user_id, vector)
|
|
||||||
VALUES (?,
|
|
||||||
setweight(to_tsvector('simple', ?), 'A')
|
|
||||||
|| setweight(to_tsvector('simple', ?), 'D')
|
|
||||||
|| setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
|
|
||||||
) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
|
|
||||||
"""
|
|
||||||
txn.execute(
|
|
||||||
sql,
|
|
||||||
(
|
(
|
||||||
user_id,
|
p.user_id,
|
||||||
get_localpart_from_id(user_id),
|
get_localpart_from_id(p.user_id),
|
||||||
get_domain_from_id(user_id),
|
get_domain_from_id(p.user_id),
|
||||||
index_display_name,
|
_filter_text_for_index(p.display_name)
|
||||||
),
|
if p.display_name
|
||||||
)
|
else None,
|
||||||
elif isinstance(self.database_engine, Sqlite3Engine):
|
)
|
||||||
value = (
|
for p in profiles
|
||||||
"%s %s" % (user_id, index_display_name)
|
],
|
||||||
if index_display_name
|
template=template,
|
||||||
else user_id
|
fetch=False,
|
||||||
)
|
)
|
||||||
self.db_pool.simple_upsert_txn(
|
elif isinstance(self.database_engine, Sqlite3Engine):
|
||||||
txn,
|
values = []
|
||||||
table="user_directory_search",
|
for p in profiles:
|
||||||
keyvalues={"user_id": user_id},
|
if p.display_name is not None:
|
||||||
values={"value": value},
|
index_display_name = _filter_text_for_index(p.display_name)
|
||||||
)
|
value = f"{p.user_id} {index_display_name}"
|
||||||
else:
|
else:
|
||||||
# This should be unreachable.
|
value = p.user_id
|
||||||
raise Exception("Unrecognized database engine")
|
|
||||||
|
|
||||||
txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
|
values.append((value,))
|
||||||
|
|
||||||
await self.db_pool.runInteraction(
|
self.db_pool.simple_upsert_many_txn(
|
||||||
"update_profile_in_user_dir", _update_profile_in_user_dir_txn
|
txn,
|
||||||
)
|
table="user_directory_search",
|
||||||
|
key_names=("user_id",),
|
||||||
|
key_values=[(p.user_id,) for p in profiles],
|
||||||
|
value_names=("value",),
|
||||||
|
value_values=values,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# This should be unreachable.
|
||||||
|
raise Exception("Unrecognized database engine")
|
||||||
|
|
||||||
|
for p in profiles:
|
||||||
|
txn.call_after(self.get_user_in_directory.invalidate, (p.user_id,))
|
||||||
|
|
||||||
async def add_users_who_share_private_room(
|
async def add_users_who_share_private_room(
|
||||||
self, room_id: str, user_id_tuples: Iterable[Tuple[str, str]]
|
self, room_id: str, user_id_tuples: Iterable[Tuple[str, str]]
|
||||||
|
Loading…
Reference in New Issue
Block a user