mirror of
https://mau.dev/maunium/synapse.git
synced 2024-10-01 01:36:05 -04:00
Migrate the user directory initial population to a background task (#4864)
This commit is contained in:
parent
651ad8bc96
commit
282c97327f
1
changelog.d/4864.feature
Normal file
1
changelog.d/4864.feature
Normal file
@ -0,0 +1 @@
|
|||||||
|
The user directory has been rewritten to make it faster, with less chance of falling behind on a large server.
|
@ -38,18 +38,8 @@ class UserDirectoryHandler(object):
|
|||||||
world_readable or publically joinable room. We keep a database table up to date
|
world_readable or publically joinable room. We keep a database table up to date
|
||||||
by streaming changes of the current state and recalculating whether users should
|
by streaming changes of the current state and recalculating whether users should
|
||||||
be in the directory or not when necessary.
|
be in the directory or not when necessary.
|
||||||
|
|
||||||
For each user in the directory we also store a room_id which is public and that the
|
|
||||||
user is joined to. This allows us to ignore history_visibility and join_rules changes
|
|
||||||
for that user in all other public rooms, as we know they'll still be in at least
|
|
||||||
one public room.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
INITIAL_ROOM_SLEEP_MS = 50
|
|
||||||
INITIAL_ROOM_SLEEP_COUNT = 100
|
|
||||||
INITIAL_ROOM_BATCH_SIZE = 100
|
|
||||||
INITIAL_USER_SLEEP_MS = 10
|
|
||||||
|
|
||||||
def __init__(self, hs):
|
def __init__(self, hs):
|
||||||
self.store = hs.get_datastore()
|
self.store = hs.get_datastore()
|
||||||
self.state = hs.get_state_handler()
|
self.state = hs.get_state_handler()
|
||||||
@ -59,17 +49,6 @@ class UserDirectoryHandler(object):
|
|||||||
self.is_mine_id = hs.is_mine_id
|
self.is_mine_id = hs.is_mine_id
|
||||||
self.update_user_directory = hs.config.update_user_directory
|
self.update_user_directory = hs.config.update_user_directory
|
||||||
self.search_all_users = hs.config.user_directory_search_all_users
|
self.search_all_users = hs.config.user_directory_search_all_users
|
||||||
|
|
||||||
# If we're a worker, don't sleep when doing the initial room work, as it
|
|
||||||
# won't monopolise the master's CPU.
|
|
||||||
if hs.config.worker_app:
|
|
||||||
self.INITIAL_ROOM_SLEEP_MS = 0
|
|
||||||
self.INITIAL_USER_SLEEP_MS = 0
|
|
||||||
|
|
||||||
# When start up for the first time we need to populate the user_directory.
|
|
||||||
# This is a set of user_id's we've inserted already
|
|
||||||
self.initially_handled_users = set()
|
|
||||||
|
|
||||||
# The current position in the current_state_delta stream
|
# The current position in the current_state_delta stream
|
||||||
self.pos = None
|
self.pos = None
|
||||||
|
|
||||||
@ -132,7 +111,7 @@ class UserDirectoryHandler(object):
|
|||||||
# Support users are for diagnostics and should not appear in the user directory.
|
# Support users are for diagnostics and should not appear in the user directory.
|
||||||
if not is_support:
|
if not is_support:
|
||||||
yield self.store.update_profile_in_user_dir(
|
yield self.store.update_profile_in_user_dir(
|
||||||
user_id, profile.display_name, profile.avatar_url, None
|
user_id, profile.display_name, profile.avatar_url
|
||||||
)
|
)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
@ -149,10 +128,9 @@ class UserDirectoryHandler(object):
|
|||||||
if self.pos is None:
|
if self.pos is None:
|
||||||
self.pos = yield self.store.get_user_directory_stream_pos()
|
self.pos = yield self.store.get_user_directory_stream_pos()
|
||||||
|
|
||||||
# If still None then we need to do the initial fill of directory
|
# If still None then the initial background update hasn't happened yet
|
||||||
if self.pos is None:
|
if self.pos is None:
|
||||||
yield self._do_initial_spam()
|
defer.returnValue(None)
|
||||||
self.pos = yield self.store.get_user_directory_stream_pos()
|
|
||||||
|
|
||||||
# Loop round handling deltas until we're up to date
|
# Loop round handling deltas until we're up to date
|
||||||
while True:
|
while True:
|
||||||
@ -173,133 +151,6 @@ class UserDirectoryHandler(object):
|
|||||||
|
|
||||||
yield self.store.update_user_directory_stream_pos(self.pos)
|
yield self.store.update_user_directory_stream_pos(self.pos)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
|
||||||
def _do_initial_spam(self):
|
|
||||||
"""Populates the user_directory from the current state of the DB, used
|
|
||||||
when synapse first starts with user_directory support
|
|
||||||
"""
|
|
||||||
new_pos = yield self.store.get_max_stream_id_in_current_state_deltas()
|
|
||||||
|
|
||||||
# Delete any existing entries just in case there are any
|
|
||||||
yield self.store.delete_all_from_user_dir()
|
|
||||||
|
|
||||||
# We process by going through each existing room at a time.
|
|
||||||
room_ids = yield self.store.get_all_rooms()
|
|
||||||
|
|
||||||
logger.info("Doing initial update of user directory. %d rooms", len(room_ids))
|
|
||||||
num_processed_rooms = 0
|
|
||||||
|
|
||||||
for room_id in room_ids:
|
|
||||||
logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids))
|
|
||||||
yield self._handle_initial_room(room_id)
|
|
||||||
num_processed_rooms += 1
|
|
||||||
yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
|
|
||||||
|
|
||||||
logger.info("Processed all rooms.")
|
|
||||||
|
|
||||||
if self.search_all_users:
|
|
||||||
num_processed_users = 0
|
|
||||||
user_ids = yield self.store.get_all_local_users()
|
|
||||||
logger.info(
|
|
||||||
"Doing initial update of user directory. %d users", len(user_ids)
|
|
||||||
)
|
|
||||||
for user_id in user_ids:
|
|
||||||
# We add profiles for all users even if they don't match the
|
|
||||||
# include pattern, just in case we want to change it in future
|
|
||||||
logger.info(
|
|
||||||
"Handling user %d/%d", num_processed_users + 1, len(user_ids)
|
|
||||||
)
|
|
||||||
yield self._handle_local_user(user_id)
|
|
||||||
num_processed_users += 1
|
|
||||||
yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0)
|
|
||||||
|
|
||||||
logger.info("Processed all users")
|
|
||||||
|
|
||||||
self.initially_handled_users = None
|
|
||||||
|
|
||||||
yield self.store.update_user_directory_stream_pos(new_pos)
|
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
|
||||||
def _handle_initial_room(self, room_id):
|
|
||||||
"""
|
|
||||||
Called when we initially fill out user_directory one room at a time
|
|
||||||
"""
|
|
||||||
is_in_room = yield self.store.is_host_joined(room_id, self.server_name)
|
|
||||||
if not is_in_room:
|
|
||||||
return
|
|
||||||
|
|
||||||
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
|
|
||||||
room_id
|
|
||||||
)
|
|
||||||
|
|
||||||
users_with_profile = yield self.state.get_current_user_in_room(room_id)
|
|
||||||
user_ids = set(users_with_profile)
|
|
||||||
unhandled_users = user_ids - self.initially_handled_users
|
|
||||||
|
|
||||||
yield self.store.add_profiles_to_user_dir(
|
|
||||||
{user_id: users_with_profile[user_id] for user_id in unhandled_users}
|
|
||||||
)
|
|
||||||
|
|
||||||
self.initially_handled_users |= unhandled_users
|
|
||||||
|
|
||||||
# We now go and figure out the new users who share rooms with user entries
|
|
||||||
# We sleep aggressively here as otherwise it can starve resources.
|
|
||||||
# We also batch up inserts/updates, but try to avoid too many at once.
|
|
||||||
to_insert = set()
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
if is_public:
|
|
||||||
for user_id in user_ids:
|
|
||||||
if count % self.INITIAL_ROOM_SLEEP_COUNT == 0:
|
|
||||||
yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
|
|
||||||
|
|
||||||
if self.store.get_if_app_services_interested_in_user(user_id):
|
|
||||||
count += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
to_insert.add(user_id)
|
|
||||||
if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE:
|
|
||||||
yield self.store.add_users_in_public_rooms(room_id, to_insert)
|
|
||||||
to_insert.clear()
|
|
||||||
|
|
||||||
if to_insert:
|
|
||||||
yield self.store.add_users_in_public_rooms(room_id, to_insert)
|
|
||||||
to_insert.clear()
|
|
||||||
else:
|
|
||||||
|
|
||||||
for user_id in user_ids:
|
|
||||||
if count % self.INITIAL_ROOM_SLEEP_COUNT == 0:
|
|
||||||
yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
|
|
||||||
|
|
||||||
if not self.is_mine_id(user_id):
|
|
||||||
count += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
if self.store.get_if_app_services_interested_in_user(user_id):
|
|
||||||
count += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
for other_user_id in user_ids:
|
|
||||||
if user_id == other_user_id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if count % self.INITIAL_ROOM_SLEEP_COUNT == 0:
|
|
||||||
yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
|
|
||||||
count += 1
|
|
||||||
|
|
||||||
user_set = (user_id, other_user_id)
|
|
||||||
to_insert.add(user_set)
|
|
||||||
|
|
||||||
if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE:
|
|
||||||
yield self.store.add_users_who_share_private_room(
|
|
||||||
room_id, not is_public, to_insert
|
|
||||||
)
|
|
||||||
to_insert.clear()
|
|
||||||
|
|
||||||
if to_insert:
|
|
||||||
yield self.store.add_users_who_share_private_room(room_id, to_insert)
|
|
||||||
to_insert.clear()
|
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _handle_deltas(self, deltas):
|
def _handle_deltas(self, deltas):
|
||||||
"""Called with the state deltas to process
|
"""Called with the state deltas to process
|
||||||
@ -449,7 +300,9 @@ class UserDirectoryHandler(object):
|
|||||||
|
|
||||||
row = yield self.store.get_user_in_directory(user_id)
|
row = yield self.store.get_user_in_directory(user_id)
|
||||||
if not row:
|
if not row:
|
||||||
yield self.store.add_profiles_to_user_dir({user_id: profile})
|
yield self.store.update_profile_in_user_dir(
|
||||||
|
user_id, profile.display_name, profile.avatar_url
|
||||||
|
)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _handle_new_user(self, room_id, user_id, profile):
|
def _handle_new_user(self, room_id, user_id, profile):
|
||||||
@ -461,9 +314,9 @@ class UserDirectoryHandler(object):
|
|||||||
"""
|
"""
|
||||||
logger.debug("Adding new user to dir, %r", user_id)
|
logger.debug("Adding new user to dir, %r", user_id)
|
||||||
|
|
||||||
row = yield self.store.get_user_in_directory(user_id)
|
yield self.store.update_profile_in_user_dir(
|
||||||
if not row:
|
user_id, profile.display_name, profile.avatar_url
|
||||||
yield self.store.add_profiles_to_user_dir({user_id: profile})
|
)
|
||||||
|
|
||||||
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
|
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
|
||||||
room_id
|
room_id
|
||||||
@ -479,7 +332,9 @@ class UserDirectoryHandler(object):
|
|||||||
# First, if they're our user then we need to update for every user
|
# First, if they're our user then we need to update for every user
|
||||||
if self.is_mine_id(user_id):
|
if self.is_mine_id(user_id):
|
||||||
|
|
||||||
is_appservice = self.store.get_if_app_services_interested_in_user(user_id)
|
is_appservice = self.store.get_if_app_services_interested_in_user(
|
||||||
|
user_id
|
||||||
|
)
|
||||||
|
|
||||||
# We don't care about appservice users.
|
# We don't care about appservice users.
|
||||||
if not is_appservice:
|
if not is_appservice:
|
||||||
@ -546,9 +401,7 @@ class UserDirectoryHandler(object):
|
|||||||
new_avatar = event.content.get("avatar_url")
|
new_avatar = event.content.get("avatar_url")
|
||||||
|
|
||||||
if prev_name != new_name or prev_avatar != new_avatar:
|
if prev_name != new_name or prev_avatar != new_avatar:
|
||||||
yield self.store.update_profile_in_user_dir(
|
yield self.store.update_profile_in_user_dir(user_id, new_name, new_avatar)
|
||||||
user_id, new_name, new_avatar, room_id
|
|
||||||
)
|
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _get_key_change(self, prev_event_id, event_id, key_name, public_value):
|
def _get_key_change(self, prev_event_id, event_id, key_name, public_value):
|
||||||
|
@ -52,7 +52,9 @@ class BackgroundUpdatePerformance(object):
|
|||||||
Returns:
|
Returns:
|
||||||
A duration in ms as a float
|
A duration in ms as a float
|
||||||
"""
|
"""
|
||||||
if self.total_item_count == 0:
|
if self.avg_duration_ms == 0:
|
||||||
|
return 0
|
||||||
|
elif self.total_item_count == 0:
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
# Use the exponential moving average so that we can adapt to
|
# Use the exponential moving average so that we can adapt to
|
||||||
@ -64,7 +66,9 @@ class BackgroundUpdatePerformance(object):
|
|||||||
Returns:
|
Returns:
|
||||||
A duration in ms as a float
|
A duration in ms as a float
|
||||||
"""
|
"""
|
||||||
if self.total_item_count == 0:
|
if self.total_duration_ms == 0:
|
||||||
|
return 0
|
||||||
|
elif self.total_item_count == 0:
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
return float(self.total_item_count) / float(self.total_duration_ms)
|
return float(self.total_item_count) / float(self.total_duration_ms)
|
||||||
|
30
synapse/storage/schema/delta/53/user_dir_populate.sql
Normal file
30
synapse/storage/schema/delta/53/user_dir_populate.sql
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
/* Copyright 2019 New Vector Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-- Set up staging tables
|
||||||
|
INSERT INTO background_updates (update_name, progress_json) VALUES
|
||||||
|
('populate_user_directory_createtables', '{}');
|
||||||
|
|
||||||
|
-- Run through each room and update the user directory according to who is in it
|
||||||
|
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
|
||||||
|
('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables');
|
||||||
|
|
||||||
|
-- Insert all users, if search_all_users is on
|
||||||
|
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
|
||||||
|
('populate_user_directory_process_users', '{}', 'populate_user_directory_rooms');
|
||||||
|
|
||||||
|
-- Clean up staging tables
|
||||||
|
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
|
||||||
|
('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users');
|
@ -16,12 +16,10 @@
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from six import iteritems
|
|
||||||
|
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
|
||||||
from synapse.api.constants import EventTypes, JoinRules
|
from synapse.api.constants import EventTypes, JoinRules
|
||||||
from synapse.storage._base import SQLBaseStore
|
from synapse.storage.background_updates import BackgroundUpdateStore
|
||||||
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
|
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
|
||||||
from synapse.storage.state import StateFilter
|
from synapse.storage.state import StateFilter
|
||||||
from synapse.types import get_domain_from_id, get_localpart_from_id
|
from synapse.types import get_domain_from_id, get_localpart_from_id
|
||||||
@ -30,7 +28,276 @@ from synapse.util.caches.descriptors import cached
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class UserDirectoryStore(SQLBaseStore):
|
TEMP_TABLE = "_temp_populate_user_directory"
|
||||||
|
|
||||||
|
|
||||||
|
class UserDirectoryStore(BackgroundUpdateStore):
|
||||||
|
def __init__(self, db_conn, hs):
|
||||||
|
super(UserDirectoryStore, self).__init__(db_conn, hs)
|
||||||
|
|
||||||
|
self.server_name = hs.hostname
|
||||||
|
|
||||||
|
self.register_background_update_handler(
|
||||||
|
"populate_user_directory_createtables",
|
||||||
|
self._populate_user_directory_createtables,
|
||||||
|
)
|
||||||
|
self.register_background_update_handler(
|
||||||
|
"populate_user_directory_process_rooms",
|
||||||
|
self._populate_user_directory_process_rooms,
|
||||||
|
)
|
||||||
|
self.register_background_update_handler(
|
||||||
|
"populate_user_directory_process_users",
|
||||||
|
self._populate_user_directory_process_users,
|
||||||
|
)
|
||||||
|
self.register_background_update_handler(
|
||||||
|
"populate_user_directory_cleanup", self._populate_user_directory_cleanup
|
||||||
|
)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def _populate_user_directory_createtables(self, progress, batch_size):
|
||||||
|
|
||||||
|
# Get all the rooms that we want to process.
|
||||||
|
def _make_staging_area(txn):
|
||||||
|
sql = (
|
||||||
|
"CREATE TABLE IF NOT EXISTS "
|
||||||
|
+ TEMP_TABLE
|
||||||
|
+ "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)"
|
||||||
|
)
|
||||||
|
txn.execute(sql)
|
||||||
|
|
||||||
|
sql = (
|
||||||
|
"CREATE TABLE IF NOT EXISTS "
|
||||||
|
+ TEMP_TABLE
|
||||||
|
+ "_position(position TEXT NOT NULL)"
|
||||||
|
)
|
||||||
|
txn.execute(sql)
|
||||||
|
|
||||||
|
# Get rooms we want to process from the database
|
||||||
|
sql = """
|
||||||
|
SELECT room_id, count(*) FROM current_state_events
|
||||||
|
GROUP BY room_id
|
||||||
|
"""
|
||||||
|
txn.execute(sql)
|
||||||
|
rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()]
|
||||||
|
self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms)
|
||||||
|
del rooms
|
||||||
|
|
||||||
|
# If search all users is on, get all the users we want to add.
|
||||||
|
if self.hs.config.user_directory_search_all_users:
|
||||||
|
sql = (
|
||||||
|
"CREATE TABLE IF NOT EXISTS "
|
||||||
|
+ TEMP_TABLE
|
||||||
|
+ "_users(user_id TEXT NOT NULL)"
|
||||||
|
)
|
||||||
|
txn.execute(sql)
|
||||||
|
|
||||||
|
txn.execute("SELECT name FROM users")
|
||||||
|
users = [{"user_id": x[0]} for x in txn.fetchall()]
|
||||||
|
|
||||||
|
self._simple_insert_many_txn(txn, TEMP_TABLE + "_users", users)
|
||||||
|
|
||||||
|
new_pos = yield self.get_max_stream_id_in_current_state_deltas()
|
||||||
|
yield self.runInteraction(
|
||||||
|
"populate_user_directory_temp_build", _make_staging_area
|
||||||
|
)
|
||||||
|
yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos})
|
||||||
|
|
||||||
|
yield self._end_background_update("populate_user_directory_createtables")
|
||||||
|
defer.returnValue(1)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def _populate_user_directory_cleanup(self, progress, batch_size):
|
||||||
|
"""
|
||||||
|
Update the user directory stream position, then clean up the old tables.
|
||||||
|
"""
|
||||||
|
position = yield self._simple_select_one_onecol(
|
||||||
|
TEMP_TABLE + "_position", None, "position"
|
||||||
|
)
|
||||||
|
yield self.update_user_directory_stream_pos(position)
|
||||||
|
|
||||||
|
def _delete_staging_area(txn):
|
||||||
|
txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms")
|
||||||
|
txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users")
|
||||||
|
txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position")
|
||||||
|
|
||||||
|
yield self.runInteraction(
|
||||||
|
"populate_user_directory_cleanup", _delete_staging_area
|
||||||
|
)
|
||||||
|
|
||||||
|
yield self._end_background_update("populate_user_directory_cleanup")
|
||||||
|
defer.returnValue(1)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def _populate_user_directory_process_rooms(self, progress, batch_size):
|
||||||
|
|
||||||
|
state = self.hs.get_state_handler()
|
||||||
|
|
||||||
|
# If we don't have progress filed, delete everything.
|
||||||
|
if not progress:
|
||||||
|
yield self.delete_all_from_user_dir()
|
||||||
|
|
||||||
|
def _get_next_batch(txn):
|
||||||
|
sql = """
|
||||||
|
SELECT room_id FROM %s
|
||||||
|
ORDER BY events DESC
|
||||||
|
LIMIT %s
|
||||||
|
""" % (
|
||||||
|
TEMP_TABLE + "_rooms",
|
||||||
|
str(batch_size),
|
||||||
|
)
|
||||||
|
txn.execute(sql)
|
||||||
|
rooms_to_work_on = txn.fetchall()
|
||||||
|
|
||||||
|
if not rooms_to_work_on:
|
||||||
|
return None
|
||||||
|
|
||||||
|
rooms_to_work_on = [x[0] for x in rooms_to_work_on]
|
||||||
|
|
||||||
|
# Get how many are left to process, so we can give status on how
|
||||||
|
# far we are in processing
|
||||||
|
txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
|
||||||
|
progress["remaining"] = txn.fetchone()[0]
|
||||||
|
|
||||||
|
return rooms_to_work_on
|
||||||
|
|
||||||
|
rooms_to_work_on = yield self.runInteraction(
|
||||||
|
"populate_user_directory_temp_read", _get_next_batch
|
||||||
|
)
|
||||||
|
|
||||||
|
# No more rooms -- complete the transaction.
|
||||||
|
if not rooms_to_work_on:
|
||||||
|
yield self._end_background_update("populate_user_directory_process_rooms")
|
||||||
|
defer.returnValue(1)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Processing the next %d rooms of %d remaining"
|
||||||
|
% (len(rooms_to_work_on), progress["remaining"])
|
||||||
|
)
|
||||||
|
|
||||||
|
for room_id in rooms_to_work_on:
|
||||||
|
is_in_room = yield self.is_host_joined(room_id, self.server_name)
|
||||||
|
|
||||||
|
if is_in_room:
|
||||||
|
is_public = yield self.is_room_world_readable_or_publicly_joinable(
|
||||||
|
room_id
|
||||||
|
)
|
||||||
|
|
||||||
|
users_with_profile = yield state.get_current_user_in_room(room_id)
|
||||||
|
user_ids = set(users_with_profile)
|
||||||
|
|
||||||
|
# Update each user in the user directory.
|
||||||
|
for user_id, profile in users_with_profile.items():
|
||||||
|
yield self.update_profile_in_user_dir(
|
||||||
|
user_id, profile.display_name, profile.avatar_url
|
||||||
|
)
|
||||||
|
|
||||||
|
to_insert = set()
|
||||||
|
|
||||||
|
if is_public:
|
||||||
|
for user_id in user_ids:
|
||||||
|
if self.get_if_app_services_interested_in_user(user_id):
|
||||||
|
continue
|
||||||
|
|
||||||
|
to_insert.add(user_id)
|
||||||
|
|
||||||
|
if to_insert:
|
||||||
|
yield self.add_users_in_public_rooms(room_id, to_insert)
|
||||||
|
to_insert.clear()
|
||||||
|
else:
|
||||||
|
for user_id in user_ids:
|
||||||
|
if not self.hs.is_mine_id(user_id):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if self.get_if_app_services_interested_in_user(user_id):
|
||||||
|
continue
|
||||||
|
|
||||||
|
for other_user_id in user_ids:
|
||||||
|
if user_id == other_user_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
user_set = (user_id, other_user_id)
|
||||||
|
to_insert.add(user_set)
|
||||||
|
|
||||||
|
if to_insert:
|
||||||
|
yield self.add_users_who_share_private_room(room_id, to_insert)
|
||||||
|
to_insert.clear()
|
||||||
|
|
||||||
|
# We've finished a room. Delete it from the table.
|
||||||
|
yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id})
|
||||||
|
# Update the remaining counter.
|
||||||
|
progress["remaining"] -= 1
|
||||||
|
yield self.runInteraction(
|
||||||
|
"populate_user_directory",
|
||||||
|
self._background_update_progress_txn,
|
||||||
|
"populate_user_directory_process_rooms",
|
||||||
|
progress,
|
||||||
|
)
|
||||||
|
|
||||||
|
defer.returnValue(len(rooms_to_work_on))
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def _populate_user_directory_process_users(self, progress, batch_size):
|
||||||
|
"""
|
||||||
|
If search_all_users is enabled, add all of the users to the user directory.
|
||||||
|
"""
|
||||||
|
if not self.hs.config.user_directory_search_all_users:
|
||||||
|
yield self._end_background_update("populate_user_directory_process_users")
|
||||||
|
defer.returnValue(1)
|
||||||
|
|
||||||
|
def _get_next_batch(txn):
|
||||||
|
sql = "SELECT user_id FROM %s LIMIT %s" % (
|
||||||
|
TEMP_TABLE + "_users",
|
||||||
|
str(batch_size),
|
||||||
|
)
|
||||||
|
txn.execute(sql)
|
||||||
|
users_to_work_on = txn.fetchall()
|
||||||
|
|
||||||
|
if not users_to_work_on:
|
||||||
|
return None
|
||||||
|
|
||||||
|
users_to_work_on = [x[0] for x in users_to_work_on]
|
||||||
|
|
||||||
|
# Get how many are left to process, so we can give status on how
|
||||||
|
# far we are in processing
|
||||||
|
sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users"
|
||||||
|
txn.execute(sql)
|
||||||
|
progress["remaining"] = txn.fetchone()[0]
|
||||||
|
|
||||||
|
return users_to_work_on
|
||||||
|
|
||||||
|
users_to_work_on = yield self.runInteraction(
|
||||||
|
"populate_user_directory_temp_read", _get_next_batch
|
||||||
|
)
|
||||||
|
|
||||||
|
# No more users -- complete the transaction.
|
||||||
|
if not users_to_work_on:
|
||||||
|
yield self._end_background_update("populate_user_directory_process_users")
|
||||||
|
defer.returnValue(1)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Processing the next %d users of %d remaining"
|
||||||
|
% (len(users_to_work_on), progress["remaining"])
|
||||||
|
)
|
||||||
|
|
||||||
|
for user_id in users_to_work_on:
|
||||||
|
profile = yield self.get_profileinfo(get_localpart_from_id(user_id))
|
||||||
|
yield self.update_profile_in_user_dir(
|
||||||
|
user_id, profile.display_name, profile.avatar_url
|
||||||
|
)
|
||||||
|
|
||||||
|
# We've finished processing a user. Delete it from the table.
|
||||||
|
yield self._simple_delete_one(TEMP_TABLE + "_users", {"user_id": user_id})
|
||||||
|
# Update the remaining counter.
|
||||||
|
progress["remaining"] -= 1
|
||||||
|
yield self.runInteraction(
|
||||||
|
"populate_user_directory",
|
||||||
|
self._background_update_progress_txn,
|
||||||
|
"populate_user_directory_process_users",
|
||||||
|
progress,
|
||||||
|
)
|
||||||
|
|
||||||
|
defer.returnValue(len(users_to_work_on))
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def is_room_world_readable_or_publicly_joinable(self, room_id):
|
def is_room_world_readable_or_publicly_joinable(self, room_id):
|
||||||
"""Check if the room is either world_readable or publically joinable
|
"""Check if the room is either world_readable or publically joinable
|
||||||
@ -62,89 +329,16 @@ class UserDirectoryStore(SQLBaseStore):
|
|||||||
|
|
||||||
defer.returnValue(False)
|
defer.returnValue(False)
|
||||||
|
|
||||||
def add_profiles_to_user_dir(self, users_with_profile):
|
def update_profile_in_user_dir(self, user_id, display_name, avatar_url):
|
||||||
"""Add profiles to the user directory
|
"""
|
||||||
|
Update or add a user's profile in the user directory.
|
||||||
Args:
|
|
||||||
users_with_profile (dict): Users to add to directory in the form of
|
|
||||||
mapping of user_id -> ProfileInfo
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if isinstance(self.database_engine, PostgresEngine):
|
|
||||||
# We weight the loclpart most highly, then display name and finally
|
|
||||||
# server name
|
|
||||||
sql = """
|
|
||||||
INSERT INTO user_directory_search(user_id, vector)
|
|
||||||
VALUES (?,
|
|
||||||
setweight(to_tsvector('english', ?), 'A')
|
|
||||||
|| setweight(to_tsvector('english', ?), 'D')
|
|
||||||
|| setweight(to_tsvector('english', COALESCE(?, '')), 'B')
|
|
||||||
)
|
|
||||||
"""
|
|
||||||
args = (
|
|
||||||
(
|
|
||||||
user_id,
|
|
||||||
get_localpart_from_id(user_id),
|
|
||||||
get_domain_from_id(user_id),
|
|
||||||
profile.display_name,
|
|
||||||
)
|
|
||||||
for user_id, profile in iteritems(users_with_profile)
|
|
||||||
)
|
|
||||||
elif isinstance(self.database_engine, Sqlite3Engine):
|
|
||||||
sql = """
|
|
||||||
INSERT INTO user_directory_search(user_id, value)
|
|
||||||
VALUES (?,?)
|
|
||||||
"""
|
|
||||||
args = tuple(
|
|
||||||
(
|
|
||||||
user_id,
|
|
||||||
"%s %s" % (user_id, p.display_name) if p.display_name else user_id,
|
|
||||||
)
|
|
||||||
for user_id, p in iteritems(users_with_profile)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# This should be unreachable.
|
|
||||||
raise Exception("Unrecognized database engine")
|
|
||||||
|
|
||||||
def _add_profiles_to_user_dir_txn(txn):
|
|
||||||
txn.executemany(sql, args)
|
|
||||||
self._simple_insert_many_txn(
|
|
||||||
txn,
|
|
||||||
table="user_directory",
|
|
||||||
values=[
|
|
||||||
{
|
|
||||||
"user_id": user_id,
|
|
||||||
"room_id": None,
|
|
||||||
"display_name": profile.display_name,
|
|
||||||
"avatar_url": profile.avatar_url,
|
|
||||||
}
|
|
||||||
for user_id, profile in iteritems(users_with_profile)
|
|
||||||
],
|
|
||||||
)
|
|
||||||
for user_id in users_with_profile:
|
|
||||||
txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
|
|
||||||
|
|
||||||
return self.runInteraction(
|
|
||||||
"add_profiles_to_user_dir", _add_profiles_to_user_dir_txn
|
|
||||||
)
|
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
|
||||||
def update_user_in_user_dir(self, user_id, room_id):
|
|
||||||
yield self._simple_update_one(
|
|
||||||
table="user_directory",
|
|
||||||
keyvalues={"user_id": user_id},
|
|
||||||
updatevalues={"room_id": room_id},
|
|
||||||
desc="update_user_in_user_dir",
|
|
||||||
)
|
|
||||||
self.get_user_in_directory.invalidate((user_id,))
|
|
||||||
|
|
||||||
def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id):
|
|
||||||
def _update_profile_in_user_dir_txn(txn):
|
def _update_profile_in_user_dir_txn(txn):
|
||||||
new_entry = self._simple_upsert_txn(
|
new_entry = self._simple_upsert_txn(
|
||||||
txn,
|
txn,
|
||||||
table="user_directory",
|
table="user_directory",
|
||||||
keyvalues={"user_id": user_id},
|
keyvalues={"user_id": user_id},
|
||||||
insertion_values={"room_id": room_id},
|
|
||||||
values={"display_name": display_name, "avatar_url": avatar_url},
|
values={"display_name": display_name, "avatar_url": avatar_url},
|
||||||
lock=False, # We're only inserter
|
lock=False, # We're only inserter
|
||||||
)
|
)
|
||||||
@ -281,18 +475,6 @@ class UserDirectoryStore(SQLBaseStore):
|
|||||||
|
|
||||||
defer.returnValue(user_ids)
|
defer.returnValue(user_ids)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
|
||||||
def get_all_rooms(self):
|
|
||||||
"""Get all room_ids we've ever known about, in ascending order of "size"
|
|
||||||
"""
|
|
||||||
sql = """
|
|
||||||
SELECT room_id FROM current_state_events
|
|
||||||
GROUP BY room_id
|
|
||||||
ORDER BY count(*) ASC
|
|
||||||
"""
|
|
||||||
rows = yield self._execute("get_all_rooms", None, sql)
|
|
||||||
defer.returnValue([room_id for room_id, in rows])
|
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def get_all_local_users(self):
|
def get_all_local_users(self):
|
||||||
"""Get all local users
|
"""Get all local users
|
||||||
@ -553,8 +735,8 @@ class UserDirectoryStore(SQLBaseStore):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
if self.hs.config.user_directory_search_all_users:
|
if self.hs.config.user_directory_search_all_users:
|
||||||
join_args = ()
|
join_args = (user_id,)
|
||||||
where_clause = "1=1"
|
where_clause = "user_id != ?"
|
||||||
else:
|
else:
|
||||||
join_args = (user_id,)
|
join_args = (user_id,)
|
||||||
where_clause = """
|
where_clause = """
|
||||||
|
@ -163,9 +163,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
|
|||||||
def get_users_in_public_rooms(self):
|
def get_users_in_public_rooms(self):
|
||||||
r = self.get_success(
|
r = self.get_success(
|
||||||
self.store._simple_select_list(
|
self.store._simple_select_list(
|
||||||
"users_in_public_rooms",
|
"users_in_public_rooms", None, ("user_id", "room_id")
|
||||||
None,
|
|
||||||
("user_id", "room_id"),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
retval = []
|
retval = []
|
||||||
@ -182,6 +180,53 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _add_background_updates(self):
|
||||||
|
"""
|
||||||
|
Add the background updates we need to run.
|
||||||
|
"""
|
||||||
|
# Ugh, have to reset this flag
|
||||||
|
self.store._all_done = False
|
||||||
|
|
||||||
|
self.get_success(
|
||||||
|
self.store._simple_insert(
|
||||||
|
"background_updates",
|
||||||
|
{
|
||||||
|
"update_name": "populate_user_directory_createtables",
|
||||||
|
"progress_json": "{}",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.get_success(
|
||||||
|
self.store._simple_insert(
|
||||||
|
"background_updates",
|
||||||
|
{
|
||||||
|
"update_name": "populate_user_directory_process_rooms",
|
||||||
|
"progress_json": "{}",
|
||||||
|
"depends_on": "populate_user_directory_createtables",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.get_success(
|
||||||
|
self.store._simple_insert(
|
||||||
|
"background_updates",
|
||||||
|
{
|
||||||
|
"update_name": "populate_user_directory_process_users",
|
||||||
|
"progress_json": "{}",
|
||||||
|
"depends_on": "populate_user_directory_process_rooms",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.get_success(
|
||||||
|
self.store._simple_insert(
|
||||||
|
"background_updates",
|
||||||
|
{
|
||||||
|
"update_name": "populate_user_directory_cleanup",
|
||||||
|
"progress_json": "{}",
|
||||||
|
"depends_on": "populate_user_directory_process_users",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
def test_initial(self):
|
def test_initial(self):
|
||||||
"""
|
"""
|
||||||
The user directory's initial handler correctly updates the search tables.
|
The user directory's initial handler correctly updates the search tables.
|
||||||
@ -211,26 +256,17 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
|
|||||||
self.assertEqual(shares_private, [])
|
self.assertEqual(shares_private, [])
|
||||||
self.assertEqual(public_users, [])
|
self.assertEqual(public_users, [])
|
||||||
|
|
||||||
# Reset the handled users caches
|
# Do the initial population of the user directory via the background update
|
||||||
self.handler.initially_handled_users = set()
|
self._add_background_updates()
|
||||||
|
|
||||||
# Do the initial population
|
while not self.get_success(self.store.has_completed_background_updates()):
|
||||||
d = self.handler._do_initial_spam()
|
self.get_success(self.store.do_next_background_update(100), by=0.1)
|
||||||
|
|
||||||
# This takes a while, so pump it a bunch of times to get through the
|
|
||||||
# sleep delays
|
|
||||||
for i in range(10):
|
|
||||||
self.pump(1)
|
|
||||||
|
|
||||||
self.get_success(d)
|
|
||||||
|
|
||||||
shares_private = self.get_users_who_share_private_rooms()
|
shares_private = self.get_users_who_share_private_rooms()
|
||||||
public_users = self.get_users_in_public_rooms()
|
public_users = self.get_users_in_public_rooms()
|
||||||
|
|
||||||
# User 1 and User 2 are in the same public room
|
# User 1 and User 2 are in the same public room
|
||||||
self.assertEqual(
|
self.assertEqual(set(public_users), set([(u1, room), (u2, room)]))
|
||||||
set(public_users), set([(u1, room), (u2, room)])
|
|
||||||
)
|
|
||||||
|
|
||||||
# User 1 and User 3 share private rooms
|
# User 1 and User 3 share private rooms
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@ -238,7 +274,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
|
|||||||
set([(u1, u3, private_room), (u3, u1, private_room)]),
|
set([(u1, u3, private_room), (u3, u1, private_room)]),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_search_all_users(self):
|
def test_initial_share_all_users(self):
|
||||||
"""
|
"""
|
||||||
Search all users = True means that a user does not have to share a
|
Search all users = True means that a user does not have to share a
|
||||||
private room with the searching user or be in a public room to be search
|
private room with the searching user or be in a public room to be search
|
||||||
@ -248,33 +284,36 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
|
|||||||
self.hs.config.user_directory_search_all_users = True
|
self.hs.config.user_directory_search_all_users = True
|
||||||
|
|
||||||
u1 = self.register_user("user1", "pass")
|
u1 = self.register_user("user1", "pass")
|
||||||
u1_token = self.login(u1, "pass")
|
self.register_user("user2", "pass")
|
||||||
u2 = self.register_user("user2", "pass")
|
|
||||||
u2_token = self.login(u2, "pass")
|
|
||||||
u3 = self.register_user("user3", "pass")
|
u3 = self.register_user("user3", "pass")
|
||||||
|
|
||||||
# User 1 and User 2 join a room. User 3 never does.
|
# Wipe the user dir
|
||||||
room = self.helper.create_room_as(u1, is_public=True, tok=u1_token)
|
|
||||||
self.helper.invite(room, src=u1, targ=u2, tok=u1_token)
|
|
||||||
self.helper.join(room, user=u2, tok=u2_token)
|
|
||||||
|
|
||||||
self.get_success(self.store.update_user_directory_stream_pos(None))
|
self.get_success(self.store.update_user_directory_stream_pos(None))
|
||||||
self.get_success(self.store.delete_all_from_user_dir())
|
self.get_success(self.store.delete_all_from_user_dir())
|
||||||
|
|
||||||
# Reset the handled users caches
|
# Do the initial population of the user directory via the background update
|
||||||
self.handler.initially_handled_users = set()
|
self._add_background_updates()
|
||||||
|
|
||||||
# Do the initial population
|
while not self.get_success(self.store.has_completed_background_updates()):
|
||||||
d = self.handler._do_initial_spam()
|
self.get_success(self.store.do_next_background_update(100), by=0.1)
|
||||||
|
|
||||||
# This takes a while, so pump it a bunch of times to get through the
|
shares_private = self.get_users_who_share_private_rooms()
|
||||||
# sleep delays
|
public_users = self.get_users_in_public_rooms()
|
||||||
for i in range(10):
|
|
||||||
self.pump(1)
|
|
||||||
|
|
||||||
self.get_success(d)
|
# No users share rooms
|
||||||
|
self.assertEqual(public_users, [])
|
||||||
|
self.assertEqual(self._compress_shared(shares_private), set([]))
|
||||||
|
|
||||||
# Despite not sharing a room, search_all_users means we get a search
|
# Despite not sharing a room, search_all_users means we get a search
|
||||||
# result.
|
# result.
|
||||||
s = self.get_success(self.handler.search_users(u1, u3, 10))
|
s = self.get_success(self.handler.search_users(u1, u3, 10))
|
||||||
self.assertEqual(len(s["results"]), 1)
|
self.assertEqual(len(s["results"]), 1)
|
||||||
|
|
||||||
|
# We can find the other two users
|
||||||
|
s = self.get_success(self.handler.search_users(u1, "user", 10))
|
||||||
|
self.assertEqual(len(s["results"]), 2)
|
||||||
|
|
||||||
|
# Registering a user and then searching for them works.
|
||||||
|
u4 = self.register_user("user4", "pass")
|
||||||
|
s = self.get_success(self.handler.search_users(u1, u4, 10))
|
||||||
|
self.assertEqual(len(s["results"]), 1)
|
||||||
|
@ -16,7 +16,6 @@
|
|||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
|
||||||
from synapse.storage import UserDirectoryStore
|
from synapse.storage import UserDirectoryStore
|
||||||
from synapse.storage.roommember import ProfileInfo
|
|
||||||
|
|
||||||
from tests import unittest
|
from tests import unittest
|
||||||
from tests.utils import setup_test_homeserver
|
from tests.utils import setup_test_homeserver
|
||||||
@ -34,13 +33,9 @@ class UserDirectoryStoreTestCase(unittest.TestCase):
|
|||||||
|
|
||||||
# alice and bob are both in !room_id. bobby is not but shares
|
# alice and bob are both in !room_id. bobby is not but shares
|
||||||
# a homeserver with alice.
|
# a homeserver with alice.
|
||||||
yield self.store.add_profiles_to_user_dir(
|
yield self.store.update_profile_in_user_dir(ALICE, "alice", None)
|
||||||
{
|
yield self.store.update_profile_in_user_dir(BOB, "bob", None)
|
||||||
ALICE: ProfileInfo(None, "alice"),
|
yield self.store.update_profile_in_user_dir(BOBBY, "bobby", None)
|
||||||
BOB: ProfileInfo(None, "bob"),
|
|
||||||
BOBBY: ProfileInfo(None, "bobby"),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
yield self.store.add_users_in_public_rooms(
|
yield self.store.add_users_in_public_rooms(
|
||||||
"!room:id", (ALICE, BOB)
|
"!room:id", (ALICE, BOB)
|
||||||
)
|
)
|
||||||
|
@ -330,10 +330,10 @@ class HomeserverTestCase(TestCase):
|
|||||||
"""
|
"""
|
||||||
self.reactor.pump([by] * 100)
|
self.reactor.pump([by] * 100)
|
||||||
|
|
||||||
def get_success(self, d):
|
def get_success(self, d, by=0.0):
|
||||||
if not isinstance(d, Deferred):
|
if not isinstance(d, Deferred):
|
||||||
return d
|
return d
|
||||||
self.pump()
|
self.pump(by=by)
|
||||||
return self.successResultOf(d)
|
return self.successResultOf(d)
|
||||||
|
|
||||||
def register_user(self, username, password, admin=False):
|
def register_user(self, username, password, admin=False):
|
||||||
|
Loading…
Reference in New Issue
Block a user