From 47d99a20d5b4ae58ece15be8a43e96e39a9b943e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 29 Nov 2017 16:46:45 +0000 Subject: [PATCH 01/14] Add user_directory_include_pattern config param to expand search results to additional users Initial commit; this doesn't work yet - the LIKE filtering seems too aggressive. It also needs _do_initial_spam to be aware of prepopulating the whole user_directory_search table with all users... ...and it needs a handle_user_signup() or something to be added so that new signups get incrementally added to the table too. Committing it here as a WIP --- synapse/config/homeserver.py | 3 ++- synapse/config/user_directory.py | 40 ++++++++++++++++++++++++++++++ synapse/handlers/user_directory.py | 4 +-- synapse/server.py | 4 +-- synapse/storage/user_directory.py | 16 +++++++++--- 5 files changed, 58 insertions(+), 9 deletions(-) create mode 100644 synapse/config/user_directory.py diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 05e242aef..bf19cfee2 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -36,6 +36,7 @@ from .workers import WorkerConfig from .push import PushConfig from .spam_checker import SpamCheckerConfig from .groups import GroupsConfig +from .user_directory import UserDirectoryConfig class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, @@ -44,7 +45,7 @@ class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, AppServiceConfig, KeyConfig, SAML2Config, CasConfig, JWTConfig, PasswordConfig, EmailConfig, WorkerConfig, PasswordAuthProviderConfig, PushConfig, - SpamCheckerConfig, GroupsConfig,): + SpamCheckerConfig, GroupsConfig, UserDirectoryConfig,): pass diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py new file mode 100644 index 000000000..03fb2090c --- /dev/null +++ b/synapse/config/user_directory.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# Copyright 2017 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._base import Config + + +class UserDirectoryConfig(Config): + """User Directory Configuration + Configuration for the behaviour of the /user_directory API + """ + + def read_config(self, config): + self.user_directory_include_pattern = "%" + user_directory_config = config.get("user_directory", None) + if user_directory_config: + self.user_directory_include_pattern = ( + user_directory_config.get("include_pattern", "%") + ) + + def default_config(self, config_dir_path, server_name, **kwargs): + return """ + # User Directory configuration + # 'include_pattern' defines an optional SQL LIKE pattern when querying the + # user directory in addition to publicly visible users. Defaults to "%%" + # + #user_directory: + # include_pattern: "%%:%s" + """ % (server_name) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index b5be5d962..51f8340df 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -25,7 +25,7 @@ from synapse.util.async import sleep logger = logging.getLogger(__name__) -class UserDirectoyHandler(object): +class UserDirectoryHandler(object): """Handles querying of and keeping updated the user_directory. N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY @@ -389,7 +389,7 @@ class UserDirectoyHandler(object): """Called when we might need to add user to directory Args: - room_id (str): room_id that user joined or started being public that + room_id (str): room_id that user joined or started being public user_id (str) """ logger.debug("Adding user to dir, %r", user_id) diff --git a/synapse/server.py b/synapse/server.py index 10e3e9a4f..6de33019d 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -50,7 +50,7 @@ from synapse.handlers.events import EventHandler, EventStreamHandler from synapse.handlers.initial_sync import InitialSyncHandler from synapse.handlers.receipts import ReceiptsHandler from synapse.handlers.read_marker import ReadMarkerHandler -from synapse.handlers.user_directory import UserDirectoyHandler +from synapse.handlers.user_directory import UserDirectoryHandler from synapse.handlers.groups_local import GroupsLocalHandler from synapse.handlers.profile import ProfileHandler from synapse.groups.groups_server import GroupsServerHandler @@ -321,7 +321,7 @@ class HomeServer(object): return ActionGenerator(self) def build_user_directory_handler(self): - return UserDirectoyHandler(self) + return UserDirectoryHandler(self) def build_groups_local_handler(self): return GroupsLocalHandler(self) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 5dc5b9582..1022cdf7d 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -629,6 +629,10 @@ class UserDirectoryStore(SQLBaseStore): ] } """ + + include_pattern = self.hs.config.user_directory_include_pattern or "%"; + logger.error("include pattern is %s" % (include_pattern)) + if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -647,7 +651,9 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND share_private ) AS s USING (user_id) WHERE - (s.user_id IS NOT NULL OR p.user_id IS NOT NULL) + (s.user_id IS NOT NULL OR + p.user_id IS NOT NULL OR + d.user_id LIKE ?) AND vector @@ to_tsquery('english', ?) ORDER BY (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) @@ -672,7 +678,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ - args = (user_id, full_query, exact_query, prefix_query, limit + 1,) + args = (user_id, include_pattern, full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -686,7 +692,9 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND share_private ) AS s USING (user_id) WHERE - (s.user_id IS NOT NULL OR p.user_id IS NOT NULL) + (s.user_id IS NOT NULL OR + p.user_id IS NOT NULL OR + d.user_id LIKE ?) AND value MATCH ? ORDER BY rank(matchinfo(user_directory_search)) DESC, @@ -694,7 +702,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ - args = (user_id, search_query, limit + 1) + args = (user_id, include_pattern, search_query, limit + 1) else: # This should be unreachable. raise Exception("Unrecognized database engine") From 3241c7aac3dc114a6abce46e5d241f42ed35a7fe Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 29 Nov 2017 18:27:05 +0000 Subject: [PATCH 02/14] untested WIP but might actually work --- synapse/config/user_directory.py | 5 ++-- synapse/handlers/profile.py | 14 +++++++++ synapse/handlers/user_directory.py | 46 +++++++++++++++++++++++++++--- synapse/storage/_base.py | 2 +- synapse/storage/profile.py | 14 +++++++++ synapse/storage/user_directory.py | 35 +++++++++++++++-------- 6 files changed, 96 insertions(+), 20 deletions(-) diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index 03fb2090c..86177da40 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -22,18 +22,17 @@ class UserDirectoryConfig(Config): """ def read_config(self, config): - self.user_directory_include_pattern = "%" user_directory_config = config.get("user_directory", None) if user_directory_config: self.user_directory_include_pattern = ( - user_directory_config.get("include_pattern", "%") + user_directory_config.get("include_pattern", None) ) def default_config(self, config_dir_path, server_name, **kwargs): return """ # User Directory configuration # 'include_pattern' defines an optional SQL LIKE pattern when querying the - # user directory in addition to publicly visible users. Defaults to "%%" + # user directory in addition to publicly visible users. Defaults to None. # #user_directory: # include_pattern: "%%:%s" diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 5e5b1952d..0a394a10b 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -36,6 +36,8 @@ class ProfileHandler(BaseHandler): "profile", self.on_profile_query ) + self.user_directory_handler = hs.get_user_directory_handler() + self.clock.looping_call(self._update_remote_profile_cache, self.PROFILE_UPDATE_MS) @defer.inlineCallbacks @@ -139,6 +141,12 @@ class ProfileHandler(BaseHandler): target_user.localpart, new_displayname ) + if self.hs.config.user_directory_include_pattern: + profile = yield self.store.get_profileinfo(target_user.localpart) + yield self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) + yield self._update_join_states(requester, target_user) @defer.inlineCallbacks @@ -183,6 +191,12 @@ class ProfileHandler(BaseHandler): target_user.localpart, new_avatar_url ) + if self.hs.config.user_directory_include_pattern: + profile = yield self.store.get_profileinfo(target_user.localpart) + yield self.user_directory_handler.handle_local_profile_change( + target_user.user_id, profile + ) + yield self._update_join_states(requester, target_user) @defer.inlineCallbacks diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 51f8340df..1769e8469 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -110,6 +110,15 @@ class UserDirectoryHandler(object): finally: self._is_processing = False + @defer.inlineCallbacks + def handle_local_profile_change(self, user_id, profile): + """Called to update index of our local user profiles when they change + irrespective of any rooms the user may be in. + """ + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url, + ) + @defer.inlineCallbacks def _unsafe_process(self): # If self.pos is None then means we haven't fetched it from DB @@ -148,16 +157,30 @@ class UserDirectoryHandler(object): room_ids = yield self.store.get_all_rooms() logger.info("Doing initial update of user directory. %d rooms", len(room_ids)) - num_processed_rooms = 1 + num_processed_rooms = 0 for room_id in room_ids: - logger.info("Handling room %d/%d", num_processed_rooms, len(room_ids)) + logger.info("Handling room %d/%d", num_processed_rooms+1, len(room_ids)) yield self._handle_initial_room(room_id) num_processed_rooms += 1 yield sleep(self.INITIAL_SLEEP_MS / 1000.) logger.info("Processed all rooms.") + if self.hs.config.user_directory_include_pattern: + logger.info("Doing initial update of user directory. %d users", len(user_ids)) + num_processed_users = 0 + user_ids = yield self.store.get_all_local_users() + for user_id in user_ids: + # We add profiles for all users even if they don't match the + # include pattern, just in case we want to change it in future + logger.info("Handling user %d/%d", num_processed_users+1, len(user_ids)) + yield self._handle_local_user(user_id) + num_processed_users += 1 + yield sleep(self.INITIAL_SLEEP_MS / 1000.) + + logger.info("Processed all users") + self.initially_handled_users = None self.initially_handled_users_in_public = None self.initially_handled_users_share = None @@ -384,6 +407,21 @@ class UserDirectoryHandler(object): for user_id in users: yield self._handle_remove_user(room_id, user_id) + @defer.inlineCallbacks + def _handle_local_user(self, user_id): + """Adds a new local roomless user into the user_directory_search table. + Used to populate up the user index when we have an + user_directory_include_pattern specified. + """ + logger.debug("Adding new local user to dir, %r", user_id) + + profile = yield self.store.get_profileinfo(user_id) + + row = yield self.store.get_user_in_directory(user_id) + if not row: + yield self.store.add_profiles_to_user_dir(None, {user_id: profile}) + + @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): """Called when we might need to add user to directory @@ -392,7 +430,7 @@ class UserDirectoryHandler(object): room_id (str): room_id that user joined or started being public user_id (str) """ - logger.debug("Adding user to dir, %r", user_id) + logger.debug("Adding new user to dir, %r", user_id) row = yield self.store.get_user_in_directory(user_id) if not row: @@ -407,7 +445,7 @@ class UserDirectoryHandler(object): if not row: yield self.store.add_users_to_public_room(room_id, [user_id]) else: - logger.debug("Not adding user to public dir, %r", user_id) + logger.debug("Not adding new user to public dir, %r", user_id) # Now we update users who share rooms with users. We do this by getting # all the current users in the room and seeing which aren't already diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index e6eefdd6f..0fdf49a2f 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -547,7 +547,7 @@ class SQLBaseStore(object): def _simple_select_one(self, table, keyvalues, retcols, allow_none=False, desc="_simple_select_one"): """Executes a SELECT query on the named table, which is expected to - return a single row, returning a single column from it. + return a single row, returning multiple columns from it. Args: table : string giving the table name diff --git a/synapse/storage/profile.py b/synapse/storage/profile.py index beea3102f..484ad59b6 100644 --- a/synapse/storage/profile.py +++ b/synapse/storage/profile.py @@ -15,6 +15,8 @@ from twisted.internet import defer +from synapse.storage.roommember import ProfileInfo + from ._base import SQLBaseStore @@ -26,6 +28,18 @@ class ProfileStore(SQLBaseStore): desc="create_profile", ) + def get_profileinfo(self, user_localpart): + profile = self._simple_select_one( + table="profiles", + keyvalues={"user_id": user_localpart}, + retcols=("displayname", "avatar_url"), + desc="get_profileinfo", + ) + return ProfileInfo( + avatar_url=profile.avatar_url, + displayname=profile.displayname, + ) + def get_profile_displayname(self, user_localpart): return self._simple_select_one_onecol( table="profiles", diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 1022cdf7d..1cc73e387 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -164,7 +164,7 @@ class UserDirectoryStore(SQLBaseStore): ) if isinstance(self.database_engine, PostgresEngine): - # We weight the loclpart most highly, then display name and finally + # We weight the localpart most highly, then display name and finally # server name if new_entry: sql = """ @@ -317,6 +317,16 @@ class UserDirectoryStore(SQLBaseStore): rows = yield self._execute("get_all_rooms", None, sql) defer.returnValue([room_id for room_id, in rows]) + @defer.inlineCallbacks + def get_all_local_users(self): + """Get all local users + """ + sql = """ + SELECT name FROM users + """ + rows = yield self._execute("get_all_local_users", None, sql) + defer.returnValue([name for name, in rows]) + def add_users_who_share_room(self, room_id, share_private, user_id_tuples): """Insert entries into the users_who_share_rooms table. The first user should be a local user. @@ -630,7 +640,12 @@ class UserDirectoryStore(SQLBaseStore): } """ - include_pattern = self.hs.config.user_directory_include_pattern or "%"; + include_pattern_clause = "" + if self.hs.config.user_directory_include_pattern: + include_pattern_clause = "OR d.user_id LIKE '%s'" % ( + self.hs.config.user_directory_include_pattern + ) + logger.error("include pattern is %s" % (include_pattern)) if isinstance(self.database_engine, PostgresEngine): @@ -651,9 +666,7 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND share_private ) AS s USING (user_id) WHERE - (s.user_id IS NOT NULL OR - p.user_id IS NOT NULL OR - d.user_id LIKE ?) + (s.user_id IS NOT NULL OR p.user_id IS NOT NULL %s) AND vector @@ to_tsquery('english', ?) ORDER BY (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) @@ -677,8 +690,8 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ - args = (user_id, include_pattern, full_query, exact_query, prefix_query, limit + 1,) + """ % ( include_pattern_clause ) + args = (user_id, full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -692,17 +705,15 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND share_private ) AS s USING (user_id) WHERE - (s.user_id IS NOT NULL OR - p.user_id IS NOT NULL OR - d.user_id LIKE ?) + (s.user_id IS NOT NULL OR p.user_id IS NOT NULL %s) AND value MATCH ? ORDER BY rank(matchinfo(user_directory_search)) DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? - """ - args = (user_id, include_pattern, search_query, limit + 1) + """ % ( include_pattern_clause ) + args = (user_id, search_query, limit + 1) else: # This should be unreachable. raise Exception("Unrecognized database engine") From cd3697e8b781a79b91a9d0c3cb8cc3201e8f3bc8 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 29 Nov 2017 18:33:34 +0000 Subject: [PATCH 03/14] kick the user_directory index when new users register --- synapse/handlers/register.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index f6e7e5856..5db106dfc 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -38,6 +38,7 @@ class RegistrationHandler(BaseHandler): self.auth = hs.get_auth() self._auth_handler = hs.get_auth_handler() self.profile_handler = hs.get_profile_handler() + self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self._next_generated_user_id = None @@ -165,6 +166,13 @@ class RegistrationHandler(BaseHandler): ), admin=admin, ) + + if self.hs.config.user_directory_include_pattern: + profile = yield self.store.get_profileinfo(localpart) + yield self.user_directory_handler.handle_local_profile_change( + user_id, profile + ) + else: # autogen a sequential user ID attempts = 0 From a4bb133b6880c6adffce5feef3681504730cd484 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 30 Nov 2017 01:17:15 +0000 Subject: [PATCH 04/14] fix thinkos galore --- synapse/handlers/user_directory.py | 10 ++++++---- synapse/storage/profile.py | 31 +++++++++++++++++++++--------- synapse/storage/user_directory.py | 20 ++++++++++++------- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 1769e8469..b902f5d4e 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -20,6 +20,7 @@ from synapse.api.constants import EventTypes, JoinRules, Membership from synapse.storage.roommember import ProfileInfo from synapse.util.metrics import Measure from synapse.util.async import sleep +from synapse.types import get_localpart_from_id logger = logging.getLogger(__name__) @@ -53,6 +54,7 @@ class UserDirectoryHandler(object): self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory + self.include_pattern = hs.config.user_directory_include_pattern # When start up for the first time we need to populate the user_directory. # This is a set of user_id's we've inserted already @@ -116,7 +118,7 @@ class UserDirectoryHandler(object): irrespective of any rooms the user may be in. """ yield self.store.update_profile_in_user_dir( - user_id, profile.display_name, profile.avatar_url, + user_id, profile.display_name, profile.avatar_url, None, ) @defer.inlineCallbacks @@ -167,10 +169,10 @@ class UserDirectoryHandler(object): logger.info("Processed all rooms.") - if self.hs.config.user_directory_include_pattern: - logger.info("Doing initial update of user directory. %d users", len(user_ids)) + if self.include_pattern: num_processed_users = 0 user_ids = yield self.store.get_all_local_users() + logger.info("Doing initial update of user directory. %d users", len(user_ids)) for user_id in user_ids: # We add profiles for all users even if they don't match the # include pattern, just in case we want to change it in future @@ -415,7 +417,7 @@ class UserDirectoryHandler(object): """ logger.debug("Adding new local user to dir, %r", user_id) - profile = yield self.store.get_profileinfo(user_id) + profile = yield self.store.get_profileinfo(get_localpart_from_id(user_id)) row = yield self.store.get_user_in_directory(user_id) if not row: diff --git a/synapse/storage/profile.py b/synapse/storage/profile.py index 484ad59b6..4e7b7c253 100644 --- a/synapse/storage/profile.py +++ b/synapse/storage/profile.py @@ -16,6 +16,7 @@ from twisted.internet import defer from synapse.storage.roommember import ProfileInfo +from synapse.api.errors import StoreError from ._base import SQLBaseStore @@ -28,16 +29,28 @@ class ProfileStore(SQLBaseStore): desc="create_profile", ) + @defer.inlineCallbacks def get_profileinfo(self, user_localpart): - profile = self._simple_select_one( - table="profiles", - keyvalues={"user_id": user_localpart}, - retcols=("displayname", "avatar_url"), - desc="get_profileinfo", - ) - return ProfileInfo( - avatar_url=profile.avatar_url, - displayname=profile.displayname, + try: + profile = yield self._simple_select_one( + table="profiles", + keyvalues={"user_id": user_localpart}, + retcols=("displayname", "avatar_url"), + desc="get_profileinfo", + ) + except StoreError, e: + if e.code == 404: + # no match + defer.returnValue(ProfileInfo(None, None)) + return + else: + raise + + defer.returnValue( + ProfileInfo( + avatar_url=profile['avatar_url'], + display_name=profile['displayname'], + ) ) def get_profile_displayname(self, user_localpart): diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 1cc73e387..d8c965712 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -640,13 +640,17 @@ class UserDirectoryStore(SQLBaseStore): } """ - include_pattern_clause = "" + include_pattern_join = "" + include_pattern_where_clause = "" if self.hs.config.user_directory_include_pattern: - include_pattern_clause = "OR d.user_id LIKE '%s'" % ( - self.hs.config.user_directory_include_pattern - ) + include_pattern_join = """ + LEFT JOIN ( + SELECT user_id FROM user_directory + WHERE user_id LIKE '%s' + ) AS ld USING (user_id) + """ % ( self.hs.config.user_directory_include_pattern ) - logger.error("include pattern is %s" % (include_pattern)) + include_pattern_where_clause = "OR ld.user_id IS NOT NULL" if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -665,6 +669,7 @@ class UserDirectoryStore(SQLBaseStore): SELECT other_user_id AS user_id FROM users_who_share_rooms WHERE user_id = ? AND share_private ) AS s USING (user_id) + %s WHERE (s.user_id IS NOT NULL OR p.user_id IS NOT NULL %s) AND vector @@ to_tsquery('english', ?) @@ -690,7 +695,7 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( include_pattern_clause ) + """ % ( include_pattern_join, include_pattern_where_clause ) args = (user_id, full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -704,6 +709,7 @@ class UserDirectoryStore(SQLBaseStore): SELECT other_user_id AS user_id FROM users_who_share_rooms WHERE user_id = ? AND share_private ) AS s USING (user_id) + %s WHERE (s.user_id IS NOT NULL OR p.user_id IS NOT NULL %s) AND value MATCH ? @@ -712,7 +718,7 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( include_pattern_clause ) + """ % ( include_pattern_join, include_pattern_where_clause ) args = (user_id, search_query, limit + 1) else: # This should be unreachable. From 4b1fceb913dbcd7472346e2d31c6cd4d99424f9a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 30 Nov 2017 01:34:03 +0000 Subject: [PATCH 05/14] fix alternation operator for FTS4 - how did this ever work!? --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index d8c965712..44b7d8906 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -748,7 +748,7 @@ def _parse_query_sqlite(search_term): # Pull out the individual words, discarding any non-word characters. results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) - return " & ".join("(%s* | %s)" % (result, result,) for result in results) + return " & ".join("(%s* OR %s)" % (result, result,) for result in results) def _parse_query_postgres(search_term): From f61e107f6351d0b5d0de76ec60912be915a7108c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 30 Nov 2017 01:43:50 +0000 Subject: [PATCH 06/14] remove null constraint on user_dir.room_id --- .../delta/46/user_dir_null_room_ids.sql | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 synapse/storage/schema/delta/46/user_dir_null_room_ids.sql diff --git a/synapse/storage/schema/delta/46/user_dir_null_room_ids.sql b/synapse/storage/schema/delta/46/user_dir_null_room_ids.sql new file mode 100644 index 000000000..cb0d5a257 --- /dev/null +++ b/synapse/storage/schema/delta/46/user_dir_null_room_ids.sql @@ -0,0 +1,35 @@ +/* Copyright 2017 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- change the user_directory table to also cover global local user profiles +-- rather than just profiles within specific rooms. + +CREATE TABLE user_directory2 ( + user_id TEXT NOT NULL, + room_id TEXT, + display_name TEXT, + avatar_url TEXT +); + +INSERT INTO user_directory2(user_id, room_id, display_name, avatar_url) + SELECT user_id, room_id, display_name, avatar_url from user_directory; + +DROP TABLE user_directory; +ALTER TABLE user_directory2 RENAME TO user_directory; + +-- create indexes after doing the inserts because that's more efficient. +-- it also means we can give it the same name as the old one without renaming. +CREATE INDEX user_directory_room_idx ON user_directory(room_id); +CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id); From 5406392f8b4790e36bb60758436b18594ae7a8e0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 30 Nov 2017 01:45:34 +0000 Subject: [PATCH 07/14] specify default user_directory_include_pattern --- synapse/config/user_directory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index 86177da40..cf358fe8a 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -22,6 +22,7 @@ class UserDirectoryConfig(Config): """ def read_config(self, config): + self.user_directory_include_pattern = None user_directory_config = config.get("user_directory", None) if user_directory_config: self.user_directory_include_pattern = ( From 1bd40ca73e1a6dc83d0a6c96f52071553960b3a8 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Dec 2017 14:58:39 +0000 Subject: [PATCH 08/14] switch to a simpler 'search_all_users' button as per review feedback --- synapse/config/user_directory.py | 15 +++++------ synapse/handlers/profile.py | 4 +-- synapse/handlers/register.py | 2 +- synapse/handlers/user_directory.py | 6 ++--- synapse/storage/user_directory.py | 40 ++++++++++++------------------ 5 files changed, 30 insertions(+), 37 deletions(-) diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index cf358fe8a..55b4e553e 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -22,19 +22,20 @@ class UserDirectoryConfig(Config): """ def read_config(self, config): - self.user_directory_include_pattern = None + self.user_directory_search_all_users = False user_directory_config = config.get("user_directory", None) if user_directory_config: - self.user_directory_include_pattern = ( - user_directory_config.get("include_pattern", None) + self.user_directory_search_all_users = ( + user_directory_config.get("search_all_users", False) ) def default_config(self, config_dir_path, server_name, **kwargs): return """ # User Directory configuration - # 'include_pattern' defines an optional SQL LIKE pattern when querying the - # user directory in addition to publicly visible users. Defaults to None. + # 'search_all_users' defines whether to search all users visible to your HS + # when searching the user directory, rather than limiting to users visible + # in public rooms. Defaults to false. # #user_directory: - # include_pattern: "%%:%s" - """ % (server_name) + # search_all_users: false + """ diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 0a394a10b..50e81a20e 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -141,7 +141,7 @@ class ProfileHandler(BaseHandler): target_user.localpart, new_displayname ) - if self.hs.config.user_directory_include_pattern: + if self.hs.config.user_directory_search_all_users: profile = yield self.store.get_profileinfo(target_user.localpart) yield self.user_directory_handler.handle_local_profile_change( target_user.to_string(), profile @@ -191,7 +191,7 @@ class ProfileHandler(BaseHandler): target_user.localpart, new_avatar_url ) - if self.hs.config.user_directory_include_pattern: + if self.hs.config.user_directory_search_all_users: profile = yield self.store.get_profileinfo(target_user.localpart) yield self.user_directory_handler.handle_local_profile_change( target_user.user_id, profile diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 5db106dfc..4bc6ef51f 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -167,7 +167,7 @@ class RegistrationHandler(BaseHandler): admin=admin, ) - if self.hs.config.user_directory_include_pattern: + if self.hs.config.user_directory_search_all_users: profile = yield self.store.get_profileinfo(localpart) yield self.user_directory_handler.handle_local_profile_change( user_id, profile diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index b902f5d4e..c1f8e20bf 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -54,7 +54,7 @@ class UserDirectoryHandler(object): self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory - self.include_pattern = hs.config.user_directory_include_pattern + self.search_all_users = hs.config.user_directory_search_all_users # When start up for the first time we need to populate the user_directory. # This is a set of user_id's we've inserted already @@ -169,7 +169,7 @@ class UserDirectoryHandler(object): logger.info("Processed all rooms.") - if self.include_pattern: + if self.search_all_users: num_processed_users = 0 user_ids = yield self.store.get_all_local_users() logger.info("Doing initial update of user directory. %d users", len(user_ids)) @@ -413,7 +413,7 @@ class UserDirectoryHandler(object): def _handle_local_user(self, user_id): """Adds a new local roomless user into the user_directory_search table. Used to populate up the user index when we have an - user_directory_include_pattern specified. + user_directory_search_all_users specified. """ logger.debug("Adding new local user to dir, %r", user_id) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 44b7d8906..f9d2f8e60 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -640,17 +640,19 @@ class UserDirectoryStore(SQLBaseStore): } """ - include_pattern_join = "" - include_pattern_where_clause = "" - if self.hs.config.user_directory_include_pattern: - include_pattern_join = """ - LEFT JOIN ( - SELECT user_id FROM user_directory - WHERE user_id LIKE '%s' - ) AS ld USING (user_id) - """ % ( self.hs.config.user_directory_include_pattern ) - include_pattern_where_clause = "OR ld.user_id IS NOT NULL" + if self.hs.config.user_directory_search_all_users: + join_clause = "" + where_clause = "?<>''" # naughty hack to keep the same number of binds + else: + join_clause = """ + LEFT JOIN users_in_public_rooms AS p USING (user_id) + LEFT JOIN ( + SELECT other_user_id AS user_id FROM users_who_share_rooms + WHERE user_id = ? AND share_private + ) AS s USING (user_id) + """ + where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -664,14 +666,9 @@ class UserDirectoryStore(SQLBaseStore): SELECT d.user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory AS d USING (user_id) - LEFT JOIN users_in_public_rooms AS p USING (user_id) - LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_rooms - WHERE user_id = ? AND share_private - ) AS s USING (user_id) %s WHERE - (s.user_id IS NOT NULL OR p.user_id IS NOT NULL %s) + %s AND vector @@ to_tsquery('english', ?) ORDER BY (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) @@ -695,7 +692,7 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( include_pattern_join, include_pattern_where_clause ) + """ % ( join_clause, where_clause ) args = (user_id, full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -704,21 +701,16 @@ class UserDirectoryStore(SQLBaseStore): SELECT d.user_id, display_name, avatar_url FROM user_directory_search INNER JOIN user_directory AS d USING (user_id) - LEFT JOIN users_in_public_rooms AS p USING (user_id) - LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_rooms - WHERE user_id = ? AND share_private - ) AS s USING (user_id) %s WHERE - (s.user_id IS NOT NULL OR p.user_id IS NOT NULL %s) + %s AND value MATCH ? ORDER BY rank(matchinfo(user_directory_search)) DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( include_pattern_join, include_pattern_where_clause ) + """ % ( join_clause, where_clause ) args = (user_id, search_query, limit + 1) else: # This should be unreachable. From 74e0cc74ceb48a8c55615180f67406b339c88a18 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Dec 2017 15:11:38 +0000 Subject: [PATCH 09/14] fix pep8 and tests --- synapse/handlers/profile.py | 2 +- synapse/handlers/user_directory.py | 5 ++--- synapse/storage/user_directory.py | 7 +++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 50e81a20e..9800e2445 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -194,7 +194,7 @@ class ProfileHandler(BaseHandler): if self.hs.config.user_directory_search_all_users: profile = yield self.store.get_profileinfo(target_user.localpart) yield self.user_directory_handler.handle_local_profile_change( - target_user.user_id, profile + target_user.to_string(), profile ) yield self._update_join_states(requester, target_user) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index c1f8e20bf..667e98a21 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -162,7 +162,7 @@ class UserDirectoryHandler(object): num_processed_rooms = 0 for room_id in room_ids: - logger.info("Handling room %d/%d", num_processed_rooms+1, len(room_ids)) + logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) yield self._handle_initial_room(room_id) num_processed_rooms += 1 yield sleep(self.INITIAL_SLEEP_MS / 1000.) @@ -176,7 +176,7 @@ class UserDirectoryHandler(object): for user_id in user_ids: # We add profiles for all users even if they don't match the # include pattern, just in case we want to change it in future - logger.info("Handling user %d/%d", num_processed_users+1, len(user_ids)) + logger.info("Handling user %d/%d", num_processed_users + 1, len(user_ids)) yield self._handle_local_user(user_id) num_processed_users += 1 yield sleep(self.INITIAL_SLEEP_MS / 1000.) @@ -423,7 +423,6 @@ class UserDirectoryHandler(object): if not row: yield self.store.add_profiles_to_user_dir(None, {user_id: profile}) - @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): """Called when we might need to add user to directory diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index f9d2f8e60..c9bff408e 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -640,10 +640,9 @@ class UserDirectoryStore(SQLBaseStore): } """ - if self.hs.config.user_directory_search_all_users: join_clause = "" - where_clause = "?<>''" # naughty hack to keep the same number of binds + where_clause = "?<>''" # naughty hack to keep the same number of binds else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) @@ -692,7 +691,7 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( join_clause, where_clause ) + """ % (join_clause, where_clause) args = (user_id, full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -710,7 +709,7 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( join_clause, where_clause ) + """ % (join_clause, where_clause) args = (user_id, search_query, limit + 1) else: # This should be unreachable. From 95f8a713dc614459421288188723fab79636c3b1 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Dec 2017 16:56:25 +0000 Subject: [PATCH 10/14] erik told me to --- tests/handlers/test_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index dbe50383d..8c6815bcd 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -58,7 +58,7 @@ class TypingNotificationsTestCase(unittest.TestCase): self.mock_federation_resource = MockHttpResource() - mock_notifier = Mock(spec=["on_new_event"]) + mock_notifier = Mock() self.on_new_event = mock_notifier.on_new_event self.auth = Mock(spec=[]) From 7b86c1fdcdb71cde499e634e97685dfc47d3e0d0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Dec 2017 17:10:03 +0000 Subject: [PATCH 11/14] try make tests work a bit more... --- tests/handlers/test_typing.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 8c6815bcd..fcd380b03 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -76,6 +76,9 @@ class TypingNotificationsTestCase(unittest.TestCase): "set_received_txn_response", "get_destination_retry_timings", "get_devices_by_remote", + # Bits that user_directory needs + "get_user_directory_stream_pos", + "get_current_state_deltas", ]), state_handler=self.state_handler, handlers=None, @@ -122,6 +125,15 @@ class TypingNotificationsTestCase(unittest.TestCase): return set(str(u) for u in self.room_members) self.state_handler.get_current_user_in_room = get_current_user_in_room + self.datastore.get_user_directory_stream_pos.return_value = ( + # we deliberately return a non-None stream pos to avoid doing an initial_spam + defer.succeed(1) + ) + + self.datastore.get_current_state_deltas.return_value = ( + None + ) + self.auth.check_joined_room = check_joined_room self.datastore.get_to_device_stream_token = lambda: 0 From b11dca20252af94cfad900bf7aa1915844cb07b1 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Dec 2017 17:51:33 +0000 Subject: [PATCH 12/14] better doc --- synapse/config/user_directory.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index 55b4e553e..38e894784 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -32,9 +32,12 @@ class UserDirectoryConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ # User Directory configuration + # # 'search_all_users' defines whether to search all users visible to your HS # when searching the user directory, rather than limiting to users visible - # in public rooms. Defaults to false. + # in public rooms. Defaults to false. If you set it True, you'll have to run + # UPDATE user_directory_stream_pos SET stream_id = NULL; + # on your database to tell it to rebuild the user_directory search indexes. # #user_directory: # search_all_users: false From c22e73293a7c7f26e90385b4e85baffa1356a9b5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Dec 2017 18:05:28 +0000 Subject: [PATCH 13/14] speed up the rate of initial spam for users --- synapse/handlers/user_directory.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 667e98a21..714f0195c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -42,9 +42,10 @@ class UserDirectoryHandler(object): one public room. """ - INITIAL_SLEEP_MS = 50 - INITIAL_SLEEP_COUNT = 100 - INITIAL_BATCH_SIZE = 100 + INITIAL_ROOM_SLEEP_MS = 50 + INITIAL_ROOM_SLEEP_COUNT = 100 + INITIAL_ROOM_BATCH_SIZE = 100 + INITIAL_USER_SLEEP_MS = 10 def __init__(self, hs): self.store = hs.get_datastore() @@ -165,7 +166,7 @@ class UserDirectoryHandler(object): logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) yield self._handle_initial_room(room_id) num_processed_rooms += 1 - yield sleep(self.INITIAL_SLEEP_MS / 1000.) + yield sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) logger.info("Processed all rooms.") @@ -179,7 +180,7 @@ class UserDirectoryHandler(object): logger.info("Handling user %d/%d", num_processed_users + 1, len(user_ids)) yield self._handle_local_user(user_id) num_processed_users += 1 - yield sleep(self.INITIAL_SLEEP_MS / 1000.) + yield sleep(self.INITIAL_USER_SLEEP_MS / 1000.) logger.info("Processed all users") @@ -226,8 +227,8 @@ class UserDirectoryHandler(object): to_update = set() count = 0 for user_id in user_ids: - if count % self.INITIAL_SLEEP_COUNT == 0: - yield sleep(self.INITIAL_SLEEP_MS / 1000.) + if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: + yield sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) if not self.is_mine_id(user_id): count += 1 @@ -241,8 +242,8 @@ class UserDirectoryHandler(object): if user_id == other_user_id: continue - if count % self.INITIAL_SLEEP_COUNT == 0: - yield sleep(self.INITIAL_SLEEP_MS / 1000.) + if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: + yield sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) count += 1 user_set = (user_id, other_user_id) @@ -262,13 +263,13 @@ class UserDirectoryHandler(object): else: self.initially_handled_users_share_private_room.add(user_set) - if len(to_insert) > self.INITIAL_BATCH_SIZE: + if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: yield self.store.add_users_who_share_room( room_id, not is_public, to_insert, ) to_insert.clear() - if len(to_update) > self.INITIAL_BATCH_SIZE: + if len(to_update) > self.INITIAL_ROOM_BATCH_SIZE: yield self.store.update_users_who_share_room( room_id, not is_public, to_update, ) From cdc2cb5d11860f013c58923779a2226620453309 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 5 Dec 2017 11:09:31 +0000 Subject: [PATCH 14/14] fix StoreError syntax --- synapse/storage/profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/profile.py b/synapse/storage/profile.py index 4e7b7c253..ec02e73bc 100644 --- a/synapse/storage/profile.py +++ b/synapse/storage/profile.py @@ -38,7 +38,7 @@ class ProfileStore(SQLBaseStore): retcols=("displayname", "avatar_url"), desc="get_profileinfo", ) - except StoreError, e: + except StoreError as e: if e.code == 404: # no match defer.returnValue(ProfileInfo(None, None))