From f2783fc201edaa49eafd8be06f8cda16ec1f3d95 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 17 Dec 2020 14:42:30 +0100 Subject: [PATCH] Use the simple dictionary in full text search for the user directory (#8959) * Use the simple dictionary in fts for the user directory * Clarify naming --- changelog.d/8959.bugfix | 1 + .../storage/databases/main/user_directory.py | 24 +++++++++---------- tests/storage/test_user_directory.py | 23 ++++++++++++++++++ 3 files changed, 36 insertions(+), 12 deletions(-) create mode 100644 changelog.d/8959.bugfix diff --git a/changelog.d/8959.bugfix b/changelog.d/8959.bugfix new file mode 100644 index 000000000..772818bae --- /dev/null +++ b/changelog.d/8959.bugfix @@ -0,0 +1 @@ +Fix a bug causing common English words to not be considered for a user directory search. diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index fc8caf46a..ef11f1c3b 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -396,9 +396,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): sql = """ INSERT INTO user_directory_search(user_id, vector) VALUES (?, - setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + setweight(to_tsvector('simple', ?), 'A') + || setweight(to_tsvector('simple', ?), 'D') + || setweight(to_tsvector('simple', COALESCE(?, '')), 'B') ) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector """ txn.execute( @@ -418,9 +418,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): sql = """ INSERT INTO user_directory_search(user_id, vector) VALUES (?, - setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + setweight(to_tsvector('simple', ?), 'A') + || setweight(to_tsvector('simple', ?), 
'D') + || setweight(to_tsvector('simple', COALESCE(?, '')), 'B') ) """ txn.execute( @@ -435,9 +435,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): elif new_entry is False: sql = """ UPDATE user_directory_search - SET vector = setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + SET vector = setweight(to_tsvector('simple', ?), 'A') + || setweight(to_tsvector('simple', ?), 'D') + || setweight(to_tsvector('simple', COALESCE(?, '')), 'B') WHERE user_id = ? """ txn.execute( @@ -764,7 +764,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): INNER JOIN user_directory AS d USING (user_id) WHERE %s - AND vector @@ to_tsquery('english', ?) + AND vector @@ to_tsquery('simple', ?) ORDER BY (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) @@ -773,13 +773,13 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): 3 * ts_rank_cd( '{0.1, 0.1, 0.9, 1.0}', vector, - to_tsquery('english', ?), + to_tsquery('simple', ?), 8 ) + ts_rank_cd( '{0.1, 0.1, 0.9, 1.0}', vector, - to_tsquery('english', ?), + to_tsquery('simple', ?), 8 ) ) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 738e91246..a6f63f4aa 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -21,6 +21,8 @@ from tests.utils import setup_test_homeserver ALICE = "@alice:a" BOB = "@bob:b" BOBBY = "@bobby:a" +# The localpart isn't 'Bela' on purpose so we can test looking up display names. 
+BELA = "@somenickname:a" class UserDirectoryStoreTestCase(unittest.TestCase): @@ -40,6 +42,9 @@ class UserDirectoryStoreTestCase(unittest.TestCase): yield defer.ensureDeferred( self.store.update_profile_in_user_dir(BOBBY, "bobby", None) ) + yield defer.ensureDeferred( + self.store.update_profile_in_user_dir(BELA, "Bela", None) + ) yield defer.ensureDeferred( self.store.add_users_in_public_rooms("!room:id", (ALICE, BOB)) ) @@ -72,3 +77,21 @@ class UserDirectoryStoreTestCase(unittest.TestCase): ) finally: self.hs.config.user_directory_search_all_users = False + + @defer.inlineCallbacks + def test_search_user_dir_stop_words(self): + """Tests that a user can look up another user by searching for the start of its + display name even if that name happens to be a common English word that would + usually be ignored in full text searches. + """ + self.hs.config.user_directory_search_all_users = True + try: + r = yield defer.ensureDeferred(self.store.search_user_dir(ALICE, "be", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, + ) + finally: + self.hs.config.user_directory_search_all_users = False