Use the simple dictionary in full text search for the user directory (#8959)

* Use the simple dictionary in fts for the user directory

* Clarify naming
This commit is contained in:
Brendan Abolivier 2020-12-17 14:42:30 +01:00 committed by GitHub
parent c07022303e
commit f2783fc201
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 36 additions and 12 deletions

1
changelog.d/8959.bugfix Normal file
View File

@ -0,0 +1 @@
Fix a bug causing common English words to not be considered for a user directory search.

View File

@ -396,9 +396,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
sql = """
INSERT INTO user_directory_search(user_id, vector)
VALUES (?,
setweight(to_tsvector('english', ?), 'A')
|| setweight(to_tsvector('english', ?), 'D')
|| setweight(to_tsvector('english', COALESCE(?, '')), 'B')
setweight(to_tsvector('simple', ?), 'A')
|| setweight(to_tsvector('simple', ?), 'D')
|| setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
"""
txn.execute(
@ -418,9 +418,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
sql = """
INSERT INTO user_directory_search(user_id, vector)
VALUES (?,
setweight(to_tsvector('english', ?), 'A')
|| setweight(to_tsvector('english', ?), 'D')
|| setweight(to_tsvector('english', COALESCE(?, '')), 'B')
setweight(to_tsvector('simple', ?), 'A')
|| setweight(to_tsvector('simple', ?), 'D')
|| setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
)
"""
txn.execute(
@ -435,9 +435,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
elif new_entry is False:
sql = """
UPDATE user_directory_search
SET vector = setweight(to_tsvector('english', ?), 'A')
|| setweight(to_tsvector('english', ?), 'D')
|| setweight(to_tsvector('english', COALESCE(?, '')), 'B')
SET vector = setweight(to_tsvector('simple', ?), 'A')
|| setweight(to_tsvector('simple', ?), 'D')
|| setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
WHERE user_id = ?
"""
txn.execute(
@ -764,7 +764,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
INNER JOIN user_directory AS d USING (user_id)
WHERE
%s
AND vector @@ to_tsquery('english', ?)
AND vector @@ to_tsquery('simple', ?)
ORDER BY
(CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
* (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END)
@ -773,13 +773,13 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
3 * ts_rank_cd(
'{0.1, 0.1, 0.9, 1.0}',
vector,
to_tsquery('english', ?),
to_tsquery('simple', ?),
8
)
+ ts_rank_cd(
'{0.1, 0.1, 0.9, 1.0}',
vector,
to_tsquery('english', ?),
to_tsquery('simple', ?),
8
)
)

View File

@ -21,6 +21,8 @@ from tests.utils import setup_test_homeserver
ALICE = "@alice:a"
BOB = "@bob:b"
BOBBY = "@bobby:a"
# The localpart isn't 'Bela' on purpose so we can test looking up display names.
BELA = "@somenickname:a"
class UserDirectoryStoreTestCase(unittest.TestCase):
@ -40,6 +42,9 @@ class UserDirectoryStoreTestCase(unittest.TestCase):
yield defer.ensureDeferred(
self.store.update_profile_in_user_dir(BOBBY, "bobby", None)
)
yield defer.ensureDeferred(
self.store.update_profile_in_user_dir(BELA, "Bela", None)
)
yield defer.ensureDeferred(
self.store.add_users_in_public_rooms("!room:id", (ALICE, BOB))
)
@ -72,3 +77,21 @@ class UserDirectoryStoreTestCase(unittest.TestCase):
)
finally:
self.hs.config.user_directory_search_all_users = False
@defer.inlineCallbacks
def test_search_user_dir_stop_words(self):
"""Tests that a user can look up another user by searching for the start if its
display name even if that name happens to be a common English word that would
usually be ignored in full text searches.
"""
self.hs.config.user_directory_search_all_users = True
try:
r = yield defer.ensureDeferred(self.store.search_user_dir(ALICE, "be", 10))
self.assertFalse(r["limited"])
self.assertEqual(1, len(r["results"]))
self.assertDictEqual(
r["results"][0],
{"user_id": BELA, "display_name": "Bela", "avatar_url": None},
)
finally:
self.hs.config.user_directory_search_all_users = False