Browse Source

Use the simple dictionary in full text search for the user directory (#8959)

* Use the simple dictionary in fts for the user directory

* Clarify naming
Brendan Abolivier 3 years ago
parent
commit
f2783fc201

+ 1 - 0
changelog.d/8959.bugfix

@@ -0,0 +1 @@
+Fix a bug causing common English words to not be considered for a user directory search.

+ 12 - 12
synapse/storage/databases/main/user_directory.py

@@ -396,9 +396,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                     sql = """
                         INSERT INTO user_directory_search(user_id, vector)
                         VALUES (?,
-                            setweight(to_tsvector('english', ?), 'A')
-                            || setweight(to_tsvector('english', ?), 'D')
-                            || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
+                            setweight(to_tsvector('simple', ?), 'A')
+                            || setweight(to_tsvector('simple', ?), 'D')
+                            || setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
                         ) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
                     """
                     txn.execute(
@@ -418,9 +418,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                         sql = """
                             INSERT INTO user_directory_search(user_id, vector)
                             VALUES (?,
-                                setweight(to_tsvector('english', ?), 'A')
-                                || setweight(to_tsvector('english', ?), 'D')
-                                || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
+                                setweight(to_tsvector('simple', ?), 'A')
+                                || setweight(to_tsvector('simple', ?), 'D')
+                                || setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
                             )
                         """
                         txn.execute(
@@ -435,9 +435,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                     elif new_entry is False:
                         sql = """
                             UPDATE user_directory_search
-                            SET vector = setweight(to_tsvector('english', ?), 'A')
-                                || setweight(to_tsvector('english', ?), 'D')
-                                || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
+                            SET vector = setweight(to_tsvector('simple', ?), 'A')
+                                || setweight(to_tsvector('simple', ?), 'D')
+                                || setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
                             WHERE user_id = ?
                         """
                         txn.execute(
@@ -764,7 +764,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 INNER JOIN user_directory AS d USING (user_id)
                 WHERE
                     %s
-                    AND vector @@ to_tsquery('english', ?)
+                    AND vector @@ to_tsquery('simple', ?)
                 ORDER BY
                     (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
                     * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END)
@@ -773,13 +773,13 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                         3 * ts_rank_cd(
                             '{0.1, 0.1, 0.9, 1.0}',
                             vector,
-                            to_tsquery('english', ?),
+                            to_tsquery('simple', ?),
                             8
                         )
                         + ts_rank_cd(
                             '{0.1, 0.1, 0.9, 1.0}',
                             vector,
-                            to_tsquery('english', ?),
+                            to_tsquery('simple', ?),
                             8
                         )
                     )

+ 23 - 0
tests/storage/test_user_directory.py

@@ -21,6 +21,8 @@ from tests.utils import setup_test_homeserver
 ALICE = "@alice:a"
 BOB = "@bob:b"
 BOBBY = "@bobby:a"
+# The localpart isn't 'Bela' on purpose so we can test looking up display names.
+BELA = "@somenickname:a"
 
 
 class UserDirectoryStoreTestCase(unittest.TestCase):
@@ -40,6 +42,9 @@ class UserDirectoryStoreTestCase(unittest.TestCase):
         yield defer.ensureDeferred(
             self.store.update_profile_in_user_dir(BOBBY, "bobby", None)
         )
+        yield defer.ensureDeferred(
+            self.store.update_profile_in_user_dir(BELA, "Bela", None)
+        )
         yield defer.ensureDeferred(
             self.store.add_users_in_public_rooms("!room:id", (ALICE, BOB))
         )
@@ -72,3 +77,21 @@ class UserDirectoryStoreTestCase(unittest.TestCase):
             )
         finally:
             self.hs.config.user_directory_search_all_users = False
+
+    @defer.inlineCallbacks
+    def test_search_user_dir_stop_words(self):
+        """Tests that a user can look up another user by searching for the start if its
+        display name even if that name happens to be a common English word that would
+        usually be ignored in full text searches.
+        """
+        self.hs.config.user_directory_search_all_users = True
+        try:
+            r = yield defer.ensureDeferred(self.store.search_user_dir(ALICE, "be", 10))
+            self.assertFalse(r["limited"])
+            self.assertEqual(1, len(r["results"]))
+            self.assertDictEqual(
+                r["results"][0],
+                {"user_id": BELA, "display_name": "Bela", "avatar_url": None},
+            )
+        finally:
+            self.hs.config.user_directory_search_all_users = False