user_directory.py 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2017 Vector Creations Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import logging
  16. import re
  17. from six import iteritems
  18. from twisted.internet import defer
  19. from synapse.api.constants import EventTypes, JoinRules
  20. from synapse.storage.engines import PostgresEngine, Sqlite3Engine
  21. from synapse.types import get_domain_from_id, get_localpart_from_id
  22. from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
  23. from ._base import SQLBaseStore
  24. logger = logging.getLogger(__name__)
  25. class UserDirectoryStore(SQLBaseStore):
  26. @cachedInlineCallbacks(cache_context=True)
  27. def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context):
  28. """Check if the room is either world_readable or publically joinable
  29. """
  30. current_state_ids = yield self.get_current_state_ids(
  31. room_id, on_invalidate=cache_context.invalidate
  32. )
  33. join_rules_id = current_state_ids.get((EventTypes.JoinRules, ""))
  34. if join_rules_id:
  35. join_rule_ev = yield self.get_event(join_rules_id, allow_none=True)
  36. if join_rule_ev:
  37. if join_rule_ev.content.get("join_rule") == JoinRules.PUBLIC:
  38. defer.returnValue(True)
  39. hist_vis_id = current_state_ids.get((EventTypes.RoomHistoryVisibility, ""))
  40. if hist_vis_id:
  41. hist_vis_ev = yield self.get_event(hist_vis_id, allow_none=True)
  42. if hist_vis_ev:
  43. if hist_vis_ev.content.get("history_visibility") == "world_readable":
  44. defer.returnValue(True)
  45. defer.returnValue(False)
  46. @defer.inlineCallbacks
  47. def add_users_to_public_room(self, room_id, user_ids):
  48. """Add user to the list of users in public rooms
  49. Args:
  50. room_id (str): A room_id that all users are in that is world_readable
  51. or publically joinable
  52. user_ids (list(str)): Users to add
  53. """
  54. yield self._simple_insert_many(
  55. table="users_in_public_rooms",
  56. values=[
  57. {
  58. "user_id": user_id,
  59. "room_id": room_id,
  60. }
  61. for user_id in user_ids
  62. ],
  63. desc="add_users_to_public_room"
  64. )
  65. for user_id in user_ids:
  66. self.get_user_in_public_room.invalidate((user_id,))
  67. def add_profiles_to_user_dir(self, room_id, users_with_profile):
  68. """Add profiles to the user directory
  69. Args:
  70. room_id (str): A room_id that all users are joined to
  71. users_with_profile (dict): Users to add to directory in the form of
  72. mapping of user_id -> ProfileInfo
  73. """
  74. if isinstance(self.database_engine, PostgresEngine):
  75. # We weight the loclpart most highly, then display name and finally
  76. # server name
  77. sql = """
  78. INSERT INTO user_directory_search(user_id, vector)
  79. VALUES (?,
  80. setweight(to_tsvector('english', ?), 'A')
  81. || setweight(to_tsvector('english', ?), 'D')
  82. || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
  83. )
  84. """
  85. args = (
  86. (
  87. user_id, get_localpart_from_id(user_id), get_domain_from_id(user_id),
  88. profile.display_name,
  89. )
  90. for user_id, profile in iteritems(users_with_profile)
  91. )
  92. elif isinstance(self.database_engine, Sqlite3Engine):
  93. sql = """
  94. INSERT INTO user_directory_search(user_id, value)
  95. VALUES (?,?)
  96. """
  97. args = (
  98. (
  99. user_id,
  100. "%s %s" % (user_id, p.display_name,) if p.display_name else user_id
  101. )
  102. for user_id, p in iteritems(users_with_profile)
  103. )
  104. else:
  105. # This should be unreachable.
  106. raise Exception("Unrecognized database engine")
  107. def _add_profiles_to_user_dir_txn(txn):
  108. txn.executemany(sql, args)
  109. self._simple_insert_many_txn(
  110. txn,
  111. table="user_directory",
  112. values=[
  113. {
  114. "user_id": user_id,
  115. "room_id": room_id,
  116. "display_name": profile.display_name,
  117. "avatar_url": profile.avatar_url,
  118. }
  119. for user_id, profile in iteritems(users_with_profile)
  120. ]
  121. )
  122. for user_id in users_with_profile:
  123. txn.call_after(
  124. self.get_user_in_directory.invalidate, (user_id,)
  125. )
  126. return self.runInteraction(
  127. "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn
  128. )
  129. @defer.inlineCallbacks
  130. def update_user_in_user_dir(self, user_id, room_id):
  131. yield self._simple_update_one(
  132. table="user_directory",
  133. keyvalues={"user_id": user_id},
  134. updatevalues={"room_id": room_id},
  135. desc="update_user_in_user_dir",
  136. )
  137. self.get_user_in_directory.invalidate((user_id,))
  138. def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id):
  139. def _update_profile_in_user_dir_txn(txn):
  140. new_entry = self._simple_upsert_txn(
  141. txn,
  142. table="user_directory",
  143. keyvalues={"user_id": user_id},
  144. insertion_values={"room_id": room_id},
  145. values={"display_name": display_name, "avatar_url": avatar_url},
  146. lock=False, # We're only inserter
  147. )
  148. if isinstance(self.database_engine, PostgresEngine):
  149. # We weight the localpart most highly, then display name and finally
  150. # server name
  151. if new_entry:
  152. sql = """
  153. INSERT INTO user_directory_search(user_id, vector)
  154. VALUES (?,
  155. setweight(to_tsvector('english', ?), 'A')
  156. || setweight(to_tsvector('english', ?), 'D')
  157. || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
  158. )
  159. """
  160. txn.execute(
  161. sql,
  162. (
  163. user_id, get_localpart_from_id(user_id),
  164. get_domain_from_id(user_id), display_name,
  165. )
  166. )
  167. else:
  168. sql = """
  169. UPDATE user_directory_search
  170. SET vector = setweight(to_tsvector('english', ?), 'A')
  171. || setweight(to_tsvector('english', ?), 'D')
  172. || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
  173. WHERE user_id = ?
  174. """
  175. txn.execute(
  176. sql,
  177. (
  178. get_localpart_from_id(user_id), get_domain_from_id(user_id),
  179. display_name, user_id,
  180. )
  181. )
  182. elif isinstance(self.database_engine, Sqlite3Engine):
  183. value = "%s %s" % (user_id, display_name,) if display_name else user_id
  184. self._simple_upsert_txn(
  185. txn,
  186. table="user_directory_search",
  187. keyvalues={"user_id": user_id},
  188. values={"value": value},
  189. lock=False, # We're only inserter
  190. )
  191. else:
  192. # This should be unreachable.
  193. raise Exception("Unrecognized database engine")
  194. txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
  195. return self.runInteraction(
  196. "update_profile_in_user_dir", _update_profile_in_user_dir_txn
  197. )
  198. @defer.inlineCallbacks
  199. def update_user_in_public_user_list(self, user_id, room_id):
  200. yield self._simple_update_one(
  201. table="users_in_public_rooms",
  202. keyvalues={"user_id": user_id},
  203. updatevalues={"room_id": room_id},
  204. desc="update_user_in_public_user_list",
  205. )
  206. self.get_user_in_public_room.invalidate((user_id,))
  207. def remove_from_user_dir(self, user_id):
  208. def _remove_from_user_dir_txn(txn):
  209. self._simple_delete_txn(
  210. txn,
  211. table="user_directory",
  212. keyvalues={"user_id": user_id},
  213. )
  214. self._simple_delete_txn(
  215. txn,
  216. table="user_directory_search",
  217. keyvalues={"user_id": user_id},
  218. )
  219. self._simple_delete_txn(
  220. txn,
  221. table="users_in_public_rooms",
  222. keyvalues={"user_id": user_id},
  223. )
  224. txn.call_after(
  225. self.get_user_in_directory.invalidate, (user_id,)
  226. )
  227. txn.call_after(
  228. self.get_user_in_public_room.invalidate, (user_id,)
  229. )
  230. return self.runInteraction(
  231. "remove_from_user_dir", _remove_from_user_dir_txn,
  232. )
  233. @defer.inlineCallbacks
  234. def remove_from_user_in_public_room(self, user_id):
  235. yield self._simple_delete(
  236. table="users_in_public_rooms",
  237. keyvalues={"user_id": user_id},
  238. desc="remove_from_user_in_public_room",
  239. )
  240. self.get_user_in_public_room.invalidate((user_id,))
  241. def get_users_in_public_due_to_room(self, room_id):
  242. """Get all user_ids that are in the room directory because they're
  243. in the given room_id
  244. """
  245. return self._simple_select_onecol(
  246. table="users_in_public_rooms",
  247. keyvalues={"room_id": room_id},
  248. retcol="user_id",
  249. desc="get_users_in_public_due_to_room",
  250. )
  251. @defer.inlineCallbacks
  252. def get_users_in_dir_due_to_room(self, room_id):
  253. """Get all user_ids that are in the room directory because they're
  254. in the given room_id
  255. """
  256. user_ids_dir = yield self._simple_select_onecol(
  257. table="user_directory",
  258. keyvalues={"room_id": room_id},
  259. retcol="user_id",
  260. desc="get_users_in_dir_due_to_room",
  261. )
  262. user_ids_pub = yield self._simple_select_onecol(
  263. table="users_in_public_rooms",
  264. keyvalues={"room_id": room_id},
  265. retcol="user_id",
  266. desc="get_users_in_dir_due_to_room",
  267. )
  268. user_ids_share = yield self._simple_select_onecol(
  269. table="users_who_share_rooms",
  270. keyvalues={"room_id": room_id},
  271. retcol="user_id",
  272. desc="get_users_in_dir_due_to_room",
  273. )
  274. user_ids = set(user_ids_dir)
  275. user_ids.update(user_ids_pub)
  276. user_ids.update(user_ids_share)
  277. defer.returnValue(user_ids)
  278. @defer.inlineCallbacks
  279. def get_all_rooms(self):
  280. """Get all room_ids we've ever known about, in ascending order of "size"
  281. """
  282. sql = """
  283. SELECT room_id FROM current_state_events
  284. GROUP BY room_id
  285. ORDER BY count(*) ASC
  286. """
  287. rows = yield self._execute("get_all_rooms", None, sql)
  288. defer.returnValue([room_id for room_id, in rows])
  289. @defer.inlineCallbacks
  290. def get_all_local_users(self):
  291. """Get all local users
  292. """
  293. sql = """
  294. SELECT name FROM users
  295. """
  296. rows = yield self._execute("get_all_local_users", None, sql)
  297. defer.returnValue([name for name, in rows])
  298. def add_users_who_share_room(self, room_id, share_private, user_id_tuples):
  299. """Insert entries into the users_who_share_rooms table. The first
  300. user should be a local user.
  301. Args:
  302. room_id (str)
  303. share_private (bool): Is the room private
  304. user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs.
  305. """
  306. def _add_users_who_share_room_txn(txn):
  307. self._simple_insert_many_txn(
  308. txn,
  309. table="users_who_share_rooms",
  310. values=[
  311. {
  312. "user_id": user_id,
  313. "other_user_id": other_user_id,
  314. "room_id": room_id,
  315. "share_private": share_private,
  316. }
  317. for user_id, other_user_id in user_id_tuples
  318. ],
  319. )
  320. for user_id, other_user_id in user_id_tuples:
  321. txn.call_after(
  322. self.get_users_who_share_room_from_dir.invalidate,
  323. (user_id,),
  324. )
  325. txn.call_after(
  326. self.get_if_users_share_a_room.invalidate,
  327. (user_id, other_user_id),
  328. )
  329. return self.runInteraction(
  330. "add_users_who_share_room", _add_users_who_share_room_txn
  331. )
  332. def update_users_who_share_room(self, room_id, share_private, user_id_sets):
  333. """Updates entries in the users_who_share_rooms table. The first
  334. user should be a local user.
  335. Args:
  336. room_id (str)
  337. share_private (bool): Is the room private
  338. user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs.
  339. """
  340. def _update_users_who_share_room_txn(txn):
  341. sql = """
  342. UPDATE users_who_share_rooms
  343. SET room_id = ?, share_private = ?
  344. WHERE user_id = ? AND other_user_id = ?
  345. """
  346. txn.executemany(
  347. sql,
  348. (
  349. (room_id, share_private, uid, oid)
  350. for uid, oid in user_id_sets
  351. )
  352. )
  353. for user_id, other_user_id in user_id_sets:
  354. txn.call_after(
  355. self.get_users_who_share_room_from_dir.invalidate,
  356. (user_id,),
  357. )
  358. txn.call_after(
  359. self.get_if_users_share_a_room.invalidate,
  360. (user_id, other_user_id),
  361. )
  362. return self.runInteraction(
  363. "update_users_who_share_room", _update_users_who_share_room_txn
  364. )
  365. def remove_user_who_share_room(self, user_id, other_user_id):
  366. """Deletes entries in the users_who_share_rooms table. The first
  367. user should be a local user.
  368. Args:
  369. room_id (str)
  370. share_private (bool): Is the room private
  371. user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs.
  372. """
  373. def _remove_user_who_share_room_txn(txn):
  374. self._simple_delete_txn(
  375. txn,
  376. table="users_who_share_rooms",
  377. keyvalues={
  378. "user_id": user_id,
  379. "other_user_id": other_user_id,
  380. },
  381. )
  382. txn.call_after(
  383. self.get_users_who_share_room_from_dir.invalidate,
  384. (user_id,),
  385. )
  386. txn.call_after(
  387. self.get_if_users_share_a_room.invalidate,
  388. (user_id, other_user_id),
  389. )
  390. return self.runInteraction(
  391. "remove_user_who_share_room", _remove_user_who_share_room_txn
  392. )
  393. @cached(max_entries=500000)
  394. def get_if_users_share_a_room(self, user_id, other_user_id):
  395. """Gets if users share a room.
  396. Args:
  397. user_id (str): Must be a local user_id
  398. other_user_id (str)
  399. Returns:
  400. bool|None: None if they don't share a room, otherwise whether they
  401. share a private room or not.
  402. """
  403. return self._simple_select_one_onecol(
  404. table="users_who_share_rooms",
  405. keyvalues={
  406. "user_id": user_id,
  407. "other_user_id": other_user_id,
  408. },
  409. retcol="share_private",
  410. allow_none=True,
  411. desc="get_if_users_share_a_room",
  412. )
  413. @cachedInlineCallbacks(max_entries=500000, iterable=True)
  414. def get_users_who_share_room_from_dir(self, user_id):
  415. """Returns the set of users who share a room with `user_id`
  416. Args:
  417. user_id(str): Must be a local user
  418. Returns:
  419. dict: user_id -> share_private mapping
  420. """
  421. rows = yield self._simple_select_list(
  422. table="users_who_share_rooms",
  423. keyvalues={
  424. "user_id": user_id,
  425. },
  426. retcols=("other_user_id", "share_private",),
  427. desc="get_users_who_share_room_with_user",
  428. )
  429. defer.returnValue({
  430. row["other_user_id"]: row["share_private"]
  431. for row in rows
  432. })
  433. def get_users_in_share_dir_with_room_id(self, user_id, room_id):
  434. """Get all user tuples that are in the users_who_share_rooms due to the
  435. given room_id.
  436. Returns:
  437. [(user_id, other_user_id)]: where one of the two will match the given
  438. user_id.
  439. """
  440. sql = """
  441. SELECT user_id, other_user_id FROM users_who_share_rooms
  442. WHERE room_id = ? AND (user_id = ? OR other_user_id = ?)
  443. """
  444. return self._execute(
  445. "get_users_in_share_dir_with_room_id", None, sql, room_id, user_id, user_id
  446. )
  447. @defer.inlineCallbacks
  448. def get_rooms_in_common_for_users(self, user_id, other_user_id):
  449. """Given two user_ids find out the list of rooms they share.
  450. """
  451. sql = """
  452. SELECT room_id FROM (
  453. SELECT c.room_id FROM current_state_events AS c
  454. INNER JOIN room_memberships USING (event_id)
  455. WHERE type = 'm.room.member'
  456. AND membership = 'join'
  457. AND state_key = ?
  458. ) AS f1 INNER JOIN (
  459. SELECT c.room_id FROM current_state_events AS c
  460. INNER JOIN room_memberships USING (event_id)
  461. WHERE type = 'm.room.member'
  462. AND membership = 'join'
  463. AND state_key = ?
  464. ) f2 USING (room_id)
  465. """
  466. rows = yield self._execute(
  467. "get_rooms_in_common_for_users", None, sql, user_id, other_user_id
  468. )
  469. defer.returnValue([room_id for room_id, in rows])
  470. def delete_all_from_user_dir(self):
  471. """Delete the entire user directory
  472. """
  473. def _delete_all_from_user_dir_txn(txn):
  474. txn.execute("DELETE FROM user_directory")
  475. txn.execute("DELETE FROM user_directory_search")
  476. txn.execute("DELETE FROM users_in_public_rooms")
  477. txn.execute("DELETE FROM users_who_share_rooms")
  478. txn.call_after(self.get_user_in_directory.invalidate_all)
  479. txn.call_after(self.get_user_in_public_room.invalidate_all)
  480. txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all)
  481. txn.call_after(self.get_if_users_share_a_room.invalidate_all)
  482. return self.runInteraction(
  483. "delete_all_from_user_dir", _delete_all_from_user_dir_txn
  484. )
  485. @cached()
  486. def get_user_in_directory(self, user_id):
  487. return self._simple_select_one(
  488. table="user_directory",
  489. keyvalues={"user_id": user_id},
  490. retcols=("room_id", "display_name", "avatar_url",),
  491. allow_none=True,
  492. desc="get_user_in_directory",
  493. )
  494. @cached()
  495. def get_user_in_public_room(self, user_id):
  496. return self._simple_select_one(
  497. table="users_in_public_rooms",
  498. keyvalues={"user_id": user_id},
  499. retcols=("room_id",),
  500. allow_none=True,
  501. desc="get_user_in_public_room",
  502. )
  503. def get_user_directory_stream_pos(self):
  504. return self._simple_select_one_onecol(
  505. table="user_directory_stream_pos",
  506. keyvalues={},
  507. retcol="stream_id",
  508. desc="get_user_directory_stream_pos",
  509. )
  510. def update_user_directory_stream_pos(self, stream_id):
  511. return self._simple_update_one(
  512. table="user_directory_stream_pos",
  513. keyvalues={},
  514. updatevalues={"stream_id": stream_id},
  515. desc="update_user_directory_stream_pos",
  516. )
  517. def get_current_state_deltas(self, prev_stream_id):
  518. prev_stream_id = int(prev_stream_id)
  519. if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id):
  520. return []
  521. def get_current_state_deltas_txn(txn):
  522. # First we calculate the max stream id that will give us less than
  523. # N results.
  524. # We arbitarily limit to 100 stream_id entries to ensure we don't
  525. # select toooo many.
  526. sql = """
  527. SELECT stream_id, count(*)
  528. FROM current_state_delta_stream
  529. WHERE stream_id > ?
  530. GROUP BY stream_id
  531. ORDER BY stream_id ASC
  532. LIMIT 100
  533. """
  534. txn.execute(sql, (prev_stream_id,))
  535. total = 0
  536. max_stream_id = prev_stream_id
  537. for max_stream_id, count in txn:
  538. total += count
  539. if total > 100:
  540. # We arbitarily limit to 100 entries to ensure we don't
  541. # select toooo many.
  542. break
  543. # Now actually get the deltas
  544. sql = """
  545. SELECT stream_id, room_id, type, state_key, event_id, prev_event_id
  546. FROM current_state_delta_stream
  547. WHERE ? < stream_id AND stream_id <= ?
  548. ORDER BY stream_id ASC
  549. """
  550. txn.execute(sql, (prev_stream_id, max_stream_id,))
  551. return self.cursor_to_dict(txn)
  552. return self.runInteraction(
  553. "get_current_state_deltas", get_current_state_deltas_txn
  554. )
  555. def get_max_stream_id_in_current_state_deltas(self):
  556. return self._simple_select_one_onecol(
  557. table="current_state_delta_stream",
  558. keyvalues={},
  559. retcol="COALESCE(MAX(stream_id), -1)",
  560. desc="get_max_stream_id_in_current_state_deltas",
  561. )
  562. @defer.inlineCallbacks
  563. def search_user_dir(self, user_id, search_term, limit):
  564. """Searches for users in directory
  565. Returns:
  566. dict of the form::
  567. {
  568. "limited": <bool>, # whether there were more results or not
  569. "results": [ # Ordered by best match first
  570. {
  571. "user_id": <user_id>,
  572. "display_name": <display_name>,
  573. "avatar_url": <avatar_url>
  574. }
  575. ]
  576. }
  577. """
  578. if self.hs.config.user_directory_search_all_users:
  579. # make s.user_id null to keep the ordering algorithm happy
  580. join_clause = """
  581. CROSS JOIN (SELECT NULL as user_id) AS s
  582. """
  583. join_args = ()
  584. where_clause = "1=1"
  585. else:
  586. join_clause = """
  587. LEFT JOIN users_in_public_rooms AS p USING (user_id)
  588. LEFT JOIN (
  589. SELECT other_user_id AS user_id FROM users_who_share_rooms
  590. WHERE user_id = ? AND share_private
  591. ) AS s USING (user_id)
  592. """
  593. join_args = (user_id,)
  594. where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)"
  595. if isinstance(self.database_engine, PostgresEngine):
  596. full_query, exact_query, prefix_query = _parse_query_postgres(search_term)
  597. # We order by rank and then if they have profile info
  598. # The ranking algorithm is hand tweaked for "best" results. Broadly
  599. # the idea is we give a higher weight to exact matches.
  600. # The array of numbers are the weights for the various part of the
  601. # search: (domain, _, display name, localpart)
  602. sql = """
  603. SELECT d.user_id AS user_id, display_name, avatar_url
  604. FROM user_directory_search
  605. INNER JOIN user_directory AS d USING (user_id)
  606. %s
  607. WHERE
  608. %s
  609. AND vector @@ to_tsquery('english', ?)
  610. ORDER BY
  611. (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
  612. * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END)
  613. * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END)
  614. * (
  615. 3 * ts_rank_cd(
  616. '{0.1, 0.1, 0.9, 1.0}',
  617. vector,
  618. to_tsquery('english', ?),
  619. 8
  620. )
  621. + ts_rank_cd(
  622. '{0.1, 0.1, 0.9, 1.0}',
  623. vector,
  624. to_tsquery('english', ?),
  625. 8
  626. )
  627. )
  628. DESC,
  629. display_name IS NULL,
  630. avatar_url IS NULL
  631. LIMIT ?
  632. """ % (join_clause, where_clause)
  633. args = join_args + (full_query, exact_query, prefix_query, limit + 1,)
  634. elif isinstance(self.database_engine, Sqlite3Engine):
  635. search_query = _parse_query_sqlite(search_term)
  636. sql = """
  637. SELECT d.user_id AS user_id, display_name, avatar_url
  638. FROM user_directory_search
  639. INNER JOIN user_directory AS d USING (user_id)
  640. %s
  641. WHERE
  642. %s
  643. AND value MATCH ?
  644. ORDER BY
  645. rank(matchinfo(user_directory_search)) DESC,
  646. display_name IS NULL,
  647. avatar_url IS NULL
  648. LIMIT ?
  649. """ % (join_clause, where_clause)
  650. args = join_args + (search_query, limit + 1)
  651. else:
  652. # This should be unreachable.
  653. raise Exception("Unrecognized database engine")
  654. results = yield self._execute(
  655. "search_user_dir", self.cursor_to_dict, sql, *args
  656. )
  657. limited = len(results) > limit
  658. defer.returnValue({
  659. "limited": limited,
  660. "results": results,
  661. })
  662. def _parse_query_sqlite(search_term):
  663. """Takes a plain unicode string from the user and converts it into a form
  664. that can be passed to database.
  665. We use this so that we can add prefix matching, which isn't something
  666. that is supported by default.
  667. We specifically add both a prefix and non prefix matching term so that
  668. exact matches get ranked higher.
  669. """
  670. # Pull out the individual words, discarding any non-word characters.
  671. results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
  672. return " & ".join("(%s* OR %s)" % (result, result,) for result in results)
  673. def _parse_query_postgres(search_term):
  674. """Takes a plain unicode string from the user and converts it into a form
  675. that can be passed to database.
  676. We use this so that we can add prefix matching, which isn't something
  677. that is supported by default.
  678. """
  679. # Pull out the individual words, discarding any non-word characters.
  680. results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
  681. both = " & ".join("(%s:* | %s)" % (result, result,) for result in results)
  682. exact = " & ".join("%s" % (result,) for result in results)
  683. prefix = " & ".join("%s:*" % (result,) for result in results)
  684. return both, exact, prefix