presence.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. # Copyright 2014-2016 OpenMarket Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, cast
  15. from synapse.api.presence import PresenceState, UserPresenceState
  16. from synapse.replication.tcp.streams import PresenceStream
  17. from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
  18. from synapse.storage.database import (
  19. DatabasePool,
  20. LoggingDatabaseConnection,
  21. LoggingTransaction,
  22. )
  23. from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
  24. from synapse.storage.engines import PostgresEngine
  25. from synapse.storage.engines._base import IsolationLevel
  26. from synapse.storage.types import Connection
  27. from synapse.storage.util.id_generators import (
  28. AbstractStreamIdGenerator,
  29. MultiWriterIdGenerator,
  30. StreamIdGenerator,
  31. )
  32. from synapse.util.caches.descriptors import cached, cachedList
  33. from synapse.util.caches.stream_change_cache import StreamChangeCache
  34. from synapse.util.iterutils import batch_iter
  35. if TYPE_CHECKING:
  36. from synapse.server import HomeServer
  37. class PresenceBackgroundUpdateStore(SQLBaseStore):
  38. def __init__(
  39. self,
  40. database: DatabasePool,
  41. db_conn: LoggingDatabaseConnection,
  42. hs: "HomeServer",
  43. ) -> None:
  44. super().__init__(database, db_conn, hs)
  45. # Used by `PresenceStore._get_active_presence()`
  46. self.db_pool.updates.register_background_index_update(
  47. "presence_stream_not_offline_index",
  48. index_name="presence_stream_state_not_offline_idx",
  49. table="presence_stream",
  50. columns=["state"],
  51. where_clause="state != 'offline'",
  52. )
  53. class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore):
  54. def __init__(
  55. self,
  56. database: DatabasePool,
  57. db_conn: LoggingDatabaseConnection,
  58. hs: "HomeServer",
  59. ) -> None:
  60. super().__init__(database, db_conn, hs)
  61. self._instance_name = hs.get_instance_name()
  62. self._presence_id_gen: AbstractStreamIdGenerator
  63. self._can_persist_presence = (
  64. self._instance_name in hs.config.worker.writers.presence
  65. )
  66. if isinstance(database.engine, PostgresEngine):
  67. self._presence_id_gen = MultiWriterIdGenerator(
  68. db_conn=db_conn,
  69. db=database,
  70. notifier=hs.get_replication_notifier(),
  71. stream_name="presence_stream",
  72. instance_name=self._instance_name,
  73. tables=[("presence_stream", "instance_name", "stream_id")],
  74. sequence_name="presence_stream_sequence",
  75. writers=hs.config.worker.writers.presence,
  76. )
  77. else:
  78. self._presence_id_gen = StreamIdGenerator(
  79. db_conn, hs.get_replication_notifier(), "presence_stream", "stream_id"
  80. )
  81. self.hs = hs
  82. self._presence_on_startup = self._get_active_presence(db_conn)
  83. presence_cache_prefill, min_presence_val = self.db_pool.get_cache_dict(
  84. db_conn,
  85. "presence_stream",
  86. entity_column="user_id",
  87. stream_column="stream_id",
  88. max_value=self._presence_id_gen.get_current_token(),
  89. )
  90. self.presence_stream_cache = StreamChangeCache(
  91. "PresenceStreamChangeCache",
  92. min_presence_val,
  93. prefilled_cache=presence_cache_prefill,
  94. )
  95. async def update_presence(
  96. self, presence_states: List[UserPresenceState]
  97. ) -> Tuple[int, int]:
  98. assert self._can_persist_presence
  99. stream_ordering_manager = self._presence_id_gen.get_next_mult(
  100. len(presence_states)
  101. )
  102. async with stream_ordering_manager as stream_orderings:
  103. # Run the interaction with an isolation level of READ_COMMITTED to avoid
  104. # serialization errors(and rollbacks) in the database. This way it will
  105. # ignore new rows during the DELETE, but will pick them up the next time
  106. # this is run. Currently, that is between 5-60 seconds.
  107. await self.db_pool.runInteraction(
  108. "update_presence",
  109. self._update_presence_txn,
  110. stream_orderings,
  111. presence_states,
  112. isolation_level=IsolationLevel.READ_COMMITTED,
  113. )
  114. return stream_orderings[-1], self._presence_id_gen.get_current_token()
  115. def _update_presence_txn(
  116. self,
  117. txn: LoggingTransaction,
  118. stream_orderings: List[int],
  119. presence_states: List[UserPresenceState],
  120. ) -> None:
  121. for stream_id, state in zip(stream_orderings, presence_states):
  122. txn.call_after(
  123. self.presence_stream_cache.entity_has_changed, state.user_id, stream_id
  124. )
  125. txn.call_after(self._get_presence_for_user.invalidate, (state.user_id,))
  126. # Delete old rows to stop database from getting really big
  127. sql = "DELETE FROM presence_stream WHERE stream_id < ? AND "
  128. for states in batch_iter(presence_states, 50):
  129. clause, args = make_in_list_sql_clause(
  130. self.database_engine, "user_id", [s.user_id for s in states]
  131. )
  132. txn.execute(sql + clause, [stream_id] + list(args))
  133. # Actually insert new rows
  134. self.db_pool.simple_insert_many_txn(
  135. txn,
  136. table="presence_stream",
  137. keys=(
  138. "stream_id",
  139. "user_id",
  140. "state",
  141. "last_active_ts",
  142. "last_federation_update_ts",
  143. "last_user_sync_ts",
  144. "status_msg",
  145. "currently_active",
  146. "instance_name",
  147. ),
  148. values=[
  149. (
  150. stream_id,
  151. state.user_id,
  152. state.state,
  153. state.last_active_ts,
  154. state.last_federation_update_ts,
  155. state.last_user_sync_ts,
  156. state.status_msg,
  157. state.currently_active,
  158. self._instance_name,
  159. )
  160. for stream_id, state in zip(stream_orderings, presence_states)
  161. ],
  162. )
  163. async def get_all_presence_updates(
  164. self, instance_name: str, last_id: int, current_id: int, limit: int
  165. ) -> Tuple[List[Tuple[int, list]], int, bool]:
  166. """Get updates for presence replication stream.
  167. Args:
  168. instance_name: The writer we want to fetch updates from. Unused
  169. here since there is only ever one writer.
  170. last_id: The token to fetch updates from. Exclusive.
  171. current_id: The token to fetch updates up to. Inclusive.
  172. limit: The requested limit for the number of rows to return. The
  173. function may return more or fewer rows.
  174. Returns:
  175. A tuple consisting of: the updates, a token to use to fetch
  176. subsequent updates, and whether we returned fewer rows than exists
  177. between the requested tokens due to the limit.
  178. The token returned can be used in a subsequent call to this
  179. function to get further updatees.
  180. The updates are a list of 2-tuples of stream ID and the row data
  181. """
  182. if last_id == current_id:
  183. return [], current_id, False
  184. def get_all_presence_updates_txn(
  185. txn: LoggingTransaction,
  186. ) -> Tuple[List[Tuple[int, list]], int, bool]:
  187. sql = """
  188. SELECT stream_id, user_id, state, last_active_ts,
  189. last_federation_update_ts, last_user_sync_ts,
  190. status_msg, currently_active
  191. FROM presence_stream
  192. WHERE ? < stream_id AND stream_id <= ?
  193. ORDER BY stream_id ASC
  194. LIMIT ?
  195. """
  196. txn.execute(sql, (last_id, current_id, limit))
  197. updates = cast(
  198. List[Tuple[int, list]],
  199. [(row[0], row[1:]) for row in txn],
  200. )
  201. upper_bound = current_id
  202. limited = False
  203. if len(updates) >= limit:
  204. upper_bound = updates[-1][0]
  205. limited = True
  206. return updates, upper_bound, limited
  207. return await self.db_pool.runInteraction(
  208. "get_all_presence_updates", get_all_presence_updates_txn
  209. )
  210. @cached()
  211. def _get_presence_for_user(self, user_id: str) -> None:
  212. raise NotImplementedError()
  213. @cachedList(
  214. cached_method_name="_get_presence_for_user",
  215. list_name="user_ids",
  216. num_args=1,
  217. )
  218. async def get_presence_for_users(
  219. self, user_ids: Iterable[str]
  220. ) -> Dict[str, UserPresenceState]:
  221. rows = await self.db_pool.simple_select_many_batch(
  222. table="presence_stream",
  223. column="user_id",
  224. iterable=user_ids,
  225. keyvalues={},
  226. retcols=(
  227. "user_id",
  228. "state",
  229. "last_active_ts",
  230. "last_federation_update_ts",
  231. "last_user_sync_ts",
  232. "status_msg",
  233. "currently_active",
  234. ),
  235. desc="get_presence_for_users",
  236. )
  237. for row in rows:
  238. row["currently_active"] = bool(row["currently_active"])
  239. return {row["user_id"]: UserPresenceState(**row) for row in rows}
  240. async def should_user_receive_full_presence_with_token(
  241. self,
  242. user_id: str,
  243. from_token: int,
  244. ) -> bool:
  245. """Check whether the given user should receive full presence using the stream token
  246. they're updating from.
  247. Args:
  248. user_id: The ID of the user to check.
  249. from_token: The stream token included in their /sync token.
  250. Returns:
  251. True if the user should have full presence sent to them, False otherwise.
  252. """
  253. token = await self._get_full_presence_stream_token_for_user(user_id)
  254. if token is None:
  255. return False
  256. return from_token <= token
  257. @cached()
  258. async def _get_full_presence_stream_token_for_user(
  259. self, user_id: str
  260. ) -> Optional[int]:
  261. """Get the presence token corresponding to the last full presence update
  262. for this user.
  263. If the user presents a sync token with a presence stream token at least
  264. as old as the result, then we need to send them a full presence update.
  265. If this user has never needed a full presence update, returns `None`.
  266. """
  267. return await self.db_pool.simple_select_one_onecol(
  268. table="users_to_send_full_presence_to",
  269. keyvalues={"user_id": user_id},
  270. retcol="presence_stream_id",
  271. allow_none=True,
  272. desc="_get_full_presence_stream_token_for_user",
  273. )
  274. async def add_users_to_send_full_presence_to(self, user_ids: Iterable[str]) -> None:
  275. """Adds to the list of users who should receive a full snapshot of presence
  276. upon their next sync.
  277. Args:
  278. user_ids: An iterable of user IDs.
  279. """
  280. # Add user entries to the table, updating the presence_stream_id column if the user already
  281. # exists in the table.
  282. presence_stream_id = self._presence_id_gen.get_current_token()
  283. def _add_users_to_send_full_presence_to(txn: LoggingTransaction) -> None:
  284. self.db_pool.simple_upsert_many_txn(
  285. txn,
  286. table="users_to_send_full_presence_to",
  287. key_names=("user_id",),
  288. key_values=[(user_id,) for user_id in user_ids],
  289. value_names=("presence_stream_id",),
  290. # We save the current presence stream ID token along with the user ID entry so
  291. # that when a user /sync's, even if they syncing multiple times across separate
  292. # devices at different times, each device will receive full presence once - when
  293. # the presence stream ID in their sync token is less than the one in the table
  294. # for their user ID.
  295. value_values=[(presence_stream_id,) for _ in user_ids],
  296. )
  297. for user_id in user_ids:
  298. self._invalidate_cache_and_stream(
  299. txn, self._get_full_presence_stream_token_for_user, (user_id,)
  300. )
  301. return await self.db_pool.runInteraction(
  302. "add_users_to_send_full_presence_to", _add_users_to_send_full_presence_to
  303. )
  304. async def get_presence_for_all_users(
  305. self,
  306. include_offline: bool = True,
  307. ) -> Dict[str, UserPresenceState]:
  308. """Retrieve the current presence state for all users.
  309. Note that the presence_stream table is culled frequently, so it should only
  310. contain the latest presence state for each user.
  311. Args:
  312. include_offline: Whether to include offline presence states
  313. Returns:
  314. A dict of user IDs to their current UserPresenceState.
  315. """
  316. users_to_state = {}
  317. exclude_keyvalues = None
  318. if not include_offline:
  319. # Exclude offline presence state
  320. exclude_keyvalues = {"state": "offline"}
  321. # This may be a very heavy database query.
  322. # We paginate in order to not block a database connection.
  323. limit = 100
  324. offset = 0
  325. while True:
  326. rows = await self.db_pool.runInteraction(
  327. "get_presence_for_all_users",
  328. self.db_pool.simple_select_list_paginate_txn,
  329. "presence_stream",
  330. orderby="stream_id",
  331. start=offset,
  332. limit=limit,
  333. exclude_keyvalues=exclude_keyvalues,
  334. retcols=(
  335. "user_id",
  336. "state",
  337. "last_active_ts",
  338. "last_federation_update_ts",
  339. "last_user_sync_ts",
  340. "status_msg",
  341. "currently_active",
  342. ),
  343. order_direction="ASC",
  344. )
  345. for row in rows:
  346. users_to_state[row["user_id"]] = UserPresenceState(**row)
  347. # We've run out of updates to query
  348. if len(rows) < limit:
  349. break
  350. offset += limit
  351. return users_to_state
  352. def get_current_presence_token(self) -> int:
  353. return self._presence_id_gen.get_current_token()
  354. def _get_active_presence(self, db_conn: Connection) -> List[UserPresenceState]:
  355. """Fetch non-offline presence from the database so that we can register
  356. the appropriate time outs.
  357. """
  358. # The `presence_stream_state_not_offline_idx` index should be used for this
  359. # query.
  360. sql = (
  361. "SELECT user_id, state, last_active_ts, last_federation_update_ts,"
  362. " last_user_sync_ts, status_msg, currently_active FROM presence_stream"
  363. " WHERE state != ?"
  364. )
  365. txn = db_conn.cursor()
  366. txn.execute(sql, (PresenceState.OFFLINE,))
  367. rows = self.db_pool.cursor_to_dict(txn)
  368. txn.close()
  369. for row in rows:
  370. row["currently_active"] = bool(row["currently_active"])
  371. return [UserPresenceState(**row) for row in rows]
  372. def take_presence_startup_info(self) -> List[UserPresenceState]:
  373. active_on_startup = self._presence_on_startup
  374. self._presence_on_startup = []
  375. return active_on_startup
  376. def process_replication_rows(
  377. self,
  378. stream_name: str,
  379. instance_name: str,
  380. token: int,
  381. rows: Iterable[Any],
  382. ) -> None:
  383. if stream_name == PresenceStream.NAME:
  384. for row in rows:
  385. self.presence_stream_cache.entity_has_changed(row.user_id, token)
  386. self._get_presence_for_user.invalidate((row.user_id,))
  387. return super().process_replication_rows(stream_name, instance_name, token, rows)
  388. def process_replication_position(
  389. self, stream_name: str, instance_name: str, token: int
  390. ) -> None:
  391. if stream_name == PresenceStream.NAME:
  392. self._presence_id_gen.advance(instance_name, token)
  393. super().process_replication_position(stream_name, instance_name, token)