_base.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. # Copyright 2017 Vector Creations Ltd
  2. # Copyright 2019 New Vector Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import heapq
  16. import logging
  17. from typing import (
  18. TYPE_CHECKING,
  19. Any,
  20. Awaitable,
  21. Callable,
  22. List,
  23. Optional,
  24. Tuple,
  25. TypeVar,
  26. )
  27. import attr
  28. from synapse.replication.http.streams import ReplicationGetStreamUpdates
  29. from synapse.types import JsonDict
  30. if TYPE_CHECKING:
  31. from synapse.server import HomeServer
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# The number of rows to request from an update_function. It is passed as the
# `target_row_count` argument each time `Stream.get_updates_since` calls the
# stream's update_function.
_STREAM_UPDATE_TARGET_ROW_COUNT = 100


# Some type aliases to make things a bit easier.

# A stream position token
Token = int

# The type of a stream update row, after JSON deserialisation, but before
# parsing with Stream.parse_row (which turns it into a `ROW_TYPE`). Normally it's
# just a row from a database query, though this is dependent on the stream in question.
#
StreamRow = TypeVar("StreamRow", bound=Tuple)

# The type returned by the update_function of a stream, as well as get_updates(),
# get_updates_since, etc.
#
# It consists of a triplet `(updates, new_last_token, limited)`, where:
#   * `updates` is a list of `(token, row)` entries.
#   * `new_last_token` is the new position in stream.
#   * `limited` is whether there are more updates to fetch.
#
StreamUpdateResult = Tuple[List[Tuple[Token, StreamRow]], Token, bool]

# The type of an update_function for a stream
#
# The arguments are:
#
#  * instance_name: the writer of the stream
#  * from_token: the previous stream token: the starting point for fetching the
#    updates
#  * to_token: the new stream token: the point to get updates up to
#  * target_row_count: a target for the number of rows to be returned.
#
# The update_function is expected to return up to _approximately_ target_row_count rows.
# If there are more updates available, it should set `limited` in the result, and
# it will be called again to get the next batch.
#
UpdateFunction = Callable[[str, Token, Token, int], Awaitable[StreamUpdateResult]]
  67. class Stream:
  68. """Base class for the streams.
  69. Provides a `get_updates()` function that returns new updates since the last
  70. time it was called.
  71. """
  72. NAME: str # The name of the stream
  73. # The type of the row. Used by the default impl of parse_row.
  74. ROW_TYPE: Any = None
  75. @classmethod
  76. def parse_row(cls, row: StreamRow) -> Any:
  77. """Parse a row received over replication
  78. By default, assumes that the row data is an array object and passes its contents
  79. to the constructor of the ROW_TYPE for this stream.
  80. Args:
  81. row: row data from the incoming RDATA command, after json decoding
  82. Returns:
  83. ROW_TYPE object for this stream
  84. """
  85. return cls.ROW_TYPE(*row)
  86. def __init__(
  87. self,
  88. local_instance_name: str,
  89. current_token_function: Callable[[str], Token],
  90. update_function: UpdateFunction,
  91. ):
  92. """Instantiate a Stream
  93. `current_token_function` and `update_function` are callbacks which
  94. should be implemented by subclasses.
  95. `current_token_function` takes an instance name, which is a writer to
  96. the stream, and returns the position in the stream of the writer (as
  97. viewed from the current process). On the writer process this is where
  98. the writer has successfully written up to, whereas on other processes
  99. this is the position which we have received updates up to over
  100. replication. (Note that most streams have a single writer and so their
  101. implementations ignore the instance name passed in).
  102. `update_function` is called to get updates for this stream between a
  103. pair of stream tokens. See the `UpdateFunction` type definition for more
  104. info.
  105. Args:
  106. local_instance_name: The instance name of the current process
  107. current_token_function: callback to get the current token, as above
  108. update_function: callback go get stream updates, as above
  109. """
  110. self.local_instance_name = local_instance_name
  111. self.current_token = current_token_function
  112. self.update_function = update_function
  113. # The token from which we last asked for updates
  114. self.last_token = self.current_token(self.local_instance_name)
  115. def discard_updates_and_advance(self) -> None:
  116. """Called when the stream should advance but the updates would be discarded,
  117. e.g. when there are no currently connected workers.
  118. """
  119. self.last_token = self.current_token(self.local_instance_name)
  120. async def get_updates(self) -> StreamUpdateResult:
  121. """Gets all updates since the last time this function was called (or
  122. since the stream was constructed if it hadn't been called before).
  123. Returns:
  124. A triplet `(updates, new_last_token, limited)`, where `updates` is
  125. a list of `(token, row)` entries, `new_last_token` is the new
  126. position in stream, and `limited` is whether there are more updates
  127. to fetch.
  128. """
  129. current_token = self.current_token(self.local_instance_name)
  130. updates, current_token, limited = await self.get_updates_since(
  131. self.local_instance_name, self.last_token, current_token
  132. )
  133. self.last_token = current_token
  134. return updates, current_token, limited
  135. async def get_updates_since(
  136. self, instance_name: str, from_token: Token, upto_token: Token
  137. ) -> StreamUpdateResult:
  138. """Like get_updates except allows specifying from when we should
  139. stream updates
  140. Returns:
  141. A triplet `(updates, new_last_token, limited)`, where `updates` is
  142. a list of `(token, row)` entries, `new_last_token` is the new
  143. position in stream, and `limited` is whether there are more updates
  144. to fetch.
  145. """
  146. from_token = int(from_token)
  147. if from_token == upto_token:
  148. return [], upto_token, False
  149. updates, upto_token, limited = await self.update_function(
  150. instance_name,
  151. from_token,
  152. upto_token,
  153. _STREAM_UPDATE_TARGET_ROW_COUNT,
  154. )
  155. return updates, upto_token, limited
  156. def current_token_without_instance(
  157. current_token: Callable[[], int]
  158. ) -> Callable[[str], int]:
  159. """Takes a current token callback function for a single writer stream
  160. that doesn't take an instance name parameter and wraps it in a function that
  161. does accept an instance name parameter but ignores it.
  162. """
  163. return lambda instance_name: current_token()
  164. def make_http_update_function(hs: "HomeServer", stream_name: str) -> UpdateFunction:
  165. """Makes a suitable function for use as an `update_function` that queries
  166. the master process for updates.
  167. """
  168. client = ReplicationGetStreamUpdates.make_client(hs)
  169. async def update_function(
  170. instance_name: str, from_token: int, upto_token: int, limit: int
  171. ) -> StreamUpdateResult:
  172. result = await client(
  173. instance_name=instance_name,
  174. stream_name=stream_name,
  175. from_token=from_token,
  176. upto_token=upto_token,
  177. )
  178. return result["updates"], result["upto_token"], result["limited"]
  179. return update_function
  180. class BackfillStream(Stream):
  181. """We fetched some old events and either we had never seen that event before
  182. or it went from being an outlier to not.
  183. """
  184. @attr.s(slots=True, frozen=True, auto_attribs=True)
  185. class BackfillStreamRow:
  186. event_id: str
  187. room_id: str
  188. type: str
  189. state_key: Optional[str]
  190. redacts: Optional[str]
  191. relates_to: Optional[str]
  192. NAME = "backfill"
  193. ROW_TYPE = BackfillStreamRow
  194. def __init__(self, hs: "HomeServer"):
  195. self.store = hs.get_datastores().main
  196. super().__init__(
  197. hs.get_instance_name(),
  198. self._current_token,
  199. self.store.get_all_new_backfill_event_rows,
  200. )
  201. def _current_token(self, instance_name: str) -> int:
  202. # The backfill stream over replication operates on *positive* numbers,
  203. # which means we need to negate it.
  204. return -self.store._backfill_id_gen.get_current_token_for_writer(instance_name)
  205. class PresenceStream(Stream):
  206. @attr.s(slots=True, frozen=True, auto_attribs=True)
  207. class PresenceStreamRow:
  208. user_id: str
  209. state: str
  210. last_active_ts: int
  211. last_federation_update_ts: int
  212. last_user_sync_ts: int
  213. status_msg: str
  214. currently_active: bool
  215. NAME = "presence"
  216. ROW_TYPE = PresenceStreamRow
  217. def __init__(self, hs: "HomeServer"):
  218. store = hs.get_datastores().main
  219. if hs.get_instance_name() in hs.config.worker.writers.presence:
  220. # on the presence writer, query the presence handler
  221. presence_handler = hs.get_presence_handler()
  222. from synapse.handlers.presence import PresenceHandler
  223. assert isinstance(presence_handler, PresenceHandler)
  224. update_function: UpdateFunction = presence_handler.get_all_presence_updates
  225. else:
  226. # Query presence writer process
  227. update_function = make_http_update_function(hs, self.NAME)
  228. super().__init__(
  229. hs.get_instance_name(),
  230. current_token_without_instance(store.get_current_presence_token),
  231. update_function,
  232. )
  233. class PresenceFederationStream(Stream):
  234. """A stream used to send ad hoc presence updates over federation.
  235. Streams the remote destination and the user ID of the presence state to
  236. send.
  237. """
  238. @attr.s(slots=True, frozen=True, auto_attribs=True)
  239. class PresenceFederationStreamRow:
  240. destination: str
  241. user_id: str
  242. NAME = "presence_federation"
  243. ROW_TYPE = PresenceFederationStreamRow
  244. def __init__(self, hs: "HomeServer"):
  245. federation_queue = hs.get_presence_handler().get_federation_queue()
  246. super().__init__(
  247. hs.get_instance_name(),
  248. federation_queue.get_current_token,
  249. federation_queue.get_replication_rows,
  250. )
  251. class TypingStream(Stream):
  252. @attr.s(slots=True, frozen=True, auto_attribs=True)
  253. class TypingStreamRow:
  254. room_id: str
  255. user_ids: List[str]
  256. NAME = "typing"
  257. ROW_TYPE = TypingStreamRow
  258. def __init__(self, hs: "HomeServer"):
  259. if hs.get_instance_name() in hs.config.worker.writers.typing:
  260. # On the writer, query the typing handler
  261. typing_writer_handler = hs.get_typing_writer_handler()
  262. update_function: Callable[
  263. [str, int, int, int], Awaitable[Tuple[List[Tuple[int, Any]], int, bool]]
  264. ] = typing_writer_handler.get_all_typing_updates
  265. current_token_function = typing_writer_handler.get_current_token
  266. else:
  267. # Query the typing writer process
  268. update_function = make_http_update_function(hs, self.NAME)
  269. current_token_function = hs.get_typing_handler().get_current_token
  270. super().__init__(
  271. hs.get_instance_name(),
  272. current_token_without_instance(current_token_function),
  273. update_function,
  274. )
  275. class ReceiptsStream(Stream):
  276. @attr.s(slots=True, frozen=True, auto_attribs=True)
  277. class ReceiptsStreamRow:
  278. room_id: str
  279. receipt_type: str
  280. user_id: str
  281. event_id: str
  282. data: dict
  283. NAME = "receipts"
  284. ROW_TYPE = ReceiptsStreamRow
  285. def __init__(self, hs: "HomeServer"):
  286. store = hs.get_datastores().main
  287. super().__init__(
  288. hs.get_instance_name(),
  289. current_token_without_instance(store.get_max_receipt_stream_id),
  290. store.get_all_updated_receipts,
  291. )
  292. class PushRulesStream(Stream):
  293. """A user has changed their push rules"""
  294. @attr.s(slots=True, frozen=True, auto_attribs=True)
  295. class PushRulesStreamRow:
  296. user_id: str
  297. NAME = "push_rules"
  298. ROW_TYPE = PushRulesStreamRow
  299. def __init__(self, hs: "HomeServer"):
  300. self.store = hs.get_datastores().main
  301. super().__init__(
  302. hs.get_instance_name(),
  303. self._current_token,
  304. self.store.get_all_push_rule_updates,
  305. )
  306. def _current_token(self, instance_name: str) -> int:
  307. push_rules_token = self.store.get_max_push_rules_stream_id()
  308. return push_rules_token
  309. class PushersStream(Stream):
  310. """A user has added/changed/removed a pusher"""
  311. @attr.s(slots=True, frozen=True, auto_attribs=True)
  312. class PushersStreamRow:
  313. user_id: str
  314. app_id: str
  315. pushkey: str
  316. deleted: bool
  317. NAME = "pushers"
  318. ROW_TYPE = PushersStreamRow
  319. def __init__(self, hs: "HomeServer"):
  320. store = hs.get_datastores().main
  321. super().__init__(
  322. hs.get_instance_name(),
  323. current_token_without_instance(store.get_pushers_stream_token),
  324. store.get_all_updated_pushers_rows,
  325. )
  326. class CachesStream(Stream):
  327. """A cache was invalidated on the master and no other stream would invalidate
  328. the cache on the workers
  329. """
  330. @attr.s(slots=True, frozen=True, auto_attribs=True)
  331. class CachesStreamRow:
  332. """Stream to inform workers they should invalidate their cache.
  333. Attributes:
  334. cache_func: Name of the cached function.
  335. keys: The entry in the cache to invalidate. If None then will
  336. invalidate all.
  337. invalidation_ts: Timestamp of when the invalidation took place.
  338. """
  339. cache_func: str
  340. keys: Optional[List[Any]]
  341. invalidation_ts: int
  342. NAME = "caches"
  343. ROW_TYPE = CachesStreamRow
  344. def __init__(self, hs: "HomeServer"):
  345. store = hs.get_datastores().main
  346. super().__init__(
  347. hs.get_instance_name(),
  348. store.get_cache_stream_token_for_writer,
  349. store.get_all_updated_caches,
  350. )
  351. class DeviceListsStream(Stream):
  352. """Either a user has updated their devices or a remote server needs to be
  353. told about a device update.
  354. """
  355. @attr.s(slots=True, frozen=True, auto_attribs=True)
  356. class DeviceListsStreamRow:
  357. entity: str
  358. NAME = "device_lists"
  359. ROW_TYPE = DeviceListsStreamRow
  360. def __init__(self, hs: "HomeServer"):
  361. store = hs.get_datastores().main
  362. super().__init__(
  363. hs.get_instance_name(),
  364. current_token_without_instance(store.get_device_stream_token),
  365. store.get_all_device_list_changes_for_remotes,
  366. )
  367. class ToDeviceStream(Stream):
  368. """New to_device messages for a client"""
  369. @attr.s(slots=True, frozen=True, auto_attribs=True)
  370. class ToDeviceStreamRow:
  371. entity: str
  372. NAME = "to_device"
  373. ROW_TYPE = ToDeviceStreamRow
  374. def __init__(self, hs: "HomeServer"):
  375. store = hs.get_datastores().main
  376. super().__init__(
  377. hs.get_instance_name(),
  378. current_token_without_instance(store.get_to_device_stream_token),
  379. store.get_all_new_device_messages,
  380. )
  381. class TagAccountDataStream(Stream):
  382. """Someone added/removed a tag for a room"""
  383. @attr.s(slots=True, frozen=True, auto_attribs=True)
  384. class TagAccountDataStreamRow:
  385. user_id: str
  386. room_id: str
  387. data: JsonDict
  388. NAME = "tag_account_data"
  389. ROW_TYPE = TagAccountDataStreamRow
  390. def __init__(self, hs: "HomeServer"):
  391. store = hs.get_datastores().main
  392. super().__init__(
  393. hs.get_instance_name(),
  394. current_token_without_instance(store.get_max_account_data_stream_id),
  395. store.get_all_updated_tags,
  396. )
  397. class AccountDataStream(Stream):
  398. """Global or per room account data was changed"""
  399. @attr.s(slots=True, frozen=True, auto_attribs=True)
  400. class AccountDataStreamRow:
  401. user_id: str
  402. room_id: Optional[str]
  403. data_type: str
  404. NAME = "account_data"
  405. ROW_TYPE = AccountDataStreamRow
  406. def __init__(self, hs: "HomeServer"):
  407. self.store = hs.get_datastores().main
  408. super().__init__(
  409. hs.get_instance_name(),
  410. current_token_without_instance(self.store.get_max_account_data_stream_id),
  411. self._update_function,
  412. )
  413. async def _update_function(
  414. self, instance_name: str, from_token: int, to_token: int, limit: int
  415. ) -> StreamUpdateResult:
  416. limited = False
  417. global_results = await self.store.get_updated_global_account_data(
  418. from_token, to_token, limit
  419. )
  420. # if the global results hit the limit, we'll need to limit the room results to
  421. # the same stream token.
  422. if len(global_results) >= limit:
  423. to_token = global_results[-1][0]
  424. limited = True
  425. room_results = await self.store.get_updated_room_account_data(
  426. from_token, to_token, limit
  427. )
  428. # likewise, if the room results hit the limit, limit the global results to
  429. # the same stream token.
  430. if len(room_results) >= limit:
  431. to_token = room_results[-1][0]
  432. limited = True
  433. # convert the global results to the right format, and limit them to the to_token
  434. # at the same time
  435. global_rows = (
  436. (stream_id, (user_id, None, account_data_type))
  437. for stream_id, user_id, account_data_type in global_results
  438. if stream_id <= to_token
  439. )
  440. # we know that the room_results are already limited to `to_token` so no need
  441. # for a check on `stream_id` here.
  442. room_rows = (
  443. (stream_id, (user_id, room_id, account_data_type))
  444. for stream_id, user_id, room_id, account_data_type in room_results
  445. )
  446. # We need to return a sorted list, so merge them together.
  447. #
  448. # Note: We order only by the stream ID to work around a bug where the
  449. # same stream ID could appear in both `global_rows` and `room_rows`,
  450. # leading to a comparison between the data tuples. The comparison could
  451. # fail due to attempting to compare the `room_id` which results in a
  452. # `TypeError` from comparing a `str` vs `None`.
  453. updates = list(heapq.merge(room_rows, global_rows, key=lambda row: row[0]))
  454. return updates, to_token, limited
  455. class GroupServerStream(Stream):
  456. @attr.s(slots=True, frozen=True, auto_attribs=True)
  457. class GroupsStreamRow:
  458. group_id: str
  459. user_id: str
  460. type: str
  461. content: JsonDict
  462. NAME = "groups"
  463. ROW_TYPE = GroupsStreamRow
  464. def __init__(self, hs: "HomeServer"):
  465. store = hs.get_datastores().main
  466. super().__init__(
  467. hs.get_instance_name(),
  468. current_token_without_instance(store.get_group_stream_token),
  469. store.get_all_groups_changes,
  470. )
  471. class UserSignatureStream(Stream):
  472. """A user has signed their own device with their user-signing key"""
  473. @attr.s(slots=True, frozen=True, auto_attribs=True)
  474. class UserSignatureStreamRow:
  475. user_id: str
  476. NAME = "user_signature"
  477. ROW_TYPE = UserSignatureStreamRow
  478. def __init__(self, hs: "HomeServer"):
  479. store = hs.get_datastores().main
  480. super().__init__(
  481. hs.get_instance_name(),
  482. current_token_without_instance(store.get_device_stream_token),
  483. store.get_all_user_signature_changes_for_remotes,
  484. )