events_worker.py

# -*- coding: utf-8 -*-
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ._base import SQLBaseStore

from twisted.internet import defer

from synapse.events import FrozenEvent
from synapse.events.utils import prune_event
from synapse.util.logcontext import (
    PreserveLoggingContext, make_deferred_yieldable, run_in_background,
    LoggingContext,
)
from synapse.util.metrics import Measure
from synapse.api.errors import SynapseError

from collections import namedtuple
import logging

from canonicaljson import json

# these are only included to make the type annotations work
from synapse.events import EventBase  # noqa: F401
from synapse.events.snapshot import EventContext  # noqa: F401

logger = logging.getLogger(__name__)

# These values are used in the `_enqueue_events` and `_do_fetch` methods to
# control how we batch/bulk fetch events from the database.
# The values are plucked out of thin air to make initial sync run faster
# on jki.re
# TODO: Make these configurable.
EVENT_QUEUE_THREADS = 3  # Max number of threads that will fetch events
EVENT_QUEUE_ITERATIONS = 3  # No. times we block waiting for requests for events
EVENT_QUEUE_TIMEOUT_S = 0.1  # Timeout when waiting for requests for events
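
# An entry in the event cache: the original event, plus the pruned copy if the
# event has been redacted (`redacted_event` is None otherwise).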
_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event"))


class EventsWorkerStore(SQLBaseStore):
    def get_received_ts(self, event_id):
        """Get received_ts (when it was persisted) for the event.

        Raises an exception for unknown events.

        Args:
            event_id (str)

        Returns:
            Deferred[int|None]: Timestamp in milliseconds, or None for events
                that were persisted before received_ts was implemented.
        """
        return self._simple_select_one_onecol(
            table="events",
            keyvalues={
                "event_id": event_id,
            },
            retcol="received_ts",
            desc="get_received_ts",
        )

    @defer.inlineCallbacks
    def get_event(self, event_id, check_redacted=True,
                  get_prev_content=False, allow_rejected=False,
                  allow_none=False):
        """Get an event from the database by event_id.

        Args:
            event_id (str): The event_id of the event to fetch
            check_redacted (bool): If True, check if the event has been
                redacted and redact it.
            get_prev_content (bool): If True and the event is a state event,
                include the previous state's content in the unsigned field.
            allow_rejected (bool): If True, return rejected events.
            allow_none (bool): If True, return None if no event is found; if
                False, throw an exception.

        Returns:
            Deferred: A FrozenEvent.
        """
        events = yield self._get_events(
            [event_id],
            check_redacted=check_redacted,
            get_prev_content=get_prev_content,
            allow_rejected=allow_rejected,
        )

        if not events and not allow_none:
            raise SynapseError(404, "Could not find event %s" % (event_id,))

        defer.returnValue(events[0] if events else None)
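
    # Example usage, as a sketch (assumes an @defer.inlineCallbacks caller
    # holding this store as `store`; `event_id` is hypothetical):
    #
    #     event = yield store.get_event(event_id, allow_none=True)
    #     if event is None:
    #         ...  # the event is unknown (or was rejected)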

    @defer.inlineCallbacks
    def get_events(self, event_ids, check_redacted=True,
                   get_prev_content=False, allow_rejected=False):
        """Get events from the database

        Args:
            event_ids (list): The event_ids of the events to fetch
            check_redacted (bool): If True, check if the events have been
                redacted and redact them.
            get_prev_content (bool): If True and an event is a state event,
                include the previous state's content in the unsigned field.
            allow_rejected (bool): If True, return rejected events.

        Returns:
            Deferred: Dict from event_id to event.
        """
        events = yield self._get_events(
            event_ids,
            check_redacted=check_redacted,
            get_prev_content=get_prev_content,
            allow_rejected=allow_rejected,
        )

        defer.returnValue({e.event_id: e for e in events})
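
    # Note that events which could not be found (or which were rejected, when
    # allow_rejected is False) are simply omitted from the returned dict, so
    # callers must not assume every requested event_id has an entry.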

    @defer.inlineCallbacks
    def _get_events(self, event_ids, check_redacted=True,
                    get_prev_content=False, allow_rejected=False):
        if not event_ids:
            defer.returnValue([])

        event_id_list = event_ids
        event_ids = set(event_ids)

        # Check the cache first; anything missing is batch-fetched from the
        # database via _enqueue_events.
        event_entry_map = self._get_events_from_cache(
            event_ids,
            allow_rejected=allow_rejected,
        )

        missing_events_ids = [e for e in event_ids if e not in event_entry_map]

        if missing_events_ids:
            log_ctx = LoggingContext.current_context()
            log_ctx.record_event_fetch(len(missing_events_ids))

            missing_events = yield self._enqueue_events(
                missing_events_ids,
                check_redacted=check_redacted,
                allow_rejected=allow_rejected,
            )

            event_entry_map.update(missing_events)

        # Build the result list in the order the events were requested.
        events = []
        for event_id in event_id_list:
            entry = event_entry_map.get(event_id, None)
            if not entry:
                continue

            if allow_rejected or not entry.event.rejected_reason:
                if check_redacted and entry.redacted_event:
                    event = entry.redacted_event
                else:
                    event = entry.event

                events.append(event)

                if get_prev_content:
                    if "replaces_state" in event.unsigned:
                        prev = yield self.get_event(
                            event.unsigned["replaces_state"],
                            get_prev_content=False,
                            allow_none=True,
                        )
                        if prev:
                            event.unsigned = dict(event.unsigned)
                            event.unsigned["prev_content"] = prev.content
                            event.unsigned["prev_sender"] = prev.sender

        defer.returnValue(events)

    def _invalidate_get_event_cache(self, event_id):
        self._get_event_cache.invalidate((event_id,))

    def _get_events_from_cache(self, events, allow_rejected, update_metrics=True):
        """Fetch events from the caches

        Args:
            events (list(str)): list of event_ids to fetch
            allow_rejected (bool): Whether to return events that were rejected
            update_metrics (bool): Whether to update the cache hit ratio metrics

        Returns:
            dict of event_id -> _EventCacheEntry for each event_id in cache.
            If allow_rejected is `False` then a rejected event will still get
            an entry, but it will be `None`.
        """
        event_map = {}

        for event_id in events:
            ret = self._get_event_cache.get(
                (event_id,), None,
                update_metrics=update_metrics,
            )
            if not ret:
                continue

            if allow_rejected or not ret.event.rejected_reason:
                event_map[event_id] = ret
            else:
                event_map[event_id] = None

        return event_map
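
    # The two methods below form a small producer/consumer queue for reading
    # events from the database: _enqueue_events appends an
    # (event_ids, Deferred) pair to _event_fetch_list and wakes a fetcher,
    # while up to EVENT_QUEUE_THREADS _do_fetch loops drain the list on
    # database connections, batching many requests into a single transaction.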

    def _do_fetch(self, conn):
        """Takes a database connection and waits for requests for events from
        the _event_fetch_list queue.
        """
        event_list = []
        i = 0
        while True:
            try:
                with self._event_fetch_lock:
                    event_list = self._event_fetch_list
                    self._event_fetch_list = []

                    if not event_list:
                        single_threaded = self.database_engine.single_threaded
                        if single_threaded or i > EVENT_QUEUE_ITERATIONS:
                            self._event_fetch_ongoing -= 1
                            return
                        else:
                            self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S)
                            i += 1
                            continue
                    i = 0

                # Flatten the queued requests into a single list of event_ids.
                # (list() is needed on Python 3, where zip() returns an
                # iterator.)
                event_id_lists = list(zip(*event_list))[0]
                event_ids = [
                    item for sublist in event_id_lists for item in sublist
                ]

                rows = self._new_transaction(
                    conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids
                )

                row_dict = {
                    r["event_id"]: r
                    for r in rows
                }

                # We only want to resolve deferreds from the main thread
                def fire(lst, res):
                    for ids, d in lst:
                        if not d.called:
                            try:
                                with PreserveLoggingContext():
                                    d.callback([
                                        res[i]
                                        for i in ids
                                        if i in res
                                    ])
                            except Exception:
                                logger.exception("Failed to callback")
                with PreserveLoggingContext():
                    self.hs.get_reactor().callFromThread(fire, event_list, row_dict)
            except Exception as e:
                logger.exception("do_fetch")

                # We only want to resolve deferreds from the main thread.
                # Pass the exception in explicitly: the `except` target is
                # unbound once the block exits on Python 3.
                def fire(evs, exc):
                    for _, d in evs:
                        if not d.called:
                            with PreserveLoggingContext():
                                d.errback(exc)

                if event_list:
                    with PreserveLoggingContext():
                        self.hs.get_reactor().callFromThread(fire, event_list, e)

    @defer.inlineCallbacks
    def _enqueue_events(self, events, check_redacted=True, allow_rejected=False):
        """Fetches events from the database using the _event_fetch_list. This
        allows batch and bulk fetching of events - it allows us to fetch events
        without having to create a new transaction for each request for events.
        """
        if not events:
            defer.returnValue({})

        events_d = defer.Deferred()
        with self._event_fetch_lock:
            self._event_fetch_list.append(
                (events, events_d)
            )

            self._event_fetch_lock.notify()

            if self._event_fetch_ongoing < EVENT_QUEUE_THREADS:
                self._event_fetch_ongoing += 1
                should_start = True
            else:
                should_start = False

        if should_start:
            with PreserveLoggingContext():
                self.runWithConnection(
                    self._do_fetch
                )

        logger.debug("Loading %d events", len(events))
        with PreserveLoggingContext():
            rows = yield events_d
        logger.debug("Loaded %d events (%d rows)", len(events), len(rows))

        if not allow_rejected:
            rows[:] = [r for r in rows if not r["rejects"]]

        res = yield make_deferred_yieldable(defer.gatherResults(
            [
                run_in_background(
                    self._get_event_from_row,
                    row["internal_metadata"], row["json"], row["redacts"],
                    rejected_reason=row["rejects"],
                )
                for row in rows
            ],
            consumeErrors=True
        ))

        defer.returnValue({
            e.event.event_id: e
            for e in res if e
        })
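
    # Rows are read with one SELECT per chunk of event_ids (see the N = 200
    # batching below), left-joining the rejections and redactions tables so
    # that rejected/redacted status comes back in the same query.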

    def _fetch_event_rows(self, txn, events):
        rows = []
        N = 200
        for i in range(1 + len(events) // N):
            evs = events[i * N:(i + 1) * N]
            if not evs:
                break

            sql = (
                "SELECT "
                " e.event_id as event_id, "
                " e.internal_metadata,"
                " e.json,"
                " r.redacts as redacts,"
                " rej.event_id as rejects "
                " FROM event_json as e"
                " LEFT JOIN rejections as rej USING (event_id)"
                " LEFT JOIN redactions as r ON e.event_id = r.redacts"
                " WHERE e.event_id IN (%s)"
            ) % (",".join(["?"] * len(evs)),)

            txn.execute(sql, evs)
            rows.extend(self.cursor_to_dict(txn))

        return rows
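
    # Deserialise a database row back into a FrozenEvent. If the event was
    # redacted, the pruned copy is also built up front so that cache hits
    # never have to recompute it.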

    @defer.inlineCallbacks
    def _get_event_from_row(self, internal_metadata, js, redacted,
                            rejected_reason=None):
        with Measure(self._clock, "_get_event_from_row"):
            d = json.loads(js)
            internal_metadata = json.loads(internal_metadata)

            if rejected_reason:
                rejected_reason = yield self._simple_select_one_onecol(
                    table="rejections",
                    keyvalues={"event_id": rejected_reason},
                    retcol="reason",
                    desc="_get_event_from_row_rejected_reason",
                )

            original_ev = FrozenEvent(
                d,
                internal_metadata_dict=internal_metadata,
                rejected_reason=rejected_reason,
            )

            redacted_event = None
            if redacted:
                redacted_event = prune_event(original_ev)

                redaction_id = yield self._simple_select_one_onecol(
                    table="redactions",
                    keyvalues={"redacts": redacted_event.event_id},
                    retcol="event_id",
                    desc="_get_event_from_row_redactions",
                )

                redacted_event.unsigned["redacted_by"] = redaction_id

                # Get the redaction event.
                because = yield self.get_event(
                    redaction_id,
                    check_redacted=False,
                    allow_none=True,
                )

                if because:
                    # It's fine to add the event directly, since get_pdu_json
                    # will serialise this field correctly
                    redacted_event.unsigned["redacted_because"] = because

            cache_entry = _EventCacheEntry(
                event=original_ev,
                redacted_event=redacted_event,
            )

            self._get_event_cache.prefill((original_ev.event_id,), cache_entry)

        defer.returnValue(cache_entry)