snapshot.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. # Copyright 2014-2016 OpenMarket Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from abc import ABC, abstractmethod
  15. from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
  16. import attr
  17. from immutabledict import immutabledict
  18. from synapse.appservice import ApplicationService
  19. from synapse.events import EventBase
  20. from synapse.logging.opentracing import tag_args, trace
  21. from synapse.types import JsonDict, StateMap
  22. if TYPE_CHECKING:
  23. from synapse.storage.controllers import StorageControllers
  24. from synapse.storage.databases import StateGroupDataStore
  25. from synapse.storage.databases.main import DataStore
  26. from synapse.types.state import StateFilter
  27. class UnpersistedEventContextBase(ABC):
  28. """
  29. This is a base class for EventContext and UnpersistedEventContext, objects which
  30. hold information relevant to storing an associated event. Note that an
  31. UnpersistedEventContexts must be converted into an EventContext before it is
  32. suitable to send to the db with its associated event.
  33. Attributes:
  34. _storage: storage controllers for interfacing with the database
  35. app_service: If the associated event is being sent by a (local) application service, that
  36. app service.
  37. """
  38. def __init__(self, storage_controller: "StorageControllers"):
  39. self._storage: "StorageControllers" = storage_controller
  40. self.app_service: Optional[ApplicationService] = None
  41. @abstractmethod
  42. async def persist(
  43. self,
  44. event: EventBase,
  45. ) -> "EventContext":
  46. """
  47. A method to convert an UnpersistedEventContext to an EventContext, suitable for
  48. sending to the database with the associated event.
  49. """
  50. pass
  51. @abstractmethod
  52. async def get_prev_state_ids(
  53. self, state_filter: Optional["StateFilter"] = None
  54. ) -> StateMap[str]:
  55. """
  56. Gets the room state at the event (ie not including the event if the event is a
  57. state event).
  58. Args:
  59. state_filter: specifies the type of state event to fetch from DB, example:
  60. EventTypes.JoinRules
  61. """
  62. pass
  63. @attr.s(slots=True, auto_attribs=True)
  64. class EventContext(UnpersistedEventContextBase):
  65. """
  66. Holds information relevant to persisting an event
  67. Attributes:
  68. rejected: A rejection reason if the event was rejected, else None
  69. _state_group: The ID of the state group for this event. Note that state events
  70. are persisted with a state group which includes the new event, so this is
  71. effectively the state *after* the event in question.
  72. For a *rejected* state event, where the state of the rejected event is
  73. ignored, this state_group should never make it into the
  74. event_to_state_groups table. Indeed, inspecting this value for a rejected
  75. state event is almost certainly incorrect.
  76. For an outlier, where we don't have the state at the event, this will be
  77. None.
  78. Note that this is a private attribute: it should be accessed via
  79. the ``state_group`` property.
  80. state_group_before_event: The ID of the state group representing the state
  81. of the room before this event.
  82. If this is a non-state event, this will be the same as ``state_group``. If
  83. it's a state event, it will be the same as ``prev_group``.
  84. If ``state_group`` is None (ie, the event is an outlier),
  85. ``state_group_before_event`` will always also be ``None``.
  86. state_delta_due_to_event: If `state_group` and `state_group_before_event` are not None
  87. then this is the delta of the state between the two groups.
  88. state_group_deltas: If not empty, this is a dict collecting a mapping of the state
  89. difference between state groups.
  90. The keys are a tuple of two integers: the initial group and final state group.
  91. The corresponding value is a state map representing the state delta between
  92. these state groups.
  93. The dictionary is expected to have at most two entries with state groups of:
  94. 1. The state group before the event and after the event.
  95. 2. The state group preceding the state group before the event and the
  96. state group before the event.
  97. This information is collected and stored as part of an optimization for persisting
  98. events.
  99. partial_state: if True, we may be storing this event with a temporary,
  100. incomplete state.
  101. """
  102. _storage: "StorageControllers"
  103. state_group_deltas: Dict[Tuple[int, int], StateMap[str]]
  104. rejected: Optional[str] = None
  105. _state_group: Optional[int] = None
  106. state_group_before_event: Optional[int] = None
  107. _state_delta_due_to_event: Optional[StateMap[str]] = None
  108. app_service: Optional[ApplicationService] = None
  109. partial_state: bool = False
  110. @staticmethod
  111. def with_state(
  112. storage: "StorageControllers",
  113. state_group: Optional[int],
  114. state_group_before_event: Optional[int],
  115. state_delta_due_to_event: Optional[StateMap[str]],
  116. partial_state: bool,
  117. state_group_deltas: Dict[Tuple[int, int], StateMap[str]],
  118. ) -> "EventContext":
  119. return EventContext(
  120. storage=storage,
  121. state_group=state_group,
  122. state_group_before_event=state_group_before_event,
  123. state_delta_due_to_event=state_delta_due_to_event,
  124. state_group_deltas=state_group_deltas,
  125. partial_state=partial_state,
  126. )
  127. @staticmethod
  128. def for_outlier(
  129. storage: "StorageControllers",
  130. ) -> "EventContext":
  131. """Return an EventContext instance suitable for persisting an outlier event"""
  132. return EventContext(storage=storage, state_group_deltas={})
  133. async def persist(self, event: EventBase) -> "EventContext":
  134. return self
  135. async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict:
  136. """Converts self to a type that can be serialized as JSON, and then
  137. deserialized by `deserialize`
  138. Args:
  139. event: The event that this context relates to
  140. Returns:
  141. The serialized event.
  142. """
  143. return {
  144. "state_group": self._state_group,
  145. "state_group_before_event": self.state_group_before_event,
  146. "rejected": self.rejected,
  147. "state_group_deltas": _encode_state_group_delta(self.state_group_deltas),
  148. "state_delta_due_to_event": _encode_state_dict(
  149. self._state_delta_due_to_event
  150. ),
  151. "app_service_id": self.app_service.id if self.app_service else None,
  152. "partial_state": self.partial_state,
  153. # add dummy delta_ids and prev_group for backwards compatibility
  154. "delta_ids": None,
  155. "prev_group": None,
  156. }
  157. @staticmethod
  158. def deserialize(storage: "StorageControllers", input: JsonDict) -> "EventContext":
  159. """Converts a dict that was produced by `serialize` back into a
  160. EventContext.
  161. Args:
  162. storage: Used to convert AS ID to AS object and fetch state.
  163. input: A dict produced by `serialize`
  164. Returns:
  165. The event context.
  166. """
  167. # workaround for backwards/forwards compatibility: if the input doesn't have a value
  168. # for "state_group_deltas" just assign an empty dict
  169. state_group_deltas = input.get("state_group_deltas", None)
  170. if state_group_deltas:
  171. state_group_deltas = _decode_state_group_delta(state_group_deltas)
  172. else:
  173. state_group_deltas = {}
  174. context = EventContext(
  175. # We use the state_group and prev_state_id stuff to pull the
  176. # current_state_ids out of the DB and construct prev_state_ids.
  177. storage=storage,
  178. state_group=input["state_group"],
  179. state_group_before_event=input["state_group_before_event"],
  180. state_group_deltas=state_group_deltas,
  181. state_delta_due_to_event=_decode_state_dict(
  182. input["state_delta_due_to_event"]
  183. ),
  184. rejected=input["rejected"],
  185. partial_state=input.get("partial_state", False),
  186. )
  187. app_service_id = input["app_service_id"]
  188. if app_service_id:
  189. context.app_service = storage.main.get_app_service_by_id(app_service_id)
  190. return context
  191. @property
  192. def state_group(self) -> Optional[int]:
  193. """The ID of the state group for this event.
  194. Note that state events are persisted with a state group which includes the new
  195. event, so this is effectively the state *after* the event in question.
  196. For an outlier, where we don't have the state at the event, this will be None.
  197. It is an error to access this for a rejected event, since rejected state should
  198. not make it into the room state. Accessing this property will raise an exception
  199. if ``rejected`` is set.
  200. """
  201. if self.rejected:
  202. raise RuntimeError("Attempt to access state_group of rejected event")
  203. return self._state_group
  204. @trace
  205. @tag_args
  206. async def get_current_state_ids(
  207. self, state_filter: Optional["StateFilter"] = None
  208. ) -> Optional[StateMap[str]]:
  209. """
  210. Gets the room state map, including this event - ie, the state in ``state_group``
  211. It is an error to access this for a rejected event, since rejected state should
  212. not make it into the room state. This method will raise an exception if
  213. ``rejected`` is set.
  214. Arg:
  215. state_filter: specifies the type of state event to fetch from DB, example: EventTypes.JoinRules
  216. Returns:
  217. Returns None if state_group is None, which happens when the associated
  218. event is an outlier.
  219. Maps a (type, state_key) to the event ID of the state event matching
  220. this tuple.
  221. """
  222. if self.rejected:
  223. raise RuntimeError("Attempt to access state_ids of rejected event")
  224. assert self._state_delta_due_to_event is not None
  225. prev_state_ids = await self.get_prev_state_ids(state_filter)
  226. if self._state_delta_due_to_event:
  227. prev_state_ids = dict(prev_state_ids)
  228. prev_state_ids.update(self._state_delta_due_to_event)
  229. return prev_state_ids
  230. @trace
  231. @tag_args
  232. async def get_prev_state_ids(
  233. self, state_filter: Optional["StateFilter"] = None
  234. ) -> StateMap[str]:
  235. """
  236. Gets the room state map, excluding this event.
  237. For a non-state event, this will be the same as get_current_state_ids().
  238. Args:
  239. state_filter: specifies the type of state event to fetch from DB, example: EventTypes.JoinRules
  240. Returns:
  241. Returns {} if state_group is None, which happens when the associated
  242. event is an outlier.
  243. Maps a (type, state_key) to the event ID of the state event matching
  244. this tuple.
  245. """
  246. assert self.state_group_before_event is not None
  247. return await self._storage.state.get_state_ids_for_group(
  248. self.state_group_before_event, state_filter
  249. )
  250. @attr.s(slots=True, auto_attribs=True)
  251. class UnpersistedEventContext(UnpersistedEventContextBase):
  252. """
  253. The event context holds information about the state groups for an event. It is important
  254. to remember that an event technically has two state groups: the state group before the
  255. event, and the state group after the event. If the event is not a state event, the state
  256. group will not change (ie the state group before the event will be the same as the state
  257. group after the event), but if it is a state event the state group before the event
  258. will differ from the state group after the event.
  259. This is a version of an EventContext before the new state group (if any) has been
  260. computed and stored. It contains information about the state before the event (which
  261. also may be the information after the event, if the event is not a state event). The
  262. UnpersistedEventContext must be converted into an EventContext by calling the method
  263. 'persist' on it before it is suitable to be sent to the DB for processing.
  264. state_group_after_event:
  265. The state group after the event. This will always be None until it is persisted.
  266. If the event is not a state event, this will be the same as
  267. state_group_before_event.
  268. state_group_before_event:
  269. The ID of the state group representing the state of the room before this event.
  270. state_delta_due_to_event:
  271. If the event is a state event, then this is the delta of the state between
  272. `state_group` and `state_group_before_event`
  273. prev_group_for_state_group_before_event:
  274. If it is known, ``state_group_before_event``'s previous state group.
  275. delta_ids_to_state_group_before_event:
  276. If ``prev_group_for_state_group_before_event`` is not None, the state delta
  277. between ``prev_group_for_state_group_before_event`` and ``state_group_before_event``.
  278. partial_state:
  279. Whether the event has partial state.
  280. state_map_before_event:
  281. A map of the state before the event, i.e. the state at `state_group_before_event`
  282. """
  283. _storage: "StorageControllers"
  284. state_group_before_event: Optional[int]
  285. state_group_after_event: Optional[int]
  286. state_delta_due_to_event: Optional[StateMap[str]]
  287. prev_group_for_state_group_before_event: Optional[int]
  288. delta_ids_to_state_group_before_event: Optional[StateMap[str]]
  289. partial_state: bool
  290. state_map_before_event: Optional[StateMap[str]] = None
  291. @classmethod
  292. async def batch_persist_unpersisted_contexts(
  293. cls,
  294. events_and_context: List[Tuple[EventBase, "UnpersistedEventContextBase"]],
  295. room_id: str,
  296. last_known_state_group: int,
  297. datastore: "StateGroupDataStore",
  298. ) -> List[Tuple[EventBase, EventContext]]:
  299. """
  300. Takes a list of events and their associated unpersisted contexts and persists
  301. the unpersisted contexts, returning a list of events and persisted contexts.
  302. Note that all the events must be in a linear chain (ie a <- b <- c).
  303. Args:
  304. events_and_context: A list of events and their unpersisted contexts
  305. room_id: the room_id for the events
  306. last_known_state_group: the last persisted state group
  307. datastore: a state datastore
  308. """
  309. amended_events_and_context = await datastore.store_state_deltas_for_batched(
  310. events_and_context, room_id, last_known_state_group
  311. )
  312. events_and_persisted_context = []
  313. for event, unpersisted_context in amended_events_and_context:
  314. state_group_deltas = unpersisted_context._build_state_group_deltas()
  315. context = EventContext(
  316. storage=unpersisted_context._storage,
  317. state_group=unpersisted_context.state_group_after_event,
  318. state_group_before_event=unpersisted_context.state_group_before_event,
  319. state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
  320. partial_state=unpersisted_context.partial_state,
  321. state_group_deltas=state_group_deltas,
  322. )
  323. events_and_persisted_context.append((event, context))
  324. return events_and_persisted_context
  325. async def get_prev_state_ids(
  326. self, state_filter: Optional["StateFilter"] = None
  327. ) -> StateMap[str]:
  328. """
  329. Gets the room state map, excluding this event.
  330. Args:
  331. state_filter: specifies the type of state event to fetch from DB
  332. Returns:
  333. Maps a (type, state_key) to the event ID of the state event matching
  334. this tuple.
  335. """
  336. if self.state_map_before_event:
  337. return self.state_map_before_event
  338. assert self.state_group_before_event is not None
  339. return await self._storage.state.get_state_ids_for_group(
  340. self.state_group_before_event, state_filter
  341. )
  342. async def persist(self, event: EventBase) -> EventContext:
  343. """
  344. Creates a full `EventContext` for the event, persisting any referenced state that
  345. has not yet been persisted.
  346. Args:
  347. event: event that the EventContext is associated with.
  348. Returns: An EventContext suitable for sending to the database with the event
  349. for persisting
  350. """
  351. assert self.partial_state is not None
  352. # If we have a full set of state for before the event but don't have a state
  353. # group for that state, we need to get one
  354. if self.state_group_before_event is None:
  355. assert self.state_map_before_event
  356. state_group_before_event = await self._storage.state.store_state_group(
  357. event.event_id,
  358. event.room_id,
  359. prev_group=self.prev_group_for_state_group_before_event,
  360. delta_ids=self.delta_ids_to_state_group_before_event,
  361. current_state_ids=self.state_map_before_event,
  362. )
  363. self.state_group_before_event = state_group_before_event
  364. # if the event isn't a state event the state group doesn't change
  365. if not self.state_delta_due_to_event:
  366. self.state_group_after_event = self.state_group_before_event
  367. # otherwise if it is a state event we need to get a state group for it
  368. else:
  369. self.state_group_after_event = await self._storage.state.store_state_group(
  370. event.event_id,
  371. event.room_id,
  372. prev_group=self.state_group_before_event,
  373. delta_ids=self.state_delta_due_to_event,
  374. current_state_ids=None,
  375. )
  376. state_group_deltas = self._build_state_group_deltas()
  377. return EventContext.with_state(
  378. storage=self._storage,
  379. state_group=self.state_group_after_event,
  380. state_group_before_event=self.state_group_before_event,
  381. state_delta_due_to_event=self.state_delta_due_to_event,
  382. state_group_deltas=state_group_deltas,
  383. partial_state=self.partial_state,
  384. )
  385. def _build_state_group_deltas(self) -> Dict[Tuple[int, int], StateMap]:
  386. """
  387. Collect deltas between the state groups associated with this context
  388. """
  389. state_group_deltas = {}
  390. # if we know the state group before the event and after the event, add them and the
  391. # state delta between them to state_group_deltas
  392. if self.state_group_before_event and self.state_group_after_event:
  393. # if we have the state groups we should have the delta
  394. assert self.state_delta_due_to_event is not None
  395. state_group_deltas[
  396. (
  397. self.state_group_before_event,
  398. self.state_group_after_event,
  399. )
  400. ] = self.state_delta_due_to_event
  401. # the state group before the event may also have a state group which precedes it, if
  402. # we have that and the state group before the event, add them and the state
  403. # delta between them to state_group_deltas
  404. if (
  405. self.prev_group_for_state_group_before_event
  406. and self.state_group_before_event
  407. ):
  408. # if we have both state groups we should have the delta between them
  409. assert self.delta_ids_to_state_group_before_event is not None
  410. state_group_deltas[
  411. (
  412. self.prev_group_for_state_group_before_event,
  413. self.state_group_before_event,
  414. )
  415. ] = self.delta_ids_to_state_group_before_event
  416. return state_group_deltas
  417. def _encode_state_group_delta(
  418. state_group_delta: Dict[Tuple[int, int], StateMap[str]]
  419. ) -> List[Tuple[int, int, Optional[List[Tuple[str, str, str]]]]]:
  420. if not state_group_delta:
  421. return []
  422. state_group_delta_encoded = []
  423. for key, value in state_group_delta.items():
  424. state_group_delta_encoded.append((key[0], key[1], _encode_state_dict(value)))
  425. return state_group_delta_encoded
  426. def _decode_state_group_delta(
  427. input: List[Tuple[int, int, List[Tuple[str, str, str]]]]
  428. ) -> Dict[Tuple[int, int], StateMap[str]]:
  429. if not input:
  430. return {}
  431. state_group_deltas = {}
  432. for state_group_1, state_group_2, state_dict in input:
  433. state_map = _decode_state_dict(state_dict)
  434. assert state_map is not None
  435. state_group_deltas[(state_group_1, state_group_2)] = state_map
  436. return state_group_deltas
  437. def _encode_state_dict(
  438. state_dict: Optional[StateMap[str]],
  439. ) -> Optional[List[Tuple[str, str, str]]]:
  440. """Since dicts of (type, state_key) -> event_id cannot be serialized in
  441. JSON we need to convert them to a form that can.
  442. """
  443. if state_dict is None:
  444. return None
  445. return [(etype, state_key, v) for (etype, state_key), v in state_dict.items()]
  446. def _decode_state_dict(
  447. input: Optional[List[Tuple[str, str, str]]]
  448. ) -> Optional[StateMap[str]]:
  449. """Decodes a state dict encoded using `_encode_state_dict` above"""
  450. if input is None:
  451. return None
  452. return immutabledict({(etype, state_key): v for etype, state_key, v in input})