__init__.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2014-2016 OpenMarket Ltd
  3. # Copyright 2018 New Vector Ltd
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. import logging
  17. from collections import namedtuple
  18. from six import iteritems, itervalues
  19. import attr
  20. from frozendict import frozendict
  21. from twisted.internet import defer
  22. from synapse.api.constants import EventTypes, RoomVersions
  23. from synapse.events.snapshot import EventContext
  24. from synapse.state import v1, v2
  25. from synapse.util.async_helpers import Linearizer
  26. from synapse.util.caches import get_cache_factor_for
  27. from synapse.util.caches.expiringcache import ExpiringCache
  28. from synapse.util.logutils import log_function
  29. from synapse.util.metrics import Measure
  30. logger = logging.getLogger(__name__)
  31. KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key"))
  32. SIZE_OF_CACHE = 100000 * get_cache_factor_for("state_cache")
  33. EVICTION_TIMEOUT_SECONDS = 60 * 60
  34. _NEXT_STATE_ID = 1
  35. POWER_KEY = (EventTypes.PowerLevels, "")
  36. def _gen_state_id():
  37. global _NEXT_STATE_ID
  38. s = "X%d" % (_NEXT_STATE_ID,)
  39. _NEXT_STATE_ID += 1
  40. return s
  41. class _StateCacheEntry(object):
  42. __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"]
  43. def __init__(self, state, state_group, prev_group=None, delta_ids=None):
  44. # dict[(str, str), str] map from (type, state_key) to event_id
  45. self.state = frozendict(state)
  46. # the ID of a state group if one and only one is involved.
  47. # otherwise, None otherwise?
  48. self.state_group = state_group
  49. self.prev_group = prev_group
  50. self.delta_ids = frozendict(delta_ids) if delta_ids is not None else None
  51. # The `state_id` is a unique ID we generate that can be used as ID for
  52. # this collection of state. Usually this would be the same as the
  53. # state group, but on worker instances we can't generate a new state
  54. # group each time we resolve state, so we generate a separate one that
  55. # isn't persisted and is used solely for caches.
  56. # `state_id` is either a state_group (and so an int) or a string. This
  57. # ensures we don't accidentally persist a state_id as a stateg_group
  58. if state_group:
  59. self.state_id = state_group
  60. else:
  61. self.state_id = _gen_state_id()
  62. def __len__(self):
  63. return len(self.state)
  64. class StateHandler(object):
  65. """Fetches bits of state from the stores, and does state resolution
  66. where necessary
  67. """
  68. def __init__(self, hs):
  69. self.clock = hs.get_clock()
  70. self.store = hs.get_datastore()
  71. self.hs = hs
  72. self._state_resolution_handler = hs.get_state_resolution_handler()
  73. @defer.inlineCallbacks
  74. def get_current_state(self, room_id, event_type=None, state_key="",
  75. latest_event_ids=None):
  76. """ Retrieves the current state for the room. This is done by
  77. calling `get_latest_events_in_room` to get the leading edges of the
  78. event graph and then resolving any of the state conflicts.
  79. This is equivalent to getting the state of an event that were to send
  80. next before receiving any new events.
  81. If `event_type` is specified, then the method returns only the one
  82. event (or None) with that `event_type` and `state_key`.
  83. Returns:
  84. map from (type, state_key) to event
  85. """
  86. if not latest_event_ids:
  87. latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
  88. logger.debug("calling resolve_state_groups from get_current_state")
  89. ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
  90. state = ret.state
  91. if event_type:
  92. event_id = state.get((event_type, state_key))
  93. event = None
  94. if event_id:
  95. event = yield self.store.get_event(event_id, allow_none=True)
  96. defer.returnValue(event)
  97. return
  98. state_map = yield self.store.get_events(list(state.values()),
  99. get_prev_content=False)
  100. state = {
  101. key: state_map[e_id] for key, e_id in iteritems(state) if e_id in state_map
  102. }
  103. defer.returnValue(state)
  104. @defer.inlineCallbacks
  105. def get_current_state_ids(self, room_id, latest_event_ids=None):
  106. """Get the current state, or the state at a set of events, for a room
  107. Args:
  108. room_id (str):
  109. latest_event_ids (iterable[str]|None): if given, the forward
  110. extremities to resolve. If None, we look them up from the
  111. database (via a cache)
  112. Returns:
  113. Deferred[dict[(str, str), str)]]: the state dict, mapping from
  114. (event_type, state_key) -> event_id
  115. """
  116. if not latest_event_ids:
  117. latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
  118. logger.debug("calling resolve_state_groups from get_current_state_ids")
  119. ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
  120. state = ret.state
  121. defer.returnValue(state)
  122. @defer.inlineCallbacks
  123. def get_current_user_in_room(self, room_id, latest_event_ids=None):
  124. if not latest_event_ids:
  125. latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
  126. logger.debug("calling resolve_state_groups from get_current_user_in_room")
  127. entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
  128. joined_users = yield self.store.get_joined_users_from_state(room_id, entry)
  129. defer.returnValue(joined_users)
  130. @defer.inlineCallbacks
  131. def get_current_hosts_in_room(self, room_id, latest_event_ids=None):
  132. if not latest_event_ids:
  133. latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
  134. logger.debug("calling resolve_state_groups from get_current_hosts_in_room")
  135. entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
  136. joined_hosts = yield self.store.get_joined_hosts(room_id, entry)
  137. defer.returnValue(joined_hosts)
  138. @defer.inlineCallbacks
  139. def compute_event_context(self, event, old_state=None):
  140. """Build an EventContext structure for the event.
  141. This works out what the current state should be for the event, and
  142. generates a new state group if necessary.
  143. Args:
  144. event (synapse.events.EventBase):
  145. old_state (dict|None): The state at the event if it can't be
  146. calculated from existing events. This is normally only specified
  147. when receiving an event from federation where we don't have the
  148. prev events for, e.g. when backfilling.
  149. Returns:
  150. synapse.events.snapshot.EventContext:
  151. """
  152. if event.internal_metadata.is_outlier():
  153. # If this is an outlier, then we know it shouldn't have any current
  154. # state. Certainly store.get_current_state won't return any, and
  155. # persisting the event won't store the state group.
  156. if old_state:
  157. prev_state_ids = {
  158. (s.type, s.state_key): s.event_id for s in old_state
  159. }
  160. if event.is_state():
  161. current_state_ids = dict(prev_state_ids)
  162. key = (event.type, event.state_key)
  163. current_state_ids[key] = event.event_id
  164. else:
  165. current_state_ids = prev_state_ids
  166. else:
  167. current_state_ids = {}
  168. prev_state_ids = {}
  169. # We don't store state for outliers, so we don't generate a state
  170. # group for it.
  171. context = EventContext.with_state(
  172. state_group=None,
  173. current_state_ids=current_state_ids,
  174. prev_state_ids=prev_state_ids,
  175. )
  176. defer.returnValue(context)
  177. if old_state:
  178. # We already have the state, so we don't need to calculate it.
  179. # Let's just correctly fill out the context and create a
  180. # new state group for it.
  181. prev_state_ids = {
  182. (s.type, s.state_key): s.event_id for s in old_state
  183. }
  184. if event.is_state():
  185. key = (event.type, event.state_key)
  186. if key in prev_state_ids:
  187. replaces = prev_state_ids[key]
  188. if replaces != event.event_id: # Paranoia check
  189. event.unsigned["replaces_state"] = replaces
  190. current_state_ids = dict(prev_state_ids)
  191. current_state_ids[key] = event.event_id
  192. else:
  193. current_state_ids = prev_state_ids
  194. state_group = yield self.store.store_state_group(
  195. event.event_id,
  196. event.room_id,
  197. prev_group=None,
  198. delta_ids=None,
  199. current_state_ids=current_state_ids,
  200. )
  201. context = EventContext.with_state(
  202. state_group=state_group,
  203. current_state_ids=current_state_ids,
  204. prev_state_ids=prev_state_ids,
  205. )
  206. defer.returnValue(context)
  207. logger.debug("calling resolve_state_groups from compute_event_context")
  208. entry = yield self.resolve_state_groups_for_events(
  209. event.room_id, event.prev_event_ids(),
  210. )
  211. prev_state_ids = entry.state
  212. prev_group = None
  213. delta_ids = None
  214. if event.is_state():
  215. # If this is a state event then we need to create a new state
  216. # group for the state after this event.
  217. key = (event.type, event.state_key)
  218. if key in prev_state_ids:
  219. replaces = prev_state_ids[key]
  220. event.unsigned["replaces_state"] = replaces
  221. current_state_ids = dict(prev_state_ids)
  222. current_state_ids[key] = event.event_id
  223. if entry.state_group:
  224. # If the state at the event has a state group assigned then
  225. # we can use that as the prev group
  226. prev_group = entry.state_group
  227. delta_ids = {
  228. key: event.event_id
  229. }
  230. elif entry.prev_group:
  231. # If the state at the event only has a prev group, then we can
  232. # use that as a prev group too.
  233. prev_group = entry.prev_group
  234. delta_ids = dict(entry.delta_ids)
  235. delta_ids[key] = event.event_id
  236. state_group = yield self.store.store_state_group(
  237. event.event_id,
  238. event.room_id,
  239. prev_group=prev_group,
  240. delta_ids=delta_ids,
  241. current_state_ids=current_state_ids,
  242. )
  243. else:
  244. current_state_ids = prev_state_ids
  245. prev_group = entry.prev_group
  246. delta_ids = entry.delta_ids
  247. if entry.state_group is None:
  248. entry.state_group = yield self.store.store_state_group(
  249. event.event_id,
  250. event.room_id,
  251. prev_group=entry.prev_group,
  252. delta_ids=entry.delta_ids,
  253. current_state_ids=current_state_ids,
  254. )
  255. entry.state_id = entry.state_group
  256. state_group = entry.state_group
  257. context = EventContext.with_state(
  258. state_group=state_group,
  259. current_state_ids=current_state_ids,
  260. prev_state_ids=prev_state_ids,
  261. prev_group=prev_group,
  262. delta_ids=delta_ids,
  263. )
  264. defer.returnValue(context)
  265. @defer.inlineCallbacks
  266. def resolve_state_groups_for_events(self, room_id, event_ids):
  267. """ Given a list of event_ids this method fetches the state at each
  268. event, resolves conflicts between them and returns them.
  269. Args:
  270. room_id (str)
  271. event_ids (list[str])
  272. explicit_room_version (str|None): If set uses the the given room
  273. version to choose the resolution algorithm. If None, then
  274. checks the database for room version.
  275. Returns:
  276. Deferred[_StateCacheEntry]: resolved state
  277. """
  278. logger.debug("resolve_state_groups event_ids %s", event_ids)
  279. # map from state group id to the state in that state group (where
  280. # 'state' is a map from state key to event id)
  281. # dict[int, dict[(str, str), str]]
  282. state_groups_ids = yield self.store.get_state_groups_ids(
  283. room_id, event_ids
  284. )
  285. if len(state_groups_ids) == 0:
  286. defer.returnValue(_StateCacheEntry(
  287. state={},
  288. state_group=None,
  289. ))
  290. elif len(state_groups_ids) == 1:
  291. name, state_list = list(state_groups_ids.items()).pop()
  292. prev_group, delta_ids = yield self.store.get_state_group_delta(name)
  293. defer.returnValue(_StateCacheEntry(
  294. state=state_list,
  295. state_group=name,
  296. prev_group=prev_group,
  297. delta_ids=delta_ids,
  298. ))
  299. room_version = yield self.store.get_room_version(room_id)
  300. result = yield self._state_resolution_handler.resolve_state_groups(
  301. room_id, room_version, state_groups_ids, None,
  302. state_res_store=StateResolutionStore(self.store),
  303. )
  304. defer.returnValue(result)
  305. @defer.inlineCallbacks
  306. def resolve_events(self, room_version, state_sets, event):
  307. logger.info(
  308. "Resolving state for %s with %d groups", event.room_id, len(state_sets)
  309. )
  310. state_set_ids = [{
  311. (ev.type, ev.state_key): ev.event_id
  312. for ev in st
  313. } for st in state_sets]
  314. state_map = {
  315. ev.event_id: ev
  316. for st in state_sets
  317. for ev in st
  318. }
  319. with Measure(self.clock, "state._resolve_events"):
  320. new_state = yield resolve_events_with_store(
  321. room_version, state_set_ids,
  322. event_map=state_map,
  323. state_res_store=StateResolutionStore(self.store),
  324. )
  325. new_state = {
  326. key: state_map[ev_id] for key, ev_id in iteritems(new_state)
  327. }
  328. defer.returnValue(new_state)
  329. class StateResolutionHandler(object):
  330. """Responsible for doing state conflict resolution.
  331. Note that the storage layer depends on this handler, so all functions must
  332. be storage-independent.
  333. """
  334. def __init__(self, hs):
  335. self.clock = hs.get_clock()
  336. # dict of set of event_ids -> _StateCacheEntry.
  337. self._state_cache = None
  338. self.resolve_linearizer = Linearizer(name="state_resolve_lock")
  339. self._state_cache = ExpiringCache(
  340. cache_name="state_cache",
  341. clock=self.clock,
  342. max_len=SIZE_OF_CACHE,
  343. expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000,
  344. iterable=True,
  345. reset_expiry_on_get=True,
  346. )
  347. @defer.inlineCallbacks
  348. @log_function
  349. def resolve_state_groups(
  350. self, room_id, room_version, state_groups_ids, event_map, state_res_store,
  351. ):
  352. """Resolves conflicts between a set of state groups
  353. Always generates a new state group (unless we hit the cache), so should
  354. not be called for a single state group
  355. Args:
  356. room_id (str): room we are resolving for (used for logging)
  357. room_version (str): version of the room
  358. state_groups_ids (dict[int, dict[(str, str), str]]):
  359. map from state group id to the state in that state group
  360. (where 'state' is a map from state key to event id)
  361. event_map(dict[str,FrozenEvent]|None):
  362. a dict from event_id to event, for any events that we happen to
  363. have in flight (eg, those currently being persisted). This will be
  364. used as a starting point fof finding the state we need; any missing
  365. events will be requested via state_res_store.
  366. If None, all events will be fetched via state_res_store.
  367. state_res_store (StateResolutionStore)
  368. Returns:
  369. Deferred[_StateCacheEntry]: resolved state
  370. """
  371. logger.debug(
  372. "resolve_state_groups state_groups %s",
  373. state_groups_ids.keys()
  374. )
  375. group_names = frozenset(state_groups_ids.keys())
  376. with (yield self.resolve_linearizer.queue(group_names)):
  377. if self._state_cache is not None:
  378. cache = self._state_cache.get(group_names, None)
  379. if cache:
  380. defer.returnValue(cache)
  381. logger.info(
  382. "Resolving state for %s with %d groups", room_id, len(state_groups_ids)
  383. )
  384. # start by assuming we won't have any conflicted state, and build up the new
  385. # state map by iterating through the state groups. If we discover a conflict,
  386. # we give up and instead use `resolve_events_with_store`.
  387. #
  388. # XXX: is this actually worthwhile, or should we just let
  389. # resolve_events_with_store do it?
  390. new_state = {}
  391. conflicted_state = False
  392. for st in itervalues(state_groups_ids):
  393. for key, e_id in iteritems(st):
  394. if key in new_state:
  395. conflicted_state = True
  396. break
  397. new_state[key] = e_id
  398. if conflicted_state:
  399. break
  400. if conflicted_state:
  401. logger.info("Resolving conflicted state for %r", room_id)
  402. with Measure(self.clock, "state._resolve_events"):
  403. new_state = yield resolve_events_with_store(
  404. room_version,
  405. list(itervalues(state_groups_ids)),
  406. event_map=event_map,
  407. state_res_store=state_res_store,
  408. )
  409. # if the new state matches any of the input state groups, we can
  410. # use that state group again. Otherwise we will generate a state_id
  411. # which will be used as a cache key for future resolutions, but
  412. # not get persisted.
  413. with Measure(self.clock, "state.create_group_ids"):
  414. cache = _make_state_cache_entry(new_state, state_groups_ids)
  415. if self._state_cache is not None:
  416. self._state_cache[group_names] = cache
  417. defer.returnValue(cache)
  418. def _make_state_cache_entry(
  419. new_state,
  420. state_groups_ids,
  421. ):
  422. """Given a resolved state, and a set of input state groups, pick one to base
  423. a new state group on (if any), and return an appropriately-constructed
  424. _StateCacheEntry.
  425. Args:
  426. new_state (dict[(str, str), str]): resolved state map (mapping from
  427. (type, state_key) to event_id)
  428. state_groups_ids (dict[int, dict[(str, str), str]]):
  429. map from state group id to the state in that state group
  430. (where 'state' is a map from state key to event id)
  431. Returns:
  432. _StateCacheEntry
  433. """
  434. # if the new state matches any of the input state groups, we can
  435. # use that state group again. Otherwise we will generate a state_id
  436. # which will be used as a cache key for future resolutions, but
  437. # not get persisted.
  438. # first look for exact matches
  439. new_state_event_ids = set(itervalues(new_state))
  440. for sg, state in iteritems(state_groups_ids):
  441. if len(new_state_event_ids) != len(state):
  442. continue
  443. old_state_event_ids = set(itervalues(state))
  444. if new_state_event_ids == old_state_event_ids:
  445. # got an exact match.
  446. return _StateCacheEntry(
  447. state=new_state,
  448. state_group=sg,
  449. )
  450. # TODO: We want to create a state group for this set of events, to
  451. # increase cache hits, but we need to make sure that it doesn't
  452. # end up as a prev_group without being added to the database
  453. # failing that, look for the closest match.
  454. prev_group = None
  455. delta_ids = None
  456. for old_group, old_state in iteritems(state_groups_ids):
  457. n_delta_ids = {
  458. k: v
  459. for k, v in iteritems(new_state)
  460. if old_state.get(k) != v
  461. }
  462. if not delta_ids or len(n_delta_ids) < len(delta_ids):
  463. prev_group = old_group
  464. delta_ids = n_delta_ids
  465. return _StateCacheEntry(
  466. state=new_state,
  467. state_group=None,
  468. prev_group=prev_group,
  469. delta_ids=delta_ids,
  470. )
  471. def resolve_events_with_store(room_version, state_sets, event_map, state_res_store):
  472. """
  473. Args:
  474. room_version(str): Version of the room
  475. state_sets(list): List of dicts of (type, state_key) -> event_id,
  476. which are the different state groups to resolve.
  477. event_map(dict[str,FrozenEvent]|None):
  478. a dict from event_id to event, for any events that we happen to
  479. have in flight (eg, those currently being persisted). This will be
  480. used as a starting point fof finding the state we need; any missing
  481. events will be requested via state_map_factory.
  482. If None, all events will be fetched via state_map_factory.
  483. state_res_store (StateResolutionStore)
  484. Returns
  485. Deferred[dict[(str, str), str]]:
  486. a map from (type, state_key) to event_id.
  487. """
  488. if room_version == RoomVersions.V1:
  489. return v1.resolve_events_with_store(
  490. state_sets, event_map, state_res_store.get_events,
  491. )
  492. elif room_version in (RoomVersions.VDH_TEST, RoomVersions.STATE_V2_TEST):
  493. return v2.resolve_events_with_store(
  494. state_sets, event_map, state_res_store,
  495. )
  496. else:
  497. # This should only happen if we added a version but forgot to add it to
  498. # the list above.
  499. raise Exception(
  500. "No state resolution algorithm defined for version %r" % (room_version,)
  501. )
  502. @attr.s
  503. class StateResolutionStore(object):
  504. """Interface that allows state resolution algorithms to access the database
  505. in well defined way.
  506. Args:
  507. store (DataStore)
  508. """
  509. store = attr.ib()
  510. def get_events(self, event_ids, allow_rejected=False):
  511. """Get events from the database
  512. Args:
  513. event_ids (list): The event_ids of the events to fetch
  514. allow_rejected (bool): If True return rejected events.
  515. Returns:
  516. Deferred[dict[str, FrozenEvent]]: Dict from event_id to event.
  517. """
  518. return self.store.get_events(
  519. event_ids,
  520. check_redacted=False,
  521. get_prev_content=False,
  522. allow_rejected=allow_rejected,
  523. )
  524. def get_auth_chain(self, event_ids):
  525. """Gets the full auth chain for a set of events (including rejected
  526. events).
  527. Includes the given event IDs in the result.
  528. Note that:
  529. 1. All events must be state events.
  530. 2. For v1 rooms this may not have the full auth chain in the
  531. presence of rejected events
  532. Args:
  533. event_ids (list): The event IDs of the events to fetch the auth
  534. chain for. Must be state events.
  535. Returns:
  536. Deferred[list[str]]: List of event IDs of the auth chain.
  537. """
  538. return self.store.get_auth_chain_ids(event_ids, include_given=True)