__init__.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2014-2016 OpenMarket Ltd
  3. # Copyright 2018 New Vector Ltd
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. import logging
  17. from collections import namedtuple
  18. from six import iteritems, itervalues
  19. from frozendict import frozendict
  20. from twisted.internet import defer
  21. from synapse.api.constants import EventTypes, RoomVersions
  22. from synapse.events.snapshot import EventContext
  23. from synapse.state import v1
  24. from synapse.util.async import Linearizer
  25. from synapse.util.caches import CACHE_SIZE_FACTOR
  26. from synapse.util.caches.expiringcache import ExpiringCache
  27. from synapse.util.logutils import log_function
  28. from synapse.util.metrics import Measure
  29. logger = logging.getLogger(__name__)
  30. KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key"))
  31. SIZE_OF_CACHE = int(100000 * CACHE_SIZE_FACTOR)
  32. EVICTION_TIMEOUT_SECONDS = 60 * 60
  33. _NEXT_STATE_ID = 1
  34. POWER_KEY = (EventTypes.PowerLevels, "")
  35. def _gen_state_id():
  36. global _NEXT_STATE_ID
  37. s = "X%d" % (_NEXT_STATE_ID,)
  38. _NEXT_STATE_ID += 1
  39. return s
  40. class _StateCacheEntry(object):
  41. __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"]
  42. def __init__(self, state, state_group, prev_group=None, delta_ids=None):
  43. # dict[(str, str), str] map from (type, state_key) to event_id
  44. self.state = frozendict(state)
  45. # the ID of a state group if one and only one is involved.
  46. # otherwise, None otherwise?
  47. self.state_group = state_group
  48. self.prev_group = prev_group
  49. self.delta_ids = frozendict(delta_ids) if delta_ids is not None else None
  50. # The `state_id` is a unique ID we generate that can be used as ID for
  51. # this collection of state. Usually this would be the same as the
  52. # state group, but on worker instances we can't generate a new state
  53. # group each time we resolve state, so we generate a separate one that
  54. # isn't persisted and is used solely for caches.
  55. # `state_id` is either a state_group (and so an int) or a string. This
  56. # ensures we don't accidentally persist a state_id as a stateg_group
  57. if state_group:
  58. self.state_id = state_group
  59. else:
  60. self.state_id = _gen_state_id()
  61. def __len__(self):
  62. return len(self.state)
  63. class StateHandler(object):
  64. """Fetches bits of state from the stores, and does state resolution
  65. where necessary
  66. """
  67. def __init__(self, hs):
  68. self.clock = hs.get_clock()
  69. self.store = hs.get_datastore()
  70. self.hs = hs
  71. self._state_resolution_handler = hs.get_state_resolution_handler()
  72. def start_caching(self):
  73. # TODO: remove this shim
  74. self._state_resolution_handler.start_caching()
  75. @defer.inlineCallbacks
  76. def get_current_state(self, room_id, event_type=None, state_key="",
  77. latest_event_ids=None):
  78. """ Retrieves the current state for the room. This is done by
  79. calling `get_latest_events_in_room` to get the leading edges of the
  80. event graph and then resolving any of the state conflicts.
  81. This is equivalent to getting the state of an event that were to send
  82. next before receiving any new events.
  83. If `event_type` is specified, then the method returns only the one
  84. event (or None) with that `event_type` and `state_key`.
  85. Returns:
  86. map from (type, state_key) to event
  87. """
  88. if not latest_event_ids:
  89. latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
  90. logger.debug("calling resolve_state_groups from get_current_state")
  91. ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
  92. state = ret.state
  93. if event_type:
  94. event_id = state.get((event_type, state_key))
  95. event = None
  96. if event_id:
  97. event = yield self.store.get_event(event_id, allow_none=True)
  98. defer.returnValue(event)
  99. return
  100. state_map = yield self.store.get_events(list(state.values()),
  101. get_prev_content=False)
  102. state = {
  103. key: state_map[e_id] for key, e_id in iteritems(state) if e_id in state_map
  104. }
  105. defer.returnValue(state)
  106. @defer.inlineCallbacks
  107. def get_current_state_ids(self, room_id, latest_event_ids=None):
  108. """Get the current state, or the state at a set of events, for a room
  109. Args:
  110. room_id (str):
  111. latest_event_ids (iterable[str]|None): if given, the forward
  112. extremities to resolve. If None, we look them up from the
  113. database (via a cache)
  114. Returns:
  115. Deferred[dict[(str, str), str)]]: the state dict, mapping from
  116. (event_type, state_key) -> event_id
  117. """
  118. if not latest_event_ids:
  119. latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
  120. logger.debug("calling resolve_state_groups from get_current_state_ids")
  121. ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
  122. state = ret.state
  123. defer.returnValue(state)
  124. @defer.inlineCallbacks
  125. def get_current_user_in_room(self, room_id, latest_event_ids=None):
  126. if not latest_event_ids:
  127. latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
  128. logger.debug("calling resolve_state_groups from get_current_user_in_room")
  129. entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
  130. joined_users = yield self.store.get_joined_users_from_state(room_id, entry)
  131. defer.returnValue(joined_users)
  132. @defer.inlineCallbacks
  133. def get_current_hosts_in_room(self, room_id, latest_event_ids=None):
  134. if not latest_event_ids:
  135. latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id)
  136. logger.debug("calling resolve_state_groups from get_current_hosts_in_room")
  137. entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids)
  138. joined_hosts = yield self.store.get_joined_hosts(room_id, entry)
  139. defer.returnValue(joined_hosts)
  140. @defer.inlineCallbacks
  141. def compute_event_context(self, event, old_state=None):
  142. """Build an EventContext structure for the event.
  143. This works out what the current state should be for the event, and
  144. generates a new state group if necessary.
  145. Args:
  146. event (synapse.events.EventBase):
  147. old_state (dict|None): The state at the event if it can't be
  148. calculated from existing events. This is normally only specified
  149. when receiving an event from federation where we don't have the
  150. prev events for, e.g. when backfilling.
  151. Returns:
  152. synapse.events.snapshot.EventContext:
  153. """
  154. if event.internal_metadata.is_outlier():
  155. # If this is an outlier, then we know it shouldn't have any current
  156. # state. Certainly store.get_current_state won't return any, and
  157. # persisting the event won't store the state group.
  158. if old_state:
  159. prev_state_ids = {
  160. (s.type, s.state_key): s.event_id for s in old_state
  161. }
  162. if event.is_state():
  163. current_state_ids = dict(prev_state_ids)
  164. key = (event.type, event.state_key)
  165. current_state_ids[key] = event.event_id
  166. else:
  167. current_state_ids = prev_state_ids
  168. else:
  169. current_state_ids = {}
  170. prev_state_ids = {}
  171. # We don't store state for outliers, so we don't generate a state
  172. # group for it.
  173. context = EventContext.with_state(
  174. state_group=None,
  175. current_state_ids=current_state_ids,
  176. prev_state_ids=prev_state_ids,
  177. )
  178. defer.returnValue(context)
  179. if old_state:
  180. # We already have the state, so we don't need to calculate it.
  181. # Let's just correctly fill out the context and create a
  182. # new state group for it.
  183. prev_state_ids = {
  184. (s.type, s.state_key): s.event_id for s in old_state
  185. }
  186. if event.is_state():
  187. key = (event.type, event.state_key)
  188. if key in prev_state_ids:
  189. replaces = prev_state_ids[key]
  190. if replaces != event.event_id: # Paranoia check
  191. event.unsigned["replaces_state"] = replaces
  192. current_state_ids = dict(prev_state_ids)
  193. current_state_ids[key] = event.event_id
  194. else:
  195. current_state_ids = prev_state_ids
  196. state_group = yield self.store.store_state_group(
  197. event.event_id,
  198. event.room_id,
  199. prev_group=None,
  200. delta_ids=None,
  201. current_state_ids=current_state_ids,
  202. )
  203. context = EventContext.with_state(
  204. state_group=state_group,
  205. current_state_ids=current_state_ids,
  206. prev_state_ids=prev_state_ids,
  207. )
  208. defer.returnValue(context)
  209. logger.debug("calling resolve_state_groups from compute_event_context")
  210. if event.type == EventTypes.Create:
  211. room_version = event.content.get("room_version", RoomVersions.V1)
  212. else:
  213. room_version = None
  214. entry = yield self.resolve_state_groups_for_events(
  215. event.room_id, [e for e, _ in event.prev_events],
  216. explicit_room_version=room_version,
  217. )
  218. prev_state_ids = entry.state
  219. prev_group = None
  220. delta_ids = None
  221. if event.is_state():
  222. # If this is a state event then we need to create a new state
  223. # group for the state after this event.
  224. key = (event.type, event.state_key)
  225. if key in prev_state_ids:
  226. replaces = prev_state_ids[key]
  227. event.unsigned["replaces_state"] = replaces
  228. current_state_ids = dict(prev_state_ids)
  229. current_state_ids[key] = event.event_id
  230. if entry.state_group:
  231. # If the state at the event has a state group assigned then
  232. # we can use that as the prev group
  233. prev_group = entry.state_group
  234. delta_ids = {
  235. key: event.event_id
  236. }
  237. elif entry.prev_group:
  238. # If the state at the event only has a prev group, then we can
  239. # use that as a prev group too.
  240. prev_group = entry.prev_group
  241. delta_ids = dict(entry.delta_ids)
  242. delta_ids[key] = event.event_id
  243. state_group = yield self.store.store_state_group(
  244. event.event_id,
  245. event.room_id,
  246. prev_group=prev_group,
  247. delta_ids=delta_ids,
  248. current_state_ids=current_state_ids,
  249. )
  250. else:
  251. current_state_ids = prev_state_ids
  252. prev_group = entry.prev_group
  253. delta_ids = entry.delta_ids
  254. if entry.state_group is None:
  255. entry.state_group = yield self.store.store_state_group(
  256. event.event_id,
  257. event.room_id,
  258. prev_group=entry.prev_group,
  259. delta_ids=entry.delta_ids,
  260. current_state_ids=current_state_ids,
  261. )
  262. entry.state_id = entry.state_group
  263. state_group = entry.state_group
  264. context = EventContext.with_state(
  265. state_group=state_group,
  266. current_state_ids=current_state_ids,
  267. prev_state_ids=prev_state_ids,
  268. prev_group=prev_group,
  269. delta_ids=delta_ids,
  270. )
  271. defer.returnValue(context)
  272. @defer.inlineCallbacks
  273. def resolve_state_groups_for_events(self, room_id, event_ids,
  274. explicit_room_version=None):
  275. """ Given a list of event_ids this method fetches the state at each
  276. event, resolves conflicts between them and returns them.
  277. Args:
  278. room_id (str)
  279. event_ids (list[str])
  280. explicit_room_version (str|None): If set uses the the given room
  281. version to choose the resolution algorithm. If None, then
  282. checks the database for room version.
  283. Returns:
  284. Deferred[_StateCacheEntry]: resolved state
  285. """
  286. logger.debug("resolve_state_groups event_ids %s", event_ids)
  287. # map from state group id to the state in that state group (where
  288. # 'state' is a map from state key to event id)
  289. # dict[int, dict[(str, str), str]]
  290. state_groups_ids = yield self.store.get_state_groups_ids(
  291. room_id, event_ids
  292. )
  293. if len(state_groups_ids) == 1:
  294. name, state_list = list(state_groups_ids.items()).pop()
  295. prev_group, delta_ids = yield self.store.get_state_group_delta(name)
  296. defer.returnValue(_StateCacheEntry(
  297. state=state_list,
  298. state_group=name,
  299. prev_group=prev_group,
  300. delta_ids=delta_ids,
  301. ))
  302. room_version = explicit_room_version
  303. if not room_version:
  304. room_version = yield self.store.get_room_version(room_id)
  305. result = yield self._state_resolution_handler.resolve_state_groups(
  306. room_id, room_version, state_groups_ids, None,
  307. self._state_map_factory,
  308. )
  309. defer.returnValue(result)
  310. def _state_map_factory(self, ev_ids):
  311. return self.store.get_events(
  312. ev_ids, get_prev_content=False, check_redacted=False,
  313. )
  314. @defer.inlineCallbacks
  315. def resolve_events(self, room_version, state_sets, event):
  316. logger.info(
  317. "Resolving state for %s with %d groups", event.room_id, len(state_sets)
  318. )
  319. state_set_ids = [{
  320. (ev.type, ev.state_key): ev.event_id
  321. for ev in st
  322. } for st in state_sets]
  323. state_map = {
  324. ev.event_id: ev
  325. for st in state_sets
  326. for ev in st
  327. }
  328. room_version = yield self.store.get_room_version(event.room_id)
  329. with Measure(self.clock, "state._resolve_events"):
  330. new_state = resolve_events_with_state_map(
  331. room_version, state_set_ids, state_map,
  332. )
  333. new_state = {
  334. key: state_map[ev_id] for key, ev_id in iteritems(new_state)
  335. }
  336. defer.returnValue(new_state)
  337. class StateResolutionHandler(object):
  338. """Responsible for doing state conflict resolution.
  339. Note that the storage layer depends on this handler, so all functions must
  340. be storage-independent.
  341. """
  342. def __init__(self, hs):
  343. self.clock = hs.get_clock()
  344. # dict of set of event_ids -> _StateCacheEntry.
  345. self._state_cache = None
  346. self.resolve_linearizer = Linearizer(name="state_resolve_lock")
  347. def start_caching(self):
  348. logger.debug("start_caching")
  349. self._state_cache = ExpiringCache(
  350. cache_name="state_cache",
  351. clock=self.clock,
  352. max_len=SIZE_OF_CACHE,
  353. expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000,
  354. iterable=True,
  355. reset_expiry_on_get=True,
  356. )
  357. self._state_cache.start()
  358. @defer.inlineCallbacks
  359. @log_function
  360. def resolve_state_groups(
  361. self, room_id, room_version, state_groups_ids, event_map, state_map_factory,
  362. ):
  363. """Resolves conflicts between a set of state groups
  364. Always generates a new state group (unless we hit the cache), so should
  365. not be called for a single state group
  366. Args:
  367. room_id (str): room we are resolving for (used for logging)
  368. room_version (str): version of the room
  369. state_groups_ids (dict[int, dict[(str, str), str]]):
  370. map from state group id to the state in that state group
  371. (where 'state' is a map from state key to event id)
  372. event_map(dict[str,FrozenEvent]|None):
  373. a dict from event_id to event, for any events that we happen to
  374. have in flight (eg, those currently being persisted). This will be
  375. used as a starting point fof finding the state we need; any missing
  376. events will be requested via state_map_factory.
  377. If None, all events will be fetched via state_map_factory.
  378. Returns:
  379. Deferred[_StateCacheEntry]: resolved state
  380. """
  381. logger.debug(
  382. "resolve_state_groups state_groups %s",
  383. state_groups_ids.keys()
  384. )
  385. group_names = frozenset(state_groups_ids.keys())
  386. with (yield self.resolve_linearizer.queue(group_names)):
  387. if self._state_cache is not None:
  388. cache = self._state_cache.get(group_names, None)
  389. if cache:
  390. defer.returnValue(cache)
  391. logger.info(
  392. "Resolving state for %s with %d groups", room_id, len(state_groups_ids)
  393. )
  394. # start by assuming we won't have any conflicted state, and build up the new
  395. # state map by iterating through the state groups. If we discover a conflict,
  396. # we give up and instead use `resolve_events_with_factory`.
  397. #
  398. # XXX: is this actually worthwhile, or should we just let
  399. # resolve_events_with_factory do it?
  400. new_state = {}
  401. conflicted_state = False
  402. for st in itervalues(state_groups_ids):
  403. for key, e_id in iteritems(st):
  404. if key in new_state:
  405. conflicted_state = True
  406. break
  407. new_state[key] = e_id
  408. if conflicted_state:
  409. break
  410. if conflicted_state:
  411. logger.info("Resolving conflicted state for %r", room_id)
  412. with Measure(self.clock, "state._resolve_events"):
  413. new_state = yield resolve_events_with_factory(
  414. room_version,
  415. list(itervalues(state_groups_ids)),
  416. event_map=event_map,
  417. state_map_factory=state_map_factory,
  418. )
  419. # if the new state matches any of the input state groups, we can
  420. # use that state group again. Otherwise we will generate a state_id
  421. # which will be used as a cache key for future resolutions, but
  422. # not get persisted.
  423. with Measure(self.clock, "state.create_group_ids"):
  424. cache = _make_state_cache_entry(new_state, state_groups_ids)
  425. if self._state_cache is not None:
  426. self._state_cache[group_names] = cache
  427. defer.returnValue(cache)
  428. def _make_state_cache_entry(
  429. new_state,
  430. state_groups_ids,
  431. ):
  432. """Given a resolved state, and a set of input state groups, pick one to base
  433. a new state group on (if any), and return an appropriately-constructed
  434. _StateCacheEntry.
  435. Args:
  436. new_state (dict[(str, str), str]): resolved state map (mapping from
  437. (type, state_key) to event_id)
  438. state_groups_ids (dict[int, dict[(str, str), str]]):
  439. map from state group id to the state in that state group
  440. (where 'state' is a map from state key to event id)
  441. Returns:
  442. _StateCacheEntry
  443. """
  444. # if the new state matches any of the input state groups, we can
  445. # use that state group again. Otherwise we will generate a state_id
  446. # which will be used as a cache key for future resolutions, but
  447. # not get persisted.
  448. # first look for exact matches
  449. new_state_event_ids = set(itervalues(new_state))
  450. for sg, state in iteritems(state_groups_ids):
  451. if len(new_state_event_ids) != len(state):
  452. continue
  453. old_state_event_ids = set(itervalues(state))
  454. if new_state_event_ids == old_state_event_ids:
  455. # got an exact match.
  456. return _StateCacheEntry(
  457. state=new_state,
  458. state_group=sg,
  459. )
  460. # TODO: We want to create a state group for this set of events, to
  461. # increase cache hits, but we need to make sure that it doesn't
  462. # end up as a prev_group without being added to the database
  463. # failing that, look for the closest match.
  464. prev_group = None
  465. delta_ids = None
  466. for old_group, old_state in iteritems(state_groups_ids):
  467. n_delta_ids = {
  468. k: v
  469. for k, v in iteritems(new_state)
  470. if old_state.get(k) != v
  471. }
  472. if not delta_ids or len(n_delta_ids) < len(delta_ids):
  473. prev_group = old_group
  474. delta_ids = n_delta_ids
  475. return _StateCacheEntry(
  476. state=new_state,
  477. state_group=None,
  478. prev_group=prev_group,
  479. delta_ids=delta_ids,
  480. )
  481. def resolve_events_with_state_map(room_version, state_sets, state_map):
  482. """
  483. Args:
  484. room_version(str): Version of the room
  485. state_sets(list): List of dicts of (type, state_key) -> event_id,
  486. which are the different state groups to resolve.
  487. state_map(dict): a dict from event_id to event, for all events in
  488. state_sets.
  489. Returns
  490. dict[(str, str), str]:
  491. a map from (type, state_key) to event_id.
  492. """
  493. if room_version in (RoomVersions.V1, RoomVersions.VDH_TEST,):
  494. return v1.resolve_events_with_state_map(
  495. state_sets, state_map,
  496. )
  497. else:
  498. # This should only happen if we added a version but forgot to add it to
  499. # the list above.
  500. raise Exception(
  501. "No state resolution algorithm defined for version %r" % (room_version,)
  502. )
  503. def resolve_events_with_factory(room_version, state_sets, event_map, state_map_factory):
  504. """
  505. Args:
  506. room_version(str): Version of the room
  507. state_sets(list): List of dicts of (type, state_key) -> event_id,
  508. which are the different state groups to resolve.
  509. event_map(dict[str,FrozenEvent]|None):
  510. a dict from event_id to event, for any events that we happen to
  511. have in flight (eg, those currently being persisted). This will be
  512. used as a starting point fof finding the state we need; any missing
  513. events will be requested via state_map_factory.
  514. If None, all events will be fetched via state_map_factory.
  515. state_map_factory(func): will be called
  516. with a list of event_ids that are needed, and should return with
  517. a Deferred of dict of event_id to event.
  518. Returns
  519. Deferred[dict[(str, str), str]]:
  520. a map from (type, state_key) to event_id.
  521. """
  522. if room_version in (RoomVersions.V1, RoomVersions.VDH_TEST,):
  523. return v1.resolve_events_with_factory(
  524. state_sets, event_map, state_map_factory,
  525. )
  526. else:
  527. # This should only happen if we added a version but forgot to add it to
  528. # the list above.
  529. raise Exception(
  530. "No state resolution algorithm defined for version %r" % (room_version,)
  531. )