federation_server.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2015, 2016 OpenMarket Ltd
  3. # Copyright 2018 New Vector Ltd
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. import logging
  17. import simplejson as json
  18. from twisted.internet import defer
  19. from synapse.api.errors import AuthError, FederationError, SynapseError, NotFoundError
  20. from synapse.crypto.event_signing import compute_event_signature
  21. from synapse.federation.federation_base import (
  22. FederationBase,
  23. event_from_pdu_json,
  24. )
  25. from synapse.federation.persistence import TransactionActions
  26. from synapse.federation.units import Edu, Transaction
  27. from synapse.types import get_domain_from_id
  28. from synapse.util import async
  29. from synapse.util.caches.response_cache import ResponseCache
  30. from synapse.util.logutils import log_function
  31. from prometheus_client import Counter
  32. from six import iteritems
  33. # when processing incoming transactions, we try to handle multiple rooms in
  34. # parallel, up to this limit.
  35. TRANSACTION_CONCURRENCY_LIMIT = 10
  36. logger = logging.getLogger(__name__)
  37. received_pdus_counter = Counter("synapse_federation_server_received_pdus", "")
  38. received_edus_counter = Counter("synapse_federation_server_received_edus", "")
  39. received_queries_counter = Counter(
  40. "synapse_federation_server_received_queries", "", ["type"]
  41. )
  42. class FederationServer(FederationBase):
  43. def __init__(self, hs):
  44. super(FederationServer, self).__init__(hs)
  45. self.auth = hs.get_auth()
  46. self.handler = hs.get_handlers().federation_handler
  47. self._server_linearizer = async.Linearizer("fed_server")
  48. self._transaction_linearizer = async.Linearizer("fed_txn_handler")
  49. self.transaction_actions = TransactionActions(self.store)
  50. self.registry = hs.get_federation_registry()
  51. # We cache responses to state queries, as they take a while and often
  52. # come in waves.
  53. self._state_resp_cache = ResponseCache(hs, "state_resp", timeout_ms=30000)
  54. @defer.inlineCallbacks
  55. @log_function
  56. def on_backfill_request(self, origin, room_id, versions, limit):
  57. with (yield self._server_linearizer.queue((origin, room_id))):
  58. pdus = yield self.handler.on_backfill_request(
  59. origin, room_id, versions, limit
  60. )
  61. res = self._transaction_from_pdus(pdus).get_dict()
  62. defer.returnValue((200, res))
  63. @defer.inlineCallbacks
  64. @log_function
  65. def on_incoming_transaction(self, transaction_data):
  66. # keep this as early as possible to make the calculated origin ts as
  67. # accurate as possible.
  68. request_time = self._clock.time_msec()
  69. transaction = Transaction(**transaction_data)
  70. if not transaction.transaction_id:
  71. raise Exception("Transaction missing transaction_id")
  72. if not transaction.origin:
  73. raise Exception("Transaction missing origin")
  74. logger.debug("[%s] Got transaction", transaction.transaction_id)
  75. # use a linearizer to ensure that we don't process the same transaction
  76. # multiple times in parallel.
  77. with (yield self._transaction_linearizer.queue(
  78. (transaction.origin, transaction.transaction_id),
  79. )):
  80. result = yield self._handle_incoming_transaction(
  81. transaction, request_time,
  82. )
  83. defer.returnValue(result)
  84. @defer.inlineCallbacks
  85. def _handle_incoming_transaction(self, transaction, request_time):
  86. """ Process an incoming transaction and return the HTTP response
  87. Args:
  88. transaction (Transaction): incoming transaction
  89. request_time (int): timestamp that the HTTP request arrived at
  90. Returns:
  91. Deferred[(int, object)]: http response code and body
  92. """
  93. response = yield self.transaction_actions.have_responded(transaction)
  94. if response:
  95. logger.debug(
  96. "[%s] We've already responded to this request",
  97. transaction.transaction_id
  98. )
  99. defer.returnValue(response)
  100. return
  101. logger.debug("[%s] Transaction is new", transaction.transaction_id)
  102. received_pdus_counter.inc(len(transaction.pdus))
  103. pdus_by_room = {}
  104. for p in transaction.pdus:
  105. if "unsigned" in p:
  106. unsigned = p["unsigned"]
  107. if "age" in unsigned:
  108. p["age"] = unsigned["age"]
  109. if "age" in p:
  110. p["age_ts"] = request_time - int(p["age"])
  111. del p["age"]
  112. event = event_from_pdu_json(p)
  113. room_id = event.room_id
  114. pdus_by_room.setdefault(room_id, []).append(event)
  115. pdu_results = {}
  116. # we can process different rooms in parallel (which is useful if they
  117. # require callouts to other servers to fetch missing events), but
  118. # impose a limit to avoid going too crazy with ram/cpu.
  119. @defer.inlineCallbacks
  120. def process_pdus_for_room(room_id):
  121. logger.debug("Processing PDUs for %s", room_id)
  122. for pdu in pdus_by_room[room_id]:
  123. event_id = pdu.event_id
  124. try:
  125. yield self._handle_received_pdu(
  126. transaction.origin, pdu
  127. )
  128. pdu_results[event_id] = {}
  129. except FederationError as e:
  130. logger.warn("Error handling PDU %s: %s", event_id, e)
  131. pdu_results[event_id] = {"error": str(e)}
  132. except Exception as e:
  133. pdu_results[event_id] = {"error": str(e)}
  134. logger.exception("Failed to handle PDU %s", event_id)
  135. yield async.concurrently_execute(
  136. process_pdus_for_room, pdus_by_room.keys(),
  137. TRANSACTION_CONCURRENCY_LIMIT,
  138. )
  139. if hasattr(transaction, "edus"):
  140. for edu in (Edu(**x) for x in transaction.edus):
  141. yield self.received_edu(
  142. transaction.origin,
  143. edu.edu_type,
  144. edu.content
  145. )
  146. pdu_failures = getattr(transaction, "pdu_failures", [])
  147. for failure in pdu_failures:
  148. logger.info("Got failure %r", failure)
  149. response = {
  150. "pdus": pdu_results,
  151. }
  152. logger.debug("Returning: %s", str(response))
  153. yield self.transaction_actions.set_response(
  154. transaction,
  155. 200, response
  156. )
  157. defer.returnValue((200, response))
  158. @defer.inlineCallbacks
  159. def received_edu(self, origin, edu_type, content):
  160. received_edus_counter.inc()
  161. yield self.registry.on_edu(edu_type, origin, content)
  162. @defer.inlineCallbacks
  163. @log_function
  164. def on_context_state_request(self, origin, room_id, event_id):
  165. if not event_id:
  166. raise NotImplementedError("Specify an event")
  167. in_room = yield self.auth.check_host_in_room(room_id, origin)
  168. if not in_room:
  169. raise AuthError(403, "Host not in room.")
  170. # we grab the linearizer to protect ourselves from servers which hammer
  171. # us. In theory we might already have the response to this query
  172. # in the cache so we could return it without waiting for the linearizer
  173. # - but that's non-trivial to get right, and anyway somewhat defeats
  174. # the point of the linearizer.
  175. with (yield self._server_linearizer.queue((origin, room_id))):
  176. resp = yield self._state_resp_cache.wrap(
  177. (room_id, event_id),
  178. self._on_context_state_request_compute,
  179. room_id, event_id,
  180. )
  181. defer.returnValue((200, resp))
  182. @defer.inlineCallbacks
  183. def on_state_ids_request(self, origin, room_id, event_id):
  184. if not event_id:
  185. raise NotImplementedError("Specify an event")
  186. in_room = yield self.auth.check_host_in_room(room_id, origin)
  187. if not in_room:
  188. raise AuthError(403, "Host not in room.")
  189. state_ids = yield self.handler.get_state_ids_for_pdu(
  190. room_id, event_id,
  191. )
  192. auth_chain_ids = yield self.store.get_auth_chain_ids(state_ids)
  193. defer.returnValue((200, {
  194. "pdu_ids": state_ids,
  195. "auth_chain_ids": auth_chain_ids,
  196. }))
  197. @defer.inlineCallbacks
  198. def _on_context_state_request_compute(self, room_id, event_id):
  199. pdus = yield self.handler.get_state_for_pdu(
  200. room_id, event_id,
  201. )
  202. auth_chain = yield self.store.get_auth_chain(
  203. [pdu.event_id for pdu in pdus]
  204. )
  205. for event in auth_chain:
  206. # We sign these again because there was a bug where we
  207. # incorrectly signed things the first time round
  208. if self.hs.is_mine_id(event.event_id):
  209. event.signatures.update(
  210. compute_event_signature(
  211. event,
  212. self.hs.hostname,
  213. self.hs.config.signing_key[0]
  214. )
  215. )
  216. defer.returnValue({
  217. "pdus": [pdu.get_pdu_json() for pdu in pdus],
  218. "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain],
  219. })
  220. @defer.inlineCallbacks
  221. @log_function
  222. def on_pdu_request(self, origin, event_id):
  223. pdu = yield self._get_persisted_pdu(origin, event_id)
  224. if pdu:
  225. defer.returnValue(
  226. (200, self._transaction_from_pdus([pdu]).get_dict())
  227. )
  228. else:
  229. defer.returnValue((404, ""))
  230. @defer.inlineCallbacks
  231. @log_function
  232. def on_pull_request(self, origin, versions):
  233. raise NotImplementedError("Pull transactions not implemented")
  234. @defer.inlineCallbacks
  235. def on_query_request(self, query_type, args):
  236. received_queries_counter.labels(query_type).inc()
  237. resp = yield self.registry.on_query(query_type, args)
  238. defer.returnValue((200, resp))
  239. @defer.inlineCallbacks
  240. def on_make_join_request(self, room_id, user_id):
  241. pdu = yield self.handler.on_make_join_request(room_id, user_id)
  242. time_now = self._clock.time_msec()
  243. defer.returnValue({"event": pdu.get_pdu_json(time_now)})
  244. @defer.inlineCallbacks
  245. def on_invite_request(self, origin, content):
  246. pdu = event_from_pdu_json(content)
  247. ret_pdu = yield self.handler.on_invite_request(origin, pdu)
  248. time_now = self._clock.time_msec()
  249. defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)}))
  250. @defer.inlineCallbacks
  251. def on_send_join_request(self, origin, content):
  252. logger.debug("on_send_join_request: content: %s", content)
  253. pdu = event_from_pdu_json(content)
  254. logger.debug("on_send_join_request: pdu sigs: %s", pdu.signatures)
  255. res_pdus = yield self.handler.on_send_join_request(origin, pdu)
  256. time_now = self._clock.time_msec()
  257. defer.returnValue((200, {
  258. "state": [p.get_pdu_json(time_now) for p in res_pdus["state"]],
  259. "auth_chain": [
  260. p.get_pdu_json(time_now) for p in res_pdus["auth_chain"]
  261. ],
  262. }))
  263. @defer.inlineCallbacks
  264. def on_make_leave_request(self, room_id, user_id):
  265. pdu = yield self.handler.on_make_leave_request(room_id, user_id)
  266. time_now = self._clock.time_msec()
  267. defer.returnValue({"event": pdu.get_pdu_json(time_now)})
  268. @defer.inlineCallbacks
  269. def on_send_leave_request(self, origin, content):
  270. logger.debug("on_send_leave_request: content: %s", content)
  271. pdu = event_from_pdu_json(content)
  272. logger.debug("on_send_leave_request: pdu sigs: %s", pdu.signatures)
  273. yield self.handler.on_send_leave_request(origin, pdu)
  274. defer.returnValue((200, {}))
  275. @defer.inlineCallbacks
  276. def on_event_auth(self, origin, room_id, event_id):
  277. with (yield self._server_linearizer.queue((origin, room_id))):
  278. time_now = self._clock.time_msec()
  279. auth_pdus = yield self.handler.on_event_auth(event_id)
  280. res = {
  281. "auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus],
  282. }
  283. defer.returnValue((200, res))
  284. @defer.inlineCallbacks
  285. def on_query_auth_request(self, origin, content, room_id, event_id):
  286. """
  287. Content is a dict with keys::
  288. auth_chain (list): A list of events that give the auth chain.
  289. missing (list): A list of event_ids indicating what the other
  290. side (`origin`) think we're missing.
  291. rejects (dict): A mapping from event_id to a 2-tuple of reason
  292. string and a proof (or None) of why the event was rejected.
  293. The keys of this dict give the list of events the `origin` has
  294. rejected.
  295. Args:
  296. origin (str)
  297. content (dict)
  298. event_id (str)
  299. Returns:
  300. Deferred: Results in `dict` with the same format as `content`
  301. """
  302. with (yield self._server_linearizer.queue((origin, room_id))):
  303. auth_chain = [
  304. event_from_pdu_json(e)
  305. for e in content["auth_chain"]
  306. ]
  307. signed_auth = yield self._check_sigs_and_hash_and_fetch(
  308. origin, auth_chain, outlier=True
  309. )
  310. ret = yield self.handler.on_query_auth(
  311. origin,
  312. event_id,
  313. signed_auth,
  314. content.get("rejects", []),
  315. content.get("missing", []),
  316. )
  317. time_now = self._clock.time_msec()
  318. send_content = {
  319. "auth_chain": [
  320. e.get_pdu_json(time_now)
  321. for e in ret["auth_chain"]
  322. ],
  323. "rejects": ret.get("rejects", []),
  324. "missing": ret.get("missing", []),
  325. }
  326. defer.returnValue(
  327. (200, send_content)
  328. )
  329. @log_function
  330. def on_query_client_keys(self, origin, content):
  331. return self.on_query_request("client_keys", content)
  332. def on_query_user_devices(self, origin, user_id):
  333. return self.on_query_request("user_devices", user_id)
  334. @defer.inlineCallbacks
  335. @log_function
  336. def on_claim_client_keys(self, origin, content):
  337. query = []
  338. for user_id, device_keys in content.get("one_time_keys", {}).items():
  339. for device_id, algorithm in device_keys.items():
  340. query.append((user_id, device_id, algorithm))
  341. results = yield self.store.claim_e2e_one_time_keys(query)
  342. json_result = {}
  343. for user_id, device_keys in results.items():
  344. for device_id, keys in device_keys.items():
  345. for key_id, json_bytes in keys.items():
  346. json_result.setdefault(user_id, {})[device_id] = {
  347. key_id: json.loads(json_bytes)
  348. }
  349. logger.info(
  350. "Claimed one-time-keys: %s",
  351. ",".join((
  352. "%s for %s:%s" % (key_id, user_id, device_id)
  353. for user_id, user_keys in iteritems(json_result)
  354. for device_id, device_keys in iteritems(user_keys)
  355. for key_id, _ in iteritems(device_keys)
  356. )),
  357. )
  358. defer.returnValue({"one_time_keys": json_result})
  359. @defer.inlineCallbacks
  360. @log_function
  361. def on_get_missing_events(self, origin, room_id, earliest_events,
  362. latest_events, limit, min_depth):
  363. with (yield self._server_linearizer.queue((origin, room_id))):
  364. logger.info(
  365. "on_get_missing_events: earliest_events: %r, latest_events: %r,"
  366. " limit: %d, min_depth: %d",
  367. earliest_events, latest_events, limit, min_depth
  368. )
  369. missing_events = yield self.handler.on_get_missing_events(
  370. origin, room_id, earliest_events, latest_events, limit, min_depth
  371. )
  372. if len(missing_events) < 5:
  373. logger.info(
  374. "Returning %d events: %r", len(missing_events), missing_events
  375. )
  376. else:
  377. logger.info("Returning %d events", len(missing_events))
  378. time_now = self._clock.time_msec()
  379. defer.returnValue({
  380. "events": [ev.get_pdu_json(time_now) for ev in missing_events],
  381. })
  382. @log_function
  383. def on_openid_userinfo(self, token):
  384. ts_now_ms = self._clock.time_msec()
  385. return self.store.get_user_id_for_open_id_token(token, ts_now_ms)
  386. @log_function
  387. def _get_persisted_pdu(self, origin, event_id, do_auth=True):
  388. """ Get a PDU from the database with given origin and id.
  389. Returns:
  390. Deferred: Results in a `Pdu`.
  391. """
  392. return self.handler.get_persisted_pdu(
  393. origin, event_id, do_auth=do_auth
  394. )
  395. def _transaction_from_pdus(self, pdu_list):
  396. """Returns a new Transaction containing the given PDUs suitable for
  397. transmission.
  398. """
  399. time_now = self._clock.time_msec()
  400. pdus = [p.get_pdu_json(time_now) for p in pdu_list]
  401. return Transaction(
  402. origin=self.server_name,
  403. pdus=pdus,
  404. origin_server_ts=int(time_now),
  405. destination=None,
  406. )
  407. @defer.inlineCallbacks
  408. def _handle_received_pdu(self, origin, pdu):
  409. """ Process a PDU received in a federation /send/ transaction.
  410. If the event is invalid, then this method throws a FederationError.
  411. (The error will then be logged and sent back to the sender (which
  412. probably won't do anything with it), and other events in the
  413. transaction will be processed as normal).
  414. It is likely that we'll then receive other events which refer to
  415. this rejected_event in their prev_events, etc. When that happens,
  416. we'll attempt to fetch the rejected event again, which will presumably
  417. fail, so those second-generation events will also get rejected.
  418. Eventually, we get to the point where there are more than 10 events
  419. between any new events and the original rejected event. Since we
  420. only try to backfill 10 events deep on received pdu, we then accept the
  421. new event, possibly introducing a discontinuity in the DAG, with new
  422. forward extremities, so normal service is approximately returned,
  423. until we try to backfill across the discontinuity.
  424. Args:
  425. origin (str): server which sent the pdu
  426. pdu (FrozenEvent): received pdu
  427. Returns (Deferred): completes with None
  428. Raises: FederationError if the signatures / hash do not match, or
  429. if the event was unacceptable for any other reason (eg, too large,
  430. too many prev_events, couldn't find the prev_events)
  431. """
  432. # check that it's actually being sent from a valid destination to
  433. # workaround bug #1753 in 0.18.5 and 0.18.6
  434. if origin != get_domain_from_id(pdu.event_id):
  435. # We continue to accept join events from any server; this is
  436. # necessary for the federation join dance to work correctly.
  437. # (When we join over federation, the "helper" server is
  438. # responsible for sending out the join event, rather than the
  439. # origin. See bug #1893).
  440. if not (
  441. pdu.type == 'm.room.member' and
  442. pdu.content and
  443. pdu.content.get("membership", None) == 'join'
  444. ):
  445. logger.info(
  446. "Discarding PDU %s from invalid origin %s",
  447. pdu.event_id, origin
  448. )
  449. return
  450. else:
  451. logger.info(
  452. "Accepting join PDU %s from %s",
  453. pdu.event_id, origin
  454. )
  455. # Check signature.
  456. try:
  457. pdu = yield self._check_sigs_and_hash(pdu)
  458. except SynapseError as e:
  459. raise FederationError(
  460. "ERROR",
  461. e.code,
  462. e.msg,
  463. affected=pdu.event_id,
  464. )
  465. yield self.handler.on_receive_pdu(origin, pdu, get_missing=True)
  466. def __str__(self):
  467. return "<ReplicationLayer(%s)>" % self.server_name
  468. @defer.inlineCallbacks
  469. def exchange_third_party_invite(
  470. self,
  471. sender_user_id,
  472. target_user_id,
  473. room_id,
  474. signed,
  475. ):
  476. ret = yield self.handler.exchange_third_party_invite(
  477. sender_user_id,
  478. target_user_id,
  479. room_id,
  480. signed,
  481. )
  482. defer.returnValue(ret)
  483. @defer.inlineCallbacks
  484. def on_exchange_third_party_invite_request(self, origin, room_id, event_dict):
  485. ret = yield self.handler.on_exchange_third_party_invite_request(
  486. origin, room_id, event_dict
  487. )
  488. defer.returnValue(ret)
  489. class FederationHandlerRegistry(object):
  490. """Allows classes to register themselves as handlers for a given EDU or
  491. query type for incoming federation traffic.
  492. """
  493. def __init__(self):
  494. self.edu_handlers = {}
  495. self.query_handlers = {}
  496. def register_edu_handler(self, edu_type, handler):
  497. """Sets the handler callable that will be used to handle an incoming
  498. federation EDU of the given type.
  499. Args:
  500. edu_type (str): The type of the incoming EDU to register handler for
  501. handler (Callable[[str, dict]]): A callable invoked on incoming EDU
  502. of the given type. The arguments are the origin server name and
  503. the EDU contents.
  504. """
  505. if edu_type in self.edu_handlers:
  506. raise KeyError("Already have an EDU handler for %s" % (edu_type,))
  507. self.edu_handlers[edu_type] = handler
  508. def register_query_handler(self, query_type, handler):
  509. """Sets the handler callable that will be used to handle an incoming
  510. federation query of the given type.
  511. Args:
  512. query_type (str): Category name of the query, which should match
  513. the string used by make_query.
  514. handler (Callable[[dict], Deferred[dict]]): Invoked to handle
  515. incoming queries of this type. The return will be yielded
  516. on and the result used as the response to the query request.
  517. """
  518. if query_type in self.query_handlers:
  519. raise KeyError(
  520. "Already have a Query handler for %s" % (query_type,)
  521. )
  522. self.query_handlers[query_type] = handler
  523. @defer.inlineCallbacks
  524. def on_edu(self, edu_type, origin, content):
  525. handler = self.edu_handlers.get(edu_type)
  526. if not handler:
  527. logger.warn("No handler registered for EDU type %s", edu_type)
  528. try:
  529. yield handler(origin, content)
  530. except SynapseError as e:
  531. logger.info("Failed to handle edu %r: %r", edu_type, e)
  532. except Exception as e:
  533. logger.exception("Failed to handle edu %r", edu_type)
  534. def on_query(self, query_type, args):
  535. handler = self.query_handlers.get(query_type)
  536. if not handler:
  537. logger.warn("No handler registered for query type %s", query_type)
  538. raise NotFoundError("No handler for Query type '%s'" % (query_type,))
  539. return handler(args)