123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923 |
- # Copyright 2014-2022 The Matrix.org Foundation C.I.C.
- # Copyright 2020 Sorunome
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """Contains handlers for federation events."""
- import enum
- import itertools
- import logging
- from enum import Enum
- from http import HTTPStatus
- from typing import (
- TYPE_CHECKING,
- Collection,
- Dict,
- Iterable,
- List,
- Optional,
- Set,
- Tuple,
- Union,
- )
- import attr
- from prometheus_client import Histogram
- from signedjson.key import decode_verify_key_bytes
- from signedjson.sign import verify_signed_json
- from unpaddedbase64 import decode_base64
- from synapse import event_auth
- from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
- from synapse.api.errors import (
- AuthError,
- CodeMessageException,
- Codes,
- FederationDeniedError,
- FederationError,
- FederationPullAttemptBackoffError,
- HttpResponseException,
- LimitExceededError,
- NotFoundError,
- RequestSendFailed,
- SynapseError,
- )
- from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
- from synapse.crypto.event_signing import compute_event_signature
- from synapse.event_auth import validate_event_for_room_version
- from synapse.events import EventBase
- from synapse.events.snapshot import EventContext
- from synapse.events.validator import EventValidator
- from synapse.federation.federation_client import InvalidResponseError
- from synapse.http.servlet import assert_params_in_dict
- from synapse.logging.context import nested_logging_context
- from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace
- from synapse.metrics.background_process_metrics import run_as_background_process
- from synapse.module_api import NOT_SPAM
- from synapse.replication.http.federation import (
- ReplicationCleanRoomRestServlet,
- ReplicationStoreRoomOnOutlierMembershipRestServlet,
- )
- from synapse.storage.databases.main.events import PartialStateConflictError
- from synapse.storage.databases.main.events_worker import EventRedactBehaviour
- from synapse.types import JsonDict, get_domain_from_id
- from synapse.types.state import StateFilter
- from synapse.util.async_helpers import Linearizer
- from synapse.util.retryutils import NotRetryingDestination
- from synapse.visibility import filter_events_for_server
- if TYPE_CHECKING:
- from synapse.server import HomeServer
- logger = logging.getLogger(__name__)
- # Added to debug performance and track progress on optimizations
- backfill_processing_before_timer = Histogram(
- "synapse_federation_backfill_processing_before_time_seconds",
- "sec",
- [],
- buckets=(
- 0.1,
- 0.5,
- 1.0,
- 2.5,
- 5.0,
- 7.5,
- 10.0,
- 15.0,
- 20.0,
- 30.0,
- 40.0,
- 60.0,
- 80.0,
- "+Inf",
- ),
- )
- class _BackfillPointType(Enum):
- # a regular backwards extremity (ie, an event which we don't yet have, but which
- # is referred to by other events in the DAG)
- BACKWARDS_EXTREMITY = enum.auto()
- # an MSC2716 "insertion event"
- INSERTION_PONT = enum.auto()
- @attr.s(slots=True, auto_attribs=True, frozen=True)
- class _BackfillPoint:
- """A potential point we might backfill from"""
- event_id: str
- depth: int
- type: _BackfillPointType
- class FederationHandler:
- """Handles general incoming federation requests
- Incoming events are *not* handled here, for which see FederationEventHandler.
- """
- def __init__(self, hs: "HomeServer"):
- self.hs = hs
- self.clock = hs.get_clock()
- self.store = hs.get_datastores().main
- self._storage_controllers = hs.get_storage_controllers()
- self._state_storage_controller = self._storage_controllers.state
- self.federation_client = hs.get_federation_client()
- self.state_handler = hs.get_state_handler()
- self.server_name = hs.hostname
- self.keyring = hs.get_keyring()
- self.is_mine_id = hs.is_mine_id
- self.spam_checker = hs.get_spam_checker()
- self.event_creation_handler = hs.get_event_creation_handler()
- self.event_builder_factory = hs.get_event_builder_factory()
- self._event_auth_handler = hs.get_event_auth_handler()
- self._server_notices_mxid = hs.config.servernotices.server_notices_mxid
- self.config = hs.config
- self.http_client = hs.get_proxied_blacklisted_http_client()
- self._replication = hs.get_replication_data_handler()
- self._federation_event_handler = hs.get_federation_event_handler()
- self._device_handler = hs.get_device_handler()
- self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator()
- self._notifier = hs.get_notifier()
- self._clean_room_for_join_client = ReplicationCleanRoomRestServlet.make_client(
- hs
- )
- if hs.config.worker.worker_app:
- self._maybe_store_room_on_outlier_membership = (
- ReplicationStoreRoomOnOutlierMembershipRestServlet.make_client(hs)
- )
- else:
- self._maybe_store_room_on_outlier_membership = (
- self.store.maybe_store_room_on_outlier_membership
- )
- self._room_backfill = Linearizer("room_backfill")
- self.third_party_event_rules = hs.get_third_party_event_rules()
- # Tracks running partial state syncs by room ID.
- # Partial state syncs currently only run on the main process, so it's okay to
- # track them in-memory for now.
- self._active_partial_state_syncs: Set[str] = set()
- # Tracks partial state syncs we may want to restart.
- # A dictionary mapping room IDs to (initial destination, other destinations)
- # tuples.
- self._partial_state_syncs_maybe_needing_restart: Dict[
- str, Tuple[Optional[str], Collection[str]]
- ] = {}
- # if this is the main process, fire off a background process to resume
- # any partial-state-resync operations which were in flight when we
- # were shut down.
- if not hs.config.worker.worker_app:
- run_as_background_process(
- "resume_sync_partial_state_room", self._resume_partial_state_room_sync
- )
- @trace
- async def maybe_backfill(
- self, room_id: str, current_depth: int, limit: int
- ) -> bool:
- """Checks the database to see if we should backfill before paginating,
- and if so do.
- Args:
- room_id
- current_depth: The depth from which we're paginating from. This is
- used to decide if we should backfill and what extremities to
- use.
- limit: The number of events that the pagination request will
- return. This is used as part of the heuristic to decide if we
- should back paginate.
- """
- # Starting the processing time here so we can include the room backfill
- # linearizer lock queue in the timing
- processing_start_time = self.clock.time_msec()
- async with self._room_backfill.queue(room_id):
- return await self._maybe_backfill_inner(
- room_id,
- current_depth,
- limit,
- processing_start_time=processing_start_time,
- )
- async def _maybe_backfill_inner(
- self,
- room_id: str,
- current_depth: int,
- limit: int,
- *,
- processing_start_time: Optional[int],
- ) -> bool:
- """
- Checks whether the `current_depth` is at or approaching any backfill
- points in the room and if so, will backfill. We only care about
- checking backfill points that happened before the `current_depth`
- (meaning less than or equal to the `current_depth`).
- Args:
- room_id: The room to backfill in.
- current_depth: The depth to check at for any upcoming backfill points.
- limit: The max number of events to request from the remote federated server.
- processing_start_time: The time when `maybe_backfill` started processing.
- Only used for timing. If `None`, no timing observation will be made.
- """
- backwards_extremities = [
- _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY)
- for event_id, depth in await self.store.get_backfill_points_in_room(
- room_id=room_id,
- current_depth=current_depth,
- # We only need to end up with 5 extremities combined with the
- # insertion event extremities to make the `/backfill` request
- # but fetch an order of magnitude more to make sure there is
- # enough even after we filter them by whether visible in the
- # history. This isn't fool-proof as all backfill points within
- # our limit could be filtered out but seems like a good amount
- # to try with at least.
- limit=50,
- )
- ]
- insertion_events_to_be_backfilled: List[_BackfillPoint] = []
- if self.hs.config.experimental.msc2716_enabled:
- insertion_events_to_be_backfilled = [
- _BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT)
- for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room(
- room_id=room_id,
- current_depth=current_depth,
- # We only need to end up with 5 extremities combined with
- # the backfill points to make the `/backfill` request ...
- # (see the other comment above for more context).
- limit=50,
- )
- ]
- logger.debug(
- "_maybe_backfill_inner: backwards_extremities=%s insertion_events_to_be_backfilled=%s",
- backwards_extremities,
- insertion_events_to_be_backfilled,
- )
- # we now have a list of potential places to backpaginate from. We prefer to
- # start with the most recent (ie, max depth), so let's sort the list.
- sorted_backfill_points: List[_BackfillPoint] = sorted(
- itertools.chain(
- backwards_extremities,
- insertion_events_to_be_backfilled,
- ),
- key=lambda e: -int(e.depth),
- )
- logger.debug(
- "_maybe_backfill_inner: room_id: %s: current_depth: %s, limit: %s, "
- "backfill points (%d): %s",
- room_id,
- current_depth,
- limit,
- len(sorted_backfill_points),
- sorted_backfill_points,
- )
- # If we have no backfill points lower than the `current_depth` then
- # either we can a) bail or b) still attempt to backfill. We opt to try
- # backfilling anyway just in case we do get relevant events.
- if not sorted_backfill_points and current_depth != MAX_DEPTH:
- logger.debug(
- "_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points."
- )
- return await self._maybe_backfill_inner(
- room_id=room_id,
- # We use `MAX_DEPTH` so that we find all backfill points next
- # time (all events are below the `MAX_DEPTH`)
- current_depth=MAX_DEPTH,
- limit=limit,
- # We don't want to start another timing observation from this
- # nested recursive call. The top-most call can record the time
- # overall otherwise the smaller one will throw off the results.
- processing_start_time=None,
- )
- # Even after recursing with `MAX_DEPTH`, we didn't find any
- # backward extremities to backfill from.
- if not sorted_backfill_points:
- logger.debug(
- "_maybe_backfill_inner: Not backfilling as no backward extremeties found."
- )
- return False
- # If we're approaching an extremity we trigger a backfill, otherwise we
- # no-op.
- #
- # We chose twice the limit here as then clients paginating backwards
- # will send pagination requests that trigger backfill at least twice
- # using the most recent extremity before it gets removed (see below). We
- # chose more than one times the limit in case of failure, but choosing a
- # much larger factor will result in triggering a backfill request much
- # earlier than necessary.
- max_depth_of_backfill_points = sorted_backfill_points[0].depth
- if current_depth - 2 * limit > max_depth_of_backfill_points:
- logger.debug(
- "Not backfilling as we don't need to. %d < %d - 2 * %d",
- max_depth_of_backfill_points,
- current_depth,
- limit,
- )
- return False
- # For performance's sake, we only want to paginate from a particular extremity
- # if we can actually see the events we'll get. Otherwise, we'd just spend a lot
- # of resources to get redacted events. We check each extremity in turn and
- # ignore those which users on our server wouldn't be able to see.
- #
- # Additionally, we limit ourselves to backfilling from at most 5 extremities,
- # for two reasons:
- #
- # - The check which determines if we can see an extremity's events can be
- # expensive (we load the full state for the room at each of the backfill
- # points, or (worse) their successors)
- # - We want to avoid the server-server API request URI becoming too long.
- #
- # *Note*: the spec wants us to keep backfilling until we reach the start
- # of the room in case we are allowed to see some of the history. However,
- # in practice that causes more issues than its worth, as (a) it's
- # relatively rare for there to be any visible history and (b) even when
- # there is it's often sufficiently long ago that clients would stop
- # attempting to paginate before backfill reached the visible history.
- extremities_to_request: List[str] = []
- for bp in sorted_backfill_points:
- if len(extremities_to_request) >= 5:
- break
- # For regular backwards extremities, we don't have the extremity events
- # themselves, so we need to actually check the events that reference them -
- # their "successor" events.
- #
- # TODO: Correctly handle the case where we are allowed to see the
- # successor event but not the backward extremity, e.g. in the case of
- # initial join of the server where we are allowed to see the join
- # event but not anything before it. This would require looking at the
- # state *before* the event, ignoring the special casing certain event
- # types have.
- if bp.type == _BackfillPointType.INSERTION_PONT:
- event_ids_to_check = [bp.event_id]
- else:
- event_ids_to_check = await self.store.get_successor_events(bp.event_id)
- events_to_check = await self.store.get_events_as_list(
- event_ids_to_check,
- redact_behaviour=EventRedactBehaviour.as_is,
- get_prev_content=False,
- )
- # We set `check_history_visibility_only` as we might otherwise get false
- # positives from users having been erased.
- filtered_extremities = await filter_events_for_server(
- self._storage_controllers,
- self.server_name,
- self.server_name,
- events_to_check,
- redact=False,
- check_history_visibility_only=True,
- )
- if filtered_extremities:
- extremities_to_request.append(bp.event_id)
- else:
- logger.debug(
- "_maybe_backfill_inner: skipping extremity %s as it would not be visible",
- bp,
- )
- if not extremities_to_request:
- logger.debug(
- "_maybe_backfill_inner: found no extremities which would be visible"
- )
- return False
- logger.debug(
- "_maybe_backfill_inner: extremities_to_request %s", extremities_to_request
- )
- set_tag(
- SynapseTags.RESULT_PREFIX + "extremities_to_request",
- str(extremities_to_request),
- )
- set_tag(
- SynapseTags.RESULT_PREFIX + "extremities_to_request.length",
- str(len(extremities_to_request)),
- )
- # Now we need to decide which hosts to hit first.
- # First we try hosts that are already in the room.
- # TODO: HEURISTIC ALERT.
- likely_domains = (
- await self._storage_controllers.state.get_current_hosts_in_room_ordered(
- room_id
- )
- )
- async def try_backfill(domains: Collection[str]) -> bool:
- # TODO: Should we try multiple of these at a time?
- # Number of contacted remote homeservers that have denied our backfill
- # request with a 4xx code.
- denied_count = 0
- # Maximum number of contacted remote homeservers that can deny our
- # backfill request with 4xx codes before we give up.
- max_denied_count = 5
- for dom in domains:
- # We don't want to ask our own server for information we don't have
- if dom == self.server_name:
- continue
- try:
- await self._federation_event_handler.backfill(
- dom, room_id, limit=100, extremities=extremities_to_request
- )
- # If this succeeded then we probably already have the
- # appropriate stuff.
- # TODO: We can probably do something more intelligent here.
- return True
- except NotRetryingDestination as e:
- logger.info("_maybe_backfill_inner: %s", e)
- continue
- except FederationDeniedError:
- logger.info(
- "_maybe_backfill_inner: Not attempting to backfill from %s because the homeserver is not on our federation whitelist",
- dom,
- )
- continue
- except (SynapseError, InvalidResponseError) as e:
- logger.info("Failed to backfill from %s because %s", dom, e)
- continue
- except HttpResponseException as e:
- if 400 <= e.code < 500:
- logger.warning(
- "Backfill denied from %s because %s [%d/%d]",
- dom,
- e,
- denied_count,
- max_denied_count,
- )
- denied_count += 1
- if denied_count >= max_denied_count:
- return False
- continue
- logger.info("Failed to backfill from %s because %s", dom, e)
- continue
- except CodeMessageException as e:
- if 400 <= e.code < 500:
- logger.warning(
- "Backfill denied from %s because %s [%d/%d]",
- dom,
- e,
- denied_count,
- max_denied_count,
- )
- denied_count += 1
- if denied_count >= max_denied_count:
- return False
- continue
- logger.info("Failed to backfill from %s because %s", dom, e)
- continue
- except RequestSendFailed as e:
- logger.info("Failed to get backfill from %s because %s", dom, e)
- continue
- except Exception as e:
- logger.exception("Failed to backfill from %s because %s", dom, e)
- continue
- return False
- # If we have the `processing_start_time`, then we can make an
- # observation. We wouldn't have the `processing_start_time` in the case
- # where `_maybe_backfill_inner` is recursively called to find any
- # backfill points regardless of `current_depth`.
- if processing_start_time is not None:
- processing_end_time = self.clock.time_msec()
- backfill_processing_before_timer.observe(
- (processing_end_time - processing_start_time) / 1000
- )
- success = await try_backfill(likely_domains)
- if success:
- return True
- # TODO: we could also try servers which were previously in the room, but
- # are no longer.
- return False
- async def send_invite(self, target_host: str, event: EventBase) -> EventBase:
- """Sends the invite to the remote server for signing.
- Invites must be signed by the invitee's server before distribution.
- """
- try:
- pdu = await self.federation_client.send_invite(
- destination=target_host,
- room_id=event.room_id,
- event_id=event.event_id,
- pdu=event,
- )
- except RequestSendFailed:
- raise SynapseError(502, f"Can't connect to server {target_host}")
- return pdu
- async def on_event_auth(self, event_id: str) -> List[EventBase]:
- event = await self.store.get_event(event_id)
- auth = await self.store.get_auth_chain(
- event.room_id, list(event.auth_event_ids()), include_given=True
- )
- return list(auth)
- async def do_invite_join(
- self, target_hosts: Iterable[str], room_id: str, joinee: str, content: JsonDict
- ) -> Tuple[str, int]:
- """Attempts to join the `joinee` to the room `room_id` via the
- servers contained in `target_hosts`.
- This first triggers a /make_join/ request that returns a partial
- event that we can fill out and sign. This is then sent to the
- remote server via /send_join/ which responds with the state at that
- event and the auth_chains.
- We suspend processing of any received events from this room until we
- have finished processing the join.
- Args:
- target_hosts: List of servers to attempt to join the room with.
- room_id: The ID of the room to join.
- joinee: The User ID of the joining user.
- content: The event content to use for the join event.
- """
- # TODO: We should be able to call this on workers, but the upgrading of
- # room stuff after join currently doesn't work on workers.
- # TODO: Before we relax this condition, we need to allow re-syncing of
- # partial room state to happen on workers.
- assert self.config.worker.worker_app is None
- logger.debug("Joining %s to %s", joinee, room_id)
- origin, event, room_version_obj = await self._make_and_verify_event(
- target_hosts,
- room_id,
- joinee,
- "join",
- content,
- params={"ver": KNOWN_ROOM_VERSIONS},
- )
- # This shouldn't happen, because the RoomMemberHandler has a
- # linearizer lock which only allows one operation per user per room
- # at a time - so this is just paranoia.
- assert room_id not in self._federation_event_handler.room_queues
- self._federation_event_handler.room_queues[room_id] = []
- await self._clean_room_for_join(room_id)
- try:
- # Try the host we successfully got a response to /make_join/
- # request first.
- host_list = list(target_hosts)
- try:
- host_list.remove(origin)
- host_list.insert(0, origin)
- except ValueError:
- pass
- ret = await self.federation_client.send_join(
- host_list, event, room_version_obj
- )
- event = ret.event
- origin = ret.origin
- state = ret.state
- auth_chain = ret.auth_chain
- auth_chain.sort(key=lambda e: e.depth)
- logger.debug("do_invite_join auth_chain: %s", auth_chain)
- logger.debug("do_invite_join state: %s", state)
- logger.debug("do_invite_join event: %s", event)
- # if this is the first time we've joined this room, it's time to add
- # a row to `rooms` with the correct room version. If there's already a
- # row there, we should override it, since it may have been populated
- # based on an invite request which lied about the room version.
- #
- # federation_client.send_join has already checked that the room
- # version in the received create event is the same as room_version_obj,
- # so we can rely on it now.
- #
- await self.store.upsert_room_on_join(
- room_id=room_id,
- room_version=room_version_obj,
- state_events=state,
- )
- if ret.partial_state:
- # Mark the room as having partial state.
- # The background process is responsible for unmarking this flag,
- # even if the join fails.
- await self.store.store_partial_state_room(
- room_id=room_id,
- servers=ret.servers_in_room,
- device_lists_stream_id=self.store.get_device_stream_token(),
- joined_via=origin,
- )
- try:
- max_stream_id = (
- await self._federation_event_handler.process_remote_join(
- origin,
- room_id,
- auth_chain,
- state,
- event,
- room_version_obj,
- partial_state=ret.partial_state,
- )
- )
- except PartialStateConflictError as e:
- # The homeserver was already in the room and it is no longer partial
- # stated. We ought to be doing a local join instead. Turn the error into
- # a 429, as a hint to the client to try again.
- # TODO(faster_joins): `_should_perform_remote_join` suggests that we may
- # do a remote join for restricted rooms even if we have full state.
- logger.error(
- "Room %s was un-partial stated while processing remote join.",
- room_id,
- )
- raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0)
- else:
- # Record the join event id for future use (when we finish the full
- # join). We have to do this after persisting the event to keep foreign
- # key constraints intact.
- if ret.partial_state:
- await self.store.write_partial_state_rooms_join_event_id(
- room_id, event.event_id
- )
- finally:
- # Always kick off the background process that asynchronously fetches
- # state for the room.
- # If the join failed, the background process is responsible for
- # cleaning up — including unmarking the room as a partial state room.
- if ret.partial_state:
- # Kick off the process of asynchronously fetching the state for this
- # room.
- self._start_partial_state_room_sync(
- initial_destination=origin,
- other_destinations=ret.servers_in_room,
- room_id=room_id,
- )
- # We wait here until this instance has seen the events come down
- # replication (if we're using replication) as the below uses caches.
- await self._replication.wait_for_stream_position(
- self.config.worker.events_shard_config.get_instance(room_id),
- "events",
- max_stream_id,
- )
- # Check whether this room is the result of an upgrade of a room we already know
- # about. If so, migrate over user information
- predecessor = await self.store.get_room_predecessor(room_id)
- if not predecessor or not isinstance(predecessor.get("room_id"), str):
- return event.event_id, max_stream_id
- old_room_id = predecessor["room_id"]
- logger.debug(
- "Found predecessor for %s during remote join: %s", room_id, old_room_id
- )
- # We retrieve the room member handler here as to not cause a cyclic dependency
- member_handler = self.hs.get_room_member_handler()
- await member_handler.transfer_room_state_on_room_upgrade(
- old_room_id, room_id
- )
- logger.debug("Finished joining %s to %s", joinee, room_id)
- return event.event_id, max_stream_id
- finally:
- room_queue = self._federation_event_handler.room_queues[room_id]
- del self._federation_event_handler.room_queues[room_id]
- # we don't need to wait for the queued events to be processed -
- # it's just a best-effort thing at this point. We do want to do
- # them roughly in order, though, otherwise we'll end up making
- # lots of requests for missing prev_events which we do actually
- # have. Hence we fire off the background task, but don't wait for it.
- run_as_background_process(
- "handle_queued_pdus", self._handle_queued_pdus, room_queue
- )
- async def do_knock(
- self,
- target_hosts: List[str],
- room_id: str,
- knockee: str,
- content: JsonDict,
- ) -> Tuple[str, int]:
- """Sends the knock to the remote server.
- This first triggers a make_knock request that returns a partial
- event that we can fill out and sign. This is then sent to the
- remote server via send_knock.
- Knock events must be signed by the knockee's server before distributing.
- Args:
- target_hosts: A list of hosts that we want to try knocking through.
- room_id: The ID of the room to knock on.
- knockee: The ID of the user who is knocking.
- content: The content of the knock event.
- Returns:
- A tuple of (event ID, stream ID).
- Raises:
- SynapseError: If the chosen remote server returns a 3xx/4xx code.
- RuntimeError: If no servers were reachable.
- """
- logger.debug("Knocking on room %s on behalf of user %s", room_id, knockee)
- # Inform the remote server of the room versions we support
- supported_room_versions = list(KNOWN_ROOM_VERSIONS.keys())
- # Ask the remote server to create a valid knock event for us. Once received,
- # we sign the event
- params: Dict[str, Iterable[str]] = {"ver": supported_room_versions}
- origin, event, event_format_version = await self._make_and_verify_event(
- target_hosts, room_id, knockee, Membership.KNOCK, content, params=params
- )
- # Mark the knock as an outlier as we don't yet have the state at this point in
- # the DAG.
- event.internal_metadata.outlier = True
- # ... but tell /sync to send it to clients anyway.
- event.internal_metadata.out_of_band_membership = True
- # Record the room ID and its version so that we have a record of the room
- await self._maybe_store_room_on_outlier_membership(
- room_id=event.room_id, room_version=event_format_version
- )
- # Initially try the host that we successfully called /make_knock on
- try:
- target_hosts.remove(origin)
- target_hosts.insert(0, origin)
- except ValueError:
- pass
- # Send the signed event back to the room, and potentially receive some
- # further information about the room in the form of partial state events
- knock_response = await self.federation_client.send_knock(target_hosts, event)
- # Store any stripped room state events in the "unsigned" key of the event.
- # This is a bit of a hack and is cribbing off of invites. Basically we
- # store the room state here and retrieve it again when this event appears
- # in the invitee's sync stream. It is stripped out for all other local users.
- stripped_room_state = (
- knock_response.get("knock_room_state")
- # Since v1.37, Synapse incorrectly used "knock_state_events" for this field.
- # Thus, we also check for a 'knock_state_events' to support old instances.
- # See https://github.com/matrix-org/synapse/issues/14088.
- or knock_response.get("knock_state_events")
- )
- if stripped_room_state is None:
- raise KeyError(
- "Missing 'knock_room_state' (or legacy 'knock_state_events') field in "
- "send_knock response"
- )
- event.unsigned["knock_room_state"] = stripped_room_state
- context = EventContext.for_outlier(self._storage_controllers)
- stream_id = await self._federation_event_handler.persist_events_and_notify(
- event.room_id, [(event, context)]
- )
- return event.event_id, stream_id
- async def _handle_queued_pdus(
- self, room_queue: List[Tuple[EventBase, str]]
- ) -> None:
- """Process PDUs which got queued up while we were busy send_joining.
- Args:
- room_queue: list of PDUs to be processed and the servers that sent them
- """
- for p, origin in room_queue:
- try:
- logger.info(
- "Processing queued PDU %s which was received while we were joining",
- p,
- )
- with nested_logging_context(p.event_id):
- await self._federation_event_handler.on_receive_pdu(origin, p)
- except Exception as e:
- logger.warning(
- "Error handling queued PDU %s from %s: %s", p.event_id, origin, e
- )
- async def on_make_join_request(
- self, origin: str, room_id: str, user_id: str
- ) -> EventBase:
- """We've received a /make_join/ request, so we create a partial
- join event for the room and return that. We do *not* persist or
- process it until the other server has signed it and sent it back.
- Args:
- origin: The (verified) server name of the requesting server.
- room_id: Room to create join event in
- user_id: The user to create the join for
- """
- if get_domain_from_id(user_id) != origin:
- logger.info(
- "Got /make_join request for user %r from different origin %s, ignoring",
- user_id,
- origin,
- )
- raise SynapseError(403, "User not from origin", Codes.FORBIDDEN)
- # checking the room version will check that we've actually heard of the room
- # (and return a 404 otherwise)
- room_version = await self.store.get_room_version(room_id)
- if await self.store.is_partial_state_room(room_id):
- # If our server is still only partially joined, we can't give a complete
- # response to /make_join, so return a 404 as we would if we weren't in the
- # room at all.
- # The main reason we can't respond properly is that we need to know about
- # the auth events for the join event that we would return.
- # We also should not bother entertaining the /make_join since we cannot
- # handle the /send_join.
- logger.info(
- "Rejecting /make_join to %s because it's a partial state room", room_id
- )
- raise SynapseError(
- 404,
- "Unable to handle /make_join right now; this server is not fully joined.",
- errcode=Codes.NOT_FOUND,
- )
- # now check that we are *still* in the room
- is_in_room = await self._event_auth_handler.is_host_in_room(
- room_id, self.server_name
- )
- if not is_in_room:
- logger.info(
- "Got /make_join request for room %s we are no longer in",
- room_id,
- )
- raise NotFoundError("Not an active room on this server")
- event_content = {"membership": Membership.JOIN}
- # If the current room is using restricted join rules, additional information
- # may need to be included in the event content in order to efficiently
- # validate the event.
- #
- # Note that this requires the /send_join request to come back to the
- # same server.
- if room_version.msc3083_join_rules:
- state_ids = await self._state_storage_controller.get_current_state_ids(
- room_id
- )
- if await self._event_auth_handler.has_restricted_join_rules(
- state_ids, room_version
- ):
- prev_member_event_id = state_ids.get((EventTypes.Member, user_id), None)
- # If the user is invited or joined to the room already, then
- # no additional info is needed.
- include_auth_user_id = True
- if prev_member_event_id:
- prev_member_event = await self.store.get_event(prev_member_event_id)
- include_auth_user_id = prev_member_event.membership not in (
- Membership.JOIN,
- Membership.INVITE,
- )
- if include_auth_user_id:
- event_content[
- EventContentFields.AUTHORISING_USER
- ] = await self._event_auth_handler.get_user_which_could_invite(
- room_id,
- state_ids,
- )
- builder = self.event_builder_factory.for_room_version(
- room_version,
- {
- "type": EventTypes.Member,
- "content": event_content,
- "room_id": room_id,
- "sender": user_id,
- "state_key": user_id,
- },
- )
- try:
- event, context = await self.event_creation_handler.create_new_client_event(
- builder=builder
- )
- except SynapseError as e:
- logger.warning("Failed to create join to %s because %s", room_id, e)
- raise
- # Ensure the user can even join the room.
- await self._federation_event_handler.check_join_restrictions(context, event)
- # The remote hasn't signed it yet, obviously. We'll do the full checks
- # when we get the event back in `on_send_join_request`
- await self._event_auth_handler.check_auth_rules_from_context(event)
- return event
- async def on_invite_request(
- self, origin: str, event: EventBase, room_version: RoomVersion
- ) -> EventBase:
- """We've got an invite event. Process and persist it. Sign it.
- Respond with the now signed event.
- """
- if event.state_key is None:
- raise SynapseError(400, "The invite event did not have a state key")
- is_blocked = await self.store.is_room_blocked(event.room_id)
- if is_blocked:
- raise SynapseError(403, "This room has been blocked on this server")
- if self.hs.config.server.block_non_admin_invites:
- raise SynapseError(403, "This server does not accept room invites")
- spam_check = await self.spam_checker.user_may_invite(
- event.sender, event.state_key, event.room_id
- )
- if spam_check != NOT_SPAM:
- raise SynapseError(
- 403,
- "This user is not permitted to send invites to this server/user",
- errcode=spam_check[0],
- additional_fields=spam_check[1],
- )
- membership = event.content.get("membership")
- if event.type != EventTypes.Member or membership != Membership.INVITE:
- raise SynapseError(400, "The event was not an m.room.member invite event")
- sender_domain = get_domain_from_id(event.sender)
- if sender_domain != origin:
- raise SynapseError(
- 400, "The invite event was not from the server sending it"
- )
- if not self.is_mine_id(event.state_key):
- raise SynapseError(400, "The invite event must be for this server")
- # block any attempts to invite the server notices mxid
- if event.state_key == self._server_notices_mxid:
- raise SynapseError(HTTPStatus.FORBIDDEN, "Cannot invite this user")
- # We retrieve the room member handler here as to not cause a cyclic dependency
- member_handler = self.hs.get_room_member_handler()
- # We don't rate limit based on room ID, as that should be done by
- # sending server.
- await member_handler.ratelimit_invite(None, None, event.state_key)
- # keep a record of the room version, if we don't yet know it.
- # (this may get overwritten if we later get a different room version in a
- # join dance).
- await self._maybe_store_room_on_outlier_membership(
- room_id=event.room_id, room_version=room_version
- )
- event.internal_metadata.outlier = True
- event.internal_metadata.out_of_band_membership = True
- event.signatures.update(
- compute_event_signature(
- room_version,
- event.get_pdu_json(),
- self.hs.hostname,
- self.hs.signing_key,
- )
- )
- context = EventContext.for_outlier(self._storage_controllers)
- await self._bulk_push_rule_evaluator.action_for_events_by_user(
- [(event, context)]
- )
- try:
- await self._federation_event_handler.persist_events_and_notify(
- event.room_id, [(event, context)]
- )
- except Exception:
- await self.store.remove_push_actions_from_staging(event.event_id)
- raise
- return event
- async def do_remotely_reject_invite(
- self, target_hosts: Iterable[str], room_id: str, user_id: str, content: JsonDict
- ) -> Tuple[EventBase, int]:
- origin, event, room_version = await self._make_and_verify_event(
- target_hosts, room_id, user_id, "leave", content=content
- )
- # Mark as outlier as we don't have any state for this event; we're not
- # even in the room.
- event.internal_metadata.outlier = True
- event.internal_metadata.out_of_band_membership = True
- # Try the host that we successfully called /make_leave/ on first for
- # the /send_leave/ request.
- host_list = list(target_hosts)
- try:
- host_list.remove(origin)
- host_list.insert(0, origin)
- except ValueError:
- pass
- await self.federation_client.send_leave(host_list, event)
- context = EventContext.for_outlier(self._storage_controllers)
- stream_id = await self._federation_event_handler.persist_events_and_notify(
- event.room_id, [(event, context)]
- )
- return event, stream_id
- async def _make_and_verify_event(
- self,
- target_hosts: Iterable[str],
- room_id: str,
- user_id: str,
- membership: str,
- content: JsonDict,
- params: Optional[Dict[str, Union[str, Iterable[str]]]] = None,
- ) -> Tuple[str, EventBase, RoomVersion]:
- (
- origin,
- event,
- room_version,
- ) = await self.federation_client.make_membership_event(
- target_hosts, room_id, user_id, membership, content, params=params
- )
- logger.debug("Got response to make_%s: %s", membership, event)
- # We should assert some things.
- # FIXME: Do this in a nicer way
- assert event.type == EventTypes.Member
- assert event.user_id == user_id
- assert event.state_key == user_id
- assert event.room_id == room_id
- return origin, event, room_version
- async def on_make_leave_request(
- self, origin: str, room_id: str, user_id: str
- ) -> EventBase:
- """We've received a /make_leave/ request, so we create a partial
- leave event for the room and return that. We do *not* persist or
- process it until the other server has signed it and sent it back.
- Args:
- origin: The (verified) server name of the requesting server.
- room_id: Room to create leave event in
- user_id: The user to create the leave for
- """
- if get_domain_from_id(user_id) != origin:
- logger.info(
- "Got /make_leave request for user %r from different origin %s, ignoring",
- user_id,
- origin,
- )
- raise SynapseError(403, "User not from origin", Codes.FORBIDDEN)
- room_version_obj = await self.store.get_room_version(room_id)
- builder = self.event_builder_factory.for_room_version(
- room_version_obj,
- {
- "type": EventTypes.Member,
- "content": {"membership": Membership.LEAVE},
- "room_id": room_id,
- "sender": user_id,
- "state_key": user_id,
- },
- )
- event, context = await self.event_creation_handler.create_new_client_event(
- builder=builder
- )
- try:
- # The remote hasn't signed it yet, obviously. We'll do the full checks
- # when we get the event back in `on_send_leave_request`
- await self._event_auth_handler.check_auth_rules_from_context(event)
- except AuthError as e:
- logger.warning("Failed to create new leave %r because %s", event, e)
- raise e
- return event
- async def on_make_knock_request(
- self, origin: str, room_id: str, user_id: str
- ) -> EventBase:
- """We've received a make_knock request, so we create a partial
- knock event for the room and return that. We do *not* persist or
- process it until the other server has signed it and sent it back.
- Args:
- origin: The (verified) server name of the requesting server.
- room_id: The room to create the knock event in.
- user_id: The user to create the knock for.
- Returns:
- The partial knock event.
- """
- if get_domain_from_id(user_id) != origin:
- logger.info(
- "Get /make_knock request for user %r from different origin %s, ignoring",
- user_id,
- origin,
- )
- raise SynapseError(403, "User not from origin", Codes.FORBIDDEN)
- room_version_obj = await self.store.get_room_version(room_id)
- builder = self.event_builder_factory.for_room_version(
- room_version_obj,
- {
- "type": EventTypes.Member,
- "content": {"membership": Membership.KNOCK},
- "room_id": room_id,
- "sender": user_id,
- "state_key": user_id,
- },
- )
- event, context = await self.event_creation_handler.create_new_client_event(
- builder=builder
- )
- event_allowed, _ = await self.third_party_event_rules.check_event_allowed(
- event, context
- )
- if not event_allowed:
- logger.warning("Creation of knock %s forbidden by third-party rules", event)
- raise SynapseError(
- 403, "This event is not allowed in this context", Codes.FORBIDDEN
- )
- try:
- # The remote hasn't signed it yet, obviously. We'll do the full checks
- # when we get the event back in `on_send_knock_request`
- await self._event_auth_handler.check_auth_rules_from_context(event)
- except AuthError as e:
- logger.warning("Failed to create new knock %r because %s", event, e)
- raise e
- return event
- @trace
- @tag_args
- async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]:
- """Returns the state at the event. i.e. not including said event."""
- event = await self.store.get_event(event_id, check_room_id=room_id)
- if event.internal_metadata.outlier:
- raise NotFoundError("State not known at event %s" % (event_id,))
- state_groups = await self._state_storage_controller.get_state_groups_ids(
- room_id, [event_id]
- )
- # get_state_groups_ids should return exactly one result
- assert len(state_groups) == 1
- state_map = next(iter(state_groups.values()))
- state_key = event.get_state_key()
- if state_key is not None:
- # the event was not rejected (get_event raises a NotFoundError for rejected
- # events) so the state at the event should include the event itself.
- assert (
- state_map.get((event.type, state_key)) == event.event_id
- ), "State at event did not include event itself"
- # ... but we need the state *before* that event
- if "replaces_state" in event.unsigned:
- prev_id = event.unsigned["replaces_state"]
- state_map[(event.type, state_key)] = prev_id
- else:
- del state_map[(event.type, state_key)]
- return list(state_map.values())
- async def on_backfill_request(
- self, origin: str, room_id: str, pdu_list: List[str], limit: int
- ) -> List[EventBase]:
- # We allow partially joined rooms since in this case we are filtering out
- # non-local events in `filter_events_for_server`.
- await self._event_auth_handler.assert_host_in_room(room_id, origin, True)
- # Synapse asks for 100 events per backfill request. Do not allow more.
- limit = min(limit, 100)
- events = await self.store.get_backfill_events(room_id, pdu_list, limit)
- logger.debug(
- "on_backfill_request: backfill events=%s",
- [
- "event_id=%s,depth=%d,body=%s,prevs=%s\n"
- % (
- event.event_id,
- event.depth,
- event.content.get("body", event.type),
- event.prev_event_ids(),
- )
- for event in events
- ],
- )
- events = await filter_events_for_server(
- self._storage_controllers, origin, self.server_name, events
- )
- return events
- async def get_persisted_pdu(
- self, origin: str, event_id: str
- ) -> Optional[EventBase]:
- """Get an event from the database for the given server.
- Args:
- origin: hostname of server which is requesting the event; we
- will check that the server is allowed to see it.
- event_id: id of the event being requested
- Returns:
- None if we know nothing about the event; otherwise the (possibly-redacted) event.
- Raises:
- AuthError if the server is not currently in the room
- """
- event = await self.store.get_event(
- event_id, allow_none=True, allow_rejected=True
- )
- if not event:
- return None
- await self._event_auth_handler.assert_host_in_room(event.room_id, origin)
- events = await filter_events_for_server(
- self._storage_controllers, origin, self.server_name, [event]
- )
- event = events[0]
- return event
- async def on_get_missing_events(
- self,
- origin: str,
- room_id: str,
- earliest_events: List[str],
- latest_events: List[str],
- limit: int,
- ) -> List[EventBase]:
- # We allow partially joined rooms since in this case we are filtering out
- # non-local events in `filter_events_for_server`.
- await self._event_auth_handler.assert_host_in_room(room_id, origin, True)
- # Only allow up to 20 events to be retrieved per request.
- limit = min(limit, 20)
- missing_events = await self.store.get_missing_events(
- room_id=room_id,
- earliest_events=earliest_events,
- latest_events=latest_events,
- limit=limit,
- )
- missing_events = await filter_events_for_server(
- self._storage_controllers, origin, self.server_name, missing_events
- )
- return missing_events
- async def exchange_third_party_invite(
- self, sender_user_id: str, target_user_id: str, room_id: str, signed: JsonDict
- ) -> None:
- third_party_invite = {"signed": signed}
- event_dict = {
- "type": EventTypes.Member,
- "content": {
- "membership": Membership.INVITE,
- "third_party_invite": third_party_invite,
- },
- "room_id": room_id,
- "sender": sender_user_id,
- "state_key": target_user_id,
- }
- if await self._event_auth_handler.is_host_in_room(room_id, self.hs.hostname):
- room_version_obj = await self.store.get_room_version(room_id)
- builder = self.event_builder_factory.for_room_version(
- room_version_obj, event_dict
- )
- EventValidator().validate_builder(builder)
- # Try several times, it could fail with PartialStateConflictError
- # in send_membership_event, cf comment in except block.
- max_retries = 5
- for i in range(max_retries):
- try:
- (
- event,
- context,
- ) = await self.event_creation_handler.create_new_client_event(
- builder=builder
- )
- event, context = await self.add_display_name_to_third_party_invite(
- room_version_obj, event_dict, event, context
- )
- EventValidator().validate_new(event, self.config)
- # We need to tell the transaction queue to send this out, even
- # though the sender isn't a local user.
- event.internal_metadata.send_on_behalf_of = self.hs.hostname
- try:
- validate_event_for_room_version(event)
- await self._event_auth_handler.check_auth_rules_from_context(
- event
- )
- except AuthError as e:
- logger.warning(
- "Denying new third party invite %r because %s", event, e
- )
- raise e
- await self._check_signature(event, context)
- # We retrieve the room member handler here as to not cause a cyclic dependency
- member_handler = self.hs.get_room_member_handler()
- await member_handler.send_membership_event(None, event, context)
- break
- except PartialStateConflictError as e:
- # Persisting couldn't happen because the room got un-partial stated
- # in the meantime and context needs to be recomputed, so let's do so.
- if i == max_retries - 1:
- raise e
- pass
- else:
- destinations = {x.split(":", 1)[-1] for x in (sender_user_id, room_id)}
- try:
- await self.federation_client.forward_third_party_invite(
- destinations, room_id, event_dict
- )
- except (RequestSendFailed, HttpResponseException):
- raise SynapseError(502, "Failed to forward third party invite")
- async def on_exchange_third_party_invite_request(
- self, event_dict: JsonDict
- ) -> None:
- """Handle an exchange_third_party_invite request from a remote server
- The remote server will call this when it wants to turn a 3pid invite
- into a normal m.room.member invite.
- Args:
- event_dict: Dictionary containing the event body.
- """
- assert_params_in_dict(event_dict, ["room_id"])
- room_version_obj = await self.store.get_room_version(event_dict["room_id"])
- # NB: event_dict has a particular specced format we might need to fudge
- # if we change event formats too much.
- builder = self.event_builder_factory.for_room_version(
- room_version_obj, event_dict
- )
- # Try several times, it could fail with PartialStateConflictError
- # in send_membership_event, cf comment in except block.
- max_retries = 5
- for i in range(max_retries):
- try:
- (
- event,
- context,
- ) = await self.event_creation_handler.create_new_client_event(
- builder=builder
- )
- event, context = await self.add_display_name_to_third_party_invite(
- room_version_obj, event_dict, event, context
- )
- try:
- validate_event_for_room_version(event)
- await self._event_auth_handler.check_auth_rules_from_context(event)
- except AuthError as e:
- logger.warning("Denying third party invite %r because %s", event, e)
- raise e
- await self._check_signature(event, context)
- # We need to tell the transaction queue to send this out, even
- # though the sender isn't a local user.
- event.internal_metadata.send_on_behalf_of = get_domain_from_id(
- event.sender
- )
- # We retrieve the room member handler here as to not cause a cyclic dependency
- member_handler = self.hs.get_room_member_handler()
- await member_handler.send_membership_event(None, event, context)
- break
- except PartialStateConflictError as e:
- # Persisting couldn't happen because the room got un-partial stated
- # in the meantime and context needs to be recomputed, so let's do so.
- if i == max_retries - 1:
- raise e
- pass
- async def add_display_name_to_third_party_invite(
- self,
- room_version_obj: RoomVersion,
- event_dict: JsonDict,
- event: EventBase,
- context: EventContext,
- ) -> Tuple[EventBase, EventContext]:
- key = (
- EventTypes.ThirdPartyInvite,
- event.content["third_party_invite"]["signed"]["token"],
- )
- original_invite = None
- prev_state_ids = await context.get_prev_state_ids(
- StateFilter.from_types([(EventTypes.ThirdPartyInvite, None)])
- )
- original_invite_id = prev_state_ids.get(key)
- if original_invite_id:
- original_invite = await self.store.get_event(
- original_invite_id, allow_none=True
- )
- if original_invite:
- # If the m.room.third_party_invite event's content is empty, it means the
- # invite has been revoked. In this case, we don't have to raise an error here
- # because the auth check will fail on the invite (because it's not able to
- # fetch public keys from the m.room.third_party_invite event's content, which
- # is empty).
- display_name = original_invite.content.get("display_name")
- event_dict["content"]["third_party_invite"]["display_name"] = display_name
- else:
- logger.info(
- "Could not find invite event for third_party_invite: %r", event_dict
- )
- # We don't discard here as this is not the appropriate place to do
- # auth checks. If we need the invite and don't have it then the
- # auth check code will explode appropriately.
- builder = self.event_builder_factory.for_room_version(
- room_version_obj, event_dict
- )
- EventValidator().validate_builder(builder)
- event, context = await self.event_creation_handler.create_new_client_event(
- builder=builder
- )
- EventValidator().validate_new(event, self.config)
- return event, context
- async def _check_signature(self, event: EventBase, context: EventContext) -> None:
- """
- Checks that the signature in the event is consistent with its invite.
- Args:
- event: The m.room.member event to check
- context:
- Raises:
- AuthError: if signature didn't match any keys, or key has been
- revoked,
- SynapseError: if a transient error meant a key couldn't be checked
- for revocation.
- """
- signed = event.content["third_party_invite"]["signed"]
- token = signed["token"]
- prev_state_ids = await context.get_prev_state_ids(
- StateFilter.from_types([(EventTypes.ThirdPartyInvite, None)])
- )
- invite_event_id = prev_state_ids.get((EventTypes.ThirdPartyInvite, token))
- invite_event = None
- if invite_event_id:
- invite_event = await self.store.get_event(invite_event_id, allow_none=True)
- if not invite_event:
- raise AuthError(403, "Could not find invite")
- logger.debug("Checking auth on event %r", event.content)
- last_exception: Optional[Exception] = None
- # for each public key in the 3pid invite event
- for public_key_object in event_auth.get_public_keys(invite_event):
- try:
- # for each sig on the third_party_invite block of the actual invite
- for server, signature_block in signed["signatures"].items():
- for key_name in signature_block.keys():
- if not key_name.startswith("ed25519:"):
- continue
- logger.debug(
- "Attempting to verify sig with key %s from %r "
- "against pubkey %r",
- key_name,
- server,
- public_key_object,
- )
- try:
- public_key = public_key_object["public_key"]
- verify_key = decode_verify_key_bytes(
- key_name, decode_base64(public_key)
- )
- verify_signed_json(signed, server, verify_key)
- logger.debug(
- "Successfully verified sig with key %s from %r "
- "against pubkey %r",
- key_name,
- server,
- public_key_object,
- )
- except Exception:
- logger.info(
- "Failed to verify sig with key %s from %r "
- "against pubkey %r",
- key_name,
- server,
- public_key_object,
- )
- raise
- try:
- if "key_validity_url" in public_key_object:
- await self._check_key_revocation(
- public_key, public_key_object["key_validity_url"]
- )
- except Exception:
- logger.info(
- "Failed to query key_validity_url %s",
- public_key_object["key_validity_url"],
- )
- raise
- return
- except Exception as e:
- last_exception = e
- if last_exception is None:
- # we can only get here if get_public_keys() returned an empty list
- # TODO: make this better
- raise RuntimeError("no public key in invite event")
- raise last_exception
- async def _check_key_revocation(self, public_key: str, url: str) -> None:
- """
- Checks whether public_key has been revoked.
- Args:
- public_key: base-64 encoded public key.
- url: Key revocation URL.
- Raises:
- AuthError: if they key has been revoked.
- SynapseError: if a transient error meant a key couldn't be checked
- for revocation.
- """
- try:
- response = await self.http_client.get_json(url, {"public_key": public_key})
- except Exception:
- raise SynapseError(502, "Third party certificate could not be checked")
- if "valid" not in response or not response["valid"]:
- raise AuthError(403, "Third party certificate was invalid")
- async def _clean_room_for_join(self, room_id: str) -> None:
- """Called to clean up any data in DB for a given room, ready for the
- server to join the room.
- Args:
- room_id
- """
- if self.config.worker.worker_app:
- await self._clean_room_for_join_client(room_id)
- else:
- await self.store.clean_room_for_join(room_id)
- async def get_room_complexity(
- self, remote_room_hosts: List[str], room_id: str
- ) -> Optional[dict]:
- """
- Fetch the complexity of a remote room over federation.
- Args:
- remote_room_hosts: The remote servers to ask.
- room_id: The room ID to ask about.
- Returns:
- Dict contains the complexity
- metric versions, while None means we could not fetch the complexity.
- """
- for host in remote_room_hosts:
- res = await self.federation_client.get_room_complexity(host, room_id)
- # We got a result, return it.
- if res:
- return res
- # We fell off the bottom, couldn't get the complexity from anyone. Oh
- # well.
- return None
- async def _resume_partial_state_room_sync(self) -> None:
- """Resumes resyncing of all partial-state rooms after a restart."""
- assert not self.config.worker.worker_app
- partial_state_rooms = await self.store.get_partial_state_room_resync_info()
- for room_id, resync_info in partial_state_rooms.items():
- self._start_partial_state_room_sync(
- initial_destination=resync_info.joined_via,
- other_destinations=resync_info.servers_in_room,
- room_id=room_id,
- )
- def _start_partial_state_room_sync(
- self,
- initial_destination: Optional[str],
- other_destinations: Collection[str],
- room_id: str,
- ) -> None:
- """Starts the background process to resync the state of a partial state room,
- if it is not already running.
- Args:
- initial_destination: the initial homeserver to pull the state from
- other_destinations: other homeservers to try to pull the state from, if
- `initial_destination` is unavailable
- room_id: room to be resynced
- """
- async def _sync_partial_state_room_wrapper() -> None:
- if room_id in self._active_partial_state_syncs:
- # Another local user has joined the room while there is already a
- # partial state sync running. This implies that there is a new join
- # event to un-partial state. We might find ourselves in one of a few
- # scenarios:
- # 1. There is an existing partial state sync. The partial state sync
- # un-partial states the new join event before completing and all is
- # well.
- # 2. Before the latest join, the homeserver was no longer in the room
- # and there is an existing partial state sync from our previous
- # membership of the room. The partial state sync may have:
- # a) succeeded, but not yet terminated. The room will not be
- # un-partial stated again unless we restart the partial state
- # sync.
- # b) failed, because we were no longer in the room and remote
- # homeservers were refusing our requests, but not yet
- # terminated. After the latest join, remote homeservers may
- # start answering our requests again, so we should restart the
- # partial state sync.
- # In the cases where we would want to restart the partial state sync,
- # the room would have the partial state flag when the partial state sync
- # terminates.
- self._partial_state_syncs_maybe_needing_restart[room_id] = (
- initial_destination,
- other_destinations,
- )
- return
- self._active_partial_state_syncs.add(room_id)
- try:
- await self._sync_partial_state_room(
- initial_destination=initial_destination,
- other_destinations=other_destinations,
- room_id=room_id,
- )
- finally:
- # Read the room's partial state flag while we still hold the claim to
- # being the active partial state sync (so that another partial state
- # sync can't come along and mess with it under us).
- # Normally, the partial state flag will be gone. If it isn't, then we
- # may find ourselves in scenario 2a or 2b as described in the comment
- # above, where we want to restart the partial state sync.
- is_still_partial_state_room = await self.store.is_partial_state_room(
- room_id
- )
- self._active_partial_state_syncs.remove(room_id)
- if room_id in self._partial_state_syncs_maybe_needing_restart:
- (
- restart_initial_destination,
- restart_other_destinations,
- ) = self._partial_state_syncs_maybe_needing_restart.pop(room_id)
- if is_still_partial_state_room:
- self._start_partial_state_room_sync(
- initial_destination=restart_initial_destination,
- other_destinations=restart_other_destinations,
- room_id=room_id,
- )
- run_as_background_process(
- desc="sync_partial_state_room", func=_sync_partial_state_room_wrapper
- )
- async def _sync_partial_state_room(
- self,
- initial_destination: Optional[str],
- other_destinations: Collection[str],
- room_id: str,
- ) -> None:
- """Background process to resync the state of a partial-state room
- Args:
- initial_destination: the initial homeserver to pull the state from
- other_destinations: other homeservers to try to pull the state from, if
- `initial_destination` is unavailable
- room_id: room to be resynced
- """
- assert not self.config.worker.worker_app
- # TODO(faster_joins): do we need to lock to avoid races? What happens if other
- # worker processes kick off a resync in parallel? Perhaps we should just elect
- # a single worker to do the resync.
- # https://github.com/matrix-org/synapse/issues/12994
- #
- # TODO(faster_joins): what happens if we leave the room during a resync? if we
- # really leave, that might mean we have difficulty getting the room state over
- # federation.
- # https://github.com/matrix-org/synapse/issues/12802
- # Make an infinite iterator of destinations to try. Once we find a working
- # destination, we'll stick with it until it flakes.
- destinations = _prioritise_destinations_for_partial_state_resync(
- initial_destination, other_destinations, room_id
- )
- destination_iter = itertools.cycle(destinations)
- # `destination` is the current remote homeserver we're pulling from.
- destination = next(destination_iter)
- logger.info("Syncing state for room %s via %s", room_id, destination)
- # we work through the queue in order of increasing stream ordering.
- while True:
- batch = await self.store.get_partial_state_events_batch(room_id)
- if not batch:
- # all the events are updated, so we can update current state and
- # clear the lazy-loading flag.
- logger.info("Updating current state for %s", room_id)
- # TODO(faster_joins): notify workers in notify_room_un_partial_stated
- # https://github.com/matrix-org/synapse/issues/12994
- await self.state_handler.update_current_state(room_id)
- logger.info("Handling any pending device list updates")
- await self._device_handler.handle_room_un_partial_stated(room_id)
- logger.info("Clearing partial-state flag for %s", room_id)
- success = await self.store.clear_partial_state_room(room_id)
- if success:
- logger.info("State resync complete for %s", room_id)
- self._storage_controllers.state.notify_room_un_partial_stated(
- room_id
- )
- # Poke the notifier so that other workers see the write to
- # the un-partial-stated rooms stream.
- self._notifier.notify_replication()
- # TODO(faster_joins) update room stats and user directory?
- # https://github.com/matrix-org/synapse/issues/12814
- # https://github.com/matrix-org/synapse/issues/12815
- return
- # we raced against more events arriving with partial state. Go round
- # the loop again. We've already logged a warning, so no need for more.
- continue
- events = await self.store.get_events_as_list(
- batch,
- redact_behaviour=EventRedactBehaviour.as_is,
- allow_rejected=True,
- )
- for event in events:
- for attempt in itertools.count():
- try:
- await self._federation_event_handler.update_state_for_partial_state_event(
- destination, event
- )
- break
- except FederationPullAttemptBackoffError as exc:
- # Log a warning about why we failed to process the event (the error message
- # for `FederationPullAttemptBackoffError` is pretty good)
- logger.warning("_sync_partial_state_room: %s", exc)
- # We do not record a failed pull attempt when we backoff fetching a missing
- # `prev_event` because not being able to fetch the `prev_events` just means
- # we won't be able to de-outlier the pulled event. But we can still use an
- # `outlier` in the state/auth chain for another event. So we shouldn't stop
- # a downstream event from trying to pull it.
- #
- # This avoids a cascade of backoff for all events in the DAG downstream from
- # one event backoff upstream.
- except FederationError as e:
- # TODO: We should `record_event_failed_pull_attempt` here,
- # see https://github.com/matrix-org/synapse/issues/13700
- if attempt == len(destinations) - 1:
- # We have tried every remote server for this event. Give up.
- # TODO(faster_joins) giving up isn't the right thing to do
- # if there's a temporary network outage. retrying
- # indefinitely is also not the right thing to do if we can
- # reach all homeservers and they all claim they don't have
- # the state we want.
- # https://github.com/matrix-org/synapse/issues/13000
- logger.error(
- "Failed to get state for %s at %s from %s because %s, "
- "giving up!",
- room_id,
- event,
- destination,
- e,
- )
- raise
- # Try the next remote server.
- logger.info(
- "Failed to get state for %s at %s from %s because %s",
- room_id,
- event,
- destination,
- e,
- )
- destination = next(destination_iter)
- logger.info(
- "Syncing state for room %s via %s instead",
- room_id,
- destination,
- )
- def _prioritise_destinations_for_partial_state_resync(
- initial_destination: Optional[str],
- other_destinations: Collection[str],
- room_id: str,
- ) -> Collection[str]:
- """Work out the order in which we should ask servers to resync events.
- If an `initial_destination` is given, it takes top priority. Otherwise
- all servers are treated equally.
- :raises ValueError: if no destination is provided at all.
- """
- if initial_destination is None and len(other_destinations) == 0:
- raise ValueError(f"Cannot resync state of {room_id}: no destinations provided")
- if initial_destination is None:
- return other_destinations
- # Move `initial_destination` to the front of the list.
- destinations = list(other_destinations)
- if initial_destination in destinations:
- destinations.remove(initial_destination)
- destinations = [initial_destination] + destinations
- return destinations
|