123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452 |
- # Copyright 2018 New Vector Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import abc
- import logging
- import re
- import urllib.parse
- from inspect import signature
- from typing import TYPE_CHECKING, Any, Awaitable, Callable, ClassVar, Dict, List, Tuple
- from prometheus_client import Counter, Gauge
- from twisted.internet.error import ConnectError, DNSLookupError
- from twisted.web.server import Request
- from synapse.api.errors import HttpResponseException, SynapseError
- from synapse.http import RequestTimedOutError
- from synapse.http.server import HttpServer
- from synapse.http.servlet import parse_json_object_from_request
- from synapse.http.site import SynapseRequest
- from synapse.logging import opentracing
- from synapse.logging.opentracing import trace_with_opname
- from synapse.types import JsonDict
- from synapse.util.caches.response_cache import ResponseCache
- from synapse.util.cancellation import is_function_cancellable
- from synapse.util.stringutils import random_string
- if TYPE_CHECKING:
- from synapse.server import HomeServer
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# In-flight outgoing replication requests; one labelled child per endpoint
# name (see `ReplicationEndpoint.make_client`).
_pending_outgoing_requests = Gauge(
    "synapse_pending_outgoing_replication_requests",
    "Number of active outgoing replication requests, by replication method name",
    labelnames=["name"],
)

# Completed outgoing replication requests, labelled with the endpoint name and
# the outcome (an HTTP status code, or "ERR" for non-HTTP failures).
_outgoing_request_counter = Counter(
    "synapse_outgoing_replication_requests",
    "Number of outgoing replication requests, by replication method name and result",
    labelnames=["name", "code"],
)

# Reserved key under which stream positions are carried in replication request
# and response bodies. Endpoint payloads must not use this key themselves.
_STREAM_POSITION_KEY = "_INT_STREAM_POS"
class ReplicationEndpoint(metaclass=abc.ABCMeta):
    """Helper base class for defining new replication HTTP endpoints.

    This creates an endpoint under `/_synapse/replication/:NAME/:PATH_ARGS..`
    (with a `/:txn_id` suffix for cached requests), where NAME is a name,
    PATH_ARGS are a tuple of parameters to be encoded in the URL.

    For example, if `NAME` is "send_event" and `PATH_ARGS` is `("event_id",)`,
    with `CACHE` set to true then this generates an endpoint:

        /_synapse/replication/send_event/:event_id/:txn_id

    For POST/PUT requests the payload is serialized to json and sent as the
    body, while for GET requests the payload is added as query parameters. See
    `_serialize_payload` for details.

    Incoming requests are handled by overriding `_handle_request`. Servers
    must call `register` to register the path with the HTTP server.

    Requests can be sent by calling the client returned by `make_client`.
    Requests are sent to master process by default, but can be sent to other
    named processes by specifying an `instance_name` keyword argument.

    Attributes:
        NAME (str): A name for the endpoint, added to the path as well as used
            in logging and metrics.
        PATH_ARGS (tuple[str]): A list of parameters to be added to the path.
            Adding parameters to the path (rather than payload) can make it
            easier to follow along in the log files.
        METHOD (str): The method of the HTTP request, defaults to POST. Can be
            one of POST, PUT or GET. If GET then the payload is sent as query
            parameters rather than a JSON body.
        CACHE (bool): Whether server should cache the result of the request.
            If true then transparently adds a txn_id to all requests, and the
            result of `_handle_request` is cached against that txn_id.
        RETRY_ON_TIMEOUT (bool): Whether or not to retry the request when the
            request times out.
        RETRY_ON_CONNECT_ERROR (bool): Whether or not to retry the request when
            a connection error is received.
        RETRY_ON_CONNECT_ERROR_ATTEMPTS (int): Number of attempts to retry when
            receiving connection errors, each will backoff exponentially longer.
        WAIT_FOR_STREAMS (bool): Whether to wait for replication streams to
            catch up before processing the request and/or response. Defaults to
            True.
    """

    NAME: str = abc.abstractproperty()  # type: ignore
    PATH_ARGS: Tuple[str, ...] = abc.abstractproperty()  # type: ignore
    METHOD = "POST"
    CACHE = True
    RETRY_ON_TIMEOUT = True
    RETRY_ON_CONNECT_ERROR = True
    RETRY_ON_CONNECT_ERROR_ATTEMPTS = 5  # =63s (2^6-1)

    WAIT_FOR_STREAMS: ClassVar[bool] = True

    def __init__(self, hs: "HomeServer"):
        """Set up the server side of the endpoint.

        Args:
            hs: The homeserver, used to look up the clock, the worker config
                and the replication handlers.
        """
        if self.CACHE:
            # Responses are cached against the request's txn_id for 30 minutes.
            self.response_cache: ResponseCache[str] = ResponseCache(
                hs.get_clock(), "repl." + self.NAME, timeout_ms=30 * 60 * 1000
            )

        # We reserve `instance_name` as a parameter to sending requests, so we
        # assert here that sub classes don't try and use the name.
        assert (
            "instance_name" not in self.PATH_ARGS
        ), "`instance_name` is a reserved parameter name"
        assert (
            "instance_name"
            not in signature(self.__class__._serialize_payload).parameters
        ), "`instance_name` is a reserved parameter name"

        assert self.METHOD in ("PUT", "POST", "GET")

        # `None` means that no shared secret is configured, in which case
        # incoming requests are not authenticated.
        self._replication_secret = None
        if hs.config.worker.worker_replication_secret:
            self._replication_secret = hs.config.worker.worker_replication_secret

        self._streams = hs.get_replication_command_handler().get_streams_to_replicate()
        self._replication = hs.get_replication_data_handler()
        self._instance_name = hs.get_instance_name()

    def _check_auth(self, request: Request) -> None:
        """Check that the request carries the configured replication secret.

        The request must have exactly one `Authorization` header of the form
        `Bearer <secret>`.

        Args:
            request: The incoming request.

        Raises:
            RuntimeError: if the header is missing, duplicated, malformed or
                does not carry the expected secret.
        """
        import hmac

        # Get the authorization header.
        auth_headers = request.requestHeaders.getRawHeaders(b"Authorization")

        if not auth_headers:
            raise RuntimeError("Missing Authorization header.")
        if len(auth_headers) > 1:
            raise RuntimeError("Too many Authorization headers.")

        parts = auth_headers[0].split(b" ")
        expected_secret = self._replication_secret
        if expected_secret is not None and len(parts) == 2 and parts[0] == b"Bearer":
            try:
                received_secret = parts[1].decode("ascii")
            except UnicodeDecodeError:
                # A non-ASCII token can never match; report it as an invalid
                # header rather than leaking a decoding error.
                received_secret = None

            # Compare in constant time so that the time taken to reject a
            # token does not reveal how much of it was correct.
            if received_secret is not None and hmac.compare_digest(
                expected_secret.encode("utf-8"), received_secret.encode("utf-8")
            ):
                # Success!
                return

        raise RuntimeError("Invalid Authorization header.")

    @abc.abstractmethod
    async def _serialize_payload(**kwargs) -> JsonDict:
        """Static method that is called when creating a request.

        Concrete implementations should have explicit parameters (rather than
        kwargs) so that an appropriate exception is raised if the client is
        called with unexpected parameters. All PATH_ARGS must appear in
        argument list.

        Returns:
            If POST/PUT request then dictionary must be JSON serialisable,
            otherwise must be appropriate for adding as query args.
        """
        return {}

    @abc.abstractmethod
    async def _handle_request(
        self, request: Request, content: JsonDict, **kwargs: Any
    ) -> Tuple[int, JsonDict]:
        """Handle incoming request.

        This is called with the request object, the parsed request body
        (empty for GET requests) and PATH_ARGS.

        Returns:
            HTTP status code and a JSON serialisable dict to be used as response
            body of request.
        """

    @classmethod
    def make_client(cls, hs: "HomeServer") -> Callable:
        """Create a client that makes requests.

        Returns a callable that accepts the same parameters as
        `_serialize_payload`, and also accepts an optional `instance_name`
        parameter to specify which instance to hit (the instance must be in
        the `instance_map` config).

        The returned callable raises a `SynapseError` if the request fails:
        either the error converted from the remote's HTTP error response, or a
        502 for any other failure.
        """
        clock = hs.get_clock()
        client = hs.get_simple_http_client()
        local_instance_name = hs.get_instance_name()

        # The value of these option should match the replication listener settings
        master_host = hs.config.worker.worker_replication_host
        master_port = hs.config.worker.worker_replication_http_port
        master_tls = hs.config.worker.worker_replication_http_tls

        instance_map = hs.config.worker.instance_map

        outgoing_gauge = _pending_outgoing_requests.labels(cls.NAME)

        replication_secret = None
        if hs.config.worker.worker_replication_secret:
            replication_secret = hs.config.worker.worker_replication_secret.encode(
                "ascii"
            )

        @trace_with_opname("outgoing_replication_request")
        async def send_request(*, instance_name: str = "master", **kwargs: Any) -> Any:
            # We have to pull these out here to avoid circular dependencies...
            streams = hs.get_replication_command_handler().get_streams_to_replicate()
            replication = hs.get_replication_data_handler()

            with outgoing_gauge.track_inprogress():
                if instance_name == local_instance_name:
                    raise Exception("Trying to send HTTP request to self")
                if instance_name == "master":
                    host = master_host
                    port = master_port
                    tls = master_tls
                elif instance_name in instance_map:
                    host = instance_map[instance_name].host
                    port = instance_map[instance_name].port
                    tls = instance_map[instance_name].tls
                else:
                    raise Exception(
                        "Instance %r not in 'instance_map' config" % (instance_name,)
                    )

                data = await cls._serialize_payload(**kwargs)

                if cls.METHOD != "GET" and cls.WAIT_FOR_STREAMS:
                    # Include the current stream positions that we write to. We
                    # don't do this for GETs as they don't have a body, and we
                    # generally assume that a GET won't rely on data we have
                    # written.
                    if _STREAM_POSITION_KEY in data:
                        raise Exception(
                            "data to send contains %r key" % (_STREAM_POSITION_KEY,)
                        )

                    data[_STREAM_POSITION_KEY] = {
                        "streams": {
                            stream.NAME: stream.current_token(local_instance_name)
                            for stream in streams
                        },
                        "instance_name": local_instance_name,
                    }

                url_args = [
                    urllib.parse.quote(kwargs[name], safe="") for name in cls.PATH_ARGS
                ]

                if cls.CACHE:
                    # The txn_id is generated once per call, so every retry
                    # below re-sends the request under the same txn_id.
                    txn_id = random_string(10)
                    url_args.append(txn_id)

                if cls.METHOD == "POST":
                    request_func: Callable[
                        ..., Awaitable[Any]
                    ] = client.post_json_get_json
                elif cls.METHOD == "PUT":
                    request_func = client.put_json
                elif cls.METHOD == "GET":
                    request_func = client.get_json
                else:
                    # We have already asserted in the constructor that a
                    # compatible was picked, but lets be paranoid.
                    raise Exception(
                        "Unknown METHOD on %s replication endpoint" % (cls.NAME,)
                    )

                # Here the protocol is hard coded to be http by default or https in case the replication
                # port is set to have tls true.
                scheme = "https" if tls else "http"
                uri = "%s://%s:%s/_synapse/replication/%s/%s" % (
                    scheme,
                    host,
                    port,
                    cls.NAME,
                    "/".join(url_args),
                )

                headers: Dict[bytes, List[bytes]] = {}
                # Add an authorization header, if configured.
                if replication_secret:
                    headers[b"Authorization"] = [b"Bearer " + replication_secret]
                opentracing.inject_header_dict(headers, check_destination=False)

                try:
                    # Keep track of attempts made so we can bail if we don't manage to
                    # connect to the target after N tries.
                    attempts = 0
                    # We keep retrying the same request for timeouts. This is so that we
                    # have a good idea that the request has either succeeded or failed
                    # on the master, and so whether we should clean up or not.
                    while True:
                        try:
                            result = await request_func(uri, data, headers=headers)
                            break
                        except RequestTimedOutError:
                            if not cls.RETRY_ON_TIMEOUT:
                                raise

                            logger.warning("%s request timed out; retrying", cls.NAME)

                            # If we timed out we probably don't need to worry about backing
                            # off too much, but lets just wait a little anyway.
                            await clock.sleep(1)
                        except (ConnectError, DNSLookupError) as e:
                            if not cls.RETRY_ON_CONNECT_ERROR:
                                raise
                            if attempts > cls.RETRY_ON_CONNECT_ERROR_ATTEMPTS:
                                raise

                            delay = 2**attempts
                            logger.warning(
                                "%s request connection failed; retrying in %ds: %r",
                                cls.NAME,
                                delay,
                                e,
                            )

                            await clock.sleep(delay)
                            attempts += 1
                except HttpResponseException as e:
                    # We convert to SynapseError as we know that it was a SynapseError
                    # on the main process that we should send to the client. (And
                    # importantly, not stack traces everywhere)
                    _outgoing_request_counter.labels(cls.NAME, e.code).inc()
                    raise e.to_synapse_error()
                except Exception as e:
                    _outgoing_request_counter.labels(cls.NAME, "ERR").inc()
                    raise SynapseError(
                        502, f"Failed to talk to {instance_name} process"
                    ) from e

                _outgoing_request_counter.labels(cls.NAME, 200).inc()

                # Wait on any streams that the remote may have written to.
                for stream_name, position in result.get(
                    _STREAM_POSITION_KEY, {}
                ).items():
                    await replication.wait_for_stream_position(
                        instance_name=instance_name,
                        stream_name=stream_name,
                        position=position,
                        raise_on_timeout=False,
                    )

                return result

        return send_request

    def register(self, http_server: HttpServer) -> None:
        """Called by the server to register this as a handler to the
        appropriate path.

        Raises:
            Exception: if `CACHE` is set and `_handle_request` has been marked
                as cancellable.
        """
        url_args = list(self.PATH_ARGS)
        method = self.METHOD

        if self.CACHE and is_function_cancellable(self._handle_request):
            raise Exception(
                f"{self.__class__.__name__} has been marked as cancellable, but CACHE "
                "is set. The cancellable flag would have no effect."
            )

        if self.CACHE:
            url_args.append("txn_id")

        # Each path argument becomes a named group matching one path segment;
        # the groups are passed to the handler as keyword arguments.
        args = "/".join("(?P<%s>[^/]+)" % (arg,) for arg in url_args)
        pattern = re.compile("^/_synapse/replication/%s/%s$" % (self.NAME, args))

        http_server.register_paths(
            method,
            [pattern],
            self._check_auth_and_handle,
            self.__class__.__name__,
        )

    async def _check_auth_and_handle(
        self, request: SynapseRequest, **kwargs: Any
    ) -> Tuple[int, JsonDict]:
        """Called on all new incoming requests.

        Checks the authorization header (if a replication secret is
        configured), waits for any stream positions sent by the caller, and
        then calls `_handle_request`. When caching is enabled the call goes
        through the response cache, keyed on the request's txn_id.

        Returns:
            HTTP status code and the response body. When `WAIT_FOR_STREAMS` is
            set the body also carries this instance's current stream positions.
        """
        # Check the authorization headers before handling the request.
        if self._replication_secret:
            self._check_auth(request)

        if self.METHOD == "GET":
            # GET APIs always have an empty body.
            content = {}
        else:
            content = parse_json_object_from_request(request)

        # Wait on any streams that the remote may have written to.
        for stream_name, position in content.get(_STREAM_POSITION_KEY, {"streams": {}})[
            "streams"
        ].items():
            await self._replication.wait_for_stream_position(
                instance_name=content[_STREAM_POSITION_KEY]["instance_name"],
                stream_name=stream_name,
                position=position,
                raise_on_timeout=False,
            )

        if self.CACHE:
            # We just use the txn_id here, but we probably also want to use the
            # other PATH_ARGS as well.
            txn_id = kwargs.pop("txn_id")

            # We ignore the `@cancellable` flag, since cancellation wouldn't interrupt
            # `_handle_request` and `ResponseCache` does not handle cancellation
            # correctly yet. In particular, there may be issues to do with logging
            # context lifetimes.
            code, response = await self.response_cache.wrap(
                txn_id, self._handle_request, request, content, **kwargs
            )
        else:
            # The `@cancellable` decorator may be applied to `_handle_request`. But we
            # told `HttpServer.register_paths` that our handler is `_check_auth_and_handle`,
            # so we have to set up the cancellable flag ourselves.
            request.is_render_cancellable = is_function_cancellable(
                self._handle_request
            )

            code, response = await self._handle_request(request, content, **kwargs)

        # Return streams we may have written to in the course of processing this
        # request.
        if _STREAM_POSITION_KEY in response:
            raise Exception("data to send contains %r key" % (_STREAM_POSITION_KEY,))

        if self.WAIT_FOR_STREAMS:
            # Build a new dict rather than writing into the one the handler
            # returned.
            # NOTE(review): with CACHE set, `response` is presumably the very
            # object held by the response cache, so mutating it in place would
            # make a retried request with the same txn_id trip the check above
            # -- confirm against `ResponseCache`.
            response = {
                **response,
                _STREAM_POSITION_KEY: {
                    stream.NAME: stream.current_token(self._instance_name)
                    for stream in self._streams
                },
            }

        return code, response
|