123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369 |
- # Copyright 2016 OpenMarket Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import contextlib
- import logging
- import time
- from typing import Optional
- from twisted.python.failure import Failure
- from twisted.web.server import Request, Site
- from synapse.http import redact_uri
- from synapse.http.request_metrics import RequestMetrics, requests_counter
- from synapse.logging.context import LoggingContext, PreserveLoggingContext
# Module-level logger for this file.
logger = logging.getLogger(__name__)

# Sequence number handed to the next SynapseRequest that is constructed;
# SynapseRequest.__init__ copies it into `request_seq` and then increments it.
_next_request_seq = 0
class SynapseRequest(Request):
    """Class which encapsulates an HTTP request to synapse.

    All of the requests processed in synapse are of this type.

    It extends twisted's twisted.web.server.Request, and adds:
     * Unique request ID
     * A log context associated with the request
     * Redaction of access_token query-params in __repr__
     * Logging at start and end
     * Metrics to record CPU, wallclock and DB time by endpoint.

    It also provides a method `processing`, which returns a context manager. If this
    method is called, the request won't be logged until the context manager is closed;
    this is useful for asynchronous request handlers which may go on processing the
    request even after the client has disconnected.

    Attributes:
        logcontext: the log context for this request
    """

    def __init__(self, channel, *args, **kw):
        """Initialise the request and allocate its sequence number.

        Args:
            channel: the channel the request arrived on; its `site` attribute
                is stored as `self.site`.
            *args, **kw: passed through to twisted's `Request.__init__`.
        """
        Request.__init__(self, channel, *args, **kw)
        self.site = channel.site
        self._channel = channel  # this is used by the tests
        self.authenticated_entity = None
        self.start_time = 0.0

        # we can't yet create the logcontext, as we don't know the method.
        self.logcontext = None  # type: Optional[LoggingContext]

        global _next_request_seq
        self.request_seq = _next_request_seq
        _next_request_seq += 1

        # whether an asynchronous request handler has called processing()
        self._is_processing = False

        # the time when the asynchronous request handler completed its processing
        self._processing_finished_time = None

        # what time we finished sending the response to the client (or the connection
        # dropped)
        self.finish_time = None

    def __repr__(self):
        # We overwrite this so that we don't log ``access_token``
        return "<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>" % (
            self.__class__.__name__,
            id(self),
            self.get_method(),
            self.get_redacted_uri(),
            self.clientproto.decode("ascii", errors="replace"),
            self.site.site_tag,
        )

    def get_request_id(self):
        """Build the request ID: the request method and the sequence number.

        Returns:
            str: of the form "<method>-<request_seq>"
        """
        return "%s-%i" % (self.get_method(), self.request_seq)

    def get_redacted_uri(self):
        """Return the request URI as a str, after passing it through `redact_uri`.

        Returns:
            str
        """
        uri = self.uri
        if isinstance(uri, bytes):
            uri = self.uri.decode("ascii", errors="replace")
        return redact_uri(uri)

    def get_method(self):
        """Gets the method associated with the request (or placeholder if no
        method has yet been received).

        Note: This is necessary as the placeholder value in twisted is str
        rather than bytes, so we need to sanitise `self.method`.

        Returns:
            str
        """
        method = self.method
        if isinstance(method, bytes):
            # Decode leniently, as is done for `uri` and `clientproto`: this is
            # called from __repr__ and from the logging paths, which must not
            # raise because of non-ASCII bytes in the request line.
            method = self.method.decode("ascii", errors="replace")
        return method

    def get_user_agent(self):
        """Return the last raw (undecoded) User-Agent header value.

        Returns:
            the header value, or None if the request has no User-Agent header
        """
        return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]

    def render(self, resrc):
        """Set up the log context and metrics, then render via the given resource.

        Args:
            resrc: the resource which will serve this request; its class name
                is used as the initial servlet name for the metrics.
        """
        # this is called once a Resource has been found to serve the request; in our
        # case the Resource in question will normally be a JsonResource.

        # create a LogContext for this request
        request_id = self.get_request_id()
        logcontext = self.logcontext = LoggingContext(request_id)
        logcontext.request = request_id

        # override the Server header which is set by twisted
        self.setHeader("Server", self.site.server_version_string)

        with PreserveLoggingContext(self.logcontext):
            # we start the request metrics timer here with an initial stab
            # at the servlet name. For most requests that name will be
            # JsonResource (or a subclass), and JsonResource._async_render
            # will update it once it picks a servlet.
            servlet_name = resrc.__class__.__name__
            self._started_processing(servlet_name)

            Request.render(self, resrc)

            # record the arrival of the request *after*
            # dispatching to the handler, so that the handler
            # can update the servlet name in the request
            # metrics
            requests_counter.labels(self.get_method(), self.request_metrics.name).inc()

    @contextlib.contextmanager
    def processing(self):
        """Record the fact that we are processing this request.

        Returns a context manager; the correct way to use this is:

        @defer.inlineCallbacks
        def handle_request(request):
            with request.processing():
                yield really_handle_the_request()

        Once the context manager is closed, the completion of the request will be logged,
        and the various metrics will be updated.

        Raises:
            RuntimeError: if the request is already being processed.
        """
        if self._is_processing:
            raise RuntimeError("Request is already processing")
        self._is_processing = True

        try:
            yield
        except Exception:
            # this should already have been caught, and sent back to the client as a 500.
            logger.exception("Asynchronous message handler raised an uncaught exception")
        finally:
            # the request handler has finished its work and either sent the whole response
            # back, or handed over responsibility to a Producer.

            self._processing_finished_time = time.time()
            self._is_processing = False

            # if we've already sent the response, log it now; otherwise, we wait for the
            # response to be sent.
            if self.finish_time is not None:
                self._finished_processing()

    def finish(self):
        """Called when all response data has been written to this Request.

        Overrides twisted.web.server.Request.finish to record the finish time and do
        logging.
        """
        self.finish_time = time.time()
        Request.finish(self)
        # if a handler is still inside processing(), it does the logging on exit
        if not self._is_processing:
            assert self.logcontext is not None
            with PreserveLoggingContext(self.logcontext):
                self._finished_processing()

    def connectionLost(self, reason):
        """Called when the client connection is closed before the response is written.

        Overrides twisted.web.server.Request.connectionLost to record the finish time and
        do logging.
        """
        # There is a bug in Twisted where reason is not wrapped in a Failure object
        # Detect this and wrap it manually as a workaround
        # More information: https://github.com/matrix-org/synapse/issues/7441
        if not isinstance(reason, Failure):
            reason = Failure(reason)

        self.finish_time = time.time()
        Request.connectionLost(self, reason)

        if self.logcontext is None:
            logger.info(
                "Connection from %s lost before request headers were read", self.client
            )
            return

        # we only get here if the connection to the client drops before we send
        # the response.
        #
        # It's useful to log it here so that we can get an idea of when
        # the client disconnects.
        with PreserveLoggingContext(self.logcontext):
            logger.warning(
                "Error processing request %r: %s %s", self, reason.type, reason.value
            )

            if not self._is_processing:
                self._finished_processing()

    def _started_processing(self, servlet_name):
        """Record the fact that we are processing this request.

        This will log the request's arrival. Once the request completes,
        be sure to call _finished_processing.

        Args:
            servlet_name (str): the name of the servlet which will be
                processing this request. This is used in the metrics.

                It is possible to update this afterwards by updating
                self.request_metrics.name.
        """
        self.start_time = time.time()
        self.request_metrics = RequestMetrics()
        self.request_metrics.start(
            self.start_time, name=servlet_name, method=self.get_method()
        )

        self.site.access_logger.debug(
            "%s - %s - Received request: %s %s",
            self.getClientIP(),
            self.site.site_tag,
            self.get_method(),
            self.get_redacted_uri(),
        )

    def _finished_processing(self):
        """Log the completion of this request and update the metrics
        """
        assert self.logcontext is not None
        usage = self.logcontext.get_resource_usage()

        if self._processing_finished_time is None:
            # we completed the request without anything calling processing()
            self._processing_finished_time = time.time()

        # the time between receiving the request and the request handler finishing
        processing_time = self._processing_finished_time - self.start_time

        # the time between the request handler finishing and the response being sent
        # to the client (nb may be negative)
        response_send_time = self.finish_time - self._processing_finished_time

        # need to decode as it could be raw utf-8 bytes
        # from a IDN servname in an auth header
        authenticated_entity = self.authenticated_entity
        if isinstance(authenticated_entity, bytes):
            authenticated_entity = authenticated_entity.decode("utf-8", "replace")

        # ...or could be raw utf-8 bytes in the User-Agent header.
        # N.B. if you don't do this, the logger explodes cryptically
        # with maximum recursion trying to log errors about
        # the charset problem.
        # c.f. https://github.com/matrix-org/synapse/issues/3471
        user_agent = self.get_user_agent()
        if user_agent is not None:
            user_agent = user_agent.decode("utf-8", "replace")
        else:
            user_agent = "-"

        code = str(self.code)
        if not self.finished:
            # we didn't send the full response before we gave up (presumably because
            # the connection dropped)
            code += "!"

        self.site.access_logger.info(
            "%s - %s - {%s}"
            " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
            ' %sB %s "%s %s %s" "%s" [%d dbevts]',
            self.getClientIP(),
            self.site.site_tag,
            authenticated_entity,
            processing_time,
            response_send_time,
            usage.ru_utime,
            usage.ru_stime,
            usage.db_sched_duration_sec,
            usage.db_txn_duration_sec,
            int(usage.db_txn_count),
            self.sentLength,
            code,
            self.get_method(),
            self.get_redacted_uri(),
            self.clientproto.decode("ascii", errors="replace"),
            user_agent,
            usage.evt_db_fetch_count,
        )

        # a failure to record metrics must not break completion of the request
        try:
            self.request_metrics.stop(self.finish_time, self.code, self.sentLength)
        except Exception as e:
            logger.warning("Failed to stop metrics: %r", e)
class XForwardedForRequest(SynapseRequest):
    """
    Add a layer on top of another request that only uses the value of an
    X-Forwarded-For header as the result of C{getClientIP}.
    """

    def getClientIP(self):
        """
        @return: The client address (the first address) in the value of the
            I{X-Forwarded-For header}, decoded to C{str}. If the header is not
            present, return C{"-"}.
        """
        forwarded_for = self.requestHeaders.getRawHeaders(
            b"x-forwarded-for", [b"-"]
        )[0]
        first_address = forwarded_for.split(b",")[0].strip()
        # The header bytes come straight from the request, so decode leniently
        # (as this module does for the URI and client protocol): the result is
        # used in the access log lines, which should not raise on non-ASCII
        # bytes.
        return first_address.decode("ascii", errors="replace")
class SynapseSite(Site):
    """
    A twisted http Site subclass whose access logging goes through python's
    standard logging module.
    """

    def __init__(
        self,
        logger_name,
        site_tag,
        config,
        resource,
        server_version_string,
        *args,
        **kwargs
    ):
        """
        Args:
            logger_name: name of the python logger stored as `access_logger`.
            site_tag: stored as `site_tag`.
            config: mapping; its "x_forwarded" entry (default False) selects
                which request class is used as the request factory.
            resource: passed to `Site.__init__` along with *args and **kwargs.
            server_version_string (str): ASCII-encoded and stored as
                `server_version_string`.
        """
        Site.__init__(self, resource, *args, **kwargs)

        self.site_tag = site_tag

        # use the X-Forwarded-For aware request class only when configured to
        if config.get("x_forwarded", False):
            self.requestFactory = XForwardedForRequest
        else:
            self.requestFactory = SynapseRequest

        self.access_logger = logging.getLogger(logger_name)
        self.server_version_string = server_version_string.encode("ascii")

    def log(self, request):
        """Do nothing; the request classes write to `access_logger` themselves."""
        return None
|