site.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. # Copyright 2016 OpenMarket Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import contextlib
  15. import logging
  16. import time
  17. from twisted.web.server import Request, Site
  18. from synapse.http import redact_uri
  19. from synapse.http.request_metrics import RequestMetrics
  20. from synapse.util.logcontext import LoggingContext, ContextResourceUsage
  21. logger = logging.getLogger(__name__)
  22. _next_request_seq = 0
  23. class SynapseRequest(Request):
  24. """Class which encapsulates an HTTP request to synapse.
  25. All of the requests processed in synapse are of this type.
  26. It extends twisted's twisted.web.server.Request, and adds:
  27. * Unique request ID
  28. * Redaction of access_token query-params in __repr__
  29. * Logging at start and end
  30. * Metrics to record CPU, wallclock and DB time by endpoint.
  31. It provides a method `processing` which should be called by the Resource
  32. which is handling the request, and returns a context manager.
  33. """
  34. def __init__(self, site, *args, **kw):
  35. Request.__init__(self, *args, **kw)
  36. self.site = site
  37. self.authenticated_entity = None
  38. self.start_time = 0
  39. global _next_request_seq
  40. self.request_seq = _next_request_seq
  41. _next_request_seq += 1
  42. def __repr__(self):
  43. # We overwrite this so that we don't log ``access_token``
  44. return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % (
  45. self.__class__.__name__,
  46. id(self),
  47. self.method,
  48. self.get_redacted_uri(),
  49. self.clientproto,
  50. self.site.site_tag,
  51. )
  52. def get_request_id(self):
  53. return "%s-%i" % (self.method, self.request_seq)
  54. def get_redacted_uri(self):
  55. return redact_uri(self.uri)
  56. def get_user_agent(self):
  57. return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
  58. def render(self, resrc):
  59. # override the Server header which is set by twisted
  60. self.setHeader("Server", self.site.server_version_string)
  61. return Request.render(self, resrc)
  62. def _started_processing(self, servlet_name):
  63. self.start_time = time.time()
  64. self.request_metrics = RequestMetrics()
  65. self.request_metrics.start(
  66. self.start_time, name=servlet_name, method=self.method,
  67. )
  68. self.site.access_logger.info(
  69. "%s - %s - Received request: %s %s",
  70. self.getClientIP(),
  71. self.site.site_tag,
  72. self.method,
  73. self.get_redacted_uri()
  74. )
  75. def _finished_processing(self):
  76. try:
  77. context = LoggingContext.current_context()
  78. usage = context.get_resource_usage()
  79. except Exception:
  80. usage = ContextResourceUsage()
  81. end_time = time.time()
  82. # need to decode as it could be raw utf-8 bytes
  83. # from a IDN servname in an auth header
  84. authenticated_entity = self.authenticated_entity
  85. if authenticated_entity is not None:
  86. authenticated_entity = authenticated_entity.decode("utf-8", "replace")
  87. # ...or could be raw utf-8 bytes in the User-Agent header.
  88. # N.B. if you don't do this, the logger explodes cryptically
  89. # with maximum recursion trying to log errors about
  90. # the charset problem.
  91. # c.f. https://github.com/matrix-org/synapse/issues/3471
  92. user_agent = self.get_user_agent()
  93. if user_agent is not None:
  94. user_agent = user_agent.decode("utf-8", "replace")
  95. self.site.access_logger.info(
  96. "%s - %s - {%s}"
  97. " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
  98. " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]",
  99. self.getClientIP(),
  100. self.site.site_tag,
  101. authenticated_entity,
  102. end_time - self.start_time,
  103. usage.ru_utime,
  104. usage.ru_stime,
  105. usage.db_sched_duration_sec,
  106. usage.db_txn_duration_sec,
  107. int(usage.db_txn_count),
  108. self.sentLength,
  109. self.code,
  110. self.method,
  111. self.get_redacted_uri(),
  112. self.clientproto,
  113. user_agent,
  114. usage.evt_db_fetch_count,
  115. )
  116. try:
  117. self.request_metrics.stop(end_time, self)
  118. except Exception as e:
  119. logger.warn("Failed to stop metrics: %r", e)
  120. @contextlib.contextmanager
  121. def processing(self, servlet_name):
  122. """Record the fact that we are processing this request.
  123. Returns a context manager; the correct way to use this is:
  124. @defer.inlineCallbacks
  125. def handle_request(request):
  126. with request.processing("FooServlet"):
  127. yield really_handle_the_request()
  128. This will log the request's arrival. Once the context manager is
  129. closed, the completion of the request will be logged, and the various
  130. metrics will be updated.
  131. Args:
  132. servlet_name (str): the name of the servlet which will be
  133. processing this request. This is used in the metrics.
  134. It is possible to update this afterwards by updating
  135. self.request_metrics.servlet_name.
  136. """
  137. # TODO: we should probably just move this into render() and finish(),
  138. # to save having to call a separate method.
  139. self._started_processing(servlet_name)
  140. yield
  141. self._finished_processing()
  142. class XForwardedForRequest(SynapseRequest):
  143. def __init__(self, *args, **kw):
  144. SynapseRequest.__init__(self, *args, **kw)
  145. """
  146. Add a layer on top of another request that only uses the value of an
  147. X-Forwarded-For header as the result of C{getClientIP}.
  148. """
  149. def getClientIP(self):
  150. """
  151. @return: The client address (the first address) in the value of the
  152. I{X-Forwarded-For header}. If the header is not present, return
  153. C{b"-"}.
  154. """
  155. return self.requestHeaders.getRawHeaders(
  156. b"x-forwarded-for", [b"-"])[0].split(b",")[0].strip()
  157. class SynapseRequestFactory(object):
  158. def __init__(self, site, x_forwarded_for):
  159. self.site = site
  160. self.x_forwarded_for = x_forwarded_for
  161. def __call__(self, *args, **kwargs):
  162. if self.x_forwarded_for:
  163. return XForwardedForRequest(self.site, *args, **kwargs)
  164. else:
  165. return SynapseRequest(self.site, *args, **kwargs)
  166. class SynapseSite(Site):
  167. """
  168. Subclass of a twisted http Site that does access logging with python's
  169. standard logging
  170. """
  171. def __init__(self, logger_name, site_tag, config, resource,
  172. server_version_string, *args, **kwargs):
  173. Site.__init__(self, resource, *args, **kwargs)
  174. self.site_tag = site_tag
  175. proxied = config.get("x_forwarded", False)
  176. self.requestFactory = SynapseRequestFactory(self, proxied)
  177. self.access_logger = logging.getLogger(logger_name)
  178. self.server_version_string = server_version_string
  179. def log(self, request):
  180. pass