request_metrics.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2014-2016 OpenMarket Ltd
  3. # Copyright 2018 New Vector Ltd
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. import logging
  17. from prometheus_client.core import Counter, Histogram
  18. from synapse.metrics import LaterGauge
  19. from synapse.util.logcontext import LoggingContext
  20. logger = logging.getLogger(__name__)
  21. # total number of responses served, split by method/servlet/tag
  22. response_count = Counter(
  23. "synapse_http_server_response_count", "", ["method", "servlet", "tag"]
  24. )
  25. requests_counter = Counter(
  26. "synapse_http_server_requests_received", "", ["method", "servlet"]
  27. )
  28. outgoing_responses_counter = Counter(
  29. "synapse_http_server_responses", "", ["method", "code"]
  30. )
  31. response_timer = Histogram(
  32. "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"]
  33. )
  34. response_ru_utime = Counter(
  35. "synapse_http_server_response_ru_utime_seconds", "sec", ["method", "servlet", "tag"]
  36. )
  37. response_ru_stime = Counter(
  38. "synapse_http_server_response_ru_stime_seconds", "sec", ["method", "servlet", "tag"]
  39. )
  40. response_db_txn_count = Counter(
  41. "synapse_http_server_response_db_txn_count", "", ["method", "servlet", "tag"]
  42. )
  43. # seconds spent waiting for db txns, excluding scheduling time, when processing
  44. # this request
  45. response_db_txn_duration = Counter(
  46. "synapse_http_server_response_db_txn_duration_seconds",
  47. "",
  48. ["method", "servlet", "tag"],
  49. )
  50. # seconds spent waiting for a db connection, when processing this request
  51. response_db_sched_duration = Counter(
  52. "synapse_http_server_response_db_sched_duration_seconds",
  53. "",
  54. ["method", "servlet", "tag"],
  55. )
  56. # size in bytes of the response written
  57. response_size = Counter(
  58. "synapse_http_server_response_size", "", ["method", "servlet", "tag"]
  59. )
  60. # In flight metrics are incremented while the requests are in flight, rather
  61. # than when the response was written.
  62. in_flight_requests_ru_utime = Counter(
  63. "synapse_http_server_in_flight_requests_ru_utime_seconds",
  64. "",
  65. ["method", "servlet"],
  66. )
  67. in_flight_requests_ru_stime = Counter(
  68. "synapse_http_server_in_flight_requests_ru_stime_seconds",
  69. "",
  70. ["method", "servlet"],
  71. )
  72. in_flight_requests_db_txn_count = Counter(
  73. "synapse_http_server_in_flight_requests_db_txn_count", "", ["method", "servlet"]
  74. )
  75. # seconds spent waiting for db txns, excluding scheduling time, when processing
  76. # this request
  77. in_flight_requests_db_txn_duration = Counter(
  78. "synapse_http_server_in_flight_requests_db_txn_duration_seconds",
  79. "",
  80. ["method", "servlet"],
  81. )
  82. # seconds spent waiting for a db connection, when processing this request
  83. in_flight_requests_db_sched_duration = Counter(
  84. "synapse_http_server_in_flight_requests_db_sched_duration_seconds",
  85. "",
  86. ["method", "servlet"],
  87. )
  88. # The set of all in flight requests, set[RequestMetrics]
  89. _in_flight_requests = set()
  90. def _get_in_flight_counts():
  91. """Returns a count of all in flight requests by (method, server_name)
  92. Returns:
  93. dict[tuple[str, str], int]
  94. """
  95. for rm in _in_flight_requests:
  96. rm.update_metrics()
  97. # Map from (method, name) -> int, the number of in flight requests of that
  98. # type
  99. counts = {}
  100. for rm in _in_flight_requests:
  101. key = (rm.method, rm.name,)
  102. counts[key] = counts.get(key, 0) + 1
  103. return counts
  104. LaterGauge(
  105. "synapse_http_request_metrics_in_flight_requests_count",
  106. "",
  107. ["method", "servlet"],
  108. _get_in_flight_counts,
  109. )
  110. class RequestMetrics(object):
  111. def start(self, time_sec, name, method):
  112. self.start = time_sec
  113. self.start_context = LoggingContext.current_context()
  114. self.name = name
  115. self.method = method
  116. self._request_stats = _RequestStats.from_context(self.start_context)
  117. _in_flight_requests.add(self)
  118. def stop(self, time_sec, request):
  119. _in_flight_requests.discard(self)
  120. context = LoggingContext.current_context()
  121. tag = ""
  122. if context:
  123. tag = context.tag
  124. if context != self.start_context:
  125. logger.warn(
  126. "Context have unexpectedly changed %r, %r",
  127. context, self.start_context
  128. )
  129. return
  130. outgoing_responses_counter.labels(request.method, str(request.code)).inc()
  131. response_count.labels(request.method, self.name, tag).inc()
  132. response_timer.labels(request.method, self.name, tag).observe(
  133. time_sec - self.start
  134. )
  135. ru_utime, ru_stime = context.get_resource_usage()
  136. response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime)
  137. response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime)
  138. response_db_txn_count.labels(request.method, self.name, tag).inc(
  139. context.db_txn_count
  140. )
  141. response_db_txn_duration.labels(request.method, self.name, tag).inc(
  142. context.db_txn_duration_sec
  143. )
  144. response_db_sched_duration.labels(request.method, self.name, tag).inc(
  145. context.db_sched_duration_sec
  146. )
  147. response_size.labels(request.method, self.name, tag).inc(request.sentLength)
  148. # We always call this at the end to ensure that we update the metrics
  149. # regardless of whether a call to /metrics while the request was in
  150. # flight.
  151. self.update_metrics()
  152. def update_metrics(self):
  153. """Updates the in flight metrics with values from this request.
  154. """
  155. diff = self._request_stats.update(self.start_context)
  156. in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime)
  157. in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime)
  158. in_flight_requests_db_txn_count.labels(self.method, self.name).inc(
  159. diff.db_txn_count
  160. )
  161. in_flight_requests_db_txn_duration.labels(self.method, self.name).inc(
  162. diff.db_txn_duration_sec
  163. )
  164. in_flight_requests_db_sched_duration.labels(self.method, self.name).inc(
  165. diff.db_sched_duration_sec
  166. )
  167. class _RequestStats(object):
  168. """Keeps tracks of various metrics for an in flight request.
  169. """
  170. __slots__ = [
  171. "ru_utime",
  172. "ru_stime",
  173. "db_txn_count",
  174. "db_txn_duration_sec",
  175. "db_sched_duration_sec",
  176. ]
  177. def __init__(
  178. self, ru_utime, ru_stime, db_txn_count, db_txn_duration_sec, db_sched_duration_sec
  179. ):
  180. self.ru_utime = ru_utime
  181. self.ru_stime = ru_stime
  182. self.db_txn_count = db_txn_count
  183. self.db_txn_duration_sec = db_txn_duration_sec
  184. self.db_sched_duration_sec = db_sched_duration_sec
  185. @staticmethod
  186. def from_context(context):
  187. ru_utime, ru_stime = context.get_resource_usage()
  188. return _RequestStats(
  189. ru_utime, ru_stime,
  190. context.db_txn_count,
  191. context.db_txn_duration_sec,
  192. context.db_sched_duration_sec,
  193. )
  194. def update(self, context):
  195. """Updates the current values and returns the difference between the
  196. old and new values.
  197. Returns:
  198. _RequestStats: The difference between the old and new values
  199. """
  200. new = _RequestStats.from_context(context)
  201. diff = _RequestStats(
  202. new.ru_utime - self.ru_utime,
  203. new.ru_stime - self.ru_stime,
  204. new.db_txn_count - self.db_txn_count,
  205. new.db_txn_duration_sec - self.db_txn_duration_sec,
  206. new.db_sched_duration_sec - self.db_sched_duration_sec,
  207. )
  208. self.ru_utime = new.ru_utime
  209. self.ru_stime = new.ru_stime
  210. self.db_txn_count = new.db_txn_count
  211. self.db_txn_duration_sec = new.db_txn_duration_sec
  212. self.db_sched_duration_sec = new.db_sched_duration_sec
  213. return diff