metrics.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2016 OpenMarket Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import logging
  16. from functools import wraps
  17. from prometheus_client import Counter
  18. from twisted.internet import defer
  19. from synapse.logging.context import LoggingContext
  20. from synapse.metrics import InFlightGauge
  21. logger = logging.getLogger(__name__)
  22. block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"])
  23. block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"])
  24. block_ru_utime = Counter(
  25. "synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"]
  26. )
  27. block_ru_stime = Counter(
  28. "synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"]
  29. )
  30. block_db_txn_count = Counter(
  31. "synapse_util_metrics_block_db_txn_count", "", ["block_name"]
  32. )
  33. # seconds spent waiting for db txns, excluding scheduling time, in this block
  34. block_db_txn_duration = Counter(
  35. "synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"]
  36. )
  37. # seconds spent waiting for a db connection, in this block
  38. block_db_sched_duration = Counter(
  39. "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]
  40. )
  41. # Tracks the number of blocks currently active
  42. in_flight = InFlightGauge(
  43. "synapse_util_metrics_block_in_flight",
  44. "",
  45. labels=["block_name"],
  46. sub_metrics=["real_time_max", "real_time_sum"],
  47. )
  48. def measure_func(name=None):
  49. def wrapper(func):
  50. block_name = func.__name__ if name is None else name
  51. @wraps(func)
  52. @defer.inlineCallbacks
  53. def measured_func(self, *args, **kwargs):
  54. with Measure(self.clock, block_name):
  55. r = yield func(self, *args, **kwargs)
  56. return r
  57. return measured_func
  58. return wrapper
  59. class Measure(object):
  60. __slots__ = [
  61. "clock",
  62. "name",
  63. "start_context",
  64. "start",
  65. "created_context",
  66. "start_usage",
  67. ]
  68. def __init__(self, clock, name):
  69. self.clock = clock
  70. self.name = name
  71. self.start_context = None
  72. self.start = None
  73. self.created_context = False
  74. def __enter__(self):
  75. self.start = self.clock.time()
  76. self.start_context = LoggingContext.current_context()
  77. if not self.start_context:
  78. self.start_context = LoggingContext("Measure")
  79. self.start_context.__enter__()
  80. self.created_context = True
  81. self.start_usage = self.start_context.get_resource_usage()
  82. in_flight.register((self.name,), self._update_in_flight)
  83. def __exit__(self, exc_type, exc_val, exc_tb):
  84. if isinstance(exc_type, Exception) or not self.start_context:
  85. return
  86. in_flight.unregister((self.name,), self._update_in_flight)
  87. duration = self.clock.time() - self.start
  88. block_counter.labels(self.name).inc()
  89. block_timer.labels(self.name).inc(duration)
  90. context = LoggingContext.current_context()
  91. if context != self.start_context:
  92. logger.warning(
  93. "Context has unexpectedly changed from '%s' to '%s'. (%r)",
  94. self.start_context,
  95. context,
  96. self.name,
  97. )
  98. return
  99. if not context:
  100. logger.warning("Expected context. (%r)", self.name)
  101. return
  102. current = context.get_resource_usage()
  103. usage = current - self.start_usage
  104. try:
  105. block_ru_utime.labels(self.name).inc(usage.ru_utime)
  106. block_ru_stime.labels(self.name).inc(usage.ru_stime)
  107. block_db_txn_count.labels(self.name).inc(usage.db_txn_count)
  108. block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec)
  109. block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec)
  110. except ValueError:
  111. logger.warning(
  112. "Failed to save metrics! OLD: %r, NEW: %r", self.start_usage, current
  113. )
  114. if self.created_context:
  115. self.start_context.__exit__(exc_type, exc_val, exc_tb)
  116. def _update_in_flight(self, metrics):
  117. """Gets called when processing in flight metrics
  118. """
  119. duration = self.clock.time() - self.start
  120. metrics.real_time_max = max(metrics.real_time_max, duration)
  121. metrics.real_time_sum += duration
  122. # TODO: Add other in flight metrics.