metric.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2015, 2016 OpenMarket Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. from itertools import chain
  16. import logging
  17. import re
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  19. def flatten(items):
  20. """Flatten a list of lists
  21. Args:
  22. items: iterable[iterable[X]]
  23. Returns:
  24. list[X]: flattened list
  25. """
  26. return list(chain.from_iterable(items))
  27. class BaseMetric(object):
  28. """Base class for metrics which report a single value per label set
  29. """
  30. def __init__(self, name, labels=[], alternative_names=[]):
  31. """
  32. Args:
  33. name (str): principal name for this metric
  34. labels (list(str)): names of the labels which will be reported
  35. for this metric
  36. alternative_names (iterable(str)): list of alternative names for
  37. this metric. This can be useful to provide a migration path
  38. when renaming metrics.
  39. """
  40. self._names = [name] + list(alternative_names)
  41. self.labels = labels # OK not to clone as we never write it
  42. def dimension(self):
  43. return len(self.labels)
  44. def is_scalar(self):
  45. return not len(self.labels)
  46. def _render_labelvalue(self, value):
  47. return '"%s"' % (_escape_label_value(value),)
  48. def _render_key(self, values):
  49. if self.is_scalar():
  50. return ""
  51. return "{%s}" % (
  52. ",".join(["%s=%s" % (k, self._render_labelvalue(v))
  53. for k, v in zip(self.labels, values)])
  54. )
  55. def _render_for_labels(self, label_values, value):
  56. """Render this metric for a single set of labels
  57. Args:
  58. label_values (list[object]): values for each of the labels,
  59. (which get stringified).
  60. value: value of the metric at with these labels
  61. Returns:
  62. iterable[str]: rendered metric
  63. """
  64. rendered_labels = self._render_key(label_values)
  65. return (
  66. "%s%s %.12g" % (name, rendered_labels, value)
  67. for name in self._names
  68. )
  69. def render(self):
  70. """Render this metric
  71. Each metric is rendered as:
  72. name{label1="val1",label2="val2"} value
  73. https://prometheus.io/docs/instrumenting/exposition_formats/#text-format-details
  74. Returns:
  75. iterable[str]: rendered metrics
  76. """
  77. raise NotImplementedError()
  78. class CounterMetric(BaseMetric):
  79. """The simplest kind of metric; one that stores a monotonically-increasing
  80. value that counts events or running totals.
  81. Example use cases for Counters:
  82. - Number of requests processed
  83. - Number of items that were inserted into a queue
  84. - Total amount of data that a system has processed
  85. Counters can only go up (and be reset when the process restarts).
  86. """
  87. def __init__(self, *args, **kwargs):
  88. super(CounterMetric, self).__init__(*args, **kwargs)
  89. # dict[list[str]]: value for each set of label values. the keys are the
  90. # label values, in the same order as the labels in self.labels.
  91. #
  92. # (if the metric is a scalar, the (single) key is the empty tuple).
  93. self.counts = {}
  94. # Scalar metrics are never empty
  95. if self.is_scalar():
  96. self.counts[()] = 0.
  97. def inc_by(self, incr, *values):
  98. if len(values) != self.dimension():
  99. raise ValueError(
  100. "Expected as many values to inc() as labels (%d)" % (self.dimension())
  101. )
  102. # TODO: should assert that the tag values are all strings
  103. if values not in self.counts:
  104. self.counts[values] = incr
  105. else:
  106. self.counts[values] += incr
  107. def inc(self, *values):
  108. self.inc_by(1, *values)
  109. def render(self):
  110. return flatten(
  111. self._render_for_labels(k, self.counts[k])
  112. for k in sorted(self.counts.keys())
  113. )
  114. class GaugeMetric(BaseMetric):
  115. """A metric that can go up or down
  116. """
  117. def __init__(self, *args, **kwargs):
  118. super(GaugeMetric, self).__init__(*args, **kwargs)
  119. # dict[list[str]]: value for each set of label values. the keys are the
  120. # label values, in the same order as the labels in self.labels.
  121. #
  122. # (if the metric is a scalar, the (single) key is the empty tuple).
  123. self.guages = {}
  124. def set(self, v, *values):
  125. if len(values) != self.dimension():
  126. raise ValueError(
  127. "Expected as many values to inc() as labels (%d)" % (self.dimension())
  128. )
  129. # TODO: should assert that the tag values are all strings
  130. self.guages[values] = v
  131. def render(self):
  132. return flatten(
  133. self._render_for_labels(k, self.guages[k])
  134. for k in sorted(self.guages.keys())
  135. )
  136. class CallbackMetric(BaseMetric):
  137. """A metric that returns the numeric value returned by a callback whenever
  138. it is rendered. Typically this is used to implement gauges that yield the
  139. size or other state of some in-memory object by actively querying it."""
  140. def __init__(self, name, callback, labels=[]):
  141. super(CallbackMetric, self).__init__(name, labels=labels)
  142. self.callback = callback
  143. def render(self):
  144. try:
  145. value = self.callback()
  146. except Exception:
  147. logger.exception("Failed to render %s", self.name)
  148. return ["# FAILED to render " + self.name]
  149. if self.is_scalar():
  150. return list(self._render_for_labels([], value))
  151. return flatten(
  152. self._render_for_labels(k, value[k])
  153. for k in sorted(value.keys())
  154. )
  155. class DistributionMetric(object):
  156. """A combination of an event counter and an accumulator, which counts
  157. both the number of events and accumulates the total value. Typically this
  158. could be used to keep track of method-running times, or other distributions
  159. of values that occur in discrete occurances.
  160. TODO(paul): Try to export some heatmap-style stats?
  161. """
  162. def __init__(self, name, *args, **kwargs):
  163. self.counts = CounterMetric(name + ":count", **kwargs)
  164. self.totals = CounterMetric(name + ":total", **kwargs)
  165. def inc_by(self, inc, *values):
  166. self.counts.inc(*values)
  167. self.totals.inc_by(inc, *values)
  168. def render(self):
  169. return self.counts.render() + self.totals.render()
  170. class CacheMetric(object):
  171. __slots__ = (
  172. "name", "cache_name", "hits", "misses", "evicted_size", "size_callback",
  173. )
  174. def __init__(self, name, size_callback, cache_name):
  175. self.name = name
  176. self.cache_name = cache_name
  177. self.hits = 0
  178. self.misses = 0
  179. self.evicted_size = 0
  180. self.size_callback = size_callback
  181. def inc_hits(self):
  182. self.hits += 1
  183. def inc_misses(self):
  184. self.misses += 1
  185. def inc_evictions(self, size=1):
  186. self.evicted_size += size
  187. def render(self):
  188. size = self.size_callback()
  189. hits = self.hits
  190. total = self.misses + self.hits
  191. return [
  192. """%s:hits{name="%s"} %d""" % (self.name, self.cache_name, hits),
  193. """%s:total{name="%s"} %d""" % (self.name, self.cache_name, total),
  194. """%s:size{name="%s"} %d""" % (self.name, self.cache_name, size),
  195. """%s:evicted_size{name="%s"} %d""" % (
  196. self.name, self.cache_name, self.evicted_size
  197. ),
  198. ]
  199. class MemoryUsageMetric(object):
  200. """Keeps track of the current memory usage, using psutil.
  201. The class will keep the current min/max/sum/counts of rss over the last
  202. WINDOW_SIZE_SEC, by polling UPDATE_HZ times per second
  203. """
  204. UPDATE_HZ = 2 # number of times to get memory per second
  205. WINDOW_SIZE_SEC = 30 # the size of the window in seconds
  206. def __init__(self, hs, psutil):
  207. clock = hs.get_clock()
  208. self.memory_snapshots = []
  209. self.process = psutil.Process()
  210. clock.looping_call(self._update_curr_values, 1000 / self.UPDATE_HZ)
  211. def _update_curr_values(self):
  212. max_size = self.UPDATE_HZ * self.WINDOW_SIZE_SEC
  213. self.memory_snapshots.append(self.process.memory_info().rss)
  214. self.memory_snapshots[:] = self.memory_snapshots[-max_size:]
  215. def render(self):
  216. if not self.memory_snapshots:
  217. return []
  218. max_rss = max(self.memory_snapshots)
  219. min_rss = min(self.memory_snapshots)
  220. sum_rss = sum(self.memory_snapshots)
  221. len_rss = len(self.memory_snapshots)
  222. return [
  223. "process_psutil_rss:max %d" % max_rss,
  224. "process_psutil_rss:min %d" % min_rss,
  225. "process_psutil_rss:total %d" % sum_rss,
  226. "process_psutil_rss:count %d" % len_rss,
  227. ]
  228. def _escape_character(m):
  229. """Replaces a single character with its escape sequence.
  230. Args:
  231. m (re.MatchObject): A match object whose first group is the single
  232. character to replace
  233. Returns:
  234. str
  235. """
  236. c = m.group(1)
  237. if c == "\\":
  238. return "\\\\"
  239. elif c == "\"":
  240. return "\\\""
  241. elif c == "\n":
  242. return "\\n"
  243. return c
  244. def _escape_label_value(value):
  245. """Takes a label value and escapes quotes, newlines and backslashes
  246. """
  247. return re.sub(r"([\n\"\\])", _escape_character, str(value))