123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239 |
- # -*- coding: utf-8 -*-
- # Copyright 2018 New Vector Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import logging
- import threading
- from asyncio import iscoroutine
- from functools import wraps
- import six
- from prometheus_client.core import REGISTRY, Counter, GaugeMetricFamily
- from twisted.internet import defer
- from synapse.logging.context import LoggingContext, PreserveLoggingContext
logger = logging.getLogger(__name__)

# Count of starts per background-process type. Unlike the counters below,
# this one is registered with the default prometheus registry.
_background_process_start_count = Counter(
    "synapse_background_process_start_count",
    "Number of background processes started",
    ["name"],
)

# we set registry=None in all of these to stop them getting registered with
# the default registry. Instead we collect them all via the CustomCollector,
# which ensures that we can update them before they are collected.
#
_background_process_ru_utime = Counter(
    "synapse_background_process_ru_utime_seconds",
    "User CPU time used by background processes, in seconds",
    ["name"],
    registry=None,
)

_background_process_ru_stime = Counter(
    "synapse_background_process_ru_stime_seconds",
    "System CPU time used by background processes, in seconds",
    ["name"],
    registry=None,
)

_background_process_db_txn_count = Counter(
    "synapse_background_process_db_txn_count",
    "Number of database transactions done by background processes",
    ["name"],
    registry=None,
)

_background_process_db_txn_duration = Counter(
    "synapse_background_process_db_txn_duration_seconds",
    (
        "Seconds spent by background processes waiting for database "
        "transactions, excluding scheduling time"
    ),
    ["name"],
    registry=None,
)

_background_process_db_sched_duration = Counter(
    "synapse_background_process_db_sched_duration_seconds",
    "Seconds spent by background processes waiting for database connections",
    ["name"],
    registry=None,
)

# map from description to a counter, so that we can name our logcontexts
# incrementally. (It actually duplicates _background_process_start_count, but
# it's much simpler to do so than to try to combine them.)
_background_process_counts = dict()  # type: dict[str, int]

# map from description to the currently running background processes.
#
# it's kept as a dict of sets rather than a big set so that we can keep track
# of process descriptions that no longer have any active processes.
_background_processes = dict()  # type: dict[str, set[_BackgroundProcess]]

# A lock that covers the above dicts; these structures are mutated both by
# the processes themselves and by the metrics collector thread.
_bg_metrics_lock = threading.Lock()
class _Collector(object):
    """A custom metrics collector for the background process metrics.

    Ensures that all of the metrics are up-to-date with any in-flight processes
    before they are returned.
    """

    def collect(self):
        """Yield the in-flight gauge plus the unregistered static counters.

        Called by the prometheus registry on each scrape.
        """
        background_process_in_flight_count = GaugeMetricFamily(
            "synapse_background_process_in_flight_count",
            "Number of background processes in flight",
            labels=["name"],
        )

        # We copy the dict so that it doesn't change from underneath us.
        # We also copy the process lists as that can also change.
        #
        # (This module is Python-3-only — it imports asyncio — so we use
        # plain .items() rather than six.iteritems.)
        with _bg_metrics_lock:
            _background_processes_copy = {
                k: list(v) for k, v in _background_processes.items()
            }

        for desc, processes in _background_processes_copy.items():
            background_process_in_flight_count.add_metric((desc,), len(processes))
            # Flush each process's latest resource usage into the static
            # counters before we yield them below.
            for process in processes:
                process.update_metrics()

        yield background_process_in_flight_count

        # now we need to run collect() over each of the static Counters, and
        # yield each metric they return.
        for m in (
            _background_process_ru_utime,
            _background_process_ru_stime,
            _background_process_db_txn_count,
            _background_process_db_txn_duration,
            _background_process_db_sched_duration,
        ):
            for r in m.collect():
                yield r
- REGISTRY.register(_Collector())
- class _BackgroundProcess(object):
- def __init__(self, desc, ctx):
- self.desc = desc
- self._context = ctx
- self._reported_stats = None
- def update_metrics(self):
- """Updates the metrics with values from this process."""
- new_stats = self._context.get_resource_usage()
- if self._reported_stats is None:
- diff = new_stats
- else:
- diff = new_stats - self._reported_stats
- self._reported_stats = new_stats
- _background_process_ru_utime.labels(self.desc).inc(diff.ru_utime)
- _background_process_ru_stime.labels(self.desc).inc(diff.ru_stime)
- _background_process_db_txn_count.labels(self.desc).inc(diff.db_txn_count)
- _background_process_db_txn_duration.labels(self.desc).inc(
- diff.db_txn_duration_sec
- )
- _background_process_db_sched_duration.labels(self.desc).inc(
- diff.db_sched_duration_sec
- )
def run_as_background_process(desc, func, *args, **kwargs):
    """Run the given function in its own logcontext, with resource metrics

    This should be used to wrap processes which are fired off to run in the
    background, instead of being associated with a particular request.

    It returns a Deferred which completes when the function completes, but it doesn't
    follow the synapse logcontext rules, which makes it appropriate for passing to
    clock.looping_call and friends (or for firing-and-forgetting in the middle of a
    normal synapse inlineCallbacks function).

    Args:
        desc (str): a description for this background process type
        func: a function, which may return a Deferred or a coroutine
        args: positional args for func
        kwargs: keyword args for func

    Returns: Deferred which returns the result of func, but note that it does not
        follow the synapse logcontext rules.
    """

    @defer.inlineCallbacks
    def run():
        with _bg_metrics_lock:
            # Grab a per-description sequence number so each instance of this
            # process type gets a distinct logcontext "request" name below.
            count = _background_process_counts.get(desc, 0)
            _background_process_counts[desc] = count + 1

        _background_process_start_count.labels(desc).inc()

        with LoggingContext(desc) as context:
            context.request = "%s-%i" % (desc, count)
            proc = _BackgroundProcess(desc, context)

            # Register the process as in-flight so _Collector picks it up.
            with _bg_metrics_lock:
                _background_processes.setdefault(desc, set()).add(proc)

            try:
                result = func(*args, **kwargs)

                # We probably don't have an ensureDeferred in our call stack to handle
                # coroutine results, so we need to ensureDeferred here.
                #
                # But we need this check because ensureDeferred doesn't like being
                # called on immediate values (as opposed to Deferreds or coroutines).
                if iscoroutine(result):
                    result = defer.ensureDeferred(result)

                return (yield result)
            except Exception:
                # NOTE(review): the exception is logged and swallowed, so the
                # returned Deferred resolves with None rather than failing —
                # presumably deliberate, as a background process has no caller
                # to propagate errors to.
                logger.exception("Background process '%s' threw an exception", desc)
            finally:
                # Flush any remaining resource usage into the counters, then
                # deregister the process from the in-flight set.
                proc.update_metrics()

                with _bg_metrics_lock:
                    _background_processes[desc].remove(proc)

    # Run outside the caller's logcontext, per the note in the docstring.
    with PreserveLoggingContext():
        return run()
def wrap_as_background_process(desc):
    """Decorator that wraps a function that gets called as a background
    process.

    Equivalent of calling the function with `run_as_background_process`
    """

    def decorate(func):
        # Preserve the wrapped function's metadata (__name__, __doc__, ...).
        @wraps(func)
        def wrapper(*args, **kwargs):
            return run_as_background_process(desc, func, *args, **kwargs)

        return wrapper

    return decorate
|