Browse Source

Support Prometheus_client 0.4.0+ (#5636)

Amber Brown 4 years ago
parent
commit
7ad1d76356

+ 7 - 0
UPGRADE.rst

@@ -49,6 +49,13 @@ returned by the Client-Server API:
     # configured on port 443.
     curl -kv https://<host.name>/_matrix/client/versions 2>&1 | grep "Server:"
 
+Upgrading to v1.2.0
+===================
+
+Some counter metrics have been renamed, with the old names deprecated. See
+`the metrics documentation <docs/metrics-howto.rst#renaming-of-metrics--deprecation-of-old-names-in-12>`_
+for details.
+
 Upgrading to v1.1.0
 ===================
 

+ 1 - 0
changelog.d/5636.misc

@@ -0,0 +1 @@
+Some counter metrics exposed over Prometheus have been renamed, with the old names preserved for backwards compatibility and deprecated. See `docs/metrics-howto.rst` for details.

+ 102 - 0
docs/metrics-howto.rst

@@ -59,6 +59,108 @@ How to monitor Synapse metrics using Prometheus
    Restart Prometheus.
 
 
+Renaming of metrics & deprecation of old names in 1.2
+-----------------------------------------------------
+
+Synapse 1.2 updates the Prometheus metrics to match the naming convention of the
+upstream ``prometheus_client``. The old names are considered deprecated and will
+be removed in a future version of Synapse.
+
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+|                                  New Name                                   |                               Old Name                                |
++=============================================================================+=======================================================================+
+| python_gc_objects_collected_total                                           | python_gc_objects_collected                                           |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| python_gc_objects_uncollectable_total                                       | python_gc_objects_uncollectable                                       |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| python_gc_collections_total                                                 | python_gc_collections                                                 |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| process_cpu_seconds_total                                                   | process_cpu_seconds                                                   |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_federation_client_sent_transactions_total                           | synapse_federation_client_sent_transactions                           |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_federation_client_events_processed_total                            | synapse_federation_client_events_processed                            |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_event_processing_loop_count_total                                   | synapse_event_processing_loop_count                                   |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_event_processing_loop_room_count_total                              | synapse_event_processing_loop_room_count                              |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_util_metrics_block_count_total                                      | synapse_util_metrics_block_count                                      |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_util_metrics_block_time_seconds_total                               | synapse_util_metrics_block_time_seconds                               |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_util_metrics_block_ru_utime_seconds_total                           | synapse_util_metrics_block_ru_utime_seconds                           |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_util_metrics_block_ru_stime_seconds_total                           | synapse_util_metrics_block_ru_stime_seconds                           |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_util_metrics_block_db_txn_count_total                               | synapse_util_metrics_block_db_txn_count                               |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_util_metrics_block_db_txn_duration_seconds_total                    | synapse_util_metrics_block_db_txn_duration_seconds                    |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_util_metrics_block_db_sched_duration_seconds_total                  | synapse_util_metrics_block_db_sched_duration_seconds                  |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_background_process_start_count_total                                | synapse_background_process_start_count                                |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_background_process_ru_utime_seconds_total                           | synapse_background_process_ru_utime_seconds                           |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_background_process_ru_stime_seconds_total                           | synapse_background_process_ru_stime_seconds                           |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_background_process_db_txn_count_total                               | synapse_background_process_db_txn_count                               |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_background_process_db_txn_duration_seconds_total                    | synapse_background_process_db_txn_duration_seconds                    |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_background_process_db_sched_duration_seconds_total                  | synapse_background_process_db_sched_duration_seconds                  |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_storage_events_persisted_events_total                               | synapse_storage_events_persisted_events                               |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_storage_events_persisted_events_sep_total                           | synapse_storage_events_persisted_events_sep                           |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_storage_events_state_delta_total                                    | synapse_storage_events_state_delta                                    |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_storage_events_state_delta_single_event_total                       | synapse_storage_events_state_delta_single_event                       |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_storage_events_state_delta_reuse_delta_total                        | synapse_storage_events_state_delta_reuse_delta                        |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_federation_server_received_pdus_total                               | synapse_federation_server_received_pdus                               |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_federation_server_received_edus_total                               | synapse_federation_server_received_edus                               |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_handler_presence_notified_presence_total                            | synapse_handler_presence_notified_presence                            |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_handler_presence_federation_presence_out_total                      | synapse_handler_presence_federation_presence_out                      |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_handler_presence_presence_updates_total                             | synapse_handler_presence_presence_updates                             |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_handler_presence_timers_fired_total                                 | synapse_handler_presence_timers_fired                                 |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_handler_presence_federation_presence_total                          | synapse_handler_presence_federation_presence                          |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_handler_presence_bump_active_time_total                             | synapse_handler_presence_bump_active_time                             |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_federation_client_sent_edus_total                                   | synapse_federation_client_sent_edus                                   |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_federation_client_sent_pdu_destinations_count_total                 | synapse_federation_client_sent_pdu_destinations:count                 |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_federation_client_sent_pdu_destinations_total                       | synapse_federation_client_sent_pdu_destinations:total                 |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_handlers_appservice_events_processed_total                          | synapse_handlers_appservice_events_processed                          |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_notifier_notified_events_total                                      | synapse_notifier_notified_events                                      |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter_total | synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter_total   | synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter   |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_http_httppusher_http_pushes_processed_total                         | synapse_http_httppusher_http_pushes_processed                         |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_http_httppusher_http_pushes_failed_total                            | synapse_http_httppusher_http_pushes_failed                            |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_http_httppusher_badge_updates_processed_total                       | synapse_http_httppusher_badge_updates_processed                       |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+| synapse_http_httppusher_badge_updates_failed_total                          | synapse_http_httppusher_badge_updates_failed                          |
++-----------------------------------------------------------------------------+-----------------------------------------------------------------------+
+
+
 Removal of deprecated metrics & time based counters becoming histograms in 0.31.0
 ---------------------------------------------------------------------------------
 

+ 1 - 2
synapse/app/_base.py

@@ -149,8 +149,7 @@ def listen_metrics(bind_addresses, port):
     """
     Start Prometheus metrics server.
     """
-    from synapse.metrics import RegistryProxy
-    from prometheus_client import start_http_server
+    from synapse.metrics import RegistryProxy, start_http_server
 
     for host in bind_addresses:
         logger.info("Starting metrics listener on %s:%d", host, port)

+ 1 - 2
synapse/app/appservice.py

@@ -27,8 +27,7 @@ from synapse.config.homeserver import HomeServerConfig
 from synapse.config.logger import setup_logging
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext, run_in_background
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
 from synapse.replication.slave.storage.directory import DirectoryStore
 from synapse.replication.slave.storage.events import SlavedEventStore

+ 1 - 2
synapse/app/client_reader.py

@@ -28,8 +28,7 @@ from synapse.config.logger import setup_logging
 from synapse.http.server import JsonResource
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore

+ 1 - 2
synapse/app/event_creator.py

@@ -28,8 +28,7 @@ from synapse.config.logger import setup_logging
 from synapse.http.server import JsonResource
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore

+ 1 - 2
synapse/app/federation_reader.py

@@ -29,8 +29,7 @@ from synapse.config.logger import setup_logging
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore

+ 1 - 2
synapse/app/federation_sender.py

@@ -28,9 +28,8 @@ from synapse.config.logger import setup_logging
 from synapse.federation import send_queue
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext, run_in_background
-from synapse.metrics import RegistryProxy
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
 from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore
 from synapse.replication.slave.storage.devices import SlavedDeviceStore
 from synapse.replication.slave.storage.events import SlavedEventStore

+ 1 - 2
synapse/app/frontend_proxy.py

@@ -30,8 +30,7 @@ from synapse.http.server import JsonResource
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
 from synapse.replication.slave.storage.client_ips import SlavedClientIpStore

+ 1 - 2
synapse/app/homeserver.py

@@ -55,9 +55,8 @@ from synapse.http.additional_resource import AdditionalResource
 from synapse.http.server import RootRedirect
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
-from synapse.metrics import RegistryProxy
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
 from synapse.module_api import ModuleApi
 from synapse.python_dependencies import check_requirements
 from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource

+ 1 - 2
synapse/app/media_repository.py

@@ -28,8 +28,7 @@ from synapse.config.homeserver import HomeServerConfig
 from synapse.config.logger import setup_logging
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
 from synapse.replication.slave.storage.client_ips import SlavedClientIpStore

+ 1 - 2
synapse/app/pusher.py

@@ -27,8 +27,7 @@ from synapse.config.homeserver import HomeServerConfig
 from synapse.config.logger import setup_logging
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext, run_in_background
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage._base import __func__
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.events import SlavedEventStore

+ 1 - 2
synapse/app/synchrotron.py

@@ -32,8 +32,7 @@ from synapse.handlers.presence import PresenceHandler, get_interested_parties
 from synapse.http.server import JsonResource
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext, run_in_background
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage._base import BaseSlavedStore, __func__
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore

+ 1 - 2
synapse/app/user_dir.py

@@ -29,8 +29,7 @@ from synapse.config.logger import setup_logging
 from synapse.http.server import JsonResource
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext, run_in_background
-from synapse.metrics import RegistryProxy
-from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
 from synapse.replication.slave.storage.client_ips import SlavedClientIpStore

+ 17 - 0
synapse/metrics/__init__.py

@@ -29,8 +29,16 @@ from prometheus_client.core import REGISTRY, GaugeMetricFamily, HistogramMetricF
 
 from twisted.internet import reactor
 
+from synapse.metrics._exposition import (
+    MetricsResource,
+    generate_latest,
+    start_http_server,
+)
+
 logger = logging.getLogger(__name__)
 
+METRICS_PREFIX = "/_synapse/metrics"
+
 running_on_pypy = platform.python_implementation() == "PyPy"
 all_metrics = []
 all_collectors = []
@@ -470,3 +478,12 @@ try:
         gc.disable()
 except AttributeError:
     pass
+
+__all__ = [
+    "MetricsResource",
+    "generate_latest",
+    "start_http_server",
+    "LaterGauge",
+    "InFlightGauge",
+    "BucketCollector",
+]

+ 258 - 0
synapse/metrics/_exposition.py

@@ -0,0 +1,258 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015-2019 Prometheus Python Client Developers
+# Copyright 2019 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code is based off `prometheus_client/exposition.py` from version 0.7.1.
+
+Due to the renaming of metrics in prometheus_client 0.4.0, this customised
+vendoring of the code will emit both the old versions that Synapse dashboards
+expect, and the newer "best practice" version of the up-to-date official client.
+"""
+
+import math
+import threading
+from collections import namedtuple
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from socketserver import ThreadingMixIn
+from urllib.parse import parse_qs, urlparse
+
+from prometheus_client import REGISTRY
+
+from twisted.web.resource import Resource
+
+try:
+    from prometheus_client.samples import Sample
+except ImportError:
+    Sample = namedtuple("Sample", ["name", "labels", "value", "timestamp", "exemplar"])
+
+
+CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8")
+
+
+INF = float("inf")
+MINUS_INF = float("-inf")
+
+
+def floatToGoString(d):
+    d = float(d)
+    if d == INF:
+        return "+Inf"
+    elif d == MINUS_INF:
+        return "-Inf"
+    elif math.isnan(d):
+        return "NaN"
+    else:
+        s = repr(d)
+        dot = s.find(".")
+        # Go switches to exponents sooner than Python.
+        # We only need to care about positive values for le/quantile.
+        if d > 0 and dot > 6:
+            mantissa = "{0}.{1}{2}".format(s[0], s[1:dot], s[dot + 1 :]).rstrip("0.")
+            return "{0}e+0{1}".format(mantissa, dot - 1)
+        return s
+
+
+def sample_line(line, name):
+    if line.labels:
+        labelstr = "{{{0}}}".format(
+            ",".join(
+                [
+                    '{0}="{1}"'.format(
+                        k,
+                        v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""),
+                    )
+                    for k, v in sorted(line.labels.items())
+                ]
+            )
+        )
+    else:
+        labelstr = ""
+    timestamp = ""
+    if line.timestamp is not None:
+        # Convert to milliseconds.
+        timestamp = " {0:d}".format(int(float(line.timestamp) * 1000))
+    return "{0}{1} {2}{3}\n".format(
+        name, labelstr, floatToGoString(line.value), timestamp
+    )
+
+
+def nameify_sample(sample):
+    """
+    If we get a prometheus_client<0.4.0 sample as a tuple, transform it into a
+    namedtuple which has the names we expect.
+    """
+    if not isinstance(sample, Sample):
+        sample = Sample(*sample, None, None)
+
+    return sample
+
+
+def generate_latest(registry, emit_help=False):
+    output = []
+
+    for metric in registry.collect():
+
+        if metric.name.startswith("__unused"):
+            continue
+
+        if not metric.samples:
+            # No samples, don't bother.
+            continue
+
+        mname = metric.name
+        mnewname = metric.name
+        mtype = metric.type
+
+        # OpenMetrics -> Prometheus
+        if mtype == "counter":
+            mnewname = mnewname + "_total"
+        elif mtype == "info":
+            mtype = "gauge"
+            mnewname = mnewname + "_info"
+        elif mtype == "stateset":
+            mtype = "gauge"
+        elif mtype == "gaugehistogram":
+            mtype = "histogram"
+        elif mtype == "unknown":
+            mtype = "untyped"
+
+        # Output in the old format for compatibility.
+        if emit_help:
+            output.append(
+                "# HELP {0} {1}\n".format(
+                    mname,
+                    metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
+                )
+            )
+        output.append("# TYPE {0} {1}\n".format(mname, mtype))
+        for sample in map(nameify_sample, metric.samples):
+            # Get rid of the OpenMetrics specific samples
+            for suffix in ["_created", "_gsum", "_gcount"]:
+                if sample.name.endswith(suffix):
+                    break
+            else:
+                newname = sample.name.replace(mnewname, mname)
+                if ":" in newname and newname.endswith("_total"):
+                    newname = newname[: -len("_total")]
+                output.append(sample_line(sample, newname))
+
+        # Get rid of the weird colon things while we're at it
+        if mtype == "counter":
+            mnewname = mnewname.replace(":total", "")
+        mnewname = mnewname.replace(":", "_")
+
+        if mname == mnewname:
+            continue
+
+        # Also output in the new format, if it's different.
+        if emit_help:
+            output.append(
+                "# HELP {0} {1}\n".format(
+                    mnewname,
+                    metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
+                )
+            )
+        output.append("# TYPE {0} {1}\n".format(mnewname, mtype))
+        for sample in map(nameify_sample, metric.samples):
+            # Get rid of the OpenMetrics specific samples
+            for suffix in ["_created", "_gsum", "_gcount"]:
+                if sample.name.endswith(suffix):
+                    break
+            else:
+                output.append(
+                    sample_line(
+                        sample, sample.name.replace(":total", "").replace(":", "_")
+                    )
+                )
+
+    return "".join(output).encode("utf-8")
+
+
+class MetricsHandler(BaseHTTPRequestHandler):
+    """HTTP handler that gives metrics from ``REGISTRY``."""
+
+    registry = REGISTRY
+
+    def do_GET(self):
+        registry = self.registry
+        params = parse_qs(urlparse(self.path).query)
+
+        if "help" in params:
+            emit_help = True
+        else:
+            emit_help = False
+
+        try:
+            output = generate_latest(registry, emit_help=emit_help)
+        except Exception:
+            self.send_error(500, "error generating metric output")
+            raise
+        self.send_response(200)
+        self.send_header("Content-Type", CONTENT_TYPE_LATEST)
+        self.end_headers()
+        self.wfile.write(output)
+
+    def log_message(self, format, *args):
+        """Log nothing."""
+
+    @classmethod
+    def factory(cls, registry):
+        """Returns a dynamic MetricsHandler class tied
+           to the passed registry.
+        """
+        # This implementation relies on MetricsHandler.registry
+        #  (defined above and defaulted to REGISTRY).
+
+        # As we have unicode_literals, we need to create a str()
+        #  object for type().
+        cls_name = str(cls.__name__)
+        MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry})
+        return MyMetricsHandler
+
+
+class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer):
+    """Thread per request HTTP server."""
+
+    # Make worker threads "fire and forget". Beginning with Python 3.7 this
+    # prevents a memory leak because ``ThreadingMixIn`` starts to gather all
+    # non-daemon threads in a list in order to join on them at server close.
+    # Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the
+    # same as Python 3.7's ``ThreadingHTTPServer``.
+    daemon_threads = True
+
+
+def start_http_server(port, addr="", registry=REGISTRY):
+    """Starts an HTTP server for prometheus metrics as a daemon thread"""
+    CustomMetricsHandler = MetricsHandler.factory(registry)
+    httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler)
+    t = threading.Thread(target=httpd.serve_forever)
+    t.daemon = True
+    t.start()
+
+
+class MetricsResource(Resource):
+    """
+    Twisted ``Resource`` that serves prometheus metrics.
+    """
+
+    isLeaf = True
+
+    def __init__(self, registry=REGISTRY):
+        self.registry = registry
+
+    def render_GET(self, request):
+        request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii"))
+        return generate_latest(self.registry)

+ 0 - 20
synapse/metrics/resource.py

@@ -1,20 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from prometheus_client.twisted import MetricsResource
-
-METRICS_PREFIX = "/_synapse/metrics"
-
-__all__ = ["MetricsResource", "METRICS_PREFIX"]

+ 1 - 3
synapse/python_dependencies.py

@@ -65,9 +65,7 @@ REQUIREMENTS = [
     "msgpack>=0.5.2",
     "phonenumbers>=8.2.0",
     "six>=1.10",
-    # prometheus_client 0.4.0 changed the format of counter metrics
-    # (cf https://github.com/matrix-org/synapse/issues/4001)
-    "prometheus_client>=0.0.18,<0.4.0",
+    "prometheus_client>=0.0.18,<0.8.0",
     # we use attr.s(slots), which arrived in 16.0.0
     # Twisted 18.7.0 requires attrs>=17.4.0
     "attrs>=17.4.0",

+ 1 - 3
tests/storage/test_event_metrics.py

@@ -13,9 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from prometheus_client.exposition import generate_latest
-
-from synapse.metrics import REGISTRY
+from synapse.metrics import REGISTRY, generate_latest
 from synapse.types import Requester, UserID
 
 from tests.unittest import HomeserverTestCase