Browse Source

update metrics to be in seconds

Amber Brown 6 years ago
parent
commit
a2eb5db4a0
4 changed files with 38 additions and 37 deletions
  1. 3 3
      synapse/http/request_metrics.py
  2. 10 10
      synapse/http/site.py
  3. 10 9
      synapse/metrics/__init__.py
  4. 15 15
      synapse/storage/_base.py

+ 3 - 3
synapse/http/request_metrics.py

@@ -38,15 +38,15 @@ outgoing_responses_counter = Counter(
 )
 
 response_timer = Histogram(
-    "synapse_http_server_response_time_seconds", "", ["method", "servlet", "tag"]
+    "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"]
 )
 
 response_ru_utime = Counter(
-    "synapse_http_server_response_ru_utime_seconds", "", ["method", "servlet", "tag"]
+    "synapse_http_server_response_ru_utime_seconds", "sec", ["method", "servlet", "tag"]
 )
 
 response_ru_stime = Counter(
-    "synapse_http_server_response_ru_stime_seconds", "", ["method", "servlet", "tag"]
+    "synapse_http_server_response_ru_stime_seconds", "sec", ["method", "servlet", "tag"]
 )
 
 response_db_txn_count = Counter(

+ 10 - 10
synapse/http/site.py

@@ -83,7 +83,7 @@ class SynapseRequest(Request):
         return Request.render(self, resrc)
 
     def _started_processing(self, servlet_name):
-        self.start_time = int(time.time() * 1000)
+        self.start_time = time.time()
         self.request_metrics = RequestMetrics()
         self.request_metrics.start(
             self.start_time, name=servlet_name, method=self.method,
@@ -102,26 +102,26 @@ class SynapseRequest(Request):
             context = LoggingContext.current_context()
             ru_utime, ru_stime = context.get_resource_usage()
             db_txn_count = context.db_txn_count
-            db_txn_duration_ms = context.db_txn_duration_ms
-            db_sched_duration_ms = context.db_sched_duration_ms
+            db_txn_duration_sec = context.db_txn_duration_sec
+            db_sched_duration_sec = context.db_sched_duration_sec
         except Exception:
             ru_utime, ru_stime = (0, 0)
-            db_txn_count, db_txn_duration_ms = (0, 0)
+            db_txn_count, db_txn_duration_sec = (0, 0)
 
-        end_time = int(time.time() * 1000)
+        end_time = time.time()
 
         self.site.access_logger.info(
             "%s - %s - {%s}"
-            " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)"
+            " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
             " %sB %s \"%s %s %s\" \"%s\"",
             self.getClientIP(),
             self.site.site_tag,
             self.authenticated_entity,
             end_time - self.start_time,
-            int(ru_utime * 1000),
-            int(ru_stime * 1000),
-            db_sched_duration_ms,
-            db_txn_duration_ms,
+            ru_utime,
+            ru_stime,
+            db_sched_duration_sec,
+            db_txn_duration_sec,
             int(db_txn_count),
             self.sentLength,
             self.code,

+ 10 - 9
synapse/metrics/__init__.py

@@ -122,10 +122,10 @@ REGISTRY.register(CPUMetrics())
 gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
 gc_time = Histogram(
     "python_gc_time",
-    "Time taken to GC (ms)",
+    "Time taken to GC (sec)",
     ["gen"],
-    buckets=[2.5, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 7500, 15000,
-             30000, 45000, 60000],
+    buckets=[0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.50, 1.00, 2.50,
+             5.00, 7.50, 15.00, 30.00, 45.00, 60.00],
 )
 
 
@@ -147,8 +147,9 @@ REGISTRY.register(GCCounts())
 
 tick_time = Histogram(
     "python_twisted_reactor_tick_time",
-    "Tick time of the Twisted reactor (ms)",
-    buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000],
+    "Tick time of the Twisted reactor (sec)",
+    buckets=[0.001, 0.002, 0.005, 0.001, 0.005, 0.01. 0.025, 0.05, 0.1, 0.2,
+             0.5, 1, 2, 5],
 )
 pending_calls_metric = Histogram(
     "python_twisted_reactor_pending_calls",
@@ -202,9 +203,9 @@ def runUntilCurrentTimer(func):
             num_pending += 1
 
         num_pending += len(reactor.threadCallQueue)
-        start = time.time() * 1000
+        start = time.time()
         ret = func(*args, **kwargs)
-        end = time.time() * 1000
+        end = time.time()
 
         # record the amount of wallclock time spent running pending calls.
         # This is a proxy for the actual amount of time between reactor polls,
@@ -225,9 +226,9 @@ def runUntilCurrentTimer(func):
             if threshold[i] < counts[i]:
                 logger.info("Collecting gc %d", i)
 
-                start = time.time() * 1000
+                start = time.time()
                 unreachable = gc.collect(i)
-                end = time.time() * 1000
+                end = time.time()
 
                 gc_time.labels(i).observe(end - start)
                 gc_unreachable.labels(i).set(unreachable)

+ 15 - 15
synapse/storage/_base.py

@@ -42,10 +42,10 @@ sql_logger = logging.getLogger("synapse.storage.SQL")
 transaction_logger = logging.getLogger("synapse.storage.txn")
 perf_logger = logging.getLogger("synapse.storage.TIME")
 
-sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "")
+sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "sec")
 
-sql_query_timer = Histogram("synapse_storage_query_time", "", ["verb"])
-sql_txn_timer = Histogram("synapse_storage_transaction_time", "", ["desc"])
+sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"])
+sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"])
 
 
 class LoggingTransaction(object):
@@ -110,7 +110,7 @@ class LoggingTransaction(object):
                 # Don't let logging failures stop SQL from working
                 pass
 
-        start = time.time() * 1000
+        start = time.time()
 
         try:
             return func(
@@ -120,9 +120,9 @@ class LoggingTransaction(object):
             logger.debug("[SQL FAIL] {%s} %s", self.name, e)
             raise
         finally:
-            msecs = (time.time() * 1000) - start
-            sql_logger.debug("[SQL time] {%s} %f", self.name, msecs)
-            sql_query_timer.labels(sql.split()[0]).observe(msecs)
+            secs = time.time() - start
+            sql_logger.debug("[SQL time] {%s} %f sec", self.name, secs)
+            sql_query_timer.labels(sql.split()[0]).observe(secs)
 
 
 class PerformanceCounters(object):
@@ -132,7 +132,7 @@ class PerformanceCounters(object):
 
     def update(self, key, start_time, end_time=None):
         if end_time is None:
-            end_time = time.time() * 1000
+            end_time = time.time()
         duration = end_time - start_time
         count, cum_time = self.current_counters.get(key, (0, 0))
         count += 1
@@ -222,7 +222,7 @@ class SQLBaseStore(object):
 
     def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks,
                          logging_context, func, *args, **kwargs):
-        start = time.time() * 1000
+        start = time.time()
         txn_id = self._TXN_ID
 
         # We don't really need these to be unique, so lets stop it from
@@ -282,13 +282,13 @@ class SQLBaseStore(object):
             logger.debug("[TXN FAIL] {%s} %s", name, e)
             raise
         finally:
-            end = time.time() * 1000
+            end = time.time()
             duration = end - start
 
             if logging_context is not None:
                 logging_context.add_database_transaction(duration)
 
-            transaction_logger.debug("[TXN END] {%s} %f", name, duration)
+            transaction_logger.debug("[TXN END] {%s} %f sec", name, duration)
 
             self._current_txn_total_time += duration
             self._txn_perf_counters.update(desc, start, end)
@@ -349,13 +349,13 @@ class SQLBaseStore(object):
         """
         current_context = LoggingContext.current_context()
 
-        start_time = time.time() * 1000
+        start_time = time.time()
 
         def inner_func(conn, *args, **kwargs):
             with LoggingContext("runWithConnection") as context:
-                sched_duration_ms = time.time() * 1000 - start_time
-                sql_scheduling_timer.observe(sched_duration_ms)
-                current_context.add_database_scheduled(sched_duration_ms)
+                sched_duration_sec = time.time() - start_time
+                sql_scheduling_timer.observe(sched_duration_sec)
+                current_context.add_database_scheduled(sched_duration_sec)
 
                 if self.database_engine.is_connection_closed(conn):
                     logger.debug("Reconnecting closed database connection")