3 years ago · 1fb9a2d0bf
--- a/changelog.d/9902.feature
+++ b/changelog.d/9902.feature
@@ -0,0 +1 @@
 
				+Add limits to how often Synapse will GC, ensuring that large servers do not end up GC thrashing if `gc_thresholds` has not been correctly set.
			
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -152,6 +152,16 @@ presence:
 
				 #
			
 
				 #gc_thresholds: [700, 10, 10]
			
 
				 
			
 
				+# The minimum time in seconds between each GC for a generation, regardless of
			
 
				+# the GC thresholds. This ensures that we don't do GC too frequently.
			
 
				+#
			
 
				+# A value of `[1s, 10s, 30s]` indicates that a second must pass between consecutive
			
 
				+# generation 0 GCs, etc.
			
 
				+#
			
 
				+# Defaults to `[1s, 10s, 30s]`.
			
 
				+#
			
 
				+#gc_min_interval: [0.5s, 30s, 1m]
			
 
				+
			
 
				 # Set the limit on the returned events in the timeline in the get
			
 
				 # and sync operations. The default value is 100. -1 means no upper limit.
			
 
				 #
			
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -455,6 +455,9 @@ def start(config_options):
 
				 
			
 
				     synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts
			
 
				 
			
 
				+    if config.server.gc_seconds:
			
 
				+        synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds
			
 
				+
			
 
				     hs = GenericWorkerServer(
			
 
				         config.server_name,
			
 
				         config=config,
			
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -342,6 +342,9 @@ def setup(config_options):
 
				 
			
 
				     events.USE_FROZEN_DICTS = config.use_frozen_dicts
			
 
				 
			
 
				+    if config.server.gc_seconds:
			
 
				+        synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds
			
 
				+
			
 
				     hs = SynapseHomeServer(
			
 
				         config.server_name,
			
 
				         config=config,
			
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -19,7 +19,7 @@ import logging
 
				 import os.path
			
 
				 import re
			
 
				 from textwrap import indent
			
 
				-from typing import Any, Dict, Iterable, List, Optional, Set
			
 
				+from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
			
 
				 
			
 
				 import attr
			
 
				 import yaml
			
@@ -572,6 +572,7 @@ class ServerConfig(Config):
 
				             _warn_if_webclient_configured(self.listeners)
			
 
				 
			
 
				         self.gc_thresholds = read_gc_thresholds(config.get("gc_thresholds", None))
			
 
				+        self.gc_seconds = self.read_gc_intervals(config.get("gc_min_interval", None))
			
 
				 
			
 
				         @attr.s
			
 
				         class LimitRemoteRoomsConfig:
			
@@ -917,6 +918,16 @@ class ServerConfig(Config):
 
				         #
			
 
				         #gc_thresholds: [700, 10, 10]
			
 
				 
			
 
				+        # The minimum time in seconds between each GC for a generation, regardless of
			
 
				+        # the GC thresholds. This ensures that we don't do GC too frequently.
			
 
				+        #
			
 
				+        # A value of `[1s, 10s, 30s]` indicates that a second must pass between consecutive
			
 
				+        # generation 0 GCs, etc.
			
 
				+        #
			
 
				+        # Defaults to `[1s, 10s, 30s]`.
			
 
				+        #
			
 
				+        #gc_min_interval: [0.5s, 30s, 1m]
			
 
				+
			
 
				         # Set the limit on the returned events in the timeline in the get
			
 
				         # and sync operations. The default value is 100. -1 means no upper limit.
			
 
				         #
			
@@ -1305,6 +1316,24 @@ class ServerConfig(Config):
 
				             help="Turn on the twisted telnet manhole service on the given port.",
			
 
				         )
			
 
				 
			
 
				+    def read_gc_intervals(self, durations) -> Optional[Tuple[float, float, float]]:
			
 
				+        """Reads the three durations for the GC min interval option, returning seconds."""
			
 
				+        if durations is None:
			
 
				+            return None
			
 
				+
			
 
				+        try:
			
 
				+            if len(durations) != 3:
			
 
				+                raise ValueError()
			
 
				+            return (
			
 
				+                self.parse_duration(durations[0]) / 1000,
			
 
				+                self.parse_duration(durations[1]) / 1000,
			
 
				+                self.parse_duration(durations[2]) / 1000,
			
 
				+            )
			
 
				+        except Exception:
			
 
				+            raise ConfigError(
			
 
				+                "Value of `gc_min_interval` must be a list of three durations if set"
			
 
				+            )
			
 
				+
			
 
				 
			
 
				 def is_threepid_reserved(reserved_threepids, threepid):
			
 
				     """Check the threepid against the reserved threepid config
			
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -535,6 +535,13 @@ class ReactorLastSeenMetric:
 
				 
			
 
				 REGISTRY.register(ReactorLastSeenMetric())
			
 
				 
			
 
				+# The minimum time in seconds between GCs for each generation, regardless of the current GC
			
 
				+# thresholds and counts.
			
 
				+MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0)
			
 
				+
			
 
				+# The time (in seconds since the epoch) at which we last did a GC for each generation.
			
 
				+_last_gc = [0.0, 0.0, 0.0]
			
 
				+
			
 
				 
			
 
				 def runUntilCurrentTimer(reactor, func):
			
 
				     @functools.wraps(func)
			
@@ -575,11 +582,16 @@ def runUntilCurrentTimer(reactor, func):
 
				             return ret
			
 
				 
			
 
				         # Check if we need to do a manual GC (since its been disabled), and do
			
 
				-        # one if necessary.
			
 
				+        # one if necessary. Note we go in reverse order as e.g. a gen 1 GC may
			
 
				+        # promote an object into gen 2, and we don't want to handle the same
			
 
				+        # object multiple times.
			
 
				         threshold = gc.get_threshold()
			
 
				         counts = gc.get_count()
			
 
				         for i in (2, 1, 0):
			
 
				-            if threshold[i] < counts[i]:
			
 
				+            # We check if we need to do one based on a straightforward
			
 
				+            # comparison between the threshold and count. We also do an extra
			
 
				+            # check to make sure that we don't do a GC too often.
			
 
				+            if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
			
 
				                 if i == 0:
			
 
				                     logger.debug("Collecting gc %d", i)
			
 
				                 else:
			
@@ -589,6 +601,8 @@ def runUntilCurrentTimer(reactor, func):
 
				                 unreachable = gc.collect(i)
			
 
				                 end = time.time()
			
 
				 
			
 
				+                _last_gc[i] = end
			
 
				+
			
 
				                 gc_time.labels(i).observe(end - start)
			
 
				                 gc_unreachable.labels(i).set(unreachable)
		`@@ -0,0 +1 @@`
		+Add limits to how often Synapse will GC, ensuring that large servers do not end up GC thrashing if `gc_thresholds` has not been correctly set.