Browse Source

Create healthcheck script for synapse-workers container (#11429)

The intent is to check the main process first and then iterate through all
the worker ports, reporting healthy only when every one of them is healthy.
Michael Kaye 2 years ago
parent
commit
e2c300e7e4

+ 1 - 0
changelog.d/11429.docker

@@ -0,0 +1 @@
+Update `Dockerfile-workers` to healthcheck all workers in the container.

+ 3 - 0
docker/Dockerfile-workers

@@ -21,3 +21,6 @@ VOLUME ["/data"]
 # files to run the desired worker configuration. Will start supervisord.
 COPY ./docker/configure_workers_and_start.py /configure_workers_and_start.py
 ENTRYPOINT ["/configure_workers_and_start.py"]
+
+HEALTHCHECK --start-period=5s --interval=15s --timeout=5s \
+    CMD /bin/sh /healthcheck.sh

+ 6 - 0
docker/conf-workers/healthcheck.sh.j2

@@ -0,0 +1,6 @@
+#!/bin/sh
+# This healthcheck script is designed to return OK only when every
+# process involved (the main process and each worker) returns OK
+{%- for healthcheck_url in healthcheck_urls %}
+curl -fSs {{ healthcheck_url }} || exit 1
+{%- endfor %}

+ 13 - 0
docker/configure_workers_and_start.py

@@ -474,10 +474,16 @@ def generate_worker_files(environ, config_path: str, data_dir: str):
 
     # Determine the load-balancing upstreams to configure
     nginx_upstream_config = ""
+
+    # At the same time, prepare a list of internal endpoints to healthcheck
+    # starting with the main process which exists even if no workers do.
+    healthcheck_urls = ["http://localhost:8080/health"]
+
     for upstream_worker_type, upstream_worker_ports in nginx_upstreams.items():
         body = ""
         for port in upstream_worker_ports:
             body += "    server localhost:%d;\n" % (port,)
+            healthcheck_urls.append("http://localhost:%d/health" % (port,))
 
         # Add to the list of configured upstreams
         nginx_upstream_config += NGINX_UPSTREAM_CONFIG_BLOCK.format(
@@ -510,6 +516,13 @@ def generate_worker_files(environ, config_path: str, data_dir: str):
         worker_config=supervisord_config,
     )
 
+    # healthcheck config
+    convert(
+        "/conf/healthcheck.sh.j2",
+        "/healthcheck.sh",
+        healthcheck_urls=healthcheck_urls,
+    )
+
     # Ensure the logging directory exists
     log_dir = data_dir + "/logs"
     if not os.path.exists(log_dir):