workers.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. # Copyright 2016 OpenMarket Ltd
  2. # Copyright 2021 The Matrix.org Foundation C.I.C.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import argparse
  16. import logging
  17. from typing import Any, Dict, List, Union
  18. import attr
  19. from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr
  20. from synapse.config._base import (
  21. Config,
  22. ConfigError,
  23. RoutableShardedWorkerHandlingConfig,
  24. ShardedWorkerHandlingConfig,
  25. )
  26. from synapse.config._util import parse_and_validate_mapping
  27. from synapse.config.server import (
  28. DIRECT_TCP_ERROR,
  29. TCPListenerConfig,
  30. parse_listener_def,
  31. )
  32. from synapse.types import JsonDict
# Warning template logged when a legacy worker-duty option is found in the
# config. The first '%s' is filled with the legacy option name and the second
# with the name of the option that replaces it.
_DEPRECATED_WORKER_DUTY_OPTION_USED = """
The '%s' configuration option is deprecated and will be removed in a future
Synapse version. Please use ``%s: name_of_worker`` instead.
"""

# Error template raised when a worker has no way of locating the main process.
# The '%s' is filled with the key expected in `instance_map`.
_MISSING_MAIN_PROCESS_INSTANCE_MAP_DATA = """
Missing data for a worker to connect to main process. Please include '%s' in the
`instance_map` declared in your shared yaml configuration, or optionally(as a deprecated
solution) in every worker's yaml as various `worker_replication_*` settings as defined
in workers documentation here:
`https://matrix-org.github.io/synapse/latest/workers.html#worker-configuration`
"""

# This allows for a handy knob when it's time to change from 'master' to
# something with less 'history'
MAIN_PROCESS_INSTANCE_NAME = "master"

# Use this to adjust what the main process is known as in the yaml instance_map
MAIN_PROCESS_INSTANCE_MAP_NAME = "main"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  50. def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]:
  51. """Helper for allowing parsing a string or list of strings to a config
  52. option expecting a list of strings.
  53. """
  54. if isinstance(obj, str):
  55. return [obj]
  56. return obj
  57. class ConfigModel(BaseModel):
  58. """A custom version of Pydantic's BaseModel which
  59. - ignores unknown fields and
  60. - does not allow fields to be overwritten after construction,
  61. but otherwise uses Pydantic's default behaviour.
  62. For now, ignore unknown fields. In the future, we could change this so that unknown
  63. config values cause a ValidationError, provided the error messages are meaningful to
  64. server operators.
  65. Subclassing in this way is recommended by
  66. https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
  67. """
  68. class Config:
  69. # By default, ignore fields that we don't recognise.
  70. extra = Extra.ignore
  71. # By default, don't allow fields to be reassigned after parsing.
  72. allow_mutation = False
  73. class InstanceLocationConfig(ConfigModel):
  74. """The host and port to talk to an instance via HTTP replication."""
  75. host: StrictStr
  76. port: StrictInt
  77. tls: StrictBool = False
  78. def scheme(self) -> str:
  79. """Hardcode a retrievable scheme based on self.tls"""
  80. return "https" if self.tls else "http"
  81. def netloc(self) -> str:
  82. """Nicely format the network location data"""
  83. return f"{self.host}:{self.port}"
  84. @attr.s
  85. class WriterLocations:
  86. """Specifies the instances that write various streams.
  87. Attributes:
  88. events: The instances that write to the event and backfill streams.
  89. typing: The instances that write to the typing stream. Currently
  90. can only be a single instance.
  91. to_device: The instances that write to the to_device stream. Currently
  92. can only be a single instance.
  93. account_data: The instances that write to the account data streams. Currently
  94. can only be a single instance.
  95. receipts: The instances that write to the receipts stream. Currently
  96. can only be a single instance.
  97. presence: The instances that write to the presence stream. Currently
  98. can only be a single instance.
  99. """
  100. events: List[str] = attr.ib(
  101. default=["master"],
  102. converter=_instance_to_list_converter,
  103. )
  104. typing: List[str] = attr.ib(
  105. default=["master"],
  106. converter=_instance_to_list_converter,
  107. )
  108. to_device: List[str] = attr.ib(
  109. default=["master"],
  110. converter=_instance_to_list_converter,
  111. )
  112. account_data: List[str] = attr.ib(
  113. default=["master"],
  114. converter=_instance_to_list_converter,
  115. )
  116. receipts: List[str] = attr.ib(
  117. default=["master"],
  118. converter=_instance_to_list_converter,
  119. )
  120. presence: List[str] = attr.ib(
  121. default=["master"],
  122. converter=_instance_to_list_converter,
  123. )
  124. class WorkerConfig(Config):
  125. """The workers are processes run separately to the main synapse process.
  126. They have their own pid_file and listener configuration. They use the
  127. replication_url to talk to the main synapse process."""
  128. section = "worker"
  129. def read_config(self, config: JsonDict, **kwargs: Any) -> None:
  130. self.worker_app = config.get("worker_app")
  131. # Canonicalise worker_app so that master always has None
  132. if self.worker_app == "synapse.app.homeserver":
  133. self.worker_app = None
  134. self.worker_listeners = [
  135. parse_listener_def(i, x)
  136. for i, x in enumerate(config.get("worker_listeners", []))
  137. ]
  138. self.worker_daemonize = bool(config.get("worker_daemonize"))
  139. self.worker_pid_file = config.get("worker_pid_file")
  140. worker_log_config = config.get("worker_log_config")
  141. if worker_log_config is not None and not isinstance(worker_log_config, str):
  142. raise ConfigError("worker_log_config must be a string")
  143. self.worker_log_config = worker_log_config
  144. # The port on the main synapse for TCP replication
  145. if "worker_replication_port" in config:
  146. raise ConfigError(DIRECT_TCP_ERROR, ("worker_replication_port",))
  147. # The shared secret used for authentication when connecting to the main synapse.
  148. self.worker_replication_secret = config.get("worker_replication_secret", None)
  149. self.worker_name = config.get("worker_name", self.worker_app)
  150. self.instance_name = self.worker_name or MAIN_PROCESS_INSTANCE_NAME
  151. # FIXME: Remove this check after a suitable amount of time.
  152. self.worker_main_http_uri = config.get("worker_main_http_uri", None)
  153. if self.worker_main_http_uri is not None:
  154. logger.warning(
  155. "The config option worker_main_http_uri is unused since Synapse 1.73. "
  156. "It can be safely removed from your configuration."
  157. )
  158. # This option is really only here to support `--manhole` command line
  159. # argument.
  160. manhole = config.get("worker_manhole")
  161. if manhole:
  162. self.worker_listeners.append(
  163. TCPListenerConfig(
  164. port=manhole,
  165. bind_addresses=["127.0.0.1"],
  166. type="manhole",
  167. )
  168. )
  169. federation_sender_instances = self._worker_names_performing_this_duty(
  170. config,
  171. "send_federation",
  172. "synapse.app.federation_sender",
  173. "federation_sender_instances",
  174. )
  175. self.send_federation = self.instance_name in federation_sender_instances
  176. self.federation_shard_config = ShardedWorkerHandlingConfig(
  177. federation_sender_instances
  178. )
  179. # A map from instance name to host/port of their HTTP replication endpoint.
  180. # Check if the main process is declared. Inject it into the map if it's not,
  181. # based first on if a 'main' block is declared then on 'worker_replication_*'
  182. # data. If both are available, default to instance_map. The main process
  183. # itself doesn't need this data as it would never have to talk to itself.
  184. instance_map: Dict[str, Any] = config.get("instance_map", {})
  185. if self.instance_name is not MAIN_PROCESS_INSTANCE_NAME:
  186. # The host used to connect to the main synapse
  187. main_host = config.get("worker_replication_host", None)
  188. # The port on the main synapse for HTTP replication endpoint
  189. main_port = config.get("worker_replication_http_port")
  190. # The tls mode on the main synapse for HTTP replication endpoint.
  191. # For backward compatibility this defaults to False.
  192. main_tls = config.get("worker_replication_http_tls", False)
  193. # For now, accept 'main' in the instance_map, but the replication system
  194. # expects 'master', force that into being until it's changed later.
  195. if MAIN_PROCESS_INSTANCE_MAP_NAME in instance_map:
  196. instance_map[MAIN_PROCESS_INSTANCE_NAME] = instance_map[
  197. MAIN_PROCESS_INSTANCE_MAP_NAME
  198. ]
  199. del instance_map[MAIN_PROCESS_INSTANCE_MAP_NAME]
  200. # This is the backwards compatibility bit that handles the
  201. # worker_replication_* bits using setdefault() to not overwrite anything.
  202. elif main_host is not None and main_port is not None:
  203. instance_map.setdefault(
  204. MAIN_PROCESS_INSTANCE_NAME,
  205. {
  206. "host": main_host,
  207. "port": main_port,
  208. "tls": main_tls,
  209. },
  210. )
  211. else:
  212. # If we've gotten here, it means that the main process is not on the
  213. # instance_map and that not enough worker_replication_* variables
  214. # were declared in the worker's yaml.
  215. raise ConfigError(
  216. _MISSING_MAIN_PROCESS_INSTANCE_MAP_DATA
  217. % MAIN_PROCESS_INSTANCE_MAP_NAME
  218. )
  219. self.instance_map: Dict[
  220. str, InstanceLocationConfig
  221. ] = parse_and_validate_mapping(instance_map, InstanceLocationConfig)
  222. # Map from type of streams to source, c.f. WriterLocations.
  223. writers = config.get("stream_writers") or {}
  224. self.writers = WriterLocations(**writers)
  225. # Check that the configured writers for events and typing also appears in
  226. # `instance_map`.
  227. for stream in (
  228. "events",
  229. "typing",
  230. "to_device",
  231. "account_data",
  232. "receipts",
  233. "presence",
  234. ):
  235. instances = _instance_to_list_converter(getattr(self.writers, stream))
  236. for instance in instances:
  237. if instance != "master" and instance not in self.instance_map:
  238. raise ConfigError(
  239. "Instance %r is configured to write %s but does not appear in `instance_map` config."
  240. % (instance, stream)
  241. )
  242. if len(self.writers.typing) != 1:
  243. raise ConfigError(
  244. "Must only specify one instance to handle `typing` messages."
  245. )
  246. if len(self.writers.to_device) != 1:
  247. raise ConfigError(
  248. "Must only specify one instance to handle `to_device` messages."
  249. )
  250. if len(self.writers.account_data) != 1:
  251. raise ConfigError(
  252. "Must only specify one instance to handle `account_data` messages."
  253. )
  254. if len(self.writers.receipts) != 1:
  255. raise ConfigError(
  256. "Must only specify one instance to handle `receipts` messages."
  257. )
  258. if len(self.writers.events) == 0:
  259. raise ConfigError("Must specify at least one instance to handle `events`.")
  260. if len(self.writers.presence) != 1:
  261. raise ConfigError(
  262. "Must only specify one instance to handle `presence` messages."
  263. )
  264. self.events_shard_config = RoutableShardedWorkerHandlingConfig(
  265. self.writers.events
  266. )
  267. # Handle sharded push
  268. pusher_instances = self._worker_names_performing_this_duty(
  269. config,
  270. "start_pushers",
  271. "synapse.app.pusher",
  272. "pusher_instances",
  273. )
  274. self.start_pushers = self.instance_name in pusher_instances
  275. self.pusher_shard_config = ShardedWorkerHandlingConfig(pusher_instances)
  276. # Whether this worker should run background tasks or not.
  277. #
  278. # As a note for developers, the background tasks guarded by this should
  279. # be able to run on only a single instance (meaning that they don't
  280. # depend on any in-memory state of a particular worker).
  281. #
  282. # No effort is made to ensure only a single instance of these tasks is
  283. # running.
  284. background_tasks_instance = config.get("run_background_tasks_on") or "master"
  285. self.run_background_tasks = (
  286. self.worker_name is None and background_tasks_instance == "master"
  287. ) or self.worker_name == background_tasks_instance
  288. self.should_notify_appservices = self._should_this_worker_perform_duty(
  289. config,
  290. legacy_master_option_name="notify_appservices",
  291. legacy_worker_app_name="synapse.app.appservice",
  292. new_option_name="notify_appservices_from_worker",
  293. )
  294. self.should_update_user_directory = self._should_this_worker_perform_duty(
  295. config,
  296. legacy_master_option_name="update_user_directory",
  297. legacy_worker_app_name="synapse.app.user_dir",
  298. new_option_name="update_user_directory_from_worker",
  299. )
  300. def _should_this_worker_perform_duty(
  301. self,
  302. config: Dict[str, Any],
  303. legacy_master_option_name: str,
  304. legacy_worker_app_name: str,
  305. new_option_name: str,
  306. ) -> bool:
  307. """
  308. Figures out whether this worker should perform a certain duty.
  309. This function is temporary and is only to deal with the complexity
  310. of allowing old, transitional and new configurations all at once.
  311. Contradictions between the legacy and new part of a transitional configuration
  312. will lead to a ConfigError.
  313. Parameters:
  314. config: The config dictionary
  315. legacy_master_option_name: The name of a legacy option, whose value is boolean,
  316. specifying whether it's the master that should handle a certain duty.
  317. e.g. "notify_appservices"
  318. legacy_worker_app_name: The name of a legacy Synapse worker application
  319. that would traditionally perform this duty.
  320. e.g. "synapse.app.appservice"
  321. new_option_name: The name of the new option, whose value is the name of a
  322. designated worker to perform the duty.
  323. e.g. "notify_appservices_from_worker"
  324. """
  325. # None means 'unspecified'; True means 'run here' and False means
  326. # 'don't run here'.
  327. new_option_should_run_here = None
  328. if new_option_name in config:
  329. designated_worker = config[new_option_name] or "master"
  330. new_option_should_run_here = (
  331. designated_worker == "master" and self.worker_name is None
  332. ) or designated_worker == self.worker_name
  333. legacy_option_should_run_here = None
  334. if legacy_master_option_name in config:
  335. run_on_master = bool(config[legacy_master_option_name])
  336. legacy_option_should_run_here = (
  337. self.worker_name is None and run_on_master
  338. ) or (self.worker_app == legacy_worker_app_name and not run_on_master)
  339. # Suggest using the new option instead.
  340. logger.warning(
  341. _DEPRECATED_WORKER_DUTY_OPTION_USED,
  342. legacy_master_option_name,
  343. new_option_name,
  344. )
  345. if self.worker_app == legacy_worker_app_name and config.get(
  346. legacy_master_option_name, True
  347. ):
  348. # As an extra bit of complication, we need to check that the
  349. # specialised worker is only used if the legacy config says the
  350. # master isn't performing the duties.
  351. raise ConfigError(
  352. f"Cannot use deprecated worker app type '{legacy_worker_app_name}' whilst deprecated option '{legacy_master_option_name}' is not set to false.\n"
  353. f"Consider setting `worker_app: synapse.app.generic_worker` and using the '{new_option_name}' option instead.\n"
  354. f"The '{new_option_name}' option replaces '{legacy_master_option_name}'."
  355. )
  356. if new_option_should_run_here is None and legacy_option_should_run_here is None:
  357. # Neither option specified; the fallback behaviour is to run on the main process
  358. return self.worker_name is None
  359. if (
  360. new_option_should_run_here is not None
  361. and legacy_option_should_run_here is not None
  362. ):
  363. # Both options specified; ensure they match!
  364. if new_option_should_run_here != legacy_option_should_run_here:
  365. update_worker_type = (
  366. " and set worker_app: synapse.app.generic_worker"
  367. if self.worker_app == legacy_worker_app_name
  368. else ""
  369. )
  370. # If the values conflict, we suggest the admin removes the legacy option
  371. # for simplicity.
  372. raise ConfigError(
  373. f"Conflicting configuration options: {legacy_master_option_name} (legacy), {new_option_name} (new).\n"
  374. f"Suggestion: remove {legacy_master_option_name}{update_worker_type}.\n"
  375. )
  376. # We've already validated that these aren't conflicting; now just see if
  377. # either is True.
  378. # (By this point, these are either the same value or only one is not None.)
  379. return bool(new_option_should_run_here or legacy_option_should_run_here)
  380. def _worker_names_performing_this_duty(
  381. self,
  382. config: Dict[str, Any],
  383. legacy_option_name: str,
  384. legacy_app_name: str,
  385. modern_instance_list_name: str,
  386. ) -> List[str]:
  387. """
  388. Retrieves the names of the workers handling a given duty, by either legacy
  389. option or instance list.
  390. There are two ways of configuring which instances handle a given duty, e.g.
  391. for configuring pushers:
  392. 1. The old way where "start_pushers" is set to false and running a
  393. `synapse.app.pusher'` worker app.
  394. 2. Specifying the workers sending federation in `pusher_instances`.
  395. Args:
  396. config: settings read from yaml.
  397. legacy_option_name: the old way of enabling options. e.g. 'start_pushers'
  398. legacy_app_name: The historical app name. e.g. 'synapse.app.pusher'
  399. modern_instance_list_name: the string name of the new instance_list. e.g.
  400. 'pusher_instances'
  401. Returns:
  402. A list of worker instance names handling the given duty.
  403. """
  404. legacy_option = config.get(legacy_option_name, True)
  405. worker_instances = config.get(modern_instance_list_name)
  406. if worker_instances is None:
  407. # Default to an empty list, which means "another, unknown, worker is
  408. # responsible for it".
  409. worker_instances = []
  410. # If no worker instances are set we check if the legacy option
  411. # is set, which means use the main process.
  412. if legacy_option:
  413. worker_instances = ["master"]
  414. if self.worker_app == legacy_app_name:
  415. if legacy_option:
  416. # If we're using `legacy_app_name`, and not using
  417. # `modern_instance_list_name`, then we should have
  418. # explicitly set `legacy_option_name` to false.
  419. raise ConfigError(
  420. f"The '{legacy_option_name}' config option must be disabled in "
  421. "the main synapse process before they can be run in a separate "
  422. "worker.\n"
  423. f"Please add `{legacy_option_name}: false` to the main config.\n",
  424. )
  425. worker_instances = [self.worker_name]
  426. return worker_instances
  427. def read_arguments(self, args: argparse.Namespace) -> None:
  428. # We support a bunch of command line arguments that override options in
  429. # the config. A lot of these options have a worker_* prefix when running
  430. # on workers so we also have to override them when command line options
  431. # are specified.
  432. if args.daemonize is not None:
  433. self.worker_daemonize = args.daemonize
  434. if args.manhole is not None:
  435. self.worker_manhole = args.worker_manhole