@@ -72,7 +72,7 @@ from synapse.util.metrics import Measure
 logger = logging.getLogger(__name__)
 
 
-# These values are used in the `enqueus_event` and `_do_fetch` methods to
+# These values are used in the `enqueue_event` and `_fetch_loop` methods to
 # control how we batch/bulk fetch events from the database.
 # The values are plucked out of thin air to make initial sync run faster
 # on jki.re
@@ -602,7 +602,7 @@ class EventsWorkerStore(SQLBaseStore):
             # already due to `_get_events_from_db`).
             fetching_deferred: ObservableDeferred[
                 Dict[str, _EventCacheEntry]
-            ] = ObservableDeferred(defer.Deferred())
+            ] = ObservableDeferred(defer.Deferred(), consumeErrors=True)
             for event_id in missing_events_ids:
                 self._current_event_fetches[event_id] = fetching_deferred
 
@@ -736,35 +736,118 @@ class EventsWorkerStore(SQLBaseStore):
             for e in state_to_include.values()
         ]
 
-    def _do_fetch(self, conn: Connection) -> None:
+    def _maybe_start_fetch_thread(self) -> None:
+        """Starts an event fetch thread if we are not yet at the maximum number."""
+        with self._event_fetch_lock:
+            if (
+                self._event_fetch_list
+                and self._event_fetch_ongoing < EVENT_QUEUE_THREADS
+            ):
+                self._event_fetch_ongoing += 1
+                event_fetch_ongoing_gauge.set(self._event_fetch_ongoing)
+                # `_event_fetch_ongoing` is decremented in `_fetch_thread`.
+                should_start = True
+            else:
+                should_start = False
+
+        if should_start:
+            run_as_background_process("fetch_events", self._fetch_thread)
+
+    async def _fetch_thread(self) -> None:
+        """Services requests for events from `_event_fetch_list`."""
+        exc = None
+        try:
+            await self.db_pool.runWithConnection(self._fetch_loop)
+        except BaseException as e:
+            exc = e
+            raise
+        finally:
+            should_restart = False
+            event_fetches_to_fail = []
+            with self._event_fetch_lock:
+                self._event_fetch_ongoing -= 1
+                event_fetch_ongoing_gauge.set(self._event_fetch_ongoing)
+
+                # There may still be work remaining in `_event_fetch_list` if we
+                # failed, or it was added in between us deciding to exit and
+                # decrementing `_event_fetch_ongoing`.
+                if self._event_fetch_list:
+                    if exc is None:
+                        # We decided to exit, but then some more work was added
+                        # before `_event_fetch_ongoing` was decremented.
+                        # If a new event fetch thread was not started, we should
+                        # restart ourselves since the remaining event fetch threads
+                        # may take a while to get around to the new work.
+                        #
+                        # Unfortunately it is not possible to tell whether a new
+                        # event fetch thread was started, so we restart
+                        # unconditionally. If we are unlucky, we will end up with
+                        # an idle fetch thread, but it will time out after
+                        # `EVENT_QUEUE_ITERATIONS * EVENT_QUEUE_TIMEOUT_S` seconds
+                        # in any case.
+                        #
+                        # Note that multiple fetch threads may run down this path at
+                        # the same time.
+                        should_restart = True
+                    elif isinstance(exc, Exception):
+                        if self._event_fetch_ongoing == 0:
+                            # We were the last remaining fetcher and failed.
+                            # Fail any outstanding fetches since no one else will
+                            # handle them.
+                            event_fetches_to_fail = self._event_fetch_list
+                            self._event_fetch_list = []
+                        else:
+                            # We weren't the last remaining fetcher, so another
+                            # fetcher will pick up the work. This will either happen
+                            # after their existing work, however long that takes,
+                            # or after at most `EVENT_QUEUE_TIMEOUT_S` seconds if
+                            # they are idle.
+                            pass
+                    else:
+                        # The exception is a `SystemExit`, `KeyboardInterrupt` or
+                        # `GeneratorExit`. Don't try to do anything clever here.
+                        pass
+
+            if should_restart:
+                # We exited cleanly but noticed more work.
+                self._maybe_start_fetch_thread()
+
+            if event_fetches_to_fail:
+                # We were the last remaining fetcher and failed.
+                # Fail any outstanding fetches since no one else will handle them.
+                assert exc is not None
+                with PreserveLoggingContext():
+                    for _, deferred in event_fetches_to_fail:
+                        deferred.errback(exc)
+
+    def _fetch_loop(self, conn: Connection) -> None:
         """Takes a database connection and waits for requests for events from
         the _event_fetch_list queue.
         """
-        try:
-            i = 0
-            while True:
-                with self._event_fetch_lock:
-                    event_list = self._event_fetch_list
-                    self._event_fetch_list = []
-
-                    if not event_list:
-                        single_threaded = self.database_engine.single_threaded
-                        if (
-                            not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING
-                            or single_threaded
-                            or i > EVENT_QUEUE_ITERATIONS
-                        ):
-                            break
-                        else:
-                            self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S)
-                            i += 1
-                            continue
-                i = 0
+        i = 0
+        while True:
+            with self._event_fetch_lock:
+                event_list = self._event_fetch_list
+                self._event_fetch_list = []
+
+                if not event_list:
+                    # There are no requests waiting. If we haven't yet reached the
+                    # maximum iteration limit, wait for some more requests to turn up.
+                    # Otherwise, bail out.
+                    single_threaded = self.database_engine.single_threaded
+                    if (
+                        not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING
+                        or single_threaded
+                        or i > EVENT_QUEUE_ITERATIONS
+                    ):
+                        return
+
+                    self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S)
+                    i += 1
+                    continue
+                i = 0
 
-                self._fetch_event_list(conn, event_list)
-        finally:
-            self._event_fetch_ongoing -= 1
-            event_fetch_ongoing_gauge.set(self._event_fetch_ongoing)
+            self._fetch_event_list(conn, event_list)
 
     def _fetch_event_list(
         self, conn: Connection, event_list: List[Tuple[List[str], defer.Deferred]]
@@ -806,9 +889,7 @@ class EventsWorkerStore(SQLBaseStore):
             # We only want to resolve deferreds from the main thread
             def fire(evs, exc):
                 for _, d in evs:
-                    if not d.called:
-                        with PreserveLoggingContext():
-                            d.errback(exc)
+                    d.errback(exc)
 
             with PreserveLoggingContext():
                 self.hs.get_reactor().callFromThread(fire, event_list, e)
@@ -983,20 +1064,9 @@ class EventsWorkerStore(SQLBaseStore):
         events_d = defer.Deferred()
         with self._event_fetch_lock:
             self._event_fetch_list.append((events, events_d))
-
             self._event_fetch_lock.notify()
 
-            if self._event_fetch_ongoing < EVENT_QUEUE_THREADS:
-                self._event_fetch_ongoing += 1
-                event_fetch_ongoing_gauge.set(self._event_fetch_ongoing)
-                should_start = True
-            else:
-                should_start = False
-
-        if should_start:
-            run_as_background_process(
-                "fetch_events", self.db_pool.runWithConnection, self._do_fetch
-            )
+        self._maybe_start_fetch_thread()
 
         logger.debug("Loading %d events: %s", len(events), events)
         with PreserveLoggingContext():