Browse Source

Batch look-ups to see if rooms are partially stated. (#14917)

* Batch look-ups to see if rooms are partially stated.

* Fix issues found in linting.

* Fix typo.

* Apply suggestions from code review

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Clarify comments.

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Also improve the cache size while we're at it

* is_partial_state_rooms -> is_partial_state_room_batched

* Run `black`

* Improve annotation for `simple_select_many_batch`

* Fix is_partial_state_room_batched impl

* Okay, _actually_ fix impl

* Update description.

* Update synapse/storage/databases/main/room.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Run black.

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
Co-authored-by: David Robertson <davidr@element.io>
Patrick Cloke 1 year ago
parent
commit
8a05d5de21

+ 1 - 0
changelog.d/14917.misc

@@ -0,0 +1 @@
+Faster joins: Improve performance of looking up partial-state status of rooms.

+ 17 - 7
synapse/handlers/sync.py

@@ -1383,16 +1383,21 @@ class SyncHandler:
         if not sync_config.filter_collection.lazy_load_members():
             # Non-lazy syncs should never include partially stated rooms.
             # Exclude all partially stated rooms from this sync.
-            for room_id in mutable_joined_room_ids:
-                if await self.store.is_partial_state_room(room_id):
-                    mutable_rooms_to_exclude.add(room_id)
+            results = await self.store.is_partial_state_room_batched(
+                mutable_joined_room_ids
+            )
+            mutable_rooms_to_exclude.update(
+                room_id
+                for room_id, is_partial_state in results.items()
+                if is_partial_state
+            )
 
         # Incremental eager syncs should additionally include rooms that
         # - we are joined to
         # - are full-stated
         # - became fully-stated at some point during the sync period
         #   (These rooms will have been omitted during a previous eager sync.)
-        forced_newly_joined_room_ids = set()
+        forced_newly_joined_room_ids: Set[str] = set()
         if since_token and not sync_config.filter_collection.lazy_load_members():
             un_partial_stated_rooms = (
                 await self.store.get_un_partial_stated_rooms_between(
@@ -1401,9 +1406,14 @@ class SyncHandler:
                     mutable_joined_room_ids,
                 )
             )
-            for room_id in un_partial_stated_rooms:
-                if not await self.store.is_partial_state_room(room_id):
-                    forced_newly_joined_room_ids.add(room_id)
+            results = await self.store.is_partial_state_room_batched(
+                un_partial_stated_rooms
+            )
+            forced_newly_joined_room_ids.update(
+                room_id
+                for room_id, is_partial_state in results.items()
+                if not is_partial_state
+            )
 
         # Now we have our list of joined room IDs, exclude as configured and freeze
         joined_room_ids = frozenset(

+ 1 - 1
synapse/storage/database.py

@@ -1819,7 +1819,7 @@ class DatabasePool:
         keyvalues: Optional[Dict[str, Any]] = None,
         desc: str = "simple_select_many_batch",
         batch_size: int = 100,
-    ) -> List[Any]:
+    ) -> List[Dict[str, Any]]:
         """Executes a SELECT query on the named table, which may return zero or
         more rows, returning the result as a list of dicts.
 

+ 24 - 3
synapse/storage/databases/main/room.py

@@ -60,9 +60,9 @@ from synapse.storage.util.id_generators import (
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
-from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID
+from synapse.types import JsonDict, RetentionPolicy, StrCollection, ThirdPartyInstanceID
 from synapse.util import json_encoder
-from synapse.util.caches.descriptors import cached
+from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.stringutils import MXC_REGEX
 
 if TYPE_CHECKING:
@@ -1255,7 +1255,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         return room_servers
 
-    @cached()
+    @cached(max_entries=10000)
     async def is_partial_state_room(self, room_id: str) -> bool:
         """Checks if this room has partial state.
 
@@ -1274,6 +1274,27 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         return entry is not None
 
+    @cachedList(cached_method_name="is_partial_state_room", list_name="room_ids")
+    async def is_partial_state_room_batched(
+        self, room_ids: StrCollection
+    ) -> Mapping[str, bool]:
+        """Checks which of the given rooms have partial state.
+
+        Maps every ID in `room_ids` to True if the `partial_state_rooms` lookup
+        returned a row for it (the state at events in the room, and
+        `current_state_events`, may not yet be complete), and to False otherwise.
+        """
+
+        rows: List[Dict[str, str]] = await self.db_pool.simple_select_many_batch(
+            table="partial_state_rooms",
+            column="room_id",
+            iterable=room_ids,
+            retcols=("room_id",),
+            desc="is_partial_state_room_batched",
+        )
+        partial_state_rooms = {row_dict["room_id"] for row_dict in rows}
+        return {room_id: room_id in partial_state_rooms for room_id in room_ids}
+
     async def get_join_event_id_and_device_lists_stream_id_for_partial_state(
         self, room_id: str
     ) -> Tuple[str, int]: