Browse Source

Fix 500 error on `/messages` when we accumulate more than 5 backward extremities (#11027)

Found while working on the Gitter backfill script and noticed
it only happened after we sent 7 batches, https://gitlab.com/gitterHQ/webapp/-/merge_requests/2229#note_665906390

When there are more than 5 backward extremities for a given depth,
backfill will throw an error because we sliced the extremity list
to 5 but then try to iterate over the full list. This causes
us to look for state that we never fetched and we get a `KeyError`.

Before when calling `/messages` when there are more than 5 backward extremities:
```
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/site-packages/synapse/http/server.py", line 258, in _async_render_wrapper
    callback_return = await self._async_render(request)
  File "/usr/local/lib/python3.8/site-packages/synapse/http/server.py", line 446, in _async_render
    callback_return = await raw_callback_return
  File "/usr/local/lib/python3.8/site-packages/synapse/rest/client/room.py", line 580, in on_GET
    msgs = await self.pagination_handler.get_messages(
  File "/usr/local/lib/python3.8/site-packages/synapse/handlers/pagination.py", line 396, in get_messages
    await self.hs.get_federation_handler().maybe_backfill(
  File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 133, in maybe_backfill
    return await self._maybe_backfill_inner(room_id, current_depth, limit)
  File "/usr/local/lib/python3.8/site-packages/synapse/handlers/federation.py", line 386, in _maybe_backfill_inner
    likely_extremeties_domains = get_domains_from_state(states[e_id])
KeyError: '$zpFflMEBtZdgcMQWTakaVItTLMjLFdKcRWUPHbbSZJl'
```
Eric Eastwood 2 years ago
parent
commit
daf498e099

+ 1 - 0
changelog.d/11027.bugfix

@@ -0,0 +1 @@
+Fix 500 error on `/messages` when the server accumulates more than 5 backwards extremities at a given depth for a room.

+ 13 - 11
synapse/handlers/federation.py

@@ -238,18 +238,10 @@ class FederationHandler:
             )
             return False
 
-        logger.debug(
-            "room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s",
-            room_id,
-            current_depth,
-            max_depth,
-            sorted_extremeties_tuple,
-        )
-
         # We ignore extremities that have a greater depth than our current depth
         # as:
         #    1. we don't really care about getting events that have happened
-        #       before our current position; and
+        #       after our current position; and
         #    2. we have likely previously tried and failed to backfill from that
         #       extremity, so to avoid getting "stuck" requesting the same
         #       backfill repeatedly we drop those extremities.
@@ -257,9 +249,19 @@ class FederationHandler:
             t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth
         ]
 
+        logger.debug(
+            "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems: %s filtered_sorted_extremeties_tuple: %s",
+            room_id,
+            current_depth,
+            limit,
+            max_depth,
+            sorted_extremeties_tuple,
+            filtered_sorted_extremeties_tuple,
+        )
+
         # However, we need to check that the filtered extremities are non-empty.
         # If they are empty then either we can a) bail or b) still attempt to
-        # backill. We opt to try backfilling anyway just in case we do get
+        # backfill. We opt to try backfilling anyway just in case we do get
         # relevant events.
         if filtered_sorted_extremeties_tuple:
             sorted_extremeties_tuple = filtered_sorted_extremeties_tuple
@@ -389,7 +391,7 @@ class FederationHandler:
             for key, state_dict in states.items()
         }
 
-        for e_id, _ in sorted_extremeties_tuple:
+        for e_id in event_ids:
             likely_extremeties_domains = get_domains_from_state(states[e_id])
 
             success = await try_backfill(

+ 1 - 1
synapse/handlers/federation_event.py

@@ -392,7 +392,7 @@ class FederationEventHandler:
 
     @log_function
     async def backfill(
-        self, dest: str, room_id: str, limit: int, extremities: List[str]
+        self, dest: str, room_id: str, limit: int, extremities: Iterable[str]
     ) -> None:
         """Trigger a backfill request to `dest` for the given `room_id`
 

+ 64 - 0
tests/handlers/test_federation.py

@@ -23,6 +23,7 @@ from synapse.federation.federation_base import event_from_pdu_json
 from synapse.logging.context import LoggingContext, run_in_background
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.types import create_requester
 from synapse.util.stringutils import random_string
 
 from tests import unittest
@@ -30,6 +31,10 @@ from tests import unittest
 logger = logging.getLogger(__name__)
 
 
+def generate_fake_event_id() -> str:
+    return "$fake_" + random_string(43)
+
+
 class FederationTestCase(unittest.HomeserverTestCase):
     servlets = [
         admin.register_servlets,
@@ -198,6 +203,65 @@ class FederationTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(sg, sg2)
 
+    def test_backfill_with_many_backward_extremities(self):
+        """
+        Check that we can backfill with many backward extremities.
+        The goal is to make sure that when we only use a portion
+        of backwards extremities(the magic number is more than 5),
+        no errors are thrown.
+
+        Regression test, see #11027
+        """
+        # create the room
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+        requester = create_requester(user_id)
+
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+
+        ev1 = self.helper.send(room_id, "first message", tok=tok)
+
+        # Create "many" backward extremities. The magic number we're trying to
+        # create more than is 5 which corresponds to the number of backward
+        # extremities we slice off in `_maybe_backfill_inner`
+        for _ in range(0, 8):
+            event_handler = self.hs.get_event_creation_handler()
+            event, context = self.get_success(
+                event_handler.create_event(
+                    requester,
+                    {
+                        "type": "m.room.message",
+                        "content": {
+                            "msgtype": "m.text",
+                            "body": "message connected to fake event",
+                        },
+                        "room_id": room_id,
+                        "sender": user_id,
+                    },
+                    prev_event_ids=[
+                        ev1["event_id"],
+                        # We're creating an backward extremity each time thanks
+                        # to this fake event
+                        generate_fake_event_id(),
+                    ],
+                )
+            )
+            self.get_success(
+                event_handler.handle_new_client_event(requester, event, context)
+            )
+
+        current_depth = 1
+        limit = 100
+        with LoggingContext("receive_pdu"):
+            # Make sure backfill still works
+            d = run_in_background(
+                self.hs.get_federation_handler().maybe_backfill,
+                room_id,
+                current_depth,
+                limit,
+            )
+        self.get_success(d)
+
     def test_backfill_floating_outlier_membership_auth(self):
         """
         As the local homeserver, check that we can properly process a federated