# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import logging
from typing import TYPE_CHECKING, Set

from synapse.storage.databases import Databases

if TYPE_CHECKING:
    from synapse.server import HomeServer

  20. logger = logging.getLogger(__name__)
  21. class PurgeEventsStorageController:
  22. """High level interface for purging rooms and event history."""
  23. def __init__(self, hs: "HomeServer", stores: Databases):
  24. self.stores = stores
  25. async def purge_room(self, room_id: str) -> None:
  26. """Deletes all record of a room"""
  27. state_groups_to_delete = await self.stores.main.purge_room(room_id)
  28. await self.stores.state.purge_room_state(room_id, state_groups_to_delete)
  29. async def purge_history(
  30. self, room_id: str, token: str, delete_local_events: bool
  31. ) -> None:
  32. """Deletes room history before a certain point
  33. Args:
  34. room_id: The room ID
  35. token: A topological token to delete events before
  36. delete_local_events:
  37. if True, we will delete local events as well as remote ones
  38. (instead of just marking them as outliers and deleting their
  39. state groups).
  40. """
  41. state_groups = await self.stores.main.purge_history(
  42. room_id, token, delete_local_events
  43. )
  44. logger.info("[purge] finding state groups that can be deleted")
  45. sg_to_delete = await self._find_unreferenced_groups(state_groups)
  46. await self.stores.state.purge_unreferenced_state_groups(room_id, sg_to_delete)
  47. async def _find_unreferenced_groups(self, state_groups: Set[int]) -> Set[int]:
  48. """Used when purging history to figure out which state groups can be
  49. deleted.
  50. Args:
  51. state_groups: Set of state groups referenced by events
  52. that are going to be deleted.
  53. Returns:
  54. The set of state groups that can be deleted.
  55. """
  56. # Set of events that we have found to be referenced by events
  57. referenced_groups = set()
  58. # Set of state groups we've already seen
  59. state_groups_seen = set(state_groups)
  60. # Set of state groups to handle next.
  61. next_to_search = set(state_groups)
  62. while next_to_search:
  63. # We bound size of groups we're looking up at once, to stop the
  64. # SQL query getting too big
  65. if len(next_to_search) < 100:
  66. current_search = next_to_search
  67. next_to_search = set()
  68. else:
  69. current_search = set(itertools.islice(next_to_search, 100))
  70. next_to_search -= current_search
  71. referenced = await self.stores.main.get_referenced_state_groups(
  72. current_search
  73. )
  74. referenced_groups |= referenced
  75. # We don't continue iterating up the state group graphs for state
  76. # groups that are referenced.
  77. current_search -= referenced
  78. edges = await self.stores.state.get_previous_state_groups(current_search)
  79. prevs = set(edges.values())
  80. # We don't bother re-handling groups we've already seen
  81. prevs -= state_groups_seen
  82. next_to_search |= prevs
  83. state_groups_seen |= prevs
  84. to_delete = state_groups_seen - referenced_groups
  85. return to_delete