123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295 |
- # Copyright 2022 The Matrix.org Foundation C.I.C.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import io
- from typing import Iterable, Optional
- from matrix_common.types.mxc_uri import MXCUri
- from twisted.test.proto_helpers import MemoryReactor
- from synapse.rest import admin
- from synapse.rest.client import login, register, room
- from synapse.server import HomeServer
- from synapse.types import UserID
- from synapse.util import Clock
- from tests import unittest
- from tests.unittest import override_config
- from tests.utils import MockClock
class MediaRetentionTestCase(unittest.HomeserverTestCase):
    """Tests for the `media_retention` homeserver configuration.

    `prepare` seeds the local media store and the remote media cache with
    entries in a variety of states (recently accessed, not recently accessed,
    never accessed, quarantined, protected from quarantine). Each test then
    enables a single retention lifetime via `override_config`, advances time
    past that lifetime and asserts which entries are still present in the
    datastore.
    """

    ONE_DAY_IN_MS = 24 * 60 * 60 * 1000
    THIRTY_DAYS_IN_MS = 30 * ONE_DAY_IN_MS

    # How far each test moves time forward, in seconds (the unit the tests pass
    # to `reactor.advance`). One day longer than the 30 day lifetimes that the
    # tests configure.
    THIRTY_ONE_DAYS_IN_S = 31 * ONE_DAY_IN_MS // 1000

    servlets = [
        room.register_servlets,
        login.register_servlets,
        register.register_servlets,
        admin.register_servlets_for_client_rest_resource,
    ]

    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
        """Build the test homeserver with a `MockClock` in place of `clock`."""
        # We need to be able to test advancing time in the homeserver, so we
        # replace the test homeserver's default clock with a MockClock, which
        # supports advancing time.
        return self.setup_test_homeserver(clock=MockClock())

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        """Register a user and create the media fixtures the tests inspect.

        Sets `self.store`, `self.remote_server_name` and one `self.local_*` /
        `self.remote_*` attribute (an `MXCUri`) per fixture.

        NOTE(review): the `last_accessed_ms` values below are absolute
        timestamps, not ages. The fixture names presumably rely on the clock
        starting close to zero, so that after advancing 31 days a 30 day
        timestamp still counts as recent while a 1 day timestamp does not -
        confirm against the clock implementation.
        """
        self.remote_server_name = "remote.homeserver"
        self.store = hs.get_datastores().main

        # Create a user to upload media with
        test_user_id = self.register_user("alice", "password")

        # Inject media (recently accessed, old access, never accessed, old access
        # quarantined media) into both the local store and the remote cache, plus
        # one additional local media that is marked as protected from quarantine.
        media_repository = hs.get_media_repository()
        test_media_content = b"example string"

        def _create_media_and_set_attributes(
            last_accessed_ms: Optional[int],
            is_quarantined: bool = False,
            is_protected: bool = False,
        ) -> MXCUri:
            """Create a local media item and apply the requested attributes.

            Args:
                last_accessed_ms: The last access timestamp to record, or None
                    to leave the media without a recorded access.
                is_quarantined: Whether to quarantine the media.
                is_protected: Whether to mark the media as safe from quarantine.

            Returns:
                The MXC URI of the newly created media.
            """
            # "Upload" some media to the local media store
            mxc_uri: MXCUri = self.get_success(
                media_repository.create_content(
                    media_type="text/plain",
                    upload_name=None,
                    content=io.BytesIO(test_media_content),
                    content_length=len(test_media_content),
                    auth_user=UserID.from_string(test_user_id),
                )
            )

            # Set the last recently accessed time for this media
            if last_accessed_ms is not None:
                self.get_success(
                    self.store.update_cached_last_access_time(
                        local_media=(mxc_uri.media_id,),
                        remote_media=(),
                        time_ms=last_accessed_ms,
                    )
                )

            if is_quarantined:
                # Mark this media as quarantined
                self.get_success(
                    self.store.quarantine_media_by_id(
                        server_name=self.hs.config.server.server_name,
                        media_id=mxc_uri.media_id,
                        quarantined_by="@theadmin:test",
                    )
                )

            if is_protected:
                # Mark this media as protected from quarantine
                self.get_success(
                    self.store.mark_local_media_as_safe(
                        media_id=mxc_uri.media_id,
                        safe=True,
                    )
                )

            return mxc_uri

        def _cache_remote_media_and_set_attributes(
            media_id: str,
            last_accessed_ms: Optional[int],
            is_quarantined: bool = False,
        ) -> MXCUri:
            """Insert a remote media cache entry and apply the requested attributes.

            Args:
                media_id: The media ID to store under `self.remote_server_name`.
                last_accessed_ms: The last access timestamp to record, or None
                    to skip updating it.
                is_quarantined: Whether to quarantine the media.

            Returns:
                The MXC URI built from `self.remote_server_name` and `media_id`.
            """
            # Pretend to cache some remote media
            self.get_success(
                self.store.store_cached_remote_media(
                    origin=self.remote_server_name,
                    media_id=media_id,
                    media_type="text/plain",
                    media_length=1,
                    time_now_ms=clock.time_msec(),
                    upload_name="testfile.txt",
                    filesystem_id="abcdefg12345",
                )
            )

            # Set the last recently accessed time for this media
            if last_accessed_ms is not None:
                self.get_success(
                    self.store.update_cached_last_access_time(
                        local_media=(),
                        remote_media=((self.remote_server_name, media_id),),
                        time_ms=last_accessed_ms,
                    )
                )

            if is_quarantined:
                # Mark this media as quarantined
                self.get_success(
                    self.store.quarantine_media_by_id(
                        server_name=self.remote_server_name,
                        media_id=media_id,
                        quarantined_by="@theadmin:test",
                    )
                )

            return MXCUri(self.remote_server_name, media_id)

        # Start with the local media store
        self.local_recently_accessed_media = _create_media_and_set_attributes(
            last_accessed_ms=self.THIRTY_DAYS_IN_MS,
        )
        self.local_not_recently_accessed_media = _create_media_and_set_attributes(
            last_accessed_ms=self.ONE_DAY_IN_MS,
        )
        self.local_not_recently_accessed_quarantined_media = (
            _create_media_and_set_attributes(
                last_accessed_ms=self.ONE_DAY_IN_MS,
                is_quarantined=True,
            )
        )
        self.local_not_recently_accessed_protected_media = (
            _create_media_and_set_attributes(
                last_accessed_ms=self.ONE_DAY_IN_MS,
                is_protected=True,
            )
        )
        self.local_never_accessed_media = _create_media_and_set_attributes(
            last_accessed_ms=None,
        )

        # And now the remote media store.
        #
        # There is no "never accessed" remote fixture: remote media will always
        # have a "last accessed" attribute, as it would not be fetched from the
        # remote homeserver unless instigated by a user.
        self.remote_recently_accessed_media = _cache_remote_media_and_set_attributes(
            media_id="a",
            last_accessed_ms=self.THIRTY_DAYS_IN_MS,
        )
        self.remote_not_recently_accessed_media = (
            _cache_remote_media_and_set_attributes(
                media_id="b",
                last_accessed_ms=self.ONE_DAY_IN_MS,
            )
        )
        self.remote_not_recently_accessed_quarantined_media = (
            _cache_remote_media_and_set_attributes(
                media_id="c",
                last_accessed_ms=self.ONE_DAY_IN_MS,
                is_quarantined=True,
            )
        )

    @override_config(
        {
            "media_retention": {
                # Enable retention for local media
                "local_media_lifetime": "30d",
                # Cached remote media should not be purged
            }
        }
    )
    def test_local_media_retention(self) -> None:
        """
        Tests that local media that has not been accessed recently is purged, while
        cached remote media is unaffected.
        """
        # Advance 31 days (in seconds)
        self.reactor.advance(self.THIRTY_ONE_DAYS_IN_S)

        # Check that media has been correctly purged.
        # Local media accessed <30 days ago should still exist, as should local
        # media that is quarantined or protected from quarantine.
        # Remote media should be unaffected.
        self._assert_if_mxc_uris_purged(
            purged=[
                self.local_not_recently_accessed_media,
                self.local_never_accessed_media,
            ],
            not_purged=[
                self.local_recently_accessed_media,
                self.local_not_recently_accessed_quarantined_media,
                self.local_not_recently_accessed_protected_media,
                self.remote_recently_accessed_media,
                self.remote_not_recently_accessed_media,
                self.remote_not_recently_accessed_quarantined_media,
            ],
        )

    @override_config(
        {
            "media_retention": {
                # Enable retention for cached remote media
                "remote_media_lifetime": "30d",
                # Local media should not be purged
            }
        }
    )
    def test_remote_media_cache_retention(self) -> None:
        """
        Tests that entries from the remote media cache that have not been accessed
        recently are purged, while local media is unaffected.
        """
        # Advance 31 days (in seconds)
        self.reactor.advance(self.THIRTY_ONE_DAYS_IN_S)

        # Check that media has been correctly purged.
        # Local media should be unaffected.
        # Remote media accessed <30 days ago should still exist, as should
        # remote media that is quarantined.
        self._assert_if_mxc_uris_purged(
            purged=[
                self.remote_not_recently_accessed_media,
            ],
            not_purged=[
                self.remote_recently_accessed_media,
                self.local_recently_accessed_media,
                self.local_not_recently_accessed_media,
                self.local_not_recently_accessed_quarantined_media,
                self.local_not_recently_accessed_protected_media,
                self.remote_not_recently_accessed_quarantined_media,
                self.local_never_accessed_media,
            ],
        )

    def _assert_if_mxc_uris_purged(
        self, purged: Iterable[MXCUri], not_purged: Iterable[MXCUri]
    ) -> None:
        """Assert which media is, and is not, still present in the datastore.

        Args:
            purged: MXC URIs whose datastore lookup must return None.
            not_purged: MXC URIs whose datastore lookup must return a value.
        """
        local_server_name = self.hs.config.server.server_name

        def _assert_mxc_uri_purge_state(mxc_uri: MXCUri, expect_purged: bool) -> None:
            """Given an MXC URI, assert whether it has been purged or not."""
            # Media on our own server name lives in the local media table;
            # anything else is looked up in the remote media cache.
            if mxc_uri.server_name == local_server_name:
                found_media_dict = self.get_success(
                    self.store.get_local_media(mxc_uri.media_id)
                )
            else:
                found_media_dict = self.get_success(
                    self.store.get_cached_remote_media(
                        mxc_uri.server_name, mxc_uri.media_id
                    )
                )

            if expect_purged:
                self.assertIsNone(
                    found_media_dict, msg=f"{mxc_uri} unexpectedly not purged"
                )
            else:
                self.assertIsNotNone(
                    found_media_dict,
                    msg=f"{mxc_uri} unexpectedly purged",
                )

        # Assert that the given MXC URIs have either been correctly purged or not.
        for mxc_uri in purged:
            _assert_mxc_uri_purge_state(mxc_uri, expect_purged=True)
        for mxc_uri in not_purged:
            _assert_mxc_uri_purge_state(mxc_uri, expect_purged=False)
|