test_media_storage.py 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820
  1. # Copyright 2018-2021 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import shutil
  16. import tempfile
  17. from binascii import unhexlify
  18. from io import BytesIO
  19. from typing import Any, BinaryIO, ClassVar, Dict, List, Optional, Tuple, Union
  20. from unittest.mock import Mock
  21. from urllib import parse
  22. import attr
  23. from parameterized import parameterized, parameterized_class
  24. from PIL import Image as Image
  25. from typing_extensions import Literal
  26. from twisted.internet import defer
  27. from twisted.internet.defer import Deferred
  28. from twisted.test.proto_helpers import MemoryReactor
  29. from twisted.web.resource import Resource
  30. from synapse.api.errors import Codes
  31. from synapse.events import EventBase
  32. from synapse.http.types import QueryParams
  33. from synapse.logging.context import make_deferred_yieldable
  34. from synapse.media._base import FileInfo, ThumbnailInfo
  35. from synapse.media.filepath import MediaFilePaths
  36. from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
  37. from synapse.media.storage_provider import FileStorageProviderBackend
  38. from synapse.module_api import ModuleApi
  39. from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
  40. from synapse.rest import admin
  41. from synapse.rest.client import login
  42. from synapse.rest.media.thumbnail_resource import ThumbnailResource
  43. from synapse.server import HomeServer
  44. from synapse.types import JsonDict, RoomAlias
  45. from synapse.util import Clock
  46. from tests import unittest
  47. from tests.server import FakeChannel
  48. from tests.test_utils import SMALL_PNG
  49. from tests.utils import default_config
  50. class MediaStorageTests(unittest.HomeserverTestCase):
  51. needs_threadpool = True
  52. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  53. self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
  54. self.addCleanup(shutil.rmtree, self.test_dir)
  55. self.primary_base_path = os.path.join(self.test_dir, "primary")
  56. self.secondary_base_path = os.path.join(self.test_dir, "secondary")
  57. hs.config.media.media_store_path = self.primary_base_path
  58. storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]
  59. self.filepaths = MediaFilePaths(self.primary_base_path)
  60. self.media_storage = MediaStorage(
  61. hs, self.primary_base_path, self.filepaths, storage_providers
  62. )
  63. def test_ensure_media_is_in_local_cache(self) -> None:
  64. media_id = "some_media_id"
  65. test_body = "Test\n"
  66. # First we create a file that is in a storage provider but not in the
  67. # local primary media store
  68. rel_path = self.filepaths.local_media_filepath_rel(media_id)
  69. secondary_path = os.path.join(self.secondary_base_path, rel_path)
  70. os.makedirs(os.path.dirname(secondary_path))
  71. with open(secondary_path, "w") as f:
  72. f.write(test_body)
  73. # Now we run ensure_media_is_in_local_cache, which should copy the file
  74. # to the local cache.
  75. file_info = FileInfo(None, media_id)
  76. # This uses a real blocking threadpool so we have to wait for it to be
  77. # actually done :/
  78. x = defer.ensureDeferred(
  79. self.media_storage.ensure_media_is_in_local_cache(file_info)
  80. )
  81. # Hotloop until the threadpool does its job...
  82. self.wait_on_thread(x)
  83. local_path = self.get_success(x)
  84. self.assertTrue(os.path.exists(local_path))
  85. # Asserts the file is under the expected local cache directory
  86. self.assertEqual(
  87. os.path.commonprefix([self.primary_base_path, local_path]),
  88. self.primary_base_path,
  89. )
  90. with open(local_path) as f:
  91. body = f.read()
  92. self.assertEqual(test_body, body)
  93. @attr.s(auto_attribs=True, slots=True, frozen=True)
  94. class _TestImage:
  95. """An image for testing thumbnailing with the expected results
  96. Attributes:
  97. data: The raw image to thumbnail
  98. content_type: The type of the image as a content type, e.g. "image/png"
  99. extension: The extension associated with the format, e.g. ".png"
  100. expected_cropped: The expected bytes from cropped thumbnailing, or None if
  101. test should just check for success.
  102. expected_scaled: The expected bytes from scaled thumbnailing, or None if
  103. test should just check for a valid image returned.
  104. expected_found: True if the file should exist on the server, or False if
  105. a 404/400 is expected.
  106. unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
  107. False if the thumbnailing should succeed or a normal 404 is expected.
  108. is_inline: True if we expect the file to be served using an inline
  109. Content-Disposition or False if we expect an attachment.
  110. """
  111. data: bytes
  112. content_type: bytes
  113. extension: bytes
  114. expected_cropped: Optional[bytes] = None
  115. expected_scaled: Optional[bytes] = None
  116. expected_found: bool = True
  117. unable_to_thumbnail: bool = False
  118. is_inline: bool = True
  119. @parameterized_class(
  120. ("test_image",),
  121. [
  122. # small png
  123. (
  124. _TestImage(
  125. SMALL_PNG,
  126. b"image/png",
  127. b".png",
  128. unhexlify(
  129. b"89504e470d0a1a0a0000000d4948445200000020000000200806"
  130. b"000000737a7af40000001a49444154789cedc101010000008220"
  131. b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
  132. b"44ae426082"
  133. ),
  134. unhexlify(
  135. b"89504e470d0a1a0a0000000d4948445200000001000000010806"
  136. b"0000001f15c4890000000d49444154789c636060606000000005"
  137. b"0001a5f645400000000049454e44ae426082"
  138. ),
  139. ),
  140. ),
  141. # small png with transparency.
  142. (
  143. _TestImage(
  144. unhexlify(
  145. b"89504e470d0a1a0a0000000d49484452000000010000000101000"
  146. b"00000376ef9240000000274524e5300010194fdae0000000a4944"
  147. b"4154789c636800000082008177cd72b60000000049454e44ae426"
  148. b"082"
  149. ),
  150. b"image/png",
  151. b".png",
  152. # Note that we don't check the output since it varies across
  153. # different versions of Pillow.
  154. ),
  155. ),
  156. # small lossless webp
  157. (
  158. _TestImage(
  159. unhexlify(
  160. b"524946461a000000574542505650384c0d0000002f0000001007"
  161. b"1011118888fe0700"
  162. ),
  163. b"image/webp",
  164. b".webp",
  165. ),
  166. ),
  167. # an empty file
  168. (
  169. _TestImage(
  170. b"",
  171. b"image/gif",
  172. b".gif",
  173. expected_found=False,
  174. unable_to_thumbnail=True,
  175. ),
  176. ),
  177. # An SVG.
  178. (
  179. _TestImage(
  180. b"""<?xml version="1.0"?>
  181. <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
  182. "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
  183. <svg xmlns="http://www.w3.org/2000/svg"
  184. width="400" height="400">
  185. <circle cx="100" cy="100" r="50" stroke="black"
  186. stroke-width="5" fill="red" />
  187. </svg>""",
  188. b"image/svg",
  189. b".svg",
  190. expected_found=False,
  191. unable_to_thumbnail=True,
  192. is_inline=False,
  193. ),
  194. ),
  195. ],
  196. )
  197. class MediaRepoTests(unittest.HomeserverTestCase):
  198. test_image: ClassVar[_TestImage]
  199. hijack_auth = True
  200. user_id = "@test:user"
  201. def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
  202. self.fetches: List[
  203. Tuple[
  204. "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]",
  205. str,
  206. str,
  207. Optional[QueryParams],
  208. ]
  209. ] = []
  210. def get_file(
  211. destination: str,
  212. path: str,
  213. output_stream: BinaryIO,
  214. args: Optional[QueryParams] = None,
  215. retry_on_dns_fail: bool = True,
  216. max_size: Optional[int] = None,
  217. ignore_backoff: bool = False,
  218. ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
  219. """A mock for MatrixFederationHttpClient.get_file."""
  220. def write_to(
  221. r: Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]
  222. ) -> Tuple[int, Dict[bytes, List[bytes]]]:
  223. data, response = r
  224. output_stream.write(data)
  225. return response
  226. d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred()
  227. self.fetches.append((d, destination, path, args))
  228. # Note that this callback changes the value held by d.
  229. d_after_callback = d.addCallback(write_to)
  230. return make_deferred_yieldable(d_after_callback)
  231. # Mock out the homeserver's MatrixFederationHttpClient
  232. client = Mock()
  233. client.get_file = get_file
  234. self.storage_path = self.mktemp()
  235. self.media_store_path = self.mktemp()
  236. os.mkdir(self.storage_path)
  237. os.mkdir(self.media_store_path)
  238. config = self.default_config()
  239. config["media_store_path"] = self.media_store_path
  240. config["max_image_pixels"] = 2000000
  241. provider_config = {
  242. "module": "synapse.media.storage_provider.FileStorageProviderBackend",
  243. "store_local": True,
  244. "store_synchronous": False,
  245. "store_remote": True,
  246. "config": {"directory": self.storage_path},
  247. }
  248. config["media_storage_providers"] = [provider_config]
  249. hs = self.setup_test_homeserver(config=config, federation_http_client=client)
  250. return hs
  251. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  252. self.store = hs.get_datastores().main
  253. self.media_repo = hs.get_media_repository()
  254. self.media_id = "example.com/12345"
  255. def create_resource_dict(self) -> Dict[str, Resource]:
  256. resources = super().create_resource_dict()
  257. resources["/_matrix/media"] = self.hs.get_media_repository_resource()
  258. return resources
  259. def _req(
  260. self, content_disposition: Optional[bytes], include_content_type: bool = True
  261. ) -> FakeChannel:
  262. channel = self.make_request(
  263. "GET",
  264. f"/_matrix/media/v3/download/{self.media_id}",
  265. shorthand=False,
  266. await_result=False,
  267. )
  268. self.pump()
  269. # We've made one fetch, to example.com, using the media URL, and asking
  270. # the other server not to do a remote fetch
  271. self.assertEqual(len(self.fetches), 1)
  272. self.assertEqual(self.fetches[0][1], "example.com")
  273. self.assertEqual(
  274. self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id
  275. )
  276. self.assertEqual(
  277. self.fetches[0][3], {"allow_remote": "false", "timeout_ms": "20000"}
  278. )
  279. headers = {
  280. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  281. }
  282. if include_content_type:
  283. headers[b"Content-Type"] = [self.test_image.content_type]
  284. if content_disposition:
  285. headers[b"Content-Disposition"] = [content_disposition]
  286. self.fetches[0][0].callback(
  287. (self.test_image.data, (len(self.test_image.data), headers))
  288. )
  289. self.pump()
  290. self.assertEqual(channel.code, 200)
  291. return channel
  292. def test_handle_missing_content_type(self) -> None:
  293. channel = self._req(
  294. b"attachment; filename=out" + self.test_image.extension,
  295. include_content_type=False,
  296. )
  297. headers = channel.headers
  298. self.assertEqual(channel.code, 200)
  299. self.assertEqual(
  300. headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"]
  301. )
  302. def test_disposition_filename_ascii(self) -> None:
  303. """
  304. If the filename is filename=<ascii> then Synapse will decode it as an
  305. ASCII string, and use filename= in the response.
  306. """
  307. channel = self._req(b"attachment; filename=out" + self.test_image.extension)
  308. headers = channel.headers
  309. self.assertEqual(
  310. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  311. )
  312. self.assertEqual(
  313. headers.getRawHeaders(b"Content-Disposition"),
  314. [
  315. (b"inline" if self.test_image.is_inline else b"attachment")
  316. + b"; filename=out"
  317. + self.test_image.extension
  318. ],
  319. )
  320. def test_disposition_filenamestar_utf8escaped(self) -> None:
  321. """
  322. If the filename is filename=*utf8''<utf8 escaped> then Synapse will
  323. correctly decode it as the UTF-8 string, and use filename* in the
  324. response.
  325. """
  326. filename = parse.quote("\u2603".encode()).encode("ascii")
  327. channel = self._req(
  328. b"attachment; filename*=utf-8''" + filename + self.test_image.extension
  329. )
  330. headers = channel.headers
  331. self.assertEqual(
  332. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  333. )
  334. self.assertEqual(
  335. headers.getRawHeaders(b"Content-Disposition"),
  336. [
  337. (b"inline" if self.test_image.is_inline else b"attachment")
  338. + b"; filename*=utf-8''"
  339. + filename
  340. + self.test_image.extension
  341. ],
  342. )
  343. def test_disposition_none(self) -> None:
  344. """
  345. If there is no filename, Content-Disposition should only
  346. be a disposition type.
  347. """
  348. channel = self._req(None)
  349. headers = channel.headers
  350. self.assertEqual(
  351. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  352. )
  353. self.assertEqual(
  354. headers.getRawHeaders(b"Content-Disposition"),
  355. [b"inline" if self.test_image.is_inline else b"attachment"],
  356. )
  357. def test_thumbnail_crop(self) -> None:
  358. """Test that a cropped remote thumbnail is available."""
  359. self._test_thumbnail(
  360. "crop",
  361. self.test_image.expected_cropped,
  362. expected_found=self.test_image.expected_found,
  363. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  364. )
  365. def test_thumbnail_scale(self) -> None:
  366. """Test that a scaled remote thumbnail is available."""
  367. self._test_thumbnail(
  368. "scale",
  369. self.test_image.expected_scaled,
  370. expected_found=self.test_image.expected_found,
  371. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  372. )
  373. def test_invalid_type(self) -> None:
  374. """An invalid thumbnail type is never available."""
  375. self._test_thumbnail(
  376. "invalid",
  377. None,
  378. expected_found=False,
  379. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  380. )
  381. @unittest.override_config(
  382. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]}
  383. )
  384. def test_no_thumbnail_crop(self) -> None:
  385. """
  386. Override the config to generate only scaled thumbnails, but request a cropped one.
  387. """
  388. self._test_thumbnail(
  389. "crop",
  390. None,
  391. expected_found=False,
  392. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  393. )
  394. @unittest.override_config(
  395. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]}
  396. )
  397. def test_no_thumbnail_scale(self) -> None:
  398. """
  399. Override the config to generate only cropped thumbnails, but request a scaled one.
  400. """
  401. self._test_thumbnail(
  402. "scale",
  403. None,
  404. expected_found=False,
  405. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  406. )
  407. def test_thumbnail_repeated_thumbnail(self) -> None:
  408. """Test that fetching the same thumbnail works, and deleting the on disk
  409. thumbnail regenerates it.
  410. """
  411. self._test_thumbnail(
  412. "scale",
  413. self.test_image.expected_scaled,
  414. expected_found=self.test_image.expected_found,
  415. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  416. )
  417. if not self.test_image.expected_found:
  418. return
  419. # Fetching again should work, without re-requesting the image from the
  420. # remote.
  421. params = "?width=32&height=32&method=scale"
  422. channel = self.make_request(
  423. "GET",
  424. f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
  425. shorthand=False,
  426. await_result=False,
  427. )
  428. self.pump()
  429. self.assertEqual(channel.code, 200)
  430. if self.test_image.expected_scaled:
  431. self.assertEqual(
  432. channel.result["body"],
  433. self.test_image.expected_scaled,
  434. channel.result["body"],
  435. )
  436. # Deleting the thumbnail on disk then re-requesting it should work as
  437. # Synapse should regenerate missing thumbnails.
  438. origin, media_id = self.media_id.split("/")
  439. info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
  440. assert info is not None
  441. file_id = info.filesystem_id
  442. thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
  443. origin, file_id
  444. )
  445. shutil.rmtree(thumbnail_dir, ignore_errors=True)
  446. channel = self.make_request(
  447. "GET",
  448. f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
  449. shorthand=False,
  450. await_result=False,
  451. )
  452. self.pump()
  453. self.assertEqual(channel.code, 200)
  454. if self.test_image.expected_scaled:
  455. self.assertEqual(
  456. channel.result["body"],
  457. self.test_image.expected_scaled,
  458. channel.result["body"],
  459. )
  460. def _test_thumbnail(
  461. self,
  462. method: str,
  463. expected_body: Optional[bytes],
  464. expected_found: bool,
  465. unable_to_thumbnail: bool = False,
  466. ) -> None:
  467. """Test the given thumbnailing method works as expected.
  468. Args:
  469. method: The thumbnailing method to use (crop, scale).
  470. expected_body: The expected bytes from thumbnailing, or None if
  471. test should just check for a valid image.
  472. expected_found: True if the file should exist on the server, or False if
  473. a 404/400 is expected.
  474. unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
  475. False if the thumbnailing should succeed or a normal 404 is expected.
  476. """
  477. params = "?width=32&height=32&method=" + method
  478. channel = self.make_request(
  479. "GET",
  480. f"/_matrix/media/r0/thumbnail/{self.media_id}{params}",
  481. shorthand=False,
  482. await_result=False,
  483. )
  484. self.pump()
  485. headers = {
  486. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  487. b"Content-Type": [self.test_image.content_type],
  488. }
  489. self.fetches[0][0].callback(
  490. (self.test_image.data, (len(self.test_image.data), headers))
  491. )
  492. self.pump()
  493. if expected_found:
  494. self.assertEqual(channel.code, 200)
  495. self.assertEqual(
  496. channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
  497. [b"cross-origin"],
  498. )
  499. if expected_body is not None:
  500. self.assertEqual(
  501. channel.result["body"], expected_body, channel.result["body"]
  502. )
  503. else:
  504. # ensure that the result is at least some valid image
  505. Image.open(BytesIO(channel.result["body"]))
  506. elif unable_to_thumbnail:
  507. # A 400 with a JSON body.
  508. self.assertEqual(channel.code, 400)
  509. self.assertEqual(
  510. channel.json_body,
  511. {
  512. "errcode": "M_UNKNOWN",
  513. "error": "Cannot find any thumbnails for the requested media ('/_matrix/media/r0/thumbnail/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
  514. },
  515. )
  516. else:
  517. # A 404 with a JSON body.
  518. self.assertEqual(channel.code, 404)
  519. self.assertEqual(
  520. channel.json_body,
  521. {
  522. "errcode": "M_NOT_FOUND",
  523. "error": "Not found '/_matrix/media/r0/thumbnail/example.com/12345'",
  524. },
  525. )
  526. @parameterized.expand([("crop", 16), ("crop", 64), ("scale", 16), ("scale", 64)])
  527. def test_same_quality(self, method: str, desired_size: int) -> None:
  528. """Test that choosing between thumbnails with the same quality rating succeeds.
  529. We are not particular about which thumbnail is chosen."""
  530. content_type = self.test_image.content_type.decode()
  531. media_repo = self.hs.get_media_repository()
  532. thumbnail_resouce = ThumbnailResource(
  533. self.hs, media_repo, media_repo.media_storage
  534. )
  535. self.assertIsNotNone(
  536. thumbnail_resouce._select_thumbnail(
  537. desired_width=desired_size,
  538. desired_height=desired_size,
  539. desired_method=method,
  540. desired_type=content_type,
  541. # Provide two identical thumbnails which are guaranteed to have the same
  542. # quality rating.
  543. thumbnail_infos=[
  544. ThumbnailInfo(
  545. width=32,
  546. height=32,
  547. method=method,
  548. type=content_type,
  549. length=256,
  550. ),
  551. ThumbnailInfo(
  552. width=32,
  553. height=32,
  554. method=method,
  555. type=content_type,
  556. length=256,
  557. ),
  558. ],
  559. file_id=f"image{self.test_image.extension.decode()}",
  560. url_cache=False,
  561. server_name=None,
  562. )
  563. )
  564. def test_x_robots_tag_header(self) -> None:
  565. """
  566. Tests that the `X-Robots-Tag` header is present, which informs web crawlers
  567. to not index, archive, or follow links in media.
  568. """
  569. channel = self._req(b"attachment; filename=out" + self.test_image.extension)
  570. headers = channel.headers
  571. self.assertEqual(
  572. headers.getRawHeaders(b"X-Robots-Tag"),
  573. [b"noindex, nofollow, noarchive, noimageindex"],
  574. )
  575. def test_cross_origin_resource_policy_header(self) -> None:
  576. """
  577. Test that the Cross-Origin-Resource-Policy header is set to "cross-origin"
  578. allowing web clients to embed media from the downloads API.
  579. """
  580. channel = self._req(b"attachment; filename=out" + self.test_image.extension)
  581. headers = channel.headers
  582. self.assertEqual(
  583. headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
  584. [b"cross-origin"],
  585. )
  586. class TestSpamCheckerLegacy:
  587. """A spam checker module that rejects all media that includes the bytes
  588. `evil`.
  589. Uses the legacy Spam-Checker API.
  590. """
  591. def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None:
  592. self.config = config
  593. self.api = api
  594. @staticmethod
  595. def parse_config(config: Dict[str, Any]) -> Dict[str, Any]:
  596. return config
  597. async def check_event_for_spam(self, event: EventBase) -> Union[bool, str]:
  598. return False # allow all events
  599. async def user_may_invite(
  600. self,
  601. inviter_userid: str,
  602. invitee_userid: str,
  603. room_id: str,
  604. ) -> bool:
  605. return True # allow all invites
  606. async def user_may_create_room(self, userid: str) -> bool:
  607. return True # allow all room creations
  608. async def user_may_create_room_alias(
  609. self, userid: str, room_alias: RoomAlias
  610. ) -> bool:
  611. return True # allow all room aliases
  612. async def user_may_publish_room(self, userid: str, room_id: str) -> bool:
  613. return True # allow publishing of all rooms
  614. async def check_media_file_for_spam(
  615. self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
  616. ) -> bool:
  617. buf = BytesIO()
  618. await file_wrapper.write_chunks_to(buf.write)
  619. return b"evil" in buf.getvalue()
  620. class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase):
  621. servlets = [
  622. login.register_servlets,
  623. admin.register_servlets,
  624. ]
  625. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  626. self.user = self.register_user("user", "pass")
  627. self.tok = self.login("user", "pass")
  628. load_legacy_spam_checkers(hs)
  629. def create_resource_dict(self) -> Dict[str, Resource]:
  630. resources = super().create_resource_dict()
  631. resources["/_matrix/media"] = self.hs.get_media_repository_resource()
  632. return resources
  633. def default_config(self) -> Dict[str, Any]:
  634. config = default_config("test")
  635. config.update(
  636. {
  637. "spam_checker": [
  638. {
  639. "module": TestSpamCheckerLegacy.__module__
  640. + ".TestSpamCheckerLegacy",
  641. "config": {},
  642. }
  643. ]
  644. }
  645. )
  646. return config
  647. def test_upload_innocent(self) -> None:
  648. """Attempt to upload some innocent data that should be allowed."""
  649. self.helper.upload_media(SMALL_PNG, tok=self.tok, expect_code=200)
  650. def test_upload_ban(self) -> None:
  651. """Attempt to upload some data that includes bytes "evil", which should
  652. get rejected by the spam checker.
  653. """
  654. data = b"Some evil data"
  655. self.helper.upload_media(data, tok=self.tok, expect_code=400)
# Payload for which SpamCheckerTestCase.check_media_file_for_spam returns a bare
# `Codes.FORBIDDEN`.
EVIL_DATA = b"Some evil data"
# Payload for which the same callback returns the `(Codes.FORBIDDEN, {})` tuple
# form instead.
EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API"
  658. class SpamCheckerTestCase(unittest.HomeserverTestCase):
  659. servlets = [
  660. login.register_servlets,
  661. admin.register_servlets,
  662. ]
  663. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  664. self.user = self.register_user("user", "pass")
  665. self.tok = self.login("user", "pass")
  666. hs.get_module_api().register_spam_checker_callbacks(
  667. check_media_file_for_spam=self.check_media_file_for_spam
  668. )
  669. def create_resource_dict(self) -> Dict[str, Resource]:
  670. resources = super().create_resource_dict()
  671. resources["/_matrix/media"] = self.hs.get_media_repository_resource()
  672. return resources
  673. async def check_media_file_for_spam(
  674. self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
  675. ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, JsonDict]]:
  676. buf = BytesIO()
  677. await file_wrapper.write_chunks_to(buf.write)
  678. if buf.getvalue() == EVIL_DATA:
  679. return Codes.FORBIDDEN
  680. elif buf.getvalue() == EVIL_DATA_EXPERIMENT:
  681. return (Codes.FORBIDDEN, {})
  682. else:
  683. return "NOT_SPAM"
  684. def test_upload_innocent(self) -> None:
  685. """Attempt to upload some innocent data that should be allowed."""
  686. self.helper.upload_media(SMALL_PNG, tok=self.tok, expect_code=200)
  687. def test_upload_ban(self) -> None:
  688. """Attempt to upload some data that includes bytes "evil", which should
  689. get rejected by the spam checker.
  690. """
  691. self.helper.upload_media(EVIL_DATA, tok=self.tok, expect_code=400)
  692. self.helper.upload_media(
  693. EVIL_DATA_EXPERIMENT,
  694. tok=self.tok,
  695. expect_code=400,
  696. )