test_media_storage.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874
  1. # Copyright 2018-2021 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import shutil
  16. import tempfile
  17. from binascii import unhexlify
  18. from io import BytesIO
  19. from typing import Any, BinaryIO, ClassVar, Dict, List, Optional, Tuple, Union
  20. from unittest.mock import Mock
  21. from urllib import parse
  22. import attr
  23. from parameterized import parameterized, parameterized_class
  24. from PIL import Image as Image
  25. from typing_extensions import Literal
  26. from twisted.internet import defer
  27. from twisted.internet.defer import Deferred
  28. from twisted.python.failure import Failure
  29. from twisted.test.proto_helpers import MemoryReactor
  30. from twisted.web.resource import Resource
  31. from synapse.api.errors import Codes, HttpResponseException
  32. from synapse.events import EventBase
  33. from synapse.http.types import QueryParams
  34. from synapse.logging.context import make_deferred_yieldable
  35. from synapse.media._base import FileInfo, ThumbnailInfo
  36. from synapse.media.filepath import MediaFilePaths
  37. from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
  38. from synapse.media.storage_provider import FileStorageProviderBackend
  39. from synapse.module_api import ModuleApi
  40. from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
  41. from synapse.rest import admin
  42. from synapse.rest.client import login
  43. from synapse.rest.media.thumbnail_resource import ThumbnailResource
  44. from synapse.server import HomeServer
  45. from synapse.types import JsonDict, RoomAlias
  46. from synapse.util import Clock
  47. from tests import unittest
  48. from tests.server import FakeChannel
  49. from tests.test_utils import SMALL_PNG
  50. from tests.utils import default_config
  51. class MediaStorageTests(unittest.HomeserverTestCase):
  52. needs_threadpool = True
  53. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  54. self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
  55. self.addCleanup(shutil.rmtree, self.test_dir)
  56. self.primary_base_path = os.path.join(self.test_dir, "primary")
  57. self.secondary_base_path = os.path.join(self.test_dir, "secondary")
  58. hs.config.media.media_store_path = self.primary_base_path
  59. storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]
  60. self.filepaths = MediaFilePaths(self.primary_base_path)
  61. self.media_storage = MediaStorage(
  62. hs, self.primary_base_path, self.filepaths, storage_providers
  63. )
  64. def test_ensure_media_is_in_local_cache(self) -> None:
  65. media_id = "some_media_id"
  66. test_body = "Test\n"
  67. # First we create a file that is in a storage provider but not in the
  68. # local primary media store
  69. rel_path = self.filepaths.local_media_filepath_rel(media_id)
  70. secondary_path = os.path.join(self.secondary_base_path, rel_path)
  71. os.makedirs(os.path.dirname(secondary_path))
  72. with open(secondary_path, "w") as f:
  73. f.write(test_body)
  74. # Now we run ensure_media_is_in_local_cache, which should copy the file
  75. # to the local cache.
  76. file_info = FileInfo(None, media_id)
  77. # This uses a real blocking threadpool so we have to wait for it to be
  78. # actually done :/
  79. x = defer.ensureDeferred(
  80. self.media_storage.ensure_media_is_in_local_cache(file_info)
  81. )
  82. # Hotloop until the threadpool does its job...
  83. self.wait_on_thread(x)
  84. local_path = self.get_success(x)
  85. self.assertTrue(os.path.exists(local_path))
  86. # Asserts the file is under the expected local cache directory
  87. self.assertEqual(
  88. os.path.commonprefix([self.primary_base_path, local_path]),
  89. self.primary_base_path,
  90. )
  91. with open(local_path) as f:
  92. body = f.read()
  93. self.assertEqual(test_body, body)
@attr.s(auto_attribs=True, slots=True, frozen=True)
class _TestImage:
    """An image for testing thumbnailing with the expected results

    Attributes:
        data: The raw image to thumbnail
        content_type: The type of the image as a content type, e.g. "image/png"
        extension: The extension associated with the format, e.g. ".png"
        expected_cropped: The expected bytes from cropped thumbnailing, or None if
            test should just check for success.
        expected_scaled: The expected bytes from scaled thumbnailing, or None if
            test should just check for a valid image returned.
        expected_found: True if the file should exist on the server, or False if
            a 404/400 is expected.
        unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
            False if the thumbnailing should succeed or a normal 404 is expected.
        is_inline: True if we expect the file to be served using an inline
            Content-Disposition or False if we expect an attachment.
    """

    data: bytes
    content_type: bytes
    extension: bytes
    expected_cropped: Optional[bytes] = None
    expected_scaled: Optional[bytes] = None
    expected_found: bool = True
    unable_to_thumbnail: bool = False
    is_inline: bool = True
  120. @parameterized_class(
  121. ("test_image",),
  122. [
  123. # small png
  124. (
  125. _TestImage(
  126. SMALL_PNG,
  127. b"image/png",
  128. b".png",
  129. unhexlify(
  130. b"89504e470d0a1a0a0000000d4948445200000020000000200806"
  131. b"000000737a7af40000001a49444154789cedc101010000008220"
  132. b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
  133. b"44ae426082"
  134. ),
  135. unhexlify(
  136. b"89504e470d0a1a0a0000000d4948445200000001000000010806"
  137. b"0000001f15c4890000000d49444154789c636060606000000005"
  138. b"0001a5f645400000000049454e44ae426082"
  139. ),
  140. ),
  141. ),
  142. # small png with transparency.
  143. (
  144. _TestImage(
  145. unhexlify(
  146. b"89504e470d0a1a0a0000000d49484452000000010000000101000"
  147. b"00000376ef9240000000274524e5300010194fdae0000000a4944"
  148. b"4154789c636800000082008177cd72b60000000049454e44ae426"
  149. b"082"
  150. ),
  151. b"image/png",
  152. b".png",
  153. # Note that we don't check the output since it varies across
  154. # different versions of Pillow.
  155. ),
  156. ),
  157. # small lossless webp
  158. (
  159. _TestImage(
  160. unhexlify(
  161. b"524946461a000000574542505650384c0d0000002f0000001007"
  162. b"1011118888fe0700"
  163. ),
  164. b"image/webp",
  165. b".webp",
  166. ),
  167. ),
  168. # an empty file
  169. (
  170. _TestImage(
  171. b"",
  172. b"image/gif",
  173. b".gif",
  174. expected_found=False,
  175. unable_to_thumbnail=True,
  176. ),
  177. ),
  178. # An SVG.
  179. (
  180. _TestImage(
  181. b"""<?xml version="1.0"?>
  182. <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
  183. "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
  184. <svg xmlns="http://www.w3.org/2000/svg"
  185. width="400" height="400">
  186. <circle cx="100" cy="100" r="50" stroke="black"
  187. stroke-width="5" fill="red" />
  188. </svg>""",
  189. b"image/svg",
  190. b".svg",
  191. expected_found=False,
  192. unable_to_thumbnail=True,
  193. is_inline=False,
  194. ),
  195. ),
  196. ],
  197. )
  198. class MediaRepoTests(unittest.HomeserverTestCase):
  199. test_image: ClassVar[_TestImage]
  200. hijack_auth = True
  201. user_id = "@test:user"
  202. def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
  203. self.fetches: List[
  204. Tuple[
  205. "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]",
  206. str,
  207. str,
  208. Optional[QueryParams],
  209. ]
  210. ] = []
  211. def get_file(
  212. destination: str,
  213. path: str,
  214. output_stream: BinaryIO,
  215. args: Optional[QueryParams] = None,
  216. retry_on_dns_fail: bool = True,
  217. max_size: Optional[int] = None,
  218. ignore_backoff: bool = False,
  219. follow_redirects: bool = False,
  220. ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
  221. """A mock for MatrixFederationHttpClient.get_file."""
  222. def write_to(
  223. r: Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]
  224. ) -> Tuple[int, Dict[bytes, List[bytes]]]:
  225. data, response = r
  226. output_stream.write(data)
  227. return response
  228. def write_err(f: Failure) -> Failure:
  229. f.trap(HttpResponseException)
  230. output_stream.write(f.value.response)
  231. return f
  232. d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred()
  233. self.fetches.append((d, destination, path, args))
  234. # Note that this callback changes the value held by d.
  235. d_after_callback = d.addCallbacks(write_to, write_err)
  236. return make_deferred_yieldable(d_after_callback)
  237. # Mock out the homeserver's MatrixFederationHttpClient
  238. client = Mock()
  239. client.get_file = get_file
  240. self.storage_path = self.mktemp()
  241. self.media_store_path = self.mktemp()
  242. os.mkdir(self.storage_path)
  243. os.mkdir(self.media_store_path)
  244. config = self.default_config()
  245. config["media_store_path"] = self.media_store_path
  246. config["max_image_pixels"] = 2000000
  247. provider_config = {
  248. "module": "synapse.media.storage_provider.FileStorageProviderBackend",
  249. "store_local": True,
  250. "store_synchronous": False,
  251. "store_remote": True,
  252. "config": {"directory": self.storage_path},
  253. }
  254. config["media_storage_providers"] = [provider_config]
  255. hs = self.setup_test_homeserver(config=config, federation_http_client=client)
  256. return hs
  257. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  258. self.store = hs.get_datastores().main
  259. self.media_repo = hs.get_media_repository()
  260. self.media_id = "example.com/12345"
  261. def create_resource_dict(self) -> Dict[str, Resource]:
  262. resources = super().create_resource_dict()
  263. resources["/_matrix/media"] = self.hs.get_media_repository_resource()
  264. return resources
  265. def _req(
  266. self, content_disposition: Optional[bytes], include_content_type: bool = True
  267. ) -> FakeChannel:
  268. channel = self.make_request(
  269. "GET",
  270. f"/_matrix/media/v3/download/{self.media_id}",
  271. shorthand=False,
  272. await_result=False,
  273. )
  274. self.pump()
  275. # We've made one fetch, to example.com, using the media URL, and asking
  276. # the other server not to do a remote fetch
  277. self.assertEqual(len(self.fetches), 1)
  278. self.assertEqual(self.fetches[0][1], "example.com")
  279. self.assertEqual(
  280. self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id
  281. )
  282. self.assertEqual(
  283. self.fetches[0][3],
  284. {"allow_remote": "false", "timeout_ms": "20000", "allow_redirect": "true"},
  285. )
  286. headers = {
  287. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  288. }
  289. if include_content_type:
  290. headers[b"Content-Type"] = [self.test_image.content_type]
  291. if content_disposition:
  292. headers[b"Content-Disposition"] = [content_disposition]
  293. self.fetches[0][0].callback(
  294. (self.test_image.data, (len(self.test_image.data), headers))
  295. )
  296. self.pump()
  297. self.assertEqual(channel.code, 200)
  298. return channel
  299. def test_handle_missing_content_type(self) -> None:
  300. channel = self._req(
  301. b"attachment; filename=out" + self.test_image.extension,
  302. include_content_type=False,
  303. )
  304. headers = channel.headers
  305. self.assertEqual(channel.code, 200)
  306. self.assertEqual(
  307. headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"]
  308. )
  309. def test_disposition_filename_ascii(self) -> None:
  310. """
  311. If the filename is filename=<ascii> then Synapse will decode it as an
  312. ASCII string, and use filename= in the response.
  313. """
  314. channel = self._req(b"attachment; filename=out" + self.test_image.extension)
  315. headers = channel.headers
  316. self.assertEqual(
  317. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  318. )
  319. self.assertEqual(
  320. headers.getRawHeaders(b"Content-Disposition"),
  321. [
  322. (b"inline" if self.test_image.is_inline else b"attachment")
  323. + b"; filename=out"
  324. + self.test_image.extension
  325. ],
  326. )
  327. def test_disposition_filenamestar_utf8escaped(self) -> None:
  328. """
  329. If the filename is filename=*utf8''<utf8 escaped> then Synapse will
  330. correctly decode it as the UTF-8 string, and use filename* in the
  331. response.
  332. """
  333. filename = parse.quote("\u2603".encode()).encode("ascii")
  334. channel = self._req(
  335. b"attachment; filename*=utf-8''" + filename + self.test_image.extension
  336. )
  337. headers = channel.headers
  338. self.assertEqual(
  339. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  340. )
  341. self.assertEqual(
  342. headers.getRawHeaders(b"Content-Disposition"),
  343. [
  344. (b"inline" if self.test_image.is_inline else b"attachment")
  345. + b"; filename*=utf-8''"
  346. + filename
  347. + self.test_image.extension
  348. ],
  349. )
  350. def test_disposition_none(self) -> None:
  351. """
  352. If there is no filename, Content-Disposition should only
  353. be a disposition type.
  354. """
  355. channel = self._req(None)
  356. headers = channel.headers
  357. self.assertEqual(
  358. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  359. )
  360. self.assertEqual(
  361. headers.getRawHeaders(b"Content-Disposition"),
  362. [b"inline" if self.test_image.is_inline else b"attachment"],
  363. )
  364. def test_thumbnail_crop(self) -> None:
  365. """Test that a cropped remote thumbnail is available."""
  366. self._test_thumbnail(
  367. "crop",
  368. self.test_image.expected_cropped,
  369. expected_found=self.test_image.expected_found,
  370. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  371. )
  372. def test_thumbnail_scale(self) -> None:
  373. """Test that a scaled remote thumbnail is available."""
  374. self._test_thumbnail(
  375. "scale",
  376. self.test_image.expected_scaled,
  377. expected_found=self.test_image.expected_found,
  378. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  379. )
  380. def test_invalid_type(self) -> None:
  381. """An invalid thumbnail type is never available."""
  382. self._test_thumbnail(
  383. "invalid",
  384. None,
  385. expected_found=False,
  386. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  387. )
  388. @unittest.override_config(
  389. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]}
  390. )
  391. def test_no_thumbnail_crop(self) -> None:
  392. """
  393. Override the config to generate only scaled thumbnails, but request a cropped one.
  394. """
  395. self._test_thumbnail(
  396. "crop",
  397. None,
  398. expected_found=False,
  399. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  400. )
  401. @unittest.override_config(
  402. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]}
  403. )
  404. def test_no_thumbnail_scale(self) -> None:
  405. """
  406. Override the config to generate only cropped thumbnails, but request a scaled one.
  407. """
  408. self._test_thumbnail(
  409. "scale",
  410. None,
  411. expected_found=False,
  412. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  413. )
  414. def test_thumbnail_repeated_thumbnail(self) -> None:
  415. """Test that fetching the same thumbnail works, and deleting the on disk
  416. thumbnail regenerates it.
  417. """
  418. self._test_thumbnail(
  419. "scale",
  420. self.test_image.expected_scaled,
  421. expected_found=self.test_image.expected_found,
  422. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  423. )
  424. if not self.test_image.expected_found:
  425. return
  426. # Fetching again should work, without re-requesting the image from the
  427. # remote.
  428. params = "?width=32&height=32&method=scale"
  429. channel = self.make_request(
  430. "GET",
  431. f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
  432. shorthand=False,
  433. await_result=False,
  434. )
  435. self.pump()
  436. self.assertEqual(channel.code, 200)
  437. if self.test_image.expected_scaled:
  438. self.assertEqual(
  439. channel.result["body"],
  440. self.test_image.expected_scaled,
  441. channel.result["body"],
  442. )
  443. # Deleting the thumbnail on disk then re-requesting it should work as
  444. # Synapse should regenerate missing thumbnails.
  445. origin, media_id = self.media_id.split("/")
  446. info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
  447. assert info is not None
  448. file_id = info.filesystem_id
  449. thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
  450. origin, file_id
  451. )
  452. shutil.rmtree(thumbnail_dir, ignore_errors=True)
  453. channel = self.make_request(
  454. "GET",
  455. f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
  456. shorthand=False,
  457. await_result=False,
  458. )
  459. self.pump()
  460. self.assertEqual(channel.code, 200)
  461. if self.test_image.expected_scaled:
  462. self.assertEqual(
  463. channel.result["body"],
  464. self.test_image.expected_scaled,
  465. channel.result["body"],
  466. )
  467. def _test_thumbnail(
  468. self,
  469. method: str,
  470. expected_body: Optional[bytes],
  471. expected_found: bool,
  472. unable_to_thumbnail: bool = False,
  473. ) -> None:
  474. """Test the given thumbnailing method works as expected.
  475. Args:
  476. method: The thumbnailing method to use (crop, scale).
  477. expected_body: The expected bytes from thumbnailing, or None if
  478. test should just check for a valid image.
  479. expected_found: True if the file should exist on the server, or False if
  480. a 404/400 is expected.
  481. unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
  482. False if the thumbnailing should succeed or a normal 404 is expected.
  483. """
  484. params = "?width=32&height=32&method=" + method
  485. channel = self.make_request(
  486. "GET",
  487. f"/_matrix/media/r0/thumbnail/{self.media_id}{params}",
  488. shorthand=False,
  489. await_result=False,
  490. )
  491. self.pump()
  492. headers = {
  493. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  494. b"Content-Type": [self.test_image.content_type],
  495. }
  496. self.fetches[0][0].callback(
  497. (self.test_image.data, (len(self.test_image.data), headers))
  498. )
  499. self.pump()
  500. if expected_found:
  501. self.assertEqual(channel.code, 200)
  502. self.assertEqual(
  503. channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
  504. [b"cross-origin"],
  505. )
  506. if expected_body is not None:
  507. self.assertEqual(
  508. channel.result["body"], expected_body, channel.result["body"]
  509. )
  510. else:
  511. # ensure that the result is at least some valid image
  512. Image.open(BytesIO(channel.result["body"]))
  513. elif unable_to_thumbnail:
  514. # A 400 with a JSON body.
  515. self.assertEqual(channel.code, 400)
  516. self.assertEqual(
  517. channel.json_body,
  518. {
  519. "errcode": "M_UNKNOWN",
  520. "error": "Cannot find any thumbnails for the requested media ('/_matrix/media/r0/thumbnail/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
  521. },
  522. )
  523. else:
  524. # A 404 with a JSON body.
  525. self.assertEqual(channel.code, 404)
  526. self.assertEqual(
  527. channel.json_body,
  528. {
  529. "errcode": "M_NOT_FOUND",
  530. "error": "Not found '/_matrix/media/r0/thumbnail/example.com/12345'",
  531. },
  532. )
  533. @parameterized.expand([("crop", 16), ("crop", 64), ("scale", 16), ("scale", 64)])
  534. def test_same_quality(self, method: str, desired_size: int) -> None:
  535. """Test that choosing between thumbnails with the same quality rating succeeds.
  536. We are not particular about which thumbnail is chosen."""
  537. content_type = self.test_image.content_type.decode()
  538. media_repo = self.hs.get_media_repository()
  539. thumbnail_resouce = ThumbnailResource(
  540. self.hs, media_repo, media_repo.media_storage
  541. )
  542. self.assertIsNotNone(
  543. thumbnail_resouce._select_thumbnail(
  544. desired_width=desired_size,
  545. desired_height=desired_size,
  546. desired_method=method,
  547. desired_type=content_type,
  548. # Provide two identical thumbnails which are guaranteed to have the same
  549. # quality rating.
  550. thumbnail_infos=[
  551. ThumbnailInfo(
  552. width=32,
  553. height=32,
  554. method=method,
  555. type=content_type,
  556. length=256,
  557. ),
  558. ThumbnailInfo(
  559. width=32,
  560. height=32,
  561. method=method,
  562. type=content_type,
  563. length=256,
  564. ),
  565. ],
  566. file_id=f"image{self.test_image.extension.decode()}",
  567. url_cache=False,
  568. server_name=None,
  569. )
  570. )
  571. def test_x_robots_tag_header(self) -> None:
  572. """
  573. Tests that the `X-Robots-Tag` header is present, which informs web crawlers
  574. to not index, archive, or follow links in media.
  575. """
  576. channel = self._req(b"attachment; filename=out" + self.test_image.extension)
  577. headers = channel.headers
  578. self.assertEqual(
  579. headers.getRawHeaders(b"X-Robots-Tag"),
  580. [b"noindex, nofollow, noarchive, noimageindex"],
  581. )
  582. def test_cross_origin_resource_policy_header(self) -> None:
  583. """
  584. Test that the Cross-Origin-Resource-Policy header is set to "cross-origin"
  585. allowing web clients to embed media from the downloads API.
  586. """
  587. channel = self._req(b"attachment; filename=out" + self.test_image.extension)
  588. headers = channel.headers
  589. self.assertEqual(
  590. headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
  591. [b"cross-origin"],
  592. )
  593. def test_unknown_v3_endpoint(self) -> None:
  594. """
  595. If the v3 endpoint fails, try the r0 one.
  596. """
  597. channel = self.make_request(
  598. "GET",
  599. f"/_matrix/media/v3/download/{self.media_id}",
  600. shorthand=False,
  601. await_result=False,
  602. )
  603. self.pump()
  604. # We've made one fetch, to example.com, using the media URL, and asking
  605. # the other server not to do a remote fetch
  606. self.assertEqual(len(self.fetches), 1)
  607. self.assertEqual(self.fetches[0][1], "example.com")
  608. self.assertEqual(
  609. self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id
  610. )
  611. # The result which says the endpoint is unknown.
  612. unknown_endpoint = b'{"errcode":"M_UNRECOGNIZED","error":"Unknown request"}'
  613. self.fetches[0][0].errback(
  614. HttpResponseException(404, "NOT FOUND", unknown_endpoint)
  615. )
  616. self.pump()
  617. # There should now be another request to the r0 URL.
  618. self.assertEqual(len(self.fetches), 2)
  619. self.assertEqual(self.fetches[1][1], "example.com")
  620. self.assertEqual(
  621. self.fetches[1][2], f"/_matrix/media/r0/download/{self.media_id}"
  622. )
  623. headers = {
  624. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  625. }
  626. self.fetches[1][0].callback(
  627. (self.test_image.data, (len(self.test_image.data), headers))
  628. )
  629. self.pump()
  630. self.assertEqual(channel.code, 200)
  631. class TestSpamCheckerLegacy:
  632. """A spam checker module that rejects all media that includes the bytes
  633. `evil`.
  634. Uses the legacy Spam-Checker API.
  635. """
  636. def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None:
  637. self.config = config
  638. self.api = api
  639. @staticmethod
  640. def parse_config(config: Dict[str, Any]) -> Dict[str, Any]:
  641. return config
  642. async def check_event_for_spam(self, event: EventBase) -> Union[bool, str]:
  643. return False # allow all events
  644. async def user_may_invite(
  645. self,
  646. inviter_userid: str,
  647. invitee_userid: str,
  648. room_id: str,
  649. ) -> bool:
  650. return True # allow all invites
  651. async def user_may_create_room(self, userid: str) -> bool:
  652. return True # allow all room creations
  653. async def user_may_create_room_alias(
  654. self, userid: str, room_alias: RoomAlias
  655. ) -> bool:
  656. return True # allow all room aliases
  657. async def user_may_publish_room(self, userid: str, room_id: str) -> bool:
  658. return True # allow publishing of all rooms
  659. async def check_media_file_for_spam(
  660. self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
  661. ) -> bool:
  662. buf = BytesIO()
  663. await file_wrapper.write_chunks_to(buf.write)
  664. return b"evil" in buf.getvalue()
  665. class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase):
  666. servlets = [
  667. login.register_servlets,
  668. admin.register_servlets,
  669. ]
  670. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  671. self.user = self.register_user("user", "pass")
  672. self.tok = self.login("user", "pass")
  673. load_legacy_spam_checkers(hs)
  674. def create_resource_dict(self) -> Dict[str, Resource]:
  675. resources = super().create_resource_dict()
  676. resources["/_matrix/media"] = self.hs.get_media_repository_resource()
  677. return resources
  678. def default_config(self) -> Dict[str, Any]:
  679. config = default_config("test")
  680. config.update(
  681. {
  682. "spam_checker": [
  683. {
  684. "module": TestSpamCheckerLegacy.__module__
  685. + ".TestSpamCheckerLegacy",
  686. "config": {},
  687. }
  688. ]
  689. }
  690. )
  691. return config
  692. def test_upload_innocent(self) -> None:
  693. """Attempt to upload some innocent data that should be allowed."""
  694. self.helper.upload_media(SMALL_PNG, tok=self.tok, expect_code=200)
  695. def test_upload_ban(self) -> None:
  696. """Attempt to upload some data that includes bytes "evil", which should
  697. get rejected by the spam checker.
  698. """
  699. data = b"Some evil data"
  700. self.helper.upload_media(data, tok=self.tok, expect_code=400)
# Upload body for which SpamCheckerTestCase.check_media_file_for_spam returns a
# bare `Codes.FORBIDDEN`.
EVIL_DATA = b"Some evil data"
# Upload body for which that callback returns a `(Codes.FORBIDDEN, {})` tuple.
EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API"
  703. class SpamCheckerTestCase(unittest.HomeserverTestCase):
  704. servlets = [
  705. login.register_servlets,
  706. admin.register_servlets,
  707. ]
  708. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  709. self.user = self.register_user("user", "pass")
  710. self.tok = self.login("user", "pass")
  711. hs.get_module_api().register_spam_checker_callbacks(
  712. check_media_file_for_spam=self.check_media_file_for_spam
  713. )
  714. def create_resource_dict(self) -> Dict[str, Resource]:
  715. resources = super().create_resource_dict()
  716. resources["/_matrix/media"] = self.hs.get_media_repository_resource()
  717. return resources
  718. async def check_media_file_for_spam(
  719. self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
  720. ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, JsonDict]]:
  721. buf = BytesIO()
  722. await file_wrapper.write_chunks_to(buf.write)
  723. if buf.getvalue() == EVIL_DATA:
  724. return Codes.FORBIDDEN
  725. elif buf.getvalue() == EVIL_DATA_EXPERIMENT:
  726. return (Codes.FORBIDDEN, {})
  727. else:
  728. return "NOT_SPAM"
  729. def test_upload_innocent(self) -> None:
  730. """Attempt to upload some innocent data that should be allowed."""
  731. self.helper.upload_media(SMALL_PNG, tok=self.tok, expect_code=200)
  732. def test_upload_ban(self) -> None:
  733. """Attempt to upload some data that includes bytes "evil", which should
  734. get rejected by the spam checker.
  735. """
  736. self.helper.upload_media(EVIL_DATA, tok=self.tok, expect_code=400)
  737. self.helper.upload_media(
  738. EVIL_DATA_EXPERIMENT,
  739. tok=self.tok,
  740. expect_code=400,
  741. )