# test_media_storage.py
# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. import os
  15. import shutil
  16. import tempfile
  17. from binascii import unhexlify
  18. from io import BytesIO
  19. from typing import Any, BinaryIO, ClassVar, Dict, List, Optional, Tuple, Union
  20. from unittest.mock import Mock
  21. from urllib import parse
  22. import attr
  23. from parameterized import parameterized, parameterized_class
  24. from PIL import Image as Image
  25. from typing_extensions import Literal
  26. from twisted.internet import defer
  27. from twisted.internet.defer import Deferred
  28. from twisted.test.proto_helpers import MemoryReactor
  29. from synapse.api.errors import Codes
  30. from synapse.events import EventBase
  31. from synapse.events.spamcheck import load_legacy_spam_checkers
  32. from synapse.http.types import QueryParams
  33. from synapse.logging.context import make_deferred_yieldable
  34. from synapse.media._base import FileInfo
  35. from synapse.media.filepath import MediaFilePaths
  36. from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
  37. from synapse.media.storage_provider import FileStorageProviderBackend
  38. from synapse.module_api import ModuleApi
  39. from synapse.rest import admin
  40. from synapse.rest.client import login
  41. from synapse.server import HomeServer
  42. from synapse.types import JsonDict, RoomAlias
  43. from synapse.util import Clock
  44. from tests import unittest
  45. from tests.server import FakeChannel, FakeSite, make_request
  46. from tests.test_utils import SMALL_PNG
  47. from tests.utils import default_config
  48. class MediaStorageTests(unittest.HomeserverTestCase):
  49. needs_threadpool = True
  50. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  51. self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
  52. self.addCleanup(shutil.rmtree, self.test_dir)
  53. self.primary_base_path = os.path.join(self.test_dir, "primary")
  54. self.secondary_base_path = os.path.join(self.test_dir, "secondary")
  55. hs.config.media.media_store_path = self.primary_base_path
  56. storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]
  57. self.filepaths = MediaFilePaths(self.primary_base_path)
  58. self.media_storage = MediaStorage(
  59. hs, self.primary_base_path, self.filepaths, storage_providers
  60. )
  61. def test_ensure_media_is_in_local_cache(self) -> None:
  62. media_id = "some_media_id"
  63. test_body = "Test\n"
  64. # First we create a file that is in a storage provider but not in the
  65. # local primary media store
  66. rel_path = self.filepaths.local_media_filepath_rel(media_id)
  67. secondary_path = os.path.join(self.secondary_base_path, rel_path)
  68. os.makedirs(os.path.dirname(secondary_path))
  69. with open(secondary_path, "w") as f:
  70. f.write(test_body)
  71. # Now we run ensure_media_is_in_local_cache, which should copy the file
  72. # to the local cache.
  73. file_info = FileInfo(None, media_id)
  74. # This uses a real blocking threadpool so we have to wait for it to be
  75. # actually done :/
  76. x = defer.ensureDeferred(
  77. self.media_storage.ensure_media_is_in_local_cache(file_info)
  78. )
  79. # Hotloop until the threadpool does its job...
  80. self.wait_on_thread(x)
  81. local_path = self.get_success(x)
  82. self.assertTrue(os.path.exists(local_path))
  83. # Asserts the file is under the expected local cache directory
  84. self.assertEqual(
  85. os.path.commonprefix([self.primary_base_path, local_path]),
  86. self.primary_base_path,
  87. )
  88. with open(local_path) as f:
  89. body = f.read()
  90. self.assertEqual(test_body, body)
  91. @attr.s(auto_attribs=True, slots=True, frozen=True)
  92. class _TestImage:
  93. """An image for testing thumbnailing with the expected results
  94. Attributes:
  95. data: The raw image to thumbnail
  96. content_type: The type of the image as a content type, e.g. "image/png"
  97. extension: The extension associated with the format, e.g. ".png"
  98. expected_cropped: The expected bytes from cropped thumbnailing, or None if
  99. test should just check for success.
  100. expected_scaled: The expected bytes from scaled thumbnailing, or None if
  101. test should just check for a valid image returned.
  102. expected_found: True if the file should exist on the server, or False if
  103. a 404/400 is expected.
  104. unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
  105. False if the thumbnailing should succeed or a normal 404 is expected.
  106. """
  107. data: bytes
  108. content_type: bytes
  109. extension: bytes
  110. expected_cropped: Optional[bytes] = None
  111. expected_scaled: Optional[bytes] = None
  112. expected_found: bool = True
  113. unable_to_thumbnail: bool = False
  114. @parameterized_class(
  115. ("test_image",),
  116. [
  117. # small png
  118. (
  119. _TestImage(
  120. SMALL_PNG,
  121. b"image/png",
  122. b".png",
  123. unhexlify(
  124. b"89504e470d0a1a0a0000000d4948445200000020000000200806"
  125. b"000000737a7af40000001a49444154789cedc101010000008220"
  126. b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
  127. b"44ae426082"
  128. ),
  129. unhexlify(
  130. b"89504e470d0a1a0a0000000d4948445200000001000000010806"
  131. b"0000001f15c4890000000d49444154789c636060606000000005"
  132. b"0001a5f645400000000049454e44ae426082"
  133. ),
  134. ),
  135. ),
  136. # small png with transparency.
  137. (
  138. _TestImage(
  139. unhexlify(
  140. b"89504e470d0a1a0a0000000d49484452000000010000000101000"
  141. b"00000376ef9240000000274524e5300010194fdae0000000a4944"
  142. b"4154789c636800000082008177cd72b60000000049454e44ae426"
  143. b"082"
  144. ),
  145. b"image/png",
  146. b".png",
  147. # Note that we don't check the output since it varies across
  148. # different versions of Pillow.
  149. ),
  150. ),
  151. # small lossless webp
  152. (
  153. _TestImage(
  154. unhexlify(
  155. b"524946461a000000574542505650384c0d0000002f0000001007"
  156. b"1011118888fe0700"
  157. ),
  158. b"image/webp",
  159. b".webp",
  160. ),
  161. ),
  162. # an empty file
  163. (
  164. _TestImage(
  165. b"",
  166. b"image/gif",
  167. b".gif",
  168. expected_found=False,
  169. unable_to_thumbnail=True,
  170. ),
  171. ),
  172. ],
  173. )
  174. class MediaRepoTests(unittest.HomeserverTestCase):
  175. test_image: ClassVar[_TestImage]
  176. hijack_auth = True
  177. user_id = "@test:user"
  178. def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
  179. self.fetches: List[
  180. Tuple[
  181. "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]",
  182. str,
  183. str,
  184. Optional[QueryParams],
  185. ]
  186. ] = []
  187. def get_file(
  188. destination: str,
  189. path: str,
  190. output_stream: BinaryIO,
  191. args: Optional[QueryParams] = None,
  192. retry_on_dns_fail: bool = True,
  193. max_size: Optional[int] = None,
  194. ignore_backoff: bool = False,
  195. ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
  196. """A mock for MatrixFederationHttpClient.get_file."""
  197. def write_to(
  198. r: Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]
  199. ) -> Tuple[int, Dict[bytes, List[bytes]]]:
  200. data, response = r
  201. output_stream.write(data)
  202. return response
  203. d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred()
  204. self.fetches.append((d, destination, path, args))
  205. # Note that this callback changes the value held by d.
  206. d_after_callback = d.addCallback(write_to)
  207. return make_deferred_yieldable(d_after_callback)
  208. # Mock out the homeserver's MatrixFederationHttpClient
  209. client = Mock()
  210. client.get_file = get_file
  211. self.storage_path = self.mktemp()
  212. self.media_store_path = self.mktemp()
  213. os.mkdir(self.storage_path)
  214. os.mkdir(self.media_store_path)
  215. config = self.default_config()
  216. config["media_store_path"] = self.media_store_path
  217. config["max_image_pixels"] = 2000000
  218. provider_config = {
  219. "module": "synapse.media.storage_provider.FileStorageProviderBackend",
  220. "store_local": True,
  221. "store_synchronous": False,
  222. "store_remote": True,
  223. "config": {"directory": self.storage_path},
  224. }
  225. config["media_storage_providers"] = [provider_config]
  226. hs = self.setup_test_homeserver(config=config, federation_http_client=client)
  227. return hs
  228. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  229. media_resource = hs.get_media_repository_resource()
  230. self.download_resource = media_resource.children[b"download"]
  231. self.thumbnail_resource = media_resource.children[b"thumbnail"]
  232. self.store = hs.get_datastores().main
  233. self.media_repo = hs.get_media_repository()
  234. self.media_id = "example.com/12345"
  235. def _req(
  236. self, content_disposition: Optional[bytes], include_content_type: bool = True
  237. ) -> FakeChannel:
  238. channel = make_request(
  239. self.reactor,
  240. FakeSite(self.download_resource, self.reactor),
  241. "GET",
  242. self.media_id,
  243. shorthand=False,
  244. await_result=False,
  245. )
  246. self.pump()
  247. # We've made one fetch, to example.com, using the media URL, and asking
  248. # the other server not to do a remote fetch
  249. self.assertEqual(len(self.fetches), 1)
  250. self.assertEqual(self.fetches[0][1], "example.com")
  251. self.assertEqual(
  252. self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id
  253. )
  254. self.assertEqual(self.fetches[0][3], {"allow_remote": "false"})
  255. headers = {
  256. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  257. }
  258. if include_content_type:
  259. headers[b"Content-Type"] = [self.test_image.content_type]
  260. if content_disposition:
  261. headers[b"Content-Disposition"] = [content_disposition]
  262. self.fetches[0][0].callback(
  263. (self.test_image.data, (len(self.test_image.data), headers))
  264. )
  265. self.pump()
  266. self.assertEqual(channel.code, 200)
  267. return channel
  268. def test_handle_missing_content_type(self) -> None:
  269. channel = self._req(
  270. b"inline; filename=out" + self.test_image.extension,
  271. include_content_type=False,
  272. )
  273. headers = channel.headers
  274. self.assertEqual(channel.code, 200)
  275. self.assertEqual(
  276. headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"]
  277. )
  278. def test_disposition_filename_ascii(self) -> None:
  279. """
  280. If the filename is filename=<ascii> then Synapse will decode it as an
  281. ASCII string, and use filename= in the response.
  282. """
  283. channel = self._req(b"inline; filename=out" + self.test_image.extension)
  284. headers = channel.headers
  285. self.assertEqual(
  286. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  287. )
  288. self.assertEqual(
  289. headers.getRawHeaders(b"Content-Disposition"),
  290. [b"inline; filename=out" + self.test_image.extension],
  291. )
  292. def test_disposition_filenamestar_utf8escaped(self) -> None:
  293. """
  294. If the filename is filename=*utf8''<utf8 escaped> then Synapse will
  295. correctly decode it as the UTF-8 string, and use filename* in the
  296. response.
  297. """
  298. filename = parse.quote("\u2603".encode()).encode("ascii")
  299. channel = self._req(
  300. b"inline; filename*=utf-8''" + filename + self.test_image.extension
  301. )
  302. headers = channel.headers
  303. self.assertEqual(
  304. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  305. )
  306. self.assertEqual(
  307. headers.getRawHeaders(b"Content-Disposition"),
  308. [b"inline; filename*=utf-8''" + filename + self.test_image.extension],
  309. )
  310. def test_disposition_none(self) -> None:
  311. """
  312. If there is no filename, one isn't passed on in the Content-Disposition
  313. of the request.
  314. """
  315. channel = self._req(None)
  316. headers = channel.headers
  317. self.assertEqual(
  318. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  319. )
  320. self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None)
  321. def test_thumbnail_crop(self) -> None:
  322. """Test that a cropped remote thumbnail is available."""
  323. self._test_thumbnail(
  324. "crop",
  325. self.test_image.expected_cropped,
  326. expected_found=self.test_image.expected_found,
  327. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  328. )
  329. def test_thumbnail_scale(self) -> None:
  330. """Test that a scaled remote thumbnail is available."""
  331. self._test_thumbnail(
  332. "scale",
  333. self.test_image.expected_scaled,
  334. expected_found=self.test_image.expected_found,
  335. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  336. )
  337. def test_invalid_type(self) -> None:
  338. """An invalid thumbnail type is never available."""
  339. self._test_thumbnail(
  340. "invalid",
  341. None,
  342. expected_found=False,
  343. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  344. )
  345. @unittest.override_config(
  346. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]}
  347. )
  348. def test_no_thumbnail_crop(self) -> None:
  349. """
  350. Override the config to generate only scaled thumbnails, but request a cropped one.
  351. """
  352. self._test_thumbnail(
  353. "crop",
  354. None,
  355. expected_found=False,
  356. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  357. )
  358. @unittest.override_config(
  359. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]}
  360. )
  361. def test_no_thumbnail_scale(self) -> None:
  362. """
  363. Override the config to generate only cropped thumbnails, but request a scaled one.
  364. """
  365. self._test_thumbnail(
  366. "scale",
  367. None,
  368. expected_found=False,
  369. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  370. )
  371. def test_thumbnail_repeated_thumbnail(self) -> None:
  372. """Test that fetching the same thumbnail works, and deleting the on disk
  373. thumbnail regenerates it.
  374. """
  375. self._test_thumbnail(
  376. "scale",
  377. self.test_image.expected_scaled,
  378. expected_found=self.test_image.expected_found,
  379. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  380. )
  381. if not self.test_image.expected_found:
  382. return
  383. # Fetching again should work, without re-requesting the image from the
  384. # remote.
  385. params = "?width=32&height=32&method=scale"
  386. channel = make_request(
  387. self.reactor,
  388. FakeSite(self.thumbnail_resource, self.reactor),
  389. "GET",
  390. self.media_id + params,
  391. shorthand=False,
  392. await_result=False,
  393. )
  394. self.pump()
  395. self.assertEqual(channel.code, 200)
  396. if self.test_image.expected_scaled:
  397. self.assertEqual(
  398. channel.result["body"],
  399. self.test_image.expected_scaled,
  400. channel.result["body"],
  401. )
  402. # Deleting the thumbnail on disk then re-requesting it should work as
  403. # Synapse should regenerate missing thumbnails.
  404. origin, media_id = self.media_id.split("/")
  405. info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
  406. assert info is not None
  407. file_id = info["filesystem_id"]
  408. thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
  409. origin, file_id
  410. )
  411. shutil.rmtree(thumbnail_dir, ignore_errors=True)
  412. channel = make_request(
  413. self.reactor,
  414. FakeSite(self.thumbnail_resource, self.reactor),
  415. "GET",
  416. self.media_id + params,
  417. shorthand=False,
  418. await_result=False,
  419. )
  420. self.pump()
  421. self.assertEqual(channel.code, 200)
  422. if self.test_image.expected_scaled:
  423. self.assertEqual(
  424. channel.result["body"],
  425. self.test_image.expected_scaled,
  426. channel.result["body"],
  427. )
  428. def _test_thumbnail(
  429. self,
  430. method: str,
  431. expected_body: Optional[bytes],
  432. expected_found: bool,
  433. unable_to_thumbnail: bool = False,
  434. ) -> None:
  435. """Test the given thumbnailing method works as expected.
  436. Args:
  437. method: The thumbnailing method to use (crop, scale).
  438. expected_body: The expected bytes from thumbnailing, or None if
  439. test should just check for a valid image.
  440. expected_found: True if the file should exist on the server, or False if
  441. a 404/400 is expected.
  442. unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
  443. False if the thumbnailing should succeed or a normal 404 is expected.
  444. """
  445. params = "?width=32&height=32&method=" + method
  446. channel = make_request(
  447. self.reactor,
  448. FakeSite(self.thumbnail_resource, self.reactor),
  449. "GET",
  450. self.media_id + params,
  451. shorthand=False,
  452. await_result=False,
  453. )
  454. self.pump()
  455. headers = {
  456. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  457. b"Content-Type": [self.test_image.content_type],
  458. }
  459. self.fetches[0][0].callback(
  460. (self.test_image.data, (len(self.test_image.data), headers))
  461. )
  462. self.pump()
  463. if expected_found:
  464. self.assertEqual(channel.code, 200)
  465. self.assertEqual(
  466. channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
  467. [b"cross-origin"],
  468. )
  469. if expected_body is not None:
  470. self.assertEqual(
  471. channel.result["body"], expected_body, channel.result["body"]
  472. )
  473. else:
  474. # ensure that the result is at least some valid image
  475. Image.open(BytesIO(channel.result["body"]))
  476. elif unable_to_thumbnail:
  477. # A 400 with a JSON body.
  478. self.assertEqual(channel.code, 400)
  479. self.assertEqual(
  480. channel.json_body,
  481. {
  482. "errcode": "M_UNKNOWN",
  483. "error": "Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
  484. },
  485. )
  486. else:
  487. # A 404 with a JSON body.
  488. self.assertEqual(channel.code, 404)
  489. self.assertEqual(
  490. channel.json_body,
  491. {
  492. "errcode": "M_NOT_FOUND",
  493. "error": "Not found [b'example.com', b'12345']",
  494. },
  495. )
  496. @parameterized.expand([("crop", 16), ("crop", 64), ("scale", 16), ("scale", 64)])
  497. def test_same_quality(self, method: str, desired_size: int) -> None:
  498. """Test that choosing between thumbnails with the same quality rating succeeds.
  499. We are not particular about which thumbnail is chosen."""
  500. self.assertIsNotNone(
  501. self.thumbnail_resource._select_thumbnail(
  502. desired_width=desired_size,
  503. desired_height=desired_size,
  504. desired_method=method,
  505. desired_type=self.test_image.content_type,
  506. # Provide two identical thumbnails which are guaranteed to have the same
  507. # quality rating.
  508. thumbnail_infos=[
  509. {
  510. "thumbnail_width": 32,
  511. "thumbnail_height": 32,
  512. "thumbnail_method": method,
  513. "thumbnail_type": self.test_image.content_type,
  514. "thumbnail_length": 256,
  515. "filesystem_id": f"thumbnail1{self.test_image.extension.decode()}",
  516. },
  517. {
  518. "thumbnail_width": 32,
  519. "thumbnail_height": 32,
  520. "thumbnail_method": method,
  521. "thumbnail_type": self.test_image.content_type,
  522. "thumbnail_length": 256,
  523. "filesystem_id": f"thumbnail2{self.test_image.extension.decode()}",
  524. },
  525. ],
  526. file_id=f"image{self.test_image.extension.decode()}",
  527. url_cache=None,
  528. server_name=None,
  529. )
  530. )
  531. def test_x_robots_tag_header(self) -> None:
  532. """
  533. Tests that the `X-Robots-Tag` header is present, which informs web crawlers
  534. to not index, archive, or follow links in media.
  535. """
  536. channel = self._req(b"inline; filename=out" + self.test_image.extension)
  537. headers = channel.headers
  538. self.assertEqual(
  539. headers.getRawHeaders(b"X-Robots-Tag"),
  540. [b"noindex, nofollow, noarchive, noimageindex"],
  541. )
  542. def test_cross_origin_resource_policy_header(self) -> None:
  543. """
  544. Test that the Cross-Origin-Resource-Policy header is set to "cross-origin"
  545. allowing web clients to embed media from the downloads API.
  546. """
  547. channel = self._req(b"inline; filename=out" + self.test_image.extension)
  548. headers = channel.headers
  549. self.assertEqual(
  550. headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
  551. [b"cross-origin"],
  552. )
  553. class TestSpamCheckerLegacy:
  554. """A spam checker module that rejects all media that includes the bytes
  555. `evil`.
  556. Uses the legacy Spam-Checker API.
  557. """
  558. def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None:
  559. self.config = config
  560. self.api = api
  561. @staticmethod
  562. def parse_config(config: Dict[str, Any]) -> Dict[str, Any]:
  563. return config
  564. async def check_event_for_spam(self, event: EventBase) -> Union[bool, str]:
  565. return False # allow all events
  566. async def user_may_invite(
  567. self,
  568. inviter_userid: str,
  569. invitee_userid: str,
  570. room_id: str,
  571. ) -> bool:
  572. return True # allow all invites
  573. async def user_may_create_room(self, userid: str) -> bool:
  574. return True # allow all room creations
  575. async def user_may_create_room_alias(
  576. self, userid: str, room_alias: RoomAlias
  577. ) -> bool:
  578. return True # allow all room aliases
  579. async def user_may_publish_room(self, userid: str, room_id: str) -> bool:
  580. return True # allow publishing of all rooms
  581. async def check_media_file_for_spam(
  582. self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
  583. ) -> bool:
  584. buf = BytesIO()
  585. await file_wrapper.write_chunks_to(buf.write)
  586. return b"evil" in buf.getvalue()
  587. class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase):
  588. servlets = [
  589. login.register_servlets,
  590. admin.register_servlets,
  591. ]
  592. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  593. self.user = self.register_user("user", "pass")
  594. self.tok = self.login("user", "pass")
  595. # Allow for uploading and downloading to/from the media repo
  596. self.media_repo = hs.get_media_repository_resource()
  597. self.download_resource = self.media_repo.children[b"download"]
  598. self.upload_resource = self.media_repo.children[b"upload"]
  599. load_legacy_spam_checkers(hs)
  600. def default_config(self) -> Dict[str, Any]:
  601. config = default_config("test")
  602. config.update(
  603. {
  604. "spam_checker": [
  605. {
  606. "module": TestSpamCheckerLegacy.__module__
  607. + ".TestSpamCheckerLegacy",
  608. "config": {},
  609. }
  610. ]
  611. }
  612. )
  613. return config
  614. def test_upload_innocent(self) -> None:
  615. """Attempt to upload some innocent data that should be allowed."""
  616. self.helper.upload_media(
  617. self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
  618. )
  619. def test_upload_ban(self) -> None:
  620. """Attempt to upload some data that includes bytes "evil", which should
  621. get rejected by the spam checker.
  622. """
  623. data = b"Some evil data"
  624. self.helper.upload_media(
  625. self.upload_resource, data, tok=self.tok, expect_code=400
  626. )
  627. EVIL_DATA = b"Some evil data"
  628. EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API"
  629. class SpamCheckerTestCase(unittest.HomeserverTestCase):
  630. servlets = [
  631. login.register_servlets,
  632. admin.register_servlets,
  633. ]
  634. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  635. self.user = self.register_user("user", "pass")
  636. self.tok = self.login("user", "pass")
  637. # Allow for uploading and downloading to/from the media repo
  638. self.media_repo = hs.get_media_repository_resource()
  639. self.download_resource = self.media_repo.children[b"download"]
  640. self.upload_resource = self.media_repo.children[b"upload"]
  641. hs.get_module_api().register_spam_checker_callbacks(
  642. check_media_file_for_spam=self.check_media_file_for_spam
  643. )
  644. async def check_media_file_for_spam(
  645. self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
  646. ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, JsonDict]]:
  647. buf = BytesIO()
  648. await file_wrapper.write_chunks_to(buf.write)
  649. if buf.getvalue() == EVIL_DATA:
  650. return Codes.FORBIDDEN
  651. elif buf.getvalue() == EVIL_DATA_EXPERIMENT:
  652. return (Codes.FORBIDDEN, {})
  653. else:
  654. return "NOT_SPAM"
  655. def test_upload_innocent(self) -> None:
  656. """Attempt to upload some innocent data that should be allowed."""
  657. self.helper.upload_media(
  658. self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
  659. )
  660. def test_upload_ban(self) -> None:
  661. """Attempt to upload some data that includes bytes "evil", which should
  662. get rejected by the spam checker.
  663. """
  664. self.helper.upload_media(
  665. self.upload_resource, EVIL_DATA, tok=self.tok, expect_code=400
  666. )
  667. self.helper.upload_media(
  668. self.upload_resource,
  669. EVIL_DATA_EXPERIMENT,
  670. tok=self.tok,
  671. expect_code=400,
  672. )