test_media_storage.py 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782
  1. # Copyright 2018-2021 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
import os
import shutil
import tempfile
from binascii import unhexlify
from io import BytesIO
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Union
from unittest.mock import Mock
from urllib import parse

import attr
from parameterized import parameterized, parameterized_class
from PIL import Image as Image
from typing_extensions import Literal

from twisted.internet import defer
from twisted.internet.defer import Deferred
from twisted.test.proto_helpers import MemoryReactor

from synapse.api.errors import Codes
from synapse.events import EventBase
from synapse.events.spamcheck import load_legacy_spam_checkers
from synapse.logging.context import make_deferred_yieldable
from synapse.module_api import ModuleApi
from synapse.rest import admin
from synapse.rest.client import login
from synapse.rest.media.v1._base import FileInfo
from synapse.rest.media.v1.filepath import MediaFilePaths
from synapse.rest.media.v1.media_storage import MediaStorage, ReadableFileWrapper
from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend
from synapse.server import HomeServer
from synapse.types import RoomAlias
from synapse.util import Clock

from tests import unittest
from tests.server import FakeChannel, FakeSite, make_request
from tests.test_utils import SMALL_PNG
from tests.utils import default_config
  47. class MediaStorageTests(unittest.HomeserverTestCase):
  48. needs_threadpool = True
  49. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  50. self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
  51. self.addCleanup(shutil.rmtree, self.test_dir)
  52. self.primary_base_path = os.path.join(self.test_dir, "primary")
  53. self.secondary_base_path = os.path.join(self.test_dir, "secondary")
  54. hs.config.media.media_store_path = self.primary_base_path
  55. storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]
  56. self.filepaths = MediaFilePaths(self.primary_base_path)
  57. self.media_storage = MediaStorage(
  58. hs, self.primary_base_path, self.filepaths, storage_providers
  59. )
  60. def test_ensure_media_is_in_local_cache(self) -> None:
  61. media_id = "some_media_id"
  62. test_body = "Test\n"
  63. # First we create a file that is in a storage provider but not in the
  64. # local primary media store
  65. rel_path = self.filepaths.local_media_filepath_rel(media_id)
  66. secondary_path = os.path.join(self.secondary_base_path, rel_path)
  67. os.makedirs(os.path.dirname(secondary_path))
  68. with open(secondary_path, "w") as f:
  69. f.write(test_body)
  70. # Now we run ensure_media_is_in_local_cache, which should copy the file
  71. # to the local cache.
  72. file_info = FileInfo(None, media_id)
  73. # This uses a real blocking threadpool so we have to wait for it to be
  74. # actually done :/
  75. x = defer.ensureDeferred(
  76. self.media_storage.ensure_media_is_in_local_cache(file_info)
  77. )
  78. # Hotloop until the threadpool does its job...
  79. self.wait_on_thread(x)
  80. local_path = self.get_success(x)
  81. self.assertTrue(os.path.exists(local_path))
  82. # Asserts the file is under the expected local cache directory
  83. self.assertEqual(
  84. os.path.commonprefix([self.primary_base_path, local_path]),
  85. self.primary_base_path,
  86. )
  87. with open(local_path) as f:
  88. body = f.read()
  89. self.assertEqual(test_body, body)
@attr.s(auto_attribs=True, slots=True, frozen=True)
class _TestImage:
    """An image for testing thumbnailing, together with the expected results.

    Instances are immutable (`frozen=True`). The first three fields have no
    default and are passed positionally by the parameterised test cases, so
    their order matters.

    Attributes:
        data: The raw image to thumbnail
        content_type: The type of the image as a content type, e.g. "image/png"
        extension: The extension associated with the format, e.g. ".png"
        expected_cropped: The expected bytes from cropped thumbnailing, or None if
            test should just check for success.
        expected_scaled: The expected bytes from scaled thumbnailing, or None if
            test should just check for a valid image returned.
        expected_found: True if the file should exist on the server, or False if
            a 404/400 is expected.
        unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
            False if the thumbnailing should succeed or a normal 404 is expected.
    """

    data: bytes
    content_type: bytes
    extension: bytes
    expected_cropped: Optional[bytes] = None
    expected_scaled: Optional[bytes] = None
    expected_found: bool = True
    unable_to_thumbnail: bool = False
  113. @parameterized_class(
  114. ("test_image",),
  115. [
  116. # small png
  117. (
  118. _TestImage(
  119. SMALL_PNG,
  120. b"image/png",
  121. b".png",
  122. unhexlify(
  123. b"89504e470d0a1a0a0000000d4948445200000020000000200806"
  124. b"000000737a7af40000001a49444154789cedc101010000008220"
  125. b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
  126. b"44ae426082"
  127. ),
  128. unhexlify(
  129. b"89504e470d0a1a0a0000000d4948445200000001000000010806"
  130. b"0000001f15c4890000000d49444154789c636060606000000005"
  131. b"0001a5f645400000000049454e44ae426082"
  132. ),
  133. ),
  134. ),
  135. # small png with transparency.
  136. (
  137. _TestImage(
  138. unhexlify(
  139. b"89504e470d0a1a0a0000000d49484452000000010000000101000"
  140. b"00000376ef9240000000274524e5300010194fdae0000000a4944"
  141. b"4154789c636800000082008177cd72b60000000049454e44ae426"
  142. b"082"
  143. ),
  144. b"image/png",
  145. b".png",
  146. # Note that we don't check the output since it varies across
  147. # different versions of Pillow.
  148. ),
  149. ),
  150. # small lossless webp
  151. (
  152. _TestImage(
  153. unhexlify(
  154. b"524946461a000000574542505650384c0d0000002f0000001007"
  155. b"1011118888fe0700"
  156. ),
  157. b"image/webp",
  158. b".webp",
  159. ),
  160. ),
  161. # an empty file
  162. (
  163. _TestImage(
  164. b"",
  165. b"image/gif",
  166. b".gif",
  167. expected_found=False,
  168. unable_to_thumbnail=True,
  169. ),
  170. ),
  171. ],
  172. )
  173. class MediaRepoTests(unittest.HomeserverTestCase):
  174. hijack_auth = True
  175. user_id = "@test:user"
  176. def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
  177. self.fetches = []
  178. def get_file(
  179. destination: str,
  180. path: str,
  181. output_stream: BinaryIO,
  182. args: Optional[Dict[str, Union[str, List[str]]]] = None,
  183. max_size: Optional[int] = None,
  184. ) -> Deferred:
  185. """
  186. Returns tuple[int,dict,str,int] of file length, response headers,
  187. absolute URI, and response code.
  188. """
  189. def write_to(r):
  190. data, response = r
  191. output_stream.write(data)
  192. return response
  193. d = Deferred()
  194. d.addCallback(write_to)
  195. self.fetches.append((d, destination, path, args))
  196. return make_deferred_yieldable(d)
  197. client = Mock()
  198. client.get_file = get_file
  199. self.storage_path = self.mktemp()
  200. self.media_store_path = self.mktemp()
  201. os.mkdir(self.storage_path)
  202. os.mkdir(self.media_store_path)
  203. config = self.default_config()
  204. config["media_store_path"] = self.media_store_path
  205. config["max_image_pixels"] = 2000000
  206. provider_config = {
  207. "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend",
  208. "store_local": True,
  209. "store_synchronous": False,
  210. "store_remote": True,
  211. "config": {"directory": self.storage_path},
  212. }
  213. config["media_storage_providers"] = [provider_config]
  214. hs = self.setup_test_homeserver(config=config, federation_http_client=client)
  215. return hs
  216. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  217. media_resource = hs.get_media_repository_resource()
  218. self.download_resource = media_resource.children[b"download"]
  219. self.thumbnail_resource = media_resource.children[b"thumbnail"]
  220. self.store = hs.get_datastores().main
  221. self.media_repo = hs.get_media_repository()
  222. self.media_id = "example.com/12345"
  223. def _req(
  224. self, content_disposition: Optional[bytes], include_content_type: bool = True
  225. ) -> FakeChannel:
  226. channel = make_request(
  227. self.reactor,
  228. FakeSite(self.download_resource, self.reactor),
  229. "GET",
  230. self.media_id,
  231. shorthand=False,
  232. await_result=False,
  233. )
  234. self.pump()
  235. # We've made one fetch, to example.com, using the media URL, and asking
  236. # the other server not to do a remote fetch
  237. self.assertEqual(len(self.fetches), 1)
  238. self.assertEqual(self.fetches[0][1], "example.com")
  239. self.assertEqual(
  240. self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id
  241. )
  242. self.assertEqual(self.fetches[0][3], {"allow_remote": "false"})
  243. headers = {
  244. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  245. }
  246. if include_content_type:
  247. headers[b"Content-Type"] = [self.test_image.content_type]
  248. if content_disposition:
  249. headers[b"Content-Disposition"] = [content_disposition]
  250. self.fetches[0][0].callback(
  251. (self.test_image.data, (len(self.test_image.data), headers))
  252. )
  253. self.pump()
  254. self.assertEqual(channel.code, 200)
  255. return channel
  256. def test_handle_missing_content_type(self) -> None:
  257. channel = self._req(
  258. b"inline; filename=out" + self.test_image.extension,
  259. include_content_type=False,
  260. )
  261. headers = channel.headers
  262. self.assertEqual(channel.code, 200)
  263. self.assertEqual(
  264. headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"]
  265. )
  266. def test_disposition_filename_ascii(self) -> None:
  267. """
  268. If the filename is filename=<ascii> then Synapse will decode it as an
  269. ASCII string, and use filename= in the response.
  270. """
  271. channel = self._req(b"inline; filename=out" + self.test_image.extension)
  272. headers = channel.headers
  273. self.assertEqual(
  274. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  275. )
  276. self.assertEqual(
  277. headers.getRawHeaders(b"Content-Disposition"),
  278. [b"inline; filename=out" + self.test_image.extension],
  279. )
  280. def test_disposition_filenamestar_utf8escaped(self) -> None:
  281. """
  282. If the filename is filename=*utf8''<utf8 escaped> then Synapse will
  283. correctly decode it as the UTF-8 string, and use filename* in the
  284. response.
  285. """
  286. filename = parse.quote("\u2603".encode()).encode("ascii")
  287. channel = self._req(
  288. b"inline; filename*=utf-8''" + filename + self.test_image.extension
  289. )
  290. headers = channel.headers
  291. self.assertEqual(
  292. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  293. )
  294. self.assertEqual(
  295. headers.getRawHeaders(b"Content-Disposition"),
  296. [b"inline; filename*=utf-8''" + filename + self.test_image.extension],
  297. )
  298. def test_disposition_none(self) -> None:
  299. """
  300. If there is no filename, one isn't passed on in the Content-Disposition
  301. of the request.
  302. """
  303. channel = self._req(None)
  304. headers = channel.headers
  305. self.assertEqual(
  306. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  307. )
  308. self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None)
  309. def test_thumbnail_crop(self) -> None:
  310. """Test that a cropped remote thumbnail is available."""
  311. self._test_thumbnail(
  312. "crop",
  313. self.test_image.expected_cropped,
  314. expected_found=self.test_image.expected_found,
  315. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  316. )
  317. def test_thumbnail_scale(self) -> None:
  318. """Test that a scaled remote thumbnail is available."""
  319. self._test_thumbnail(
  320. "scale",
  321. self.test_image.expected_scaled,
  322. expected_found=self.test_image.expected_found,
  323. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  324. )
  325. def test_invalid_type(self) -> None:
  326. """An invalid thumbnail type is never available."""
  327. self._test_thumbnail(
  328. "invalid",
  329. None,
  330. expected_found=False,
  331. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  332. )
  333. @unittest.override_config(
  334. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]}
  335. )
  336. def test_no_thumbnail_crop(self) -> None:
  337. """
  338. Override the config to generate only scaled thumbnails, but request a cropped one.
  339. """
  340. self._test_thumbnail(
  341. "crop",
  342. None,
  343. expected_found=False,
  344. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  345. )
  346. @unittest.override_config(
  347. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]}
  348. )
  349. def test_no_thumbnail_scale(self) -> None:
  350. """
  351. Override the config to generate only cropped thumbnails, but request a scaled one.
  352. """
  353. self._test_thumbnail(
  354. "scale",
  355. None,
  356. expected_found=False,
  357. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  358. )
  359. def test_thumbnail_repeated_thumbnail(self) -> None:
  360. """Test that fetching the same thumbnail works, and deleting the on disk
  361. thumbnail regenerates it.
  362. """
  363. self._test_thumbnail(
  364. "scale",
  365. self.test_image.expected_scaled,
  366. expected_found=self.test_image.expected_found,
  367. unable_to_thumbnail=self.test_image.unable_to_thumbnail,
  368. )
  369. if not self.test_image.expected_found:
  370. return
  371. # Fetching again should work, without re-requesting the image from the
  372. # remote.
  373. params = "?width=32&height=32&method=scale"
  374. channel = make_request(
  375. self.reactor,
  376. FakeSite(self.thumbnail_resource, self.reactor),
  377. "GET",
  378. self.media_id + params,
  379. shorthand=False,
  380. await_result=False,
  381. )
  382. self.pump()
  383. self.assertEqual(channel.code, 200)
  384. if self.test_image.expected_scaled:
  385. self.assertEqual(
  386. channel.result["body"],
  387. self.test_image.expected_scaled,
  388. channel.result["body"],
  389. )
  390. # Deleting the thumbnail on disk then re-requesting it should work as
  391. # Synapse should regenerate missing thumbnails.
  392. origin, media_id = self.media_id.split("/")
  393. info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
  394. file_id = info["filesystem_id"]
  395. thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
  396. origin, file_id
  397. )
  398. shutil.rmtree(thumbnail_dir, ignore_errors=True)
  399. channel = make_request(
  400. self.reactor,
  401. FakeSite(self.thumbnail_resource, self.reactor),
  402. "GET",
  403. self.media_id + params,
  404. shorthand=False,
  405. await_result=False,
  406. )
  407. self.pump()
  408. self.assertEqual(channel.code, 200)
  409. if self.test_image.expected_scaled:
  410. self.assertEqual(
  411. channel.result["body"],
  412. self.test_image.expected_scaled,
  413. channel.result["body"],
  414. )
  415. def _test_thumbnail(
  416. self,
  417. method: str,
  418. expected_body: Optional[bytes],
  419. expected_found: bool,
  420. unable_to_thumbnail: bool = False,
  421. ) -> None:
  422. """Test the given thumbnailing method works as expected.
  423. Args:
  424. method: The thumbnailing method to use (crop, scale).
  425. expected_body: The expected bytes from thumbnailing, or None if
  426. test should just check for a valid image.
  427. expected_found: True if the file should exist on the server, or False if
  428. a 404/400 is expected.
  429. unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
  430. False if the thumbnailing should succeed or a normal 404 is expected.
  431. """
  432. params = "?width=32&height=32&method=" + method
  433. channel = make_request(
  434. self.reactor,
  435. FakeSite(self.thumbnail_resource, self.reactor),
  436. "GET",
  437. self.media_id + params,
  438. shorthand=False,
  439. await_result=False,
  440. )
  441. self.pump()
  442. headers = {
  443. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  444. b"Content-Type": [self.test_image.content_type],
  445. }
  446. self.fetches[0][0].callback(
  447. (self.test_image.data, (len(self.test_image.data), headers))
  448. )
  449. self.pump()
  450. if expected_found:
  451. self.assertEqual(channel.code, 200)
  452. self.assertEqual(
  453. channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
  454. [b"cross-origin"],
  455. )
  456. if expected_body is not None:
  457. self.assertEqual(
  458. channel.result["body"], expected_body, channel.result["body"]
  459. )
  460. else:
  461. # ensure that the result is at least some valid image
  462. Image.open(BytesIO(channel.result["body"]))
  463. elif unable_to_thumbnail:
  464. # A 400 with a JSON body.
  465. self.assertEqual(channel.code, 400)
  466. self.assertEqual(
  467. channel.json_body,
  468. {
  469. "errcode": "M_UNKNOWN",
  470. "error": "Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
  471. },
  472. )
  473. else:
  474. # A 404 with a JSON body.
  475. self.assertEqual(channel.code, 404)
  476. self.assertEqual(
  477. channel.json_body,
  478. {
  479. "errcode": "M_NOT_FOUND",
  480. "error": "Not found [b'example.com', b'12345']",
  481. },
  482. )
  483. @parameterized.expand([("crop", 16), ("crop", 64), ("scale", 16), ("scale", 64)])
  484. def test_same_quality(self, method: str, desired_size: int) -> None:
  485. """Test that choosing between thumbnails with the same quality rating succeeds.
  486. We are not particular about which thumbnail is chosen."""
  487. self.assertIsNotNone(
  488. self.thumbnail_resource._select_thumbnail(
  489. desired_width=desired_size,
  490. desired_height=desired_size,
  491. desired_method=method,
  492. desired_type=self.test_image.content_type,
  493. # Provide two identical thumbnails which are guaranteed to have the same
  494. # quality rating.
  495. thumbnail_infos=[
  496. {
  497. "thumbnail_width": 32,
  498. "thumbnail_height": 32,
  499. "thumbnail_method": method,
  500. "thumbnail_type": self.test_image.content_type,
  501. "thumbnail_length": 256,
  502. "filesystem_id": f"thumbnail1{self.test_image.extension}",
  503. },
  504. {
  505. "thumbnail_width": 32,
  506. "thumbnail_height": 32,
  507. "thumbnail_method": method,
  508. "thumbnail_type": self.test_image.content_type,
  509. "thumbnail_length": 256,
  510. "filesystem_id": f"thumbnail2{self.test_image.extension}",
  511. },
  512. ],
  513. file_id=f"image{self.test_image.extension}",
  514. url_cache=None,
  515. server_name=None,
  516. )
  517. )
  518. def test_x_robots_tag_header(self) -> None:
  519. """
  520. Tests that the `X-Robots-Tag` header is present, which informs web crawlers
  521. to not index, archive, or follow links in media.
  522. """
  523. channel = self._req(b"inline; filename=out" + self.test_image.extension)
  524. headers = channel.headers
  525. self.assertEqual(
  526. headers.getRawHeaders(b"X-Robots-Tag"),
  527. [b"noindex, nofollow, noarchive, noimageindex"],
  528. )
  529. def test_cross_origin_resource_policy_header(self) -> None:
  530. """
  531. Test that the Cross-Origin-Resource-Policy header is set to "cross-origin"
  532. allowing web clients to embed media from the downloads API.
  533. """
  534. channel = self._req(b"inline; filename=out" + self.test_image.extension)
  535. headers = channel.headers
  536. self.assertEqual(
  537. headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
  538. [b"cross-origin"],
  539. )
  540. class TestSpamCheckerLegacy:
  541. """A spam checker module that rejects all media that includes the bytes
  542. `evil`.
  543. Uses the legacy Spam-Checker API.
  544. """
  545. def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None:
  546. self.config = config
  547. self.api = api
  548. def parse_config(config: Dict[str, Any]) -> Dict[str, Any]:
  549. return config
  550. async def check_event_for_spam(self, event: EventBase) -> Union[bool, str]:
  551. return False # allow all events
  552. async def user_may_invite(
  553. self,
  554. inviter_userid: str,
  555. invitee_userid: str,
  556. room_id: str,
  557. ) -> bool:
  558. return True # allow all invites
  559. async def user_may_create_room(self, userid: str) -> bool:
  560. return True # allow all room creations
  561. async def user_may_create_room_alias(
  562. self, userid: str, room_alias: RoomAlias
  563. ) -> bool:
  564. return True # allow all room aliases
  565. async def user_may_publish_room(self, userid: str, room_id: str) -> bool:
  566. return True # allow publishing of all rooms
  567. async def check_media_file_for_spam(
  568. self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
  569. ) -> bool:
  570. buf = BytesIO()
  571. await file_wrapper.write_chunks_to(buf.write)
  572. return b"evil" in buf.getvalue()
  573. class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase):
  574. servlets = [
  575. login.register_servlets,
  576. admin.register_servlets,
  577. ]
  578. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  579. self.user = self.register_user("user", "pass")
  580. self.tok = self.login("user", "pass")
  581. # Allow for uploading and downloading to/from the media repo
  582. self.media_repo = hs.get_media_repository_resource()
  583. self.download_resource = self.media_repo.children[b"download"]
  584. self.upload_resource = self.media_repo.children[b"upload"]
  585. load_legacy_spam_checkers(hs)
  586. def default_config(self) -> Dict[str, Any]:
  587. config = default_config("test")
  588. config.update(
  589. {
  590. "spam_checker": [
  591. {
  592. "module": TestSpamCheckerLegacy.__module__
  593. + ".TestSpamCheckerLegacy",
  594. "config": {},
  595. }
  596. ]
  597. }
  598. )
  599. return config
  600. def test_upload_innocent(self) -> None:
  601. """Attempt to upload some innocent data that should be allowed."""
  602. self.helper.upload_media(
  603. self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
  604. )
  605. def test_upload_ban(self) -> None:
  606. """Attempt to upload some data that includes bytes "evil", which should
  607. get rejected by the spam checker.
  608. """
  609. data = b"Some evil data"
  610. self.helper.upload_media(
  611. self.upload_resource, data, tok=self.tok, expect_code=400
  612. )
# Upload payload for which `SpamCheckerTestCase.check_media_file_for_spam`
# returns a bare `Codes.FORBIDDEN`.
EVIL_DATA = b"Some evil data"
# Upload payload for which the same callback returns the tuple form
# `(Codes.FORBIDDEN, {})`.
EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API"
  615. class SpamCheckerTestCase(unittest.HomeserverTestCase):
  616. servlets = [
  617. login.register_servlets,
  618. admin.register_servlets,
  619. ]
  620. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  621. self.user = self.register_user("user", "pass")
  622. self.tok = self.login("user", "pass")
  623. # Allow for uploading and downloading to/from the media repo
  624. self.media_repo = hs.get_media_repository_resource()
  625. self.download_resource = self.media_repo.children[b"download"]
  626. self.upload_resource = self.media_repo.children[b"upload"]
  627. hs.get_module_api().register_spam_checker_callbacks(
  628. check_media_file_for_spam=self.check_media_file_for_spam
  629. )
  630. async def check_media_file_for_spam(
  631. self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
  632. ) -> Union[Codes, Literal["NOT_SPAM"]]:
  633. buf = BytesIO()
  634. await file_wrapper.write_chunks_to(buf.write)
  635. if buf.getvalue() == EVIL_DATA:
  636. return Codes.FORBIDDEN
  637. elif buf.getvalue() == EVIL_DATA_EXPERIMENT:
  638. return (Codes.FORBIDDEN, {})
  639. else:
  640. return "NOT_SPAM"
  641. def test_upload_innocent(self) -> None:
  642. """Attempt to upload some innocent data that should be allowed."""
  643. self.helper.upload_media(
  644. self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
  645. )
  646. def test_upload_ban(self) -> None:
  647. """Attempt to upload some data that includes bytes "evil", which should
  648. get rejected by the spam checker.
  649. """
  650. self.helper.upload_media(
  651. self.upload_resource, EVIL_DATA, tok=self.tok, expect_code=400
  652. )
  653. self.helper.upload_media(
  654. self.upload_resource,
  655. EVIL_DATA_EXPERIMENT,
  656. tok=self.tok,
  657. expect_code=400,
  658. )