test_media_storage.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2018 New Vector Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import os
  16. import shutil
  17. import tempfile
  18. from binascii import unhexlify
  19. from io import BytesIO
  20. from typing import Optional
  21. from urllib import parse
  22. from mock import Mock
  23. import attr
  24. from parameterized import parameterized_class
  25. from PIL import Image as Image
  26. from twisted.internet import defer
  27. from twisted.internet.defer import Deferred
  28. from synapse.logging.context import make_deferred_yieldable
  29. from synapse.rest import admin
  30. from synapse.rest.client.v1 import login
  31. from synapse.rest.media.v1._base import FileInfo
  32. from synapse.rest.media.v1.filepath import MediaFilePaths
  33. from synapse.rest.media.v1.media_storage import MediaStorage
  34. from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend
  35. from tests import unittest
  36. from tests.server import FakeSite, make_request
  37. from tests.utils import default_config
  38. class MediaStorageTests(unittest.HomeserverTestCase):
  39. needs_threadpool = True
  40. def prepare(self, reactor, clock, hs):
  41. self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
  42. self.addCleanup(shutil.rmtree, self.test_dir)
  43. self.primary_base_path = os.path.join(self.test_dir, "primary")
  44. self.secondary_base_path = os.path.join(self.test_dir, "secondary")
  45. hs.config.media_store_path = self.primary_base_path
  46. storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]
  47. self.filepaths = MediaFilePaths(self.primary_base_path)
  48. self.media_storage = MediaStorage(
  49. hs, self.primary_base_path, self.filepaths, storage_providers
  50. )
  51. def test_ensure_media_is_in_local_cache(self):
  52. media_id = "some_media_id"
  53. test_body = "Test\n"
  54. # First we create a file that is in a storage provider but not in the
  55. # local primary media store
  56. rel_path = self.filepaths.local_media_filepath_rel(media_id)
  57. secondary_path = os.path.join(self.secondary_base_path, rel_path)
  58. os.makedirs(os.path.dirname(secondary_path))
  59. with open(secondary_path, "w") as f:
  60. f.write(test_body)
  61. # Now we run ensure_media_is_in_local_cache, which should copy the file
  62. # to the local cache.
  63. file_info = FileInfo(None, media_id)
  64. # This uses a real blocking threadpool so we have to wait for it to be
  65. # actually done :/
  66. x = defer.ensureDeferred(
  67. self.media_storage.ensure_media_is_in_local_cache(file_info)
  68. )
  69. # Hotloop until the threadpool does its job...
  70. self.wait_on_thread(x)
  71. local_path = self.get_success(x)
  72. self.assertTrue(os.path.exists(local_path))
  73. # Asserts the file is under the expected local cache directory
  74. self.assertEquals(
  75. os.path.commonprefix([self.primary_base_path, local_path]),
  76. self.primary_base_path,
  77. )
  78. with open(local_path) as f:
  79. body = f.read()
  80. self.assertEqual(test_body, body)
@attr.s
class _TestImage:
    """An image for testing thumbnailing with the expected results

    Attributes:
        data: The raw image to thumbnail
        content_type: The type of the image as a content type, e.g. "image/png"
        extension: The extension associated with the format, e.g. ".png"
        expected_cropped: The expected bytes from cropped thumbnailing, or None if
            the test should only check that a valid image is returned.
        expected_scaled: The expected bytes from scaled thumbnailing, or None if
            the test should only check that a valid image is returned.
        expected_found: Whether thumbnail requests for this image are expected
            to succeed (HTTP 200) rather than return a 404. Defaults to True.
    """

    # Field order matters: the parameterized test data constructs instances
    # positionally.
    data = attr.ib(type=bytes)
    content_type = attr.ib(type=bytes)
    extension = attr.ib(type=bytes)
    expected_cropped = attr.ib(type=Optional[bytes])
    expected_scaled = attr.ib(type=Optional[bytes])
    expected_found = attr.ib(default=True, type=bool)
  99. @parameterized_class(
  100. ("test_image",),
  101. [
  102. # smoll png
  103. (
  104. _TestImage(
  105. unhexlify(
  106. b"89504e470d0a1a0a0000000d4948445200000001000000010806"
  107. b"0000001f15c4890000000a49444154789c63000100000500010d"
  108. b"0a2db40000000049454e44ae426082"
  109. ),
  110. b"image/png",
  111. b".png",
  112. unhexlify(
  113. b"89504e470d0a1a0a0000000d4948445200000020000000200806"
  114. b"000000737a7af40000001a49444154789cedc101010000008220"
  115. b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
  116. b"44ae426082"
  117. ),
  118. unhexlify(
  119. b"89504e470d0a1a0a0000000d4948445200000001000000010806"
  120. b"0000001f15c4890000000d49444154789c636060606000000005"
  121. b"0001a5f645400000000049454e44ae426082"
  122. ),
  123. ),
  124. ),
  125. # small lossless webp
  126. (
  127. _TestImage(
  128. unhexlify(
  129. b"524946461a000000574542505650384c0d0000002f0000001007"
  130. b"1011118888fe0700"
  131. ),
  132. b"image/webp",
  133. b".webp",
  134. None,
  135. None,
  136. ),
  137. ),
  138. # an empty file
  139. (_TestImage(b"", b"image/gif", b".gif", None, None, False,),),
  140. ],
  141. )
  142. class MediaRepoTests(unittest.HomeserverTestCase):
  143. hijack_auth = True
  144. user_id = "@test:user"
  145. def make_homeserver(self, reactor, clock):
  146. self.fetches = []
  147. def get_file(destination, path, output_stream, args=None, max_size=None):
  148. """
  149. Returns tuple[int,dict,str,int] of file length, response headers,
  150. absolute URI, and response code.
  151. """
  152. def write_to(r):
  153. data, response = r
  154. output_stream.write(data)
  155. return response
  156. d = Deferred()
  157. d.addCallback(write_to)
  158. self.fetches.append((d, destination, path, args))
  159. return make_deferred_yieldable(d)
  160. client = Mock()
  161. client.get_file = get_file
  162. self.storage_path = self.mktemp()
  163. self.media_store_path = self.mktemp()
  164. os.mkdir(self.storage_path)
  165. os.mkdir(self.media_store_path)
  166. config = self.default_config()
  167. config["media_store_path"] = self.media_store_path
  168. config["max_image_pixels"] = 2000000
  169. provider_config = {
  170. "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend",
  171. "store_local": True,
  172. "store_synchronous": False,
  173. "store_remote": True,
  174. "config": {"directory": self.storage_path},
  175. }
  176. config["media_storage_providers"] = [provider_config]
  177. hs = self.setup_test_homeserver(config=config, federation_http_client=client)
  178. return hs
  179. def prepare(self, reactor, clock, hs):
  180. self.media_repo = hs.get_media_repository_resource()
  181. self.download_resource = self.media_repo.children[b"download"]
  182. self.thumbnail_resource = self.media_repo.children[b"thumbnail"]
  183. self.media_id = "example.com/12345"
  184. def _req(self, content_disposition):
  185. channel = make_request(
  186. self.reactor,
  187. FakeSite(self.download_resource),
  188. "GET",
  189. self.media_id,
  190. shorthand=False,
  191. await_result=False,
  192. )
  193. self.pump()
  194. # We've made one fetch, to example.com, using the media URL, and asking
  195. # the other server not to do a remote fetch
  196. self.assertEqual(len(self.fetches), 1)
  197. self.assertEqual(self.fetches[0][1], "example.com")
  198. self.assertEqual(
  199. self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id
  200. )
  201. self.assertEqual(self.fetches[0][3], {"allow_remote": "false"})
  202. headers = {
  203. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  204. b"Content-Type": [self.test_image.content_type],
  205. }
  206. if content_disposition:
  207. headers[b"Content-Disposition"] = [content_disposition]
  208. self.fetches[0][0].callback(
  209. (self.test_image.data, (len(self.test_image.data), headers))
  210. )
  211. self.pump()
  212. self.assertEqual(channel.code, 200)
  213. return channel
  214. def test_disposition_filename_ascii(self):
  215. """
  216. If the filename is filename=<ascii> then Synapse will decode it as an
  217. ASCII string, and use filename= in the response.
  218. """
  219. channel = self._req(b"inline; filename=out" + self.test_image.extension)
  220. headers = channel.headers
  221. self.assertEqual(
  222. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  223. )
  224. self.assertEqual(
  225. headers.getRawHeaders(b"Content-Disposition"),
  226. [b"inline; filename=out" + self.test_image.extension],
  227. )
  228. def test_disposition_filenamestar_utf8escaped(self):
  229. """
  230. If the filename is filename=*utf8''<utf8 escaped> then Synapse will
  231. correctly decode it as the UTF-8 string, and use filename* in the
  232. response.
  233. """
  234. filename = parse.quote("\u2603".encode("utf8")).encode("ascii")
  235. channel = self._req(
  236. b"inline; filename*=utf-8''" + filename + self.test_image.extension
  237. )
  238. headers = channel.headers
  239. self.assertEqual(
  240. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  241. )
  242. self.assertEqual(
  243. headers.getRawHeaders(b"Content-Disposition"),
  244. [b"inline; filename*=utf-8''" + filename + self.test_image.extension],
  245. )
  246. def test_disposition_none(self):
  247. """
  248. If there is no filename, one isn't passed on in the Content-Disposition
  249. of the request.
  250. """
  251. channel = self._req(None)
  252. headers = channel.headers
  253. self.assertEqual(
  254. headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
  255. )
  256. self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None)
  257. def test_thumbnail_crop(self):
  258. """Test that a cropped remote thumbnail is available."""
  259. self._test_thumbnail(
  260. "crop", self.test_image.expected_cropped, self.test_image.expected_found
  261. )
  262. def test_thumbnail_scale(self):
  263. """Test that a scaled remote thumbnail is available."""
  264. self._test_thumbnail(
  265. "scale", self.test_image.expected_scaled, self.test_image.expected_found
  266. )
  267. def test_invalid_type(self):
  268. """An invalid thumbnail type is never available."""
  269. self._test_thumbnail("invalid", None, False)
  270. @unittest.override_config(
  271. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]}
  272. )
  273. def test_no_thumbnail_crop(self):
  274. """
  275. Override the config to generate only scaled thumbnails, but request a cropped one.
  276. """
  277. self._test_thumbnail("crop", None, False)
  278. @unittest.override_config(
  279. {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]}
  280. )
  281. def test_no_thumbnail_scale(self):
  282. """
  283. Override the config to generate only cropped thumbnails, but request a scaled one.
  284. """
  285. self._test_thumbnail("scale", None, False)
  286. def _test_thumbnail(self, method, expected_body, expected_found):
  287. params = "?width=32&height=32&method=" + method
  288. channel = make_request(
  289. self.reactor,
  290. FakeSite(self.thumbnail_resource),
  291. "GET",
  292. self.media_id + params,
  293. shorthand=False,
  294. await_result=False,
  295. )
  296. self.pump()
  297. headers = {
  298. b"Content-Length": [b"%d" % (len(self.test_image.data))],
  299. b"Content-Type": [self.test_image.content_type],
  300. }
  301. self.fetches[0][0].callback(
  302. (self.test_image.data, (len(self.test_image.data), headers))
  303. )
  304. self.pump()
  305. if expected_found:
  306. self.assertEqual(channel.code, 200)
  307. if expected_body is not None:
  308. self.assertEqual(
  309. channel.result["body"], expected_body, channel.result["body"]
  310. )
  311. else:
  312. # ensure that the result is at least some valid image
  313. Image.open(BytesIO(channel.result["body"]))
  314. else:
  315. # A 404 with a JSON body.
  316. self.assertEqual(channel.code, 404)
  317. self.assertEqual(
  318. channel.json_body,
  319. {
  320. "errcode": "M_NOT_FOUND",
  321. "error": "Not found [b'example.com', b'12345']",
  322. },
  323. )
  324. def test_x_robots_tag_header(self):
  325. """
  326. Tests that the `X-Robots-Tag` header is present, which informs web crawlers
  327. to not index, archive, or follow links in media.
  328. """
  329. channel = self._req(b"inline; filename=out" + self.test_image.extension)
  330. headers = channel.headers
  331. self.assertEqual(
  332. headers.getRawHeaders(b"X-Robots-Tag"),
  333. [b"noindex, nofollow, noarchive, noimageindex"],
  334. )
  335. class TestSpamChecker:
  336. """A spam checker module that rejects all media that includes the bytes
  337. `evil`.
  338. """
  339. def __init__(self, config, api):
  340. self.config = config
  341. self.api = api
  342. def parse_config(config):
  343. return config
  344. async def check_event_for_spam(self, foo):
  345. return False # allow all events
  346. async def user_may_invite(self, inviter_userid, invitee_userid, room_id):
  347. return True # allow all invites
  348. async def user_may_create_room(self, userid):
  349. return True # allow all room creations
  350. async def user_may_create_room_alias(self, userid, room_alias):
  351. return True # allow all room aliases
  352. async def user_may_publish_room(self, userid, room_id):
  353. return True # allow publishing of all rooms
  354. async def check_media_file_for_spam(self, file_wrapper, file_info) -> bool:
  355. buf = BytesIO()
  356. await file_wrapper.write_chunks_to(buf.write)
  357. return b"evil" in buf.getvalue()
  358. class SpamCheckerTestCase(unittest.HomeserverTestCase):
  359. servlets = [
  360. login.register_servlets,
  361. admin.register_servlets,
  362. ]
  363. def prepare(self, reactor, clock, hs):
  364. self.user = self.register_user("user", "pass")
  365. self.tok = self.login("user", "pass")
  366. # Allow for uploading and downloading to/from the media repo
  367. self.media_repo = hs.get_media_repository_resource()
  368. self.download_resource = self.media_repo.children[b"download"]
  369. self.upload_resource = self.media_repo.children[b"upload"]
  370. def default_config(self):
  371. config = default_config("test")
  372. config.update(
  373. {
  374. "spam_checker": [
  375. {
  376. "module": TestSpamChecker.__module__ + ".TestSpamChecker",
  377. "config": {},
  378. }
  379. ]
  380. }
  381. )
  382. return config
  383. def test_upload_innocent(self):
  384. """Attempt to upload some innocent data that should be allowed.
  385. """
  386. image_data = unhexlify(
  387. b"89504e470d0a1a0a0000000d4948445200000001000000010806"
  388. b"0000001f15c4890000000a49444154789c63000100000500010d"
  389. b"0a2db40000000049454e44ae426082"
  390. )
  391. self.helper.upload_media(
  392. self.upload_resource, image_data, tok=self.tok, expect_code=200
  393. )
  394. def test_upload_ban(self):
  395. """Attempt to upload some data that includes bytes "evil", which should
  396. get rejected by the spam checker.
  397. """
  398. data = b"Some evil data"
  399. self.helper.upload_media(
  400. self.upload_resource, data, tok=self.tok, expect_code=400
  401. )