base_resource.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2014-2016 OpenMarket Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. from .thumbnailer import Thumbnailer
  16. from synapse.http.matrixfederationclient import MatrixFederationHttpClient
  17. from synapse.http.server import respond_with_json, finish_request
  18. from synapse.util.stringutils import random_string
  19. from synapse.api.errors import (
  20. cs_error, Codes, SynapseError
  21. )
  22. from twisted.internet import defer, threads
  23. from twisted.web.resource import Resource
  24. from twisted.protocols.basic import FileSender
  25. from synapse.util.async import ObservableDeferred
  26. from synapse.util.stringutils import is_ascii
  27. from synapse.util.logcontext import preserve_context_over_fn
  28. import os
  29. import cgi
  30. import logging
  31. import urllib
  32. import urlparse
  33. logger = logging.getLogger(__name__)
  34. def parse_media_id(request):
  35. try:
  36. # This allows users to append e.g. /test.png to the URL. Useful for
  37. # clients that parse the URL to see content type.
  38. server_name, media_id = request.postpath[:2]
  39. file_name = None
  40. if len(request.postpath) > 2:
  41. try:
  42. file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8")
  43. except UnicodeDecodeError:
  44. pass
  45. return server_name, media_id, file_name
  46. except:
  47. raise SynapseError(
  48. 404,
  49. "Invalid media id token %r" % (request.postpath,),
  50. Codes.UNKNOWN,
  51. )
  52. class BaseMediaResource(Resource):
  53. isLeaf = True
  54. def __init__(self, hs, filepaths):
  55. Resource.__init__(self)
  56. self.auth = hs.get_auth()
  57. self.client = MatrixFederationHttpClient(hs)
  58. self.clock = hs.get_clock()
  59. self.server_name = hs.hostname
  60. self.store = hs.get_datastore()
  61. self.max_upload_size = hs.config.max_upload_size
  62. self.max_image_pixels = hs.config.max_image_pixels
  63. self.max_spider_size = hs.config.max_spider_size
  64. self.filepaths = filepaths
  65. self.version_string = hs.version_string
  66. self.downloads = {}
  67. self.dynamic_thumbnails = hs.config.dynamic_thumbnails
  68. self.thumbnail_requirements = hs.config.thumbnail_requirements
  69. def _respond_404(self, request):
  70. respond_with_json(
  71. request, 404,
  72. cs_error(
  73. "Not found %r" % (request.postpath,),
  74. code=Codes.NOT_FOUND,
  75. ),
  76. send_cors=True
  77. )
  78. @staticmethod
  79. def _makedirs(filepath):
  80. dirname = os.path.dirname(filepath)
  81. if not os.path.exists(dirname):
  82. os.makedirs(dirname)
  83. def _get_remote_media(self, server_name, media_id):
  84. key = (server_name, media_id)
  85. download = self.downloads.get(key)
  86. if download is None:
  87. download = self._get_remote_media_impl(server_name, media_id)
  88. download = ObservableDeferred(
  89. download,
  90. consumeErrors=True
  91. )
  92. self.downloads[key] = download
  93. @download.addBoth
  94. def callback(media_info):
  95. del self.downloads[key]
  96. return media_info
  97. return download.observe()
  98. @defer.inlineCallbacks
  99. def _get_remote_media_impl(self, server_name, media_id):
  100. media_info = yield self.store.get_cached_remote_media(
  101. server_name, media_id
  102. )
  103. if not media_info:
  104. media_info = yield self._download_remote_file(
  105. server_name, media_id
  106. )
  107. defer.returnValue(media_info)
  108. @defer.inlineCallbacks
  109. def _download_remote_file(self, server_name, media_id):
  110. file_id = random_string(24)
  111. fname = self.filepaths.remote_media_filepath(
  112. server_name, file_id
  113. )
  114. self._makedirs(fname)
  115. try:
  116. with open(fname, "wb") as f:
  117. request_path = "/".join((
  118. "/_matrix/media/v1/download", server_name, media_id,
  119. ))
  120. length, headers = yield self.client.get_file(
  121. server_name, request_path, output_stream=f,
  122. max_size=self.max_upload_size,
  123. )
  124. media_type = headers["Content-Type"][0]
  125. time_now_ms = self.clock.time_msec()
  126. content_disposition = headers.get("Content-Disposition", None)
  127. if content_disposition:
  128. _, params = cgi.parse_header(content_disposition[0],)
  129. upload_name = None
  130. # First check if there is a valid UTF-8 filename
  131. upload_name_utf8 = params.get("filename*", None)
  132. if upload_name_utf8:
  133. if upload_name_utf8.lower().startswith("utf-8''"):
  134. upload_name = upload_name_utf8[7:]
  135. # If there isn't check for an ascii name.
  136. if not upload_name:
  137. upload_name_ascii = params.get("filename", None)
  138. if upload_name_ascii and is_ascii(upload_name_ascii):
  139. upload_name = upload_name_ascii
  140. if upload_name:
  141. upload_name = urlparse.unquote(upload_name)
  142. try:
  143. upload_name = upload_name.decode("utf-8")
  144. except UnicodeDecodeError:
  145. upload_name = None
  146. else:
  147. upload_name = None
  148. yield self.store.store_cached_remote_media(
  149. origin=server_name,
  150. media_id=media_id,
  151. media_type=media_type,
  152. time_now_ms=self.clock.time_msec(),
  153. upload_name=upload_name,
  154. media_length=length,
  155. filesystem_id=file_id,
  156. )
  157. except:
  158. os.remove(fname)
  159. raise
  160. media_info = {
  161. "media_type": media_type,
  162. "media_length": length,
  163. "upload_name": upload_name,
  164. "created_ts": time_now_ms,
  165. "filesystem_id": file_id,
  166. }
  167. yield self._generate_remote_thumbnails(
  168. server_name, media_id, media_info
  169. )
  170. defer.returnValue(media_info)
  171. @defer.inlineCallbacks
  172. def _respond_with_file(self, request, media_type, file_path,
  173. file_size=None, upload_name=None):
  174. logger.debug("Responding with %r", file_path)
  175. if os.path.isfile(file_path):
  176. request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
  177. if upload_name:
  178. if is_ascii(upload_name):
  179. request.setHeader(
  180. b"Content-Disposition",
  181. b"inline; filename=%s" % (
  182. urllib.quote(upload_name.encode("utf-8")),
  183. ),
  184. )
  185. else:
  186. request.setHeader(
  187. b"Content-Disposition",
  188. b"inline; filename*=utf-8''%s" % (
  189. urllib.quote(upload_name.encode("utf-8")),
  190. ),
  191. )
  192. # cache for at least a day.
  193. # XXX: we might want to turn this off for data we don't want to
  194. # recommend caching as it's sensitive or private - or at least
  195. # select private. don't bother setting Expires as all our
  196. # clients are smart enough to be happy with Cache-Control
  197. request.setHeader(
  198. b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
  199. )
  200. if file_size is None:
  201. stat = os.stat(file_path)
  202. file_size = stat.st_size
  203. request.setHeader(
  204. b"Content-Length", b"%d" % (file_size,)
  205. )
  206. with open(file_path, "rb") as f:
  207. yield FileSender().beginFileTransfer(f, request)
  208. finish_request(request)
  209. else:
  210. self._respond_404(request)
  211. def _get_thumbnail_requirements(self, media_type):
  212. return self.thumbnail_requirements.get(media_type, ())
  213. def _generate_thumbnail(self, input_path, t_path, t_width, t_height,
  214. t_method, t_type):
  215. thumbnailer = Thumbnailer(input_path)
  216. m_width = thumbnailer.width
  217. m_height = thumbnailer.height
  218. if m_width * m_height >= self.max_image_pixels:
  219. logger.info(
  220. "Image too large to thumbnail %r x %r > %r",
  221. m_width, m_height, self.max_image_pixels
  222. )
  223. return
  224. if t_method == "crop":
  225. t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
  226. elif t_method == "scale":
  227. t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
  228. else:
  229. t_len = None
  230. return t_len
  231. @defer.inlineCallbacks
  232. def _generate_local_exact_thumbnail(self, media_id, t_width, t_height,
  233. t_method, t_type):
  234. input_path = self.filepaths.local_media_filepath(media_id)
  235. t_path = self.filepaths.local_media_thumbnail(
  236. media_id, t_width, t_height, t_type, t_method
  237. )
  238. self._makedirs(t_path)
  239. t_len = yield preserve_context_over_fn(
  240. threads.deferToThread,
  241. self._generate_thumbnail,
  242. input_path, t_path, t_width, t_height, t_method, t_type
  243. )
  244. if t_len:
  245. yield self.store.store_local_thumbnail(
  246. media_id, t_width, t_height, t_type, t_method, t_len
  247. )
  248. defer.returnValue(t_path)
  249. @defer.inlineCallbacks
  250. def _generate_remote_exact_thumbnail(self, server_name, file_id, media_id,
  251. t_width, t_height, t_method, t_type):
  252. input_path = self.filepaths.remote_media_filepath(server_name, file_id)
  253. t_path = self.filepaths.remote_media_thumbnail(
  254. server_name, file_id, t_width, t_height, t_type, t_method
  255. )
  256. self._makedirs(t_path)
  257. t_len = yield preserve_context_over_fn(
  258. threads.deferToThread,
  259. self._generate_thumbnail,
  260. input_path, t_path, t_width, t_height, t_method, t_type
  261. )
  262. if t_len:
  263. yield self.store.store_remote_media_thumbnail(
  264. server_name, media_id, file_id,
  265. t_width, t_height, t_type, t_method, t_len
  266. )
  267. defer.returnValue(t_path)
  268. @defer.inlineCallbacks
  269. def _generate_local_thumbnails(self, media_id, media_info):
  270. media_type = media_info["media_type"]
  271. requirements = self._get_thumbnail_requirements(media_type)
  272. if not requirements:
  273. return
  274. input_path = self.filepaths.local_media_filepath(media_id)
  275. thumbnailer = Thumbnailer(input_path)
  276. m_width = thumbnailer.width
  277. m_height = thumbnailer.height
  278. if m_width * m_height >= self.max_image_pixels:
  279. logger.info(
  280. "Image too large to thumbnail %r x %r > %r",
  281. m_width, m_height, self.max_image_pixels
  282. )
  283. return
  284. local_thumbnails = []
  285. def generate_thumbnails():
  286. scales = set()
  287. crops = set()
  288. for r_width, r_height, r_method, r_type in requirements:
  289. if r_method == "scale":
  290. t_width, t_height = thumbnailer.aspect(r_width, r_height)
  291. scales.add((
  292. min(m_width, t_width), min(m_height, t_height), r_type,
  293. ))
  294. elif r_method == "crop":
  295. crops.add((r_width, r_height, r_type))
  296. for t_width, t_height, t_type in scales:
  297. t_method = "scale"
  298. t_path = self.filepaths.local_media_thumbnail(
  299. media_id, t_width, t_height, t_type, t_method
  300. )
  301. self._makedirs(t_path)
  302. t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
  303. local_thumbnails.append((
  304. media_id, t_width, t_height, t_type, t_method, t_len
  305. ))
  306. for t_width, t_height, t_type in crops:
  307. if (t_width, t_height, t_type) in scales:
  308. # If the aspect ratio of the cropped thumbnail matches a purely
  309. # scaled one then there is no point in calculating a separate
  310. # thumbnail.
  311. continue
  312. t_method = "crop"
  313. t_path = self.filepaths.local_media_thumbnail(
  314. media_id, t_width, t_height, t_type, t_method
  315. )
  316. self._makedirs(t_path)
  317. t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
  318. local_thumbnails.append((
  319. media_id, t_width, t_height, t_type, t_method, t_len
  320. ))
  321. yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails)
  322. for l in local_thumbnails:
  323. yield self.store.store_local_thumbnail(*l)
  324. defer.returnValue({
  325. "width": m_width,
  326. "height": m_height,
  327. })
  328. @defer.inlineCallbacks
  329. def _generate_remote_thumbnails(self, server_name, media_id, media_info):
  330. media_type = media_info["media_type"]
  331. file_id = media_info["filesystem_id"]
  332. requirements = self._get_thumbnail_requirements(media_type)
  333. if not requirements:
  334. return
  335. remote_thumbnails = []
  336. input_path = self.filepaths.remote_media_filepath(server_name, file_id)
  337. thumbnailer = Thumbnailer(input_path)
  338. m_width = thumbnailer.width
  339. m_height = thumbnailer.height
  340. def generate_thumbnails():
  341. if m_width * m_height >= self.max_image_pixels:
  342. logger.info(
  343. "Image too large to thumbnail %r x %r > %r",
  344. m_width, m_height, self.max_image_pixels
  345. )
  346. return
  347. scales = set()
  348. crops = set()
  349. for r_width, r_height, r_method, r_type in requirements:
  350. if r_method == "scale":
  351. t_width, t_height = thumbnailer.aspect(r_width, r_height)
  352. scales.add((
  353. min(m_width, t_width), min(m_height, t_height), r_type,
  354. ))
  355. elif r_method == "crop":
  356. crops.add((r_width, r_height, r_type))
  357. for t_width, t_height, t_type in scales:
  358. t_method = "scale"
  359. t_path = self.filepaths.remote_media_thumbnail(
  360. server_name, file_id, t_width, t_height, t_type, t_method
  361. )
  362. self._makedirs(t_path)
  363. t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
  364. remote_thumbnails.append([
  365. server_name, media_id, file_id,
  366. t_width, t_height, t_type, t_method, t_len
  367. ])
  368. for t_width, t_height, t_type in crops:
  369. if (t_width, t_height, t_type) in scales:
  370. # If the aspect ratio of the cropped thumbnail matches a purely
  371. # scaled one then there is no point in calculating a separate
  372. # thumbnail.
  373. continue
  374. t_method = "crop"
  375. t_path = self.filepaths.remote_media_thumbnail(
  376. server_name, file_id, t_width, t_height, t_type, t_method
  377. )
  378. self._makedirs(t_path)
  379. t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
  380. remote_thumbnails.append([
  381. server_name, media_id, file_id,
  382. t_width, t_height, t_type, t_method, t_len
  383. ])
  384. yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails)
  385. for r in remote_thumbnails:
  386. yield self.store.store_remote_media_thumbnail(*r)
  387. defer.returnValue({
  388. "width": m_width,
  389. "height": m_height,
  390. })