content_repository.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2014-2016 OpenMarket Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. from synapse.http.server import respond_with_json_bytes, finish_request
  16. from synapse.util.stringutils import random_string
  17. from synapse.api.errors import (
  18. cs_exception, SynapseError, CodeMessageException, Codes, cs_error
  19. )
  20. from twisted.protocols.basic import FileSender
  21. from twisted.web import server, resource
  22. from twisted.internet import defer
  23. import base64
  24. import simplejson as json
  25. import logging
  26. import os
  27. import re
  28. logger = logging.getLogger(__name__)
  29. class ContentRepoResource(resource.Resource):
  30. """Provides file uploading and downloading.
  31. Uploads are POSTed to wherever this Resource is linked to. This resource
  32. returns a "content token" which can be used to GET this content again. The
  33. token is typically a path, but it may not be. Tokens can expire, be
  34. one-time uses, etc.
  35. In this case, the token is a path to the file and contains 3 interesting
  36. sections:
  37. - User ID base64d (for namespacing content to each user)
  38. - random 24 char string
  39. - Content type base64d (so we can return it when clients GET it)
  40. """
  41. isLeaf = True
  42. def __init__(self, hs, directory, auth, external_addr):
  43. resource.Resource.__init__(self)
  44. self.hs = hs
  45. self.directory = directory
  46. self.auth = auth
  47. self.external_addr = external_addr.rstrip('/')
  48. self.max_upload_size = hs.config.max_upload_size
  49. if not os.path.isdir(self.directory):
  50. os.mkdir(self.directory)
  51. logger.info("ContentRepoResource : Created %s directory.",
  52. self.directory)
  53. @defer.inlineCallbacks
  54. def map_request_to_name(self, request):
  55. # auth the user
  56. requester = yield self.auth.get_user_by_req(request)
  57. # namespace all file uploads on the user
  58. prefix = base64.urlsafe_b64encode(
  59. requester.user.to_string()
  60. ).replace('=', '')
  61. # use a random string for the main portion
  62. main_part = random_string(24)
  63. # suffix with a file extension if we can make one. This is nice to
  64. # provide a hint to clients on the file information. We will also reuse
  65. # this info to spit back the content type to the client.
  66. suffix = ""
  67. if request.requestHeaders.hasHeader("Content-Type"):
  68. content_type = request.requestHeaders.getRawHeaders(
  69. "Content-Type")[0]
  70. suffix = "." + base64.urlsafe_b64encode(content_type)
  71. if (content_type.split("/")[0].lower() in
  72. ["image", "video", "audio"]):
  73. file_ext = content_type.split("/")[-1]
  74. # be a little paranoid and only allow a-z
  75. file_ext = re.sub("[^a-z]", "", file_ext)
  76. suffix += "." + file_ext
  77. file_name = prefix + main_part + suffix
  78. file_path = os.path.join(self.directory, file_name)
  79. logger.info("User %s is uploading a file to path %s",
  80. request.user.user_id.to_string(),
  81. file_path)
  82. # keep trying to make a non-clashing file, with a sensible max attempts
  83. attempts = 0
  84. while os.path.exists(file_path):
  85. main_part = random_string(24)
  86. file_name = prefix + main_part + suffix
  87. file_path = os.path.join(self.directory, file_name)
  88. attempts += 1
  89. if attempts > 25: # really? Really?
  90. raise SynapseError(500, "Unable to create file.")
  91. defer.returnValue(file_path)
  92. def render_GET(self, request):
  93. # no auth here on purpose, to allow anyone to view, even across home
  94. # servers.
  95. # TODO: A little crude here, we could do this better.
  96. filename = request.path.split('/')[-1]
  97. # be paranoid
  98. filename = re.sub("[^0-9A-z.-_]", "", filename)
  99. file_path = self.directory + "/" + filename
  100. logger.debug("Searching for %s", file_path)
  101. if os.path.isfile(file_path):
  102. # filename has the content type
  103. base64_contentype = filename.split(".")[1]
  104. content_type = base64.urlsafe_b64decode(base64_contentype)
  105. logger.info("Sending file %s", file_path)
  106. f = open(file_path, 'rb')
  107. request.setHeader('Content-Type', content_type)
  108. # cache for at least a day.
  109. # XXX: we might want to turn this off for data we don't want to
  110. # recommend caching as it's sensitive or private - or at least
  111. # select private. don't bother setting Expires as all our matrix
  112. # clients are smart enough to be happy with Cache-Control (right?)
  113. request.setHeader(
  114. "Cache-Control", "public,max-age=86400,s-maxage=86400"
  115. )
  116. d = FileSender().beginFileTransfer(f, request)
  117. # after the file has been sent, clean up and finish the request
  118. def cbFinished(ignored):
  119. f.close()
  120. finish_request(request)
  121. d.addCallback(cbFinished)
  122. else:
  123. respond_with_json_bytes(
  124. request,
  125. 404,
  126. json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)),
  127. send_cors=True)
  128. return server.NOT_DONE_YET
  129. def render_POST(self, request):
  130. self._async_render(request)
  131. return server.NOT_DONE_YET
  132. def render_OPTIONS(self, request):
  133. respond_with_json_bytes(request, 200, {}, send_cors=True)
  134. return server.NOT_DONE_YET
  135. @defer.inlineCallbacks
  136. def _async_render(self, request):
  137. try:
  138. # TODO: The checks here are a bit late. The content will have
  139. # already been uploaded to a tmp file at this point
  140. content_length = request.getHeader("Content-Length")
  141. if content_length is None:
  142. raise SynapseError(
  143. msg="Request must specify a Content-Length", code=400
  144. )
  145. if int(content_length) > self.max_upload_size:
  146. raise SynapseError(
  147. msg="Upload request body is too large",
  148. code=413,
  149. )
  150. fname = yield self.map_request_to_name(request)
  151. # TODO I have a suspicious feeling this is just going to block
  152. with open(fname, "wb") as f:
  153. f.write(request.content.read())
  154. # FIXME (erikj): These should use constants.
  155. file_name = os.path.basename(fname)
  156. # FIXME: we can't assume what the repo's public mounted path is
  157. # ...plus self-signed SSL won't work to remote clients anyway
  158. # ...and we can't assume that it's SSL anyway, as we might want to
  159. # serve it via the non-SSL listener...
  160. url = "%s/_matrix/content/%s" % (
  161. self.external_addr, file_name
  162. )
  163. respond_with_json_bytes(request, 200,
  164. json.dumps({"content_token": url}),
  165. send_cors=True)
  166. except CodeMessageException as e:
  167. logger.exception(e)
  168. respond_with_json_bytes(request, e.code,
  169. json.dumps(cs_exception(e)))
  170. except Exception as e:
  171. logger.error("Failed to store file: %s" % e)
  172. respond_with_json_bytes(
  173. request,
  174. 500,
  175. json.dumps({"error": "Internal server error"}),
  176. send_cors=True)