test_url_preview.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2018 New Vector Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import os
  16. from mock import Mock
  17. from twisted.internet.defer import Deferred
  18. from synapse.config.repository import MediaStorageProviderConfig
  19. from synapse.util.module_loader import load_module
  20. from tests import unittest
  21. class URLPreviewTests(unittest.HomeserverTestCase):
  22. hijack_auth = True
  23. user_id = "@test:user"
  24. def make_homeserver(self, reactor, clock):
  25. self.storage_path = self.mktemp()
  26. os.mkdir(self.storage_path)
  27. config = self.default_config()
  28. config.url_preview_enabled = True
  29. config.max_spider_size = 9999999
  30. config.url_preview_url_blacklist = []
  31. config.media_store_path = self.storage_path
  32. provider_config = {
  33. "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend",
  34. "store_local": True,
  35. "store_synchronous": False,
  36. "store_remote": True,
  37. "config": {"directory": self.storage_path},
  38. }
  39. loaded = list(load_module(provider_config)) + [
  40. MediaStorageProviderConfig(False, False, False)
  41. ]
  42. config.media_storage_providers = [loaded]
  43. hs = self.setup_test_homeserver(config=config)
  44. return hs
  45. def prepare(self, reactor, clock, hs):
  46. self.fetches = []
  47. def get_file(url, output_stream, max_size):
  48. """
  49. Returns tuple[int,dict,str,int] of file length, response headers,
  50. absolute URI, and response code.
  51. """
  52. def write_to(r):
  53. data, response = r
  54. output_stream.write(data)
  55. return response
  56. d = Deferred()
  57. d.addCallback(write_to)
  58. self.fetches.append((d, url))
  59. return d
  60. client = Mock()
  61. client.get_file = get_file
  62. self.media_repo = hs.get_media_repository_resource()
  63. preview_url = self.media_repo.children[b'preview_url']
  64. preview_url.client = client
  65. self.preview_url = preview_url
  66. def test_cache_returns_correct_type(self):
  67. request, channel = self.make_request(
  68. "GET", "url_preview?url=matrix.org", shorthand=False
  69. )
  70. request.render(self.preview_url)
  71. self.pump()
  72. # We've made one fetch
  73. self.assertEqual(len(self.fetches), 1)
  74. end_content = (
  75. b'<html><head>'
  76. b'<meta property="og:title" content="~matrix~" />'
  77. b'<meta property="og:description" content="hi" />'
  78. b'</head></html>'
  79. )
  80. self.fetches[0][0].callback(
  81. (
  82. end_content,
  83. (
  84. len(end_content),
  85. {
  86. b"Content-Length": [b"%d" % (len(end_content))],
  87. b"Content-Type": [b'text/html; charset="utf8"'],
  88. },
  89. "https://example.com",
  90. 200,
  91. ),
  92. )
  93. )
  94. self.pump()
  95. self.assertEqual(channel.code, 200)
  96. self.assertEqual(
  97. channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
  98. )
  99. # Check the cache returns the correct response
  100. request, channel = self.make_request(
  101. "GET", "url_preview?url=matrix.org", shorthand=False
  102. )
  103. request.render(self.preview_url)
  104. self.pump()
  105. # Only one fetch, still, since we'll lean on the cache
  106. self.assertEqual(len(self.fetches), 1)
  107. # Check the cache response has the same content
  108. self.assertEqual(channel.code, 200)
  109. self.assertEqual(
  110. channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
  111. )
  112. # Clear the in-memory cache
  113. self.assertIn("matrix.org", self.preview_url._cache)
  114. self.preview_url._cache.pop("matrix.org")
  115. self.assertNotIn("matrix.org", self.preview_url._cache)
  116. # Check the database cache returns the correct response
  117. request, channel = self.make_request(
  118. "GET", "url_preview?url=matrix.org", shorthand=False
  119. )
  120. request.render(self.preview_url)
  121. self.pump()
  122. # Only one fetch, still, since we'll lean on the cache
  123. self.assertEqual(len(self.fetches), 1)
  124. # Check the cache response has the same content
  125. self.assertEqual(channel.code, 200)
  126. self.assertEqual(
  127. channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
  128. )