test_url_previewer.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
# Copyright 2023 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. import os
  15. from twisted.test.proto_helpers import MemoryReactor
  16. from synapse.server import HomeServer
  17. from synapse.util import Clock
  18. from tests import unittest
  19. from tests.unittest import override_config
  20. try:
  21. import lxml
  22. except ImportError:
  23. lxml = None # type: ignore[assignment]
  24. class URLPreviewTests(unittest.HomeserverTestCase):
  25. if not lxml:
  26. skip = "url preview feature requires lxml"
  27. def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
  28. config = self.default_config()
  29. config["url_preview_enabled"] = True
  30. config["max_spider_size"] = 9999999
  31. config["url_preview_ip_range_blacklist"] = (
  32. "192.168.1.1",
  33. "1.0.0.0/8",
  34. "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
  35. "2001:800::/21",
  36. )
  37. self.storage_path = self.mktemp()
  38. self.media_store_path = self.mktemp()
  39. os.mkdir(self.storage_path)
  40. os.mkdir(self.media_store_path)
  41. config["media_store_path"] = self.media_store_path
  42. provider_config = {
  43. "module": "synapse.media.storage_provider.FileStorageProviderBackend",
  44. "store_local": True,
  45. "store_synchronous": False,
  46. "store_remote": True,
  47. "config": {"directory": self.storage_path},
  48. }
  49. config["media_storage_providers"] = [provider_config]
  50. return self.setup_test_homeserver(config=config)
  51. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  52. media_repo_resource = hs.get_media_repository_resource()
  53. preview_url = media_repo_resource.children[b"preview_url"]
  54. self.url_previewer = preview_url._url_previewer
  55. def test_all_urls_allowed(self) -> None:
  56. self.assertFalse(self.url_previewer._is_url_blocked("http://matrix.org"))
  57. self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org"))
  58. self.assertFalse(self.url_previewer._is_url_blocked("http://localhost:8000"))
  59. self.assertFalse(
  60. self.url_previewer._is_url_blocked("http://user:pass@matrix.org")
  61. )
  62. @override_config(
  63. {
  64. "url_preview_url_blacklist": [
  65. {"username": "user"},
  66. {"scheme": "http", "netloc": "matrix.org"},
  67. ]
  68. }
  69. )
  70. def test_blocked_url(self) -> None:
  71. # Blocked via scheme and URL.
  72. self.assertTrue(self.url_previewer._is_url_blocked("http://matrix.org"))
  73. # Not blocked because all components must match.
  74. self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org"))
  75. # Blocked due to the user.
  76. self.assertTrue(
  77. self.url_previewer._is_url_blocked("http://user:pass@example.com")
  78. )
  79. self.assertTrue(self.url_previewer._is_url_blocked("http://user@example.com"))
  80. @override_config({"url_preview_url_blacklist": [{"netloc": "*.example.com"}]})
  81. def test_glob_blocked_url(self) -> None:
  82. # All subdomains are blocked.
  83. self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com"))
  84. self.assertTrue(self.url_previewer._is_url_blocked("http://.example.com"))
  85. # The TLD is not blocked.
  86. self.assertFalse(self.url_previewer._is_url_blocked("https://example.com"))
  87. @override_config({"url_preview_url_blacklist": [{"netloc": "^.+\\.example\\.com"}]})
  88. def test_regex_blocked_urL(self) -> None:
  89. # All subdomains are blocked.
  90. self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com"))
  91. # Requires a non-empty subdomain.
  92. self.assertFalse(self.url_previewer._is_url_blocked("http://.example.com"))
  93. # The TLD is not blocked.
  94. self.assertFalse(self.url_previewer._is_url_blocked("https://example.com"))