Browse Source

Sanitize the optional dependencies for spider API

Erik Johnston 8 years ago
parent
commit
d0633e6dbe

+ 28 - 10
synapse/config/repository.py

@@ -13,10 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ._base import Config
+from ._base import Config, ConfigError
 from collections import namedtuple
 
-import sys
+
+MISSING_NETADDR = (
+    "Missing netaddr library. This is required for URL preview API."
+)
+
+MISSING_LXML = (
+    "Missing lxml library. This is required for URL preview API."
+)
+
 
 ThumbnailRequirement = namedtuple(
     "ThumbnailRequirement", ["width", "height", "method", "media_type"]
@@ -62,18 +70,28 @@ class ContentRepositoryConfig(Config):
         self.thumbnail_requirements = parse_thumbnail_requirements(
             config["thumbnail_sizes"]
         )
-        self.url_preview_enabled = config["url_preview_enabled"]
+        self.url_preview_enabled = config.get("url_preview_enabled", False)
         if self.url_preview_enabled:
+            try:
+                import lxml
+                lxml  # To stop unused lint.
+            except ImportError:
+                raise ConfigError(MISSING_LXML)
+
             try:
                 from netaddr import IPSet
-                if "url_preview_ip_range_blacklist" in config:
-                    self.url_preview_ip_range_blacklist = IPSet(
-                        config["url_preview_ip_range_blacklist"]
-                    )
-                if "url_preview_url_blacklist" in config:
-                    self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
             except ImportError:
-                sys.stderr.write("\nmissing netaddr dep - disabling preview_url API\n")
+                raise ConfigError(MISSING_NETADDR)
+
+            if "url_preview_ip_range_blacklist" in config:
+                self.url_preview_ip_range_blacklist = IPSet(
+                    config["url_preview_ip_range_blacklist"]
+                )
+            else:
+                raise ConfigError("url_preview_ip_range_blacklist is required")
+
+            if "url_preview_url_blacklist" in config:
+                self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
 
     def default_config(self, **kwargs):
         media_store = self.default_path("media_store")

+ 0 - 1
synapse/python_dependencies.py

@@ -43,7 +43,6 @@ CONDITIONAL_REQUIREMENTS = {
         "matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"],
     },
     "preview_url": {
-        "lxml>=3.6.0": ["lxml"],
         "netaddr>=0.7.18": ["netaddr"],
     },
 }

+ 1 - 5
synapse/rest/media/v1/media_repository.py

@@ -80,8 +80,4 @@ class MediaRepositoryResource(Resource):
         self.putChild("thumbnail", ThumbnailResource(hs, filepaths))
         self.putChild("identicon", IdenticonResource())
         if hs.config.url_preview_enabled:
-            try:
-                self.putChild("preview_url", PreviewUrlResource(hs, filepaths))
-            except Exception as e:
-                logger.warn("Failed to mount preview_url")
-                logger.exception(e)
+            self.putChild("preview_url", PreviewUrlResource(hs, filepaths))

+ 2 - 22
synapse/rest/media/v1/preview_url_resource.py

@@ -40,33 +40,11 @@ import ujson as json
 import logging
 logger = logging.getLogger(__name__)
 
-try:
-    from lxml import html
-except ImportError:
-    pass
-
 
 class PreviewUrlResource(BaseMediaResource):
     isLeaf = True
 
     def __init__(self, hs, filepaths):
-        try:
-            if html:
-                pass
-        except:
-            raise RuntimeError("Disabling PreviewUrlResource as lxml not available")
-
-        if not hasattr(hs.config, "url_preview_ip_range_blacklist"):
-            logger.warn(
-                "For security, you must specify an explicit target IP address "
-                "blacklist in url_preview_ip_range_blacklist for url previewing "
-                "to work"
-            )
-            raise RuntimeError(
-                "Disabling PreviewUrlResource as "
-                "url_preview_ip_range_blacklist not specified"
-            )
-
         BaseMediaResource.__init__(self, hs, filepaths)
         self.client = SpiderHttpClient(hs)
         if hasattr(hs.config, "url_preview_url_blacklist"):
@@ -201,6 +179,8 @@ class PreviewUrlResource(BaseMediaResource):
         elif self._is_html(media_info['media_type']):
             # TODO: somehow stop a big HTML tree from exploding synapse's RAM
 
+            from lxml import html
+
             try:
                 tree = html.parse(media_info['filename'])
                 og = yield self._calc_og(tree, media_info, requester)