From d0633e6dbe1e62bd9def0554e6a063a034ce3248 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 13 Apr 2016 11:57:46 +0100 Subject: [PATCH] Sanitize the optional dependencies for spider API --- synapse/config/repository.py | 40 ++++++++++++++----- synapse/python_dependencies.py | 1 - synapse/rest/media/v1/media_repository.py | 6 +-- synapse/rest/media/v1/preview_url_resource.py | 24 +---------- 4 files changed, 32 insertions(+), 39 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 49922c6d0..82ff8df59 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -13,10 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import Config +from ._base import Config, ConfigError from collections import namedtuple -import sys + +MISSING_NETADDR = ( + "Missing netaddr library. This is required for URL preview API." +) + +MISSING_LXML = ( + "Missing lxml library. This is required for URL preview API." +) + ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] @@ -62,18 +70,28 @@ class ContentRepositoryConfig(Config): self.thumbnail_requirements = parse_thumbnail_requirements( config["thumbnail_sizes"] ) - self.url_preview_enabled = config["url_preview_enabled"] + self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: try: - from netaddr import IPSet - if "url_preview_ip_range_blacklist" in config: - self.url_preview_ip_range_blacklist = IPSet( - config["url_preview_ip_range_blacklist"] - ) - if "url_preview_url_blacklist" in config: - self.url_preview_url_blacklist = config["url_preview_url_blacklist"] + import lxml + lxml # To stop unused lint. except ImportError: - sys.stderr.write("\nmissing netaddr dep - disabling preview_url API\n") + raise ConfigError(MISSING_LXML) + + try: + from netaddr import IPSet + except ImportError: + raise ConfigError(MISSING_NETADDR) + + if "url_preview_ip_range_blacklist" in config: + self.url_preview_ip_range_blacklist = IPSet( + config["url_preview_ip_range_blacklist"] + ) + else: + raise ConfigError("url_preview_url_blacklist is required") + + if "url_preview_url_blacklist" in config: + self.url_preview_url_blacklist = config["url_preview_url_blacklist"] def default_config(self, **kwargs): media_store = self.default_path("media_store") diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 1adbdd942..b25b73649 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -43,7 +43,6 @@ CONDITIONAL_REQUIREMENTS = { "matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"], }, "preview_url": { - "lxml>=3.6.0": ["lxml"], "netaddr>=0.7.18": ["netaddr"], }, } diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97b7e84af..77fb0313c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -80,8 +80,4 @@ class MediaRepositoryResource(Resource): self.putChild("thumbnail", ThumbnailResource(hs, filepaths)) self.putChild("identicon", IdenticonResource()) if hs.config.url_preview_enabled: - try: - self.putChild("preview_url", PreviewUrlResource(hs, filepaths)) - except Exception as e: - logger.warn("Failed to mount preview_url") - logger.exception(e) + self.putChild("preview_url", PreviewUrlResource(hs, filepaths)) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 4dd97ac0e..8e1cf6e2f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -40,33 +40,11 @@ import ujson as json import logging logger = logging.getLogger(__name__) -try: - from lxml import html -except ImportError: - pass - class PreviewUrlResource(BaseMediaResource): isLeaf = True def __init__(self, hs, filepaths): - try: - if html: - pass - except: - raise RuntimeError("Disabling PreviewUrlResource as lxml not available") - - if not hasattr(hs.config, "url_preview_ip_range_blacklist"): - logger.warn( - "For security, you must specify an explicit target IP address " - "blacklist in url_preview_ip_range_blacklist for url previewing " - "to work" - ) - raise RuntimeError( - "Disabling PreviewUrlResource as " - "url_preview_ip_range_blacklist not specified" - ) - BaseMediaResource.__init__(self, hs, filepaths) self.client = SpiderHttpClient(hs) if hasattr(hs.config, "url_preview_url_blacklist"): @@ -201,6 +179,8 @@ class PreviewUrlResource(BaseMediaResource): elif self._is_html(media_info['media_type']): # TODO: somehow stop a big HTML tree from exploding synapse's RAM + from lxml import html + try: tree = html.parse(media_info['filename']) og = yield self._calc_og(tree, media_info, requester)