Merge pull request #721 from matrix-org/erikj/spider

Sanitize the optional dependencies for spider API
This commit is contained in:
Erik Johnston 2016-04-14 09:59:29 +01:00
commit ceeb5b909f
4 changed files with 42 additions and 39 deletions

View File

@ -13,10 +13,24 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from ._base import Config from ._base import Config, ConfigError
from collections import namedtuple from collections import namedtuple
import sys
MISSING_NETADDR = (
"Missing netaddr library. This is required for URL preview API."
)
MISSING_LXML = (
"""Missing lxml library. This is required for URL preview API.
Install by running:
pip install lxml
Requires libxslt1-dev system package.
"""
)
ThumbnailRequirement = namedtuple( ThumbnailRequirement = namedtuple(
"ThumbnailRequirement", ["width", "height", "method", "media_type"] "ThumbnailRequirement", ["width", "height", "method", "media_type"]
@ -62,18 +76,32 @@ class ContentRepositoryConfig(Config):
self.thumbnail_requirements = parse_thumbnail_requirements( self.thumbnail_requirements = parse_thumbnail_requirements(
config["thumbnail_sizes"] config["thumbnail_sizes"]
) )
self.url_preview_enabled = config["url_preview_enabled"] self.url_preview_enabled = config.get("url_preview_enabled", False)
if self.url_preview_enabled: if self.url_preview_enabled:
try:
import lxml
lxml # To stop unused lint.
except ImportError:
raise ConfigError(MISSING_LXML)
try: try:
from netaddr import IPSet from netaddr import IPSet
except ImportError:
raise ConfigError(MISSING_NETADDR)
if "url_preview_ip_range_blacklist" in config: if "url_preview_ip_range_blacklist" in config:
self.url_preview_ip_range_blacklist = IPSet( self.url_preview_ip_range_blacklist = IPSet(
config["url_preview_ip_range_blacklist"] config["url_preview_ip_range_blacklist"]
) )
else:
raise ConfigError(
"For security, you must specify an explicit target IP address "
"blacklist in url_preview_ip_range_blacklist for url previewing "
"to work"
)
if "url_preview_url_blacklist" in config: if "url_preview_url_blacklist" in config:
self.url_preview_url_blacklist = config["url_preview_url_blacklist"] self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
except ImportError:
sys.stderr.write("\nmissing netaddr dep - disabling preview_url API\n")
def default_config(self, **kwargs): def default_config(self, **kwargs):
media_store = self.default_path("media_store") media_store = self.default_path("media_store")

View File

@ -43,7 +43,6 @@ CONDITIONAL_REQUIREMENTS = {
"matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"], "matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"],
}, },
"preview_url": { "preview_url": {
"lxml>=3.6.0": ["lxml"],
"netaddr>=0.7.18": ["netaddr"], "netaddr>=0.7.18": ["netaddr"],
}, },
} }

View File

@ -80,8 +80,4 @@ class MediaRepositoryResource(Resource):
self.putChild("thumbnail", ThumbnailResource(hs, filepaths)) self.putChild("thumbnail", ThumbnailResource(hs, filepaths))
self.putChild("identicon", IdenticonResource()) self.putChild("identicon", IdenticonResource())
if hs.config.url_preview_enabled: if hs.config.url_preview_enabled:
try:
self.putChild("preview_url", PreviewUrlResource(hs, filepaths)) self.putChild("preview_url", PreviewUrlResource(hs, filepaths))
except Exception as e:
logger.warn("Failed to mount preview_url")
logger.exception(e)

View File

@ -40,33 +40,11 @@ import ujson as json
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
try:
from lxml import html
except ImportError:
pass
class PreviewUrlResource(BaseMediaResource): class PreviewUrlResource(BaseMediaResource):
isLeaf = True isLeaf = True
def __init__(self, hs, filepaths): def __init__(self, hs, filepaths):
try:
if html:
pass
except:
raise RuntimeError("Disabling PreviewUrlResource as lxml not available")
if not hasattr(hs.config, "url_preview_ip_range_blacklist"):
logger.warn(
"For security, you must specify an explicit target IP address "
"blacklist in url_preview_ip_range_blacklist for url previewing "
"to work"
)
raise RuntimeError(
"Disabling PreviewUrlResource as "
"url_preview_ip_range_blacklist not specified"
)
BaseMediaResource.__init__(self, hs, filepaths) BaseMediaResource.__init__(self, hs, filepaths)
self.client = SpiderHttpClient(hs) self.client = SpiderHttpClient(hs)
if hasattr(hs.config, "url_preview_url_blacklist"): if hasattr(hs.config, "url_preview_url_blacklist"):
@ -201,6 +179,8 @@ class PreviewUrlResource(BaseMediaResource):
elif self._is_html(media_info['media_type']): elif self._is_html(media_info['media_type']):
# TODO: somehow stop a big HTML tree from exploding synapse's RAM # TODO: somehow stop a big HTML tree from exploding synapse's RAM
from lxml import html
try: try:
tree = html.parse(media_info['filename']) tree = html.parse(media_info['filename'])
og = yield self._calc_og(tree, media_info, requester) og = yield self._calc_og(tree, media_info, requester)