synapse-product/synapse/config/repository.py

# -*- coding: utf-8 -*-
# Copyright 2014, 2015 matrix.org
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ._base import Config, ConfigError
from collections import namedtuple


# Message for the ConfigError raised when URL previews are enabled but the
# netaddr package cannot be imported.
MISSING_NETADDR = "Missing netaddr library. This is required for URL preview API."

# Message for the ConfigError raised when URL previews are enabled but the
# lxml package cannot be imported. The embedded indentation and the trailing
# whitespace-only line are part of the message text.
MISSING_LXML = (
    "Missing lxml library. This is required for URL preview API.\n"
    "\n"
    "    Install by running:\n"
    "        pip install lxml\n"
    "\n"
    "    Requires libxslt1-dev system package.\n"
    "    "
)


# A single thumbnail to precalculate: its dimensions, the thumbnailing method
# and the media type of the thumbnail itself.
ThumbnailRequirement = namedtuple(
    "ThumbnailRequirement", ("width", "height", "method", "media_type")
)


def parse_thumbnail_requirements(thumbnail_sizes):
    """ Works out which thumbnails to precalculate for each image media type.

    Every configured size yields a JPEG thumbnail for JPEG sources and a PNG
    thumbnail for both GIF and PNG sources.

    Args:
        thumbnail_sizes(list): List of dicts, each with "width", "height", and
            "method" keys
    Returns:
        Dictionary mapping from source media type string to a tuple of
        ThumbnailRequirement tuples, ordered as in thumbnail_sizes. The
        dictionary is empty when thumbnail_sizes is empty.
    """
    # Source media types grouped by the thumbnail format generated for them.
    jpeg_sources = ("image/jpeg",)
    png_sources = ("image/gif", "image/png")

    collected = {}
    for entry in thumbnail_sizes:
        width, height, method = entry["width"], entry["height"], entry["method"]
        as_jpeg = ThumbnailRequirement(
            width=width, height=height, method=method, media_type="image/jpeg"
        )
        as_png = ThumbnailRequirement(
            width=width, height=height, method=method, media_type="image/png"
        )
        for source_type in jpeg_sources:
            collected.setdefault(source_type, []).append(as_jpeg)
        for source_type in png_sources:
            collected.setdefault(source_type, []).append(as_png)

    # Freeze each per-type list so the result cannot be mutated by accident.
    return dict(
        (source_type, tuple(found)) for source_type, found in collected.items()
    )


class ContentRepositoryConfig(Config):
    """Config section for the media repository and the URL preview API.

    Covers upload and thumbnailing limits, storage directories, the list of
    thumbnails to precalculate and, when URL previews are enabled, the
    blacklists restricting what the preview spider may fetch.
    """

    def read_config(self, config):
        """Populates this object's attributes from the parsed config.

        Args:
            config(dict): The parsed configuration. The "max_upload_size",
                "max_image_pixels", "max_spider_size", "media_store_path",
                "uploads_path", "dynamic_thumbnails" and "thumbnail_sizes"
                keys are looked up unconditionally, so a missing one raises
                KeyError. "url_preview_enabled" defaults to False.
        Raises:
            ConfigError: If URL previews are enabled but lxml or netaddr
                cannot be imported, or if no url_preview_ip_range_blacklist
                is given.
        """
        # parse_size and ensure_directory are helpers inherited from Config
        # (not defined in this file).
        self.max_upload_size = self.parse_size(config["max_upload_size"])
        self.max_image_pixels = self.parse_size(config["max_image_pixels"])
        self.max_spider_size = self.parse_size(config["max_spider_size"])
        self.media_store_path = self.ensure_directory(config["media_store_path"])
        self.uploads_path = self.ensure_directory(config["uploads_path"])
        self.dynamic_thumbnails = config["dynamic_thumbnails"]
        self.thumbnail_requirements = parse_thumbnail_requirements(
            config["thumbnail_sizes"]
        )

        self.url_preview_enabled = config.get("url_preview_enabled", False)
        if self.url_preview_enabled:
            # lxml and netaddr are optional dependencies, so they are only
            # imported (and only required) once URL previews are switched on.
            try:
                import lxml
                lxml  # To stop unused lint.
            except ImportError:
                raise ConfigError(MISSING_LXML)

            try:
                from netaddr import IPSet
            except ImportError:
                raise ConfigError(MISSING_NETADDR)

            # Deliberately no default: the admin has to state which IP
            # ranges the spider must never connect to.
            if "url_preview_ip_range_blacklist" not in config:
                raise ConfigError(
                    "For security, you must specify an explicit target IP address "
                    "blacklist in url_preview_ip_range_blacklist for url previewing "
                    "to work"
                )
            self.url_preview_ip_range_blacklist = IPSet(
                config["url_preview_ip_range_blacklist"]
            )

            # The URL blacklist is optional. Default to an empty tuple so the
            # attribute always exists when previews are enabled, rather than
            # leaving readers to hit an AttributeError.
            self.url_preview_url_blacklist = config.get(
                "url_preview_url_blacklist", ()
            )

    def default_config(self, **kwargs):
        """Returns the YAML text for this section of the default config.

        Args:
            **kwargs: Accepted but not used by this section.
        Returns:
            A string of YAML with the default media store and uploads paths
            substituted in.
        """
        # default_path is a helper inherited from Config (not defined in
        # this file).
        media_store = self.default_path("media_store")
        uploads_path = self.default_path("uploads")
        # NOTE: the template is a non-raw string, so backslashes that must
        # appear literally in the generated YAML are written doubled.
        return """
        # Directory where uploaded images and attachments are stored.
        media_store_path: "%(media_store)s"

        # Directory where in-progress uploads are stored.
        uploads_path: "%(uploads_path)s"

        # The largest allowed upload size in bytes
        max_upload_size: "10M"

        # Maximum number of pixels that will be thumbnailed
        max_image_pixels: "32M"

        # Whether to generate new thumbnails on the fly to precisely match
        # the resolution requested by the client. If true then whenever
        # a new resolution is requested by the client the server will
        # generate a new thumbnail. If false the server will pick a thumbnail
        # from a precalculated list.
        dynamic_thumbnails: false

        # List of thumbnail to precalculate when an image is uploaded.
        thumbnail_sizes:
        - width: 32
          height: 32
          method: crop
        - width: 96
          height: 96
          method: crop
        - width: 320
          height: 240
          method: scale
        - width: 640
          height: 480
          method: scale
        - width: 800
          height: 600
          method: scale

        # Is the preview URL API enabled?  If enabled, you *must* specify
        # an explicit url_preview_ip_range_blacklist of IPs that the spider is
        # denied from accessing.
        url_preview_enabled: False

        # List of IP address CIDR ranges that the URL preview spider is denied
        # from accessing.  There are no defaults: you must explicitly
        # specify a list for URL previewing to work.  You should specify any
        # internal services in your network that you do not want synapse to try
        # to connect to, otherwise anyone in any Matrix room could cause your
        # synapse to issue arbitrary GET requests to your internal services,
        # causing serious security issues.
        #
        # url_preview_ip_range_blacklist:
        # - '127.0.0.0/8'
        # - '10.0.0.0/8'
        # - '172.16.0.0/12'
        # - '192.168.0.0/16'

        # Optional list of URL matches that the URL preview spider is
        # denied from accessing.  You should use url_preview_ip_range_blacklist
        # in preference to this, otherwise someone could define a public DNS
        # entry that points to a private IP address and circumvent the blacklist.
        # This is more useful if you know there is an entire shape of URL that
        # you know that will never want synapse to try to spider.
        #
        # Each list entry is a dictionary of url component attributes as returned
        # by urlparse.urlsplit as applied to the absolute form of the URL.  See
        # https://docs.python.org/2/library/urlparse.html#urlparse.urlsplit
        # The values of the dictionary are treated as an filename match pattern
        # applied to that component of URLs, unless they start with a ^ in which
        # case they are treated as a regular expression match.  If all the
        # specified component matches for a given list item succeed, the URL is
        # blacklisted.
        #
        # url_preview_url_blacklist:
        # # blacklist any URL with a username in its URI
        # - username: '*'
        #
        # # blacklist all *.google.com URLs
        # - netloc: 'google.com'
        # - netloc: '*.google.com'
        #
        # # blacklist all plain HTTP URLs
        # - scheme: 'http'
        #
        # # blacklist http(s)://www.acme.com/foo
        # - netloc: 'www.acme.com'
        #   path: '/foo'
        #
        # # blacklist any URL with a literal IPv4 address
        # - netloc: '^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$'

        # The largest allowed URL preview spidering size in bytes
        max_spider_size: "10M"


        """ % {"media_store": media_store, "uploads_path": uploads_path}
Limit the size of uploads 2014-09-03 12:04:00 -04:00			`# -- coding: utf-8 --`
Update copyright notices 2015-01-06 08:21:39 -05:00			`# Copyright 2014, 2015 matrix.org`
Limit the size of uploads 2014-09-03 12:04:00 -04:00			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

Sanitize the optional dependencies for spider API 2016-04-13 06:57:46 -04:00			`from ._base import Config, ConfigError`
Add config option for setting the list of thumbnail sizes to precalculate 2015-08-12 05:55:27 -04:00			`from collections import namedtuple`

Sanitize the optional dependencies for spider API 2016-04-13 06:57:46 -04:00
			`MISSING_NETADDR = (`
			`"Missing netaddr library. This is required for URL preview API."`
			`)`

			`MISSING_LXML = (`
Give install requirements 2016-04-13 09:33:48 -04:00			`"""Missing lxml library. This is required for URL preview API.`

			`Install by running:`
			`pip install lxml`

			`Requires libxslt1-dev system package.`
			`"""`
Sanitize the optional dependencies for spider API 2016-04-13 06:57:46 -04:00			`)`

Add url_preview_enabled config option to turn on/off preview_url endpoint. defaults to off. Add url_preview_ip_range_blacklist to let admins specify internal IP ranges that must not be spidered. Add url_preview_url_blacklist to let admins specify URL patterns that must not be spidered. Implement a custom SpiderEndpoint and associated support classes to implement url_preview_ip_range_blacklist Add commentary and generally address PR feedback 2016-04-08 13:37:15 -04:00
Add config option for setting the list of thumbnail sizes to precalculate 2015-08-12 05:55:27 -04:00			`ThumbnailRequirement = namedtuple(`
			`"ThumbnailRequirement", ["width", "height", "method", "media_type"]`
			`)`

Fix flake8 warning 2015-08-13 12:34:22 -04:00
Add config option for setting the list of thumbnail sizes to precalculate 2015-08-12 05:55:27 -04:00			`def parse_thumbnail_requirements(thumbnail_sizes):`
Doc-string for config ultility function 2015-08-12 09:29:17 -04:00			`""" Takes a list of dictionaries with "width", "height", and "method" keys`
Add url_preview_enabled config option to turn on/off preview_url endpoint. defaults to off. Add url_preview_ip_range_blacklist to let admins specify internal IP ranges that must not be spidered. Add url_preview_url_blacklist to let admins specify URL patterns that must not be spidered. Implement a custom SpiderEndpoint and associated support classes to implement url_preview_ip_range_blacklist Add commentary and generally address PR feedback 2016-04-08 13:37:15 -04:00			`and creates a map from image media types to the thumbnail size, thumbnailing`
Doc-string for config ultility function 2015-08-12 09:29:17 -04:00			`method, and thumbnail media type to precalculate`

			`Args:`
			`thumbnail_sizes(list): List of dicts with "width", "height", and`
			`"method" keys`
			`Returns:`
			`Dictionary mapping from media type string to list of`
			`ThumbnailRequirement tuples.`
			`"""`
Add config option for setting the list of thumbnail sizes to precalculate 2015-08-12 05:55:27 -04:00			`requirements = {}`
			`for size in thumbnail_sizes:`
			`width = size["width"]`
			`height = size["height"]`
			`method = size["method"]`
			`jpeg_thumbnail = ThumbnailRequirement(width, height, method, "image/jpeg")`
			`png_thumbnail = ThumbnailRequirement(width, height, method, "image/png")`
			`requirements.setdefault("image/jpeg", []).append(jpeg_thumbnail)`
			`requirements.setdefault("image/gif", []).append(png_thumbnail)`
			`requirements.setdefault("image/png", []).append(png_thumbnail)`
			`return {`
			`media_type: tuple(thumbnails)`
			`for media_type, thumbnails in requirements.items()`
			`}`
Limit the size of uploads 2014-09-03 12:04:00 -04:00
Fix pep8 warnings 2014-10-30 07:10:17 -04:00
Limit the size of uploads 2014-09-03 12:04:00 -04:00			`class ContentRepositoryConfig(Config):`
Manually generate the default config yaml, remove most of the commandline arguments for synapse anticipating that people will use the yaml instead. Simpify implementing config options by not requiring the classes to hit the super class 2015-04-29 23:24:44 -04:00			`def read_config(self, config):`
			`self.max_upload_size = self.parse_size(config["max_upload_size"])`
			`self.max_image_pixels = self.parse_size(config["max_image_pixels"])`
initial WIP of a tentative preview_url endpoint - incomplete, untested, experimental, etc. just putting it here for safekeeping for now 2016-01-24 18:47:27 -05:00			`self.max_spider_size = self.parse_size(config["max_spider_size"])`
Manually generate the default config yaml, remove most of the commandline arguments for synapse anticipating that people will use the yaml instead. Simpify implementing config options by not requiring the classes to hit the super class 2015-04-29 23:24:44 -04:00			`self.media_store_path = self.ensure_directory(config["media_store_path"])`
Make upload dir a configurable path. Fixes SYN-425. Signed-off-by: Eric Myhre <hash@exultant.us> 2015-06-19 00:38:20 -04:00			`self.uploads_path = self.ensure_directory(config["uploads_path"])`
Make a config option for whether to generate new thumbnail sizes dynamically 2015-08-12 05:54:38 -04:00			`self.dynamic_thumbnails = config["dynamic_thumbnails"]`
Add config option for setting the list of thumbnail sizes to precalculate 2015-08-12 05:55:27 -04:00			`self.thumbnail_requirements = parse_thumbnail_requirements(`
			`config["thumbnail_sizes"]`
			`)`
Sanitize the optional dependencies for spider API 2016-04-13 06:57:46 -04:00			`self.url_preview_enabled = config.get("url_preview_enabled", False)`
Add url_preview_enabled config option to turn on/off preview_url endpoint. defaults to off. Add url_preview_ip_range_blacklist to let admins specify internal IP ranges that must not be spidered. Add url_preview_url_blacklist to let admins specify URL patterns that must not be spidered. Implement a custom SpiderEndpoint and associated support classes to implement url_preview_ip_range_blacklist Add commentary and generally address PR feedback 2016-04-08 13:37:15 -04:00			`if self.url_preview_enabled:`
Sanitize the optional dependencies for spider API 2016-04-13 06:57:46 -04:00			`try:`
			`import lxml`
			`lxml # To stop unused lint.`
			`except ImportError:`
			`raise ConfigError(MISSING_LXML)`

Add url_preview_enabled config option to turn on/off preview_url endpoint. defaults to off. Add url_preview_ip_range_blacklist to let admins specify internal IP ranges that must not be spidered. Add url_preview_url_blacklist to let admins specify URL patterns that must not be spidered. Implement a custom SpiderEndpoint and associated support classes to implement url_preview_ip_range_blacklist Add commentary and generally address PR feedback 2016-04-08 13:37:15 -04:00			`try:`
			`from netaddr import IPSet`
			`except ImportError:`
Sanitize the optional dependencies for spider API 2016-04-13 06:57:46 -04:00			`raise ConfigError(MISSING_NETADDR)`

			`if "url_preview_ip_range_blacklist" in config:`
			`self.url_preview_ip_range_blacklist = IPSet(`
			`config["url_preview_ip_range_blacklist"]`
			`)`
			`else:`
Add back in helpful description for missing url_preview_ip_range_blacklist 2016-04-13 08:52:57 -04:00			`raise ConfigError(`
			`"For security, you must specify an explicit target IP address "`
			`"blacklist in url_preview_ip_range_blacklist for url previewing "`
			`"to work"`
			`)`
Sanitize the optional dependencies for spider API 2016-04-13 06:57:46 -04:00
			`if "url_preview_url_blacklist" in config:`
			`self.url_preview_url_blacklist = config["url_preview_url_blacklist"]`
Limit the size of uploads 2014-09-03 12:04:00 -04:00
Implement configurable stats reporting SYN-287 This requires that HS owners either opt in or out of stats reporting. When --generate-config is passed, --report-stats must be specified If an already-generated config is used, and doesn't have the report_stats key, it is requested to be set. 2015-09-22 07:57:40 -04:00			`def default_config(self, **kwargs):`
Manually generate the default config yaml, remove most of the commandline arguments for synapse anticipating that people will use the yaml instead. Simpify implementing config options by not requiring the classes to hit the super class 2015-04-29 23:24:44 -04:00			`media_store = self.default_path("media_store")`
Make upload dir a configurable path. Fixes SYN-425. Signed-off-by: Eric Myhre <hash@exultant.us> 2015-06-19 00:38:20 -04:00			`uploads_path = self.default_path("uploads")`
Manually generate the default config yaml, remove most of the commandline arguments for synapse anticipating that people will use the yaml instead. Simpify implementing config options by not requiring the classes to hit the super class 2015-04-29 23:24:44 -04:00			`return """`
			`# Directory where uploaded images and attachments are stored.`
			`media_store_path: "%(media_store)s"`

Make upload dir a configurable path. Fixes SYN-425. Signed-off-by: Eric Myhre <hash@exultant.us> 2015-06-19 00:38:20 -04:00			`# Directory where in-progress uploads are stored.`
			`uploads_path: "%(uploads_path)s"`

Manually generate the default config yaml, remove most of the commandline arguments for synapse anticipating that people will use the yaml instead. Simpify implementing config options by not requiring the classes to hit the super class 2015-04-29 23:24:44 -04:00			`# The largest allowed upload size in bytes`
			`max_upload_size: "10M"`

			`# Maximum number of pixels that will be thumbnailed`
			`max_image_pixels: "32M"`
Make a config option for whether to generate new thumbnail sizes dynamically 2015-08-12 05:54:38 -04:00
			`# Whether to generate new thumbnails on the fly to precisely match`
			`# the resolution requested by the client. If true then whenever`
			`# a new resolution is requested by the client the server will`
			`# generate a new thumbnail. If false the server will pick a thumbnail`
initial WIP of a tentative preview_url endpoint - incomplete, untested, experimental, etc. just putting it here for safekeeping for now 2016-01-24 18:47:27 -05:00			`# from a precalculated list.`
Make a config option for whether to generate new thumbnail sizes dynamically 2015-08-12 05:54:38 -04:00			`dynamic_thumbnails: false`
Add config option for setting the list of thumbnail sizes to precalculate 2015-08-12 05:55:27 -04:00
			`# List of thumbnail to precalculate when an image is uploaded.`
			`thumbnail_sizes:`
			`- width: 32`
			`height: 32`
			`method: crop`
			`- width: 96`
			`height: 96`
			`method: crop`
			`- width: 320`
			`height: 240`
			`method: scale`
			`- width: 640`
			`height: 480`
			`method: scale`
add 800x600 thumbnails to make vector look prettier (and anyone else who likes big thumbnails) 2016-03-02 10:57:54 -05:00			`- width: 800`
			`height: 600`
			`method: scale`
Add url_preview_enabled config option to turn on/off preview_url endpoint. defaults to off. Add url_preview_ip_range_blacklist to let admins specify internal IP ranges that must not be spidered. Add url_preview_url_blacklist to let admins specify URL patterns that must not be spidered. Implement a custom SpiderEndpoint and associated support classes to implement url_preview_ip_range_blacklist Add commentary and generally address PR feedback 2016-04-08 13:37:15 -04:00
			`# Is the preview URL API enabled? If enabled, you must specify`
			`# an explicit url_preview_ip_range_blacklist of IPs that the spider is`
			`# denied from accessing.`
			`url_preview_enabled: False`

			`# List of IP address CIDR ranges that the URL preview spider is denied`
			`# from accessing. There are no defaults: you must explicitly`
			`# specify a list for URL previewing to work. You should specify any`
			`# internal services in your network that you do not want synapse to try`
			`# to connect to, otherwise anyone in any Matrix room could cause your`
			`# synapse to issue arbitrary GET requests to your internal services,`
			`# causing serious security issues.`
			`#`
			`# url_preview_ip_range_blacklist:`
			`# - '127.0.0.0/8'`
			`# - '10.0.0.0/8'`
			`# - '172.16.0.0/12'`
			`# - '192.168.0.0/16'`

			`# Optional list of URL matches that the URL preview spider is`
			`# denied from accessing. You should use url_preview_ip_range_blacklist`
			`# in preference to this, otherwise someone could define a public DNS`
			`# entry that points to a private IP address and circumvent the blacklist.`
			`# This is more useful if you know there is an entire shape of URL that`
			`# you know that will never want synapse to try to spider.`
			`#`
			`# Each list entry is a dictionary of url component attributes as returned`
			`# by urlparse.urlsplit as applied to the absolute form of the URL. See`
			`# https://docs.python.org/2/library/urlparse.html#urlparse.urlsplit`
			`# The values of the dictionary are treated as an filename match pattern`
			`# applied to that component of URLs, unless they start with a ^ in which`
			`# case they are treated as a regular expression match. If all the`
			`# specified component matches for a given list item succeed, the URL is`
			`# blacklisted.`
			`#`
			`# url_preview_url_blacklist:`
			`# # blacklist any URL with a username in its URI`
fix typo 2016-04-08 14:08:47 -04:00			`# - username: '*'`
Add url_preview_enabled config option to turn on/off preview_url endpoint. defaults to off. Add url_preview_ip_range_blacklist to let admins specify internal IP ranges that must not be spidered. Add url_preview_url_blacklist to let admins specify URL patterns that must not be spidered. Implement a custom SpiderEndpoint and associated support classes to implement url_preview_ip_range_blacklist Add commentary and generally address PR feedback 2016-04-08 13:37:15 -04:00			`#`
			`# # blacklist all *.google.com URLs`
			`# - netloc: 'google.com'`
			`# - netloc: '*.google.com'`
			`#`
			`# # blacklist all plain HTTP URLs`
			`# - scheme: 'http'`
			`#`
			`# # blacklist http(s)://www.acme.com/foo`
			`# - netloc: 'www.acme.com'`
			`# path: '/foo'`
			`#`
			`# # blacklist any URL with a literal IPv4 address`
			`# - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'`

			`# The largest allowed URL preview spidering size in bytes`
			`max_spider_size: "10M"`


Manually generate the default config yaml, remove most of the commandline arguments for synapse anticipating that people will use the yaml instead. Simpify implementing config options by not requiring the classes to hit the super class 2015-04-29 23:24:44 -04:00			`""" % locals()`