#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2014, 2015 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import logging
import os
from typing import Any, Dict, List, Tuple
from urllib.request import getproxies_environment  # type: ignore

import attr

from synapse.config.server import generate_ip_set
from synapse.types import JsonDict
from synapse.util.check_dependencies import check_requirements
from synapse.util.module_loader import load_module

from ._base import Config, ConfigError

logger = logging.getLogger(__name__)

DEFAULT_THUMBNAIL_SIZES = [
    {"width": 32, "height": 32, "method": "crop"},
    {"width": 96, "height": 96, "method": "crop"},
    {"width": 320, "height": 240, "method": "scale"},
    {"width": 640, "height": 480, "method": "scale"},
    {"width": 800, "height": 600, "method": "scale"},
]

THUMBNAIL_SIZE_YAML = """\
#  - width: %(width)i
#    height: %(height)i
#    method: %(method)s
"""

# A map from the given media type to the type of thumbnail we should generate
# for it.
THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP = {
    "image/jpeg": "jpeg",
    "image/jpg": "jpeg",
    "image/webp": "jpeg",
    # Thumbnails can only be jpeg or png. We choose png thumbnails for gif
    # because it can have transparency.
    "image/gif": "png",
    "image/png": "png",
}

HTTP_PROXY_SET_WARNING = """\
The Synapse config url_preview_ip_range_blacklist will be ignored as an HTTP(s) proxy is configured."""


@attr.s(frozen=True, slots=True, auto_attribs=True)
class ThumbnailRequirement:
    width: int
    height: int
    method: str
    media_type: str


@attr.s(frozen=True, slots=True, auto_attribs=True)
class MediaStorageProviderConfig:
    store_local: bool  # Whether to store newly uploaded local files
    store_remote: bool  # Whether to store newly downloaded remote files
    store_synchronous: bool  # Whether to wait for successful storage for local uploads


def parse_thumbnail_requirements(
    thumbnail_sizes: List[JsonDict],
) -> Dict[str, Tuple[ThumbnailRequirement, ...]]:
    """Takes a list of dictionaries with "width", "height", and "method" keys
    and creates a map from image media types to the thumbnail size, thumbnailing
    method, and thumbnail media type to precalculate.

    Args:
        thumbnail_sizes: List of dicts with "width", "height", and "method" keys.

    Returns:
        Dictionary mapping from media type string to a tuple of ThumbnailRequirement.
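
    Example (an illustrative sketch of the resulting mapping, abridged):

        parse_thumbnail_requirements([{"width": 32, "height": 32, "method": "crop"}])
        # => {
        #     "image/jpeg": (ThumbnailRequirement(32, 32, "crop", "image/jpeg"),),
        #     "image/gif": (ThumbnailRequirement(32, 32, "crop", "image/png"),),
        #     ...
        # }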
    """
    requirements: Dict[str, List[ThumbnailRequirement]] = {}
    for size in thumbnail_sizes:
        width = size["width"]
        height = size["height"]
        method = size["method"]

        for format, thumbnail_format in THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.items():
            requirement = requirements.setdefault(format, [])
            if thumbnail_format == "jpeg":
                requirement.append(
                    ThumbnailRequirement(width, height, method, "image/jpeg")
                )
            elif thumbnail_format == "png":
                requirement.append(
                    ThumbnailRequirement(width, height, method, "image/png")
                )
            else:
                raise Exception(
                    "Unknown thumbnail mapping from %s to %s. This is a Synapse problem, please report!"
                    % (format, thumbnail_format)
                )
    return {
        media_type: tuple(thumbnails) for media_type, thumbnails in requirements.items()
    }


class ContentRepositoryConfig(Config):
    section = "media"

    def read_config(self, config: JsonDict, **kwargs: Any) -> None:
        # Only enable the media repo if either the media repo is enabled in the
        # server config or the current worker app is the media repo.
        if (
            self.root.server.enable_media_repo is False
            and config.get("worker_app") != "synapse.app.media_repository"
        ):
            self.can_load_media_repo = False
            return
        else:
            self.can_load_media_repo = True

        # Whether this instance should be the one to run the background jobs to
        # e.g. clean up old URL previews.
        self.media_instance_running_background_jobs = config.get(
            "media_instance_running_background_jobs",
        )

        self.max_upload_size = self.parse_size(config.get("max_upload_size", "50M"))
        self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M"))
        self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M"))

        self.prevent_media_downloads_from = config.get(
            "prevent_media_downloads_from", []
        )

        self.unused_expiration_time = self.parse_duration(
            config.get("unused_expiration_time", "24h")
        )

        self.max_pending_media_uploads = config.get("max_pending_media_uploads", 5)
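
        # Illustrative upload-related settings as they might appear in
        # homeserver.yaml (the values shown are simply the defaults used above):
        #
        #   max_upload_size: 50M
        #   unused_expiration_time: 24h
        #   max_pending_media_uploads: 5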

        self.media_store_path = self.ensure_directory(
            config.get("media_store_path", "media_store")
        )

        backup_media_store_path = config.get("backup_media_store_path")

        synchronous_backup_media_store = config.get(
            "synchronous_backup_media_store", False
        )

        storage_providers = config.get("media_storage_providers", [])

        if backup_media_store_path:
            if storage_providers:
                raise ConfigError(
                    "Cannot use both 'backup_media_store_path' and 'storage_providers'"
                )

            storage_providers = [
                {
                    "module": "file_system",
                    "store_local": True,
                    "store_synchronous": synchronous_backup_media_store,
                    "store_remote": True,
                    "config": {"directory": backup_media_store_path},
                }
            ]

        # This is a list of configs that can be used to create the storage
        # providers. The entries are tuples of (Class, class_config,
        # MediaStorageProviderConfig), where Class is the class of the provider,
        # class_config is the config to pass to it, and
        # MediaStorageProviderConfig holds the options for StorageProviderWrapper.
        #
        # We don't create the storage providers here as not all workers need
        # them to be started.
        self.media_storage_providers: List[tuple] = []

        for i, provider_config in enumerate(storage_providers):
            # We special-case the module "file_system" so as not to need to
            # expose FileStorageProviderBackend
            if (
                provider_config["module"] == "file_system"
                or provider_config["module"] == "synapse.rest.media.v1.storage_provider"
            ):
                provider_config["module"] = (
                    "synapse.media.storage_provider.FileStorageProviderBackend"
                )

            provider_class, parsed_config = load_module(
                provider_config, ("media_storage_providers", "<item %i>" % i)
            )

            wrapper_config = MediaStorageProviderConfig(
                provider_config.get("store_local", False),
                provider_config.get("store_remote", False),
                provider_config.get("store_synchronous", False),
            )

            self.media_storage_providers.append(
                (provider_class, parsed_config, wrapper_config)
            )
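
        # Illustrative example of a `media_storage_providers` entry that the loop
        # above parses; the directory path below is hypothetical:
        #
        #   media_storage_providers:
        #     - module: file_system
        #       store_local: false
        #       store_remote: false
        #       store_synchronous: false
        #       config:
        #         directory: /mnt/media-backup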

        self.dynamic_thumbnails = config.get("dynamic_thumbnails", False)
        self.thumbnail_requirements = parse_thumbnail_requirements(
            config.get("thumbnail_sizes", DEFAULT_THUMBNAIL_SIZES)
        )
        self.url_preview_enabled = config.get("url_preview_enabled", False)
        if self.url_preview_enabled:
            check_requirements("url-preview")

            proxy_env = getproxies_environment()
            if "url_preview_ip_range_blacklist" not in config:
                if "http" not in proxy_env or "https" not in proxy_env:
                    raise ConfigError(
                        "For security, you must specify an explicit target IP address "
                        "blacklist in url_preview_ip_range_blacklist for url previewing "
                        "to work"
                    )
            else:
                if "http" in proxy_env or "https" in proxy_env:
                    logger.warning(HTTP_PROXY_SET_WARNING)

            # we always block '0.0.0.0' and '::', which are supposed to be
            # unroutable addresses.
            self.url_preview_ip_range_blocklist = generate_ip_set(
                config["url_preview_ip_range_blacklist"],
                ["0.0.0.0", "::"],
                config_path=("url_preview_ip_range_blacklist",),
            )

            self.url_preview_ip_range_allowlist = generate_ip_set(
                config.get("url_preview_ip_range_whitelist", ()),
                config_path=("url_preview_ip_range_whitelist",),
            )

            self.url_preview_url_blocklist = config.get("url_preview_url_blacklist", ())

            self.url_preview_accept_language = config.get(
                "url_preview_accept_language"
            ) or ["en"]
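
        # For illustration, the URL preview options read above might look like
        # this in homeserver.yaml (the CIDR ranges are hypothetical examples):
        #
        #   url_preview_enabled: true
        #   url_preview_ip_range_blacklist:
        #     - '127.0.0.0/8'
        #     - '10.0.0.0/8'
        #   url_preview_accept_language:
        #     - en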

        media_retention = config.get("media_retention") or {}

        self.media_retention_local_media_lifetime_ms = None
        local_media_lifetime = media_retention.get("local_media_lifetime")
        if local_media_lifetime is not None:
            self.media_retention_local_media_lifetime_ms = self.parse_duration(
                local_media_lifetime
            )

        self.media_retention_remote_media_lifetime_ms = None
        remote_media_lifetime = media_retention.get("remote_media_lifetime")
        if remote_media_lifetime is not None:
            self.media_retention_remote_media_lifetime_ms = self.parse_duration(
                remote_media_lifetime
            )
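
        # Illustrative shape of the `media_retention` section parsed above; the
        # lifetimes are hypothetical values accepted by parse_duration:
        #
        #   media_retention:
        #     local_media_lifetime: 90d
        #     remote_media_lifetime: 14d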

    def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
        assert data_dir_path is not None
        media_store = os.path.join(data_dir_path, "media_store")
        return f"media_store_path: {media_store}"