2018-01-17 11:56:35 -05:00
#
2023-11-21 15:29:58 -05:00
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
2024-01-23 06:26:48 -05:00
# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
2023-11-21 15:29:58 -05:00
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
2018-01-17 11:56:35 -05:00
#
#
2024-05-24 04:47:37 -04:00
import itertools
2018-07-09 02:09:20 -04:00
import os
import shutil
import tempfile
2020-01-22 07:30:49 -05:00
from binascii import unhexlify
2020-06-05 06:54:27 -04:00
from io import BytesIO
2023-02-07 10:24:44 -05:00
from typing import Any , BinaryIO , ClassVar , Dict , List , Optional , Tuple , Union
2024-06-05 08:43:36 -04:00
from unittest . mock import MagicMock , Mock , patch
2020-06-16 08:51:47 -04:00
from urllib import parse
2018-07-09 02:09:20 -04:00
2020-06-05 06:54:27 -04:00
import attr
2021-08-25 05:51:08 -04:00
from parameterized import parameterized , parameterized_class
2020-07-05 11:32:02 -04:00
from PIL import Image as Image
2022-07-11 12:52:10 -04:00
from typing_extensions import Literal
2020-06-05 06:54:27 -04:00
2020-07-24 09:39:02 -04:00
from twisted . internet import defer
2018-11-15 16:55:58 -05:00
from twisted . internet . defer import Deferred
2023-11-29 14:03:42 -05:00
from twisted . python . failure import Failure
2022-03-11 07:42:22 -05:00
from twisted . test . proto_helpers import MemoryReactor
2024-06-05 08:43:36 -04:00
from twisted . web . http_headers import Headers
from twisted . web . iweb import UNKNOWN_LENGTH , IResponse
2023-10-06 07:22:55 -04:00
from twisted . web . resource import Resource
2018-01-17 11:56:35 -05:00
2023-11-29 14:03:42 -05:00
from synapse . api . errors import Codes , HttpResponseException
2024-06-05 08:43:36 -04:00
from synapse . api . ratelimiting import Ratelimiter
2022-03-11 07:42:22 -05:00
from synapse . events import EventBase
2023-02-07 10:24:44 -05:00
from synapse . http . types import QueryParams
2019-07-03 10:07:04 -04:00
from synapse . logging . context import make_deferred_yieldable
2023-10-06 10:12:43 -04:00
from synapse . media . _base import FileInfo , ThumbnailInfo
2023-02-27 08:26:05 -05:00
from synapse . media . filepath import MediaFilePaths
from synapse . media . media_storage import MediaStorage , ReadableFileWrapper
2024-06-18 11:54:19 -04:00
from synapse . media . storage_provider import FileStorageProviderBackend
2024-05-24 04:47:37 -04:00
from synapse . media . thumbnailer import ThumbnailProvider
2022-03-11 07:42:22 -05:00
from synapse . module_api import ModuleApi
2023-04-17 20:57:40 -04:00
from synapse . module_api . callbacks . spamchecker_callbacks import load_legacy_spam_checkers
2021-02-03 11:44:16 -05:00
from synapse . rest import admin
2024-05-24 04:47:37 -04:00
from synapse . rest . client import login , media
2022-03-11 07:42:22 -05:00
from synapse . server import HomeServer
2023-02-07 10:24:44 -05:00
from synapse . types import JsonDict , RoomAlias
2022-03-11 07:42:22 -05:00
from synapse . util import Clock
2018-01-17 11:56:35 -05:00
from tests import unittest
2023-10-06 07:22:55 -04:00
from tests . server import FakeChannel
2021-09-16 12:01:14 -04:00
from tests . test_utils import SMALL_PNG
2024-06-05 08:43:36 -04:00
from tests . unittest import override_config
2021-02-03 11:44:16 -05:00
from tests . utils import default_config
2018-01-17 11:56:35 -05:00
2019-06-29 03:06:55 -04:00
class MediaStorageTests(unittest.HomeserverTestCase):
    """Tests for `MediaStorage` backed by a primary store and one file provider.

    `prepare` builds a temporary directory containing a "primary" (local media
    store) and a "secondary" (storage provider) sub-directory.
    """

    # The test below waits on work that runs in a real, blocking threadpool.
    needs_threadpool = True

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        """Create the temporary directories and the `MediaStorage` under test.

        Args:
            reactor: The test reactor (unused here).
            clock: The test clock (unused here).
            hs: The homeserver whose media store path is pointed at the
                temporary primary directory.
        """
        self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
        # Remove the whole temporary tree once the test has finished.
        self.addCleanup(shutil.rmtree, self.test_dir)

        self.primary_base_path = os.path.join(self.test_dir, "primary")
        self.secondary_base_path = os.path.join(self.test_dir, "secondary")

        hs.config.media.media_store_path = self.primary_base_path

        storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]

        self.filepaths = MediaFilePaths(self.primary_base_path)
        self.media_storage = MediaStorage(
            hs, self.primary_base_path, self.filepaths, storage_providers
        )

    def test_ensure_media_is_in_local_cache(self) -> None:
        """A file that only exists in the storage provider is copied locally."""
        media_id = "some_media_id"
        test_body = "Test\n"

        # First we create a file that is in a storage provider but not in the
        # local primary media store
        rel_path = self.filepaths.local_media_filepath_rel(media_id)
        secondary_path = os.path.join(self.secondary_base_path, rel_path)

        os.makedirs(os.path.dirname(secondary_path))

        with open(secondary_path, "w") as f:
            f.write(test_body)

        # Now we run ensure_media_is_in_local_cache, which should copy the file
        # to the local cache.
        file_info = FileInfo(None, media_id)

        # This uses a real blocking threadpool so we have to wait for it to be
        # actually done :/
        x = defer.ensureDeferred(
            self.media_storage.ensure_media_is_in_local_cache(file_info)
        )

        # Hotloop until the threadpool does its job...
        self.wait_on_thread(x)

        local_path = self.get_success(x)

        self.assertTrue(os.path.exists(local_path))

        # Asserts the file is under the expected local cache directory
        self.assertEqual(
            os.path.commonprefix([self.primary_base_path, local_path]),
            self.primary_base_path,
        )

        with open(local_path) as f:
            body = f.read()

        self.assertEqual(test_body, body)
2018-11-15 16:55:58 -05:00
2022-03-11 07:42:22 -05:00
@attr.s(auto_attribs=True, slots=True, frozen=True)
class _TestImage:
    """An image for testing thumbnailing with the expected results

    Attributes:
        data: The raw image to thumbnail
        content_type: The type of the image as a content type, e.g. "image/png"
        extension: The extension associated with the format, e.g. ".png"
        expected_cropped: The expected bytes from cropped thumbnailing, or None if
            test should just check for success.
        expected_scaled: The expected bytes from scaled thumbnailing, or None if
            test should just check for a valid image returned.
        expected_found: True if the file should exist on the server, or False if
            a 404/400 is expected.
        unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
            False if the thumbnailing should succeed or a normal 404 is expected.
        is_inline: True if we expect the file to be served using an inline
            Content-Disposition or False if we expect an attachment.
    """

    data: bytes
    content_type: bytes
    extension: bytes
    expected_cropped: Optional[bytes] = None
    expected_scaled: Optional[bytes] = None
    expected_found: bool = True
    unable_to_thumbnail: bool = False
    is_inline: bool = True
2020-06-05 06:54:27 -04:00
2024-05-24 04:47:37 -04:00
# A PNG with both expected thumbnail outputs pinned: the first blob is the
# expected cropped result, the second the expected scaled result.
small_png = _TestImage(
    SMALL_PNG,
    b"image/png",
    b".png",
    unhexlify(
        b"89504e470d0a1a0a0000000d4948445200000020000000200806"
        b"000000737a7af40000001a49444154789cedc101010000008220"
        b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
        b"44ae426082"
    ),
    unhexlify(
        b"89504e470d0a1a0a0000000d4948445200000001000000010806"
        b"0000001f15c4890000000d49444154789c636060606000000005"
        b"0001a5f645400000000049454e44ae426082"
    ),
)

small_png_with_transparency = _TestImage(
    unhexlify(
        b"89504e470d0a1a0a0000000d49484452000000010000000101000"
        b"00000376ef9240000000274524e5300010194fdae0000000a4944"
        b"4154789c636800000082008177cd72b60000000049454e44ae426"
        b"082"
    ),
    b"image/png",
    b".png",
    # Note that we don't check the output since it varies across
    # different versions of Pillow.
)

small_lossless_webp = _TestImage(
    unhexlify(
        b"524946461a000000574542505650384c0d0000002f0000001007"
        b"1011118888fe0700"
    ),
    b"image/webp",
    b".webp",
)

# A zero-byte body: not expected to be found, and thumbnailing is expected to
# fail.
empty_file = _TestImage(
    b"",
    b"image/gif",
    b".gif",
    expected_found=False,
    unable_to_thumbnail=True,
)

# An SVG document: thumbnailing is expected to fail and the download is
# expected to be served as an attachment rather than inline.
SVG = _TestImage(
    b"""<?xml version="1.0"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">

<svg xmlns="http://www.w3.org/2000/svg"
     width="400" height="400">
  <circle cx="100" cy="100" r="50" stroke="black"
    stroke-width="5" fill="red" />
</svg>""",
    b"image/svg",
    b".svg",
    expected_found=False,
    unable_to_thumbnail=True,
    is_inline=False,
)

# Every image the parameterized media tests are run against.
test_images = [
    small_png,
    small_png_with_transparency,
    small_lossless_webp,
    empty_file,
    SVG,
]

# Thumbnail endpoint prefixes the parameterized media tests are run against.
urls = [
    "_matrix/media/r0/thumbnail",
    "_matrix/client/unstable/org.matrix.msc3916/media/thumbnail",
]
@parameterized_class ( ( " test_image " , " url " ) , itertools . product ( test_images , urls ) )
2018-11-15 16:55:58 -05:00
class MediaRepoTests ( unittest . HomeserverTestCase ) :
2024-05-24 04:47:37 -04:00
servlets = [ media . register_servlets ]
2023-02-07 10:24:44 -05:00
test_image : ClassVar [ _TestImage ]
2018-11-15 16:55:58 -05:00
hijack_auth = True
user_id = " @test:user "
2024-05-24 04:47:37 -04:00
url : ClassVar [ str ]
2018-11-15 16:55:58 -05:00
2022-03-11 07:42:22 -05:00
def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
    """Build a homeserver whose federation HTTP client is a recording mock.

    Every call to the mocked `get_file` is appended to `self.fetches` as a
    `(deferred, destination, path, args)` tuple; the test completes the
    download by firing the stored deferred.

    Args:
        reactor: The test reactor (unused here).
        clock: The test clock (unused here).

    Returns:
        The configured test homeserver.
    """
    self.fetches: List[
        Tuple[
            "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]",
            str,
            str,
            Optional[QueryParams],
        ]
    ] = []

    def get_file(
        destination: str,
        path: str,
        output_stream: BinaryIO,
        download_ratelimiter: Ratelimiter,
        ip_address: Any,
        max_size: int,
        args: Optional[QueryParams] = None,
        retry_on_dns_fail: bool = True,
        ignore_backoff: bool = False,
        follow_redirects: bool = False,
    ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
        """A mock for MatrixFederationHttpClient.get_file.

        Records the request in `self.fetches` and returns a deferred that
        resolves once the test fires the recorded one.
        """

        def write_to(
            r: Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]
        ) -> Tuple[int, Dict[bytes, List[bytes]]]:
            # On success, write the body to the stream and pass on the
            # remaining (length, headers) part of the result.
            data, response = r
            output_stream.write(data)
            return response

        def write_err(f: Failure) -> Failure:
            # Only HTTP response errors are handled; anything else is
            # re-raised by trap().
            f.trap(HttpResponseException)
            output_stream.write(f.value.response)
            return f

        d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred()
        self.fetches.append((d, destination, path, args))
        # Note that this callback changes the value held by d.
        d_after_callback = d.addCallbacks(write_to, write_err)
        return make_deferred_yieldable(d_after_callback)

    # Mock out the homeserver's MatrixFederationHttpClient
    client = Mock()
    client.get_file = get_file

    self.storage_path = self.mktemp()
    self.media_store_path = self.mktemp()
    os.mkdir(self.storage_path)
    os.mkdir(self.media_store_path)

    config = self.default_config()
    config["media_store_path"] = self.media_store_path
    config["max_image_pixels"] = 2000000

    provider_config = {
        "module": "synapse.media.storage_provider.FileStorageProviderBackend",
        "store_local": True,
        "store_synchronous": False,
        "store_remote": True,
        "config": {"directory": self.storage_path},
    }
    config["media_storage_providers"] = [provider_config]
    config["experimental_features"] = {"msc3916_authenticated_media_enabled": True}

    hs = self.setup_test_homeserver(config=config, federation_http_client=client)

    return hs
2022-03-11 07:42:22 -05:00
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
    """Stash the objects the tests use and pick the remote media to request.

    Args:
        reactor: The test reactor (unused here).
        clock: The test clock (unused here).
        hs: The homeserver to take the main datastore and media repository
            from.
    """
    self.store = hs.get_datastores().main
    self.media_repo = hs.get_media_repository()

    # "<server name>/<media id>" of the remote media the tests request.
    self.media_id = "example.com/12345"
2023-10-06 07:22:55 -04:00
def create_resource_dict(self) -> Dict[str, Resource]:
    """Return the parent's resources plus the media repository resource.

    The media repository resource is registered under "/_matrix/media".
    """
    resources = super().create_resource_dict()
    resources["/_matrix/media"] = self.hs.get_media_repository_resource()
    return resources
2022-03-11 07:42:22 -05:00
def _req(
    self, content_disposition: Optional[bytes], include_content_type: bool = True
) -> FakeChannel:
    """Download the remote test media and return the finished channel.

    Issues the download request, checks that exactly one remote fetch was
    made with the expected destination, path and query parameters, then
    completes that fetch with the test image and asserts a 200 response.

    Args:
        content_disposition: Value for the remote response's
            Content-Disposition header; the header is omitted if falsy.
        include_content_type: Whether the remote response carries a
            Content-Type header.

    Returns:
        The channel of the completed download request.
    """
    channel = self.make_request(
        "GET",
        f"/_matrix/media/v3/download/{self.media_id}",
        shorthand=False,
        await_result=False,
    )
    self.pump()

    # We've made one fetch, to example.com, using the media URL, and asking
    # the other server not to do a remote fetch
    self.assertEqual(len(self.fetches), 1)
    self.assertEqual(self.fetches[0][1], "example.com")
    self.assertEqual(
        self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id
    )
    self.assertEqual(
        self.fetches[0][3],
        {"allow_remote": "false", "timeout_ms": "20000", "allow_redirect": "true"},
    )

    headers = {
        b"Content-Length": [b"%d" % (len(self.test_image.data))],
    }

    if include_content_type:
        headers[b"Content-Type"] = [self.test_image.content_type]

    if content_disposition:
        headers[b"Content-Disposition"] = [content_disposition]

    # Complete the pending remote fetch with (body, (length, headers)).
    self.fetches[0][0].callback(
        (self.test_image.data, (len(self.test_image.data), headers))
    )

    self.pump()
    self.assertEqual(channel.code, 200)

    return channel
2022-03-11 07:42:22 -05:00
def test_handle_missing_content_type(self) -> None:
    """
    If the remote response has no Content-Type, the media is served as
    application/octet-stream.
    """
    channel = self._req(
        b"attachment; filename=out" + self.test_image.extension,
        include_content_type=False,
    )
    headers = channel.headers
    self.assertEqual(channel.code, 200)
    self.assertEqual(
        headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"]
    )
2022-03-11 07:42:22 -05:00
def test_disposition_filename_ascii(self) -> None:
    """
    If the filename is filename=<ascii> then Synapse will decode it as an
    ASCII string, and use filename= in the response.
    """
    channel = self._req(b"attachment; filename=out" + self.test_image.extension)

    headers = channel.headers
    self.assertEqual(
        headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
    )
    # The disposition type depends on the image under test; the filename is
    # expected to be passed through unchanged.
    self.assertEqual(
        headers.getRawHeaders(b"Content-Disposition"),
        [
            (b"inline" if self.test_image.is_inline else b"attachment")
            + b"; filename=out"
            + self.test_image.extension
        ],
    )
2022-03-11 07:42:22 -05:00
def test_disposition_filenamestar_utf8escaped(self) -> None:
    """
    If the filename is filename*=utf-8''<utf8 escaped> then Synapse will
    correctly decode it as the UTF-8 string, and use filename* in the
    response.
    """
    # Percent-encoded UTF-8 bytes of U+2603.
    filename = parse.quote("\u2603".encode()).encode("ascii")
    channel = self._req(
        b"attachment; filename*=utf-8''" + filename + self.test_image.extension
    )

    headers = channel.headers
    self.assertEqual(
        headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
    )
    self.assertEqual(
        headers.getRawHeaders(b"Content-Disposition"),
        [
            (b"inline" if self.test_image.is_inline else b"attachment")
            + b"; filename*=utf-8''"
            + filename
            + self.test_image.extension
        ],
    )
2022-03-11 07:42:22 -05:00
def test_disposition_none(self) -> None:
    """
    If there is no filename, Content-Disposition should only
    be a disposition type.
    """
    channel = self._req(None)

    headers = channel.headers
    self.assertEqual(
        headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
    )
    self.assertEqual(
        headers.getRawHeaders(b"Content-Disposition"),
        [b"inline" if self.test_image.is_inline else b"attachment"],
    )
2020-01-22 07:28:07 -05:00
2022-03-11 07:42:22 -05:00
def test_thumbnail_crop(self) -> None:
    """Test that a cropped remote thumbnail is available."""
    # The expected bytes and outcome flags come from the image under test.
    self._test_thumbnail(
        "crop",
        self.test_image.expected_cropped,
        expected_found=self.test_image.expected_found,
        unable_to_thumbnail=self.test_image.unable_to_thumbnail,
    )
2020-01-22 07:28:07 -05:00
2022-03-11 07:42:22 -05:00
def test_thumbnail_scale(self) -> None:
    """Test that a scaled remote thumbnail is available."""
    # The expected bytes and outcome flags come from the image under test.
    self._test_thumbnail(
        "scale",
        self.test_image.expected_scaled,
        expected_found=self.test_image.expected_found,
        unable_to_thumbnail=self.test_image.unable_to_thumbnail,
    )
2020-01-22 07:28:07 -05:00
2022-03-11 07:42:22 -05:00
def test_invalid_type(self) -> None:
    """An invalid thumbnail type is never available."""
    # No expected body is given and the thumbnail must not be found, whatever
    # the image under test is.
    self._test_thumbnail(
        "invalid",
        None,
        expected_found=False,
        unable_to_thumbnail=self.test_image.unable_to_thumbnail,
    )
2021-01-21 14:53:58 -05:00
@unittest.override_config(
    {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]}
)
def test_no_thumbnail_crop(self) -> None:
    """
    Override the config to generate only scaled thumbnails, but request a cropped one.
    """
    self._test_thumbnail(
        "crop",
        None,
        expected_found=False,
        unable_to_thumbnail=self.test_image.unable_to_thumbnail,
    )
2021-01-21 14:53:58 -05:00
@unittest.override_config(
    {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]}
)
def test_no_thumbnail_scale(self) -> None:
    """
    Override the config to generate only cropped thumbnails, but request a scaled one.

    `_test_thumbnail` is told not to expect a thumbnail
    (`expected_found=False`); the test image's `unable_to_thumbnail` flag
    selects which failure response it checks for.
    """
    self._test_thumbnail(
        "scale",
        None,
        expected_found=False,
        unable_to_thumbnail=self.test_image.unable_to_thumbnail,
    )
2021-01-21 14:53:58 -05:00
2022-03-11 07:42:22 -05:00
def test_thumbnail_repeated_thumbnail(self) -> None:
    """Test that fetching the same thumbnail works, and deleting the on disk
    thumbnail regenerates it.
    """
    image = self.test_image
    self._test_thumbnail(
        "scale",
        image.expected_scaled,
        expected_found=image.expected_found,
        unable_to_thumbnail=image.unable_to_thumbnail,
    )

    # Nothing more to check when the image is not expected to be served.
    if not image.expected_found:
        return

    params = "?width=32&height=32&method=scale"

    def fetch_and_check() -> None:
        """Request the scaled thumbnail and check that it is served correctly."""
        channel = self.make_request(
            "GET",
            f"/{self.url}/{self.media_id}{params}",
            shorthand=False,
            await_result=False,
        )
        self.pump()

        self.assertEqual(channel.code, 200)
        if image.expected_scaled:
            body = channel.result["body"]
            # The body is also passed as the failure message so a mismatch
            # shows what was actually returned.
            self.assertEqual(body, image.expected_scaled, body)

    # Fetching again should work, without re-requesting the image from the
    # remote.
    fetch_and_check()

    # Deleting the thumbnail on disk then re-requesting it should work as
    # Synapse should regenerate missing thumbnails.
    origin, media_id = self.media_id.split("/")
    info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
    assert info is not None
    file_id = info.filesystem_id

    thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
        origin, file_id
    )
    shutil.rmtree(thumbnail_dir, ignore_errors=True)

    fetch_and_check()
2022-03-11 07:42:22 -05:00
def _test_thumbnail(
    self,
    method: str,
    expected_body: Optional[bytes],
    expected_found: bool,
    unable_to_thumbnail: bool = False,
) -> None:
    """Test the given thumbnailing method works as expected.

    Requests a 32x32 thumbnail of `self.media_id`, completes the first
    recorded remote fetch with the test image, then checks the response.

    Args:
        method: The thumbnailing method to use (crop, scale).
        expected_body: The expected bytes from thumbnailing, or None if
            test should just check for a valid image.
        expected_found: True if the file should exist on the server, or False if
            a 404/400 is expected.
        unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
            False if the thumbnailing should succeed or a normal 404 is expected.
    """
    params = "?width=32&height=32&method=" + method
    channel = self.make_request(
        "GET",
        f"/{self.url}/{self.media_id}{params}",
        shorthand=False,
        await_result=False,
    )
    self.pump()

    # Complete the outstanding remote fetch with the test image and its
    # headers.
    image_data = self.test_image.data
    headers = {
        b"Content-Length": [b"%d" % (len(image_data))],
        b"Content-Type": [self.test_image.content_type],
    }
    self.fetches[0][0].callback((image_data, (len(image_data), headers)))
    self.pump()

    if expected_found:
        self.assertEqual(channel.code, 200)

        self.assertEqual(
            channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
            [b"cross-origin"],
        )

        body = channel.result["body"]
        if expected_body is not None:
            # The body doubles as the failure message.
            self.assertEqual(body, expected_body, body)
        else:
            # ensure that the result is at least some valid image
            Image.open(BytesIO(body))
    elif unable_to_thumbnail:
        # A 400 with a JSON body.
        unable_message = f"Cannot find any thumbnails for the requested media ('/{self.url}/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)"
        self.assertEqual(channel.code, 400)
        self.assertEqual(
            channel.json_body,
            {
                "errcode": "M_UNKNOWN",
                "error": unable_message,
            },
        )
    else:
        # A 404 with a JSON body.
        self.assertEqual(channel.code, 404)
        self.assertEqual(
            channel.json_body,
            {
                "errcode": "M_NOT_FOUND",
                "error": f"Not found '/{self.url}/example.com/12345'",
            },
        )
2020-12-08 17:51:03 -05:00
2021-08-25 05:51:08 -04:00
@parameterized.expand([("crop", 16), ("crop", 64), ("scale", 16), ("scale", 64)])
def test_same_quality(self, method: str, desired_size: int) -> None:
    """Test that choosing between thumbnails with the same quality rating succeeds.

    We are not particular about which thumbnail is chosen.

    Args:
        method: The thumbnailing method to request (crop, scale).
        desired_size: The requested width and height; the parameter list
            uses values below (16) and above (64) the 32x32 candidates.
    """
    content_type = self.test_image.content_type.decode()
    media_repo = self.hs.get_media_repository()
    thumbnail_provider = ThumbnailProvider(
        self.hs, media_repo, media_repo.media_storage
    )

    # Provide two identical thumbnails which are guaranteed to have the same
    # quality rating.
    candidates = [
        ThumbnailInfo(
            width=32,
            height=32,
            method=method,
            type=content_type,
            length=256,
        )
        for _ in range(2)
    ]

    selected = thumbnail_provider._select_thumbnail(
        desired_width=desired_size,
        desired_height=desired_size,
        desired_method=method,
        desired_type=content_type,
        thumbnail_infos=candidates,
        file_id=f"image{self.test_image.extension.decode()}",
        url_cache=False,
        server_name=None,
    )
    self.assertIsNotNone(selected)
2022-03-11 07:42:22 -05:00
def test_x_robots_tag_header(self) -> None:
    """
    Tests that the `X-Robots-Tag` header is present, which informs web crawlers
    to not index, archive, or follow links in media.
    """
    # `_req` is given a Content-Disposition value whose filename carries the
    # test image's extension.
    channel = self._req(b"attachment; filename=out" + self.test_image.extension)

    self.assertEqual(
        channel.headers.getRawHeaders(b"X-Robots-Tag"),
        [b"noindex, nofollow, noarchive, noimageindex"],
    )
2021-02-03 11:44:16 -05:00
2022-06-27 09:44:05 -04:00
def test_cross_origin_resource_policy_header(self) -> None:
    """
    Test that the Cross-Origin-Resource-Policy header is set to "cross-origin"
    allowing web clients to embed media from the downloads API.
    """
    # `_req` is given a Content-Disposition value whose filename carries the
    # test image's extension.
    channel = self._req(b"attachment; filename=out" + self.test_image.extension)

    self.assertEqual(
        channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
        [b"cross-origin"],
    )
2023-11-29 14:03:42 -05:00
def test_unknown_v3_endpoint(self) -> None:
    """
    If the v3 endpoint fails, try the r0 one.

    The first remote fetch is failed with a 404 `M_UNRECOGNIZED` response; a
    second fetch must then go to the r0 download path, and completing that
    one must make the original request return 200.
    """
    channel = self.make_request(
        "GET",
        f"/_matrix/media/v3/download/{self.media_id}",
        shorthand=False,
        await_result=False,
    )
    self.pump()

    # We've made one fetch, to example.com, using the media URL, and asking
    # the other server not to do a remote fetch
    self.assertEqual(len(self.fetches), 1)
    self.assertEqual(self.fetches[0][1], "example.com")
    self.assertEqual(
        self.fetches[0][2], f"/_matrix/media/v3/download/{self.media_id}"
    )

    # The result which says the endpoint is unknown.
    unknown_endpoint = b'{"errcode":"M_UNRECOGNIZED","error":"Unknown request"}'
    self.fetches[0][0].errback(
        HttpResponseException(404, "NOT FOUND", unknown_endpoint)
    )

    self.pump()

    # There should now be another request to the r0 URL.
    self.assertEqual(len(self.fetches), 2)
    self.assertEqual(self.fetches[1][1], "example.com")
    self.assertEqual(
        self.fetches[1][2], f"/_matrix/media/r0/download/{self.media_id}"
    )

    # Complete the fallback fetch with the test image.
    image_data = self.test_image.data
    headers = {
        b"Content-Length": [b"%d" % (len(image_data))],
    }
    self.fetches[1][0].callback((image_data, (len(image_data), headers)))

    self.pump()
    self.assertEqual(channel.code, 200)
2021-02-03 11:44:16 -05:00
2022-07-11 12:52:10 -04:00
class TestSpamCheckerLegacy:
    """A spam checker module that rejects all media that includes the bytes
    `evil`.

    Uses the legacy Spam-Checker API.
    """

    def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None:
        # Both arguments are only stored; nothing in this class reads them.
        self.config = config
        self.api = api

    @staticmethod
    def parse_config(config: Dict[str, Any]) -> Dict[str, Any]:
        """Return the module config unchanged."""
        return config

    async def check_event_for_spam(self, event: EventBase) -> Union[bool, str]:
        return False  # allow all events

    async def user_may_invite(
        self,
        inviter_userid: str,
        invitee_userid: str,
        room_id: str,
    ) -> bool:
        return True  # allow all invites

    async def user_may_create_room(self, userid: str) -> bool:
        return True  # allow all room creations

    async def user_may_create_room_alias(
        self, userid: str, room_alias: RoomAlias
    ) -> bool:
        return True  # allow all room aliases

    async def user_may_publish_room(self, userid: str, room_id: str) -> bool:
        return True  # allow publishing of all rooms

    async def check_media_file_for_spam(
        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
    ) -> bool:
        """Return True (spam) if the media contains the bytes `evil`.

        The whole file is read into memory through `write_chunks_to` before
        the check; `file_info` is not used.
        """
        buf = BytesIO()
        await file_wrapper.write_chunks_to(buf.write)

        return b"evil" in buf.getvalue()
2022-07-11 12:52:10 -04:00
class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase):
    """Media upload tests with `TestSpamCheckerLegacy` configured as the
    `spam_checker` module."""

    servlets = [
        login.register_servlets,
        admin.register_servlets,
    ]

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        self.user = self.register_user("user", "pass")
        self.tok = self.login("user", "pass")

        load_legacy_spam_checkers(hs)

    def create_resource_dict(self) -> Dict[str, Resource]:
        """Add the media repository resource to the default resource tree."""
        resources = super().create_resource_dict()
        resources["/_matrix/media"] = self.hs.get_media_repository_resource()
        return resources

    def default_config(self) -> Dict[str, Any]:
        """Return the default test config with the legacy spam checker added."""
        config = default_config("test")

        # The module is referenced by dotted path, built from the class itself
        # so the name is not repeated as a string.
        checker_path = (
            f"{TestSpamCheckerLegacy.__module__}.{TestSpamCheckerLegacy.__qualname__}"
        )
        config.update(
            {
                "spam_checker": [
                    {
                        "module": checker_path,
                        "config": {},
                    }
                ]
            }
        )

        return config

    def test_upload_innocent(self) -> None:
        """Attempt to upload some innocent data that should be allowed."""
        self.helper.upload_media(SMALL_PNG, tok=self.tok, expect_code=200)

    def test_upload_ban(self) -> None:
        """Attempt to upload some data that includes bytes "evil", which should
        get rejected by the spam checker.
        """
        data = b"Some evil data"

        self.helper.upload_media(data, tok=self.tok, expect_code=400)
2022-07-11 12:52:10 -04:00
# Payloads matched exactly by `SpamCheckerTestCase.check_media_file_for_spam`.
# The first is rejected with a bare error code, the second with a
# `(code, dict)` tuple.
EVIL_DATA = b"Some evil data"
EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API"
class SpamCheckerTestCase(unittest.HomeserverTestCase):
    """Media upload tests with a `check_media_file_for_spam` callback
    registered through the module API."""

    servlets = [
        login.register_servlets,
        admin.register_servlets,
    ]

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        self.user = self.register_user("user", "pass")
        self.tok = self.login("user", "pass")

        # Register this test case's own method as the media spam callback.
        hs.get_module_api().register_spam_checker_callbacks(
            check_media_file_for_spam=self.check_media_file_for_spam
        )

    def create_resource_dict(self) -> Dict[str, Resource]:
        """Add the media repository resource to the default resource tree."""
        resources = super().create_resource_dict()
        resources["/_matrix/media"] = self.hs.get_media_repository_resource()
        return resources

    async def check_media_file_for_spam(
        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
    ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, JsonDict]]:
        """Spam callback: reject the two known "evil" payloads, allow the rest.

        Returns:
            `Codes.FORBIDDEN` if the media is exactly `EVIL_DATA`,
            `(Codes.FORBIDDEN, {})` if it is exactly `EVIL_DATA_EXPERIMENT`,
            and `"NOT_SPAM"` otherwise.
        """
        buf = BytesIO()
        await file_wrapper.write_chunks_to(buf.write)

        payload = buf.getvalue()
        if payload == EVIL_DATA:
            return Codes.FORBIDDEN
        elif payload == EVIL_DATA_EXPERIMENT:
            return (Codes.FORBIDDEN, {})
        else:
            return "NOT_SPAM"

    def test_upload_innocent(self) -> None:
        """Attempt to upload some innocent data that should be allowed."""
        self.helper.upload_media(SMALL_PNG, tok=self.tok, expect_code=200)

    def test_upload_ban(self) -> None:
        """Attempt to upload the two payloads the spam callback rejects.

        Both the bare error-code result and the `(code, dict)` tuple result
        should cause the upload to be rejected with a 400.
        """
        self.helper.upload_media(EVIL_DATA, tok=self.tok, expect_code=400)

        self.helper.upload_media(
            EVIL_DATA_EXPERIMENT,
            tok=self.tok,
            expect_code=400,
        )
2024-06-05 08:43:36 -04:00
class RemoteDownloadLimiterTestCase(unittest.HomeserverTestCase):
    """Tests for rate limiting of remote media downloads.

    The federation client's `_send_request` and the `read_body_with_max_size`
    used by `synapse.http.matrixfederationclient` are replaced with stubs, so
    only the sizes they report are under the test's control.
    """

    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
        """Create a homeserver with a media store path and a file storage
        provider, each pointing at its own newly created directory."""
        config = self.default_config()

        self.storage_path = self.mktemp()
        self.media_store_path = self.mktemp()
        os.mkdir(self.storage_path)
        os.mkdir(self.media_store_path)
        config["media_store_path"] = self.media_store_path

        provider_config = {
            "module": "synapse.media.storage_provider.FileStorageProviderBackend",
            "store_local": True,
            "store_synchronous": False,
            "store_remote": True,
            "config": {"directory": self.storage_path},
        }

        config["media_storage_providers"] = [provider_config]

        return self.setup_test_homeserver(config=config)

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        self.repo = hs.get_media_repository()
        self.client = hs.get_federation_http_client()
        self.store = hs.get_datastores().main

    def create_resource_dict(self) -> Dict[str, Resource]:
        # We need to manually set the resource tree to include media, the
        # default only does `/_matrix/client` APIs.
        return {"/_matrix/media": self.hs.get_media_repository_resource()}

    # mock actually reading file body
    def read_body_with_max_size_30MiB(*args: Any, **kwargs: Any) -> Deferred:
        """Patch target: return an already-fired Deferred reporting 30MiB read."""
        return defer.succeed(31457280)

    def read_body_with_max_size_50MiB(*args: Any, **kwargs: Any) -> Deferred:
        """Patch target: return an already-fired Deferred reporting 50MiB read."""
        return defer.succeed(52428800)

    def _stub_remote_response(self, length: Any) -> None:
        """Replace the federation client's `_send_request` with a stub.

        The stub returns a 200 response whose `length` attribute is `length`
        (a byte count, or `UNKNOWN_LENGTH`).
        """

        async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
            resp = MagicMock(spec=IResponse)
            resp.code = 200
            resp.length = length
            resp.headers = Headers({"Content-Type": ["application/octet-stream"]})
            resp.phrase = b"OK"
            return resp

        self.client._send_request = _send_request  # type: ignore

    def _download(self, media_id: str, **kwargs: Any) -> Any:
        """GET `remote.org/<media_id>` from the v3 download endpoint and
        return the channel; extra keyword arguments go to `make_request`."""
        return self.make_request(
            "GET",
            f"/_matrix/media/v3/download/remote.org/{media_id}",
            shorthand=False,
            **kwargs,
        )

    @patch(
        "synapse.http.matrixfederationclient.read_body_with_max_size",
        read_body_with_max_size_30MiB,
    )
    def test_download_ratelimit_default(self) -> None:
        """
        Test remote media download ratelimiting against default configuration - 500MB bucket
        and 87kb/second drain rate
        """

        # mock out actually sending the request, returns a 30MiB response
        self._stub_remote_response(31457280)

        # first request should go through
        channel = self._download("abcdefghijklmnopqrstuvwxyz")
        self.assertEqual(channel.code, 200)

        # next 15 should go through
        for i in range(15):
            channel2 = self._download(f"abcdefghijklmnopqrstuvwxy{i}")
            self.assertEqual(channel2.code, 200)

        # 17th will hit ratelimit
        channel3 = self._download("abcdefghijklmnopqrstuvwxyx")
        self.assertEqual(channel3.code, 429)

        # however, a request from a different IP will go through
        channel4 = self._download(
            "abcdefghijklmnopqrstuvwxyz", client_ip="187.233.230.159"
        )
        self.assertEqual(channel4.code, 200)

        # at 87KiB/s it should take about 2 minutes for enough to drain from bucket that another
        # 30MiB download is authorized - The last download was blocked at 503,316,480.
        # The next download will be authorized when bucket hits 492,830,720
        # (524,288,000 total capacity - 31,457,280 download size) so 503,316,480 - 492,830,720 ~= 10,485,760
        # needs to drain before another download will be authorized, that will take ~=
        # 2 minutes (10,485,760/89,088/60)
        self.reactor.pump([2.0 * 60.0])

        # enough has drained and next request goes through
        channel5 = self._download("abcdefghijklmnopqrstuvwxyb")
        self.assertEqual(channel5.code, 200)

    @override_config(
        {
            "remote_media_download_per_second": "50M",
            "remote_media_download_burst_count": "50M",
        }
    )
    @patch(
        "synapse.http.matrixfederationclient.read_body_with_max_size",
        read_body_with_max_size_50MiB,
    )
    def test_download_rate_limit_config(self) -> None:
        """
        Test that download rate limit config options are correctly picked up and applied
        """

        # mock out actually sending the request, returns a 50MiB response
        self._stub_remote_response(52428800)

        # first request should go through
        channel = self._download("abcdefghijklmnopqrstuvwxyz")
        self.assertEqual(channel.code, 200)

        # immediate second request should fail
        channel = self._download("abcdefghijklmnopqrstuvwxy1")
        self.assertEqual(channel.code, 429)

        # advance half a second
        self.reactor.pump([0.5])

        # request still fails
        channel = self._download("abcdefghijklmnopqrstuvwxy2")
        self.assertEqual(channel.code, 429)

        # advance another half second
        self.reactor.pump([0.5])

        # enough has drained from bucket and request is successful
        channel = self._download("abcdefghijklmnopqrstuvwxy3")
        self.assertEqual(channel.code, 200)

    @patch(
        "synapse.http.matrixfederationclient.read_body_with_max_size",
        read_body_with_max_size_30MiB,
    )
    def test_download_ratelimit_max_size_sub(self) -> None:
        """
        Test that if no content-length is provided, the default max size is applied instead
        """

        # mock out actually sending the request
        self._stub_remote_response(UNKNOWN_LENGTH)

        # ten requests should go through using the max size (500MB/50MB)
        for i in range(10):
            channel2 = self._download(f"abcdefghijklmnopqrstuvwxy{i}")
            self.assertEqual(channel2.code, 200)

        # eleventh will hit ratelimit
        channel3 = self._download("abcdefghijklmnopqrstuvwxyx")
        self.assertEqual(channel3.code, 429)