mirror of
https://git.anonymousland.org/anonymousland/synapse-product.git
synced 2025-05-02 17:54:48 -04:00
Implement a content type allow list for URL previews (#11936)
This implements an allow list of content types for which Synapse will attempt to generate a URL preview. If a URL resolves to a resource whose content type isn't in the list, the download is terminated immediately. This makes sense given that Synapse would never successfully generate a URL preview for such files in the first place, and it helps prevent issues with streaming media servers, such as #8302. Signed-off-by: Denis Kasak <dkasak@termina.org.uk>
This commit is contained in:
parent
06e5a76322
commit
337f38cac3
4 changed files with 99 additions and 0 deletions
|
@ -20,6 +20,7 @@ from typing import (
|
|||
TYPE_CHECKING,
|
||||
Any,
|
||||
BinaryIO,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
|
@ -693,12 +694,18 @@ class SimpleHttpClient:
|
|||
output_stream: BinaryIO,
|
||||
max_size: Optional[int] = None,
|
||||
headers: Optional[RawHeaders] = None,
|
||||
is_allowed_content_type: Optional[Callable[[str], bool]] = None,
|
||||
) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
|
||||
"""GETs a file from a given URL
|
||||
Args:
|
||||
url: The URL to GET
|
||||
output_stream: File to write the response body to.
|
||||
headers: A map from header name to a list of values for that header
|
||||
is_allowed_content_type: A predicate to determine whether the
|
||||
content type of the file we're downloading is allowed. If set and
|
||||
it evaluates to False when called with the content type, the
|
||||
request will be terminated before completing the download by
|
||||
raising SynapseError.
|
||||
Returns:
|
||||
A tuple of the file length, dict of the response
|
||||
headers, absolute URI of the response and HTTP response code.
|
||||
|
@ -726,6 +733,17 @@ class SimpleHttpClient:
|
|||
HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
|
||||
)
|
||||
|
||||
if is_allowed_content_type and b"Content-Type" in resp_headers:
|
||||
content_type = resp_headers[b"Content-Type"][0].decode("ascii")
|
||||
if not is_allowed_content_type(content_type):
|
||||
raise SynapseError(
|
||||
HTTPStatus.BAD_GATEWAY,
|
||||
(
|
||||
"Requested file's content type not allowed for this operation: %s"
|
||||
% content_type
|
||||
),
|
||||
)
|
||||
|
||||
# TODO: if our Content-Type is HTML or something, just read the first
|
||||
# N bytes into RAM rather than saving it all to disk only to read it
|
||||
# straight back in again
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue