Fix Content-Disposition in media repository (#4176)

2025-08-18 21:57:46 -04:00 · 2018-11-15 15:55:58 -06:00 · 2018-11-15 15:55:58 -06:00 · 8b1affe7d5
commit 8b1affe7d5
parent 835779f7fb
6 changed files with 271 additions and 90 deletions
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@ -16,6 +16,7 @@
 import logging
 import os

+from six import PY3
 from six.moves import urllib

 from twisted.internet import defer
@ -48,26 +49,21 @@ def parse_media_id(request):
        return server_name, media_id, file_name
    except Exception:
        raise SynapseError(
-            404,
-            "Invalid media id token %r" % (request.postpath,),
-            Codes.UNKNOWN,
+            404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
        )


 def respond_404(request):
    respond_with_json(
-        request, 404,
-        cs_error(
-            "Not found %r" % (request.postpath,),
-            code=Codes.NOT_FOUND,
-        ),
-        send_cors=True
+        request,
+        404,
+        cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
+        send_cors=True,
    )


@defer.inlineCallbacks
-def respond_with_file(request, media_type, file_path,
-                      file_size=None, upload_name=None):
+def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None):
    logger.debug("Responding with %r", file_path)

    if os.path.isfile(file_path):
@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name):
        file_size (int): Size in bytes of the media, if known.
        upload_name (str): The name of the requested file, if any.
    """
+
    def _quote(x):
        return urllib.parse.quote(x.encode("utf-8"))

    request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
    if upload_name:
        if is_ascii(upload_name):
-            disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii")
+            disposition = "inline; filename=%s" % (_quote(upload_name),)
        else:
-            disposition = (
-                "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii")
+            disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)

-        request.setHeader(b"Content-Disposition", disposition)
+        request.setHeader(b"Content-Disposition", disposition.encode('ascii'))

    # cache for at least a day.
    # XXX: we might want to turn this off for data we don't want to
    # recommend caching as it's sensitive or private - or at least
    # select private. don't bother setting Expires as all our
    # clients are smart enough to be happy with Cache-Control
-    request.setHeader(
-        b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
-    )
-
-    request.setHeader(
-        b"Content-Length", b"%d" % (file_size,)
-    )
+    request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
+    request.setHeader(b"Content-Length", b"%d" % (file_size,))


@defer.inlineCallbacks
@ -153,6 +144,7 @@ class Responder(object):
    Responder is a context manager which *must* be used, so that any resources
    held can be cleaned up.
    """
+
    def write_to_consumer(self, consumer):
        """Stream response into consumer

@ -186,9 +178,18 @@ class FileInfo(object):
        thumbnail_method (str)
        thumbnail_type (str): Content type of thumbnail, e.g. image/png
    """
-    def __init__(self, server_name, file_id, url_cache=False,
-                 thumbnail=False, thumbnail_width=None, thumbnail_height=None,
-                 thumbnail_method=None, thumbnail_type=None):
+
+    def __init__(
+        self,
+        server_name,
+        file_id,
+        url_cache=False,
+        thumbnail=False,
+        thumbnail_width=None,
+        thumbnail_height=None,
+        thumbnail_method=None,
+        thumbnail_type=None,
+    ):
        self.server_name = server_name
        self.file_id = file_id
        self.url_cache = url_cache
@ -197,3 +198,74 @@ class FileInfo(object):
        self.thumbnail_height = thumbnail_height
        self.thumbnail_method = thumbnail_method
        self.thumbnail_type = thumbnail_type
+
+
+def get_filename_from_headers(headers):
+    """
+    Get the filename of the downloaded file by inspecting the
+    Content-Disposition HTTP header.
+
+    Args:
+        headers (twisted.web.http_headers.Headers): The HTTP
+            request headers.
+
+    Returns:
+        A Unicode string of the filename, or None.
+    """
+    content_disposition = headers.get(b"Content-Disposition", [b''])
+
+    # No header, bail out.
+    if not content_disposition[0]:
+        return
+
+    # dict of unicode: bytes, corresponding to the key value sections of the
+    # Content-Disposition header.
+    params = {}
+    parts = content_disposition[0].split(b";")
+    for i in parts:
+        # Split into key-value pairs, if able
+        # We don't care about things like `inline`, so throw it out
+        if b"=" not in i:
+            continue
+
+        key, value = i.strip().split(b"=")
+        params[key.decode('ascii')] = value
+
+    upload_name = None
+
+    # First check if there is a valid UTF-8 filename
+    upload_name_utf8 = params.get("filename*", None)
+    if upload_name_utf8:
+        if upload_name_utf8.lower().startswith(b"utf-8''"):
+            upload_name_utf8 = upload_name_utf8[7:]
+            # We have a filename*= section. This MUST be ASCII, and any UTF-8
+            # bytes are %-quoted.
+            if PY3:
+                try:
+                    # Once it is decoded, we can then unquote the %-encoded
+                    # parts strictly into a unicode string.
+                    upload_name = urllib.parse.unquote(
+                        upload_name_utf8.decode('ascii'), errors="strict"
+                    )
+                except UnicodeDecodeError:
+                    # Incorrect UTF-8.
+                    pass
+            else:
+                # On Python 2, we first unquote the %-encoded parts and then
+                # decode it strictly using UTF-8.
+                try:
+                    upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8')
+                except UnicodeDecodeError:
+                    pass
+
+    # If there isn't check for an ascii name.
+    if not upload_name:
+        upload_name_ascii = params.get("filename", None)
+        if upload_name_ascii and is_ascii(upload_name_ascii):
+            # Make sure there's no %-quoted bytes. If there is, reject it as
+            # non-valid ASCII.
+            if b"%" not in upload_name_ascii:
+                upload_name = upload_name_ascii.decode('ascii')
+
+    # This may be None here, indicating we did not find a matching name.
+    return upload_name