Fix Content-Disposition in media repository (#4176)

This commit is contained in:
Amber Brown 2018-11-15 15:55:58 -06:00 committed by GitHub
parent 835779f7fb
commit 8b1affe7d5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 271 additions and 90 deletions

View file

@ -16,6 +16,7 @@
import logging
import os
from six import PY3
from six.moves import urllib
from twisted.internet import defer
@ -48,26 +49,21 @@ def parse_media_id(request):
return server_name, media_id, file_name
except Exception:
raise SynapseError(
404,
"Invalid media id token %r" % (request.postpath,),
Codes.UNKNOWN,
404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
)
def respond_404(request):
respond_with_json(
request, 404,
cs_error(
"Not found %r" % (request.postpath,),
code=Codes.NOT_FOUND,
),
send_cors=True
request,
404,
cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
send_cors=True,
)
@defer.inlineCallbacks
def respond_with_file(request, media_type, file_path,
file_size=None, upload_name=None):
def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None):
logger.debug("Responding with %r", file_path)
if os.path.isfile(file_path):
@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name):
file_size (int): Size in bytes of the media, if known.
upload_name (str): The name of the requested file, if any.
"""
def _quote(x):
return urllib.parse.quote(x.encode("utf-8"))
request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
if upload_name:
if is_ascii(upload_name):
disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii")
disposition = "inline; filename=%s" % (_quote(upload_name),)
else:
disposition = (
"inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii")
disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
request.setHeader(b"Content-Disposition", disposition)
request.setHeader(b"Content-Disposition", disposition.encode('ascii'))
# cache for at least a day.
# XXX: we might want to turn this off for data we don't want to
# recommend caching as it's sensitive or private - or at least
# select private. don't bother setting Expires as all our
# clients are smart enough to be happy with Cache-Control
request.setHeader(
b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
)
request.setHeader(
b"Content-Length", b"%d" % (file_size,)
)
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
request.setHeader(b"Content-Length", b"%d" % (file_size,))
@defer.inlineCallbacks
@ -153,6 +144,7 @@ class Responder(object):
Responder is a context manager which *must* be used, so that any resources
held can be cleaned up.
"""
def write_to_consumer(self, consumer):
"""Stream response into consumer
@ -186,9 +178,18 @@ class FileInfo(object):
thumbnail_method (str)
thumbnail_type (str): Content type of thumbnail, e.g. image/png
"""
def __init__(self, server_name, file_id, url_cache=False,
thumbnail=False, thumbnail_width=None, thumbnail_height=None,
thumbnail_method=None, thumbnail_type=None):
def __init__(
self,
server_name,
file_id,
url_cache=False,
thumbnail=False,
thumbnail_width=None,
thumbnail_height=None,
thumbnail_method=None,
thumbnail_type=None,
):
self.server_name = server_name
self.file_id = file_id
self.url_cache = url_cache
@ -197,3 +198,74 @@ class FileInfo(object):
self.thumbnail_height = thumbnail_height
self.thumbnail_method = thumbnail_method
self.thumbnail_type = thumbnail_type
def get_filename_from_headers(headers):
"""
Get the filename of the downloaded file by inspecting the
Content-Disposition HTTP header.
Args:
headers (twisted.web.http_headers.Headers): The HTTP
request headers.
Returns:
A Unicode string of the filename, or None.
"""
content_disposition = headers.get(b"Content-Disposition", [b''])
# No header, bail out.
if not content_disposition[0]:
return
# dict of unicode: bytes, corresponding to the key value sections of the
# Content-Disposition header.
params = {}
parts = content_disposition[0].split(b";")
for i in parts:
# Split into key-value pairs, if able
# We don't care about things like `inline`, so throw it out
if b"=" not in i:
continue
key, value = i.strip().split(b"=")
params[key.decode('ascii')] = value
upload_name = None
# First check if there is a valid UTF-8 filename
upload_name_utf8 = params.get("filename*", None)
if upload_name_utf8:
if upload_name_utf8.lower().startswith(b"utf-8''"):
upload_name_utf8 = upload_name_utf8[7:]
# We have a filename*= section. This MUST be ASCII, and any UTF-8
# bytes are %-quoted.
if PY3:
try:
# Once it is decoded, we can then unquote the %-encoded
# parts strictly into a unicode string.
upload_name = urllib.parse.unquote(
upload_name_utf8.decode('ascii'), errors="strict"
)
except UnicodeDecodeError:
# Incorrect UTF-8.
pass
else:
# On Python 2, we first unquote the %-encoded parts and then
# decode it strictly using UTF-8.
try:
upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8')
except UnicodeDecodeError:
pass
# If there isn't check for an ascii name.
if not upload_name:
upload_name_ascii = params.get("filename", None)
if upload_name_ascii and is_ascii(upload_name_ascii):
# Make sure there's no %-quoted bytes. If there is, reject it as
# non-valid ASCII.
if b"%" not in upload_name_ascii:
upload_name = upload_name_ascii.decode('ascii')
# This may be None here, indicating we did not find a matching name.
return upload_name

View file

@ -14,14 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import cgi
import errno
import logging
import os
import shutil
from six import PY3, iteritems
from six.moves.urllib import parse as urlparse
from six import iteritems
import twisted.internet.error
import twisted.web.http
@ -34,14 +32,18 @@ from synapse.api.errors import (
NotFoundError,
SynapseError,
)
from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.util import logcontext
from synapse.util.async_helpers import Linearizer
from synapse.util.retryutils import NotRetryingDestination
from synapse.util.stringutils import is_ascii, random_string
from synapse.util.stringutils import random_string
from ._base import FileInfo, respond_404, respond_with_responder
from ._base import (
FileInfo,
get_filename_from_headers,
respond_404,
respond_with_responder,
)
from .config_resource import MediaConfigResource
from .download_resource import DownloadResource
from .filepath import MediaFilePaths
@ -62,7 +64,7 @@ class MediaRepository(object):
def __init__(self, hs):
self.hs = hs
self.auth = hs.get_auth()
self.client = MatrixFederationHttpClient(hs)
self.client = hs.get_http_client()
self.clock = hs.get_clock()
self.server_name = hs.hostname
self.store = hs.get_datastore()
@ -397,39 +399,9 @@ class MediaRepository(object):
yield finish()
media_type = headers[b"Content-Type"][0].decode('ascii')
upload_name = get_filename_from_headers(headers)
time_now_ms = self.clock.time_msec()
content_disposition = headers.get(b"Content-Disposition", None)
if content_disposition:
_, params = cgi.parse_header(content_disposition[0].decode('ascii'),)
upload_name = None
# First check if there is a valid UTF-8 filename
upload_name_utf8 = params.get("filename*", None)
if upload_name_utf8:
if upload_name_utf8.lower().startswith("utf-8''"):
upload_name = upload_name_utf8[7:]
# If there isn't check for an ascii name.
if not upload_name:
upload_name_ascii = params.get("filename", None)
if upload_name_ascii and is_ascii(upload_name_ascii):
upload_name = upload_name_ascii
if upload_name:
if PY3:
upload_name = urlparse.unquote(upload_name)
else:
upload_name = urlparse.unquote(upload_name.encode('ascii'))
try:
if isinstance(upload_name, bytes):
upload_name = upload_name.decode("utf-8")
except UnicodeDecodeError:
upload_name = None
else:
upload_name = None
logger.info("Stored remote media in file %r", fname)
yield self.store.store_cached_remote_media(

View file

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import cgi
import datetime
import errno
import fnmatch
@ -44,10 +43,11 @@ from synapse.http.server import (
)
from synapse.http.servlet import parse_integer, parse_string
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.rest.media.v1._base import get_filename_from_headers
from synapse.util.async_helpers import ObservableDeferred
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.logcontext import make_deferred_yieldable, run_in_background
from synapse.util.stringutils import is_ascii, random_string
from synapse.util.stringutils import random_string
from ._base import FileInfo
@ -336,31 +336,7 @@ class PreviewUrlResource(Resource):
media_type = "application/octet-stream"
time_now_ms = self.clock.time_msec()
content_disposition = headers.get(b"Content-Disposition", None)
if content_disposition:
_, params = cgi.parse_header(content_disposition[0],)
download_name = None
# First check if there is a valid UTF-8 filename
download_name_utf8 = params.get("filename*", None)
if download_name_utf8:
if download_name_utf8.lower().startswith("utf-8''"):
download_name = download_name_utf8[7:]
# If there isn't check for an ascii name.
if not download_name:
download_name_ascii = params.get("filename", None)
if download_name_ascii and is_ascii(download_name_ascii):
download_name = download_name_ascii
if download_name:
download_name = urlparse.unquote(download_name)
try:
download_name = download_name.decode("utf-8")
except UnicodeDecodeError:
download_name = None
else:
download_name = None
download_name = get_filename_from_headers(headers)
yield self.store.store_local_media(
media_id=file_id,