2014-12-02 10:09:51 -05:00
|
|
|
# -*- coding: utf-8 -*-
|
2016-01-06 23:26:29 -05:00
|
|
|
# Copyright 2014-2016 OpenMarket Ltd
|
2018-01-12 06:15:31 -05:00
|
|
|
# Copyright 2018 New Vector Ltd
|
2014-12-02 10:09:51 -05:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2017-03-14 09:36:06 -04:00
|
|
|
|
|
|
|
from twisted.internet import defer, threads
|
2017-03-13 09:50:16 -04:00
|
|
|
import twisted.internet.error
|
2017-03-14 09:36:06 -04:00
|
|
|
import twisted.web.http
|
|
|
|
from twisted.web.resource import Resource
|
2014-12-02 10:09:51 -05:00
|
|
|
|
2018-01-08 12:52:06 -05:00
|
|
|
from ._base import respond_404, FileInfo, respond_with_responder
|
2014-12-02 14:51:47 -05:00
|
|
|
from .upload_resource import UploadResource
|
2014-12-04 09:22:31 -05:00
|
|
|
from .download_resource import DownloadResource
|
2014-12-10 10:46:18 -05:00
|
|
|
from .thumbnail_resource import ThumbnailResource
|
2015-02-02 11:02:31 -05:00
|
|
|
from .identicon_resource import IdenticonResource
|
2016-01-24 18:47:27 -05:00
|
|
|
from .preview_url_resource import PreviewUrlResource
|
2014-12-02 14:51:47 -05:00
|
|
|
from .filepath import MediaFilePaths
|
2016-04-19 06:31:43 -04:00
|
|
|
from .thumbnailer import Thumbnailer
|
2018-01-16 10:44:08 -05:00
|
|
|
from .storage_provider import StorageProviderWrapper
|
2018-01-08 12:45:11 -05:00
|
|
|
from .media_storage import MediaStorage
|
2016-04-19 06:31:43 -04:00
|
|
|
|
|
|
|
from synapse.http.matrixfederationclient import MatrixFederationHttpClient
|
|
|
|
from synapse.util.stringutils import random_string
|
2018-01-22 13:11:18 -05:00
|
|
|
from synapse.api.errors import (
|
|
|
|
SynapseError, HttpResponseException, NotFoundError, FederationDeniedError,
|
|
|
|
)
|
2016-04-19 06:31:43 -04:00
|
|
|
|
2016-06-29 09:57:59 -04:00
|
|
|
from synapse.util.async import Linearizer
|
2016-04-19 06:31:43 -04:00
|
|
|
from synapse.util.stringutils import is_ascii
|
2018-01-08 12:45:11 -05:00
|
|
|
from synapse.util.logcontext import make_deferred_yieldable
|
2017-05-15 10:42:18 -04:00
|
|
|
from synapse.util.retryutils import NotRetryingDestination
|
2016-04-19 06:31:43 -04:00
|
|
|
|
|
|
|
import os
|
2016-06-29 09:57:59 -04:00
|
|
|
import errno
|
|
|
|
import shutil
|
2016-04-19 06:31:43 -04:00
|
|
|
|
|
|
|
import cgi
|
2014-12-02 10:09:51 -05:00
|
|
|
import logging
|
2016-04-19 06:31:43 -04:00
|
|
|
import urlparse
|
2014-12-02 10:09:51 -05:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2018-01-12 11:42:43 -05:00
|
|
|
# Interval handed to `clock.looping_call` for flushing the in-memory
# "recently accessed" media sets to the datastore.
# NOTE(review): 60 * 1000 is presumably milliseconds (i.e. one minute) --
# confirm against the clock's looping_call contract.
UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000
|
2016-06-29 06:41:20 -04:00
|
|
|
|
|
|
|
|
2016-04-19 06:31:43 -04:00
|
|
|
class MediaRepository(object):
|
2016-06-29 09:57:59 -04:00
|
|
|
def __init__(self, hs):
    """Set up the media repository from the homeserver object.

    Args:
        hs: The homeserver. Its auth handler, clock, datastore, hostname
            and config are read here.
    """
    config = hs.config

    self.auth = hs.get_auth()
    self.client = MatrixFederationHttpClient(hs)
    self.clock = hs.get_clock()
    self.server_name = hs.hostname
    self.store = hs.get_datastore()
    self.max_upload_size = config.max_upload_size
    self.max_image_pixels = config.max_image_pixels

    self.primary_base_path = config.media_store_path
    self.filepaths = MediaFilePaths(self.primary_base_path)

    self.dynamic_thumbnails = config.dynamic_thumbnails
    self.thumbnail_requirements = config.thumbnail_requirements

    # Serialises work on a given (server_name, media_id) pair.
    self.remote_media_linearizer = Linearizer(name="media_remote")

    # Media touched since the last flush by _update_recently_accessed.
    self.recently_accessed_remotes = set()
    self.recently_accessed_locals = set()

    self.federation_domain_whitelist = config.federation_domain_whitelist

    # StorageProviders where we should search for media and potentially
    # upload to, each wrapped with its per-provider storage policy.
    storage_providers = [
        StorageProviderWrapper(
            clz(hs, provider_config),
            store_local=wrapper_config.store_local,
            store_remote=wrapper_config.store_remote,
            store_synchronous=wrapper_config.store_synchronous,
        )
        for clz, provider_config, wrapper_config
        in config.media_storage_providers
    ]

    self.media_storage = MediaStorage(
        self.primary_base_path, self.filepaths, storage_providers,
    )

    # Periodically persist the recently-accessed sets.
    self.clock.looping_call(
        self._update_recently_accessed,
        UPDATE_RECENTLY_ACCESSED_TS,
    )
|
|
|
|
|
|
|
|
@defer.inlineCallbacks
def _update_recently_accessed(self):
    """Flush the recently-accessed media sets to the datastore.

    Both sets are replaced with empty ones before the store is called, so
    the batch handed to the store is no longer the set that
    mark_recently_accessed adds to.
    """
    remote_batch, self.recently_accessed_remotes = (
        self.recently_accessed_remotes, set(),
    )
    local_batch, self.recently_accessed_locals = (
        self.recently_accessed_locals, set(),
    )

    yield self.store.update_cached_last_access_time(
        local_batch, remote_batch, self.clock.time_msec()
    )
|
|
|
|
|
2018-01-12 11:42:43 -05:00
|
|
|
def mark_recently_accessed(self, server_name, media_id):
    """Record that a piece of media has just been accessed.

    Args:
        server_name (str|None): Origin server of the media, or None (or
            any other falsy value) if the media is local.
        media_id (str): The media ID of the content.
    """
    if not server_name:
        # Local media is tracked by media ID alone.
        self.recently_accessed_locals.add(media_id)
        return

    self.recently_accessed_remotes.add((server_name, media_id))
|
|
|
|
|
2016-04-19 06:31:43 -04:00
|
|
|
@defer.inlineCallbacks
def create_content(self, media_type, upload_name, content, content_length,
                   auth_user):
    """Store uploaded content for a local user and return the mxc URL.

    Args:
        media_type(str): The content type of the file
        upload_name(str): The name of the file
        content: A file like object that is the content to store
        content_length(int): The length of the content
        auth_user(str): The user_id of the uploader

    Returns:
        Deferred[str]: The mxc url of the stored content
    """
    media_id = random_string(24)

    # Local media has no origin server, hence server_name=None.
    fname = yield self.media_storage.store_file(
        content, FileInfo(server_name=None, file_id=media_id),
    )
    logger.info("Stored local media in file %r", fname)

    yield self.store.store_local_media(
        media_id=media_id,
        media_type=media_type,
        time_now_ms=self.clock.time_msec(),
        upload_name=upload_name,
        media_length=content_length,
        user_id=auth_user,
    )

    # For local content the file ID and the media ID are the same thing.
    yield self._generate_thumbnails(None, media_id, media_id, media_type)

    mxc_uri = "mxc://%s/%s" % (self.server_name, media_id)
    defer.returnValue(mxc_uri)
|
|
|
|
|
2018-01-08 12:45:11 -05:00
|
|
|
@defer.inlineCallbacks
def get_local_media(self, request, media_id, name):
    """Respond to requests for local media, if it exists, or return 404.

    Args:
        request(twisted.web.http.Request)
        media_id (str): The media ID of the content. (This is the same as
            the file_id for local content.)
        name (str|None): Optional name that, if specified, will be used as
            the filename in the Content-Disposition header of the response.

    Returns:
        Deferred: Resolves once a response has successfully been written
            to request
    """
    media_info = yield self.store.get_local_media(media_id)

    # Unknown and quarantined media are both reported as "not found".
    if not media_info or media_info["quarantined_by"]:
        respond_404(request)
        return

    self.mark_recently_accessed(None, media_id)

    media_type = media_info["media_type"]
    media_length = media_info["media_length"]
    # A caller-supplied name takes precedence over the stored upload name.
    upload_name = name or media_info["upload_name"]

    file_info = FileInfo(
        server_name=None,
        file_id=media_id,
        url_cache=media_info["url_cache"],
    )

    responder = yield self.media_storage.fetch_media(file_info)
    yield respond_with_responder(
        request, responder, media_type, media_length, upload_name,
    )
|
|
|
|
|
2016-06-29 09:57:59 -04:00
|
|
|
@defer.inlineCallbacks
def get_remote_media(self, request, server_name, media_id, name):
    """Respond to requests for remote media.

    Args:
        request(twisted.web.http.Request)
        server_name (str): Remote server_name where the media originated.
        media_id (str): The media ID of the content (as defined by the
            remote server).
        name (str|None): Optional name that, if specified, will be used as
            the filename in the Content-Disposition header of the response.

    Returns:
        Deferred: Resolves once a response has successfully been written
            to request

    Raises:
        FederationDeniedError: If a federation whitelist is configured and
            server_name is not on it.
    """
    whitelist = self.federation_domain_whitelist
    if whitelist is not None and server_name not in whitelist:
        raise FederationDeniedError(server_name)

    self.mark_recently_accessed(server_name, media_id)

    # We linearize here to ensure that we don't try and download remote
    # media multiple times concurrently
    with (yield self.remote_media_linearizer.queue((server_name, media_id))):
        responder, media_info = yield self._get_remote_media_impl(
            server_name, media_id,
        )

    # We deliberately stream the file outside the lock
    if not responder:
        respond_404(request)
        return

    yield respond_with_responder(
        request,
        responder,
        media_info["media_type"],
        media_info["media_length"],
        name if name else media_info["upload_name"],
    )
|
2016-04-19 06:31:43 -04:00
|
|
|
|
2018-01-16 06:06:42 -05:00
|
|
|
@defer.inlineCallbacks
def get_remote_media_info(self, server_name, media_id):
    """Get the media info associated with a remote file, downloading the
    file if necessary.

    Args:
        server_name (str): Remote server_name where the media originated.
        media_id (str): The media ID of the content (as defined by the
            remote server).

    Returns:
        Deferred[dict]: The media_info of the file

    Raises:
        FederationDeniedError: If a federation whitelist is configured and
            server_name is not on it.
    """
    whitelist = self.federation_domain_whitelist
    if whitelist is not None and server_name not in whitelist:
        raise FederationDeniedError(server_name)

    # We linearize here to ensure that we don't try and download remote
    # media multiple times concurrently
    lock_key = (server_name, media_id)
    with (yield self.remote_media_linearizer.queue(lock_key)):
        responder, media_info = yield self._get_remote_media_impl(
            server_name, media_id,
        )

    # Only the metadata is wanted here, but the responder still has to be
    # entered and exited so that it releases its resources.
    if responder:
        with responder:
            pass

    defer.returnValue(media_info)
|
|
|
|
|
2016-04-19 06:31:43 -04:00
|
|
|
@defer.inlineCallbacks
def _get_remote_media_impl(self, server_name, media_id):
    """Look for media in the local cache and, if it is not there, attempt
    to download it from the remote server.

    Args:
        server_name (str): Remote server_name where the media originated.
        media_id (str): The media ID of the content (as defined by the
            remote server).

    Returns:
        Deferred[(Responder, media_info)]

    Raises:
        NotFoundError: If the cached entry is marked as quarantined.
    """
    cached_info = yield self.store.get_cached_remote_media(
        server_name, media_id
    )

    # file_id is the ID we use to track the file locally. If we've already
    # seen the file then reuse the existing ID, otherwise generate a new
    # one.
    file_id = cached_info["filesystem_id"] if cached_info else random_string(24)
    file_info = FileInfo(server_name, file_id)

    # If we have an entry in the DB, try and look for it
    if cached_info:
        if cached_info["quarantined_by"]:
            logger.info("Media is quarantined")
            raise NotFoundError()

        cached_responder = yield self.media_storage.fetch_media(file_info)
        if cached_responder:
            defer.returnValue((cached_responder, cached_info))

    # Failed to find the file anywhere, lets download it.
    downloaded_info = yield self._download_remote_file(
        server_name, media_id, file_id
    )

    fresh_responder = yield self.media_storage.fetch_media(file_info)
    defer.returnValue((fresh_responder, downloaded_info))
|
2016-04-19 06:31:43 -04:00
|
|
|
|
|
|
|
@defer.inlineCallbacks
def _download_remote_file(self, server_name, media_id, file_id):
    """Attempt to download the remote file from the given server name,
    using the given file_id as the local id.

    The file is streamed into local storage, recorded in the remote media
    cache table and then thumbnailed.

    Args:
        server_name (str): Originating server
        media_id (str): The media ID of the content (as defined by the
            remote server). This is different than the file_id, which is
            locally generated.
        file_id (str): Local file ID

    Returns:
        Deferred[MediaInfo]: dict with "media_type", "media_length",
            "upload_name", "created_ts" and "filesystem_id" keys.

    Raises:
        NotFoundError: If the DNS lookup for the remote server fails.
        SynapseError: If the remote responds with an error (a remote 404
            is passed through, anything else becomes a 502), if we are
            currently backing off from the destination, or on any other
            failure during the fetch.
    """
    file_info = FileInfo(
        server_name=server_name,
        file_id=file_id,
    )

    with self.media_storage.store_into_file(file_info) as (f, fname, finish):
        request_path = "/".join((
            "/_matrix/media/v1/download", server_name, media_id,
        ))
        try:
            length, headers = yield self.client.get_file(
                server_name, request_path, output_stream=f,
                max_size=self.max_upload_size, args={
                    # tell the remote server to 404 if it doesn't
                    # recognise the server_name, to make sure we don't
                    # end up with a routing loop.
                    "allow_remote": "false",
                }
            )
        except twisted.internet.error.DNSLookupError as e:
            logger.warn("HTTP error fetching remote media %s/%s: %r",
                        server_name, media_id, e)
            raise NotFoundError()

        except HttpResponseException as e:
            logger.warn("HTTP error fetching remote media %s/%s: %s",
                        server_name, media_id, e.response)
            if e.code == twisted.web.http.NOT_FOUND:
                raise SynapseError.from_http_response_exception(e)
            raise SynapseError(502, "Failed to fetch remote media")

        except SynapseError:
            logger.exception("Failed to fetch remote media %s/%s",
                             server_name, media_id)
            raise
        except NotRetryingDestination:
            logger.warn("Not retrying destination %r", server_name)
            raise SynapseError(502, "Failed to fetch remote media")
        except Exception:
            logger.exception("Failed to fetch remote media %s/%s",
                             server_name, media_id)
            raise SynapseError(502, "Failed to fetch remote media")

        yield finish()

    # A remote server is not obliged to send Content-Type. Fall back to a
    # generic binary type instead of failing with a KeyError after the file
    # has already been downloaded and stored.
    content_type = headers.get("Content-Type", None)
    if content_type:
        media_type = content_type[0]
    else:
        media_type = "application/octet-stream"

    # Single timestamp, used for both the DB row and the returned
    # media_info so that the two always agree.
    time_now_ms = self.clock.time_msec()

    content_disposition = headers.get("Content-Disposition", None)
    if content_disposition:
        _, params = cgi.parse_header(content_disposition[0],)
        upload_name = None

        # First check if there is a valid UTF-8 filename
        upload_name_utf8 = params.get("filename*", None)
        if upload_name_utf8:
            if upload_name_utf8.lower().startswith("utf-8''"):
                # Strip the 7-character "utf-8''" prefix.
                upload_name = upload_name_utf8[7:]

        # If there isn't, check for an ascii name.
        if not upload_name:
            upload_name_ascii = params.get("filename", None)
            if upload_name_ascii and is_ascii(upload_name_ascii):
                upload_name = upload_name_ascii

        if upload_name:
            upload_name = urlparse.unquote(upload_name)
            try:
                upload_name = upload_name.decode("utf-8")
            except UnicodeDecodeError:
                # Not valid UTF-8 after unquoting: drop the name rather
                # than store garbage.
                upload_name = None
    else:
        upload_name = None

    logger.info("Stored remote media in file %r", fname)

    yield self.store.store_cached_remote_media(
        origin=server_name,
        media_id=media_id,
        media_type=media_type,
        time_now_ms=time_now_ms,
        upload_name=upload_name,
        media_length=length,
        filesystem_id=file_id,
    )

    media_info = {
        "media_type": media_type,
        "media_length": length,
        "upload_name": upload_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
    }

    yield self._generate_thumbnails(
        server_name, media_id, file_id, media_type,
    )

    defer.returnValue(media_info)
|
|
|
|
|
|
|
|
def _get_thumbnail_requirements(self, media_type):
|
|
|
|
return self.thumbnail_requirements.get(media_type, ())
|
|
|
|
|
2017-10-12 10:20:59 -04:00
|
|
|
def _generate_thumbnail(self, thumbnailer, t_width, t_height,
|
2016-04-19 06:31:43 -04:00
|
|
|
t_method, t_type):
|
|
|
|
m_width = thumbnailer.width
|
|
|
|
m_height = thumbnailer.height
|
|
|
|
|
|
|
|
if m_width * m_height >= self.max_image_pixels:
|
|
|
|
logger.info(
|
|
|
|
"Image too large to thumbnail %r x %r > %r",
|
|
|
|
m_width, m_height, self.max_image_pixels
|
|
|
|
)
|
|
|
|
return
|
|
|
|
|
|
|
|
if t_method == "crop":
|
2017-10-12 10:20:59 -04:00
|
|
|
t_byte_source = thumbnailer.crop(t_width, t_height, t_type)
|
2016-04-19 06:31:43 -04:00
|
|
|
elif t_method == "scale":
|
2017-02-24 16:42:38 -05:00
|
|
|
t_width, t_height = thumbnailer.aspect(t_width, t_height)
|
|
|
|
t_width = min(m_width, t_width)
|
|
|
|
t_height = min(m_height, t_height)
|
2017-10-12 10:20:59 -04:00
|
|
|
t_byte_source = thumbnailer.scale(t_width, t_height, t_type)
|
2016-04-19 06:31:43 -04:00
|
|
|
else:
|
2017-10-12 10:20:59 -04:00
|
|
|
t_byte_source = None
|
2016-04-19 06:31:43 -04:00
|
|
|
|
2017-10-12 10:20:59 -04:00
|
|
|
return t_byte_source
|
2016-04-19 06:31:43 -04:00
|
|
|
|
|
|
|
@defer.inlineCallbacks
def generate_local_exact_thumbnail(self, media_id, t_width, t_height,
                                   t_method, t_type):
    """Generate and store one thumbnail of a local media file.

    Args:
        media_id (str): The media ID of the local content.
        t_width (int): Thumbnail width.
        t_height (int): Thumbnail height.
        t_method (str): Thumbnailing method, "crop" or "scale".
        t_type (str): Thumbnail type, passed to the thumbnailer.

    Returns:
        Deferred[str|None]: Path the thumbnail was stored at, or None if
        no thumbnail was produced.
    """
    thumbnailer = Thumbnailer(self.filepaths.local_media_filepath(media_id))

    # The image work is done in a thread, off the reactor.
    t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
        self._generate_thumbnail,
        thumbnailer, t_width, t_height, t_method, t_type
    ))

    if not t_byte_source:
        return

    try:
        thumbnail_info = FileInfo(
            server_name=None,
            file_id=media_id,
            thumbnail=True,
            thumbnail_width=t_width,
            thumbnail_height=t_height,
            thumbnail_method=t_method,
            thumbnail_type=t_type,
        )
        output_path = yield self.media_storage.store_file(
            t_byte_source, thumbnail_info,
        )
    finally:
        # Always release the in-memory thumbnail, even if storing failed.
        t_byte_source.close()

    logger.info("Stored thumbnail in file %r", output_path)

    thumbnail_length = os.path.getsize(output_path)
    yield self.store.store_local_thumbnail(
        media_id, t_width, t_height, t_type, t_method, thumbnail_length
    )

    defer.returnValue(output_path)
|
2016-04-19 06:31:43 -04:00
|
|
|
|
|
|
|
@defer.inlineCallbacks
def generate_remote_exact_thumbnail(self, server_name, file_id, media_id,
                                    t_width, t_height, t_method, t_type):
    """Generate and store one thumbnail of a cached remote media file.

    Args:
        server_name (str): Remote server_name where the media originated.
        file_id (str): Local file ID the remote media is stored under.
        media_id (str): The media ID of the content (as defined by the
            remote server).
        t_width (int): Thumbnail width.
        t_height (int): Thumbnail height.
        t_method (str): Thumbnailing method, "crop" or "scale".
        t_type (str): Thumbnail type, passed to the thumbnailer.

    Returns:
        Deferred[str|None]: Path the thumbnail was stored at, or None if
        no thumbnail was produced.
    """
    input_path = self.filepaths.remote_media_filepath(server_name, file_id)

    thumbnailer = Thumbnailer(input_path)
    # The image work is done in a thread, off the reactor.
    t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
        self._generate_thumbnail,
        thumbnailer, t_width, t_height, t_method, t_type
    ))

    if t_byte_source:
        try:
            file_info = FileInfo(
                server_name=server_name,
                # Remote files are keyed on disk by the locally generated
                # file_id, not the remote media_id. Using media_id here
                # would write the thumbnail where neither the DB row below
                # nor delete_old_remote_media would look for it.
                file_id=file_id,
                thumbnail=True,
                thumbnail_width=t_width,
                thumbnail_height=t_height,
                thumbnail_method=t_method,
                thumbnail_type=t_type,
            )

            output_path = yield self.media_storage.store_file(
                t_byte_source, file_info,
            )
        finally:
            # Always release the in-memory thumbnail, even if storing
            # failed.
            t_byte_source.close()

        logger.info("Stored thumbnail in file %r", output_path)

        t_len = os.path.getsize(output_path)

        yield self.store.store_remote_media_thumbnail(
            server_name, media_id, file_id,
            t_width, t_height, t_type, t_method, t_len
        )

        defer.returnValue(output_path)
|
2016-04-19 06:31:43 -04:00
|
|
|
|
|
|
|
@defer.inlineCallbacks
def _generate_thumbnails(self, server_name, media_id, file_id, media_type,
                         url_cache=False):
    """Generate and store thumbnails for an image.

    Args:
        server_name (str|None): The server name if remote media, else None
            if local
        media_id (str): The media ID of the content. (This is the same as
            the file_id for local content)
        file_id (str): Local file ID
        media_type (str): The content type of the file
        url_cache (bool): If we are thumbnailing images downloaded for the
            URL cache, used exclusively by the url previewer

    Returns:
        Deferred[dict|None]: Dict with "width" and "height" keys of the
        original image, or None if the media type has no thumbnail
        requirements or the image has too many pixels.
    """
    requirements = self._get_thumbnail_requirements(media_type)
    if not requirements:
        return

    paths = self.filepaths
    if server_name:
        input_path = paths.remote_media_filepath(server_name, file_id)
    elif url_cache:
        input_path = paths.url_cache_filepath(media_id)
    else:
        input_path = paths.local_media_filepath(media_id)

    thumbnailer = Thumbnailer(input_path)
    m_width = thumbnailer.width
    m_height = thumbnailer.height

    if m_width * m_height >= self.max_image_pixels:
        logger.info(
            "Image too large to thumbnail %r x %r > %r",
            m_width, m_height, self.max_image_pixels
        )
        return

    # We deduplicate the thumbnail sizes by ignoring the cropped versions if
    # they have the same dimensions of a scaled one: a "scale" entry always
    # overwrites, a "crop" entry only fills an empty slot.
    thumbnails = {}
    for r_width, r_height, r_method, r_type in requirements:
        if r_method == "crop":
            crop_key = (r_width, r_height, r_type)
            if crop_key not in thumbnails:
                thumbnails[crop_key] = r_method
        elif r_method == "scale":
            fit_width, fit_height = thumbnailer.aspect(r_width, r_height)
            scale_key = (
                min(m_width, fit_width), min(m_height, fit_height), r_type,
            )
            thumbnails[scale_key] = r_method

    # Now we generate the thumbnails for each dimension, store it
    for (t_width, t_height, t_type), t_method in thumbnails.iteritems():
        # Pick the thumbnailer operation for this method
        if t_method == "crop":
            render = thumbnailer.crop
        elif t_method == "scale":
            render = thumbnailer.scale
        else:
            logger.error("Unrecognized method: %r", t_method)
            continue

        # Generate the thumbnail in a thread, off the reactor
        t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
            render,
            t_width, t_height, t_type,
        ))

        if not t_byte_source:
            continue

        try:
            thumbnail_info = FileInfo(
                server_name=server_name,
                file_id=file_id,
                thumbnail=True,
                thumbnail_width=t_width,
                thumbnail_height=t_height,
                thumbnail_method=t_method,
                thumbnail_type=t_type,
                url_cache=url_cache,
            )
            output_path = yield self.media_storage.store_file(
                t_byte_source, thumbnail_info,
            )
        finally:
            t_byte_source.close()

        t_len = os.path.getsize(output_path)

        # Write to database
        if server_name:
            yield self.store.store_remote_media_thumbnail(
                server_name, media_id, file_id,
                t_width, t_height, t_type, t_method, t_len
            )
        else:
            yield self.store.store_local_thumbnail(
                media_id, t_width, t_height, t_type, t_method, t_len
            )

    original_dimensions = {
        "width": m_width,
        "height": m_height,
    }
    defer.returnValue(original_dimensions)
|
|
|
|
|
2016-06-29 09:57:59 -04:00
|
|
|
@defer.inlineCallbacks
def delete_old_remote_media(self, before_ts):
    """Delete cached remote media selected by the store for before_ts.

    For each entry returned by the store, the media file and its thumbnail
    directory are removed from the local filesystem and the entry is
    deleted from the store.

    Args:
        before_ts: Cut-off handed to the store's get_remote_media_before.

    Returns:
        Deferred[dict]: {"deleted": <number of entries removed>}
    """
    old_media = yield self.store.get_remote_media_before(before_ts)

    deleted = 0

    for entry in old_media:
        origin = entry["media_origin"]
        media_id = entry["media_id"]
        file_id = entry["filesystem_id"]
        key = (origin, media_id)

        logger.info("Deleting: %r", key)

        # TODO: Should we delete from the backup store

        with (yield self.remote_media_linearizer.queue(key)):
            full_path = self.filepaths.remote_media_filepath(origin, file_id)
            try:
                os.remove(full_path)
            except OSError as e:
                logger.warn("Failed to remove file: %r", full_path)
                # A file that is already gone is fine; for any other error
                # leave the thumbnails and the DB row in place.
                if e.errno != errno.ENOENT:
                    continue

            thumbnail_dir = self.filepaths.remote_media_thumbnail_dir(
                origin, file_id
            )
            shutil.rmtree(thumbnail_dir, ignore_errors=True)

            yield self.store.delete_remote_media(origin, media_id)
            deleted += 1

    defer.returnValue({"deleted": deleted})
|
|
|
|
|
2016-04-19 06:31:43 -04:00
|
|
|
|
2014-12-02 14:51:47 -05:00
|
|
|
class MediaRepositoryResource(Resource):
    """File uploading and downloading.

    Uploads are POSTed to a resource which returns a token which is used to
    GET the download::

        => POST /_matrix/media/v1/upload HTTP/1.1
           Content-Type: <media-type>
           Content-Length: <content-length>

           <media>

        <= HTTP/1.1 200 OK
           Content-Type: application/json

           { "content_uri": "mxc://<server-name>/<media-id>" }

        => GET /_matrix/media/v1/download/<server-name>/<media-id> HTTP/1.1

        <= HTTP/1.1 200 OK
           Content-Type: <media-type>
           Content-Disposition: attachment;filename=<upload-filename>

           <media>

    Clients can get thumbnails by supplying a desired width and height and
    thumbnailing method::

        => GET /_matrix/media/v1/thumbnail/<server_name>
                /<media-id>?width=<w>&height=<h>&method=<m> HTTP/1.1

        <= HTTP/1.1 200 OK
           Content-Type: image/jpeg or image/png

           <thumbnail>

    The thumbnail methods are "crop" and "scale". "scale" tries to return an
    image where either the width or the height is smaller than the requested
    size. The client should then scale and letterbox the image if it needs to
    fit within a given rectangle. "crop" tries to return an image where the
    width and height are close to the requested size and the aspect matches
    the requested size. The client should scale the image if it needs to fit
    within a given rectangle.
    """

    def __init__(self, hs):
        """Register the media sub-resources as children of this resource.

        Args:
            hs: The homeserver; supplies the shared media repository and
                the config flag that enables URL previews.
        """
        Resource.__init__(self)

        media_repo = hs.get_media_repository()

        upload_resource = UploadResource(hs, media_repo)
        self.putChild("upload", upload_resource)

        download_resource = DownloadResource(hs, media_repo)
        self.putChild("download", download_resource)

        thumbnail_resource = ThumbnailResource(
            hs, media_repo, media_repo.media_storage,
        )
        self.putChild("thumbnail", thumbnail_resource)

        self.putChild("identicon", IdenticonResource())

        # URL previews are optional and only mounted when enabled in config.
        if hs.config.url_preview_enabled:
            preview_resource = PreviewUrlResource(
                hs, media_repo, media_repo.media_storage,
            )
            self.putChild("preview_url", preview_resource)
|