2018-01-08 12:07:30 -05:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2018 New Vector Ltd
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2018-07-09 02:09:20 -04:00
|
|
|
import contextlib
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import shutil
|
2020-07-27 14:40:11 -04:00
|
|
|
from typing import IO, TYPE_CHECKING, Any, Optional, Sequence
|
2018-04-06 16:57:06 -04:00
|
|
|
|
2018-07-09 02:09:20 -04:00
|
|
|
from twisted.protocols.basic import FileSender
|
2018-01-08 12:07:30 -05:00
|
|
|
|
2019-07-03 10:07:04 -04:00
|
|
|
from synapse.logging.context import defer_to_thread, make_deferred_yieldable
|
2018-01-17 11:56:23 -05:00
|
|
|
from synapse.util.file_consumer import BackgroundFileConsumer
|
2018-01-08 12:07:30 -05:00
|
|
|
|
2020-07-24 09:39:02 -04:00
|
|
|
from ._base import FileInfo, Responder
|
2020-07-27 14:40:11 -04:00
|
|
|
from .filepath import MediaFilePaths
|
|
|
|
|
|
|
|
if TYPE_CHECKING:
|
|
|
|
from synapse.server import HomeServer
|
|
|
|
|
2020-08-04 09:44:25 -04:00
|
|
|
from .storage_provider import StorageProviderWrapper
|
2018-01-17 11:56:23 -05:00
|
|
|
|
2018-01-08 12:07:30 -05:00
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2020-09-04 06:54:56 -04:00
|
|
|
class MediaStorage:
    """Responsible for storing/fetching files from local sources.

    Args:
        hs: The homeserver instance.
        local_media_directory: Base path where we store media on disk.
        filepaths: Used to map file info to relative paths on disk.
        storage_providers: List of StorageProvider that are used to fetch and
            store files.
    """

    def __init__(
        self,
        hs: "HomeServer",
        local_media_directory: str,
        filepaths: "MediaFilePaths",
        storage_providers: Sequence["StorageProviderWrapper"],
    ):
        self.hs = hs
        self.reactor = hs.get_reactor()
        self.local_media_directory = local_media_directory
        self.filepaths = filepaths
        self.storage_providers = storage_providers

    async def store_file(self, source: IO, file_info: "FileInfo") -> str:
        """Write `source` to the on disk media store, and also any other
        configured storage providers

        Args:
            source: A file like object that should be written
            file_info: Info about the file to store

        Returns:
            the file path written to in the primary media store
        """
        with self.store_into_file(file_info) as (f, fname, finish_cb):
            # Write to the main repository
            await self.write_to_file(source, f)
            await finish_cb()

        return fname

    async def write_to_file(self, source: IO, output: IO) -> None:
        """Asynchronously write the `source` to `output`.

        The copy is performed on a background thread so the reactor is not
        blocked by disk I/O.
        """
        await defer_to_thread(self.reactor, _write_file_synchronously, source, output)

    @contextlib.contextmanager
    def store_into_file(self, file_info: "FileInfo"):
        """Context manager used to get a file like object to write into, as
        described by file_info.

        Actually yields a 3-tuple (file, fname, finish_cb), where file is a file
        like object that can be written to, fname is the absolute path of file
        on disk, and finish_cb is a function that returns an awaitable.

        fname can be used to read the contents from after upload, e.g. to
        generate thumbnails.

        finish_cb must be called and waited on after the file has been
        successfully been written to. Should not be called if there was an
        error. Calling finish_cb also replicates the file to any configured
        storage providers.

        Args:
            file_info: Info about the file to store

        Example:

            with media_storage.store_into_file(info) as (f, fname, finish_cb):
                # .. write into f ...
                await finish_cb()
        """
        path = self._file_info_to_path(file_info)
        fname = os.path.join(self.local_media_directory, path)

        # exist_ok avoids the check-then-create race of the old
        # `if not os.path.exists(...): os.makedirs(...)` pattern.
        os.makedirs(os.path.dirname(fname), exist_ok=True)

        # Single-element list so the `finish` closure can mutate the flag
        # without needing `nonlocal`.
        finished_called = [False]

        try:
            with open(fname, "wb") as f:

                async def finish():
                    # Ensure that all writes have been flushed and close the
                    # file.
                    f.flush()
                    f.close()

                    # Replicate to all configured storage providers.
                    for provider in self.storage_providers:
                        await provider.store_file(path, file_info)

                    finished_called[0] = True

                yield f, fname, finish
        except Exception:
            # Best-effort removal of the partially-written file.
            try:
                os.remove(fname)
            except Exception:
                pass
            raise

        # Bug fix: previously this tested `if not finished_called:` -- a
        # non-empty list is always truthy, so the check could never fire.
        if not finished_called[0]:
            raise Exception("Finished callback not called")

    async def fetch_media(self, file_info: "FileInfo") -> Optional["Responder"]:
        """Attempts to fetch media described by file_info from the local cache
        and configured storage providers.

        Args:
            file_info: Info about the media to fetch.

        Returns:
            Returns a Responder if the file was found, otherwise None.
        """
        paths = [self._file_info_to_path(file_info)]

        # fallback for remote thumbnails with no method in the filename
        if file_info.thumbnail and file_info.server_name:
            paths.append(
                self.filepaths.remote_media_thumbnail_rel_legacy(
                    server_name=file_info.server_name,
                    file_id=file_info.file_id,
                    width=file_info.thumbnail_width,
                    height=file_info.thumbnail_height,
                    content_type=file_info.thumbnail_type,
                )
            )

        # Prefer the local on-disk cache over storage providers.
        for path in paths:
            local_path = os.path.join(self.local_media_directory, path)
            if os.path.exists(local_path):
                logger.debug("responding with local file %s", local_path)
                return FileResponder(open(local_path, "rb"))
            logger.debug("local file %s did not exist", local_path)

        for provider in self.storage_providers:
            for path in paths:
                res = await provider.fetch(path, file_info)  # type: Any
                if res:
                    logger.debug("Streaming %s from %s", path, provider)
                    return res
                logger.debug("%s not found on %s", path, provider)

        return None

    async def ensure_media_is_in_local_cache(self, file_info: "FileInfo") -> str:
        """Ensures that the given file is in the local cache. Attempts to
        download it from storage providers if it isn't.

        Args:
            file_info: Info about the media to fetch.

        Returns:
            Full path to local file

        Raises:
            Exception: if the file is not in the cache and cannot be fetched
                from any storage provider.
        """
        path = self._file_info_to_path(file_info)
        local_path = os.path.join(self.local_media_directory, path)
        if os.path.exists(local_path):
            return local_path

        # Fallback for paths without method names
        # Should be removed in the future
        if file_info.thumbnail and file_info.server_name:
            legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
                server_name=file_info.server_name,
                file_id=file_info.file_id,
                width=file_info.thumbnail_width,
                height=file_info.thumbnail_height,
                content_type=file_info.thumbnail_type,
            )
            legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
            if os.path.exists(legacy_local_path):
                return legacy_local_path

        # exist_ok avoids the check-then-create race.
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        for provider in self.storage_providers:
            res = await provider.fetch(path, file_info)  # type: Any
            if res:
                with res:
                    consumer = BackgroundFileConsumer(
                        open(local_path, "wb"), self.reactor
                    )
                    await res.write_to_consumer(consumer)
                    await consumer.wait()
                return local_path

        raise Exception("file could not be found")

    def _file_info_to_path(self, file_info: "FileInfo") -> str:
        """Converts file_info into a relative path.

        The path is suitable for storing files under a directory, e.g. used to
        store files on local FS under the base media repository directory.
        """
        if file_info.url_cache:
            if file_info.thumbnail:
                return self.filepaths.url_cache_thumbnail_rel(
                    media_id=file_info.file_id,
                    width=file_info.thumbnail_width,
                    height=file_info.thumbnail_height,
                    content_type=file_info.thumbnail_type,
                    method=file_info.thumbnail_method,
                )
            return self.filepaths.url_cache_filepath_rel(file_info.file_id)

        if file_info.server_name:
            if file_info.thumbnail:
                return self.filepaths.remote_media_thumbnail_rel(
                    server_name=file_info.server_name,
                    file_id=file_info.file_id,
                    width=file_info.thumbnail_width,
                    height=file_info.thumbnail_height,
                    content_type=file_info.thumbnail_type,
                    method=file_info.thumbnail_method,
                )
            return self.filepaths.remote_media_filepath_rel(
                file_info.server_name, file_info.file_id
            )

        if file_info.thumbnail:
            return self.filepaths.local_media_thumbnail_rel(
                media_id=file_info.file_id,
                width=file_info.thumbnail_width,
                height=file_info.thumbnail_height,
                content_type=file_info.thumbnail_type,
                method=file_info.thumbnail_method,
            )
        return self.filepaths.local_media_filepath_rel(file_info.file_id)
|
2018-01-08 12:07:30 -05:00
|
|
|
|
|
|
|
|
2018-02-14 12:55:18 -05:00
|
|
|
def _write_file_synchronously(source: IO, dest: IO) -> None:
    """Write `source` to the file like `dest` synchronously. Should be called
    from a thread.

    Args:
        source: A file like object that's to be written
        dest: A file like object to be written to
    """
    source.seek(0)  # Ensure we read from the start of the file
    shutil.copyfileobj(source, dest)
|
2018-01-08 12:07:30 -05:00
|
|
|
|
|
|
|
|
|
|
|
class FileResponder(Responder):
    """A Responder that streams an already-open file to a request.

    Args:
        open_file (file): A file like object to be streamed to the client;
            it is closed once streaming has finished.
    """

    def __init__(self, open_file):
        self.open_file = open_file

    def write_to_consumer(self, consumer):
        # Hand the transfer off to twisted's FileSender and wrap the
        # resulting Deferred so logcontexts are preserved.
        transfer = FileSender().beginFileTransfer(self.open_file, consumer)
        return make_deferred_yieldable(transfer)

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Release the underlying file handle when the responder is done with.
        self.open_file.close()
|