Basic implementation of backup media store

This commit is contained in:
Erik Johnston 2017-10-12 15:20:59 +01:00
parent f807f7f804
commit bf4fb1fb40
4 changed files with 130 additions and 125 deletions

View File

@ -70,7 +70,17 @@ class ContentRepositoryConfig(Config):
self.max_upload_size = self.parse_size(config["max_upload_size"]) self.max_upload_size = self.parse_size(config["max_upload_size"])
self.max_image_pixels = self.parse_size(config["max_image_pixels"]) self.max_image_pixels = self.parse_size(config["max_image_pixels"])
self.max_spider_size = self.parse_size(config["max_spider_size"]) self.max_spider_size = self.parse_size(config["max_spider_size"])
self.media_store_path = self.ensure_directory(config["media_store_path"]) self.media_store_path = self.ensure_directory(config["media_store_path"])
self.backup_media_store_path = config.get("backup_media_store_path")
if self.backup_media_store_path:
self.ensure_directory(self.backup_media_store_path)
self.synchronous_backup_media_store = config.get(
"synchronous_backup_media_store", False
)
self.uploads_path = self.ensure_directory(config["uploads_path"]) self.uploads_path = self.ensure_directory(config["uploads_path"])
self.dynamic_thumbnails = config["dynamic_thumbnails"] self.dynamic_thumbnails = config["dynamic_thumbnails"]
self.thumbnail_requirements = parse_thumbnail_requirements( self.thumbnail_requirements = parse_thumbnail_requirements(
@ -115,6 +125,14 @@ class ContentRepositoryConfig(Config):
# Directory where uploaded images and attachments are stored. # Directory where uploaded images and attachments are stored.
media_store_path: "%(media_store)s" media_store_path: "%(media_store)s"
# A secondary directory where uploaded images and attachments are
# stored as a backup.
# backup_media_store_path: "%(media_store)s"
# Whether to wait for successful write to backup media store before
# returning successfully.
# synchronous_backup_media_store: false
# Directory where in-progress uploads are stored. # Directory where in-progress uploads are stored.
uploads_path: "%(uploads_path)s" uploads_path: "%(uploads_path)s"

View File

@ -33,7 +33,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \
from synapse.util.async import Linearizer from synapse.util.async import Linearizer
from synapse.util.stringutils import is_ascii from synapse.util.stringutils import is_ascii
from synapse.util.logcontext import preserve_context_over_fn from synapse.util.logcontext import preserve_context_over_fn, preserve_fn
from synapse.util.retryutils import NotRetryingDestination from synapse.util.retryutils import NotRetryingDestination
import os import os
@ -59,7 +59,12 @@ class MediaRepository(object):
self.store = hs.get_datastore() self.store = hs.get_datastore()
self.max_upload_size = hs.config.max_upload_size self.max_upload_size = hs.config.max_upload_size
self.max_image_pixels = hs.config.max_image_pixels self.max_image_pixels = hs.config.max_image_pixels
self.filepaths = MediaFilePaths(hs.config.media_store_path) self.filepaths = MediaFilePaths(hs.config.media_store_path)
self.backup_filepaths = None
if hs.config.backup_media_store_path:
self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path)
self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.dynamic_thumbnails = hs.config.dynamic_thumbnails
self.thumbnail_requirements = hs.config.thumbnail_requirements self.thumbnail_requirements = hs.config.thumbnail_requirements
@ -87,18 +92,43 @@ class MediaRepository(object):
if not os.path.exists(dirname): if not os.path.exists(dirname):
os.makedirs(dirname) os.makedirs(dirname)
@defer.inlineCallbacks
def _write_to_file(self, source, file_name_func):
    """Write `source` to the main media store and, if configured, the backup.

    Args:
        source: A readable, seekable file-like object holding the bytes to
            store. It is rewound to the start before every copy.
        file_name_func: A callable that takes a `MediaFilePaths` instance
            and returns the destination path inside that store. It is
            called once with the main store's paths and, when a backup
            store is configured, once more with the backup store's paths.

    Returns:
        Deferred[str]: resolves to the path written in the main store.
    """
    def write_file_thread(file_name):
        # Runs in a worker thread so the reactor is not blocked on disk I/O.
        source.seek(0)  # Ensure we read from the start of the file
        with open(file_name, "wb") as f:
            shutil.copyfileobj(source, f)

    fname = file_name_func(self.filepaths)
    self._makedirs(fname)

    # Write to the main repository
    yield preserve_context_over_fn(threads.deferToThread, write_file_thread, fname)

    # Write to backup repository
    if self.backup_filepaths:
        backup_fname = file_name_func(self.backup_filepaths)
        self._makedirs(backup_fname)

        # NOTE(review): `hs` is not in scope inside this method, so the
        # setting is read from the instance instead. The constructor should
        # store hs.config.synchronous_backup_media_store on `self`; until it
        # does, this falls back to the config default (False, i.e. write the
        # backup in the background).
        synchronous = getattr(self, "synchronous_backup_media_store", False)

        # We can either wait for successful writing to the backup repository
        # or write in the background and immediately return
        if synchronous:
            yield preserve_context_over_fn(
                threads.deferToThread, write_file_thread, backup_fname,
            )
        else:
            # NOTE(review): the background copy still reads from `source`
            # after this method has returned - confirm that callers keep it
            # open and do not reuse it until the copy has finished.
            preserve_fn(threads.deferToThread)(write_file_thread, backup_fname)

    defer.returnValue(fname)
@defer.inlineCallbacks @defer.inlineCallbacks
def create_content(self, media_type, upload_name, content, content_length, def create_content(self, media_type, upload_name, content, content_length,
auth_user): auth_user):
media_id = random_string(24) media_id = random_string(24)
fname = self.filepaths.local_media_filepath(media_id) fname = yield self._write_to_file(
self._makedirs(fname) content, lambda f: f.local_media_filepath(media_id)
)
# This shouldn't block for very long because the content will have
# already been uploaded at this point.
with open(fname, "wb") as f:
f.write(content)
logger.info("Stored local media in file %r", fname) logger.info("Stored local media in file %r", fname)
@ -253,9 +283,8 @@ class MediaRepository(object):
def _get_thumbnail_requirements(self, media_type): def _get_thumbnail_requirements(self, media_type):
return self.thumbnail_requirements.get(media_type, ()) return self.thumbnail_requirements.get(media_type, ())
def _generate_thumbnail(self, input_path, t_path, t_width, t_height, def _generate_thumbnail(self, thumbnailer, t_width, t_height,
t_method, t_type): t_method, t_type):
thumbnailer = Thumbnailer(input_path)
m_width = thumbnailer.width m_width = thumbnailer.width
m_height = thumbnailer.height m_height = thumbnailer.height
@ -267,36 +296,40 @@ class MediaRepository(object):
return return
if t_method == "crop": if t_method == "crop":
t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) t_byte_source = thumbnailer.crop(t_width, t_height, t_type)
elif t_method == "scale": elif t_method == "scale":
t_width, t_height = thumbnailer.aspect(t_width, t_height) t_width, t_height = thumbnailer.aspect(t_width, t_height)
t_width = min(m_width, t_width) t_width = min(m_width, t_width)
t_height = min(m_height, t_height) t_height = min(m_height, t_height)
t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) t_byte_source = thumbnailer.scale(t_width, t_height, t_type)
else: else:
t_len = None t_byte_source = None
return t_len return t_byte_source
@defer.inlineCallbacks @defer.inlineCallbacks
def generate_local_exact_thumbnail(self, media_id, t_width, t_height, def generate_local_exact_thumbnail(self, media_id, t_width, t_height,
t_method, t_type): t_method, t_type):
input_path = self.filepaths.local_media_filepath(media_id) input_path = self.filepaths.local_media_filepath(media_id)
t_path = self.filepaths.local_media_thumbnail( thumbnailer = Thumbnailer(input_path)
media_id, t_width, t_height, t_type, t_method t_byte_source = yield preserve_context_over_fn(
)
self._makedirs(t_path)
t_len = yield preserve_context_over_fn(
threads.deferToThread, threads.deferToThread,
self._generate_thumbnail, self._generate_thumbnail,
input_path, t_path, t_width, t_height, t_method, t_type thumbnailer, t_width, t_height, t_method, t_type
) )
if t_len: if t_byte_source:
output_path = yield self._write_to_file(
content,
lambda f: f.local_media_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
)
logger.info("Stored thumbnail in file %r", output_path)
yield self.store.store_local_thumbnail( yield self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue())
) )
defer.returnValue(t_path) defer.returnValue(t_path)
@ -306,21 +339,25 @@ class MediaRepository(object):
t_width, t_height, t_method, t_type): t_width, t_height, t_method, t_type):
input_path = self.filepaths.remote_media_filepath(server_name, file_id) input_path = self.filepaths.remote_media_filepath(server_name, file_id)
t_path = self.filepaths.remote_media_thumbnail( thumbnailer = Thumbnailer(input_path)
server_name, file_id, t_width, t_height, t_type, t_method t_byte_source = yield preserve_context_over_fn(
)
self._makedirs(t_path)
t_len = yield preserve_context_over_fn(
threads.deferToThread, threads.deferToThread,
self._generate_thumbnail, self._generate_thumbnail,
input_path, t_path, t_width, t_height, t_method, t_type thumbnailer, t_width, t_height, t_method, t_type
) )
if t_len: if t_byte_source:
output_path = yield self._write_to_file(
content,
lambda f: f.remote_media_thumbnail(
server_name, file_id, t_width, t_height, t_type, t_method
)
)
logger.info("Stored thumbnail in file %r", output_path)
yield self.store.store_remote_media_thumbnail( yield self.store.store_remote_media_thumbnail(
server_name, media_id, file_id, server_name, media_id, file_id,
t_width, t_height, t_type, t_method, t_len t_width, t_height, t_type, t_method, len(t_byte_source.getvalue())
) )
defer.returnValue(t_path) defer.returnValue(t_path)
@ -351,59 +388,32 @@ class MediaRepository(object):
local_thumbnails = [] local_thumbnails = []
def generate_thumbnails(): def generate_thumbnails():
scales = set()
crops = set()
for r_width, r_height, r_method, r_type in requirements: for r_width, r_height, r_method, r_type in requirements:
if r_method == "scale": t_byte_source = self._generate_thumbnail(
t_width, t_height = thumbnailer.aspect(r_width, r_height) thumbnailer, r_width, r_height, r_method, r_type,
scales.add(( )
min(m_width, t_width), min(m_height, t_height), r_type,
))
elif r_method == "crop":
crops.add((r_width, r_height, r_type))
for t_width, t_height, t_type in scales:
t_method = "scale"
if url_cache:
t_path = self.filepaths.url_cache_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
else:
t_path = self.filepaths.local_media_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
self._makedirs(t_path)
t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
local_thumbnails.append(( local_thumbnails.append((
media_id, t_width, t_height, t_type, t_method, t_len r_width, r_height, r_method, r_type, t_byte_source
))
for t_width, t_height, t_type in crops:
if (t_width, t_height, t_type) in scales:
# If the aspect ratio of the cropped thumbnail matches a purely
# scaled one then there is no point in calculating a separate
# thumbnail.
continue
t_method = "crop"
if url_cache:
t_path = self.filepaths.url_cache_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
else:
t_path = self.filepaths.local_media_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
self._makedirs(t_path)
t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
local_thumbnails.append((
media_id, t_width, t_height, t_type, t_method, t_len
)) ))
yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails)
for l in local_thumbnails: for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails:
yield self.store.store_local_thumbnail(*l) if url_cache:
path_name_func = lambda f: f.url_cache_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
else:
path_name_func = lambda f: f.local_media_thumbnail(
media_id, t_width, t_height, t_type, t_method
)
yield self._write_to_file(t_byte_source, path_name_func)
yield self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue())
)
defer.returnValue({ defer.returnValue({
"width": m_width, "width": m_width,
@ -433,51 +443,32 @@ class MediaRepository(object):
) )
return return
scales = set()
crops = set()
for r_width, r_height, r_method, r_type in requirements: for r_width, r_height, r_method, r_type in requirements:
if r_method == "scale": t_byte_source = self._generate_thumbnail(
t_width, t_height = thumbnailer.aspect(r_width, r_height) thumbnailer, r_width, r_height, r_method, r_type,
scales.add((
min(m_width, t_width), min(m_height, t_height), r_type,
))
elif r_method == "crop":
crops.add((r_width, r_height, r_type))
for t_width, t_height, t_type in scales:
t_method = "scale"
t_path = self.filepaths.remote_media_thumbnail(
server_name, file_id, t_width, t_height, t_type, t_method
) )
self._makedirs(t_path)
t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
remote_thumbnails.append([
server_name, media_id, file_id,
t_width, t_height, t_type, t_method, t_len
])
for t_width, t_height, t_type in crops: remote_thumbnails.append((
if (t_width, t_height, t_type) in scales: r_width, r_height, r_method, r_type, t_byte_source
# If the aspect ratio of the cropped thumbnail matches a purely ))
# scaled one then there is no point in calculating a separate
# thumbnail.
continue
t_method = "crop"
t_path = self.filepaths.remote_media_thumbnail(
server_name, file_id, t_width, t_height, t_type, t_method
)
self._makedirs(t_path)
t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
remote_thumbnails.append([
server_name, media_id, file_id,
t_width, t_height, t_type, t_method, t_len
])
yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails)
for r in remote_thumbnails: for r in remote_thumbnails:
yield self.store.store_remote_media_thumbnail(*r) yield self.store.store_remote_media_thumbnail(*r)
for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails:
path_name_func = lambda f: f.remote_media_thumbnail(
server_name, media_id, file_id, t_width, t_height, t_type, t_method
)
yield self._write_to_file(t_byte_source, path_name_func)
yield self.store.store_remote_media_thumbnail(
server_name, media_id, file_id,
t_width, t_height, t_type, t_method, len(t_byte_source.getvalue())
)
defer.returnValue({ defer.returnValue({
"width": m_width, "width": m_width,
"height": m_height, "height": m_height,

View File

@ -50,12 +50,12 @@ class Thumbnailer(object):
else: else:
return ((max_height * self.width) // self.height, max_height) return ((max_height * self.width) // self.height, max_height)
def scale(self, output_path, width, height, output_type): def scale(self, width, height, output_type):
"""Rescales the image to the given dimensions""" """Rescales the image to the given dimensions"""
scaled = self.image.resize((width, height), Image.ANTIALIAS) scaled = self.image.resize((width, height), Image.ANTIALIAS)
return self.save_image(scaled, output_type, output_path) return self._encode_image(scaled, output_type)
def crop(self, output_path, width, height, output_type): def crop(self, width, height, output_type):
"""Rescales and crops the image to the given dimensions preserving """Rescales and crops the image to the given dimensions preserving
aspect:: aspect::
(w_in / h_in) = (w_scaled / h_scaled) (w_in / h_in) = (w_scaled / h_scaled)
@ -82,13 +82,9 @@ class Thumbnailer(object):
crop_left = (scaled_width - width) // 2 crop_left = (scaled_width - width) // 2
crop_right = width + crop_left crop_right = width + crop_left
cropped = scaled_image.crop((crop_left, 0, crop_right, height)) cropped = scaled_image.crop((crop_left, 0, crop_right, height))
return self.save_image(cropped, output_type, output_path) return self._encode_image(cropped, output_type)
def save_image(self, output_image, output_type, output_path): def _encode_image(self, output_image, output_type):
output_bytes_io = BytesIO() output_bytes_io = BytesIO()
output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80)
output_bytes = output_bytes_io.getvalue() return output_bytes_io
with open(output_path, "wb") as output_file:
output_file.write(output_bytes)
logger.info("Stored thumbnail in file %r", output_path)
return len(output_bytes)

View File

@ -93,7 +93,7 @@ class UploadResource(Resource):
# TODO(markjh): parse content-disposition # TODO(markjh): parse content-disposition
content_uri = yield self.media_repo.create_content( content_uri = yield self.media_repo.create_content(
media_type, upload_name, request.content.read(), media_type, upload_name, request.content,
content_length, requester.user content_length, requester.user
) )