From a4c5e4a6451dfc7b378c10b916635de6bdafa80a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 11:06:42 +0000 Subject: [PATCH 1/9] Fix thumbnailing remote files --- synapse/rest/media/v1/media_repository.py | 28 +++++++++++++++++++++ synapse/rest/media/v1/thumbnail_resource.py | 4 +-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 45bc53420..2608fab5d 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -227,6 +227,34 @@ class MediaRepository(object): else: respond_404(request) + @defer.inlineCallbacks + def get_remote_media_info(self, server_name, media_id): + """Gets the media info associated with the remote file, downloading + if necessary. + + Args: + server_name (str): Remote server_name where the media originated. + media_id (str): The media ID of the content (as defined by the + remote server). + + Returns: + Deferred[dict]: The media_info of the file + """ + # We linearize here to ensure that we don't try and download remote + # media multiple times concurrently + key = (server_name, media_id) + with (yield self.remote_media_linearizer.queue(key)): + responder, media_info = yield self._get_remote_media_impl( + server_name, media_id, + ) + + # Ensure we actually use the responder so that it releases resources + if responder: + with responder: + pass + + defer.returnValue(media_info) + @defer.inlineCallbacks def _get_remote_media_impl(self, server_name, media_id): """Looks for media in local cache, if not there then attempt to diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 835540c3d..70cea7782 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -165,7 +165,7 @@ class ThumbnailResource(Resource): def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, desired_width, desired_height, desired_method, desired_type): - media_info = yield self.media_repo.get_remote_media(server_name, media_id) + media_info = yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -216,7 +216,7 @@ class ThumbnailResource(Resource): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. - yield self.media_repo.get_remote_media(server_name, media_id) + yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, From c5b589f2e8205ca0253534cf5826b807253bb8ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 12:01:40 +0000 Subject: [PATCH 2/9] Log when we respond with 404 --- synapse/rest/media/v1/media_repository.py | 1 + synapse/rest/media/v1/thumbnail_resource.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 2608fab5d..b12fabd94 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -285,6 +285,7 @@ class MediaRepository(object): # If we have an entry in the DB, try and look for it if media_info: if media_info["quarantined_by"]: + logger.info("Media is quarentined") raise NotFoundError() responder = yield self.media_storage.fetch_media(file_info) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70cea7782..b8c38eb31 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,6 +85,7 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -111,6 +112,7 @@ class ThumbnailResource(Resource): responder = yield self.media_storage.fetch_media(file_info) yield respond_with_responder(request, responder, t_type, t_length) else: + logger.info("Couldn't find any generated thumbnails") respond_404(request) @defer.inlineCallbacks @@ -120,6 +122,7 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -159,6 +162,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: + logger.warn("Failed to generate local thumbnail") respond_404(request) @defer.inlineCallbacks @@ -197,7 +201,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a local thumbnail of that size. Generating") + logger.debug("We don't have a remote thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_remote_exact_thumbnail( @@ -208,6 +212,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: + logger.warn("Failed to generate remote thumbnail") respond_404(request) @defer.inlineCallbacks @@ -241,6 +246,7 @@ class ThumbnailResource(Resource): responder = yield self.media_storage.fetch_media(file_info) yield respond_with_responder(request, responder, t_type, t_length) else: + logger.info("Failed to find any generated thumbnails") respond_404(request) def _select_thumbnail(self, desired_width, desired_height, desired_method, From 9795b9ebb11953747e6362fe123725326885667b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 12:02:06 +0000 Subject: [PATCH 3/9] Correctly use server_name/file_id when generating/fetching remote thumbnails --- synapse/rest/media/v1/media_repository.py | 7 +++++-- synapse/rest/media/v1/thumbnail_resource.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index b12fabd94..e82dcfdc2 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -539,7 +539,10 @@ class MediaRepository(object): Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - file_id = media_info.get("filesystem_id") + if server_name: + file_id = media_info["filesystem_id"] + else: + file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return @@ -597,7 +600,7 @@ class MediaRepository(object): try: file_info = FileInfo( server_name=server_name, - file_id=media_id, + file_id=file_id, thumbnail=True, thumbnail_width=t_width, thumbnail_height=t_height, diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index b8c38eb31..e20d6f10b 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -185,7 +185,7 @@ class ThumbnailResource(Resource): if t_w and t_h and t_method and t_type: file_info = FileInfo( - server_name=None, file_id=media_id, + server_name=server_name, file_id=media_info["filesystem_id"], thumbnail=True, thumbnail_width=info["thumbnail_width"], thumbnail_height=info["thumbnail_height"], @@ -221,7 +221,7 @@ class ThumbnailResource(Resource): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. - yield self.media_repo.get_remote_media_info(server_name, media_id) + media_info = yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -232,7 +232,7 @@ class ThumbnailResource(Resource): width, height, method, m_type, thumbnail_infos ) file_info = FileInfo( - server_name=None, file_id=media_id, + server_name=server_name, file_id=media_info["filesystem_id"], thumbnail=True, thumbnail_width=thumbnail_info["thumbnail_width"], thumbnail_height=thumbnail_info["thumbnail_height"], From 307f88dfb6c157ee2bda6f9b8b3dee82cad490aa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 13:53:43 +0000 Subject: [PATCH 4/9] Fix up log lines --- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e82dcfdc2..d191f9d2f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -285,7 +285,7 @@ class MediaRepository(object): # If we have an entry in the DB, try and look for it if media_info: if media_info["quarantined_by"]: - logger.info("Media is quarentined") + logger.info("Media is quarantined") raise NotFoundError() responder = yield self.media_storage.fetch_media(file_info) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index e20d6f10b..1451f6f10 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,7 +85,9 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: - logger.info("Media is quarantined") + if media_info: + logger.info("Media is quarantined") + respond_404(request) return @@ -122,7 +124,8 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: - logger.info("Media is quarantined") + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -152,7 +155,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a local thumbnail of that size. Generating") + logger.debug("We don't have a thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_local_exact_thumbnail( @@ -162,7 +165,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - logger.warn("Failed to generate local thumbnail") + logger.warn("Failed to generate thumbnail") respond_404(request) @defer.inlineCallbacks @@ -201,7 +204,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a remote thumbnail of that size. Generating") + logger.debug("We don't have a thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_remote_exact_thumbnail( @@ -212,7 +215,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - logger.warn("Failed to generate remote thumbnail") + logger.warn("Failed to generate thumbnail") respond_404(request) @defer.inlineCallbacks From 5dfc83704b9f338c975a03bad7854218658b3a80 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 14:32:56 +0000 Subject: [PATCH 5/9] Fix typo --- synapse/rest/media/v1/thumbnail_resource.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 1451f6f10..8c9653843 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -84,10 +84,11 @@ class ThumbnailResource(Resource): method, m_type): media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - if media_info: - logger.info("Media is quarantined") - + if not media_info: + respond_404(request) + return + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -123,9 +124,11 @@ class ThumbnailResource(Resource): desired_type): media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - if media_info["quarantined_by"]: - logger.info("Media is quarantined") + if not media_info: + respond_404(request) + return + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return From 0a90d9ede4a39d720afd131866f98b51aa591cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:03:05 +0000 Subject: [PATCH 6/9] Move setting of file_id up to caller --- synapse/rest/media/v1/media_repository.py | 21 +++++++++---------- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d191f9d2f..35c1dcbc9 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,7 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_info) + yield self._generate_thumbnails(None, media_id, media_id, media_info) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +413,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, media_info + server_name, media_id, file_id, media_info, ) defer.returnValue(media_info) @@ -525,24 +525,23 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False): + def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + url_cache=False): """Generate and store thumbnails for an image. Args: - server_name(str|None): The server name if remote media, else None if local - media_id(str) - media_info(dict) - url_cache(bool): If we are thumbnailing images downloaded for the URL cache, + server_name (str|None): The server name if remote media, else None if local + media_id (str): The media ID of the content. (This is the same as + the file_id for local content) + file_id (str): Local file ID + media_info (dict) + url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - if server_name: - file_id = media_info["filesystem_id"] - else: - file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f3dbbb3fe..5ddf581bd 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -185,8 +185,9 @@ class PreviewUrlResource(Resource): logger.debug("got media_info of '%s'" % media_info) if _is_media(media_info['media_type']): + file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, media_info['filesystem_id'], media_info, url_cache=True, + None, file_id, file_id, media_info, url_cache=True, ) og = { @@ -231,8 +232,9 @@ class PreviewUrlResource(Resource): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images + file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, image_info['filesystem_id'], image_info, url_cache=True, + None, file_id, file_id, image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] From 6368e5c0ab39e43ee6950ba94a826c44db7d43f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:17:38 +0000 Subject: [PATCH 7/9] Change _generate_thumbnails to take media_type --- synapse/rest/media/v1/media_repository.py | 11 ++++++----- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 35c1dcbc9..e08cf8af4 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,9 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_id, media_info) + yield self._generate_thumbnails( + None, media_id, media_id, media_info["media_type"], + ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +415,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info, + server_name, media_id, file_id, media_info["media_type"], ) defer.returnValue(media_info) @@ -525,7 +527,7 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + def _generate_thumbnails(self, server_name, media_id, file_id, media_type, url_cache=False): """Generate and store thumbnails for an image. @@ -534,14 +536,13 @@ class MediaRepository(object): media_id (str): The media ID of the content. (This is the same as the file_id for local content) file_id (str): Local file ID - media_info (dict) + media_type (str) url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ - media_type = media_info["media_type"] requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 5ddf581bd..981f01e41 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -187,7 +187,8 @@ class PreviewUrlResource(Resource): if _is_media(media_info['media_type']): file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, media_info, url_cache=True, + None, file_id, file_id, media_info["media_type"], + url_cache=True, ) og = { @@ -234,7 +235,8 @@ class PreviewUrlResource(Resource): # TODO: make sure we don't choke on white-on-transparent images file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, image_info, url_cache=True, + None, file_id, file_id, image_info["media_type"], + url_cache=True, ) if dims: og["og:image:width"] = dims['width'] From d863f68cab9665335b1657feeab5d00724dddd95 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:24:15 +0000 Subject: [PATCH 8/9] Use local vars --- synapse/rest/media/v1/media_repository.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e08cf8af4..22f86781f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -146,13 +146,9 @@ class MediaRepository(object): media_length=content_length, user_id=auth_user, ) - media_info = { - "media_type": media_type, - "media_length": content_length, - } yield self._generate_thumbnails( - None, media_id, media_id, media_info["media_type"], + None, media_id, media_id, media_type, ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -415,7 +411,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info["media_type"], + server_name, media_id, file_id, media_type, ) defer.returnValue(media_info) From d728c47142019da2896bba7a84ac92e9959fd7af Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 10:06:14 +0000 Subject: [PATCH 9/9] Add docstring --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 22f86781f..97c82c150 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -532,7 +532,7 @@ class MediaRepository(object): media_id (str): The media ID of the content. (This is the same as the file_id for local content) file_id (str): Local file ID - media_type (str) + media_type (str): The content type of the file url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer