From b106080fb4d92a9969acc511a1276063d95b5287 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Feb 2021 16:22:21 +0000 Subject: [PATCH 1/4] Regenerate exact thumbnails if missing --- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 49 ++++++++++++++++++- .../databases/main/media_repository.py | 18 +++---- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a0162d425..3375455c4 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -509,7 +509,7 @@ class MediaRepository: t_height: int, t_method: str, t_type: str, - url_cache: str, + url_cache: Optional[str], ) -> Optional[str]: input_path = await self.media_storage.ensure_media_is_in_local_cache( FileInfo(None, media_id, url_cache=url_cache) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index d653a58be..c345ec649 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -114,6 +114,7 @@ class ThumbnailResource(DirectServeJsonResource): m_type, thumbnail_infos, media_id, + media_id, url_cache=media_info["url_cache"], server_name=None, ) @@ -269,6 +270,7 @@ class ThumbnailResource(DirectServeJsonResource): method, m_type, thumbnail_infos, + media_id, media_info["filesystem_id"], url_cache=None, server_name=server_name, @@ -282,6 +284,7 @@ class ThumbnailResource(DirectServeJsonResource): desired_method: str, desired_type: str, thumbnail_infos: List[Dict[str, Any]], + media_id: str, file_id: str, url_cache: Optional[str] = None, server_name: Optional[str] = None, @@ -316,9 +319,53 @@ class ThumbnailResource(DirectServeJsonResource): respond_404(request) return + responder = await self.media_storage.fetch_media(file_info) + if responder: + await respond_with_responder( + request, + responder, + file_info.thumbnail_type, + file_info.thumbnail_length, + ) + return + + # If we can't find the thumbnail we regenerate it. This can happen + # if e.g. we've deleted the thumbnails but still have the original + # image somewhere. + # + # Since we have an entry for the thumbnail in the DB we a) know we + # have have successfully generated the thumbnail in the past (so we + # don't need to worry about repeatedly failing to generate + # thumbnails), and b) have already calculated that appropriate + # width/height/method so we can just call the "generate exact" + # methods. + + if server_name: + await self.media_repo.generate_remote_exact_thumbnail( + server_name, + file_id=file_id, + media_id=media_id, + t_width=file_info.thumbnail_width, + t_height=file_info.thumbnail_height, + t_method=file_info.thumbnail_method, + t_type=file_info.thumbnail_type, + ) + else: + await self.media_repo.generate_local_exact_thumbnail( + media_id=media_id, + t_width=file_info.thumbnail_width, + t_height=file_info.thumbnail_height, + t_method=file_info.thumbnail_method, + t_type=file_info.thumbnail_type, + url_cache=url_cache, + ) + responder = await self.media_storage.fetch_media(file_info) await respond_with_responder( - request, responder, file_info.thumbnail_type, file_info.thumbnail_length + request, + responder, + file_info.thumbnail_type, + file_info.thumbnail_length, ) else: logger.info("Failed to find any generated thumbnails") diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index a0313c3cc..9ee642c66 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -344,16 +344,16 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): thumbnail_method, thumbnail_length, ): - await self.db_pool.simple_insert( - "local_media_repository_thumbnails", - { + await self.db_pool.simple_upsert( + table="local_media_repository_thumbnails", + keyvalues={ "media_id": media_id, "thumbnail_width": thumbnail_width, "thumbnail_height": thumbnail_height, "thumbnail_method": thumbnail_method, "thumbnail_type": thumbnail_type, - "thumbnail_length": thumbnail_length, }, + values={"thumbnail_length": thumbnail_length}, desc="store_local_thumbnail", ) @@ -498,18 +498,18 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): thumbnail_method, thumbnail_length, ): - await self.db_pool.simple_insert( - "remote_media_cache_thumbnails", - { + await self.db_pool.simple_upsert( + table="remote_media_cache_thumbnails", + keyvalues={ "media_origin": origin, "media_id": media_id, "thumbnail_width": thumbnail_width, "thumbnail_height": thumbnail_height, "thumbnail_method": thumbnail_method, "thumbnail_type": thumbnail_type, - "thumbnail_length": thumbnail_length, - "filesystem_id": filesystem_id, }, + values={"thumbnail_length": thumbnail_length}, + insertion_values={"filesystem_id": filesystem_id}, desc="store_remote_media_thumbnail", ) From 2d577283aba3524f8e7cd9e1db75f87524373f6b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Feb 2021 16:53:00 +0000 Subject: [PATCH 2/4] Newsfile --- changelog.d/9438.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/9438.feature diff --git a/changelog.d/9438.feature b/changelog.d/9438.feature new file mode 100644 index 000000000..a1f6be556 --- /dev/null +++ b/changelog.d/9438.feature @@ -0,0 +1 @@ +Add support for regenerating thumbnails if they have been deleted but the original image is still stored. From 3d2acc930f0a633bf400c96f3e636b5b662a54cb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 Feb 2021 10:46:18 +0000 Subject: [PATCH 3/4] Return a 404 if we don't have the original file --- synapse/rest/media/v1/media_storage.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 1057e638b..b1b1c9e6e 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -244,7 +244,7 @@ class MediaStorage: await consumer.wait() return local_path - raise Exception("file could not be found") + raise NotFoundError() def _file_info_to_path(self, file_info: FileInfo) -> str: """Converts file_info into a relative path. diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index c345ec649..3ab90e9f9 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -340,6 +340,13 @@ class ThumbnailResource(DirectServeJsonResource): # width/height/method so we can just call the "generate exact" # methods. + # First let's check that we do actually have the original image + # still. This will throw a 404 if we don't. + # TODO: We should refetch the thumbnails for remote media. + await self.media_storage.ensure_media_is_in_local_cache( + FileInfo(server_name, file_id, url_cache=url_cache) + ) + if server_name: await self.media_repo.generate_remote_exact_thumbnail( server_name, From 3a2fe5054fee9566ffc7bd233e7bfc03339eee71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 Feb 2021 15:52:04 +0000 Subject: [PATCH 4/4] Add test --- tests/rest/media/v1/test_media_storage.py | 69 ++++++++++++++++++++++- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 0789b1239..36d1e6bc4 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -231,9 +231,11 @@ class MediaRepoTests(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): - self.media_repo = hs.get_media_repository_resource() - self.download_resource = self.media_repo.children[b"download"] - self.thumbnail_resource = self.media_repo.children[b"thumbnail"] + media_resource = hs.get_media_repository_resource() + self.download_resource = media_resource.children[b"download"] + self.thumbnail_resource = media_resource.children[b"thumbnail"] + self.store = hs.get_datastore() + self.media_repo = hs.get_media_repository() self.media_id = "example.com/12345" @@ -357,6 +359,67 @@ class MediaRepoTests(unittest.HomeserverTestCase): """ self._test_thumbnail("scale", None, False) + def test_thumbnail_repeated_thumbnail(self): + """Test that fetching the same thumbnail works, and deleting the on disk + thumbnail regenerates it. + """ + self._test_thumbnail( + "scale", self.test_image.expected_scaled, self.test_image.expected_found + ) + + if not self.test_image.expected_found: + return + + # Fetching again should work, without re-requesting the image from the + # remote. + params = "?width=32&height=32&method=scale" + channel = make_request( + self.reactor, + FakeSite(self.thumbnail_resource), + "GET", + self.media_id + params, + shorthand=False, + await_result=False, + ) + self.pump() + + self.assertEqual(channel.code, 200) + if self.test_image.expected_scaled: + self.assertEqual( + channel.result["body"], + self.test_image.expected_scaled, + channel.result["body"], + ) + + # Deleting the thumbnail on disk then re-requesting it should work as + # Synapse should regenerate missing thumbnails. + origin, media_id = self.media_id.split("/") + info = self.get_success(self.store.get_cached_remote_media(origin, media_id)) + file_id = info["filesystem_id"] + + thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir( + origin, file_id + ) + shutil.rmtree(thumbnail_dir, ignore_errors=True) + + channel = make_request( + self.reactor, + FakeSite(self.thumbnail_resource), + "GET", + self.media_id + params, + shorthand=False, + await_result=False, + ) + self.pump() + + self.assertEqual(channel.code, 200) + if self.test_image.expected_scaled: + self.assertEqual( + channel.result["body"], + self.test_image.expected_scaled, + channel.result["body"], + ) + def _test_thumbnail(self, method, expected_body, expected_found): params = "?width=32&height=32&method=" + method channel = make_request(