mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-02-11 17:38:27 -05:00
Avoid storing URL cache files in storage providers (#10911)
URL cache files are short-lived and it does not make sense to offload them (eg. to the cloud) or back them up.
This commit is contained in:
parent
6c83c27107
commit
f7768f62cb
1
changelog.d/10911.bugfix
Normal file
1
changelog.d/10911.bugfix
Normal file
@ -0,0 +1 @@
|
|||||||
|
Avoid storing URL cache files in storage providers. Server admins may safely delete the `url_cache/` and `url_cache_thumbnails/` directories from any configured storage providers to reclaim space.
|
@ -85,6 +85,13 @@ process, for example:
|
|||||||
dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
|
dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# Upgrading to v1.44.0
|
||||||
|
|
||||||
|
## The URL preview cache is no longer mirrored to storage providers
|
||||||
|
The `url_cache/` and `url_cache_thumbnails/` directories in the media store are
|
||||||
|
no longer mirrored to storage providers. These two directories can be safely
|
||||||
|
deleted from any configured storage providers to reclaim space.
|
||||||
|
|
||||||
# Upgrading to v1.43.0
|
# Upgrading to v1.43.0
|
||||||
|
|
||||||
## The spaces summary APIs can now be handled by workers
|
## The spaces summary APIs can now be handled by workers
|
||||||
|
@ -195,23 +195,24 @@ class MediaFilePaths:
|
|||||||
|
|
||||||
url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
|
url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
|
||||||
|
|
||||||
def url_cache_thumbnail_directory(self, media_id: str) -> str:
|
def url_cache_thumbnail_directory_rel(self, media_id: str) -> str:
|
||||||
# Media id is of the form <DATE><RANDOM_STRING>
|
# Media id is of the form <DATE><RANDOM_STRING>
|
||||||
# E.g.: 2017-09-28-fsdRDt24DS234dsf
|
# E.g.: 2017-09-28-fsdRDt24DS234dsf
|
||||||
|
|
||||||
if NEW_FORMAT_ID_RE.match(media_id):
|
if NEW_FORMAT_ID_RE.match(media_id):
|
||||||
return os.path.join(
|
return os.path.join("url_cache_thumbnails", media_id[:10], media_id[11:])
|
||||||
self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:]
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path,
|
|
||||||
"url_cache_thumbnails",
|
"url_cache_thumbnails",
|
||||||
media_id[0:2],
|
media_id[0:2],
|
||||||
media_id[2:4],
|
media_id[2:4],
|
||||||
media_id[4:],
|
media_id[4:],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
url_cache_thumbnail_directory = _wrap_in_base_path(
|
||||||
|
url_cache_thumbnail_directory_rel
|
||||||
|
)
|
||||||
|
|
||||||
def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
|
def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
|
||||||
"The dirs to try and remove if we delete the media_id thumbnails"
|
"The dirs to try and remove if we delete the media_id thumbnails"
|
||||||
# Media id is of the form <DATE><RANDOM_STRING>
|
# Media id is of the form <DATE><RANDOM_STRING>
|
||||||
|
@ -485,7 +485,6 @@ class PreviewUrlResource(DirectServeJsonResource):
|
|||||||
|
|
||||||
async def _expire_url_cache_data(self) -> None:
|
async def _expire_url_cache_data(self) -> None:
|
||||||
"""Clean up expired url cache content, media and thumbnails."""
|
"""Clean up expired url cache content, media and thumbnails."""
|
||||||
# TODO: Delete from backup media store
|
|
||||||
|
|
||||||
assert self._worker_run_media_background_jobs
|
assert self._worker_run_media_background_jobs
|
||||||
|
|
||||||
|
@ -93,6 +93,11 @@ class StorageProviderWrapper(StorageProvider):
|
|||||||
if file_info.server_name and not self.store_remote:
|
if file_info.server_name and not self.store_remote:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if file_info.url_cache:
|
||||||
|
# The URL preview cache is short lived and not worth offloading or
|
||||||
|
# backing up.
|
||||||
|
return None
|
||||||
|
|
||||||
if self.store_synchronous:
|
if self.store_synchronous:
|
||||||
# store_file is supposed to return an Awaitable, but guard
|
# store_file is supposed to return an Awaitable, but guard
|
||||||
# against improper implementations.
|
# against improper implementations.
|
||||||
@ -110,6 +115,11 @@ class StorageProviderWrapper(StorageProvider):
|
|||||||
run_in_background(store)
|
run_in_background(store)
|
||||||
|
|
||||||
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
|
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
|
||||||
|
if file_info.url_cache:
|
||||||
|
# Files in the URL preview cache definitely aren't stored here,
|
||||||
|
# so avoid any potentially slow I/O or network access.
|
||||||
|
return None
|
||||||
|
|
||||||
# store_file is supposed to return an Awaitable, but guard
|
# store_file is supposed to return an Awaitable, but guard
|
||||||
# against improper implementations.
|
# against improper implementations.
|
||||||
return await maybe_awaitable(self.backend.fetch(path, file_info))
|
return await maybe_awaitable(self.backend.fetch(path, file_info))
|
||||||
|
@ -21,6 +21,7 @@ from twisted.internet.error import DNSLookupError
|
|||||||
from twisted.test.proto_helpers import AccumulatingProtocol
|
from twisted.test.proto_helpers import AccumulatingProtocol
|
||||||
|
|
||||||
from synapse.config.oembed import OEmbedEndpointConfig
|
from synapse.config.oembed import OEmbedEndpointConfig
|
||||||
|
from synapse.util.stringutils import parse_and_validate_mxc_uri
|
||||||
|
|
||||||
from tests import unittest
|
from tests import unittest
|
||||||
from tests.server import FakeTransport
|
from tests.server import FakeTransport
|
||||||
@ -721,3 +722,132 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
|||||||
"og:description": "Content Preview",
|
"og:description": "Content Preview",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _download_image(self):
|
||||||
|
"""Downloads an image into the URL cache.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A (host, media_id) tuple representing the MXC URI of the image.
|
||||||
|
"""
|
||||||
|
self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
"preview_url?url=http://cdn.twitter.com/matrixdotorg",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
client = self.reactor.tcpClients[0][2].buildProtocol(None)
|
||||||
|
server = AccumulatingProtocol()
|
||||||
|
server.makeConnection(FakeTransport(client, self.reactor))
|
||||||
|
client.makeConnection(FakeTransport(server, self.reactor))
|
||||||
|
client.dataReceived(
|
||||||
|
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: image/png\r\n\r\n"
|
||||||
|
% (len(SMALL_PNG),)
|
||||||
|
+ SMALL_PNG
|
||||||
|
)
|
||||||
|
|
||||||
|
self.pump()
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
|
body = channel.json_body
|
||||||
|
mxc_uri = body["og:image"]
|
||||||
|
host, _port, media_id = parse_and_validate_mxc_uri(mxc_uri)
|
||||||
|
self.assertIsNone(_port)
|
||||||
|
return host, media_id
|
||||||
|
|
||||||
|
def test_storage_providers_exclude_files(self):
|
||||||
|
"""Test that files are not stored in or fetched from storage providers."""
|
||||||
|
host, media_id = self._download_image()
|
||||||
|
|
||||||
|
rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
|
||||||
|
media_store_path = os.path.join(self.media_store_path, rel_file_path)
|
||||||
|
storage_provider_path = os.path.join(self.storage_path, rel_file_path)
|
||||||
|
|
||||||
|
# Check storage
|
||||||
|
self.assertTrue(os.path.isfile(media_store_path))
|
||||||
|
self.assertFalse(
|
||||||
|
os.path.isfile(storage_provider_path),
|
||||||
|
"URL cache file was unexpectedly stored in a storage provider",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check fetching
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
f"download/{host}/{media_id}",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
|
|
||||||
|
# Move cached file into the storage provider
|
||||||
|
os.makedirs(os.path.dirname(storage_provider_path), exist_ok=True)
|
||||||
|
os.rename(media_store_path, storage_provider_path)
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
f"download/{host}/{media_id}",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
self.assertEqual(
|
||||||
|
channel.code,
|
||||||
|
404,
|
||||||
|
"URL cache file was unexpectedly retrieved from a storage provider",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_storage_providers_exclude_thumbnails(self):
|
||||||
|
"""Test that thumbnails are not stored in or fetched from storage providers."""
|
||||||
|
host, media_id = self._download_image()
|
||||||
|
|
||||||
|
rel_thumbnail_path = (
|
||||||
|
self.preview_url.filepaths.url_cache_thumbnail_directory_rel(media_id)
|
||||||
|
)
|
||||||
|
media_store_thumbnail_path = os.path.join(
|
||||||
|
self.media_store_path, rel_thumbnail_path
|
||||||
|
)
|
||||||
|
storage_provider_thumbnail_path = os.path.join(
|
||||||
|
self.storage_path, rel_thumbnail_path
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check storage
|
||||||
|
self.assertTrue(os.path.isdir(media_store_thumbnail_path))
|
||||||
|
self.assertFalse(
|
||||||
|
os.path.isdir(storage_provider_thumbnail_path),
|
||||||
|
"URL cache thumbnails were unexpectedly stored in a storage provider",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check fetching
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
f"thumbnail/{host}/{media_id}?width=32&height=32&method=scale",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
|
|
||||||
|
# Remove the original, otherwise thumbnails will regenerate
|
||||||
|
rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
|
||||||
|
media_store_path = os.path.join(self.media_store_path, rel_file_path)
|
||||||
|
os.remove(media_store_path)
|
||||||
|
|
||||||
|
# Move cached thumbnails into the storage provider
|
||||||
|
os.makedirs(os.path.dirname(storage_provider_thumbnail_path), exist_ok=True)
|
||||||
|
os.rename(media_store_thumbnail_path, storage_provider_thumbnail_path)
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
f"thumbnail/{host}/{media_id}?width=32&height=32&method=scale",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
self.assertEqual(
|
||||||
|
channel.code,
|
||||||
|
404,
|
||||||
|
"URL cache thumbnail was unexpectedly retrieved from a storage provider",
|
||||||
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user