Add an admin API to delete local media. (#8519)

Related to: #6459, #3479

Add `DELETE /_synapse/admin/v1/media/<server_name>/<media_id>` to delete
a single file from the server.
This commit is contained in:
Dirk Klimpel 2020-10-26 18:02:28 +01:00 committed by GitHub
parent f6a3859a73
commit 49d72dea2a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 868 additions and 3 deletions

View file

@ -16,9 +16,10 @@
import logging
from synapse.api.errors import AuthError
from synapse.http.servlet import RestServlet, parse_integer
from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
from synapse.http.servlet import RestServlet, parse_boolean, parse_integer
from synapse.rest.admin._base import (
admin_patterns,
assert_requester_is_admin,
assert_user_is_admin,
historical_admin_path_patterns,
@ -150,6 +151,80 @@ class PurgeMediaCacheRestServlet(RestServlet):
return 200, ret
class DeleteMediaByID(RestServlet):
    """Admin servlet that removes a single piece of local media by its ID.

    Serves ``DELETE`` requests whose path ends in
    ``/media/<server_name>/<media_id>``.
    """

    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)")

    def __init__(self, hs):
        self.store = hs.get_datastore()
        self.auth = hs.get_auth()
        self.server_name = hs.hostname
        self.media_repository = hs.get_media_repository()

    async def on_DELETE(self, request, server_name: str, media_id: str):
        """Delete the media identified by ``media_id`` from this server.

        Args:
            request: The incoming HTTP request; used for the admin check.
            server_name: Server name taken from the URL path.
            media_id: Media ID taken from the URL path.

        Returns:
            A tuple of (200, JSON body) where the body carries the list of
            deleted media IDs under "deleted_media" and their count under
            "total".

        Raises:
            SynapseError: (400) if ``server_name`` is not this server's hostname.
            NotFoundError: if the store has no local media with this ID.
        """
        await assert_requester_is_admin(self.auth, request)

        # Only media hosted by this homeserver may be deleted here.
        is_local = server_name == self.server_name
        if not is_local:
            raise SynapseError(400, "Can only delete local media")

        media_info = await self.store.get_local_media(media_id)
        if media_info is None:
            raise NotFoundError("Unknown media")

        logging.info("Deleting local media by ID: %s", media_id)

        removed_ids, removed_count = await self.media_repository.delete_local_media(
            media_id
        )
        response_body = {"deleted_media": removed_ids, "total": removed_count}
        return 200, response_body
class DeleteMediaByDateSize(RestServlet):
    """Delete local media selected by last-use timestamp and size.

    Serves ``POST`` requests whose path ends in ``/media/<server_name>/delete``.
    Requests naming any server other than this one are rejected, so only
    media hosted by this homeserver is affected.
    """

    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/delete")

    def __init__(self, hs):
        self.store = hs.get_datastore()
        self.auth = hs.get_auth()
        self.server_name = hs.hostname
        self.media_repository = hs.get_media_repository()

    async def on_POST(self, request, server_name: str):
        """Delete old local media matching the query parameters.

        Query parameters:
            before_ts: Required integer, forwarded to the media repository as
                the timestamp cut-off.
            size_gt: Optional integer size threshold, defaults to 0.
            keep_profiles: Optional boolean, defaults to True.

        Args:
            request: The incoming HTTP request.
            server_name: Server name taken from the URL path.

        Returns:
            A tuple of (200, JSON body) where the body carries the list of
            deleted media IDs under "deleted_media" and their count under
            "total".

        Raises:
            SynapseError: (400) if ``before_ts`` or ``size_gt`` is negative, or
                if ``server_name`` is not this server's hostname.
        """
        await assert_requester_is_admin(self.auth, request)

        before_ts = parse_integer(request, "before_ts", required=True)
        size_gt = parse_integer(request, "size_gt", default=0)
        keep_profiles = parse_boolean(request, "keep_profiles", default=True)

        # Only negative values are rejected; zero is accepted by both checks.
        if before_ts < 0:
            raise SynapseError(
                400,
                "Query parameter before_ts must be a string representing a positive integer.",
                errcode=Codes.INVALID_PARAM,
            )
        if size_gt < 0:
            raise SynapseError(
                400,
                "Query parameter size_gt must be a string representing a positive integer.",
                errcode=Codes.INVALID_PARAM,
            )

        if self.server_name != server_name:
            raise SynapseError(400, "Can only delete local media")

        # Pass the values as logging arguments so the message is only
        # formatted when the record is actually emitted.
        logging.info(
            "Deleting local media by timestamp: %s, size larger than: %s, keep profile media: %s",
            before_ts,
            size_gt,
            keep_profiles,
        )

        deleted_media, total = await self.media_repository.delete_old_local_media(
            before_ts, size_gt, keep_profiles
        )
        return 200, {"deleted_media": deleted_media, "total": total}
def register_servlets_for_media_repo(hs, http_server):
"""
Media repo specific APIs.
@ -159,3 +234,5 @@ def register_servlets_for_media_repo(hs, http_server):
QuarantineMediaByID(hs).register(http_server)
QuarantineMediaByUser(hs).register(http_server)
ListMediaInRoom(hs).register(http_server)
DeleteMediaByID(hs).register(http_server)
DeleteMediaByDateSize(hs).register(http_server)

View file

@ -69,6 +69,23 @@ class MediaFilePaths:
local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)
def local_media_thumbnail_dir(self, media_id: str) -> str:
    """
    Build the directory path under which thumbnails of a local media item
    are stored.

    Args:
        media_id: The media ID to query.

    Returns:
        ``base_path`` joined with "local_thumbnails" and the media ID split
        into its first two characters, its next two characters and the rest.
    """
    # The ID is sharded into two 2-character directory levels plus remainder.
    first_shard, second_shard, remainder = media_id[:2], media_id[2:4], media_id[4:]
    path_parts = (
        self.base_path,
        "local_thumbnails",
        first_shard,
        second_shard,
        remainder,
    )
    return os.path.join(*path_parts)
def remote_media_filepath_rel(self, server_name, file_id):
return os.path.join(
"remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:]

View file

@ -18,7 +18,7 @@ import errno
import logging
import os
import shutil
from typing import IO, Dict, Optional, Tuple
from typing import IO, Dict, List, Optional, Tuple
import twisted.internet.error
import twisted.web.http
@ -767,6 +767,76 @@ class MediaRepository:
return {"deleted": deleted}
async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]:
    """
    Delete a single piece of media, identified by its ID, from this server.

    Args:
        media_id: The media ID to delete.

    Returns:
        A tuple of (list of deleted media IDs, total deleted media IDs).
    """
    # Reuse the bulk removal routine with a one-element list.
    single_item = [media_id]
    return await self._remove_local_media_from_disk(single_item)
async def delete_old_local_media(
    self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True,
) -> Tuple[List[str], int]:
    """
    Delete media from this server selected by timestamp and size. Removes
    media files, any thumbnails and cached URLs.

    The candidate media IDs are looked up in the store and then handed to
    the bulk removal routine.

    Args:
        before_ts: Unix timestamp in ms.
            Files that were last used before this timestamp will be deleted
        size_gt: Size of the media in bytes. Files that are larger will be deleted
        keep_profiles: Whether to spare files that are still used in image data
            (e.g user profile, room avatar).
            If false these files will be deleted as well

    Returns:
        A tuple of (list of deleted media IDs, total deleted media IDs).
    """
    stale_media_ids = await self.store.get_local_media_before(
        before_ts, size_gt, keep_profiles,
    )
    removal_result = await self._remove_local_media_from_disk(stale_media_ids)
    return removal_result
async def _remove_local_media_from_disk(
    self, media_ids: List[str]
) -> Tuple[List[str], int]:
    """
    Delete the given media from this server. Removes media files,
    any thumbnails and cached URLs.

    A media ID whose file cannot be removed for any reason other than the
    file already being absent is skipped and not reported as deleted.

    Args:
        media_ids: List of media_id to delete

    Returns:
        A tuple of (list of deleted media IDs, total deleted media IDs).
    """
    deleted_ids = []

    for media_id in media_ids:
        logger.info("Deleting media with ID '%s'", media_id)
        media_path = self.filepaths.local_media_filepath(media_id)

        try:
            os.remove(media_path)
        except OSError as err:
            logger.warning("Failed to remove file: %r: %s", media_path, err)
            # A file that is already gone still lets the clean-up below run;
            # any other failure leaves this media ID alone.
            if err.errno != errno.ENOENT:
                continue

        # Thumbnail removal is best-effort: errors are ignored.
        shutil.rmtree(
            self.filepaths.local_media_thumbnail_dir(media_id), ignore_errors=True
        )

        # NOTE(review): this calls the store's *remote* media deletion with
        # our own server name - confirm it also clears the local media rows.
        await self.store.delete_remote_media(self.server_name, media_id)

        cache_key = (media_id,)
        await self.store.delete_url_cache(cache_key)
        await self.store.delete_url_cache_media(cache_key)

        deleted_ids.append(media_id)

    return deleted_ids, len(deleted_ids)
class MediaRepositoryResource(Resource):
"""File uploading and downloading.

View file

@ -93,6 +93,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
def __init__(self, database: DatabasePool, db_conn, hs):
    """Initialise the store and record this homeserver's name.

    Args:
        database: Passed through to the parent store constructor.
        db_conn: Passed through to the parent store constructor.
        hs: The homeserver object; its ``hostname`` is stored on the instance.
    """
    super().__init__(database, db_conn, hs)
    # Used by get_local_media_before to build this server's mxc:// URI prefix.
    self.server_name = hs.hostname
async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]:
"""Get the metadata for a local piece of media
@ -115,6 +116,58 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
desc="get_local_media",
)
async def get_local_media_before(
    self, before_ts: int, size_gt: int, keep_profiles: bool,
) -> List[str]:
    """Find local media that was last used before a timestamp and exceeds a size.

    Args:
        before_ts: Cut-off compared against ``last_access_ts``, or against
            ``created_ts`` for rows whose ``last_access_ts`` is NULL.
        size_gt: Only rows whose ``media_length`` is strictly greater than
            this value are returned.
        keep_profiles: If True, exclude media whose ``mxc://`` URI on this
            server appears as an avatar in the profiles, groups,
            room_memberships, user_directory or room_stats_state tables.

    Returns:
        The matching media IDs; an empty list when nothing matches.
    """

    # to find files that have never been accessed (last_access_ts IS NULL)
    # compare with `created_ts`
    sql = """
        SELECT media_id
        FROM local_media_repository AS lmr
        WHERE
            ( last_access_ts < ?
            OR ( created_ts < ? AND last_access_ts IS NULL ) )
            AND media_length > ?
    """

    if keep_profiles:
        # The prefix is interpolated into the SQL text rather than bound as
        # a parameter; it is built from the server name stored on this
        # instance, not from any argument of this method.
        sql_keep = """
            AND (
                NOT EXISTS
                    (SELECT 1
                     FROM profiles
                     WHERE profiles.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM groups
                     WHERE groups.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM room_memberships
                     WHERE room_memberships.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM user_directory
                     WHERE user_directory.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM room_stats_state
                     WHERE room_stats_state.avatar = '{media_prefix}' || lmr.media_id)
            )
        """.format(
            media_prefix="mxc://%s/" % (self.server_name,),
        )
        sql += sql_keep

    def _get_local_media_before_txn(txn):
        # before_ts is bound twice: once per timestamp comparison above.
        txn.execute(sql, (before_ts, before_ts, size_gt))
        return [row[0] for row in txn]

    return await self.db_pool.runInteraction(
        "get_local_media_before", _get_local_media_before_txn
    )
async def store_local_media(
self,
media_id,