diff --git a/changelog.d/8978.feature b/changelog.d/8978.feature new file mode 100644 index 000000000..042e257bf --- /dev/null +++ b/changelog.d/8978.feature @@ -0,0 +1 @@ +Add `order_by` to the admin API `GET /_synapse/admin/v1/users//media`. Contributed by @dklimpel. \ No newline at end of file diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst index 33dfbcfb4..8d4ec5a6f 100644 --- a/docs/admin_api/user_admin_api.rst +++ b/docs/admin_api/user_admin_api.rst @@ -379,11 +379,12 @@ The following fields are returned in the JSON response body: - ``total`` - Number of rooms. -List media of an user -================================ +List media of a user +==================== Gets a list of all local media that a specific ``user_id`` has created. -The response is ordered by creation date descending and media ID descending. -The newest media is on top. +By default, the response is ordered by descending creation date and ascending media ID. +The newest media is on top. You can change the order with parameters +``order_by`` and ``dir``. The API is:: @@ -440,6 +441,35 @@ The following parameters should be set in the URL: denoting the offset in the returned results. This should be treated as an opaque value and not explicitly set to anything other than the return value of ``next_token`` from a previous call. Defaults to ``0``. +- ``order_by`` - The method by which to sort the returned list of media. + If the ordered field has duplicates, the second order is always by ascending ``media_id``, + which guarantees a stable ordering. Valid values are: + + - ``media_id`` - Media are ordered alphabetically by ``media_id``. + - ``upload_name`` - Media are ordered alphabetically by name the media was uploaded with. + - ``created_ts`` - Media are ordered by when the content was uploaded in ms. + Smallest to largest. This is the default. + - ``last_access_ts`` - Media are ordered by when the content was last accessed in ms. + Smallest to largest. + - ``media_length`` - Media are ordered by length of the media in bytes. + Smallest to largest. + - ``media_type`` - Media are ordered alphabetically by MIME-type. + - ``quarantined_by`` - Media are ordered alphabetically by the user ID that + initiated the quarantine request for this media. + - ``safe_from_quarantine`` - Media are ordered by the status if this media is safe + from quarantining. + +- ``dir`` - Direction of media order. Either ``f`` for forwards or ``b`` for backwards. + Setting this value to ``b`` will reverse the above sort order. Defaults to ``f``. + +If neither ``order_by`` nor ``dir`` is set, the default order is newest media on top +(corresponds to ``order_by`` = ``created_ts`` and ``dir`` = ``b``). + +Caution. The database only has indexes on the columns ``media_id``, +``user_id`` and ``created_ts``. This means that if a different sort order is used +(``upload_name``, ``last_access_ts``, ``media_length``, ``media_type``, +``quarantined_by`` or ``safe_from_quarantine``), this can cause a large load on the +database, especially for large environments. **Response** diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 998a0ef67..9c701c734 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -35,6 +35,7 @@ from synapse.rest.admin._base import ( assert_user_is_admin, ) from synapse.rest.client.v2_alpha._base import client_patterns +from synapse.storage.databases.main.media_repository import MediaSortOrder from synapse.types import JsonDict, UserID if TYPE_CHECKING: @@ -832,8 +833,33 @@ class UserMediaRestServlet(RestServlet): errcode=Codes.INVALID_PARAM, ) + # If neither `order_by` nor `dir` is set, set the default order + # to newest media is on top for backward compatibility. + if b"order_by" not in request.args and b"dir" not in request.args: + order_by = MediaSortOrder.CREATED_TS.value + direction = "b" + else: + order_by = parse_string( + request, + "order_by", + default=MediaSortOrder.CREATED_TS.value, + allowed_values=( + MediaSortOrder.MEDIA_ID.value, + MediaSortOrder.UPLOAD_NAME.value, + MediaSortOrder.CREATED_TS.value, + MediaSortOrder.LAST_ACCESS_TS.value, + MediaSortOrder.MEDIA_LENGTH.value, + MediaSortOrder.MEDIA_TYPE.value, + MediaSortOrder.QUARANTINED_BY.value, + MediaSortOrder.SAFE_FROM_QUARANTINE.value, + ), + ) + direction = parse_string( + request, "dir", default="f", allowed_values=("f", "b") + ) + media, total = await self.store.get_local_media_by_user_paginate( - start, limit, user_id + start, limit, user_id, order_by, direction ) ret = {"media": media, "total": total} diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index 9ee642c66..274f8de59 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from enum import Enum from typing import Any, Dict, Iterable, List, Optional, Tuple from synapse.storage._base import SQLBaseStore @@ -23,6 +24,22 @@ BG_UPDATE_REMOVE_MEDIA_REPO_INDEX_WITHOUT_METHOD = ( ) +class MediaSortOrder(Enum): + """ + Enum to define the sorting method used when returning media with + get_local_media_by_user_paginate + """ + + MEDIA_ID = "media_id" + UPLOAD_NAME = "upload_name" + CREATED_TS = "created_ts" + LAST_ACCESS_TS = "last_access_ts" + MEDIA_LENGTH = "media_length" + MEDIA_TYPE = "media_type" + QUARANTINED_BY = "quarantined_by" + SAFE_FROM_QUARANTINE = "safe_from_quarantine" + + class MediaRepositoryBackgroundUpdateStore(SQLBaseStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) @@ -118,7 +135,12 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): ) async def get_local_media_by_user_paginate( - self, start: int, limit: int, user_id: str + self, + start: int, + limit: int, + user_id: str, + order_by: MediaSortOrder = MediaSortOrder.CREATED_TS.value, + direction: str = "f", ) -> Tuple[List[Dict[str, Any]], int]: """Get a paginated list of metadata for a local piece of media which an user_id has uploaded @@ -127,6 +149,8 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): start: offset in the list limit: maximum amount of media_ids to retrieve user_id: fully-qualified user id + order_by: the sort order of the returned list + direction: sort ascending or descending Returns: A paginated list of all metadata of user's media, plus the total count of all the user's media @@ -134,6 +158,14 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): def get_local_media_by_user_paginate_txn(txn): + # Set ordering + order_by_column = MediaSortOrder(order_by).value + + if direction == "b": + order = "DESC" + else: + order = "ASC" + args = [user_id] sql = """ SELECT COUNT(*) as total_media @@ -155,9 +187,12 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): "safe_from_quarantine" FROM local_media_repository WHERE user_id = ? - ORDER BY created_ts DESC, media_id DESC + ORDER BY {order_by_column} {order}, media_id ASC LIMIT ? OFFSET ? - """ + """.format( + order_by_column=order_by_column, + order=order, + ) args += [limit, start] txn.execute(sql, args) diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index ba2689539..e58d5cf0d 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -18,7 +18,7 @@ import hmac import json import urllib.parse from binascii import unhexlify -from typing import Optional +from typing import List, Optional from mock import Mock @@ -31,6 +31,7 @@ from synapse.rest.client.v2_alpha import devices, sync from synapse.types import JsonDict from tests import unittest +from tests.server import FakeSite, make_request from tests.test_utils import make_awaitable from tests.unittest import override_config @@ -1954,6 +1955,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): ] def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() self.media_repo = hs.get_media_repository_resource() self.admin_user = self.register_user("admin", "pass", admin=True) @@ -2024,7 +2026,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): number_media = 20 other_user_tok = self.login("user", "pass") - self._create_media(other_user_tok, number_media) + self._create_media_for_user(other_user_tok, number_media) channel = self.make_request( "GET", @@ -2045,7 +2047,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): number_media = 20 other_user_tok = self.login("user", "pass") - self._create_media(other_user_tok, number_media) + self._create_media_for_user(other_user_tok, number_media) channel = self.make_request( "GET", @@ -2066,7 +2068,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): number_media = 20 other_user_tok = self.login("user", "pass") - self._create_media(other_user_tok, number_media) + self._create_media_for_user(other_user_tok, number_media) channel = self.make_request( "GET", @@ -2080,11 +2082,31 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): self.assertEqual(len(channel.json_body["media"]), 10) self._check_fields(channel.json_body["media"]) - def test_limit_is_negative(self): + def test_invalid_parameter(self): """ - Testing that a negative limit parameter returns a 400 + If parameters are invalid, an error is returned. """ + # unkown order_by + channel = self.make_request( + "GET", + self.url + "?order_by=bar", + access_token=self.admin_user_tok, + ) + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + + # invalid search order + channel = self.make_request( + "GET", + self.url + "?dir=bar", + access_token=self.admin_user_tok, + ) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + + # negative limit channel = self.make_request( "GET", self.url + "?limit=-5", @@ -2094,11 +2116,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) - def test_from_is_negative(self): - """ - Testing that a negative from parameter returns a 400 - """ - + # negative from channel = self.make_request( "GET", self.url + "?from=-5", @@ -2115,7 +2133,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): number_media = 20 other_user_tok = self.login("user", "pass") - self._create_media(other_user_tok, number_media) + self._create_media_for_user(other_user_tok, number_media) # `next_token` does not appear # Number of results is the number of entries @@ -2193,7 +2211,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): number_media = 5 other_user_tok = self.login("user", "pass") - self._create_media(other_user_tok, number_media) + self._create_media_for_user(other_user_tok, number_media) channel = self.make_request( "GET", @@ -2207,11 +2225,118 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): self.assertNotIn("next_token", channel.json_body) self._check_fields(channel.json_body["media"]) - def _create_media(self, user_token, number_media): + def test_order_by(self): + """ + Testing order list with parameter `order_by` + """ + + other_user_tok = self.login("user", "pass") + + # Resolution: 1×1, MIME type: image/png, Extension: png, Size: 67 B + image_data1 = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + # Resolution: 1×1, MIME type: image/gif, Extension: gif, Size: 35 B + image_data2 = unhexlify( + b"47494638376101000100800100000000" + b"ffffff2c00000000010001000002024c" + b"01003b" + ) + # Resolution: 1×1, MIME type: image/bmp, Extension: bmp, Size: 54 B + image_data3 = unhexlify( + b"424d3a0000000000000036000000280000000100000001000000" + b"0100180000000000040000000000000000000000000000000000" + b"0000" + ) + + # create media and make sure they do not have the same timestamp + media1 = self._create_media_and_access(other_user_tok, image_data1, "image.png") + self.pump(1.0) + media2 = self._create_media_and_access(other_user_tok, image_data2, "image.gif") + self.pump(1.0) + media3 = self._create_media_and_access(other_user_tok, image_data3, "image.bmp") + self.pump(1.0) + + # Mark one media as safe from quarantine. + self.get_success(self.store.mark_local_media_as_safe(media2)) + # Quarantine one media + self.get_success( + self.store.quarantine_media_by_id("test", media3, self.admin_user) + ) + + # order by default ("created_ts") + # default is backwards + self._order_test([media3, media2, media1], None) + self._order_test([media1, media2, media3], None, "f") + self._order_test([media3, media2, media1], None, "b") + + # sort by media_id + sorted_media = sorted([media1, media2, media3], reverse=False) + sorted_media_reverse = sorted(sorted_media, reverse=True) + + # order by media_id + self._order_test(sorted_media, "media_id") + self._order_test(sorted_media, "media_id", "f") + self._order_test(sorted_media_reverse, "media_id", "b") + + # order by upload_name + self._order_test([media3, media2, media1], "upload_name") + self._order_test([media3, media2, media1], "upload_name", "f") + self._order_test([media1, media2, media3], "upload_name", "b") + + # order by media_type + # result is ordered by media_id + # because of uploaded media_type is always 'application/json' + self._order_test(sorted_media, "media_type") + self._order_test(sorted_media, "media_type", "f") + self._order_test(sorted_media, "media_type", "b") + + # order by media_length + self._order_test([media2, media3, media1], "media_length") + self._order_test([media2, media3, media1], "media_length", "f") + self._order_test([media1, media3, media2], "media_length", "b") + + # order by created_ts + self._order_test([media1, media2, media3], "created_ts") + self._order_test([media1, media2, media3], "created_ts", "f") + self._order_test([media3, media2, media1], "created_ts", "b") + + # order by last_access_ts + self._order_test([media1, media2, media3], "last_access_ts") + self._order_test([media1, media2, media3], "last_access_ts", "f") + self._order_test([media3, media2, media1], "last_access_ts", "b") + + # order by quarantined_by + # one media is in quarantine, others are ordered by media_ids + + # Different sort order of SQlite and PostreSQL + # If a media is not in quarantine `quarantined_by` is NULL + # SQLite considers NULL to be smaller than any other value. + # PostreSQL considers NULL to be larger than any other value. + + # self._order_test(sorted([media1, media2]) + [media3], "quarantined_by") + # self._order_test(sorted([media1, media2]) + [media3], "quarantined_by", "f") + # self._order_test([media3] + sorted([media1, media2]), "quarantined_by", "b") + + # order by safe_from_quarantine + # one media is safe from quarantine, others are ordered by media_ids + self._order_test(sorted([media1, media3]) + [media2], "safe_from_quarantine") + self._order_test( + sorted([media1, media3]) + [media2], "safe_from_quarantine", "f" + ) + self._order_test( + [media2] + sorted([media1, media3]), "safe_from_quarantine", "b" + ) + + def _create_media_for_user(self, user_token: str, number_media: int): """ Create a number of media for a specific user + Args: + user_token: Access token of the user + number_media: Number of media to be created for the user """ - upload_resource = self.media_repo.children[b"upload"] for i in range(number_media): # file size is 67 Byte image_data = unhexlify( @@ -2220,13 +2345,60 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): b"0a2db40000000049454e44ae426082" ) - # Upload some media into the room - self.helper.upload_media( - upload_resource, image_data, tok=user_token, expect_code=200 - ) + self._create_media_and_access(user_token, image_data) - def _check_fields(self, content): - """Checks that all attributes are present in content""" + def _create_media_and_access( + self, + user_token: str, + image_data: bytes, + filename: str = "image1.png", + ) -> str: + """ + Create one media for a specific user, access and returns `media_id` + Args: + user_token: Access token of the user + image_data: binary data of image + filename: The filename of the media to be uploaded + Returns: + The ID of the newly created media. + """ + upload_resource = self.media_repo.children[b"upload"] + download_resource = self.media_repo.children[b"download"] + + # Upload some media into the room + response = self.helper.upload_media( + upload_resource, image_data, user_token, filename, expect_code=200 + ) + + # Extract media ID from the response + server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' + media_id = server_and_media_id.split("/")[1] + + # Try to access a media and to create `last_access_ts` + channel = make_request( + self.reactor, + FakeSite(download_resource), + "GET", + server_and_media_id, + shorthand=False, + access_token=user_token, + ) + + self.assertEqual( + 200, + channel.code, + msg=( + "Expected to receive a 200 on accessing media: %s" % server_and_media_id + ), + ) + + return media_id + + def _check_fields(self, content: JsonDict): + """Checks that the expected user attributes are present in content + Args: + content: List that is checked for content + """ for m in content: self.assertIn("media_id", m) self.assertIn("media_type", m) @@ -2237,6 +2409,38 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase): self.assertIn("quarantined_by", m) self.assertIn("safe_from_quarantine", m) + def _order_test( + self, + expected_media_list: List[str], + order_by: Optional[str], + dir: Optional[str] = None, + ): + """Request the list of media in a certain order. Assert that order is what + we expect + Args: + expected_media_list: The list of media_ids in the order we expect to get + back from the server + order_by: The type of ordering to give the server + dir: The direction of ordering to give the server + """ + + url = self.url + "?" + if order_by is not None: + url += "order_by=%s&" % (order_by,) + if dir is not None and dir in ("b", "f"): + url += "dir=%s" % (dir,) + channel = self.make_request( + "GET", + url.encode("ascii"), + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], len(expected_media_list)) + + returned_order = [row["media_id"] for row in channel.json_body["media"]] + self.assertEqual(expected_media_list, returned_order) + self._check_fields(channel.json_body["media"]) + class UserTokenRestTestCase(unittest.HomeserverTestCase): """Test for /_synapse/admin/v1/users//login"""