Add an admin API for users' media statistics (#8700)

Add `GET /_synapse/admin/v1/statistics/users/media` to get statisics about local media usage by users.
Related to #6094
It is the first API for statistics.
Goal is to avoid/reduce usage of sql queries like [Wiki analyzing Synapse](https://github.com/matrix-org/synapse/wiki/SQL-for-analyzing-Synapse-PostgreSQL-database-stats)

Signed-off-by: Dirk Klimpel dirk@klimpel.org
This commit is contained in:
Dirk Klimpel 2020-11-05 19:59:12 +01:00 committed by GitHub
parent e4676bd877
commit c3119d1536
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 820 additions and 0 deletions

View file

@ -16,15 +16,18 @@
import logging
from collections import Counter
from enum import Enum
from itertools import chain
from typing import Any, Dict, List, Optional, Tuple
from twisted.internet.defer import DeferredLock
from synapse.api.constants import EventTypes, Membership
from synapse.api.errors import StoreError
from synapse.storage.database import DatabasePool
from synapse.storage.databases.main.state_deltas import StateDeltasStore
from synapse.storage.engines import PostgresEngine
from synapse.types import JsonDict
from synapse.util.caches.descriptors import cached
logger = logging.getLogger(__name__)
@ -59,6 +62,23 @@ TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user
TYPE_TO_ORIGIN_TABLE = {"room": ("rooms", "room_id"), "user": ("users", "name")}
class UserSortOrder(Enum):
"""
Enum to define the sorting method used when returning users
with get_users_media_usage_paginate
MEDIA_LENGTH = ordered by size of uploaded media. Smallest to largest.
MEDIA_COUNT = ordered by number of uploaded media. Smallest to largest.
USER_ID = ordered alphabetically by `user_id`.
DISPLAYNAME = ordered alphabetically by `displayname`
"""
MEDIA_LENGTH = "media_length"
MEDIA_COUNT = "media_count"
USER_ID = "user_id"
DISPLAYNAME = "displayname"
class StatsStore(StateDeltasStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)
@ -882,3 +902,110 @@ class StatsStore(StateDeltasStore):
complete_with_stream_id=pos,
absolute_field_overrides={"joined_rooms": joined_rooms},
)
async def get_users_media_usage_paginate(
self,
start: int,
limit: int,
from_ts: Optional[int] = None,
until_ts: Optional[int] = None,
order_by: Optional[UserSortOrder] = UserSortOrder.USER_ID.value,
direction: Optional[str] = "f",
search_term: Optional[str] = None,
) -> Tuple[List[JsonDict], Dict[str, int]]:
"""Function to retrieve a paginated list of users and their uploaded local media
(size and number). This will return a json list of users and the
total number of users matching the filter criteria.
Args:
start: offset to begin the query from
limit: number of rows to retrieve
from_ts: request only media that are created later than this timestamp (ms)
until_ts: request only media that are created earlier than this timestamp (ms)
order_by: the sort order of the returned list
direction: sort ascending or descending
search_term: a string to filter user names by
Returns:
A list of user dicts and an integer representing the total number of
users that exist given this query
"""
def get_users_media_usage_paginate_txn(txn):
filters = []
args = [self.hs.config.server_name]
if search_term:
filters.append("(lmr.user_id LIKE ? OR displayname LIKE ?)")
args.extend(["@%" + search_term + "%:%", "%" + search_term + "%"])
if from_ts:
filters.append("created_ts >= ?")
args.extend([from_ts])
if until_ts:
filters.append("created_ts <= ?")
args.extend([until_ts])
# Set ordering
if UserSortOrder(order_by) == UserSortOrder.MEDIA_LENGTH:
order_by_column = "media_length"
elif UserSortOrder(order_by) == UserSortOrder.MEDIA_COUNT:
order_by_column = "media_count"
elif UserSortOrder(order_by) == UserSortOrder.USER_ID:
order_by_column = "lmr.user_id"
elif UserSortOrder(order_by) == UserSortOrder.DISPLAYNAME:
order_by_column = "displayname"
else:
raise StoreError(
500, "Incorrect value for order_by provided: %s" % order_by
)
if direction == "b":
order = "DESC"
else:
order = "ASC"
where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
sql_base = """
FROM local_media_repository as lmr
LEFT JOIN profiles AS p ON lmr.user_id = '@' || p.user_id || ':' || ?
{}
GROUP BY lmr.user_id, displayname
""".format(
where_clause
)
# SQLite does not support SELECT COUNT(*) OVER()
sql = """
SELECT COUNT(*) FROM (
SELECT lmr.user_id
{sql_base}
) AS count_user_ids
""".format(
sql_base=sql_base,
)
txn.execute(sql, args)
count = txn.fetchone()[0]
sql = """
SELECT
lmr.user_id,
displayname,
COUNT(lmr.user_id) as media_count,
SUM(media_length) as media_length
{sql_base}
ORDER BY {order_by_column} {order}
LIMIT ? OFFSET ?
""".format(
sql_base=sql_base, order_by_column=order_by_column, order=order,
)
args += [limit, start]
txn.execute(sql, args)
users = self.db_pool.cursor_to_dict(txn)
return users, count
return await self.db_pool.runInteraction(
"get_users_media_usage_paginate_txn", get_users_media_usage_paginate_txn
)