Improve the performance of calculating ignored users in large rooms (#9024)

This allows for efficiently finding which users ignore a particular
user.

Co-authored-by: Erik Johnston <erik@matrix.org>
This commit is contained in:
Patrick Cloke 2021-01-07 08:03:38 -05:00 committed by GitHub
parent 1d5c021a45
commit 23d701864f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 304 additions and 34 deletions

1
changelog.d/9024.feature Normal file
View File

@ -0,0 +1 @@
Improved performance when calculating ignored users in large rooms.

View File

@ -203,13 +203,17 @@ class BulkPushRuleEvaluator:
condition_cache = {} # type: Dict[str, bool] condition_cache = {} # type: Dict[str, bool]
# If the event is not a state event check if any users ignore the sender.
if not event.is_state():
ignorers = await self.store.ignored_by(event.sender)
else:
ignorers = set()
for uid, rules in rules_by_user.items(): for uid, rules in rules_by_user.items():
if event.sender == uid: if event.sender == uid:
continue continue
if not event.is_state(): if uid in ignorers:
is_ignored = await self.store.is_ignored_by(event.sender, uid)
if is_ignored:
continue continue
display_name = None display_name = None

View File

@ -16,7 +16,7 @@
import abc import abc
import logging import logging
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Set, Tuple
from synapse.api.constants import AccountDataTypes from synapse.api.constants import AccountDataTypes
from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage._base import SQLBaseStore, db_to_json
@ -24,7 +24,7 @@ from synapse.storage.database import DatabasePool
from synapse.storage.util.id_generators import StreamIdGenerator from synapse.storage.util.id_generators import StreamIdGenerator
from synapse.types import JsonDict from synapse.types import JsonDict
from synapse.util import json_encoder from synapse.util import json_encoder
from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.caches.descriptors import cached
from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.caches.stream_change_cache import StreamChangeCache
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -287,23 +287,25 @@ class AccountDataWorkerStore(SQLBaseStore, metaclass=abc.ABCMeta):
"get_updated_account_data_for_user", get_updated_account_data_for_user_txn "get_updated_account_data_for_user", get_updated_account_data_for_user_txn
) )
@cached(num_args=2, cache_context=True, max_entries=5000) @cached(max_entries=5000, iterable=True)
async def is_ignored_by( async def ignored_by(self, user_id: str) -> Set[str]:
self, ignored_user_id: str, ignorer_user_id: str, cache_context: _CacheContext """
) -> bool: Get users which ignore the given user.
ignored_account_data = await self.get_global_account_data_by_type_for_user(
AccountDataTypes.IGNORED_USER_LIST,
ignorer_user_id,
on_invalidate=cache_context.invalidate,
)
if not ignored_account_data:
return False
try: Params:
return ignored_user_id in ignored_account_data.get("ignored_users", {}) user_id: The user ID which might be ignored.
except TypeError:
# The type of the ignored_users field is invalid. Return:
return False The user IDs which ignore the given user.
"""
return set(
await self.db_pool.simple_select_onecol(
table="ignored_users",
keyvalues={"ignored_user_id": user_id},
retcol="ignorer_user_id",
desc="ignored_by",
)
)
class AccountDataStore(AccountDataWorkerStore): class AccountDataStore(AccountDataWorkerStore):
@ -390,18 +392,14 @@ class AccountDataStore(AccountDataWorkerStore):
Returns: Returns:
The maximum stream ID. The maximum stream ID.
""" """
content_json = json_encoder.encode(content)
async with self._account_data_id_gen.get_next() as next_id: async with self._account_data_id_gen.get_next() as next_id:
# no need to lock here as account_data has a unique constraint on await self.db_pool.runInteraction(
# (user_id, account_data_type) so simple_upsert will retry if "add_user_account_data",
# there is a conflict. self._add_account_data_for_user,
await self.db_pool.simple_upsert( next_id,
desc="add_user_account_data", user_id,
table="account_data", account_data_type,
keyvalues={"user_id": user_id, "account_data_type": account_data_type}, content,
values={"stream_id": next_id, "content": content_json},
lock=False,
) )
# it's theoretically possible for the above to succeed and the # it's theoretically possible for the above to succeed and the
@ -424,6 +422,71 @@ class AccountDataStore(AccountDataWorkerStore):
return self._account_data_id_gen.get_current_token() return self._account_data_id_gen.get_current_token()
def _add_account_data_for_user(
self,
txn,
next_id: int,
user_id: str,
account_data_type: str,
content: JsonDict,
) -> None:
content_json = json_encoder.encode(content)
# no need to lock here as account_data has a unique constraint on
# (user_id, account_data_type) so simple_upsert will retry if
# there is a conflict.
self.db_pool.simple_upsert_txn(
txn,
table="account_data",
keyvalues={"user_id": user_id, "account_data_type": account_data_type},
values={"stream_id": next_id, "content": content_json},
lock=False,
)
# Ignored users get denormalized into a separate table as an optimisation.
if account_data_type != AccountDataTypes.IGNORED_USER_LIST:
return
# Insert / delete to sync the list of ignored users.
previously_ignored_users = set(
self.db_pool.simple_select_onecol_txn(
txn,
table="ignored_users",
keyvalues={"ignorer_user_id": user_id},
retcol="ignored_user_id",
)
)
# If the data is invalid, no one is ignored.
ignored_users_content = content.get("ignored_users", {})
if isinstance(ignored_users_content, dict):
currently_ignored_users = set(ignored_users_content)
else:
currently_ignored_users = set()
# Delete entries which are no longer ignored.
self.db_pool.simple_delete_many_txn(
txn,
table="ignored_users",
column="ignored_user_id",
iterable=previously_ignored_users - currently_ignored_users,
keyvalues={"ignorer_user_id": user_id},
)
# Add entries which are newly ignored.
self.db_pool.simple_insert_many_txn(
txn,
table="ignored_users",
values=[
{"ignorer_user_id": user_id, "ignored_user_id": u}
for u in currently_ignored_users - previously_ignored_users
],
)
# Invalidate the cache for any ignored users which were added or removed.
for ignored_user_id in previously_ignored_users ^ currently_ignored_users:
self._invalidate_cache_and_stream(txn, self.ignored_by, (ignored_user_id,))
async def _update_max_stream_id(self, next_id: int) -> None: async def _update_max_stream_id(self, next_id: int) -> None:
"""Update the max stream_id """Update the max stream_id

View File

@ -0,0 +1,82 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This migration denormalises the account_data table into an ignored users table.
"""
import logging
from io import StringIO
from synapse.storage._base import db_to_json
from synapse.storage.engines import BaseDatabaseEngine
from synapse.storage.prepare_database import execute_statements_from_stream
from synapse.storage.types import Cursor
logger = logging.getLogger(__name__)
def run_upgrade(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
pass
def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
logger.info("Creating ignored_users table")
execute_statements_from_stream(cur, StringIO(_create_commands))
# We now upgrade existing data, if any. We don't do this in `run_upgrade` as
# we a) want to run these before adding constraints and b) `run_upgrade` is
# not run on empty databases.
insert_sql = """
INSERT INTO ignored_users (ignorer_user_id, ignored_user_id) VALUES (?, ?)
"""
logger.info("Converting existing ignore lists")
cur.execute(
"SELECT user_id, content FROM account_data WHERE account_data_type = 'm.ignored_user_list'"
)
for user_id, content_json in cur.fetchall():
content = db_to_json(content_json)
# The content should be the form of a dictionary with a key
# "ignored_users" pointing to a dictionary with keys of ignored users.
#
# { "ignored_users": "@someone:example.org": {} }
ignored_users = content.get("ignored_users", {})
if isinstance(ignored_users, dict) and ignored_users:
cur.executemany(insert_sql, [(user_id, u) for u in ignored_users])
# Add indexes after inserting data for efficiency.
logger.info("Adding constraints to ignored_users table")
execute_statements_from_stream(cur, StringIO(_constraints_commands))
# there might be duplicates, so the easiest way to achieve this is to create a new
# table with the right data, and renaming it into place
_create_commands = """
-- Users which are ignored when calculating push notifications. This data is
-- denormalized from account data.
CREATE TABLE IF NOT EXISTS ignored_users(
ignorer_user_id TEXT NOT NULL, -- The user ID of the user who is ignoring another user. (This is a local user.)
ignored_user_id TEXT NOT NULL -- The user ID of the user who is being ignored. (This is a local or remote user.)
);
"""
_constraints_commands = """
CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users (ignorer_user_id, ignored_user_id);
-- Add an index on ignored_users since look-ups are done to get all ignorers of an ignored user.
CREATE INDEX ignored_users_ignored_user_id ON ignored_users (ignored_user_id);
"""

View File

@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
# XXX: If you're about to bump this to 59 (or higher) please create an update # XXX: If you're about to bump this to 59 (or higher) please create an update
# that drops the unused `cache_invalidation_stream` table, as per #7436! # that drops the unused `cache_invalidation_stream` table, as per #7436!
# XXX: Also add an update to drop `account_data_max_stream_id` as per #7656! # XXX: Also add an update to drop `account_data_max_stream_id` as per #7656!
SCHEMA_VERSION = 58 SCHEMA_VERSION = 59
dir_path = os.path.abspath(os.path.dirname(__file__)) dir_path = os.path.abspath(os.path.dirname(__file__))

View File

@ -0,0 +1,120 @@
# -*- coding: utf-8 -*-
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Iterable, Set
from synapse.api.constants import AccountDataTypes
from tests import unittest
class IgnoredUsersTestCase(unittest.HomeserverTestCase):
def prepare(self, hs, reactor, clock):
self.store = self.hs.get_datastore()
self.user = "@user:test"
def _update_ignore_list(
self, *ignored_user_ids: Iterable[str], ignorer_user_id: str = None
) -> None:
"""Update the account data to block the given users."""
if ignorer_user_id is None:
ignorer_user_id = self.user
self.get_success(
self.store.add_account_data_for_user(
ignorer_user_id,
AccountDataTypes.IGNORED_USER_LIST,
{"ignored_users": {u: {} for u in ignored_user_ids}},
)
)
def assert_ignorers(
self, ignored_user_id: str, expected_ignorer_user_ids: Set[str]
) -> None:
self.assertEqual(
self.get_success(self.store.ignored_by(ignored_user_id)),
expected_ignorer_user_ids,
)
def test_ignoring_users(self):
"""Basic adding/removing of users from the ignore list."""
self._update_ignore_list("@other:test", "@another:remote")
# Check a user which no one ignores.
self.assert_ignorers("@user:test", set())
# Check a local user which is ignored.
self.assert_ignorers("@other:test", {self.user})
# Check a remote user which is ignored.
self.assert_ignorers("@another:remote", {self.user})
# Add one user, remove one user, and leave one user.
self._update_ignore_list("@foo:test", "@another:remote")
# Check the removed user.
self.assert_ignorers("@other:test", set())
# Check the added user.
self.assert_ignorers("@foo:test", {self.user})
# Check the removed user.
self.assert_ignorers("@another:remote", {self.user})
def test_caching(self):
"""Ensure that caching works properly between different users."""
# The first user ignores a user.
self._update_ignore_list("@other:test")
self.assert_ignorers("@other:test", {self.user})
# The second user ignores them.
self._update_ignore_list("@other:test", ignorer_user_id="@second:test")
self.assert_ignorers("@other:test", {self.user, "@second:test"})
# The first user un-ignores them.
self._update_ignore_list()
self.assert_ignorers("@other:test", {"@second:test"})
def test_invalid_data(self):
"""Invalid data ends up clearing out the ignored users list."""
# Add some data and ensure it is there.
self._update_ignore_list("@other:test")
self.assert_ignorers("@other:test", {self.user})
# No ignored_users key.
self.get_success(
self.store.add_account_data_for_user(
self.user, AccountDataTypes.IGNORED_USER_LIST, {},
)
)
# No one ignores the user now.
self.assert_ignorers("@other:test", set())
# Add some data and ensure it is there.
self._update_ignore_list("@other:test")
self.assert_ignorers("@other:test", {self.user})
# Invalid data.
self.get_success(
self.store.add_account_data_for_user(
self.user,
AccountDataTypes.IGNORED_USER_LIST,
{"ignored_users": "unexpected"},
)
)
# No one ignores the user now.
self.assert_ignorers("@other:test", set())