forked-synapse/synapse/push/bulk_push_rule_evaluator.py
Quentin Gliech 7d52ce7d4b
Format files with Ruff (#17643)
I thought ruff check would also format, but it doesn't.

This runs ruff format in CI and dev scripts. The first commit is just a
run of `ruff format .` in the root directory.
2024-09-02 12:39:04 +01:00

584 lines
23 KiB
Python

#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2015 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import logging
from typing import (
TYPE_CHECKING,
Any,
Collection,
Dict,
List,
Mapping,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from prometheus_client import Counter
from twisted.internet.defer import Deferred
from synapse.api.constants import (
MAIN_TIMELINE,
EventContentFields,
EventTypes,
Membership,
RelationTypes,
)
from synapse.api.room_versions import PushRuleRoomFlag
from synapse.event_auth import auth_types_for_event, get_user_power_level
from synapse.events import EventBase, relation_from_event
from synapse.events.snapshot import EventContext
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.state import POWER_KEY
from synapse.storage.databases.main.roommember import EventIdMembership
from synapse.storage.roommember import ProfileInfo
from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator
from synapse.types import JsonValue
from synapse.types.state import StateFilter
from synapse.util import unwrapFirstError
from synapse.util.async_helpers import gather_results
from synapse.util.caches import register_cache
from synapse.util.metrics import measure_func
from synapse.visibility import filter_event_for_clients_with_state
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
push_rules_invalidation_counter = Counter(
"synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", ""
)
push_rules_state_size_counter = Counter(
"synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", ""
)
STATE_EVENT_TYPES_TO_MARK_UNREAD = {
EventTypes.Topic,
EventTypes.Name,
EventTypes.RoomAvatar,
EventTypes.Tombstone,
}
SENTINEL = object()
def _should_count_as_unread(event: EventBase, context: EventContext) -> bool:
# Exclude rejected and soft-failed events.
if context.rejected or event.internal_metadata.is_soft_failed():
return False
# Exclude notices.
if (
not event.is_state()
and event.type == EventTypes.Message
and event.content.get("msgtype") == "m.notice"
):
return False
# Exclude edits.
relates_to = relation_from_event(event)
if relates_to and relates_to.rel_type == RelationTypes.REPLACE:
return False
# Mark events that have a non-empty string body as unread.
body = event.content.get("body")
if isinstance(body, str) and body:
return True
# Mark some state events as unread.
if event.is_state() and event.type in STATE_EVENT_TYPES_TO_MARK_UNREAD:
return True
# Mark encrypted events as unread.
if not event.is_state() and event.type == EventTypes.Encrypted:
return True
return False
class BulkPushRuleEvaluator:
"""Calculates the outcome of push rules for an event for all users in the
room at once.
"""
def __init__(self, hs: "HomeServer"):
self.hs = hs
self.store = hs.get_datastores().main
self.clock = hs.get_clock()
self._event_auth_handler = hs.get_event_auth_handler()
self.should_calculate_push_rules = self.hs.config.push.enable_push
self._related_event_match_enabled = self.hs.config.experimental.msc3664_enabled
self.room_push_rule_cache_metrics = register_cache(
"cache",
"room_push_rule_cache",
cache=[], # Meaningless size, as this isn't a cache that stores values,
resizable=False,
)
async def _get_rules_for_event(
self,
event: EventBase,
) -> Mapping[str, FilteredPushRules]:
"""Get the push rules for all users who may need to be notified about
the event.
Note: this does not check if the user is allowed to see the event.
Returns:
Mapping of user ID to their push rules.
"""
# If this is a membership event, only calculate push rules for the target.
# While it's possible for users to configure push rules to respond to such an
# event, in practise nobody does this. At the cost of violating the spec a
# little, we can skip fetching a huge number of push rules in large rooms.
# This helps make joins and leaves faster.
if event.type == EventTypes.Member:
local_users: Sequence[str] = []
# We never notify a user about their own actions. This is enforced in
# `_action_for_event_by_user` in the loop over `rules_by_user`, but we
# do the same check here to avoid unnecessary DB queries.
if event.sender != event.state_key and self.hs.is_mine_id(event.state_key):
# Check the target is in the room, to avoid notifying them of
# e.g. a pre-emptive ban.
target_already_in_room = await self.store.check_local_user_in_room(
event.state_key, event.room_id
)
if target_already_in_room:
local_users = [event.state_key]
else:
# We get the users who may need to be notified by first fetching the
# local users currently in the room, finding those that have push rules,
# and *then* checking which users are actually allowed to see the event.
#
# The alternative is to first fetch all users that were joined at the
# event, but that requires fetching the full state at the event, which
# may be expensive for large rooms with few local users.
local_users = await self.store.get_local_users_in_room(event.room_id)
# Filter out appservice users.
local_users = [
u
for u in local_users
if not self.store.get_if_app_services_interested_in_user(u)
]
# if this event is an invite event, we may need to run rules for the user
# who's been invited, otherwise they won't get told they've been invited
if event.type == EventTypes.Member and event.membership == Membership.INVITE:
invited = event.state_key
if invited and self.hs.is_mine_id(invited) and invited not in local_users:
local_users.append(invited)
if not local_users:
return {}
rules_by_user = await self.store.bulk_get_push_rules(local_users)
logger.debug("Users in room: %s", local_users)
if logger.isEnabledFor(logging.DEBUG):
logger.debug(
"Returning push rules for %r %r",
event.room_id,
list(rules_by_user.keys()),
)
return rules_by_user
async def _get_power_levels_and_sender_level(
self,
event: EventBase,
context: EventContext,
event_id_to_event: Mapping[str, EventBase],
) -> Tuple[dict, Optional[int]]:
"""
Given an event and an event context, get the power level event relevant to the event
and the power level of the sender of the event.
Args:
event: event to check
context: context of event to check
event_id_to_event: a mapping of event_id to event for a set of events being
batch persisted. This is needed as the sought-after power level event may
be in this batch rather than the DB
"""
# There are no power levels and sender levels possible to get from outlier
if event.internal_metadata.is_outlier():
return {}, None
event_types = auth_types_for_event(event.room_version, event)
prev_state_ids = await context.get_prev_state_ids(
StateFilter.from_types(event_types)
)
pl_event_id = prev_state_ids.get(POWER_KEY)
# fastpath: if there's a power level event, that's all we need, and
# not having a power level event is an extreme edge case
if pl_event_id:
# Get the power level event from the batch, or fall back to the database.
pl_event = event_id_to_event.get(pl_event_id)
if pl_event:
auth_events = {POWER_KEY: pl_event}
else:
auth_events = {POWER_KEY: await self.store.get_event(pl_event_id)}
else:
auth_events_ids = self._event_auth_handler.compute_auth_events(
event, prev_state_ids, for_verification=False
)
auth_events_dict = await self.store.get_events(auth_events_ids)
# Some needed auth events might be in the batch, combine them with those
# fetched from the database.
for auth_event_id in auth_events_ids:
auth_event = event_id_to_event.get(auth_event_id)
if auth_event:
auth_events_dict[auth_event_id] = auth_event
auth_events = {(e.type, e.state_key): e for e in auth_events_dict.values()}
sender_level = get_user_power_level(event.sender, auth_events)
pl_event = auth_events.get(POWER_KEY)
return pl_event.content if pl_event else {}, sender_level
async def _related_events(
self, event: EventBase
) -> Dict[str, Dict[str, JsonValue]]:
"""Fetches the related events for 'event'. Sets the im.vector.is_falling_back key if the event is from a fallback relation
Returns:
Mapping of relation type to flattened events.
"""
related_events: Dict[str, Dict[str, JsonValue]] = {}
if self._related_event_match_enabled:
related_event_id = event.content.get("m.relates_to", {}).get("event_id")
relation_type = event.content.get("m.relates_to", {}).get("rel_type")
if related_event_id is not None and relation_type is not None:
related_event = await self.store.get_event(
related_event_id, allow_none=True
)
if related_event is not None:
related_events[relation_type] = _flatten_dict(related_event)
reply_event_id = (
event.content.get("m.relates_to", {})
.get("m.in_reply_to", {})
.get("event_id")
)
# convert replies to pseudo relations
if reply_event_id is not None:
related_event = await self.store.get_event(
reply_event_id, allow_none=True
)
if related_event is not None:
related_events["m.in_reply_to"] = _flatten_dict(related_event)
# indicate that this is from a fallback relation.
if relation_type == "m.thread" and event.content.get(
"m.relates_to", {}
).get("is_falling_back", False):
related_events["m.in_reply_to"]["im.vector.is_falling_back"] = (
""
)
return related_events
async def action_for_events_by_user(
self, events_and_context: List[Tuple[EventBase, EventContext]]
) -> None:
"""Given a list of events and their associated contexts, evaluate the push rules
for each event, check if the message should increment the unread count, and
insert the results into the event_push_actions_staging table.
"""
if not self.should_calculate_push_rules:
return
# For batched events the power level events may not have been persisted yet,
# so we pass in the batched events. Thus if the event cannot be found in the
# database we can check in the batch.
event_id_to_event = {e.event_id: e for e, _ in events_and_context}
for event, context in events_and_context:
await self._action_for_event_by_user(event, context, event_id_to_event)
@measure_func("action_for_event_by_user")
async def _action_for_event_by_user(
self,
event: EventBase,
context: EventContext,
event_id_to_event: Mapping[str, EventBase],
) -> None:
if (
not event.internal_metadata.is_notifiable()
or event.room_id in self.hs.config.server.rooms_to_exclude_from_sync
):
# Push rules for events that aren't notifiable can't be processed by this and
# we want to skip push notification actions for historical messages
# because we don't want to notify people about old history back in time.
# The historical messages also do not have the proper `context.current_state_ids`
# and `state_groups` because they have `prev_events` that aren't persisted yet
# (historical messages persisted in reverse-chronological order).
return
# Disable counting as unread unless the experimental configuration is
# enabled, as it can cause additional (unwanted) rows to be added to the
# event_push_actions table.
count_as_unread = False
if self.hs.config.experimental.msc2654_enabled:
count_as_unread = _should_count_as_unread(event, context)
rules_by_user = await self._get_rules_for_event(event)
actions_by_user: Dict[str, Collection[Union[Mapping, str]]] = {}
# Gather a bunch of info in parallel.
#
# This has a lot of ignored types and casting due to the use of @cached
# decorated functions passed into run_in_background.
#
# See https://github.com/matrix-org/synapse/issues/16606
(
room_member_count,
(power_levels, sender_power_level),
related_events,
profiles,
) = await make_deferred_yieldable(
cast(
"Deferred[Tuple[int, Tuple[dict, Optional[int]], Dict[str, Dict[str, JsonValue]], Mapping[str, ProfileInfo]]]",
gather_results(
(
run_in_background( # type: ignore[call-arg]
self.store.get_number_joined_users_in_room,
event.room_id, # type: ignore[arg-type]
),
run_in_background(
self._get_power_levels_and_sender_level,
event,
context,
event_id_to_event,
),
run_in_background(self._related_events, event),
run_in_background( # type: ignore[call-arg]
self.store.get_subset_users_in_room_with_profiles,
event.room_id, # type: ignore[arg-type]
rules_by_user.keys(), # type: ignore[arg-type]
),
),
consumeErrors=True,
).addErrback(unwrapFirstError),
)
)
# Find the event's thread ID.
relation = relation_from_event(event)
# If the event does not have a relation, then it cannot have a thread ID.
thread_id = MAIN_TIMELINE
if relation:
# Recursively attempt to find the thread this event relates to.
if relation.rel_type == RelationTypes.THREAD:
thread_id = relation.parent_id
else:
# Since the event has not yet been persisted we check whether
# the parent is part of a thread.
thread_id = await self.store.get_thread_id(relation.parent_id)
# It's possible that old room versions have non-integer power levels (floats or
# strings; even the occasional `null`). For old rooms, we interpret these as if
# they were integers. Do this here for the `@room` power level threshold.
# Note that this is done automatically for the sender's power level by
# _get_power_levels_and_sender_level in its call to get_user_power_level
# (even for room V10.)
notification_levels = power_levels.get("notifications", {})
if not event.room_version.enforce_int_power_levels:
keys = list(notification_levels.keys())
for key in keys:
level = notification_levels.get(key, SENTINEL)
if level is not SENTINEL and type(level) is not int: # noqa: E721
try:
notification_levels[key] = int(level)
except (TypeError, ValueError):
del notification_levels[key]
# Pull out any user and room mentions.
has_mentions = EventContentFields.MENTIONS in event.content
evaluator = PushRuleEvaluator(
_flatten_dict(event),
has_mentions,
room_member_count,
sender_power_level,
notification_levels,
related_events,
self._related_event_match_enabled,
event.room_version.msc3931_push_features,
self.hs.config.experimental.msc1767_enabled, # MSC3931 flag
)
for uid, rules in rules_by_user.items():
if event.sender == uid:
continue
display_name = None
profile = profiles.get(uid)
if profile:
display_name = profile.display_name
if not display_name:
# Handle the case where we are pushing a membership event to
# that user, as they might not be already joined.
if event.type == EventTypes.Member and event.state_key == uid:
display_name = event.content.get("displayname", None)
if not isinstance(display_name, str):
display_name = None
if count_as_unread:
# Add an element for the current user if the event needs to be marked as
# unread, so that add_push_actions_to_staging iterates over it.
# If the event shouldn't be marked as unread but should notify the
# current user, it'll be added to the dict later.
actions_by_user[uid] = []
actions = evaluator.run(rules, uid, display_name)
if "notify" in actions:
# Push rules say we should notify the user of this event
actions_by_user[uid] = actions
# If there aren't any actions then we can skip the rest of the
# processing.
if not actions_by_user:
return
# This is a check for the case where user joins a room without being
# allowed to see history, and then the server receives a delayed event
# from before the user joined, which they should not be pushed for
#
# We do this *after* calculating the push actions as a) its unlikely
# that we'll filter anyone out and b) for large rooms its likely that
# most users will have push disabled and so the set of users to check is
# much smaller.
uids_with_visibility = await filter_event_for_clients_with_state(
self.store, actions_by_user.keys(), event, context
)
for user_id in set(actions_by_user).difference(uids_with_visibility):
actions_by_user.pop(user_id, None)
# Mark in the DB staging area the push actions for users who should be
# notified for this event. (This will then get handled when we persist
# the event)
await self.store.add_push_actions_to_staging(
event.event_id,
actions_by_user,
count_as_unread,
thread_id,
)
MemberMap = Dict[str, Optional[EventIdMembership]]
Rule = Dict[str, dict]
RulesByUser = Dict[str, List[Rule]]
StateGroup = Union[object, int]
def _is_simple_value(value: Any) -> bool:
return (
isinstance(value, (bool, str))
or type(value) is int # noqa: E721
or value is None
)
def _flatten_dict(
d: Union[EventBase, Mapping[str, Any]],
prefix: Optional[List[str]] = None,
result: Optional[Dict[str, JsonValue]] = None,
) -> Dict[str, JsonValue]:
"""
Given a JSON dictionary (or event) which might contain sub dictionaries,
flatten it into a single layer dictionary by combining the keys & sub-keys.
String, integer, boolean, null or lists of those values are kept. All others are dropped.
Transforms:
{"foo": {"bar": "test"}}
To:
{"foo.bar": "test"}
Args:
d: The event or content to continue flattening.
prefix: The key prefix (from outer dictionaries).
result: The result to mutate.
Returns:
The resulting dictionary.
"""
if prefix is None:
prefix = []
if result is None:
result = {}
for key, value in d.items():
# Escape periods in the key with a backslash (and backslashes with an
# extra backslash). This is since a period is used as a separator between
# nested fields.
key = key.replace("\\", "\\\\").replace(".", "\\.")
if _is_simple_value(value):
result[".".join(prefix + [key])] = value
elif isinstance(value, (list, tuple)):
result[".".join(prefix + [key])] = [v for v in value if _is_simple_value(v)]
elif isinstance(value, Mapping):
# do not set `room_version` due to recursion considerations below
_flatten_dict(value, prefix=(prefix + [key]), result=result)
# `room_version` should only ever be set when looking at the top level of an event
if (
isinstance(d, EventBase)
and PushRuleRoomFlag.EXTENSIBLE_EVENTS in d.room_version.msc3931_push_features
):
# Room supports extensible events: replace `content.body` with the plain text
# representation from `m.markup`, as per MSC1767.
markup = d.get("content").get("m.markup")
if d.room_version.identifier.startswith("org.matrix.msc1767."):
markup = d.get("content").get("org.matrix.msc1767.markup")
if markup is not None and isinstance(markup, list):
text = ""
for rep in markup:
if not isinstance(rep, dict):
# invalid markup - skip all processing
break
if rep.get("mimetype", "text/plain") == "text/plain":
rep_text = rep.get("body")
if rep_text is not None and isinstance(rep_text, str):
text = rep_text.lower()
break
result["content.body"] = text
return result