Implement push rule evaluation in Rust. (#13838)

This commit is contained in:
Erik Johnston 2022-09-29 16:12:09 +01:00 committed by GitHub
parent a466164647
commit ebd9e2dac6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 894 additions and 403 deletions

1
changelog.d/13838.misc Normal file
View File

@ -0,0 +1 @@
Port push rule evaluation to Rust.

View File

@ -11,7 +11,9 @@ rust-version = "1.58.1"
[lib] [lib]
name = "synapse" name = "synapse"
crate-type = ["cdylib"] # We generate a `cdylib` for Python and a standard `lib` for running
# tests/benchmarks.
crate-type = ["lib", "cdylib"]
[package.metadata.maturin] [package.metadata.maturin]
# This is where we tell maturin where to place the built library. # This is where we tell maturin where to place the built library.

149
rust/benches/evaluator.rs Normal file
View File

@ -0,0 +1,149 @@
// Copyright 2022 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(test)]
use synapse::push::{
evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules,
};
use test::Bencher;
extern crate test;
/// Benchmarks an `event_match` condition on a key other than `content.body`
/// (here `room_id`) using a pattern without wildcards.
#[bench]
fn bench_match_exact(b: &mut Bencher) {
    let flattened_keys = [
        ("type".to_string(), "m.text".to_string()),
        ("room_id".to_string(), "!room:server".to_string()),
        ("content.body".to_string(), "test message".to_string()),
    ]
    .into_iter()
    .collect();

    // `sender_power_level` is an `Option<i64>`, so the level must be wrapped
    // in `Some`.
    let eval = PushRuleEvaluator::py_new(
        flattened_keys,
        10,
        Some(0),
        Default::default(),
        Default::default(),
        true,
    )
    .unwrap();

    let condition = Condition::Known(synapse::push::KnownCondition::EventMatch(
        EventMatchCondition {
            key: "room_id".into(),
            pattern: Some("!room:server".into()),
            pattern_type: None,
        },
    ));

    // Sanity check outside the timed loop that the condition really matches.
    let matched = eval.match_condition(&condition, None, None).unwrap();
    assert!(matched, "Didn't match");

    b.iter(|| eval.match_condition(&condition, None, None).unwrap());
}
/// Benchmarks an `event_match` condition on `content.body`, which is matched
/// against words rather than the whole value, for a pattern that is present.
#[bench]
fn bench_match_word(b: &mut Bencher) {
    let flattened_keys = [
        ("type".to_string(), "m.text".to_string()),
        ("room_id".to_string(), "!room:server".to_string()),
        ("content.body".to_string(), "test message".to_string()),
    ]
    .into_iter()
    .collect();

    // `sender_power_level` is an `Option<i64>`, so the level must be wrapped
    // in `Some`.
    let eval = PushRuleEvaluator::py_new(
        flattened_keys,
        10,
        Some(0),
        Default::default(),
        Default::default(),
        true,
    )
    .unwrap();

    let condition = Condition::Known(synapse::push::KnownCondition::EventMatch(
        EventMatchCondition {
            key: "content.body".into(),
            pattern: Some("test".into()),
            pattern_type: None,
        },
    ));

    // Sanity check outside the timed loop that the condition really matches.
    let matched = eval.match_condition(&condition, None, None).unwrap();
    assert!(matched, "Didn't match");

    b.iter(|| eval.match_condition(&condition, None, None).unwrap());
}
/// Benchmarks an `event_match` condition on `content.body` for a pattern that
/// does not occur in the body.
#[bench]
fn bench_match_word_miss(b: &mut Bencher) {
    let flattened_keys = [
        ("type".to_string(), "m.text".to_string()),
        ("room_id".to_string(), "!room:server".to_string()),
        ("content.body".to_string(), "test message".to_string()),
    ]
    .into_iter()
    .collect();

    // `sender_power_level` is an `Option<i64>`, so the level must be wrapped
    // in `Some`.
    let eval = PushRuleEvaluator::py_new(
        flattened_keys,
        10,
        Some(0),
        Default::default(),
        Default::default(),
        true,
    )
    .unwrap();

    let condition = Condition::Known(synapse::push::KnownCondition::EventMatch(
        EventMatchCondition {
            key: "content.body".into(),
            pattern: Some("foobar".into()),
            pattern_type: None,
        },
    ));

    // Sanity check outside the timed loop: this benchmark measures the miss
    // path, so the condition must NOT match.
    let matched = eval.match_condition(&condition, None, None).unwrap();
    assert!(!matched, "Unexpectedly matched");

    b.iter(|| eval.match_condition(&condition, None, None).unwrap());
}
/// Benchmarks a full `PushRuleEvaluator::run` over a `FilteredPushRules` built
/// from an empty list of custom rules.
#[bench]
fn bench_eval_message(b: &mut Bencher) {
    let flattened_keys = [
        ("type".to_string(), "m.text".to_string()),
        ("room_id".to_string(), "!room:server".to_string()),
        ("content.body".to_string(), "test message".to_string()),
    ]
    .into_iter()
    .collect();

    // `sender_power_level` is an `Option<i64>`, so the level must be wrapped
    // in `Some`.
    let eval = PushRuleEvaluator::py_new(
        flattened_keys,
        10,
        Some(0),
        Default::default(),
        Default::default(),
        true,
    )
    .unwrap();

    let rules =
        FilteredPushRules::py_new(PushRules::new(Vec::new()), Default::default(), false, false);

    b.iter(|| eval.run(&rules, Some("bob"), Some("person")));
}

40
rust/benches/glob.rs Normal file
View File

@ -0,0 +1,40 @@
// Copyright 2022 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(test)]
use synapse::push::utils::{glob_to_regex, GlobMatchType};
use test::Bencher;
extern crate test;
/// Measures compiling a wildcard-free glob into a regex anchored to the whole
/// input.
#[bench]
fn bench_whole(b: &mut Bencher) {
    let compile = || glob_to_regex("test", GlobMatchType::Whole);
    b.iter(compile);
}
/// Measures compiling a wildcard-free glob into a word-matching regex.
#[bench]
fn bench_word(b: &mut Bencher) {
    let compile = || glob_to_regex("test", GlobMatchType::Word);
    b.iter(compile);
}
/// Measures compiling a glob containing a long run of mixed `*`/`?` wildcards
/// into a regex anchored to the whole input.
#[bench]
fn bench_whole_wildcard_run(b: &mut Bencher) {
    let compile = || glob_to_regex("test***??*?*?foo", GlobMatchType::Whole);
    b.iter(compile);
}
/// Measures compiling a glob containing a long run of mixed `*`/`?` wildcards
/// into a word-matching regex.
#[bench]
fn bench_word_wildcard_run(b: &mut Bencher) {
    // This is the *word* variant of the wildcard-run benchmark, so it must use
    // `GlobMatchType::Word`; using `Whole` would just duplicate
    // `bench_whole_wildcard_run`.
    b.iter(|| glob_to_regex("test***??*?*?foo", GlobMatchType::Word));
}

View File

@ -22,7 +22,7 @@ fn main() -> Result<(), std::io::Error> {
for entry in entries { for entry in entries {
if entry.is_dir() { if entry.is_dir() {
dirs.push(entry) dirs.push(entry);
} else { } else {
paths.push(entry.to_str().expect("valid rust paths").to_string()); paths.push(entry.to_str().expect("valid rust paths").to_string());
} }

View File

@ -262,6 +262,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
priority_class: 1, priority_class: 1,
conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch { conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch {
rel_type: Cow::Borrowed("m.thread"), rel_type: Cow::Borrowed("m.thread"),
event_type_pattern: None,
sender: None, sender: None,
sender_type: Some(Cow::Borrowed("user_id")), sender_type: Some(Cow::Borrowed("user_id")),
})]), })]),

374
rust/src/push/evaluator.rs Normal file
View File

@ -0,0 +1,374 @@
// Copyright 2022 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{
borrow::Cow,
collections::{BTreeMap, BTreeSet},
};
use anyhow::{Context, Error};
use lazy_static::lazy_static;
use log::warn;
use pyo3::prelude::*;
use regex::Regex;
use super::{
utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType},
Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition,
};
lazy_static! {
    /// Used to parse the `is` clause in the room member count condition.
    ///
    /// Capture group 1 is the (possibly empty) run of `=`, `<` and `>`
    /// characters forming the comparison operator; capture group 2 is the
    /// decimal number to compare against. The expression is anchored, so the
    /// whole clause must have this shape.
    static ref INEQUALITY_EXPR: Regex = Regex::new(r"^([=<>]*)([0-9]+)$").expect("valid regex");
}
/// Allows running a set of push rules against a particular event.
///
/// All the event and room data needed to evaluate conditions is supplied up
/// front to the constructor and stored here.
#[pyclass]
pub struct PushRuleEvaluator {
    /// A mapping of "flattened" keys to string values in the event, e.g.
    /// includes things like "type" and "content.msgtype".
    flattened_keys: BTreeMap<String, String>,

    /// The "content.body", if any. This is a copy of the `content.body` entry
    /// of `flattened_keys`, or the empty string when that entry is absent.
    body: String,

    /// The number of users in the room.
    room_member_count: u64,

    /// The `notifications` section of the current power levels in the room.
    /// A key that is missing from this map is treated as requiring level 50.
    notification_power_levels: BTreeMap<String, i64>,

    /// The relations related to the event as a mapping from relation type to
    /// set of sender/event type 2-tuples (in that order).
    relations: BTreeMap<String, BTreeSet<(String, String)>>,

    /// Is running "relation" conditions enabled? When `false`, relation
    /// conditions never match.
    relation_match_enabled: bool,

    /// The power level of the sender of the event, or None if event is an
    /// outlier. When `None`, sender notification permission conditions never
    /// match.
    sender_power_level: Option<i64>,
}
#[pymethods]
impl PushRuleEvaluator {
    /// Create a new `PushRuleEvaluator`. See the struct documentation for the
    /// meaning of each argument.
    #[new]
    pub fn py_new(
        flattened_keys: BTreeMap<String, String>,
        room_member_count: u64,
        sender_power_level: Option<i64>,
        notification_power_levels: BTreeMap<String, i64>,
        relations: BTreeMap<String, BTreeSet<(String, String)>>,
        relation_match_enabled: bool,
    ) -> Result<Self, Error> {
        // Keep a dedicated copy of the message body, falling back to an empty
        // string when the event has none.
        let body = match flattened_keys.get("content.body") {
            Some(body) => body.clone(),
            None => String::new(),
        };

        Ok(PushRuleEvaluator {
            flattened_keys,
            body,
            room_member_count,
            notification_power_levels,
            relations,
            relation_match_enabled,
            sender_power_level,
        })
    }

    /// Evaluate the given push rules in order for the given user ID and
    /// display name, and return the actions of the first enabled rule whose
    /// conditions all match.
    ///
    /// Passing `None` for the user ID or display name skips the rules that
    /// match on them.
    ///
    /// `dont_notify` actions are filtered out of the result. If no rule
    /// matches, an empty list is returned.
    pub fn run(
        &self,
        push_rules: &FilteredPushRules,
        user_id: Option<&str>,
        display_name: Option<&str>,
    ) -> Vec<Action> {
        for (push_rule, enabled) in push_rules.iter() {
            if !enabled {
                continue;
            }

            // `all` stops at the first condition that fails, and a condition
            // that errors is logged and counted as a failure.
            let every_condition_matches = push_rule.conditions.iter().all(|condition| {
                match self.match_condition(condition, user_id, display_name) {
                    Ok(matched) => matched,
                    Err(err) => {
                        warn!("Condition match failed {err}");
                        false
                    }
                }
            });

            if !every_condition_matches {
                continue;
            }

            // First matching rule wins. We don't store "dont_notify" actions,
            // so drop them here.
            return push_rule
                .actions
                .iter()
                .filter(|action| **action != Action::DontNotify)
                .cloned()
                .collect();
        }

        Vec::new()
    }

    /// Check if the given condition matches. Conditions that fail to evaluate
    /// are logged and reported as not matching.
    fn matches(
        &self,
        condition: Condition,
        user_id: Option<&str>,
        display_name: Option<&str>,
    ) -> bool {
        self.match_condition(&condition, user_id, display_name)
            .unwrap_or_else(|err| {
                warn!("Condition match failed {err}");
                false
            })
    }
}
impl PushRuleEvaluator {
    /// Evaluate a single push rule `Condition` against the event.
    ///
    /// Unknown conditions never match. `user_id` and `display_name` are only
    /// consulted by the conditions that depend on them.
    pub fn match_condition(
        &self,
        condition: &Condition,
        user_id: Option<&str>,
        display_name: Option<&str>,
    ) -> Result<bool, Error> {
        let known = match condition {
            Condition::Unknown(_) => return Ok(false),
            Condition::Known(known) => known,
        };

        match known {
            KnownCondition::EventMatch(event_match) => {
                self.match_event_match(event_match, user_id)
            }
            KnownCondition::ContainsDisplayName => match display_name {
                // We specifically ignore empty display names, as otherwise
                // they would always match.
                Some(dn) if !dn.is_empty() => {
                    get_glob_matcher(dn, GlobMatchType::Word)?.is_match(&self.body)
                }
                _ => Ok(false),
            },
            KnownCondition::RoomMemberCount { is } => match is {
                Some(is) => self.match_member_count(is),
                None => Ok(false),
            },
            KnownCondition::SenderNotificationPermission { key } => {
                let permitted = match self.sender_power_level {
                    Some(sender_power_level) => {
                        // Notification keys without an explicit level require
                        // a power level of 50.
                        let required_level = self
                            .notification_power_levels
                            .get(key.as_ref())
                            .copied()
                            .unwrap_or(50);

                        sender_power_level >= required_level
                    }
                    None => false,
                };

                Ok(permitted)
            }
            KnownCondition::RelationMatch {
                rel_type,
                event_type_pattern,
                sender,
                sender_type,
            } => self.match_relations(rel_type, sender, sender_type, user_id, event_type_pattern),
        }
    }

    /// Evaluates a relation condition: does any relation of type `rel_type`
    /// match both the sender pattern and the event type pattern (each of which
    /// is optional)?
    fn match_relations(
        &self,
        rel_type: &str,
        sender: &Option<Cow<str>>,
        sender_type: &Option<Cow<str>>,
        user_id: Option<&str>,
        event_type_pattern: &Option<Cow<str>>,
    ) -> Result<bool, Error> {
        // Nothing can match if relation matching is switched off...
        if !self.relation_match_enabled {
            return Ok(false);
        }

        // ... or if there are no relations of the requested type.
        let relations = match self.relations.get(rel_type) {
            Some(relations) => relations,
            None => return Ok(false),
        };

        // Work out the sender pattern: an explicit `sender` wins, otherwise a
        // `sender_type` of "user_id" means "the user we are evaluating for".
        let sender_pattern: Option<&str> = match (sender, sender_type) {
            (Some(sender), _) => Some(sender.as_ref()),
            (None, Some(sender_type)) => {
                if sender_type != "user_id" {
                    warn!("Unrecognized sender_type: {sender_type}");
                    return Ok(false);
                }

                match user_id {
                    Some(user_id) => Some(user_id),
                    None => return Ok(false),
                }
            }
            (None, None) => None,
        };

        let mut sender_matcher = sender_pattern
            .map(|pattern| get_glob_matcher(pattern, GlobMatchType::Whole))
            .transpose()?;

        let mut type_matcher = event_type_pattern
            .as_deref()
            .map(|pattern| get_glob_matcher(pattern, GlobMatchType::Whole))
            .transpose()?;

        for (relation_sender, event_type) in relations {
            // A missing pattern places no constraint on the relation.
            let sender_matches = match &mut sender_matcher {
                Some(matcher) => matcher.is_match(relation_sender)?,
                None => true,
            };
            if !sender_matches {
                continue;
            }

            let type_matches = match &mut type_matcher {
                Some(matcher) => matcher.is_match(event_type)?,
                None => true,
            };
            if type_matches {
                return Ok(true);
            }
        }

        Ok(false)
    }

    /// Evaluates a `event_match` condition.
    fn match_event_match(
        &self,
        event_match: &EventMatchCondition,
        user_id: Option<&str>,
    ) -> Result<bool, Error> {
        let pattern: &str = match (&event_match.pattern, &event_match.pattern_type) {
            (Some(pattern), _) => pattern,
            (None, Some(pattern_type)) => {
                // The `pattern_type` can either be "user_id" or
                // "user_localpart", either way if we don't have a `user_id`
                // then the condition can't match.
                let user_id = match user_id {
                    Some(user_id) => user_id,
                    None => return Ok(false),
                };

                match &**pattern_type {
                    "user_id" => user_id,
                    "user_localpart" => get_localpart_from_id(user_id)?,
                    _ => return Ok(false),
                }
            }
            (None, None) => return Ok(false),
        };

        let haystack = match self.flattened_keys.get(&*event_match.key) {
            Some(haystack) => haystack,
            None => return Ok(false),
        };

        // The body is matched word-by-word; every other key has to match the
        // pattern in its entirety.
        let match_type = match &*event_match.key {
            "content.body" => GlobMatchType::Word,
            _ => GlobMatchType::Whole,
        };

        get_glob_matcher(pattern, match_type)?.is_match(haystack)
    }

    /// Match the member count against an 'is' condition.
    ///
    /// The `is` condition can be things like '>2', '==3' or even just '4'.
    /// A clause that parses but uses an unrecognised operator does not match.
    fn match_member_count(&self, is: &str) -> Result<bool, Error> {
        let captures = INEQUALITY_EXPR.captures(is).context("bad 'is' clause")?;

        let operator = match captures.get(1) {
            Some(m) => m.as_str(),
            None => "==",
        };
        let threshold = captures
            .get(2)
            .context("missing number")?
            .as_str()
            .parse::<u64>()?;

        let count = self.room_member_count;

        Ok(match operator {
            "" | "==" => count == threshold,
            "<" => count < threshold,
            ">" => count > threshold,
            ">=" => count >= threshold,
            "<=" => count <= threshold,
            _ => false,
        })
    }
}
/// Runs the default push rules against a message whose body contains the
/// user's display name and checks how many actions are returned.
#[test]
fn push_rule_evaluator() {
    let flattened_keys = BTreeMap::from([(
        "content.body".to_string(),
        "foo bar bob hello".to_string(),
    )]);

    let notification_power_levels = BTreeMap::new();
    let relations = BTreeMap::new();
    let evaluator = PushRuleEvaluator::py_new(
        flattened_keys,
        10,
        Some(0),
        notification_power_levels,
        relations,
        true,
    )
    .unwrap();

    let default_rules = FilteredPushRules::default();
    let actions = evaluator.run(&default_rules, None, Some("bob"));

    assert_eq!(actions.len(), 3);
}

View File

@ -42,7 +42,6 @@
//! //!
//! The set of "base rules" are the list of rules that every user has by default. A //! The set of "base rules" are the list of rules that every user has by default. A
//! user can modify their copy of the push rules in one of three ways: //! user can modify their copy of the push rules in one of three ways:
//!
//! 1. Adding a new push rule of a certain kind //! 1. Adding a new push rule of a certain kind
//! 2. Changing the actions of a base rule //! 2. Changing the actions of a base rule
//! 3. Enabling/disabling a base rule. //! 3. Enabling/disabling a base rule.
@ -58,12 +57,16 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use anyhow::{Context, Error}; use anyhow::{Context, Error};
use log::warn; use log::warn;
use pyo3::prelude::*; use pyo3::prelude::*;
use pythonize::pythonize; use pythonize::{depythonize, pythonize};
use serde::de::Error as _; use serde::de::Error as _;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use self::evaluator::PushRuleEvaluator;
mod base_rules; mod base_rules;
pub mod evaluator;
pub mod utils;
/// Called when registering modules with python. /// Called when registering modules with python.
pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> { pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
@ -71,6 +74,7 @@ pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
child_module.add_class::<PushRule>()?; child_module.add_class::<PushRule>()?;
child_module.add_class::<PushRules>()?; child_module.add_class::<PushRules>()?;
child_module.add_class::<FilteredPushRules>()?; child_module.add_class::<FilteredPushRules>()?;
child_module.add_class::<PushRuleEvaluator>()?;
child_module.add_function(wrap_pyfunction!(get_base_rule_ids, m)?)?; child_module.add_function(wrap_pyfunction!(get_base_rule_ids, m)?)?;
m.add_submodule(child_module)?; m.add_submodule(child_module)?;
@ -274,6 +278,8 @@ pub enum KnownCondition {
#[serde(rename = "org.matrix.msc3772.relation_match")] #[serde(rename = "org.matrix.msc3772.relation_match")]
RelationMatch { RelationMatch {
rel_type: Cow<'static, str>, rel_type: Cow<'static, str>,
#[serde(skip_serializing_if = "Option::is_none", rename = "type")]
event_type_pattern: Option<Cow<'static, str>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
sender: Option<Cow<'static, str>>, sender: Option<Cow<'static, str>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
@ -287,20 +293,26 @@ impl IntoPy<PyObject> for Condition {
} }
} }
impl<'source> FromPyObject<'source> for Condition {
fn extract(ob: &'source PyAny) -> PyResult<Self> {
Ok(depythonize(ob)?)
}
}
/// The body of a [`Condition::EventMatch`] /// The body of a [`Condition::EventMatch`]
#[derive(Serialize, Deserialize, Debug, Clone)] #[derive(Serialize, Deserialize, Debug, Clone)]
pub struct EventMatchCondition { pub struct EventMatchCondition {
key: Cow<'static, str>, pub key: Cow<'static, str>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pattern: Option<Cow<'static, str>>, pub pattern: Option<Cow<'static, str>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pattern_type: Option<Cow<'static, str>>, pub pattern_type: Option<Cow<'static, str>>,
} }
/// The collection of push rules for a user. /// The collection of push rules for a user.
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
#[pyclass(frozen)] #[pyclass(frozen)]
struct PushRules { pub struct PushRules {
/// Custom push rules that override a base rule. /// Custom push rules that override a base rule.
overridden_base_rules: HashMap<Cow<'static, str>, PushRule>, overridden_base_rules: HashMap<Cow<'static, str>, PushRule>,
@ -319,7 +331,7 @@ struct PushRules {
#[pymethods] #[pymethods]
impl PushRules { impl PushRules {
#[new] #[new]
fn new(rules: Vec<PushRule>) -> PushRules { pub fn new(rules: Vec<PushRule>) -> PushRules {
let mut push_rules: PushRules = Default::default(); let mut push_rules: PushRules = Default::default();
for rule in rules { for rule in rules {
@ -396,7 +408,7 @@ pub struct FilteredPushRules {
#[pymethods] #[pymethods]
impl FilteredPushRules { impl FilteredPushRules {
#[new] #[new]
fn py_new( pub fn py_new(
push_rules: PushRules, push_rules: PushRules,
enabled_map: BTreeMap<String, bool>, enabled_map: BTreeMap<String, bool>,
msc3786_enabled: bool, msc3786_enabled: bool,

215
rust/src/push/utils.rs Normal file
View File

@ -0,0 +1,215 @@
// Copyright 2022 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use anyhow::bail;
use anyhow::Context;
use anyhow::Error;
use lazy_static::lazy_static;
use regex;
use regex::Regex;
use regex::RegexBuilder;
lazy_static! {
    /// Matches runs of non-wildcard characters followed by wildcard characters.
    ///
    /// Capture group 1 is the run of literal (non `?`/`*`) characters and
    /// capture group 2 is the run of `?`/`*` wildcards that follows it. Either
    /// group may be empty.
    static ref WILDCARD_RUN: Regex = Regex::new(r"([^\?\*]*)([\?\*]*)").expect("valid regex");
}
/// Extract the localpart from a Matrix style ID.
///
/// The localpart is whatever lies between the first character of the ID (the
/// ID type) and the first colon, e.g. `test` for `@test:foo`.
///
/// # Errors
///
/// Returns an error if `id` does not contain a colon, or if nothing (not even
/// the ID type character) precedes the first colon.
pub(crate) fn get_localpart_from_id(id: &str) -> Result<&str, Error> {
    let (localpart, _) = id
        .split_once(':')
        .with_context(|| format!("ID does not contain colon: {id}"))?;

    // We need to strip off the first character, which is the ID type. Step
    // over it as a `char` rather than slicing at byte offset 1: slicing would
    // panic if the first character takes more than one byte in UTF-8.
    let mut chars = localpart.chars();
    if chars.next().is_none() {
        bail!("Invalid ID {id}");
    }

    Ok(chars.as_str())
}
/// Used by `glob_to_regex` to specify what to match the regex against.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GlobMatchType {
    /// The generated regex will match against the entire input (it is
    /// anchored with `\A` and `\z`).
    Whole,

    /// The generated regex will match against words: the glob has to be
    /// delimited on each side by a word boundary, a non-word character, or the
    /// start/end of the input.
    Word,
}
/// Translate a "glob" style expression into a case-insensitive regex that is
/// anchored either to the entire input or to individual words, depending on
/// `match_type`.
pub fn glob_to_regex(glob: &str, match_type: GlobMatchType) -> Result<Regex, Error> {
    let mut joined = String::new();

    // Runs of wildcards are collapsed into a single quantifier to avoid
    // performance cliffs:
    // - The glob `?**?**?` is equivalent to the glob `???*`
    // - The glob `???*` is equivalent to the regex `.{3,}`
    for captures in WILDCARD_RUN.captures_iter(glob) {
        // The literal part is copied across with regex metacharacters escaped.
        if let Some(chunk) = captures.get(1) {
            joined.push_str(&regex::escape(chunk.as_str()));
        }

        let wildcards = match captures.get(2) {
            Some(run) if !run.as_str().is_empty() => run.as_str(),
            _ => continue,
        };

        // Each `?` stands for exactly one character; a `*` anywhere in the run
        // makes that count a minimum rather than an exact length.
        let question_marks = wildcards.matches('?').count();
        let quantifier = if wildcards.contains('*') {
            format!(".{{{question_marks},}}")
        } else {
            format!(".{{{question_marks}}}")
        };
        joined.push_str(&quantifier);
    }

    let regex_str = match match_type {
        GlobMatchType::Whole => format!(r"\A{joined}\z"),

        // `^|\W` and `\W|$` handle the case where `pattern` starts or ends with a non-word
        // character.
        GlobMatchType::Word => format!(r"(?:^|\b|\W){joined}(?:\b|\W|$)"),
    };

    let compiled = RegexBuilder::new(&regex_str)
        .case_insensitive(true)
        .build()?;

    Ok(compiled)
}
/// Compiles the glob into a `Matcher`, choosing a cheaper strategy than a
/// regex when the glob contains no wildcards.
pub fn get_glob_matcher(glob: &str, match_type: GlobMatchType) -> Result<Matcher, Error> {
    // Only globs with wildcards need a regex up front.
    if glob.contains(['*', '?']) {
        return Ok(Matcher::Regex(glob_to_regex(glob, match_type)?));
    }

    let literal = glob.to_lowercase();

    Ok(match match_type {
        // Without wildcards, matching the whole value is just a
        // case-insensitive string comparison.
        GlobMatchType::Whole => Matcher::Whole(literal),

        // For word matching we can first check whether the haystack contains
        // the literal at all; the regex is only built if it does.
        GlobMatchType::Word => Matcher::Word {
            word: literal,
            regex: None,
        },
    })
}
/// Matches against a glob
pub enum Matcher {
    /// Plain regex matching.
    Regex(Regex),

    /// Case-insensitive equality. The stored string is the lowercased glob.
    Whole(String),

    /// Word matching. `word` is the lowercased glob and `regex` is a cache of
    /// calling [`glob_to_regex`] on word; it starts out as `None` and is
    /// filled in the first time the regex is needed.
    Word { word: String, regex: Option<Regex> },
}
impl Matcher {
    /// Checks if the glob matches the given haystack.
    ///
    /// Takes `&mut self` because the `Word` variant compiles and caches its
    /// regex the first time it is needed.
    pub fn is_match(&mut self, haystack: &str) -> Result<bool, Error> {
        // We want to do case-insensitive matching, so we convert to
        // lowercase first.
        let haystack = haystack.to_lowercase();

        match self {
            Matcher::Regex(regex) => Ok(regex.is_match(&haystack)),
            Matcher::Whole(whole) => Ok(whole == &haystack),
            Matcher::Word { word, regex } => {
                // Cheap pre-check: if the word does not even occur as a
                // substring then it cannot occur as a word.
                if !haystack.contains(word.as_str()) {
                    return Ok(false);
                }

                // The substring is present, so test it against the regex to
                // see whether it is an actual word. The regex is compiled on
                // first use and cached for later calls.
                let compiled = match regex {
                    Some(compiled) => compiled,
                    None => regex.insert(glob_to_regex(word, GlobMatchType::Word)?),
                };

                Ok(compiled.is_match(&haystack))
            }
        }
    }
}
/// Checks `get_localpart_from_id` on malformed and well-formed IDs.
#[test]
fn test_get_domain_from_id() {
    // IDs with no colon, or with nothing before the first colon, are rejected.
    for invalid in ["", ":", ":asd", "::as::asad"] {
        assert!(
            get_localpart_from_id(invalid).is_err(),
            "expected {invalid:?} to be rejected"
        );
    }

    // Well-formed IDs yield the text between the ID type and the first colon.
    let valid = [("@test:foo", "test"), ("@:", ""), ("@test:foo:907", "test")];
    for (id, localpart) in valid {
        assert_eq!(get_localpart_from_id(id).unwrap(), localpart);
    }
}
/// Checks both the regex source generated by `glob_to_regex` and the matching
/// behaviour of the compiled regexes.
#[test]
fn tset_glob() -> Result<(), Error> {
    // (glob, expected regex source) when anchoring to the whole input.
    let expected_sources = [
        ("simple", r"\Asimple\z"),
        ("simple*", r"\Asimple.{0,}\z"),
        ("simple?", r"\Asimple.{1}\z"),
        ("simple?*?*", r"\Asimple.{2,}\z"),
        ("simple???", r"\Asimple.{3}\z"),
        ("escape.", r"\Aescape\.\z"),
    ];
    for (glob, source) in expected_sources {
        assert_eq!(glob_to_regex(glob, GlobMatchType::Whole)?.as_str(), source);
    }

    // (glob, match type, haystack, whether the haystack should match).
    let match_cases = [
        ("simple", GlobMatchType::Whole, "simple", true),
        ("simple", GlobMatchType::Whole, "simples", false),
        ("simple*", GlobMatchType::Whole, "simples", true),
        ("simple?", GlobMatchType::Whole, "simples", true),
        ("simple*", GlobMatchType::Whole, "simple", true),
        ("simple", GlobMatchType::Word, "some simple.", true),
        ("simple", GlobMatchType::Word, "simple", true),
        ("simple", GlobMatchType::Word, "simples", false),
        ("@user:foo", GlobMatchType::Word, "Some @user:foo test", true),
        ("@user:foo", GlobMatchType::Word, "@user:foo", true),
    ];
    for (glob, match_type, haystack, should_match) in match_cases {
        assert_eq!(
            glob_to_regex(glob, match_type)?.is_match(haystack),
            should_match,
            "glob {glob:?} against {haystack:?}"
        );
    }

    Ok(())
}

View File

@ -1,4 +1,4 @@
from typing import Any, Collection, Dict, Mapping, Sequence, Tuple, Union from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union
from synapse.types import JsonDict from synapse.types import JsonDict
@ -35,3 +35,20 @@ class FilteredPushRules:
def rules(self) -> Collection[Tuple[PushRule, bool]]: ... def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
def get_base_rule_ids() -> Collection[str]: ... def get_base_rule_ids() -> Collection[str]: ...
# Type stub for the push rule evaluator class that the Rust extension registers
# in its `push` module.
class PushRuleEvaluator:
    # Arguments are positional and mirror the Rust constructor:
    #   flattened_keys: flattened event keys (e.g. "content.body") to string values.
    #   room_member_count: the number of users in the room.
    #   sender_power_level: power level of the event sender, or None if unknown.
    #   notification_power_levels: the `notifications` section of the power levels.
    #   relations: relation type -> set of (sender, event type) tuples.
    #   relation_match_enabled: whether relation conditions are evaluated at all.
    def __init__(
        self,
        flattened_keys: Mapping[str, str],
        room_member_count: int,
        sender_power_level: Optional[int],
        notification_power_levels: Mapping[str, int],
        relations: Mapping[str, Set[Tuple[str, str]]],
        relation_match_enabled: bool,
    ): ...
    # Returns the actions of the first enabled rule whose conditions all match
    # (with `dont_notify` actions removed), or an empty collection.
    # NOTE(review): actions can apparently also be plain strings such as
    # "notify", so `Collection[dict]` may be too narrow — confirm.
    def run(
        self,
        push_rules: FilteredPushRules,
        user_id: Optional[str],
        display_name: Optional[str],
    ) -> Collection[dict]: ...

View File

@ -17,6 +17,7 @@ import itertools
import logging import logging
from typing import ( from typing import (
TYPE_CHECKING, TYPE_CHECKING,
Any,
Collection, Collection,
Dict, Dict,
Iterable, Iterable,
@ -37,13 +38,11 @@ from synapse.events.snapshot import EventContext
from synapse.state import POWER_KEY from synapse.state import POWER_KEY
from synapse.storage.databases.main.roommember import EventIdMembership from synapse.storage.databases.main.roommember import EventIdMembership
from synapse.storage.state import StateFilter from synapse.storage.state import StateFilter
from synapse.synapse_rust.push import FilteredPushRules, PushRule from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRuleEvaluator
from synapse.util.caches import register_cache from synapse.util.caches import register_cache
from synapse.util.metrics import measure_func from synapse.util.metrics import measure_func
from synapse.visibility import filter_event_for_clients_with_state from synapse.visibility import filter_event_for_clients_with_state
from .push_rule_evaluator import PushRuleEvaluatorForEvent
if TYPE_CHECKING: if TYPE_CHECKING:
from synapse.server import HomeServer from synapse.server import HomeServer
@ -290,11 +289,11 @@ class BulkPushRuleEvaluator:
if relation.rel_type == RelationTypes.THREAD: if relation.rel_type == RelationTypes.THREAD:
thread_id = relation.parent_id thread_id = relation.parent_id
evaluator = PushRuleEvaluatorForEvent( evaluator = PushRuleEvaluator(
event, _flatten_dict(event),
room_member_count, room_member_count,
sender_power_level, sender_power_level,
power_levels, power_levels.get("notifications", {}),
relations, relations,
self._relations_match_enabled, self._relations_match_enabled,
) )
@ -338,17 +337,10 @@ class BulkPushRuleEvaluator:
# current user, it'll be added to the dict later. # current user, it'll be added to the dict later.
actions_by_user[uid] = [] actions_by_user[uid] = []
for rule, enabled in rules.rules(): actions = evaluator.run(rules, uid, display_name)
if not enabled: if "notify" in actions:
continue # Push rules say we should notify the user of this event
actions_by_user[uid] = actions
matches = evaluator.check_conditions(rule.conditions, uid, display_name)
if matches:
actions = [x for x in rule.actions if x != "dont_notify"]
if actions and "notify" in actions:
# Push rules say we should notify the user of this event
actions_by_user[uid] = actions
break
# Mark in the DB staging area the push actions for users who should be # Mark in the DB staging area the push actions for users who should be
# notified for this event. (This will then get handled when we persist # notified for this event. (This will then get handled when we persist
@ -365,3 +357,21 @@ MemberMap = Dict[str, Optional[EventIdMembership]]
Rule = Dict[str, dict] Rule = Dict[str, dict]
RulesByUser = Dict[str, List[Rule]] RulesByUser = Dict[str, List[Rule]]
StateGroup = Union[object, int] StateGroup = Union[object, int]
def _flatten_dict(
d: Union[EventBase, Mapping[str, Any]],
prefix: Optional[List[str]] = None,
result: Optional[Dict[str, str]] = None,
) -> Dict[str, str]:
if prefix is None:
prefix = []
if result is None:
result = {}
for key, value in d.items():
if isinstance(value, str):
result[".".join(prefix + [key])] = value.lower()
elif isinstance(value, Mapping):
_flatten_dict(value, prefix=(prefix + [key]), result=result)
return result

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
import logging import logging
import urllib.parse import urllib.parse
from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Union from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
from prometheus_client import Counter from prometheus_client import Counter
@ -28,7 +28,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.push import Pusher, PusherConfig, PusherConfigException from synapse.push import Pusher, PusherConfig, PusherConfigException
from synapse.storage.databases.main.event_push_actions import HttpPushAction from synapse.storage.databases.main.event_push_actions import HttpPushAction
from . import push_rule_evaluator, push_tools from . import push_tools
if TYPE_CHECKING: if TYPE_CHECKING:
from synapse.server import HomeServer from synapse.server import HomeServer
@ -56,6 +56,39 @@ http_badges_failed_counter = Counter(
) )
def tweaks_for_actions(actions: List[Union[str, Dict]]) -> Dict[str, Any]:
    """
    Build the `tweaks` dict (as sent to the push gateway) from a list of
    push actions.

    Only dict actions containing a `set_tweak` key contribute. A missing
    `value` is taken to be `True`, matching the only spec-defined handling of
    an absent `value` (the `highlight` tweak). If the same tweak is set more
    than once, the last occurrence wins.

    Args:
        actions: list of actions
            e.g. [
                {"set_tweak": "a", "value": "AAA"},
                {"set_tweak": "b", "value": "BBB"},
                {"set_tweak": "highlight"},
                "notify"
            ]

    Returns:
        dictionary of tweaks for those actions
            e.g. {"a": "AAA", "b": "BBB", "highlight": True}
    """
    return {
        action["set_tweak"]: action.get("value", True)
        for action in actions
        # Plain string actions such as "notify" carry no tweak.
        if isinstance(action, dict) and "set_tweak" in action
    }
class HttpPusher(Pusher): class HttpPusher(Pusher):
INITIAL_BACKOFF_SEC = 1 # in seconds because that's what Twisted takes INITIAL_BACKOFF_SEC = 1 # in seconds because that's what Twisted takes
MAX_BACKOFF_SEC = 60 * 60 MAX_BACKOFF_SEC = 60 * 60
@ -281,7 +314,7 @@ class HttpPusher(Pusher):
if "notify" not in push_action.actions: if "notify" not in push_action.actions:
return True return True
tweaks = push_rule_evaluator.tweaks_for_actions(push_action.actions) tweaks = tweaks_for_actions(push_action.actions)
badge = await push_tools.get_badge_count( badge = await push_tools.get_badge_count(
self.hs.get_datastores().main, self.hs.get_datastores().main,
self.user_id, self.user_id,

View File

@ -1,361 +0,0 @@
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2017 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import re
from typing import (
Any,
Dict,
List,
Mapping,
Optional,
Pattern,
Sequence,
Set,
Tuple,
Union,
)
from matrix_common.regex import glob_to_regex, to_word_pattern
from synapse.events import EventBase
from synapse.types import UserID
from synapse.util.caches.lrucache import LruCache
logger = logging.getLogger(__name__)
# Matches a backslash-escaped bracket expression, i.e. `\[` ... `\]`, capturing
# an optional escaped `!` and then the bracket contents.
# NOTE(review): not referenced by the code visible in this module — confirm
# whether anything still imports it.
GLOB_REGEX = re.compile(r"\\\[(\\\!|)(.*)\\\]")
# Matches any single glob metacharacter: `?`, `*`, `[` or `]`.
# NOTE(review): also not referenced by the code visible in this module.
IS_GLOB = re.compile(r"[\?\*\[\]]")
# Splits an inequality expression such as ">=10" into a (possibly empty) run of
# `=`, `<`, `>` characters and a (possibly empty) run of ASCII digits. Used by
# _test_ineq_condition, which validates both groups further.
INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$")
def _room_member_count(condition: Mapping[str, Any], room_member_count: int) -> bool:
return _test_ineq_condition(condition, room_member_count)
def _sender_notification_permission(
condition: Mapping[str, Any],
sender_power_level: Optional[int],
power_levels: Dict[str, Union[int, Dict[str, int]]],
) -> bool:
if sender_power_level is None:
return False
notif_level_key = condition.get("key")
if notif_level_key is None:
return False
notif_levels = power_levels.get("notifications", {})
assert isinstance(notif_levels, dict)
room_notif_level = notif_levels.get(notif_level_key, 50)
return sender_power_level >= room_notif_level
def _test_ineq_condition(condition: Mapping[str, Any], number: int) -> bool:
if "is" not in condition:
return False
m = INEQUALITY_EXPR.match(condition["is"])
if not m:
return False
ineq = m.group(1)
rhs = m.group(2)
if not rhs.isdigit():
return False
rhs_int = int(rhs)
if ineq == "" or ineq == "==":
return number == rhs_int
elif ineq == "<":
return number < rhs_int
elif ineq == ">":
return number > rhs_int
elif ineq == ">=":
return number >= rhs_int
elif ineq == "<=":
return number <= rhs_int
else:
return False
def tweaks_for_actions(actions: List[Union[str, Dict]]) -> Dict[str, Any]:
    """
    Collects the `set_tweak` actions from a list of push actions into a
    `tweaks` dict suitable for passing to the push gateway.

    Every action that is not a dict with a `set_tweak` key is ignored. When an
    action has no `value`, `True` is used, which agrees with the only
    spec-defined treatment of absent `value`s (namely, for `highlight`
    tweaks).

    Args:
        actions: list of actions
            e.g. [
                {"set_tweak": "a", "value": "AAA"},
                {"set_tweak": "b", "value": "BBB"},
                {"set_tweak": "highlight"},
                "notify"
            ]

    Returns:
        dictionary of tweaks for those actions
            e.g. {"a": "AAA", "b": "BBB", "highlight": True}
    """
    collected: Dict[str, Any] = {}
    for action in actions:
        if isinstance(action, dict) and "set_tweak" in action:
            tweak_name = action["set_tweak"]
            # An absent value means the tweak is simply switched on.
            collected[tweak_name] = action.get("value", True)
    return collected
class PushRuleEvaluatorForEvent:
    """Evaluates push rule conditions against a single event.

    The event is flattened once at construction time into a map of dotted
    key -> lower-cased string value. Results of conditions that carry a
    `_cache_key` are memoised for the lifetime of the evaluator.
    """

    def __init__(
        self,
        event: EventBase,
        room_member_count: int,
        sender_power_level: Optional[int],
        power_levels: Dict[str, Union[int, Dict[str, int]]],
        relations: Dict[str, Set[Tuple[str, str]]],
        relations_match_enabled: bool,
    ):
        """
        Args:
            event: The event the conditions are evaluated against.
            room_member_count: Number compared by "room_member_count"
                conditions.
            sender_power_level: The sender's power level, or None.
            power_levels: Power level content; its "notifications" entry is
                consulted by "sender_notification_permission" conditions.
            relations: Maps a relation type to a set of (sender, event type)
                pairs, consulted by relation_match conditions.
            relations_match_enabled: Whether the
                "org.matrix.msc3772.relation_match" condition kind is
                evaluated (otherwise it is treated as an unknown kind).
        """
        self._event = event
        self._room_member_count = room_member_count
        self._sender_power_level = sender_power_level
        self._power_levels = power_levels
        self._relations = relations
        self._relations_match_enabled = relations_match_enabled

        # Maps strings of e.g. 'content.body' -> event["content"]["body"]
        self._value_cache = _flatten_dict(event)

        # Maps cache keys to final values.
        self._condition_cache: Dict[str, bool] = {}

    def check_conditions(
        self, conditions: Sequence[Mapping], uid: str, display_name: Optional[str]
    ) -> bool:
        """
        Returns true if a user's conditions/user ID/display name match the event.

        Args:
            conditions: The user's conditions to match.
            uid: The user's MXID.
            display_name: The display name.

        Returns:
            True if all conditions match the event, False otherwise.
        """
        for cond in conditions:
            _cache_key = cond.get("_cache_key", None)
            if _cache_key:
                # Only an explicit True/False is a cache hit; None means the
                # condition has not been evaluated yet.
                res = self._condition_cache.get(_cache_key, None)
                if res is False:
                    return False
                elif res is True:
                    continue

            res = self.matches(cond, uid, display_name)
            if _cache_key:
                self._condition_cache[_cache_key] = bool(res)

            if not res:
                return False

        return True

    def matches(
        self, condition: Mapping[str, Any], user_id: str, display_name: Optional[str]
    ) -> bool:
        """
        Returns true if a user's condition/user ID/display name match the event.

        Args:
            condition: The user's condition to match.
            user_id: The user's MXID.
            display_name: The display name, or None if there is not one.

        Returns:
            True if the condition matches the event, False otherwise.
        """
        kind = condition["kind"]
        if kind == "event_match":
            return self._event_match(condition, user_id)
        elif kind == "contains_display_name":
            return self._contains_display_name(display_name)
        elif kind == "room_member_count":
            return _room_member_count(condition, self._room_member_count)
        elif kind == "sender_notification_permission":
            return _sender_notification_permission(
                condition, self._sender_power_level, self._power_levels
            )
        elif (
            kind == "org.matrix.msc3772.relation_match"
            and self._relations_match_enabled
        ):
            return self._relation_match(condition, user_id)
        else:
            # XXX This looks incorrect -- we have reached an unknown condition
            #     kind and are unconditionally returning that it matches. Note
            #     that it seems possible to provide a condition to the /pushrules
            #     endpoint with an unknown kind, see _rule_tuple_from_request_object.
            return True

    def _event_match(self, condition: Mapping, user_id: str) -> bool:
        """
        Check an "event_match" push rule condition.

        Args:
            condition: The "event_match" push rule condition to match.
            user_id: The user's MXID.

        Returns:
            True if the condition matches the event, False otherwise
            (including when the condition has no usable pattern or key).
        """
        pattern = condition.get("pattern", None)

        if not pattern:
            # No literal pattern: derive one from the user the rule belongs to.
            pattern_type = condition.get("pattern_type", None)
            if pattern_type == "user_id":
                pattern = user_id
            elif pattern_type == "user_localpart":
                pattern = UserID.from_string(user_id).localpart

        if not pattern:
            logger.warning("event_match condition with no pattern")
            return False

        # Conditions are user-supplied: a missing or non-string key can never
        # name a flattened event field, so treat it as a non-match rather
        # than raising KeyError (missing) or TypeError (unhashable).
        key = condition.get("key", None)
        if not isinstance(key, str):
            logger.warning("event_match condition with no key")
            return False

        # XXX: optimisation: cache our pattern regexps
        if key == "content.body":
            # The body is matched on word boundaries against the original
            # (not lower-cased) text.
            body = self._event.content.get("body", None)
            if not body or not isinstance(body, str):
                return False

            return _glob_matches(pattern, body, word_boundary=True)
        else:
            haystack = self._value_cache.get(key, None)
            if haystack is None:
                return False

            return _glob_matches(pattern, haystack)

    def _contains_display_name(self, display_name: Optional[str]) -> bool:
        """
        Check a "contains_display_name" push rule condition.

        Args:
            display_name: The display name, or None if there is not one.

        Returns:
            True if the display name is found in the event body, False otherwise.
        """
        if not display_name:
            return False

        body = self._event.content.get("body", None)
        if not body or not isinstance(body, str):
            return False

        # Similar to _glob_matches, but do not treat display_name as a glob.
        r = regex_cache.get((display_name, False, True), None)
        if not r:
            r1 = re.escape(display_name)
            r1 = to_word_pattern(r1)
            r = re.compile(r1, flags=re.IGNORECASE)
            regex_cache[(display_name, False, True)] = r

        return bool(r.search(body))

    def _relation_match(self, condition: Mapping, user_id: str) -> bool:
        """
        Check a "relation_match" push rule condition.

        Args:
            condition: The "relation_match" push rule condition to match.
            user_id: The user's MXID.

        Returns:
            True if the condition matches the event, False otherwise.
        """
        rel_type = condition.get("rel_type")
        if not rel_type:
            logger.warning("relation_match condition missing rel_type")
            return False

        sender_pattern = condition.get("sender")
        if sender_pattern is None:
            sender_type = condition.get("sender_type")
            if sender_type == "user_id":
                sender_pattern = user_id
        type_pattern = condition.get("type")

        # If any other relations matches, return True.
        for sender, event_type in self._relations.get(rel_type, ()):
            # An absent (falsy) pattern places no constraint on that field.
            if sender_pattern and not _glob_matches(sender_pattern, sender):
                continue
            if type_pattern and not _glob_matches(type_pattern, event_type):
                continue
            # All values must have matched.
            return True

        # No relations matched.
        return False
# Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches
# Glob patterns are stored with is_glob=True by _glob_matches; literal display
# names are stored with is_glob=False by
# PushRuleEvaluatorForEvent._contains_display_name.
regex_cache: LruCache[Tuple[str, bool, bool], Pattern] = LruCache(
50000, "regex_push_cache"
)
def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
"""Tests if value matches glob.
Args:
glob
value: String to test against glob.
word_boundary: Whether to match against word boundaries or entire
string. Defaults to False.
"""
try:
r = regex_cache.get((glob, True, word_boundary), None)
if not r:
r = glob_to_regex(glob, word_boundary=word_boundary)
regex_cache[(glob, True, word_boundary)] = r
return bool(r.search(value))
except re.error:
logger.warning("Failed to parse glob to regex: %r", glob)
return False
def _flatten_dict(
d: Union[EventBase, Mapping[str, Any]],
prefix: Optional[List[str]] = None,
result: Optional[Dict[str, str]] = None,
) -> Dict[str, str]:
if prefix is None:
prefix = []
if result is None:
result = {}
for key, value in d.items():
if isinstance(value, str):
result[".".join(prefix + [key])] = value.lower()
elif isinstance(value, Mapping):
_flatten_dict(value, prefix=(prefix + [key]), result=result)
return result

View File

@ -23,11 +23,12 @@ from synapse.api.constants import EventTypes, Membership
from synapse.api.room_versions import RoomVersions from synapse.api.room_versions import RoomVersions
from synapse.appservice import ApplicationService from synapse.appservice import ApplicationService
from synapse.events import FrozenEvent from synapse.events import FrozenEvent
from synapse.push import push_rule_evaluator from synapse.push.bulk_push_rule_evaluator import _flatten_dict
from synapse.push.push_rule_evaluator import PushRuleEvaluatorForEvent from synapse.push.httppusher import tweaks_for_actions
from synapse.rest.client import login, register, room from synapse.rest.client import login, register, room
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.storage.databases.main.appservice import _make_exclusive_regex from synapse.storage.databases.main.appservice import _make_exclusive_regex
from synapse.synapse_rust.push import PushRuleEvaluator
from synapse.types import JsonDict from synapse.types import JsonDict
from synapse.util import Clock from synapse.util import Clock
@ -41,7 +42,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
content: JsonDict, content: JsonDict,
relations: Optional[Dict[str, Set[Tuple[str, str]]]] = None, relations: Optional[Dict[str, Set[Tuple[str, str]]]] = None,
relations_match_enabled: bool = False, relations_match_enabled: bool = False,
) -> PushRuleEvaluatorForEvent: ) -> PushRuleEvaluator:
event = FrozenEvent( event = FrozenEvent(
{ {
"event_id": "$event_id", "event_id": "$event_id",
@ -56,12 +57,12 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
room_member_count = 0 room_member_count = 0
sender_power_level = 0 sender_power_level = 0
power_levels: Dict[str, Union[int, Dict[str, int]]] = {} power_levels: Dict[str, Union[int, Dict[str, int]]] = {}
return PushRuleEvaluatorForEvent( return PushRuleEvaluator(
event, _flatten_dict(event),
room_member_count, room_member_count,
sender_power_level, sender_power_level,
power_levels, power_levels.get("notifications", {}),
relations or set(), relations or {},
relations_match_enabled, relations_match_enabled,
) )
@ -293,7 +294,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
] ]
self.assertEqual( self.assertEqual(
push_rule_evaluator.tweaks_for_actions(actions), tweaks_for_actions(actions),
{"sound": "default", "highlight": True}, {"sound": "default", "highlight": True},
) )
@ -304,9 +305,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
evaluator = self._get_evaluator( evaluator = self._get_evaluator(
{}, {"m.annotation": {("@user:test", "m.reaction")}} {}, {"m.annotation": {("@user:test", "m.reaction")}}
) )
condition = {"kind": "relation_match"}
# Oddly, an unknown condition always matches.
self.assertTrue(evaluator.matches(condition, "@user:test", "foo"))
# A push rule evaluator with the experimental rule enabled. # A push rule evaluator with the experimental rule enabled.
evaluator = self._get_evaluator( evaluator = self._get_evaluator(