Implement push rule evaluation in Rust. (#13838)

This commit is contained in:
Erik Johnston 2022-09-29 16:12:09 +01:00 committed by GitHub
parent a466164647
commit ebd9e2dac6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 894 additions and 403 deletions

View file

@ -262,6 +262,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
priority_class: 1,
conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch {
rel_type: Cow::Borrowed("m.thread"),
event_type_pattern: None,
sender: None,
sender_type: Some(Cow::Borrowed("user_id")),
})]),

374
rust/src/push/evaluator.rs Normal file
View file

@ -0,0 +1,374 @@
// Copyright 2022 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{
borrow::Cow,
collections::{BTreeMap, BTreeSet},
};
use anyhow::{Context, Error};
use lazy_static::lazy_static;
use log::warn;
use pyo3::prelude::*;
use regex::Regex;
use super::{
utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType},
Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition,
};
lazy_static! {
/// Used to parse the `is` clause in the room member count condition.
static ref INEQUALITY_EXPR: Regex = Regex::new(r"^([=<>]*)([0-9]+)$").expect("valid regex");
}
/// Allows running a set of push rules against a particular event.
#[pyclass]
pub struct PushRuleEvaluator {
/// A mapping of "flattened" keys to string values in the event, e.g.
/// includes things like "type" and "content.msgtype".
flattened_keys: BTreeMap<String, String>,
/// The "content.body", if any.
body: String,
/// The number of users in the room.
room_member_count: u64,
/// The `notifications` section of the current power levels in the room.
notification_power_levels: BTreeMap<String, i64>,
/// The relations related to the event as a mapping from relation type to
/// set of sender/event type 2-tuples.
relations: BTreeMap<String, BTreeSet<(String, String)>>,
/// Is running "relation" conditions enabled?
relation_match_enabled: bool,
/// The power level of the sender of the event, or None if event is an
/// outlier.
sender_power_level: Option<i64>,
}
#[pymethods]
impl PushRuleEvaluator {
/// Create a new `PushRuleEvaluator`. See struct docstring for details.
#[new]
pub fn py_new(
flattened_keys: BTreeMap<String, String>,
room_member_count: u64,
sender_power_level: Option<i64>,
notification_power_levels: BTreeMap<String, i64>,
relations: BTreeMap<String, BTreeSet<(String, String)>>,
relation_match_enabled: bool,
) -> Result<Self, Error> {
let body = flattened_keys
.get("content.body")
.cloned()
.unwrap_or_default();
Ok(PushRuleEvaluator {
flattened_keys,
body,
room_member_count,
notification_power_levels,
relations,
relation_match_enabled,
sender_power_level,
})
}
/// Run the evaluator with the given push rules, for the given user ID and
/// display name of the user.
///
/// Passing in None will skip evaluating rules matching user ID and display
/// name.
///
/// Returns the set of actions, if any, that match (filtering out any
/// `dont_notify` actions).
pub fn run(
&self,
push_rules: &FilteredPushRules,
user_id: Option<&str>,
display_name: Option<&str>,
) -> Vec<Action> {
'outer: for (push_rule, enabled) in push_rules.iter() {
if !enabled {
continue;
}
for condition in push_rule.conditions.iter() {
match self.match_condition(condition, user_id, display_name) {
Ok(true) => {}
Ok(false) => continue 'outer,
Err(err) => {
warn!("Condition match failed {err}");
continue 'outer;
}
}
}
let actions = push_rule
.actions
.iter()
// Filter out "dont_notify" actions, as we don't store them.
.filter(|a| **a != Action::DontNotify)
.cloned()
.collect();
return actions;
}
Vec::new()
}
/// Check if the given condition matches.
fn matches(
&self,
condition: Condition,
user_id: Option<&str>,
display_name: Option<&str>,
) -> bool {
match self.match_condition(&condition, user_id, display_name) {
Ok(true) => true,
Ok(false) => false,
Err(err) => {
warn!("Condition match failed {err}");
false
}
}
}
}
impl PushRuleEvaluator {
/// Match a given `Condition` for a push rule.
pub fn match_condition(
&self,
condition: &Condition,
user_id: Option<&str>,
display_name: Option<&str>,
) -> Result<bool, Error> {
let known_condition = match condition {
Condition::Known(known) => known,
Condition::Unknown(_) => {
return Ok(false);
}
};
let result = match known_condition {
KnownCondition::EventMatch(event_match) => {
self.match_event_match(event_match, user_id)?
}
KnownCondition::ContainsDisplayName => {
if let Some(dn) = display_name {
if !dn.is_empty() {
get_glob_matcher(dn, GlobMatchType::Word)?.is_match(&self.body)?
} else {
// We specifically ignore empty display names, as otherwise
// they would always match.
false
}
} else {
false
}
}
KnownCondition::RoomMemberCount { is } => {
if let Some(is) = is {
self.match_member_count(is)?
} else {
false
}
}
KnownCondition::SenderNotificationPermission { key } => {
if let Some(sender_power_level) = &self.sender_power_level {
let required_level = self
.notification_power_levels
.get(key.as_ref())
.copied()
.unwrap_or(50);
*sender_power_level >= required_level
} else {
false
}
}
KnownCondition::RelationMatch {
rel_type,
event_type_pattern,
sender,
sender_type,
} => {
self.match_relations(rel_type, sender, sender_type, user_id, event_type_pattern)?
}
};
Ok(result)
}
/// Evaluates a relation condition.
fn match_relations(
&self,
rel_type: &str,
sender: &Option<Cow<str>>,
sender_type: &Option<Cow<str>>,
user_id: Option<&str>,
event_type_pattern: &Option<Cow<str>>,
) -> Result<bool, Error> {
// First check if relation matching is enabled...
if !self.relation_match_enabled {
return Ok(false);
}
// ... and if there are any relations to match against.
let relations = if let Some(relations) = self.relations.get(rel_type) {
relations
} else {
return Ok(false);
};
// Extract the sender pattern from the condition
let sender_pattern = if let Some(sender) = sender {
Some(sender.as_ref())
} else if let Some(sender_type) = sender_type {
if sender_type == "user_id" {
if let Some(user_id) = user_id {
Some(user_id)
} else {
return Ok(false);
}
} else {
warn!("Unrecognized sender_type: {sender_type}");
return Ok(false);
}
} else {
None
};
let mut sender_compiled_pattern = if let Some(pattern) = sender_pattern {
Some(get_glob_matcher(pattern, GlobMatchType::Whole)?)
} else {
None
};
let mut type_compiled_pattern = if let Some(pattern) = event_type_pattern {
Some(get_glob_matcher(pattern, GlobMatchType::Whole)?)
} else {
None
};
for (relation_sender, event_type) in relations {
if let Some(pattern) = &mut sender_compiled_pattern {
if !pattern.is_match(relation_sender)? {
continue;
}
}
if let Some(pattern) = &mut type_compiled_pattern {
if !pattern.is_match(event_type)? {
continue;
}
}
return Ok(true);
}
Ok(false)
}
/// Evaluates a `event_match` condition.
fn match_event_match(
&self,
event_match: &EventMatchCondition,
user_id: Option<&str>,
) -> Result<bool, Error> {
let pattern = if let Some(pattern) = &event_match.pattern {
pattern
} else if let Some(pattern_type) = &event_match.pattern_type {
// The `pattern_type` can either be "user_id" or "user_localpart",
// either way if we don't have a `user_id` then the condition can't
// match.
let user_id = if let Some(user_id) = user_id {
user_id
} else {
return Ok(false);
};
match &**pattern_type {
"user_id" => user_id,
"user_localpart" => get_localpart_from_id(user_id)?,
_ => return Ok(false),
}
} else {
return Ok(false);
};
let haystack = if let Some(haystack) = self.flattened_keys.get(&*event_match.key) {
haystack
} else {
return Ok(false);
};
// For the content.body we match against "words", but for everything
// else we match against the entire value.
let match_type = if event_match.key == "content.body" {
GlobMatchType::Word
} else {
GlobMatchType::Whole
};
let mut compiled_pattern = get_glob_matcher(pattern, match_type)?;
compiled_pattern.is_match(haystack)
}
/// Match the member count against an 'is' condition
/// The `is` condition can be things like '>2', '==3' or even just '4'.
fn match_member_count(&self, is: &str) -> Result<bool, Error> {
let captures = INEQUALITY_EXPR.captures(is).context("bad 'is' clause")?;
let ineq = captures.get(1).map_or("==", |m| m.as_str());
let rhs: u64 = captures
.get(2)
.context("missing number")?
.as_str()
.parse()?;
let matches = match ineq {
"" | "==" => self.room_member_count == rhs,
"<" => self.room_member_count < rhs,
">" => self.room_member_count > rhs,
">=" => self.room_member_count >= rhs,
"<=" => self.room_member_count <= rhs,
_ => false,
};
Ok(matches)
}
}
#[test]
fn push_rule_evaluator() {
let mut flattened_keys = BTreeMap::new();
flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
let evaluator = PushRuleEvaluator::py_new(
flattened_keys,
10,
Some(0),
BTreeMap::new(),
BTreeMap::new(),
true,
)
.unwrap();
let result = evaluator.run(&FilteredPushRules::default(), None, Some("bob"));
assert_eq!(result.len(), 3);
}

View file

@ -42,7 +42,6 @@
//!
//! The set of "base rules" are the list of rules that every user has by default. A
//! user can modify their copy of the push rules in one of three ways:
//!
//! 1. Adding a new push rule of a certain kind
//! 2. Changing the actions of a base rule
//! 3. Enabling/disabling a base rule.
@ -58,12 +57,16 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use anyhow::{Context, Error};
use log::warn;
use pyo3::prelude::*;
use pythonize::pythonize;
use pythonize::{depythonize, pythonize};
use serde::de::Error as _;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use self::evaluator::PushRuleEvaluator;
mod base_rules;
pub mod evaluator;
pub mod utils;
/// Called when registering modules with python.
pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
@ -71,6 +74,7 @@ pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
child_module.add_class::<PushRule>()?;
child_module.add_class::<PushRules>()?;
child_module.add_class::<FilteredPushRules>()?;
child_module.add_class::<PushRuleEvaluator>()?;
child_module.add_function(wrap_pyfunction!(get_base_rule_ids, m)?)?;
m.add_submodule(child_module)?;
@ -274,6 +278,8 @@ pub enum KnownCondition {
#[serde(rename = "org.matrix.msc3772.relation_match")]
RelationMatch {
rel_type: Cow<'static, str>,
#[serde(skip_serializing_if = "Option::is_none", rename = "type")]
event_type_pattern: Option<Cow<'static, str>>,
#[serde(skip_serializing_if = "Option::is_none")]
sender: Option<Cow<'static, str>>,
#[serde(skip_serializing_if = "Option::is_none")]
@ -287,20 +293,26 @@ impl IntoPy<PyObject> for Condition {
}
}
impl<'source> FromPyObject<'source> for Condition {
fn extract(ob: &'source PyAny) -> PyResult<Self> {
Ok(depythonize(ob)?)
}
}
/// The body of a [`Condition::EventMatch`]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct EventMatchCondition {
key: Cow<'static, str>,
pub key: Cow<'static, str>,
#[serde(skip_serializing_if = "Option::is_none")]
pattern: Option<Cow<'static, str>>,
pub pattern: Option<Cow<'static, str>>,
#[serde(skip_serializing_if = "Option::is_none")]
pattern_type: Option<Cow<'static, str>>,
pub pattern_type: Option<Cow<'static, str>>,
}
/// The collection of push rules for a user.
#[derive(Debug, Clone, Default)]
#[pyclass(frozen)]
struct PushRules {
pub struct PushRules {
/// Custom push rules that override a base rule.
overridden_base_rules: HashMap<Cow<'static, str>, PushRule>,
@ -319,7 +331,7 @@ struct PushRules {
#[pymethods]
impl PushRules {
#[new]
fn new(rules: Vec<PushRule>) -> PushRules {
pub fn new(rules: Vec<PushRule>) -> PushRules {
let mut push_rules: PushRules = Default::default();
for rule in rules {
@ -396,7 +408,7 @@ pub struct FilteredPushRules {
#[pymethods]
impl FilteredPushRules {
#[new]
fn py_new(
pub fn py_new(
push_rules: PushRules,
enabled_map: BTreeMap<String, bool>,
msc3786_enabled: bool,

215
rust/src/push/utils.rs Normal file
View file

@ -0,0 +1,215 @@
// Copyright 2022 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use anyhow::bail;
use anyhow::Context;
use anyhow::Error;
use lazy_static::lazy_static;
use regex;
use regex::Regex;
use regex::RegexBuilder;
lazy_static! {
/// Matches runs of non-wildcard characters followed by wildcard characters.
static ref WILDCARD_RUN: Regex = Regex::new(r"([^\?\*]*)([\?\*]*)").expect("valid regex");
}
/// Extract the localpart from a Matrix style ID
pub(crate) fn get_localpart_from_id(id: &str) -> Result<&str, Error> {
let (localpart, _) = id
.split_once(':')
.with_context(|| format!("ID does not contain colon: {id}"))?;
// We need to strip off the first character, which is the ID type.
if localpart.is_empty() {
bail!("Invalid ID {id}");
}
Ok(&localpart[1..])
}
/// Used by `glob_to_regex` to specify what to match the regex against.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GlobMatchType {
/// The generated regex will match against the entire input.
Whole,
/// The generated regex will match against words.
Word,
}
/// Convert a "glob" style expression to a regex, anchoring either to the entire
/// input or to individual words.
pub fn glob_to_regex(glob: &str, match_type: GlobMatchType) -> Result<Regex, Error> {
let mut chunks = Vec::new();
// Patterns with wildcards must be simplified to avoid performance cliffs
// - The glob `?**?**?` is equivalent to the glob `???*`
// - The glob `???*` is equivalent to the regex `.{3,}`
for captures in WILDCARD_RUN.captures_iter(glob) {
if let Some(chunk) = captures.get(1) {
chunks.push(regex::escape(chunk.as_str()));
}
if let Some(wildcards) = captures.get(2) {
if wildcards.as_str() == "" {
continue;
}
let question_marks = wildcards.as_str().chars().filter(|c| *c == '?').count();
if wildcards.as_str().contains('*') {
chunks.push(format!(".{{{question_marks},}}"));
} else {
chunks.push(format!(".{{{question_marks}}}"));
}
}
}
let joined = chunks.join("");
let regex_str = match match_type {
GlobMatchType::Whole => format!(r"\A{joined}\z"),
// `^|\W` and `\W|$` handle the case where `pattern` starts or ends with a non-word
// character.
GlobMatchType::Word => format!(r"(?:^|\b|\W){joined}(?:\b|\W|$)"),
};
Ok(RegexBuilder::new(&regex_str)
.case_insensitive(true)
.build()?)
}
/// Compiles the glob into a `Matcher`.
pub fn get_glob_matcher(glob: &str, match_type: GlobMatchType) -> Result<Matcher, Error> {
// There are a number of shortcuts we can make if the glob doesn't contain a
// wild card.
let matcher = if glob.contains(['*', '?']) {
let regex = glob_to_regex(glob, match_type)?;
Matcher::Regex(regex)
} else if match_type == GlobMatchType::Whole {
// If there aren't any wildcards and we're matching the whole thing,
// then we simply can do a case-insensitive string match.
Matcher::Whole(glob.to_lowercase())
} else {
// Otherwise, if we're matching against words then can first check
// if the haystack contains the glob at all.
Matcher::Word {
word: glob.to_lowercase(),
regex: None,
}
};
Ok(matcher)
}
/// Matches against a glob
pub enum Matcher {
/// Plain regex matching.
Regex(Regex),
/// Case-insensitive equality.
Whole(String),
/// Word matching. `regex` is a cache of calling [`glob_to_regex`] on word.
Word { word: String, regex: Option<Regex> },
}
impl Matcher {
/// Checks if the glob matches the given haystack.
pub fn is_match(&mut self, haystack: &str) -> Result<bool, Error> {
// We want to to do case-insensitive matching, so we convert to
// lowercase first.
let haystack = haystack.to_lowercase();
match self {
Matcher::Regex(regex) => Ok(regex.is_match(&haystack)),
Matcher::Whole(whole) => Ok(whole == &haystack),
Matcher::Word { word, regex } => {
// If we're looking for a literal word, then we first check if
// the haystack contains the word as a substring.
if !haystack.contains(&*word) {
return Ok(false);
}
// If it does contain the word as a substring, then we need to
// check if it is an actual word by testing it against the regex.
let regex = if let Some(regex) = regex {
regex
} else {
let compiled_regex = glob_to_regex(word, GlobMatchType::Word)?;
regex.insert(compiled_regex)
};
Ok(regex.is_match(&haystack))
}
}
}
}
#[test]
fn test_get_domain_from_id() {
get_localpart_from_id("").unwrap_err();
get_localpart_from_id(":").unwrap_err();
get_localpart_from_id(":asd").unwrap_err();
get_localpart_from_id("::as::asad").unwrap_err();
assert_eq!(get_localpart_from_id("@test:foo").unwrap(), "test");
assert_eq!(get_localpart_from_id("@:").unwrap(), "");
assert_eq!(get_localpart_from_id("@test:foo:907").unwrap(), "test");
}
#[test]
fn tset_glob() -> Result<(), Error> {
assert_eq!(
glob_to_regex("simple", GlobMatchType::Whole)?.as_str(),
r"\Asimple\z"
);
assert_eq!(
glob_to_regex("simple*", GlobMatchType::Whole)?.as_str(),
r"\Asimple.{0,}\z"
);
assert_eq!(
glob_to_regex("simple?", GlobMatchType::Whole)?.as_str(),
r"\Asimple.{1}\z"
);
assert_eq!(
glob_to_regex("simple?*?*", GlobMatchType::Whole)?.as_str(),
r"\Asimple.{2,}\z"
);
assert_eq!(
glob_to_regex("simple???", GlobMatchType::Whole)?.as_str(),
r"\Asimple.{3}\z"
);
assert_eq!(
glob_to_regex("escape.", GlobMatchType::Whole)?.as_str(),
r"\Aescape\.\z"
);
assert!(glob_to_regex("simple", GlobMatchType::Whole)?.is_match("simple"));
assert!(!glob_to_regex("simple", GlobMatchType::Whole)?.is_match("simples"));
assert!(glob_to_regex("simple*", GlobMatchType::Whole)?.is_match("simples"));
assert!(glob_to_regex("simple?", GlobMatchType::Whole)?.is_match("simples"));
assert!(glob_to_regex("simple*", GlobMatchType::Whole)?.is_match("simple"));
assert!(glob_to_regex("simple", GlobMatchType::Word)?.is_match("some simple."));
assert!(glob_to_regex("simple", GlobMatchType::Word)?.is_match("simple"));
assert!(!glob_to_regex("simple", GlobMatchType::Word)?.is_match("simples"));
assert!(glob_to_regex("@user:foo", GlobMatchType::Word)?.is_match("Some @user:foo test"));
assert!(glob_to_regex("@user:foo", GlobMatchType::Word)?.is_match("@user:foo"));
Ok(())
}