synapse-product/synapse/push/push_rule_evaluator.py

# -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import re

from synapse.types import UserID
from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
from synapse.util.caches.lrucache import LruCache

logger = logging.getLogger(__name__)


# Matches one glob character class -- "[abc]", "[a-z]" or "[!a-z]" -- as it
# appears *after* the glob has been passed through re.escape(), i.e. with the
# surrounding brackets backslash-escaped.
#   group 1: the negation marker, or '' if the class is not negated. Both the
#            escaped form "\!" and the bare form "!" are accepted, because
#            re.escape() stopped escaping "!" in Python 3.7.
#   group 2: the (still escaped) members of the class.
# The members are matched lazily so that a glob with several classes, such as
# "[ab]x[cd]", produces one match per class instead of a single match spanning
# all of them. An escaped "]" directly after the opening bracket is kept as a
# member so that the conventional "[]a]" spelling still yields one class.
GLOB_REGEX = re.compile(r'\\\[(\\?!|)((?:\\\])?.*?)\\\]')
# Finds any glob metacharacter (?, *, [ or ]) in a pattern.
IS_GLOB = re.compile(r'[\?\*\[\]]')
# Splits a room_member_count "is" value such as ">=10" into an (optional)
# comparison operator and a run of digits.
INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$")
# Applied to regex source text to see whether it starts / ends with a word
# character, and therefore whether a \b anchor can be attached at that end.
STARTS_WITH_WORD_CHAR_REGEX = re.compile(r"^\w")
ENDS_WITH_WORD_CHAR_REGEX = re.compile(r"\w$")


def _room_member_count(ev, condition, room_member_count):
    if 'is' not in condition:
        return False
    m = INEQUALITY_EXPR.match(condition['is'])
    if not m:
        return False
    ineq = m.group(1)
    rhs = m.group(2)
    if not rhs.isdigit():
        return False
    rhs = int(rhs)

    if ineq == '' or ineq == '==':
        return room_member_count == rhs
    elif ineq == '<':
        return room_member_count < rhs
    elif ineq == '>':
        return room_member_count > rhs
    elif ineq == '>=':
        return room_member_count >= rhs
    elif ineq == '<=':
        return room_member_count <= rhs
    else:
        return False


def tweaks_for_actions(actions):
    """Collects the tweaks set by a list of push actions.

    Args:
        actions (iterable): Push actions. Only dict entries that carry both
            a 'set_tweak' and a 'value' key contribute; everything else
            (e.g. plain string actions) is ignored.

    Returns:
        dict: Maps each tweak name to its value. If a tweak is set more than
        once, the last occurrence wins.
    """
    return {
        action['set_tweak']: action['value']
        for action in actions
        if isinstance(action, dict)
        and 'set_tweak' in action
        and 'value' in action
    }


try:
    _STRING_TYPES = (basestring,)  # Python 2: matches both str and unicode
except NameError:
    _STRING_TYPES = (str,)  # Python 3 has no basestring


class PushRuleEvaluatorForEvent(object):
    """Evaluates push rule conditions against a single event.

    Args:
        event: The event to evaluate conditions against. It is indexed as
            event["content"] and flattened with _flatten_dict, so it has to
            behave like a nested dict.
        room_member_count: The value that room_member_count conditions are
            compared against.
    """

    def __init__(self, event, room_member_count):
        self._event = event
        self._room_member_count = room_member_count

        # Maps strings of e.g. 'content.body' -> event["content"]["body"]
        self._value_cache = _flatten_dict(event)

    def matches(self, condition, user_id, display_name):
        """Tests whether a single push condition holds for the event.

        Args:
            condition (dict): The condition; its 'kind' selects the check.
            user_id (str): The user the rule belongs to. Used to fill in
                the pattern of event_match conditions that specify a
                'pattern_type' instead of a 'pattern'.
            display_name (str|None): The user's display name, used by
                contains_display_name conditions.

        Returns:
            Truthy if the condition matches, falsy otherwise. Conditions of
            an unrecognised kind are treated as matching.
        """
        kind = condition['kind']
        if kind == 'event_match':
            return self._event_match(condition, user_id)
        elif kind == 'contains_display_name':
            return self._contains_display_name(display_name)
        elif kind == 'room_member_count':
            return _room_member_count(
                self._event, condition, self._room_member_count
            )
        else:
            return True

    def _event_match(self, condition, user_id):
        """Tests an event_match condition: a glob against one event field.

        The glob comes from condition['pattern'] or, if that is absent or
        empty, is derived from user_id according to
        condition['pattern_type'] ("user_id" or "user_localpart").
        """
        pattern = condition.get('pattern', None)

        if not pattern:
            pattern_type = condition.get('pattern_type', None)
            if pattern_type == "user_id":
                pattern = user_id
            elif pattern_type == "user_localpart":
                pattern = UserID.from_string(user_id).localpart

        if not pattern:
            logger.warning("event_match condition with no pattern")
            return False

        if condition['key'] == 'content.body':
            # The body is matched word-wise against the original text rather
            # than as a whole against the lower-cased flattened value.
            body = self._get_body()
            if body is None:
                return False

            return _glob_matches(pattern, body, word_boundary=True)
        else:
            haystack = self._get_value(condition['key'])
            if haystack is None:
                return False

            return _glob_matches(pattern, haystack)

    def _contains_display_name(self, display_name):
        """Tests whether display_name occurs as a whole word in the body."""
        if not display_name:
            return False

        body = self._get_body()
        if body is None:
            return False

        return _glob_matches(display_name, body, word_boundary=True)

    def _get_body(self):
        """Returns the event's content.body if it is usable for matching.

        Returns:
            The body if it is a non-empty string, otherwise None. Event
            content is not under our control, so the body may be missing or
            of any JSON type; handing a non-string to a regex search would
            raise a TypeError.
        """
        body = self._event["content"].get("body", None)
        if not body or not isinstance(body, _STRING_TYPES):
            return None
        return body

    def _get_value(self, dotted_key):
        """Looks up a flattened event field, e.g. 'content.msgtype'.

        Returns:
            The lower-cased string value, or None if the event has no string
            value under that key.
        """
        return self._value_cache.get(dotted_key, None)


# Caches (glob, word_boundary) -> compiled regex for push, so that a glob that
# is still in the cache does not have to be translated and compiled again.
# Read and populated by _glob_matches.
# NOTE(review): the LruCache argument is presumably the maximum number of
# entries, scaled by CACHE_SIZE_FACTOR -- confirm against synapse.util.caches.
regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
# NOTE(review): presumably makes the cache visible under this name to the
# shared cache bookkeeping -- confirm against register_cache.
register_cache("regex_push_cache", regex_cache)


def _glob_matches(glob, value, word_boundary=False):
    """Tests if value matches glob.

    The compiled form of each (glob, word_boundary) pair is kept in
    regex_cache so that it is only built on a cache miss.

    Args:
        glob (string)
        value (string): String to test against glob.
        word_boundary (bool): Whether to match against word boundaries or entire
            string. Defaults to False.

    Returns:
        bool: True if value matches. False if it does not, if the glob
        cannot be turned into a valid regex, or if glob or value is not
        something that can be matched (e.g. not a string).
    """
    cache_key = (glob, word_boundary)

    try:
        r = regex_cache.get(cache_key, None)
        if r is None:
            r = _glob_to_re(glob, word_boundary)
            regex_cache[cache_key] = r
        return bool(r.search(value))
    except re.error:
        logger.warning("Failed to parse glob to regex: %r", glob)
        return False
    except TypeError:
        # Both the glob (from a push rule) and the value (from event content)
        # are supplied by users and may be of any JSON type. A badly-typed
        # one must not abort evaluation of the remaining rules.
        logger.warning(
            "Cannot match glob %r against value of type %s",
            glob, type(value).__name__,
        )
        return False


def _glob_to_re(glob, word_boundary):
    """Generates regex for a given glob.

    Supports '*' (any run of characters), '?' (any single character) and
    [abc], [a-z] and [!a-z] style character classes. Everything else in the
    glob is matched literally. Matching is case-insensitive.

    Args:
        glob (string)
        word_boundary (bool): Whether to match against word boundaries
            (True) or the entire string (False).

    Returns:
        regex object

    Raises:
        re.error: If the translated pattern is not a valid regex, e.g. for
            an empty character class "[]".
    """
    # Escape everything first, then selectively turn the escaped glob
    # metacharacters back into their regex equivalents.
    r = re.escape(glob)

    if IS_GLOB.search(glob):
        r = r.replace(r'\*', '.*?')
        r = r.replace(r'\?', '.')

        def _char_class(m):
            negation, members = m.group(1), m.group(2)
            if not negation and members.startswith('!'):
                # re.escape() on Python 3.7+ leaves '!' unescaped, in which
                # case it can end up at the front of the members instead of
                # in the negation group.
                negation, members = '!', members[1:]
            # re.escape() turned '-' into '\-' (one backslash); undo that so
            # that 'a-z' is a range again rather than the three literal
            # characters 'a', '-' and 'z'.
            members = members.replace(r'\-', '-')
            return '[%s%s]' % ('^' if negation else '', members)

        # handle [abc], [a-z] and [!a-z] style ranges.
        r = GLOB_REGEX.sub(_char_class, r)

    if word_boundary:
        r = _re_word_boundary(r)
    else:
        r = "^" + r + "$"

    return re.compile(r, flags=re.IGNORECASE)

def _re_word_boundary(r):
    """
    Wraps a regex in word boundary anchors so that it only matches whole
    words, leaving out the anchor at any end where the expression does not
    begin / finish with a word character.

    Args:
        r (string): Regex source text.

    Returns:
        string: The regex with "\\b" added at the start and/or end where
        applicable.
    """
    # \b is by definition the boundary between a \w and a \W, so it can only
    # be relied on next to a word character. We test the regex source text
    # itself for a \w at either end (which means expressions such as "[dl]og"
    # are not recognised as starting with a word character).
    leading = r"\b" if STARTS_WITH_WORD_CHAR_REGEX.search(r) else ""
    trailing = r"\b" if ENDS_WITH_WORD_CHAR_REGEX.search(r) else ""
    return leading + r + trailing


def _flatten_dict(d, prefix=[], result=None):
    if result is None:
        result = {}
    for key, value in d.items():
        if isinstance(value, basestring):
            result[".".join(prefix + [key])] = value.lower()
        elif hasattr(value, "items"):
            _flatten_dict(value, prefix=(prefix + [key]), result=result)

    return result
Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00			`# -- coding: utf-8 --`
copyrights 2016-01-06 23:26:29 -05:00			`# Copyright 2015, 2016 OpenMarket Ltd`
Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

			`import logging`
			`import re`

Make notifications go quicker 2016-01-18 09:09:47 -05:00			`from synapse.types import UserID`
Cache glob to regex at a higher level for push 2017-03-29 10:53:14 -04:00			`from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache`
Add regex cache. Only caculate push actions for users that have sent read receipts, and are on that server 2016-01-19 11:01:05 -05:00			`from synapse.util.caches.lrucache import LruCache`
Make notifications go quicker 2016-01-18 09:09:47 -05:00
Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00			`logger = logging.getLogger(__name__)`


Make notifications go quicker 2016-01-18 09:09:47 -05:00			`GLOB_REGEX = re.compile(r'\\\[(\\\!\|)(.*)\\\]')`
			`IS_GLOB = re.compile(r'[\?\*\[\]]')`
			`INEQUALITY_EXPR = re.compile("^([=<>])([0-9])$")`
Fix notif kws that start/end with non-word chars Only prepend / append word bounary characters if the search expression starts or ends with a word character, otherwise they don't work because there's no word bounary between whitespace and a non-word char. 2017-10-05 06:33:30 -04:00			`STARTS_WITH_WORD_CHAR_REGEX = re.compile(r"^\w")`
			`ENDS_WITH_WORD_CHAR_REGEX = re.compile(r"\w$")`
Make notifications go quicker 2016-01-18 09:09:47 -05:00

			`def _room_member_count(ev, condition, room_member_count):`
			`if 'is' not in condition:`
			`return False`
			`m = INEQUALITY_EXPR.match(condition['is'])`
			`if not m:`
			`return False`
			`ineq = m.group(1)`
			`rhs = m.group(2)`
			`if not rhs.isdigit():`
			`return False`
			`rhs = int(rhs)`

			`if ineq == '' or ineq == '==':`
			`return room_member_count == rhs`
			`elif ineq == '<':`
			`return room_member_count < rhs`
			`elif ineq == '>':`
			`return room_member_count > rhs`
			`elif ineq == '>=':`
			`return room_member_count >= rhs`
			`elif ineq == '<=':`
			`return room_member_count <= rhs`
			`else:`
			`return False`


Remove code that's now been obsoleted or moved elsewhere 2016-04-07 11:31:38 -04:00			`def tweaks_for_actions(actions):`
			`tweaks = {}`
			`for a in actions:`
			`if not isinstance(a, dict):`
			`continue`
			`if 'set_tweak' in a and 'value' in a:`
			`tweaks[a['set_tweak']] = a['value']`
			`return tweaks`
Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00

Make notifications go quicker 2016-01-18 09:09:47 -05:00			`class PushRuleEvaluatorForEvent(object):`
Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`def __init__(self, event, room_member_count):`
Make notifications go quicker 2016-01-18 09:09:47 -05:00			`self._event = event`
			`self._room_member_count = room_member_count`

Add comments and remove dead code 2016-01-18 10:42:23 -05:00			`# Maps strings of e.g. 'content.body' -> event["content"]["body"]`
Make notifications go quicker 2016-01-18 09:09:47 -05:00			`self._value_cache = _flatten_dict(event)`
Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00
Remove dead code for setting device specific rules. It wasn't possible to hit the code from the API because of a typo in parsing the request path. Since no-one was using the feature we might as well remove the dead code. 2016-02-18 11:05:13 -05:00			`def matches(self, condition, user_id, display_name):`
Make notifications go quicker 2016-01-18 09:09:47 -05:00			`if condition['kind'] == 'event_match':`
Use static for const dicts 2016-01-18 05:09:14 -05:00			`return self._event_match(condition, user_id)`
Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00			`elif condition['kind'] == 'contains_display_name':`
Make notifications go quicker 2016-01-18 09:09:47 -05:00			`return self._contains_display_name(display_name)`
Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00			`elif condition['kind'] == 'room_member_count':`
Make notifications go quicker 2016-01-18 09:09:47 -05:00			`return _room_member_count(`
			`self._event, condition, self._room_member_count`
			`)`
Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00			`else:`
			`return True`

Use static for const dicts 2016-01-18 05:09:14 -05:00			`def _event_match(self, condition, user_id):`
Make notifications go quicker 2016-01-18 09:09:47 -05:00			`pattern = condition.get('pattern', None)`

Use static for const dicts 2016-01-18 05:09:14 -05:00			`if not pattern:`
			`pattern_type = condition.get('pattern_type', None)`
			`if pattern_type == "user_id":`
			`pattern = user_id`
			`elif pattern_type == "user_localpart":`
			`pattern = UserID.from_string(user_id).localpart`

Make notifications go quicker 2016-01-18 09:09:47 -05:00			`if not pattern:`
			`logger.warn("event_match condition with no pattern")`
			`return False`

			`# XXX: optimisation: cache our pattern regexps`
			`if condition['key'] == 'content.body':`
Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`body = self._event["content"].get("body", None)`
			`if not body:`
			`return False`
Make notifications go quicker 2016-01-18 09:09:47 -05:00
Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`return _glob_matches(pattern, body, word_boundary=True)`
Make notifications go quicker 2016-01-18 09:09:47 -05:00			`else:`
			`haystack = self._get_value(condition['key'])`
			`if haystack is None:`
			`return False`

Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`return _glob_matches(pattern, haystack)`
Make notifications go quicker 2016-01-18 09:09:47 -05:00
			`def _contains_display_name(self, display_name):`
			`if not display_name:`
			`return False`

Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`body = self._event["content"].get("body", None)`
			`if not body:`
			`return False`
Make notifications go quicker 2016-01-18 09:09:47 -05:00
Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`return _glob_matches(display_name, body, word_boundary=True)`
Make notifications go quicker 2016-01-18 09:09:47 -05:00
			`def _get_value(self, dotted_key):`
			`return self._value_cache.get(dotted_key, None)`

Split out the push rule evaluator into a separate file so it can be more readily reused. Should be functionally identical. 2015-12-09 10:51:34 -05:00
Cache glob to regex at a higher level for push 2017-03-29 10:53:14 -04:00			`# Caches (glob, word_boundary) -> regex for push. See _glob_matches`
			`regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)`
			`register_cache("regex_push_cache", regex_cache)`


Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`def _glob_matches(glob, value, word_boundary=False):`
			`"""Tests if value matches glob.`
Make notifications go quicker 2016-01-18 09:09:47 -05:00
Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`Args:`
			`glob (string)`
			`value (string): String to test against glob.`
			`word_boundary (bool): Whether to match against word boundaries or entire`
			`string. Defaults to False.`
Make notifications go quicker 2016-01-18 09:09:47 -05:00
Don't split at word boundaries, actually use regex 2016-01-18 11:48:17 -05:00			`Returns:`
			`bool`
			`"""`
Fix branch didn't check word_boundary 2016-01-18 12:04:36 -05:00
Cache glob to regex at a higher level for push 2017-03-29 10:53:14 -04:00			`try:`
			`r = regex_cache.get((glob, word_boundary), None)`
			`if not r:`
			`r = _glob_to_re(glob, word_boundary)`
			`regex_cache[(glob, word_boundary)] = r`
			`return r.search(value)`
Handle glob -> regex errors 2016-01-19 09:43:24 -05:00			`except re.error:`
			`logger.warn("Failed to parse glob to regex: %r", glob)`
			`return False`
Make notifications go quicker 2016-01-18 09:09:47 -05:00

Cache glob to regex at a higher level for push 2017-03-29 10:53:14 -04:00			`def _glob_to_re(glob, word_boundary):`
			`"""Generates regex for a given glob.`

			`Args:`
			`glob (string)`
			`word_boundary (bool): Whether to match against word boundaries or entire`
			`string. Defaults to False.`

			`Returns:`
			`regex object`
			`"""`
			`if IS_GLOB.search(glob):`
			`r = re.escape(glob)`

			`r = r.replace(r'\', '.?')`
			`r = r.replace(r'\?', '.')`

			`# handle [abc], [a-z] and [!a-z] style ranges.`
			`r = GLOB_REGEX.sub(`
			`lambda x: (`
			`'[%s%s]' % (`
			`x.group(1) and '^' or '',`
			`x.group(2).replace(r'\\\-', '-')`
			`)`
			`),`
			`r,`
			`)`
			`if word_boundary:`
Fix notif kws that start/end with non-word chars Only prepend / append word bounary characters if the search expression starts or ends with a word character, otherwise they don't work because there's no word bounary between whitespace and a non-word char. 2017-10-05 06:33:30 -04:00			`r = _re_word_boundary(r)`
Cache glob to regex at a higher level for push 2017-03-29 10:53:14 -04:00
			`return re.compile(r, flags=re.IGNORECASE)`
			`else:`
			`r = "^" + r + "$"`

			`return re.compile(r, flags=re.IGNORECASE)`
			`elif word_boundary:`
			`r = re.escape(glob)`
Fix notif kws that start/end with non-word chars Only prepend / append word bounary characters if the search expression starts or ends with a word character, otherwise they don't work because there's no word bounary between whitespace and a non-word char. 2017-10-05 06:33:30 -04:00			`r = _re_word_boundary(r)`
Cache glob to regex at a higher level for push 2017-03-29 10:53:14 -04:00
			`return re.compile(r, flags=re.IGNORECASE)`
			`else:`
			`r = "^" + re.escape(glob) + "$"`
			`return re.compile(r, flags=re.IGNORECASE)`

Fix notif kws that start/end with non-word chars Only prepend / append word bounary characters if the search expression starts or ends with a word character, otherwise they don't work because there's no word bounary between whitespace and a non-word char. 2017-10-05 06:33:30 -04:00			`def _re_word_boundary(r):`
			`"""`
			`Adds word boundary characters to the start and end of an`
			`expression to require that the match occur as a whole word,`
			`but do so respecting the fact that strings starting or ending`
			`with non-word characters will change word boundaries.`
			`"""`
			`# Matching a regex string aginst a regex, since by definition`
			`# \b is the boundary between a \w and a \W, so match \w at the`
			`# start or end of the expression (although this will miss, eg.`
			`# "[dl]og")`
			`if STARTS_WITH_WORD_CHAR_REGEX.search(r):`
			`r = r"\b%s" % (r,)`
			`if ENDS_WITH_WORD_CHAR_REGEX.search(r):`
			`r = r"%s\b" % (r,)`
			`return r`

Cache glob to regex at a higher level for push 2017-03-29 10:53:14 -04:00
Fix caching error in the push evaluator Initialising `result` to `{}` in the parameters meant that every call to _flatten_dict used the same target dictionary. I'm hopeful this will fix https://github.com/matrix-org/synapse/issues/2270, but I suspect it won't. (This code seems to have been here since forever, unlike the bug, and I don't really think it explains the observed behaviour). Still, it makes it hard to investigate the problem. 2017-07-04 19:28:43 -04:00			`def _flatten_dict(d, prefix=[], result=None):`
			`if result is None:`
			`result = {}`
Make notifications go quicker 2016-01-18 09:09:47 -05:00			`for key, value in d.items():`
			`if isinstance(value, basestring):`
			`result[".".join(prefix + [key])] = value.lower()`
			`elif hasattr(value, "items"):`
Fix flake8 warnings for new flake8 2016-02-02 12:18:50 -05:00			`_flatten_dict(value, prefix=(prefix + [key]), result=result)`
Make notifications go quicker 2016-01-18 09:09:47 -05:00
			`return result`