Cache glob to regex at a higher level for push

This commit is contained in:
Erik Johnston 2017-03-29 15:53:14 +01:00
parent 3ce8d59176
commit a3810136fe

View File

@ -17,6 +17,7 @@ import logging
import re import re
from synapse.types import UserID from synapse.types import UserID
from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache
from synapse.util.caches.lrucache import LruCache from synapse.util.caches.lrucache import LruCache
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -125,6 +126,11 @@ class PushRuleEvaluatorForEvent(object):
return self._value_cache.get(dotted_key, None) return self._value_cache.get(dotted_key, None)
# Caches (glob, word_boundary) -> compiled regex used for push rule glob matching. Populated by _glob_matches
regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR)
register_cache("regex_push_cache", regex_cache)
def _glob_matches(glob, value, word_boundary=False): def _glob_matches(glob, value, word_boundary=False):
"""Tests if value matches glob. """Tests if value matches glob.
@ -137,7 +143,29 @@ def _glob_matches(glob, value, word_boundary=False):
Returns: Returns:
bool bool
""" """
try: try:
r = regex_cache.get((glob, word_boundary), None)
if not r:
r = _glob_to_re(glob, word_boundary)
regex_cache[(glob, word_boundary)] = r
return r.search(value)
except re.error:
logger.warn("Failed to parse glob to regex: %r", glob)
return False
def _glob_to_re(glob, word_boundary):
"""Generates regex for a given glob.
Args:
glob (string)
word_boundary (bool): Whether to match against word boundaries or entire
string. Must be passed explicitly (this helper has no default).
Returns:
regex object
"""
if IS_GLOB.search(glob): if IS_GLOB.search(glob):
r = re.escape(glob) r = re.escape(glob)
@ -156,25 +184,20 @@ def _glob_matches(glob, value, word_boundary=False):
) )
if word_boundary: if word_boundary:
r = r"\b%s\b" % (r,) r = r"\b%s\b" % (r,)
r = _compile_regex(r)
return r.search(value) return re.compile(r, flags=re.IGNORECASE)
else: else:
r = r + "$" r = "^" + r + "$"
r = _compile_regex(r)
return r.match(value) return re.compile(r, flags=re.IGNORECASE)
elif word_boundary: elif word_boundary:
r = re.escape(glob) r = re.escape(glob)
r = r"\b%s\b" % (r,) r = r"\b%s\b" % (r,)
r = _compile_regex(r)
return r.search(value) return re.compile(r, flags=re.IGNORECASE)
else: else:
return value.lower() == glob.lower() r = "^" + re.escape(glob) + "$"
except re.error: return re.compile(r, flags=re.IGNORECASE)
logger.warn("Failed to parse glob to regex: %r", glob)
return False
def _flatten_dict(d, prefix=[], result={}): def _flatten_dict(d, prefix=[], result={}):
@ -185,16 +208,3 @@ def _flatten_dict(d, prefix=[], result={}):
_flatten_dict(value, prefix=(prefix + [key]), result=result) _flatten_dict(value, prefix=(prefix + [key]), result=result)
return result return result
regex_cache = LruCache(5000)
def _compile_regex(regex_str):
    """Return a case-insensitive compiled pattern for the given regex source.

    The module-level ``regex_cache`` is consulted first; the pattern is only
    compiled, and stored back into the cache, when no entry is found.

    Args:
        regex_str (string): regular expression source text.

    Returns:
        regex object
    """
    compiled = regex_cache.get(regex_str, None)
    if not compiled:
        # Cache miss: compile once and remember it for later lookups.
        compiled = re.compile(regex_str, flags=re.IGNORECASE)
        regex_cache[regex_str] = compiled
    return compiled