Merge pull request #2500 from matrix-org/dbkr/fix_word_boundary_mentions

Fix notif kws that start/end with non-word chars
This commit is contained in:
David Baker 2017-10-05 12:27:59 +01:00 committed by GitHub
commit 44f8e383f3

View File

@ -183,7 +183,7 @@ def _glob_to_re(glob, word_boundary):
r, r,
) )
if word_boundary: if word_boundary:
r = r"\b%s\b" % (r,) r = _re_word_boundary(r)
return re.compile(r, flags=re.IGNORECASE) return re.compile(r, flags=re.IGNORECASE)
else: else:
@ -192,7 +192,7 @@ def _glob_to_re(glob, word_boundary):
return re.compile(r, flags=re.IGNORECASE) return re.compile(r, flags=re.IGNORECASE)
elif word_boundary: elif word_boundary:
r = re.escape(glob) r = re.escape(glob)
r = r"\b%s\b" % (r,) r = _re_word_boundary(r)
return re.compile(r, flags=re.IGNORECASE) return re.compile(r, flags=re.IGNORECASE)
else: else:
@ -200,6 +200,18 @@ def _glob_to_re(glob, word_boundary):
return re.compile(r, flags=re.IGNORECASE) return re.compile(r, flags=re.IGNORECASE)
def _re_word_boundary(r):
    """
    Wrap a regular-expression fragment so it only matches as a whole word.

    The fragment ``r`` is interpolated, unchanged, between a leading and a
    trailing boundary group. Each boundary group accepts either the edge of
    the string or a single non-word character, so fragments that themselves
    begin or end with non-word characters can still match.

    Note that a boundary character, when present, is consumed as part of the
    overall match, and that the returned pattern contains two capturing
    groups (one on each side of the fragment).

    Returns the new pattern as a string; it is not compiled here.
    """
    # \b is deliberately avoided: the original author found that it
    # misbehaves on unicode input, while \W appeared to behave acceptably
    # as a stand-in for [^0-9A-Za-z_].
    # NOTE(review): whether \W is ASCII-only depends on the Python version
    # and regex flags in use -- confirm against the caller.
    bounded_template = r"(^|\W)%s(\W|$)"
    return bounded_template % (r,)
def _flatten_dict(d, prefix=[], result=None): def _flatten_dict(d, prefix=[], result=None):
if result is None: if result is None:
result = {} result = {}