mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-07-24 06:10:50 -04:00
Fix a bug introduced in Synapse v1.74.0 where searching with colons when using ICU for search term tokenisation would fail with an error. (#15079)
Co-authored-by: David Robertson <davidr@element.io>
This commit is contained in:
parent
7ee7f49316
commit
1cbc3f197c
4 changed files with 90 additions and 5 deletions
|
@ -918,11 +918,19 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]:
|
|||
We use this so that we can add prefix matching, which isn't something
|
||||
that is supported by default.
|
||||
"""
|
||||
results = _parse_words(search_term)
|
||||
escaped_words = []
|
||||
for word in _parse_words(search_term):
|
||||
# Postgres tsvector and tsquery quoting rules:
|
||||
# words potentially containing punctuation should be quoted
|
||||
# and then existing quotes and backslashes should be doubled
|
||||
# See: https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY
|
||||
|
||||
both = " & ".join("(%s:* | %s)" % (result, result) for result in results)
|
||||
exact = " & ".join("%s" % (result,) for result in results)
|
||||
prefix = " & ".join("%s:*" % (result,) for result in results)
|
||||
quoted_word = word.replace("'", "''").replace("\\", "\\\\")
|
||||
escaped_words.append(f"'{quoted_word}'")
|
||||
|
||||
both = " & ".join("(%s:* | %s)" % (word, word) for word in escaped_words)
|
||||
exact = " & ".join("%s" % (word,) for word in escaped_words)
|
||||
prefix = " & ".join("%s:*" % (word,) for word in escaped_words)
|
||||
|
||||
return both, exact, prefix
|
||||
|
||||
|
@ -944,6 +952,14 @@ def _parse_words(search_term: str) -> List[str]:
|
|||
if USE_ICU:
|
||||
return _parse_words_with_icu(search_term)
|
||||
|
||||
return _parse_words_with_regex(search_term)
|
||||
|
||||
|
||||
def _parse_words_with_regex(search_term: str) -> List[str]:
|
||||
"""
|
||||
Break down search term into words, when we don't have ICU available.
|
||||
See: `_parse_words`
|
||||
"""
|
||||
return re.findall(r"([\w\-]+)", search_term, re.UNICODE)
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue