Treat "\u0000" as "\u0020" for the purposes of message search (message indexing) (#10820)

* add test to check if null code points are being inserted

* add logic to detect and replace null code points before insertion into db

* lints

* add license to test

* change approach to null substitution

* add type hint for SearchEntry

* Add changelog entry

Signed-off-by: H.Shay <shaysquared@gmail.com>

* updated changelog

* update chanelog message

* remove duplicate changelog

* Update synapse/storage/databases/main/events.py remove extra space

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* rename and move test file, update tests, delete old test file

* fix typo in comments

* update _find_highlights_in_postgres to replace null byte with space

* replace null byte in sqlite search insertion

* beef up and reorganize test for this pr

* update changelog

* add type hints and update docstring

* check db engine directly vs using env variable

* refactor tests to be less repetetive

* move rplace logic into seperate function

* requested changes

* Fix typo.

* Update synapse/storage/databases/main/search.py

Co-authored-by: reivilibre <olivier@librepush.net>

* Update changelog.d/10820.misc

Co-authored-by: Aaron Raimist <aaron@raim.ist>

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
Co-authored-by: reivilibre <olivier@librepush.net>
Co-authored-by: Aaron Raimist <aaron@raim.ist>
This commit is contained in:
Hillery Shay 2021-09-22 08:25:26 -07:00 committed by GitHub
parent 03db6701d5
commit f78b68a96b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 100 additions and 9 deletions

View file

@ -15,12 +15,12 @@
import logging
import re
from collections import namedtuple
from typing import Collection, List, Optional, Set
from typing import Collection, Iterable, List, Optional, Set
from synapse.api.errors import SynapseError
from synapse.events import EventBase
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import DatabasePool
from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
@ -32,14 +32,24 @@ SearchEntry = namedtuple(
)
def _clean_value_for_search(value: str) -> str:
"""
Replaces any null code points in the string with spaces as
Postgres and SQLite do not like the insertion of strings with
null code points into the full-text search tables.
"""
return value.replace("\u0000", " ")
class SearchWorkerStore(SQLBaseStore):
def store_search_entries_txn(self, txn, entries):
def store_search_entries_txn(
self, txn: LoggingTransaction, entries: Iterable[SearchEntry]
) -> None:
"""Add entries to the search table
Args:
txn (cursor):
entries (iterable[SearchEntry]):
entries to be added to the table
txn:
entries: entries to be added to the table
"""
if not self.hs.config.enable_search:
return
@ -55,7 +65,7 @@ class SearchWorkerStore(SQLBaseStore):
entry.event_id,
entry.room_id,
entry.key,
entry.value,
_clean_value_for_search(entry.value),
entry.stream_ordering,
entry.origin_server_ts,
)
@ -70,11 +80,16 @@ class SearchWorkerStore(SQLBaseStore):
" VALUES (?,?,?,?)"
)
args = (
(entry.event_id, entry.room_id, entry.key, entry.value)
(
entry.event_id,
entry.room_id,
entry.key,
_clean_value_for_search(entry.value),
)
for entry in entries
)
txn.execute_batch(sql, args)
else:
# This should be unreachable.
raise Exception("Unrecognized database engine")
@ -646,6 +661,7 @@ class SearchStore(SearchBackgroundUpdateStore):
for key in ("body", "name", "topic"):
v = event.content.get(key, None)
if v:
v = _clean_value_for_search(v)
values.append(v)
if not values: