Fix non-strings in the event_search table (#12037)

Don't attempt to add non-string `value`s to `event_search` and add a
background update to clear out bad rows from `event_search` when
using sqlite.

Signed-off-by: Sean Quah <seanq@element.io>
This commit is contained in:
Sean Quah 2022-02-24 11:52:28 +00:00 committed by GitHub
parent c56bfb08bc
commit 41cf4c2cf6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 173 additions and 11 deletions

View file

@ -1473,10 +1473,10 @@ class PersistEventsStore:
def _update_metadata_tables_txn(
self,
txn,
txn: LoggingTransaction,
*,
events_and_contexts,
all_events_and_contexts,
events_and_contexts: List[Tuple[EventBase, EventContext]],
all_events_and_contexts: List[Tuple[EventBase, EventContext]],
inhibit_local_membership_updates: bool = False,
):
"""Update all the miscellaneous tables for new events
@ -1953,20 +1953,20 @@ class PersistEventsStore:
txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
)
def _store_room_topic_txn(self, txn, event):
if hasattr(event, "content") and "topic" in event.content:
def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase):
if isinstance(event.content.get("topic"), str):
self.store_event_search_txn(
txn, event, "content.topic", event.content["topic"]
)
def _store_room_name_txn(self, txn, event):
if hasattr(event, "content") and "name" in event.content:
def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase):
if isinstance(event.content.get("name"), str):
self.store_event_search_txn(
txn, event, "content.name", event.content["name"]
)
def _store_room_message_txn(self, txn, event):
if hasattr(event, "content") and "body" in event.content:
def _store_room_message_txn(self, txn: LoggingTransaction, event: EventBase):
if isinstance(event.content.get("body"), str):
self.store_event_search_txn(
txn, event, "content.body", event.content["body"]
)

View file

@ -115,6 +115,7 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
EVENT_SEARCH_DELETE_NON_STRINGS = "event_search_sqlite_delete_non_strings"
def __init__(
self,
@ -147,6 +148,10 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, self._background_reindex_gin_search
)
self.db_pool.updates.register_background_update_handler(
self.EVENT_SEARCH_DELETE_NON_STRINGS, self._background_delete_non_strings
)
async def _background_reindex_search(self, progress, batch_size):
# we work through the events table from highest stream id to lowest
target_min_stream_id = progress["target_min_stream_id_inclusive"]
@ -372,6 +377,27 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
return num_rows
async def _background_delete_non_strings(
self, progress: JsonDict, batch_size: int
) -> int:
"""Deletes rows with non-string `value`s from `event_search` if using sqlite.
Prior to Synapse 1.44.0, malformed events received over federation could cause integers
to be inserted into the `event_search` table when using sqlite.
"""
def delete_non_strings_txn(txn: LoggingTransaction) -> None:
txn.execute("DELETE FROM event_search WHERE typeof(value) != 'text'")
await self.db_pool.runInteraction(
self.EVENT_SEARCH_DELETE_NON_STRINGS, delete_non_strings_txn
)
await self.db_pool.updates._end_background_update(
self.EVENT_SEARCH_DELETE_NON_STRINGS
)
return 1
class SearchStore(SearchBackgroundUpdateStore):
def __init__(