Fix non-strings in the event_search table (#12037)

Don't attempt to add non-string `value`s to `event_search` and add a
background update to clear out bad rows from `event_search` when
using sqlite.

Signed-off-by: Sean Quah <seanq@element.io>
This commit is contained in:
Sean Quah 2022-02-24 11:52:28 +00:00 committed by GitHub
parent c56bfb08bc
commit 41cf4c2cf6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 173 additions and 11 deletions

View file

@ -13,13 +13,16 @@
# limitations under the License.
import synapse.rest.admin
from synapse.api.constants import EventTypes
from synapse.api.errors import StoreError
from synapse.rest.client import login, room
from synapse.storage.engines import PostgresEngine
from tests.unittest import HomeserverTestCase
from tests.unittest import HomeserverTestCase, skip_unless
from tests.utils import USE_POSTGRES_FOR_TESTS
class NullByteInsertionTest(HomeserverTestCase):
class EventSearchInsertionTest(HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
login.register_servlets,
@ -72,3 +75,113 @@ class NullByteInsertionTest(HomeserverTestCase):
)
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("alice", result.get("highlights"))
def test_non_string(self):
"""Test that non-string `value`s are not inserted into `event_search`.
This is particularly important when using sqlite, since a sqlite column can hold
both strings and integers. When using Postgres, integers are automatically
converted to strings.
Regression test for #11918.
"""
store = self.hs.get_datastores().main
# Register a user and create a room
user_id = self.register_user("alice", "password")
access_token = self.login("alice", "password")
room_id = self.helper.create_room_as("alice", tok=access_token)
room_version = self.get_success(store.get_room_version(room_id))
# Construct a message with a numeric body to be received over federation
# The message can't be sent using the client API, since Synapse's event
# validation will reject it.
prev_event_ids = self.get_success(store.get_prev_events_for_room(room_id))
prev_event = self.get_success(store.get_event(prev_event_ids[0]))
prev_state_map = self.get_success(
self.hs.get_storage().state.get_state_ids_for_event(prev_event_ids[0])
)
event_dict = {
"type": EventTypes.Message,
"content": {"msgtype": "m.text", "body": 2},
"room_id": room_id,
"sender": user_id,
"depth": prev_event.depth + 1,
"prev_events": prev_event_ids,
"origin_server_ts": self.clock.time_msec(),
}
builder = self.hs.get_event_builder_factory().for_room_version(
room_version, event_dict
)
event = self.get_success(
builder.build(
prev_event_ids=prev_event_ids,
auth_event_ids=self.hs.get_event_auth_handler().compute_auth_events(
builder,
prev_state_map,
for_verification=False,
),
depth=event_dict["depth"],
)
)
# Receive the event
self.get_success(
self.hs.get_federation_event_handler().on_receive_pdu(
self.hs.hostname, event
)
)
# The event should not have an entry in the `event_search` table
f = self.get_failure(
store.db_pool.simple_select_one_onecol(
"event_search",
{"room_id": room_id, "event_id": event.event_id},
"event_id",
),
StoreError,
)
self.assertEqual(f.value.code, 404)
@skip_unless(not USE_POSTGRES_FOR_TESTS, "requires sqlite")
def test_sqlite_non_string_deletion_background_update(self):
"""Test the background update to delete bad rows from `event_search`."""
store = self.hs.get_datastores().main
# Populate `event_search` with dummy data
self.get_success(
store.db_pool.simple_insert_many(
"event_search",
keys=["event_id", "room_id", "key", "value"],
values=[
("event1", "room_id", "content.body", "hi"),
("event2", "room_id", "content.body", "2"),
("event3", "room_id", "content.body", 3),
],
desc="populate_event_search",
)
)
# Run the background update
store.db_pool.updates._all_done = False
self.get_success(
store.db_pool.simple_insert(
"background_updates",
{
"update_name": "event_search_sqlite_delete_non_strings",
"progress_json": "{}",
},
)
)
self.wait_for_background_updates()
# The non-string `value`s ought to be gone now.
values = self.get_success(
store.db_pool.simple_select_onecol(
"event_search",
{"room_id": "room_id"},
"value",
),
)
self.assertCountEqual(values, ["hi", "2"])