Add a Postgres REPLICA IDENTITY to tables that do not have an implicit one. This should allow use of Postgres logical replication. (#16456)

* Add Postgres replica identities to tables that don't have an implicit one

Fixes #16224

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Move the delta to version 83 as we missed the boat for 82

* Add a test that all tables have a REPLICA IDENTITY

* Extend the test to include when indices are deleted

* isort

* black

* Fully qualify `oid` as it is a 'hidden attribute' in Postgres 11

* Update tests/storage/test_database.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Add missed tables

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
This commit is contained in:
reivilibre 2023-11-13 16:03:22 +00:00 committed by GitHub
parent fb2554b11f
commit 69afe3f7a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 202 additions and 1 deletions

1
changelog.d/16456.misc Normal file
View File

@ -0,0 +1 @@
Add a Postgres `REPLICA IDENTITY` to tables that do not have an implicit one. This should allow use of Postgres logical replication.

View File

@ -0,0 +1,88 @@
/* Copyright 2023 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- Annotate some tables in Postgres with a REPLICA IDENTITY.
-- Any table that doesn't have a primary key should be annotated explicitly with
-- a REPLICA IDENTITY so that logical replication can be used.
-- If this is not done, then UPDATE and DELETE statements on those tables
-- will fail if logical replication is in use.
-- Where possible, re-use unique indices already defined on tables as a replica
-- identity.
ALTER TABLE appservice_room_list REPLICA IDENTITY USING INDEX appservice_room_list_idx;
ALTER TABLE batch_events REPLICA IDENTITY USING INDEX chunk_events_event_id;
ALTER TABLE blocked_rooms REPLICA IDENTITY USING INDEX blocked_rooms_idx;
ALTER TABLE cache_invalidation_stream_by_instance REPLICA IDENTITY USING INDEX cache_invalidation_stream_by_instance_id;
ALTER TABLE device_lists_changes_in_room REPLICA IDENTITY USING INDEX device_lists_changes_in_stream_id;
ALTER TABLE device_lists_outbound_last_success REPLICA IDENTITY USING INDEX device_lists_outbound_last_success_unique_idx;
ALTER TABLE device_lists_remote_cache REPLICA IDENTITY USING INDEX device_lists_remote_cache_unique_id;
ALTER TABLE device_lists_remote_extremeties REPLICA IDENTITY USING INDEX device_lists_remote_extremeties_unique_idx;
ALTER TABLE device_lists_remote_resync REPLICA IDENTITY USING INDEX device_lists_remote_resync_idx;
ALTER TABLE e2e_cross_signing_keys REPLICA IDENTITY USING INDEX e2e_cross_signing_keys_stream_idx;
ALTER TABLE e2e_room_keys REPLICA IDENTITY USING INDEX e2e_room_keys_with_version_idx;
ALTER TABLE e2e_room_keys_versions REPLICA IDENTITY USING INDEX e2e_room_keys_versions_idx;
ALTER TABLE erased_users REPLICA IDENTITY USING INDEX erased_users_user;
ALTER TABLE event_relations REPLICA IDENTITY USING INDEX event_relations_id;
ALTER TABLE federation_inbound_events_staging REPLICA IDENTITY USING INDEX federation_inbound_events_staging_instance_event;
ALTER TABLE federation_stream_position REPLICA IDENTITY USING INDEX federation_stream_position_instance;
ALTER TABLE ignored_users REPLICA IDENTITY USING INDEX ignored_users_uniqueness;
ALTER TABLE insertion_events REPLICA IDENTITY USING INDEX insertion_events_event_id;
ALTER TABLE insertion_event_extremities REPLICA IDENTITY USING INDEX insertion_event_extremities_event_id;
ALTER TABLE monthly_active_users REPLICA IDENTITY USING INDEX monthly_active_users_users;
ALTER TABLE ratelimit_override REPLICA IDENTITY USING INDEX ratelimit_override_idx;
ALTER TABLE room_stats_earliest_token REPLICA IDENTITY USING INDEX room_stats_earliest_token_idx;
ALTER TABLE room_stats_state REPLICA IDENTITY USING INDEX room_stats_state_room;
ALTER TABLE stream_positions REPLICA IDENTITY USING INDEX stream_positions_idx;
ALTER TABLE user_directory REPLICA IDENTITY USING INDEX user_directory_user_idx;
ALTER TABLE user_directory_search REPLICA IDENTITY USING INDEX user_directory_search_user_idx;
ALTER TABLE user_ips REPLICA IDENTITY USING INDEX user_ips_user_token_ip_unique_index;
ALTER TABLE user_signature_stream REPLICA IDENTITY USING INDEX user_signature_stream_idx;
ALTER TABLE users_in_public_rooms REPLICA IDENTITY USING INDEX users_in_public_rooms_u_idx;
ALTER TABLE users_who_share_private_rooms REPLICA IDENTITY USING INDEX users_who_share_private_rooms_u_idx;
ALTER TABLE user_threepid_id_server REPLICA IDENTITY USING INDEX user_threepid_id_server_idx;
ALTER TABLE worker_locks REPLICA IDENTITY USING INDEX worker_locks_key;
-- Where there are no unique indices, use the entire rows as replica identities.
ALTER TABLE current_state_delta_stream REPLICA IDENTITY FULL;
ALTER TABLE deleted_pushers REPLICA IDENTITY FULL;
ALTER TABLE device_auth_providers REPLICA IDENTITY FULL;
ALTER TABLE device_federation_inbox REPLICA IDENTITY FULL;
ALTER TABLE device_federation_outbox REPLICA IDENTITY FULL;
ALTER TABLE device_inbox REPLICA IDENTITY FULL;
ALTER TABLE device_lists_outbound_pokes REPLICA IDENTITY FULL;
ALTER TABLE device_lists_stream REPLICA IDENTITY FULL;
ALTER TABLE e2e_cross_signing_signatures REPLICA IDENTITY FULL;
ALTER TABLE event_auth_chain_links REPLICA IDENTITY FULL;
ALTER TABLE event_auth REPLICA IDENTITY FULL;
ALTER TABLE event_push_actions_staging REPLICA IDENTITY FULL;
ALTER TABLE insertion_event_edges REPLICA IDENTITY FULL;
ALTER TABLE local_media_repository_url_cache REPLICA IDENTITY FULL;
ALTER TABLE presence_stream REPLICA IDENTITY FULL;
ALTER TABLE push_rules_stream REPLICA IDENTITY FULL;
ALTER TABLE room_alias_servers REPLICA IDENTITY FULL;
ALTER TABLE stream_ordering_to_exterm REPLICA IDENTITY FULL;
ALTER TABLE timeline_gaps REPLICA IDENTITY FULL;
ALTER TABLE user_daily_visits REPLICA IDENTITY FULL;
ALTER TABLE users_pending_deactivation REPLICA IDENTITY FULL;
-- special cases: unique indices on nullable columns can't be used
ALTER TABLE event_push_summary REPLICA IDENTITY FULL;
ALTER TABLE event_search REPLICA IDENTITY FULL;
ALTER TABLE local_media_repository_thumbnails REPLICA IDENTITY FULL;
ALTER TABLE remote_media_cache_thumbnails REPLICA IDENTITY FULL;
ALTER TABLE threepid_guest_access_tokens REPLICA IDENTITY FULL;
ALTER TABLE user_filters REPLICA IDENTITY FULL; -- sadly the `CHECK` constraint is not enough here

View File

@ -0,0 +1,30 @@
/* Copyright 2023 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- Annotate some tables in Postgres with a REPLICA IDENTITY.
-- Any table that doesn't have a primary key should be annotated explicitly with
-- a REPLICA IDENTITY so that logical replication can be used.
-- If this is not done, then UPDATE and DELETE statements on those tables
-- will fail if logical replication is in use.
-- See also: 82/04_replica_identities.sql.postgres on the main database
-- Where possible, re-use unique indices already defined on tables as a replica
-- identity.
ALTER TABLE state_group_edges REPLICA IDENTITY USING INDEX state_group_edges_unique_idx;
-- Where there are no unique indices, use the entire rows as replica identities.
ALTER TABLE state_groups_state REPLICA IDENTITY FULL;

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import Callable, Tuple from typing import Callable, List, Tuple
from unittest.mock import Mock, call from unittest.mock import Mock, call
from twisted.internet import defer from twisted.internet import defer
@ -29,6 +29,7 @@ from synapse.storage.database import (
from synapse.util import Clock from synapse.util import Clock
from tests import unittest from tests import unittest
from tests.utils import USE_POSTGRES_FOR_TESTS
class TupleComparisonClauseTestCase(unittest.TestCase): class TupleComparisonClauseTestCase(unittest.TestCase):
@ -278,3 +279,84 @@ class CancellationTestCase(unittest.HomeserverTestCase):
] ]
) )
self.assertEqual(exception_callback.call_count, 6) # no additional calls self.assertEqual(exception_callback.call_count, 6) # no additional calls
class PostgresReplicaIdentityTestCase(unittest.HomeserverTestCase):
if not USE_POSTGRES_FOR_TESTS:
skip = "Requires Postgres"
def prepare(
self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
) -> None:
self.db_pools = homeserver.get_datastores().databases
def test_all_tables_have_postgres_replica_identity(self) -> None:
"""
Tests that all tables have a Postgres REPLICA IDENTITY.
(See #16224).
Tables with a PRIMARY KEY have an implied REPLICA IDENTITY and are fine.
Other tables need them to be set with `ALTER TABLE`.
A REPLICA IDENTITY is required for Postgres logical replication to work
properly without blocking updates and deletes.
"""
sql = """
-- Select tables that have no primary key and use the default replica identity rule
-- (the default is to use the primary key)
WITH tables_no_pkey AS (
SELECT tbl.table_schema, tbl.table_name
FROM information_schema.tables tbl
WHERE table_type = 'BASE TABLE'
AND table_schema not in ('pg_catalog', 'information_schema')
AND NOT EXISTS (
SELECT 1
FROM information_schema.key_column_usage kcu
WHERE kcu.table_name = tbl.table_name
AND kcu.table_schema = tbl.table_schema
)
)
SELECT pg_class.oid::regclass FROM tables_no_pkey INNER JOIN pg_class ON pg_class.oid::regclass = table_name::regclass
WHERE relreplident = 'd'
UNION
-- Also select tables that use an index as a replica identity
-- but where the index doesn't exist
-- (e.g. it could have been deleted)
SELECT pg_class.oid::regclass
FROM information_schema.tables tbl
INNER JOIN pg_class ON pg_class.oid::regclass = table_name::regclass
WHERE table_type = 'BASE TABLE'
AND table_schema not in ('pg_catalog', 'information_schema')
-- 'i' means an index is used as the replica identity
AND relreplident = 'i'
-- look for indices that are marked as the replica identity
AND NOT EXISTS (
SELECT indexrelid::regclass
FROM pg_index
WHERE indrelid = pg_class.oid::regclass AND indisreplident
)
"""
def _list_tables_with_missing_replica_identities_txn(
txn: LoggingTransaction,
) -> List[str]:
txn.execute(sql)
return [table_name for table_name, in txn]
for pool in self.db_pools:
missing = self.get_success(
pool.runInteraction(
"test_list_missing_replica_identities",
_list_tables_with_missing_replica_identities_txn,
)
)
self.assertEqual(
len(missing),
0,
f"The following tables in the {pool.name()!r} database are missing REPLICA IDENTITIES: {missing!r}.",
)