Track when the pulled event signature fails (#13815)

Because we're doing the recording in `_check_sigs_and_hash_for_pulled_events_and_fetch` (previously named `_check_sigs_and_hash_and_fetch`), this means we will track signature failures for `backfill`, `get_room_state`, `get_event_auth`, and `get_missing_events` (all pulled event scenarios). And we also record signature failures from `get_pdu`.

Part of https://github.com/matrix-org/synapse/issues/13700

Part of https://github.com/matrix-org/synapse/issues/13676 and https://github.com/matrix-org/synapse/issues/13356

This PR will be especially important for https://github.com/matrix-org/synapse/pull/13816 so we can avoid the costly `_get_state_ids_after_missing_prev_event` down the line when `/messages` calls backfill.
This commit is contained in:
Eric Eastwood 2022-10-03 14:53:29 -05:00 committed by GitHub
parent 92ae90aca2
commit 70a4317692
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 140 additions and 15 deletions

View File

@ -0,0 +1 @@
Keep track when an event pulled over federation fails its signature check so we can intelligently back-off in the future.

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging import logging
from typing import TYPE_CHECKING from typing import TYPE_CHECKING, Awaitable, Callable, Optional
from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
from synapse.api.errors import Codes, SynapseError from synapse.api.errors import Codes, SynapseError
@ -58,7 +58,12 @@ class FederationBase:
@trace @trace
async def _check_sigs_and_hash( async def _check_sigs_and_hash(
self, room_version: RoomVersion, pdu: EventBase self,
room_version: RoomVersion,
pdu: EventBase,
record_failure_callback: Optional[
Callable[[EventBase, str], Awaitable[None]]
] = None,
) -> EventBase: ) -> EventBase:
"""Checks that event is correctly signed by the sending server. """Checks that event is correctly signed by the sending server.
@ -70,6 +75,11 @@ class FederationBase:
Args: Args:
room_version: The room version of the PDU room_version: The room version of the PDU
pdu: the event to be checked pdu: the event to be checked
record_failure_callback: A callback to run whenever the given event
fails signature or hash checks. This includes exceptions
that would be normally be thrown/raised but also things like
checking for event tampering where we just return the redacted
event.
Returns: Returns:
* the original event if the checks pass * the original event if the checks pass
@ -80,7 +90,12 @@ class FederationBase:
InvalidEventSignatureError if the signature check failed. Nothing InvalidEventSignatureError if the signature check failed. Nothing
will be logged in this case. will be logged in this case.
""" """
try:
await _check_sigs_on_pdu(self.keyring, room_version, pdu) await _check_sigs_on_pdu(self.keyring, room_version, pdu)
except InvalidEventSignatureError as exc:
if record_failure_callback:
await record_failure_callback(pdu, str(exc))
raise exc
if not check_event_content_hash(pdu): if not check_event_content_hash(pdu):
# let's try to distinguish between failures because the event was # let's try to distinguish between failures because the event was
@ -116,6 +131,10 @@ class FederationBase:
"event_id": pdu.event_id, "event_id": pdu.event_id,
} }
) )
if record_failure_callback:
await record_failure_callback(
pdu, "Event content has been tampered with"
)
return redacted_event return redacted_event
spam_check = await self.spam_checker.check_event_for_spam(pdu) spam_check = await self.spam_checker.check_event_for_spam(pdu)

View File

@ -278,7 +278,7 @@ class FederationClient(FederationBase):
pdus = [event_from_pdu_json(p, room_version) for p in transaction_data_pdus] pdus = [event_from_pdu_json(p, room_version) for p in transaction_data_pdus]
# Check signatures and hash of pdus, removing any from the list that fail checks # Check signatures and hash of pdus, removing any from the list that fail checks
pdus[:] = await self._check_sigs_and_hash_and_fetch( pdus[:] = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
dest, pdus, room_version=room_version dest, pdus, room_version=room_version
) )
@ -328,7 +328,17 @@ class FederationClient(FederationBase):
# Check signatures are correct. # Check signatures are correct.
try: try:
signed_pdu = await self._check_sigs_and_hash(room_version, pdu)
async def _record_failure_callback(
event: EventBase, cause: str
) -> None:
await self.store.record_event_failed_pull_attempt(
event.room_id, event.event_id, cause
)
signed_pdu = await self._check_sigs_and_hash(
room_version, pdu, _record_failure_callback
)
except InvalidEventSignatureError as e: except InvalidEventSignatureError as e:
errmsg = f"event id {pdu.event_id}: {e}" errmsg = f"event id {pdu.event_id}: {e}"
logger.warning("%s", errmsg) logger.warning("%s", errmsg)
@ -547,24 +557,28 @@ class FederationClient(FederationBase):
len(auth_event_map), len(auth_event_map),
) )
valid_auth_events = await self._check_sigs_and_hash_and_fetch( valid_auth_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
destination, auth_event_map.values(), room_version destination, auth_event_map.values(), room_version
) )
valid_state_events = await self._check_sigs_and_hash_and_fetch( valid_state_events = (
await self._check_sigs_and_hash_for_pulled_events_and_fetch(
destination, state_event_map.values(), room_version destination, state_event_map.values(), room_version
) )
)
return valid_state_events, valid_auth_events return valid_state_events, valid_auth_events
@trace @trace
async def _check_sigs_and_hash_and_fetch( async def _check_sigs_and_hash_for_pulled_events_and_fetch(
self, self,
origin: str, origin: str,
pdus: Collection[EventBase], pdus: Collection[EventBase],
room_version: RoomVersion, room_version: RoomVersion,
) -> List[EventBase]: ) -> List[EventBase]:
"""Checks the signatures and hashes of a list of events. """
Checks the signatures and hashes of a list of pulled events we got from
federation and records any signature failures as failed pull attempts.
If a PDU fails its signature check then we check if we have it in If a PDU fails its signature check then we check if we have it in
the database, and if not then request it from the sender's server (if that the database, and if not then request it from the sender's server (if that
@ -597,11 +611,17 @@ class FederationClient(FederationBase):
valid_pdus: List[EventBase] = [] valid_pdus: List[EventBase] = []
async def _record_failure_callback(event: EventBase, cause: str) -> None:
await self.store.record_event_failed_pull_attempt(
event.room_id, event.event_id, cause
)
async def _execute(pdu: EventBase) -> None: async def _execute(pdu: EventBase) -> None:
valid_pdu = await self._check_sigs_and_hash_and_fetch_one( valid_pdu = await self._check_sigs_and_hash_and_fetch_one(
pdu=pdu, pdu=pdu,
origin=origin, origin=origin,
room_version=room_version, room_version=room_version,
record_failure_callback=_record_failure_callback,
) )
if valid_pdu: if valid_pdu:
@ -618,6 +638,9 @@ class FederationClient(FederationBase):
pdu: EventBase, pdu: EventBase,
origin: str, origin: str,
room_version: RoomVersion, room_version: RoomVersion,
record_failure_callback: Optional[
Callable[[EventBase, str], Awaitable[None]]
] = None,
) -> Optional[EventBase]: ) -> Optional[EventBase]:
"""Takes a PDU and checks its signatures and hashes. """Takes a PDU and checks its signatures and hashes.
@ -634,6 +657,11 @@ class FederationClient(FederationBase):
origin origin
pdu pdu
room_version room_version
record_failure_callback: A callback to run whenever the given event
fails signature or hash checks. This includes exceptions
that would be normally be thrown/raised but also things like
checking for event tampering where we just return the redacted
event.
Returns: Returns:
The PDU (possibly redacted) if it has valid signatures and hashes. The PDU (possibly redacted) if it has valid signatures and hashes.
@ -641,7 +669,9 @@ class FederationClient(FederationBase):
""" """
try: try:
return await self._check_sigs_and_hash(room_version, pdu) return await self._check_sigs_and_hash(
room_version, pdu, record_failure_callback
)
except InvalidEventSignatureError as e: except InvalidEventSignatureError as e:
logger.warning( logger.warning(
"Signature on retrieved event %s was invalid (%s). " "Signature on retrieved event %s was invalid (%s). "
@ -694,7 +724,7 @@ class FederationClient(FederationBase):
auth_chain = [event_from_pdu_json(p, room_version) for p in res["auth_chain"]] auth_chain = [event_from_pdu_json(p, room_version) for p in res["auth_chain"]]
signed_auth = await self._check_sigs_and_hash_and_fetch( signed_auth = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
destination, auth_chain, room_version=room_version destination, auth_chain, room_version=room_version
) )
@ -1401,7 +1431,7 @@ class FederationClient(FederationBase):
event_from_pdu_json(e, room_version) for e in content.get("events", []) event_from_pdu_json(e, room_version) for e in content.get("events", [])
] ]
signed_events = await self._check_sigs_and_hash_and_fetch( signed_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
destination, events, room_version=room_version destination, events, room_version=room_version
) )
except HttpResponseException as e: except HttpResponseException as e:

View File

@ -23,14 +23,23 @@ from twisted.test.proto_helpers import MemoryReactor
from synapse.api.room_versions import RoomVersions from synapse.api.room_versions import RoomVersions
from synapse.events import EventBase from synapse.events import EventBase
from synapse.rest import admin
from synapse.rest.client import login, room
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.types import JsonDict from synapse.types import JsonDict
from synapse.util import Clock from synapse.util import Clock
from tests.test_utils import event_injection
from tests.unittest import FederatingHomeserverTestCase from tests.unittest import FederatingHomeserverTestCase
class FederationClientTest(FederatingHomeserverTestCase): class FederationClientTest(FederatingHomeserverTestCase):
servlets = [
admin.register_servlets,
room.register_servlets,
login.register_servlets,
]
def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer): def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer):
super().prepare(reactor, clock, homeserver) super().prepare(reactor, clock, homeserver)
@ -231,6 +240,72 @@ class FederationClientTest(FederatingHomeserverTestCase):
return remote_pdu return remote_pdu
def test_backfill_invalid_signature_records_failed_pull_attempts(
self,
) -> None:
"""
Test to make sure that events from /backfill with invalid signatures get
recorded as failed pull attempts.
"""
OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
main_store = self.hs.get_datastores().main
# Create the room
user_id = self.register_user("kermit", "test")
tok = self.login("kermit", "test")
room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
# We purposely don't run `add_hashes_and_signatures_from_other_server`
# over this because we want the signature check to fail.
pulled_event, _ = self.get_success(
event_injection.create_event(
self.hs,
room_id=room_id,
sender=OTHER_USER,
type="test_event_type",
content={"body": "garply"},
)
)
# We expect an outbound request to /backfill, so stub that out
self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed(
_mock_response(
{
"origin": "yet.another.server",
"origin_server_ts": 900,
# Mimic the other server returning our new `pulled_event`
"pdus": [pulled_event.get_pdu_json()],
}
)
)
self.get_success(
self.hs.get_federation_client().backfill(
# We use "yet.another.server" instead of
# `self.OTHER_SERVER_NAME` because we want to see the behavior
# from `_check_sigs_and_hash_and_fetch_one` where it tries to
# fetch the PDU again from the origin server if the signature
# fails. Just want to make sure that the failure is counted from
# both code paths.
dest="yet.another.server",
room_id=room_id,
limit=1,
extremities=[pulled_event.event_id],
),
)
# Make sure our failed pull attempt was recorded
backfill_num_attempts = self.get_success(
main_store.db_pool.simple_select_one_onecol(
table="event_failed_pull_attempts",
keyvalues={"event_id": pulled_event.event_id},
retcol="num_attempts",
)
)
# This is 2 because it failed once from `self.OTHER_SERVER_NAME` and the
# other from "yet.another.server"
self.assertEqual(backfill_num_attempts, 2)
def _mock_response(resp: JsonDict): def _mock_response(resp: JsonDict):
body = json.dumps(resp).encode("utf-8") body = json.dumps(resp).encode("utf-8")

View File

@ -86,8 +86,8 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
federation_event_handler._check_event_auth = _check_event_auth federation_event_handler._check_event_auth = _check_event_auth
self.client = self.homeserver.get_federation_client() self.client = self.homeserver.get_federation_client()
self.client._check_sigs_and_hash_and_fetch = lambda dest, pdus, **k: succeed( self.client._check_sigs_and_hash_for_pulled_events_and_fetch = (
pdus lambda dest, pdus, **k: succeed(pdus)
) )
# Send the join, it should return None (which is not an error) # Send the join, it should return None (which is not an error)