From a164a46038b0e51142781619db0e6dec8e0c2aaa Mon Sep 17 00:00:00 2001 From: David Teller Date: Mon, 13 Jun 2022 20:16:16 +0200 Subject: [PATCH] Uniformize spam-checker API, part 4: port other spam-checker callbacks to return `Union[Allow, Codes]`. (#12857) Co-authored-by: Brendan Abolivier --- changelog.d/12857.feature | 1 + docs/modules/spam_checker_callbacks.md | 190 ++++++++++++----- docs/upgrade.md | 41 ++++ synapse/events/spamcheck.py | 277 ++++++++++++++++++------- synapse/handlers/directory.py | 19 +- synapse/handlers/federation.py | 10 +- synapse/handlers/message.py | 12 +- synapse/handlers/room.py | 20 +- synapse/handlers/room_member.py | 32 +-- synapse/module_api/__init__.py | 2 + synapse/rest/media/v1/media_storage.py | 7 +- tests/rest/client/test_rooms.py | 175 +++++++++++++++- 12 files changed, 604 insertions(+), 182 deletions(-) create mode 100644 changelog.d/12857.feature diff --git a/changelog.d/12857.feature b/changelog.d/12857.feature new file mode 100644 index 000000000..ddd1dbe68 --- /dev/null +++ b/changelog.d/12857.feature @@ -0,0 +1 @@ +Port spam-checker API callbacks to a new, richer API. This is part of an ongoing change to let spam-checker modules inform users of the reason their event or operation is rejected. diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index ad35e667e..8ca7d5bdb 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -38,15 +38,13 @@ this callback. _First introduced in Synapse v1.37.0_ +_Changed in Synapse v1.61.0: `synapse.module_api.NOT_SPAM` and `synapse.module_api.errors.Codes` can be returned by this callback. Returning a boolean is now deprecated._ + ```python -async def user_may_join_room(user: str, room: str, is_invited: bool) -> bool +async def user_may_join_room(user: str, room: str, is_invited: bool) -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes", bool] ``` -Called when a user is trying to join a room. The module must return a `bool` to indicate -whether the user can join the room. Return `False` to prevent the user from joining the -room; otherwise return `True` to permit the joining. - -The user is represented by their Matrix user ID (e.g. +Called when a user is trying to join a room. The user is represented by their Matrix user ID (e.g. `@alice:example.com`) and the room is represented by its Matrix ID (e.g. `!room:example.com`). The module is also given a boolean to indicate whether the user currently has a pending invite in the room. @@ -54,46 +52,67 @@ currently has a pending invite in the room. This callback isn't called if the join is performed by a server administrator, or in the context of a room creation. +The callback must return one of: + - `synapse.module_api.NOT_SPAM`, to allow the operation. Other callbacks may still + decide to reject it. + - `synapse.module_api.errors.Codes` to reject the operation with an error code. In case + of doubt, `synapse.module_api.errors.Codes.FORBIDDEN` is a good error code. + - (deprecated) `False`, which is the same as returning `synapse.module_api.NOT_SPAM`. + - (deprecated) `True`, which is the same as returning `synapse.module_api.errors.Codes.FORBIDDEN`. + If multiple modules implement this callback, they will be considered in order. If a -callback returns `True`, Synapse falls through to the next one. The value of the first -callback that does not return `True` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.NOT_SPAM`, Synapse falls through to the next one. +The value of the first callback that does not return `synapse.module_api.NOT_SPAM` will +be used. If this happens, Synapse will not call any of the subsequent implementations of +this callback. ### `user_may_invite` _First introduced in Synapse v1.37.0_ +_Changed in Synapse v1.61.0: `synapse.module_api.NOT_SPAM` and `synapse.module_api.errors.Codes` can be returned by this callback. Returning a boolean is now deprecated._ + ```python -async def user_may_invite(inviter: str, invitee: str, room_id: str) -> bool +async def user_may_invite(inviter: str, invitee: str, room_id: str) -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes", bool] ``` -Called when processing an invitation. The module must return a `bool` indicating whether -the inviter can invite the invitee to the given room. Both inviter and invitee are -represented by their Matrix user ID (e.g. `@alice:example.com`). Return `False` to prevent -the invitation; otherwise return `True` to permit it. +Called when processing an invitation. Both inviter and invitee are +represented by their Matrix user ID (e.g. `@alice:example.com`). + + +The callback must return one of: + - `synapse.module_api.NOT_SPAM`, to allow the operation. Other callbacks may still + decide to reject it. + - `synapse.module_api.errors.Codes` to reject the operation with an error code. In case + of doubt, `synapse.module_api.errors.Codes.FORBIDDEN` is a good error code. + + - (deprecated) `False`, which is the same as returning `synapse.module_api.NOT_SPAM`. + - (deprecated) `True`, which is the same as returning `synapse.module_api.errors.Codes.FORBIDDEN`. If multiple modules implement this callback, they will be considered in order. If a -callback returns `True`, Synapse falls through to the next one. The value of the first -callback that does not return `True` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.NOT_SPAM`, Synapse falls through to the next one. +The value of the first callback that does not return `synapse.module_api.NOT_SPAM` will +be used. If this happens, Synapse will not call any of the subsequent implementations of +this callback. + ### `user_may_send_3pid_invite` _First introduced in Synapse v1.45.0_ +_Changed in Synapse v1.61.0: `synapse.module_api.NOT_SPAM` and `synapse.module_api.errors.Codes` can be returned by this callback. Returning a boolean is now deprecated._ + ```python async def user_may_send_3pid_invite( inviter: str, medium: str, address: str, room_id: str, -) -> bool +) -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes", bool] ``` Called when processing an invitation using a third-party identifier (also called a 3PID, -e.g. an email address or a phone number). The module must return a `bool` indicating -whether the inviter can invite the invitee to the given room. Return `False` to prevent -the invitation; otherwise return `True` to permit it. +e.g. an email address or a phone number). The inviter is represented by their Matrix user ID (e.g. `@alice:example.com`), and the invitee is represented by its medium (e.g. "email") and its address @@ -115,63 +134,108 @@ await user_may_send_3pid_invite( **Note**: If the third-party identifier is already associated with a matrix user ID, [`user_may_invite`](#user_may_invite) will be used instead. +The callback must return one of: + - `synapse.module_api.NOT_SPAM`, to allow the operation. Other callbacks may still + decide to reject it. + - `synapse.module_api.errors.Codes` to reject the operation with an error code. In case + of doubt, `synapse.module_api.errors.Codes.FORBIDDEN` is a good error code. + + - (deprecated) `False`, which is the same as returning `synapse.module_api.NOT_SPAM`. + - (deprecated) `True`, which is the same as returning `synapse.module_api.errors.Codes.FORBIDDEN`. + If multiple modules implement this callback, they will be considered in order. If a -callback returns `True`, Synapse falls through to the next one. The value of the first -callback that does not return `True` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.NOT_SPAM`, Synapse falls through to the next one. +The value of the first callback that does not return `synapse.module_api.NOT_SPAM` will +be used. If this happens, Synapse will not call any of the subsequent implementations of +this callback. + ### `user_may_create_room` _First introduced in Synapse v1.37.0_ +_Changed in Synapse v1.61.0: `synapse.module_api.NOT_SPAM` and `synapse.module_api.errors.Codes` can be returned by this callback. Returning a boolean is now deprecated._ + ```python -async def user_may_create_room(user: str) -> bool +async def user_may_create_room(user_id: str) -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes", bool] ``` -Called when processing a room creation request. The module must return a `bool` indicating -whether the given user (represented by their Matrix user ID) is allowed to create a room. -Return `False` to prevent room creation; otherwise return `True` to permit it. +Called when processing a room creation request. + +The callback must return one of: + - `synapse.module_api.NOT_SPAM`, to allow the operation. Other callbacks may still + decide to reject it. + - `synapse.module_api.errors.Codes` to reject the operation with an error code. In case + of doubt, `synapse.module_api.errors.Codes.FORBIDDEN` is a good error code. + + - (deprecated) `False`, which is the same as returning `synapse.module_api.NOT_SPAM`. + - (deprecated) `True`, which is the same as returning `synapse.module_api.errors.Codes.FORBIDDEN`. If multiple modules implement this callback, they will be considered in order. If a -callback returns `True`, Synapse falls through to the next one. The value of the first -callback that does not return `True` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.NOT_SPAM`, Synapse falls through to the next one. +The value of the first callback that does not return `synapse.module_api.NOT_SPAM` will +be used. If this happens, Synapse will not call any of the subsequent implementations of +this callback. + + ### `user_may_create_room_alias` _First introduced in Synapse v1.37.0_ +_Changed in Synapse v1.61.0: `synapse.module_api.NOT_SPAM` and `synapse.module_api.errors.Codes` can be returned by this callback. Returning a boolean is now deprecated._ + ```python -async def user_may_create_room_alias(user: str, room_alias: "synapse.types.RoomAlias") -> bool +async def user_may_create_room_alias(user_id: str, room_alias: "synapse.module_api.RoomAlias") -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes", bool] ``` -Called when trying to associate an alias with an existing room. The module must return a -`bool` indicating whether the given user (represented by their Matrix user ID) is allowed -to set the given alias. Return `False` to prevent the alias creation; otherwise return -`True` to permit it. +Called when trying to associate an alias with an existing room. + +The callback must return one of: + - `synapse.module_api.NOT_SPAM`, to allow the operation. Other callbacks may still + decide to reject it. + - `synapse.module_api.errors.Codes` to reject the operation with an error code. In case + of doubt, `synapse.module_api.errors.Codes.FORBIDDEN` is a good error code. + + - (deprecated) `False`, which is the same as returning `synapse.module_api.NOT_SPAM`. + - (deprecated) `True`, which is the same as returning `synapse.module_api.errors.Codes.FORBIDDEN`. If multiple modules implement this callback, they will be considered in order. If a -callback returns `True`, Synapse falls through to the next one. The value of the first -callback that does not return `True` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.NOT_SPAM`, Synapse falls through to the next one. +The value of the first callback that does not return `synapse.module_api.NOT_SPAM` will +be used. If this happens, Synapse will not call any of the subsequent implementations of +this callback. + + ### `user_may_publish_room` _First introduced in Synapse v1.37.0_ +_Changed in Synapse v1.61.0: `synapse.module_api.NOT_SPAM` and `synapse.module_api.errors.Codes` can be returned by this callback. Returning a boolean is now deprecated._ + ```python -async def user_may_publish_room(user: str, room_id: str) -> bool +async def user_may_publish_room(user_id: str, room_id: str) -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes", bool] ``` -Called when trying to publish a room to the homeserver's public rooms directory. The -module must return a `bool` indicating whether the given user (represented by their -Matrix user ID) is allowed to publish the given room. Return `False` to prevent the -room from being published; otherwise return `True` to permit its publication. +Called when trying to publish a room to the homeserver's public rooms directory. + +The callback must return one of: + - `synapse.module_api.NOT_SPAM`, to allow the operation. Other callbacks may still + decide to reject it. + - `synapse.module_api.errors.Codes` to reject the operation with an error code. In case + of doubt, `synapse.module_api.errors.Codes.FORBIDDEN` is a good error code. + + - (deprecated) `False`, which is the same as returning `synapse.module_api.NOT_SPAM`. + - (deprecated) `True`, which is the same as returning `synapse.module_api.errors.Codes.FORBIDDEN`. If multiple modules implement this callback, they will be considered in order. If a -callback returns `True`, Synapse falls through to the next one. The value of the first -callback that does not return `True` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.NOT_SPAM`, Synapse falls through to the next one. +The value of the first callback that does not return `synapse.module_api.NOT_SPAM` will +be used. If this happens, Synapse will not call any of the subsequent implementations of +this callback. + + ### `check_username_for_spam` @@ -239,21 +303,32 @@ this callback. _First introduced in Synapse v1.37.0_ +_Changed in Synapse v1.61.0: `synapse.module_api.NOT_SPAM` and `synapse.module_api.errors.Codes` can be returned by this callback. Returning a boolean is now deprecated._ + ```python async def check_media_file_for_spam( file_wrapper: "synapse.rest.media.v1.media_storage.ReadableFileWrapper", file_info: "synapse.rest.media.v1._base.FileInfo", -) -> bool +) -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes", bool] ``` -Called when storing a local or remote file. The module must return a `bool` indicating -whether the given file should be excluded from the homeserver's media store. Return -`True` to prevent this file from being stored; otherwise return `False`. +Called when storing a local or remote file. + +The callback must return one of: + - `synapse.module_api.NOT_SPAM`, to allow the operation. Other callbacks may still + decide to reject it. + - `synapse.module_api.errors.Codes` to reject the operation with an error code. In case + of doubt, `synapse.module_api.errors.Codes.FORBIDDEN` is a good error code. + + - (deprecated) `False`, which is the same as returning `synapse.module_api.NOT_SPAM`. + - (deprecated) `True`, which is the same as returning `synapse.module_api.errors.Codes.FORBIDDEN`. If multiple modules implement this callback, they will be considered in order. If a -callback returns `False`, Synapse falls through to the next one. The value of the first -callback that does not return `False` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.NOT_SPAM`, Synapse falls through to the next one. +The value of the first callback that does not return `synapse.module_api.NOT_SPAM` will +be used. If this happens, Synapse will not call any of the subsequent implementations of +this callback. + ### `should_drop_federated_event` @@ -316,6 +391,9 @@ class ListSpamChecker: resource=IsUserEvilResource(config), ) - async def check_event_for_spam(self, event: "synapse.events.EventBase") -> Union[bool, str]: - return event.sender not in self.evil_users + async def check_event_for_spam(self, event: "synapse.events.EventBase") -> Union[Literal["NOT_SPAM"], Codes]: + if event.sender in self.evil_users: + return Codes.FORBIDDEN + else: + return synapse.module_api.NOT_SPAM ``` diff --git a/docs/upgrade.md b/docs/upgrade.md index e3c64da17..3ade86b1a 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -89,6 +89,47 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.61.0 + +## New signatures for spam checker callbacks + +As a followup to changes in v1.60.0, the following spam-checker callbacks have changed signature: + +- `user_may_join_room` +- `user_may_invite` +- `user_may_send_3pid_invite` +- `user_may_create_room` +- `user_may_create_room_alias` +- `user_may_publish_room` +- `check_media_file_for_spam` + +For each of these methods, the previous callback signature has been deprecated. + +Whereas callbacks used to return `bool`, they should now return `Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes"]`. + +For instance, if your module implements `user_may_join_room` as follows: + +```python +async def user_may_join_room(self, user_id: str, room_id: str, is_invited: bool) + if ...: + # Request is spam + return False + # Request is not spam + return True +``` + +you should rewrite it as follows: + +```python +async def user_may_join_room(self, user_id: str, room_id: str, is_invited: bool) + if ...: + # Request is spam, mark it as forbidden (you may use some more precise error + # code if it is useful). + return synapse.module_api.errors.Codes.FORBIDDEN + # Request is not spam, mark it as such. + return synapse.module_api.NOT_SPAM +``` + # Upgrading to v1.60.0 ## Adding a new unique index to `state_group_edges` could fail if your database is corrupted diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index d2e06c754..32712d204 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -28,7 +28,10 @@ from typing import ( Union, ) -from synapse.api.errors import Codes +# `Literal` appears with Python 3.8. +from typing_extensions import Literal + +import synapse from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour @@ -47,12 +50,12 @@ CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ Awaitable[ Union[ str, - Codes, + "synapse.api.errors.Codes", # Highly experimental, not officially part of the spamchecker API, may # disappear without warning depending on the results of ongoing # experiments. # Use this to return additional information as part of an error. - Tuple[Codes, Dict], + Tuple["synapse.api.errors.Codes", Dict], # Deprecated bool, ] @@ -62,12 +65,72 @@ SHOULD_DROP_FEDERATED_EVENT_CALLBACK = Callable[ ["synapse.events.EventBase"], Awaitable[Union[bool, str]], ] -USER_MAY_JOIN_ROOM_CALLBACK = Callable[[str, str, bool], Awaitable[bool]] -USER_MAY_INVITE_CALLBACK = Callable[[str, str, str], Awaitable[bool]] -USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[[str, str, str, str], Awaitable[bool]] -USER_MAY_CREATE_ROOM_CALLBACK = Callable[[str], Awaitable[bool]] -USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[[str, RoomAlias], Awaitable[bool]] -USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[[str, str], Awaitable[bool]] +USER_MAY_JOIN_ROOM_CALLBACK = Callable[ + [str, str, bool], + Awaitable[ + Union[ + Literal["NOT_SPAM"], + "synapse.api.errors.Codes", + # Deprecated + bool, + ] + ], +] +USER_MAY_INVITE_CALLBACK = Callable[ + [str, str, str], + Awaitable[ + Union[ + Literal["NOT_SPAM"], + "synapse.api.errors.Codes", + # Deprecated + bool, + ] + ], +] +USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[ + [str, str, str, str], + Awaitable[ + Union[ + Literal["NOT_SPAM"], + "synapse.api.errors.Codes", + # Deprecated + bool, + ] + ], +] +USER_MAY_CREATE_ROOM_CALLBACK = Callable[ + [str], + Awaitable[ + Union[ + Literal["NOT_SPAM"], + "synapse.api.errors.Codes", + # Deprecated + bool, + ] + ], +] +USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[ + [str, RoomAlias], + Awaitable[ + Union[ + Literal["NOT_SPAM"], + "synapse.api.errors.Codes", + # Deprecated + bool, + ] + ], +] +USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[ + [str, str], + Awaitable[ + Union[ + Literal["NOT_SPAM"], + "synapse.api.errors.Codes", + # Deprecated + bool, + ] + ], +] CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[UserProfile], Awaitable[bool]] LEGACY_CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[ [ @@ -88,7 +151,14 @@ CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[ ] CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[ [ReadableFileWrapper, FileInfo], - Awaitable[bool], + Awaitable[ + Union[ + Literal["NOT_SPAM"], + "synapse.api.errors.Codes", + # Deprecated + bool, + ] + ], ] @@ -181,7 +251,7 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer") -> None: class SpamChecker: - NOT_SPAM = "NOT_SPAM" + NOT_SPAM: Literal["NOT_SPAM"] = "NOT_SPAM" def __init__(self, hs: "synapse.server.HomeServer") -> None: self.hs = hs @@ -275,7 +345,7 @@ class SpamChecker: async def check_event_for_spam( self, event: "synapse.events.EventBase" - ) -> Union[Tuple[Codes, Dict], str]: + ) -> Union[Tuple["synapse.api.errors.Codes", Dict], str]: """Checks if a given event is considered "spammy" by this server. If the server considers an event spammy, then it will be rejected if @@ -306,7 +376,7 @@ class SpamChecker: elif res is True: # This spam-checker rejects the event with deprecated # return value `True` - return Codes.FORBIDDEN + return (synapse.api.errors.Codes.FORBIDDEN, {}) elif not isinstance(res, str): # mypy complains that we can't reach this code because of the # return type in CHECK_EVENT_FOR_SPAM_CALLBACK, but we don't know @@ -352,7 +422,7 @@ class SpamChecker: async def user_may_join_room( self, user_id: str, room_id: str, is_invited: bool - ) -> bool: + ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: """Checks if a given users is allowed to join a room. Not called when a user creates a room. @@ -362,54 +432,70 @@ class SpamChecker: is_invited: Whether the user is invited into the room Returns: - Whether the user may join the room + NOT_SPAM if the operation is permitted, Codes otherwise. """ for callback in self._user_may_join_room_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - may_join_room = await delay_cancellation( - callback(user_id, room_id, is_invited) - ) - if may_join_room is False: - return False + res = await delay_cancellation(callback(user_id, room_id, is_invited)) + # Normalize return values to `Codes` or `"NOT_SPAM"`. + if res is True or res is self.NOT_SPAM: + continue + elif res is False: + return synapse.api.errors.Codes.FORBIDDEN + elif isinstance(res, synapse.api.errors.Codes): + return res + else: + logger.warning( + "Module returned invalid value, rejecting join as spam" + ) + return synapse.api.errors.Codes.FORBIDDEN - return True + # No spam-checker has rejected the request, let it pass. + return self.NOT_SPAM async def user_may_invite( self, inviter_userid: str, invitee_userid: str, room_id: str - ) -> bool: + ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: """Checks if a given user may send an invite - If this method returns false, the invite will be rejected. - Args: inviter_userid: The user ID of the sender of the invitation invitee_userid: The user ID targeted in the invitation room_id: The room ID Returns: - True if the user may send an invite, otherwise False + NOT_SPAM if the operation is permitted, Codes otherwise. """ for callback in self._user_may_invite_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - may_invite = await delay_cancellation( + res = await delay_cancellation( callback(inviter_userid, invitee_userid, room_id) ) - if may_invite is False: - return False + # Normalize return values to `Codes` or `"NOT_SPAM"`. + if res is True or res is self.NOT_SPAM: + continue + elif res is False: + return synapse.api.errors.Codes.FORBIDDEN + elif isinstance(res, synapse.api.errors.Codes): + return res + else: + logger.warning( + "Module returned invalid value, rejecting invite as spam" + ) + return synapse.api.errors.Codes.FORBIDDEN - return True + # No spam-checker has rejected the request, let it pass. + return self.NOT_SPAM async def user_may_send_3pid_invite( self, inviter_userid: str, medium: str, address: str, room_id: str - ) -> bool: + ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: """Checks if a given user may invite a given threepid into the room - If this method returns false, the threepid invite will be rejected. - Note that if the threepid is already associated with a Matrix user ID, Synapse will call user_may_invite with said user ID instead. @@ -420,88 +506,113 @@ class SpamChecker: room_id: The room ID Returns: - True if the user may send the invite, otherwise False + NOT_SPAM if the operation is permitted, Codes otherwise. """ for callback in self._user_may_send_3pid_invite_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - may_send_3pid_invite = await delay_cancellation( + res = await delay_cancellation( callback(inviter_userid, medium, address, room_id) ) - if may_send_3pid_invite is False: - return False + # Normalize return values to `Codes` or `"NOT_SPAM"`. + if res is True or res is self.NOT_SPAM: + continue + elif res is False: + return synapse.api.errors.Codes.FORBIDDEN + elif isinstance(res, synapse.api.errors.Codes): + return res + else: + logger.warning( + "Module returned invalid value, rejecting 3pid invite as spam" + ) + return synapse.api.errors.Codes.FORBIDDEN - return True + return self.NOT_SPAM - async def user_may_create_room(self, userid: str) -> bool: + async def user_may_create_room( + self, userid: str + ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: """Checks if a given user may create a room - If this method returns false, the creation request will be rejected. - Args: userid: The ID of the user attempting to create a room - - Returns: - True if the user may create a room, otherwise False """ for callback in self._user_may_create_room_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - may_create_room = await delay_cancellation(callback(userid)) - if may_create_room is False: - return False + res = await delay_cancellation(callback(userid)) + if res is True or res is self.NOT_SPAM: + continue + elif res is False: + return synapse.api.errors.Codes.FORBIDDEN + elif isinstance(res, synapse.api.errors.Codes): + return res + else: + logger.warning( + "Module returned invalid value, rejecting room creation as spam" + ) + return synapse.api.errors.Codes.FORBIDDEN - return True + return self.NOT_SPAM async def user_may_create_room_alias( self, userid: str, room_alias: RoomAlias - ) -> bool: + ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: """Checks if a given user may create a room alias - If this method returns false, the association request will be rejected. - Args: userid: The ID of the user attempting to create a room alias room_alias: The alias to be created - Returns: - True if the user may create a room alias, otherwise False """ for callback in self._user_may_create_room_alias_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - may_create_room_alias = await delay_cancellation( - callback(userid, room_alias) - ) - if may_create_room_alias is False: - return False + res = await delay_cancellation(callback(userid, room_alias)) + if res is True or res is self.NOT_SPAM: + continue + elif res is False: + return synapse.api.errors.Codes.FORBIDDEN + elif isinstance(res, synapse.api.errors.Codes): + return res + else: + logger.warning( + "Module returned invalid value, rejecting room create as spam" + ) + return synapse.api.errors.Codes.FORBIDDEN - return True + return self.NOT_SPAM - async def user_may_publish_room(self, userid: str, room_id: str) -> bool: + async def user_may_publish_room( + self, userid: str, room_id: str + ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: """Checks if a given user may publish a room to the directory - If this method returns false, the publish request will be rejected. - Args: userid: The user ID attempting to publish the room room_id: The ID of the room that would be published - - Returns: - True if the user may publish the room, otherwise False """ for callback in self._user_may_publish_room_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - may_publish_room = await delay_cancellation(callback(userid, room_id)) - if may_publish_room is False: - return False + res = await delay_cancellation(callback(userid, room_id)) + if res is True or res is self.NOT_SPAM: + continue + elif res is False: + return synapse.api.errors.Codes.FORBIDDEN + elif isinstance(res, synapse.api.errors.Codes): + return res + else: + logger.warning( + "Module returned invalid value, rejecting room publication as spam" + ) + return synapse.api.errors.Codes.FORBIDDEN - return True + return self.NOT_SPAM async def check_username_for_spam(self, user_profile: UserProfile) -> bool: """Checks if a user ID or display name are considered "spammy" by this server. @@ -567,7 +678,7 @@ class SpamChecker: async def check_media_file_for_spam( self, file_wrapper: ReadableFileWrapper, file_info: FileInfo - ) -> bool: + ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: """Checks if a piece of newly uploaded media should be blocked. This will be called for local uploads, downloads of remote media, each @@ -580,31 +691,37 @@ class SpamChecker: async def check_media_file_for_spam( self, file: ReadableFileWrapper, file_info: FileInfo - ) -> bool: + ) -> Union[Codes, Literal["NOT_SPAM"]]: buffer = BytesIO() await file.write_chunks_to(buffer.write) if buffer.getvalue() == b"Hello World": - return True + return synapse.module_api.NOT_SPAM - return False + return Codes.FORBIDDEN Args: file: An object that allows reading the contents of the media. file_info: Metadata about the file. - - Returns: - True if the media should be blocked or False if it should be - allowed. """ for callback in self._check_media_file_for_spam_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - spam = await delay_cancellation(callback(file_wrapper, file_info)) - if spam: - return True + res = await delay_cancellation(callback(file_wrapper, file_info)) + # Normalize return values to `Codes` or `"NOT_SPAM"`. + if res is False or res is self.NOT_SPAM: + continue + elif res is True: + return synapse.api.errors.Codes.FORBIDDEN + elif isinstance(res, synapse.api.errors.Codes): + return res + else: + logger.warning( + "Module returned invalid value, rejecting media file as spam" + ) + return synapse.api.errors.Codes.FORBIDDEN - return False + return self.NOT_SPAM diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 1459a046d..8b0f16f96 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -28,6 +28,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.appservice import ApplicationService +from synapse.module_api import NOT_SPAM from synapse.storage.databases.main.directory import RoomAliasMapping from synapse.types import JsonDict, Requester, RoomAlias, UserID, get_domain_from_id @@ -141,10 +142,15 @@ class DirectoryHandler: 403, "You must be in the room to create an alias for it" ) - if not await self.spam_checker.user_may_create_room_alias( + spam_check = await self.spam_checker.user_may_create_room_alias( user_id, room_alias - ): - raise AuthError(403, "This user is not permitted to create this alias") + ) + if spam_check != self.spam_checker.NOT_SPAM: + raise AuthError( + 403, + "This user is not permitted to create this alias", + spam_check, + ) if not self.config.roomdirectory.is_alias_creation_allowed( user_id, room_id, room_alias_str @@ -430,9 +436,12 @@ class DirectoryHandler: """ user_id = requester.user.to_string() - if not await self.spam_checker.user_may_publish_room(user_id, room_id): + spam_check = await self.spam_checker.user_may_publish_room(user_id, room_id) + if spam_check != NOT_SPAM: raise AuthError( - 403, "This user is not permitted to publish rooms to the room list" + 403, + "This user is not permitted to publish rooms to the room list", + spam_check, ) if requester.is_guest: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 1e5694244..34cc5ecd1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,6 +59,7 @@ from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, @@ -820,11 +821,14 @@ class FederationHandler: if self.hs.config.server.block_non_admin_invites: raise SynapseError(403, "This server does not accept room invites") - if not await self.spam_checker.user_may_invite( + spam_check = await self.spam_checker.user_may_invite( event.sender, event.state_key, event.room_id - ): + ) + if spam_check != NOT_SPAM: raise SynapseError( - 403, "This user is not permitted to send invites to this server/user" + 403, + "This user is not permitted to send invites to this server/user", + spam_check, ) membership = event.content.get("membership") diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 9b1793916..ad87c4178 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -954,14 +954,12 @@ class EventCreationHandler: "Spam-check module returned invalid error value. Expecting [code, dict], got %s", spam_check_result, ) - spam_check_result = Codes.FORBIDDEN - if isinstance(spam_check_result, Codes): - raise SynapseError( - 403, - "This message has been rejected as probable spam", - spam_check_result, - ) + raise SynapseError( + 403, + "This message has been rejected as probable spam", + Codes.FORBIDDEN, + ) # Backwards compatibility: if the return value is not an error code, it # means the module returned an error message to be included in the diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index d8918ee1a..42aae4a21 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -62,6 +62,7 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.federation.federation_client import InvalidResponseError from synapse.handlers.federation import get_domains_from_state from synapse.handlers.relations import BundledAggregations +from synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin from synapse.storage.state import StateFilter from synapse.streams import EventSource @@ -436,10 +437,9 @@ class RoomCreationHandler: """ user_id = requester.user.to_string() - if not await self.spam_checker.user_may_create_room(user_id): - raise SynapseError( - 403, "You are not permitted to create rooms", Codes.FORBIDDEN - ) + spam_check = await self.spam_checker.user_may_create_room(user_id) + if spam_check != NOT_SPAM: + raise SynapseError(403, "You are not permitted to create rooms", spam_check) creation_content: JsonDict = { "room_version": new_room_version.identifier, @@ -726,12 +726,12 @@ class RoomCreationHandler: invite_3pid_list = config.get("invite_3pid", []) invite_list = config.get("invite", []) - if not is_requester_admin and not ( - await self.spam_checker.user_may_create_room(user_id) - ): - raise SynapseError( - 403, "You are not permitted to create rooms", Codes.FORBIDDEN - ) + if not is_requester_admin: + spam_check = await self.spam_checker.user_may_create_room(user_id) + if spam_check != NOT_SPAM: + raise SynapseError( + 403, "You are not permitted to create rooms", spam_check + ) if ratelimit: await self.request_ratelimiter.ratelimit(requester) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index d1199a064..e89b7441a 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -38,6 +38,7 @@ from synapse.event_auth import get_named_level, get_power_level_event from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN +from synapse.module_api import NOT_SPAM from synapse.storage.state import StateFilter from synapse.types import ( JsonDict, @@ -683,7 +684,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if target_id == self._server_notices_mxid: raise SynapseError(HTTPStatus.FORBIDDEN, "Cannot invite this user") - block_invite = False + block_invite_code = None if ( self._server_notices_mxid is not None @@ -701,16 +702,19 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): "Blocking invite: user is not admin and non-admin " "invites disabled" ) - block_invite = True + block_invite_code = Codes.FORBIDDEN - if not await self.spam_checker.user_may_invite( + spam_check = await self.spam_checker.user_may_invite( requester.user.to_string(), target_id, room_id - ): + ) + if spam_check != NOT_SPAM: logger.info("Blocking invite due to spam checker") - block_invite = True + block_invite_code = spam_check - if block_invite: - raise SynapseError(403, "Invites have been disabled on this server") + if block_invite_code is not None: + raise SynapseError( + 403, "Invites have been disabled on this server", block_invite_code + ) # An empty prev_events list is allowed as long as the auth_event_ids are present if prev_event_ids is not None: @@ -818,11 +822,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # We assume that if the spam checker allowed the user to create # a room then they're allowed to join it. and not new_room - and not await self.spam_checker.user_may_join_room( + ): + spam_check = await self.spam_checker.user_may_join_room( target.to_string(), room_id, is_invited=inviter is not None ) - ): - raise SynapseError(403, "Not allowed to join this room") + if spam_check != NOT_SPAM: + raise SynapseError(403, "Not allowed to join this room", spam_check) # Check if a remote join should be performed. remote_join, remote_room_hosts = await self._should_perform_remote_join( @@ -1369,13 +1374,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ) else: # Check if the spamchecker(s) allow this invite to go through. - if not await self.spam_checker.user_may_send_3pid_invite( + spam_check = await self.spam_checker.user_may_send_3pid_invite( inviter_userid=requester.user.to_string(), medium=medium, address=address, room_id=room_id, - ): - raise SynapseError(403, "Cannot send threepid invite") + ) + if spam_check != NOT_SPAM: + raise SynapseError(403, "Cannot send threepid invite", spam_check) stream_id = await self._make_and_store_3pid_invite( requester, diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 30b2aeffd..6191c2dc9 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -115,6 +115,7 @@ from synapse.types import ( JsonDict, JsonMapping, Requester, + RoomAlias, StateMap, UserID, UserInfo, @@ -163,6 +164,7 @@ __all__ = [ "EventBase", "StateMap", "ProfileInfo", + "RoomAlias", "UserProfile", ] diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 604f18bf5..913741734 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -36,6 +36,7 @@ from twisted.internet.defer import Deferred from twisted.internet.interfaces import IConsumer from twisted.protocols.basic import FileSender +import synapse from synapse.api.errors import NotFoundError from synapse.logging.context import defer_to_thread, make_deferred_yieldable from synapse.util import Clock @@ -145,15 +146,15 @@ class MediaStorage: f.flush() f.close() - spam = await self.spam_checker.check_media_file_for_spam( + spam_check = await self.spam_checker.check_media_file_for_spam( ReadableFileWrapper(self.clock, fname), file_info ) - if spam: + if spam_check != synapse.module_api.NOT_SPAM: logger.info("Blocking media due to spam checker") # Note that we'll delete the stored media, due to the # try/except below. The media also won't be stored in # the DB. - raise SpamMediaException() + raise SpamMediaException(errcode=spam_check) for provider in self.storage_providers: await provider.store_file(path, file_info) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 4be83dfd6..35c59ee9e 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -18,10 +18,13 @@ """Tests REST events for /rooms paths.""" import json -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Union from unittest.mock import Mock, call from urllib import parse as urlparse +# `Literal` appears with Python 3.8. +from typing_extensions import Literal + from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin @@ -777,9 +780,11 @@ class RoomsCreateTestCase(RoomBase): channel = self.make_request("POST", "/createRoom", content) self.assertEqual(200, channel.code) - def test_spam_checker_may_join_room(self) -> None: + def test_spam_checker_may_join_room_deprecated(self) -> None: """Tests that the user_may_join_room spam checker callback is correctly bypassed when creating a new room. + + In this test, we use the deprecated API in which callbacks return a bool. """ async def user_may_join_room( @@ -801,6 +806,32 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(join_mock.call_count, 0) + def test_spam_checker_may_join_room(self) -> None: + """Tests that the user_may_join_room spam checker callback is correctly bypassed + when creating a new room. + + In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`. + """ + + async def user_may_join_room( + mxid: str, + room_id: str, + is_invite: bool, + ) -> Codes: + return Codes.CONSENT_NOT_GIVEN + + join_mock = Mock(side_effect=user_may_join_room) + self.hs.get_spam_checker()._user_may_join_room_callbacks.append(join_mock) + + channel = self.make_request( + "POST", + "/createRoom", + {}, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + self.assertEqual(join_mock.call_count, 0) + class RoomTopicTestCase(RoomBase): """Tests /rooms/$room_id/topic REST events.""" @@ -1011,9 +1042,11 @@ class RoomJoinTestCase(RoomBase): self.room2 = self.helper.create_room_as(room_creator=self.user1, tok=self.tok1) self.room3 = self.helper.create_room_as(room_creator=self.user1, tok=self.tok1) - def test_spam_checker_may_join_room(self) -> None: + def test_spam_checker_may_join_room_deprecated(self) -> None: """Tests that the user_may_join_room spam checker callback is correctly called and blocks room joins when needed. + + This test uses the deprecated API, in which callbacks return booleans. """ # Register a dummy callback. Make it allow all room joins for now. @@ -1026,6 +1059,8 @@ class RoomJoinTestCase(RoomBase): ) -> bool: return return_value + # `spec` argument is needed for this function mock to have `__qualname__`, which + # is needed for `Measure` metrics buried in SpamChecker. callback_mock = Mock(side_effect=user_may_join_room, spec=lambda *x: None) self.hs.get_spam_checker()._user_may_join_room_callbacks.append(callback_mock) @@ -1068,6 +1103,67 @@ class RoomJoinTestCase(RoomBase): return_value = False self.helper.join(self.room3, self.user2, expect_code=403, tok=self.tok2) + def test_spam_checker_may_join_room(self) -> None: + """Tests that the user_may_join_room spam checker callback is correctly called + and blocks room joins when needed. + + This test uses the latest API to this day, in which callbacks return `NOT_SPAM` or `Codes`. + """ + + # Register a dummy callback. Make it allow all room joins for now. + return_value: Union[Literal["NOT_SPAM"], Codes] = synapse.module_api.NOT_SPAM + + async def user_may_join_room( + userid: str, + room_id: str, + is_invited: bool, + ) -> Union[Literal["NOT_SPAM"], Codes]: + return return_value + + # `spec` argument is needed for this function mock to have `__qualname__`, which + # is needed for `Measure` metrics buried in SpamChecker. + callback_mock = Mock(side_effect=user_may_join_room, spec=lambda *x: None) + self.hs.get_spam_checker()._user_may_join_room_callbacks.append(callback_mock) + + # Join a first room, without being invited to it. + self.helper.join(self.room1, self.user2, tok=self.tok2) + + # Check that the callback was called with the right arguments. + expected_call_args = ( + ( + self.user2, + self.room1, + False, + ), + ) + self.assertEqual( + callback_mock.call_args, + expected_call_args, + callback_mock.call_args, + ) + + # Join a second room, this time with an invite for it. + self.helper.invite(self.room2, self.user1, self.user2, tok=self.tok1) + self.helper.join(self.room2, self.user2, tok=self.tok2) + + # Check that the callback was called with the right arguments. + expected_call_args = ( + ( + self.user2, + self.room2, + True, + ), + ) + self.assertEqual( + callback_mock.call_args, + expected_call_args, + callback_mock.call_args, + ) + + # Now make the callback deny all room joins, and check that a join actually fails. + return_value = Codes.CONSENT_NOT_GIVEN + self.helper.join(self.room3, self.user2, expect_code=403, tok=self.tok2) + class RoomJoinRatelimitTestCase(RoomBase): user_id = "@sid1:red" @@ -2945,9 +3041,14 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok) - def test_threepid_invite_spamcheck(self) -> None: + def test_threepid_invite_spamcheck_deprecated(self) -> None: + """ + Test allowing/blocking threepid invites with a spam-check module. + + In this test, we use the deprecated API in which callbacks return a bool. + """ # Mock a few functions to prevent the test from failing due to failing to talk to - # a remote IS. We keep the mock for _mock_make_and_store_3pid_invite around so we + # a remote IS. We keep the mock for make_and_store_3pid_invite around so we # can check its call_count later on during the test. make_invite_mock = Mock(return_value=make_awaitable(0)) self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock @@ -3001,3 +3102,67 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): # Also check that it stopped before calling _make_and_store_3pid_invite. make_invite_mock.assert_called_once() + + def test_threepid_invite_spamcheck(self) -> None: + """ + Test allowing/blocking threepid invites with a spam-check module. + + In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`.""" + # Mock a few functions to prevent the test from failing due to failing to talk to + # a remote IS. We keep the mock for make_and_store_3pid_invite around so we + # can check its call_count later on during the test. + make_invite_mock = Mock(return_value=make_awaitable(0)) + self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock + self.hs.get_identity_handler().lookup_3pid = Mock( + return_value=make_awaitable(None), + ) + + # Add a mock to the spamchecker callbacks for user_may_send_3pid_invite. Make it + # allow everything for now. + # `spec` argument is needed for this function mock to have `__qualname__`, which + # is needed for `Measure` metrics buried in SpamChecker. + mock = Mock( + return_value=make_awaitable(synapse.module_api.NOT_SPAM), + spec=lambda *x: None, + ) + self.hs.get_spam_checker()._user_may_send_3pid_invite_callbacks.append(mock) + + # Send a 3PID invite into the room and check that it succeeded. + email_to_invite = "teresa@example.com" + channel = self.make_request( + method="POST", + path="/rooms/" + self.room_id + "/invite", + content={ + "id_server": "example.com", + "id_access_token": "sometoken", + "medium": "email", + "address": email_to_invite, + }, + access_token=self.tok, + ) + self.assertEqual(channel.code, 200) + + # Check that the callback was called with the right params. + mock.assert_called_with(self.user_id, "email", email_to_invite, self.room_id) + + # Check that the call to send the invite was made. + make_invite_mock.assert_called_once() + + # Now change the return value of the callback to deny any invite and test that + # we can't send the invite. + mock.return_value = make_awaitable(Codes.CONSENT_NOT_GIVEN) + channel = self.make_request( + method="POST", + path="/rooms/" + self.room_id + "/invite", + content={ + "id_server": "example.com", + "id_access_token": "sometoken", + "medium": "email", + "address": email_to_invite, + }, + access_token=self.tok, + ) + self.assertEqual(channel.code, 403) + + # Also check that it stopped before calling _make_and_store_3pid_invite. + make_invite_mock.assert_called_once()