Uniformize spam-checker API, part 5: expand other spam-checker callbacks to return Tuple[Codes, dict] (#13044)

Signed-off-by: David Teller <davidt@element.io>
Co-authored-by: Brendan Abolivier <babolivier@matrix.org>
This commit is contained in:
David Teller 2022-07-11 18:52:10 +02:00 committed by GitHub
parent d736d5cfad
commit 11f811470f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 426 additions and 60 deletions

View file

@ -22,7 +22,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from unittest.mock import Mock, call
from urllib import parse as urlparse
# `Literal` appears with Python 3.8.
from parameterized import param, parameterized
from typing_extensions import Literal
from twisted.test.proto_helpers import MemoryReactor
@ -815,14 +815,14 @@ class RoomsCreateTestCase(RoomBase):
In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`.
"""
async def user_may_join_room(
async def user_may_join_room_codes(
mxid: str,
room_id: str,
is_invite: bool,
) -> Codes:
return Codes.CONSENT_NOT_GIVEN
join_mock = Mock(side_effect=user_may_join_room)
join_mock = Mock(side_effect=user_may_join_room_codes)
self.hs.get_spam_checker()._user_may_join_room_callbacks.append(join_mock)
channel = self.make_request(
@ -834,6 +834,25 @@ class RoomsCreateTestCase(RoomBase):
self.assertEqual(join_mock.call_count, 0)
# Now change the return value of the callback to deny any join. Since we're
# creating the room, despite the return value, we should be able to join.
async def user_may_join_room_tuple(
mxid: str,
room_id: str,
is_invite: bool,
) -> Tuple[Codes, dict]:
return Codes.INCOMPATIBLE_ROOM_VERSION, {}
join_mock.side_effect = user_may_join_room_tuple
channel = self.make_request(
"POST",
"/createRoom",
{},
)
self.assertEqual(channel.code, 200, channel.json_body)
self.assertEqual(join_mock.call_count, 0)
class RoomTopicTestCase(RoomBase):
"""Tests /rooms/$room_id/topic REST events."""
@ -1113,13 +1132,15 @@ class RoomJoinTestCase(RoomBase):
"""
# Register a dummy callback. Make it allow all room joins for now.
return_value: Union[Literal["NOT_SPAM"], Codes] = synapse.module_api.NOT_SPAM
return_value: Union[
Literal["NOT_SPAM"], Tuple[Codes, dict], Codes
] = synapse.module_api.NOT_SPAM
async def user_may_join_room(
userid: str,
room_id: str,
is_invited: bool,
) -> Union[Literal["NOT_SPAM"], Codes]:
) -> Union[Literal["NOT_SPAM"], Tuple[Codes, dict], Codes]:
return return_value
# `spec` argument is needed for this function mock to have `__qualname__`, which
@ -1163,8 +1184,28 @@ class RoomJoinTestCase(RoomBase):
)
# Now make the callback deny all room joins, and check that a join actually fails.
# We pick an arbitrary Codes rather than the default `Codes.FORBIDDEN`.
return_value = Codes.CONSENT_NOT_GIVEN
self.helper.join(self.room3, self.user2, expect_code=403, tok=self.tok2)
self.helper.invite(self.room3, self.user1, self.user2, tok=self.tok1)
self.helper.join(
self.room3,
self.user2,
expect_code=403,
expect_errcode=return_value,
tok=self.tok2,
)
# Now make the callback deny all room joins, and check that a join actually fails.
# As above, with the experimental extension that lets us return dictionaries.
return_value = (Codes.BAD_ALIAS, {"another_field": "12345"})
self.helper.join(
self.room3,
self.user2,
expect_code=403,
expect_errcode=return_value[0],
tok=self.tok2,
expect_additional_fields=return_value[1],
)
class RoomJoinRatelimitTestCase(RoomBase):
@ -1314,6 +1355,97 @@ class RoomMessagesTestCase(RoomBase):
channel = self.make_request("PUT", path, content)
self.assertEqual(200, channel.code, msg=channel.result["body"])
@parameterized.expand(
    [
        # Allow
        param(
            name="NOT_SPAM", value="NOT_SPAM", expected_code=200, expected_fields={}
        ),
        param(name="False", value=False, expected_code=200, expected_fields={}),
        # Block
        param(
            name="scalene string",
            value="ANY OTHER STRING",
            expected_code=403,
            expected_fields={"errcode": "M_FORBIDDEN"},
        ),
        param(
            name="True",
            value=True,
            expected_code=403,
            expected_fields={"errcode": "M_FORBIDDEN"},
        ),
        param(
            name="Code",
            value=Codes.LIMIT_EXCEEDED,
            expected_code=403,
            expected_fields={"errcode": "M_LIMIT_EXCEEDED"},
        ),
        param(
            name="Tuple",
            value=(Codes.SERVER_NOT_TRUSTED, {"additional_field": "12345"}),
            expected_code=403,
            expected_fields={
                "errcode": "M_SERVER_NOT_TRUSTED",
                "additional_field": "12345",
            },
        ),
    ]
)
def test_spam_checker_check_event_for_spam(
    self,
    name: str,
    value: Union[str, bool, Codes, Tuple[Codes, JsonDict]],
    expected_code: int,
    expected_fields: dict,
) -> None:
    """Send a message while a `check_event_for_spam` callback returns `value`.

    Each parameterized case registers a callback returning one of the shapes
    listed above (the "NOT_SPAM" string, a bool, another string, a `Codes`
    value, or a `(Codes, dict)` tuple) and checks the HTTP response.

    Args:
        name: Label of the case; also embedded in the transaction ID and in
            the message body so each case sends a distinct event.
        value: What the spam-checker callback returns.
        expected_code: HTTP status expected from the send request.
        expected_fields: Key/value pairs that must be present in the JSON
            response body.
    """

    class SpamCheck:
        # Value handed back by the callback; overwritten below with `value`.
        mock_return_value: Union[
            str, bool, Codes, Tuple[Codes, JsonDict]
        ] = "NOT_SPAM"
        # Content of the last event the callback was asked about, if any.
        mock_content: Optional[JsonDict] = None

        async def check_event_for_spam(
            self,
            event: synapse.events.EventBase,
        ) -> Union[str, bool, Codes, Tuple[Codes, JsonDict]]:
            # Record what we were shown so the test can verify it afterwards.
            self.mock_content = event.content
            return self.mock_return_value

    spam_checker = SpamCheck()

    self.hs.get_spam_checker()._check_event_for_spam_callbacks.append(
        spam_checker.check_event_for_spam
    )

    # Inject `value` as mock_return_value
    spam_checker.mock_return_value = value

    path = "/rooms/%s/send/m.room.message/check_event_for_spam_%s" % (
        urlparse.quote(self.room_id),
        urlparse.quote(name),
    )
    body = "test-%s" % name
    content = '{"body":"%s","msgtype":"m.text"}' % body
    channel = self.make_request("PUT", path, content)

    # Check that the callback has witnessed the correct event.
    witnessed_content = spam_checker.mock_content
    self.assertIsNotNone(witnessed_content)
    # `assertIsNotNone` does not narrow the Optional for mypy; do it explicitly.
    assert witnessed_content is not None
    self.assertEqual(witnessed_content["body"], body, witnessed_content)

    # Check that we have the correct result.
    self.assertEqual(expected_code, channel.code, msg=channel.result["body"])
    for expected_key, expected_value in expected_fields.items():
        self.assertEqual(
            channel.json_body.get(expected_key),
            expected_value,
            "Field %s absent or invalid " % expected_key,
        )
class RoomPowerLevelOverridesTestCase(RoomBase):
"""Tests that the power levels can be overridden with server config."""
@ -3235,7 +3367,8 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase):
make_invite_mock.assert_called_once()
# Now change the return value of the callback to deny any invite and test that
# we can't send the invite.
# we can't send the invite. We pick an arbitrary error code to be able to check
# that the same code has been returned.
mock.return_value = make_awaitable(Codes.CONSENT_NOT_GIVEN)
channel = self.make_request(
method="POST",
@ -3249,6 +3382,27 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase):
access_token=self.tok,
)
self.assertEqual(channel.code, 403)
self.assertEqual(channel.json_body["errcode"], Codes.CONSENT_NOT_GIVEN)
# Also check that it stopped before calling _make_and_store_3pid_invite.
make_invite_mock.assert_called_once()
# Run variant with `Tuple[Codes, dict]`.
mock.return_value = make_awaitable((Codes.EXPIRED_ACCOUNT, {"field": "value"}))
channel = self.make_request(
method="POST",
path="/rooms/" + self.room_id + "/invite",
content={
"id_server": "example.com",
"id_access_token": "sometoken",
"medium": "email",
"address": email_to_invite,
},
access_token=self.tok,
)
self.assertEqual(channel.code, 403)
self.assertEqual(channel.json_body["errcode"], Codes.EXPIRED_ACCOUNT)
self.assertEqual(channel.json_body["field"], "value")
# Also check that it stopped before calling _make_and_store_3pid_invite.
make_invite_mock.assert_called_once()

View file

@ -41,6 +41,7 @@ from twisted.web.resource import Resource
from twisted.web.server import Site
from synapse.api.constants import Membership
from synapse.api.errors import Codes
from synapse.server import HomeServer
from synapse.types import JsonDict
@ -171,6 +172,8 @@ class RestHelper:
expect_code: int = HTTPStatus.OK,
tok: Optional[str] = None,
appservice_user_id: Optional[str] = None,
expect_errcode: Optional[Codes] = None,
expect_additional_fields: Optional[dict] = None,
) -> None:
self.change_membership(
room=room,
@ -180,6 +183,8 @@ class RestHelper:
appservice_user_id=appservice_user_id,
membership=Membership.JOIN,
expect_code=expect_code,
expect_errcode=expect_errcode,
expect_additional_fields=expect_additional_fields,
)
def knock(
@ -263,6 +268,7 @@ class RestHelper:
appservice_user_id: Optional[str] = None,
expect_code: int = HTTPStatus.OK,
expect_errcode: Optional[str] = None,
expect_additional_fields: Optional[dict] = None,
) -> None:
"""
Send a membership state event into a room.
@ -323,6 +329,21 @@ class RestHelper:
channel.result["body"],
)
if expect_additional_fields is not None:
for expect_key, expect_value in expect_additional_fields.items():
assert expect_key in channel.json_body, "Expected field %s, got %s" % (
expect_key,
channel.json_body,
)
assert (
channel.json_body[expect_key] == expect_value
), "Expected: %s at %s, got: %s, resp: %s" % (
expect_value,
expect_key,
channel.json_body[expect_key],
channel.json_body,
)
self.auth_user_id = temp_id
def send(

View file

@ -23,11 +23,13 @@ from urllib import parse
from typing import Tuple

import attr
from parameterized import parameterized, parameterized_class
from PIL import Image as Image
from typing_extensions import Literal

from twisted.internet import defer
from twisted.internet.defer import Deferred
from twisted.test.proto_helpers import MemoryReactor

from synapse.api.errors import Codes
from synapse.events import EventBase
from synapse.events.spamcheck import load_legacy_spam_checkers
from synapse.logging.context import make_deferred_yieldable
@ -570,9 +572,11 @@ class MediaRepoTests(unittest.HomeserverTestCase):
)
class TestSpamChecker:
class TestSpamCheckerLegacy:
"""A spam checker module that rejects all media that includes the bytes
`evil`.
Uses the legacy Spam-Checker API.
"""
def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None:
@ -613,7 +617,7 @@ class TestSpamChecker:
return b"evil" in buf.getvalue()
class SpamCheckerTestCase(unittest.HomeserverTestCase):
class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase):
servlets = [
login.register_servlets,
admin.register_servlets,
@ -637,7 +641,8 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase):
{
"spam_checker": [
{
"module": TestSpamChecker.__module__ + ".TestSpamChecker",
"module": TestSpamCheckerLegacy.__module__
+ ".TestSpamCheckerLegacy",
"config": {},
}
]
@ -662,3 +667,62 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase):
self.helper.upload_media(
self.upload_resource, data, tok=self.tok, expect_code=400
)
# Upload payload that `SpamCheckerTestCase.check_media_file_for_spam` rejects by
# returning a bare `Codes` value.
EVIL_DATA = b"Some evil data"
# Upload payload that the same callback rejects by returning a `(Codes, dict)`
# tuple instead.
EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API"
class SpamCheckerTestCase(unittest.HomeserverTestCase):
    """Tests media uploads against a `check_media_file_for_spam` callback
    registered through the module API's `register_spam_checker_callbacks`.
    """

    servlets = [
        login.register_servlets,
        admin.register_servlets,
    ]

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        """Register and log in a user, then hook up the spam-checker callback."""
        self.user = self.register_user("user", "pass")
        self.tok = self.login("user", "pass")

        # Allow for uploading and downloading to/from the media repo
        self.media_repo = hs.get_media_repository_resource()
        self.download_resource = self.media_repo.children[b"download"]
        self.upload_resource = self.media_repo.children[b"upload"]

        hs.get_module_api().register_spam_checker_callbacks(
            check_media_file_for_spam=self.check_media_file_for_spam
        )

    async def check_media_file_for_spam(
        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
    ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, dict]]:
        """Spam-checker callback that rejects two specific payloads.

        Returns:
            `Codes.FORBIDDEN` if the file content is exactly `EVIL_DATA`,
            `(Codes.FORBIDDEN, {})` if it is exactly `EVIL_DATA_EXPERIMENT`,
            and `"NOT_SPAM"` for anything else.
        """
        buf = BytesIO()
        await file_wrapper.write_chunks_to(buf.write)
        uploaded = buf.getvalue()

        if uploaded == EVIL_DATA:
            return Codes.FORBIDDEN
        if uploaded == EVIL_DATA_EXPERIMENT:
            return (Codes.FORBIDDEN, {})
        return "NOT_SPAM"

    def test_upload_innocent(self) -> None:
        """Attempt to upload some innocent data that should be allowed."""
        self.helper.upload_media(
            self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
        )

    def test_upload_ban(self) -> None:
        """Attempt to upload the two payloads the spam checker rejects, one for
        each rejection shape (bare `Codes` and `(Codes, dict)` tuple). Both
        uploads should fail.
        """
        self.helper.upload_media(
            self.upload_resource, EVIL_DATA, tok=self.tok, expect_code=400
        )

        self.helper.upload_media(
            self.upload_resource,
            EVIL_DATA_EXPERIMENT,
            tok=self.tok,
            expect_code=400,
        )