From 36df9c5e36cbad2a378d922085453726a21ae80c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 5 May 2023 12:13:50 -0400 Subject: [PATCH] Implement MSC4009 to widen the allowed Matrix ID grammar (#15536) Behind a configuration flag this adds + to the list of allowed characters in Matrix IDs. The main feature this enables is using full E.164 phone numbers as Matrix IDs. --- changelog.d/15536.feature | 1 + synapse/config/experimental.py | 3 +++ synapse/handlers/register.py | 27 ++++++++++++++------------- synapse/handlers/sso.py | 6 ++++-- synapse/types/__init__.py | 21 +++++++++++++++++++-- tests/handlers/test_register.py | 13 +++++++++++++ 6 files changed, 54 insertions(+), 17 deletions(-) create mode 100644 changelog.d/15536.feature diff --git a/changelog.d/15536.feature b/changelog.d/15536.feature new file mode 100644 index 000000000..824c24575 --- /dev/null +++ b/changelog.d/15536.feature @@ -0,0 +1 @@ +Implement [MSC4009](https://github.com/matrix-org/matrix-spec-proposals/pull/4009) to expand the supported characters in Matrix IDs. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index cab7ccf4b..514d87cb2 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -199,3 +199,6 @@ class ExperimentalConfig(Config): # MSC3970: Scope transaction IDs to devices self.msc3970_enabled = experimental.get("msc3970_enabled", False) + + # MSC4009: E.164 Matrix IDs + self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 61c4b833b..c80946c2e 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -46,7 +46,7 @@ from synapse.replication.http.register import ( ReplicationRegisterServlet, ) from synapse.spam_checker_api import RegistrationBehaviour -from synapse.types import RoomAlias, UserID, create_requester +from synapse.types import GUEST_USER_ID_PATTERN, RoomAlias, UserID, create_requester from synapse.types.state import StateFilter if TYPE_CHECKING: @@ -143,10 +143,15 @@ class RegistrationHandler: assigned_user_id: Optional[str] = None, inhibit_user_in_use_error: bool = False, ) -> None: - if types.contains_invalid_mxid_characters(localpart): + if types.contains_invalid_mxid_characters( + localpart, self.hs.config.experimental.msc4009_e164_mxids + ): + extra_chars = ( + "=_-./+" if self.hs.config.experimental.msc4009_e164_mxids else "=_-./" + ) raise SynapseError( 400, - "User ID can only contain characters a-z, 0-9, or '=_-./'", + f"User ID can only contain characters a-z, 0-9, or '{extra_chars}'", Codes.INVALID_USERNAME, ) @@ -195,16 +200,12 @@ class RegistrationHandler: errcode=Codes.FORBIDDEN, ) - if guest_access_token is None: - try: - int(localpart) - raise SynapseError( - 400, - "Numeric user IDs are reserved for guest users.", - errcode=Codes.INVALID_USERNAME, - ) - except ValueError: - pass + if guest_access_token is None and GUEST_USER_ID_PATTERN.fullmatch(localpart): + raise SynapseError( + 400, + "Numeric user IDs are reserved for guest users.", + errcode=Codes.INVALID_USERNAME, + ) async def register_user( self, diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 92c374262..25fd2eb3a 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -225,6 +225,8 @@ class SsoHandler: self._consent_at_registration = hs.config.consent.user_consent_at_registration + self._e164_mxids = hs.config.experimental.msc4009_e164_mxids + def register_identity_provider(self, p: SsoIdentityProvider) -> None: p_id = p.idp_id assert p_id not in self._identity_providers @@ -711,7 +713,7 @@ class SsoHandler: # Since the localpart is provided via a potentially untrusted module, # ensure the MXID is valid before registering. if not attributes.localpart or contains_invalid_mxid_characters( - attributes.localpart + attributes.localpart, self._e164_mxids ): raise MappingException("localpart is invalid: %s" % (attributes.localpart,)) @@ -944,7 +946,7 @@ class SsoHandler: localpart, ) - if contains_invalid_mxid_characters(localpart): + if contains_invalid_mxid_characters(localpart, self._e164_mxids): raise SynapseError(400, "localpart is invalid: %s" % (localpart,)) user_id = UserID(localpart, self._server_name).to_string() user_infos = await self._store.get_users_by_id_case_insensitive(user_id) diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 5cee9c319..325219656 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -335,18 +335,35 @@ class EventID(DomainSpecificString): mxid_localpart_allowed_characters = set( "_-./=" + string.ascii_lowercase + string.digits ) +# MSC4007 adds the + to the allowed characters. +# +# TODO If this was accepted, update the SSO code to support this, see the callers +# of map_username_to_mxid_localpart. +extended_mxid_localpart_allowed_characters = mxid_localpart_allowed_characters | {"+"} + +# Guest user IDs are purely numeric. +GUEST_USER_ID_PATTERN = re.compile(r"^\d+$") -def contains_invalid_mxid_characters(localpart: str) -> bool: +def contains_invalid_mxid_characters( + localpart: str, use_extended_character_set: bool +) -> bool: """Check for characters not allowed in an mxid or groupid localpart Args: localpart: the localpart to be checked + use_extended_character_set: True to use the extended allowed characters + from MSC4009. Returns: True if there are any naughty characters """ - return any(c not in mxid_localpart_allowed_characters for c in localpart) + allowed_characters = ( + extended_mxid_localpart_allowed_characters + if use_extended_character_set + else mxid_localpart_allowed_characters + ) + return any(c not in allowed_characters for c in localpart) UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index aff1ec475..73822b07a 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -586,6 +586,19 @@ class RegistrationTestCase(unittest.HomeserverTestCase): d = self.store.is_support_user(user_id) self.assertFalse(self.get_success(d)) + def test_invalid_user_id(self) -> None: + invalid_user_id = "+abcd" + self.get_failure( + self.handler.register_user(localpart=invalid_user_id), SynapseError + ) + + @override_config({"experimental_features": {"msc4009_e164_mxids": True}}) + def text_extended_user_ids(self) -> None: + """+ should be allowed according to MSC4009.""" + valid_user_id = "+1234" + user_id = self.get_success(self.handler.register_user(localpart=valid_user_id)) + self.assertEqual(user_id, valid_user_id) + def test_invalid_user_id_length(self) -> None: invalid_user_id = "x" * 256 self.get_failure(