Add admin API to reset connection timeouts for remote server (#11639)

* Fix get federation status of destination if no error occured
This commit is contained in:
Dirk Klimpel 2022-01-25 13:06:29 +01:00 committed by GitHub
parent 15c2a6a106
commit 0d6cfea9b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 183 additions and 33 deletions

View File

@ -0,0 +1 @@
Add admin API to reset connection timeouts for remote server.

View File

@ -86,7 +86,7 @@ The following fields are returned in the JSON response body:
- `next_token`: string representing a positive integer - Indication for pagination. See above. - `next_token`: string representing a positive integer - Indication for pagination. See above.
- `total` - integer - Total number of destinations. - `total` - integer - Total number of destinations.
# Destination Details API ## Destination Details API
This API gets the retry timing info for a specific remote server. This API gets the retry timing info for a specific remote server.
@ -108,7 +108,45 @@ A response body like the following is returned:
} }
``` ```
**Parameters**
The following parameters should be set in the URL:
- `destination` - Name of the remote server.
**Response** **Response**
The response fields are the same like in the `destinations` array in The response fields are the same like in the `destinations` array in
[List of destinations](#list-of-destinations) response. [List of destinations](#list-of-destinations) response.
## Reset connection timeout
Synapse makes federation requests to other homeservers. If a federation request fails,
Synapse will mark the destination homeserver as offline, preventing any future requests
to that server for a "cooldown" period. This period grows over time if the server
continues to fail its responses
([exponential backoff](https://en.wikipedia.org/wiki/Exponential_backoff)).
Admins can cancel the cooldown period with this API.
This API resets the retry timing for a specific remote server and tries to connect to
the remote server again. It does not wait for the next `retry_interval`.
The connection must have previously run into an error and `retry_last_ts`
([Destination Details API](#destination-details-api)) must not be equal to `0`.
The connection attempt is carried out in the background and can take a while
even if the API already returns the http status 200.
The API is:
```
POST /_synapse/admin/v1/federation/destinations/<destination>/reset_connection
{}
```
**Parameters**
The following parameters should be set in the URL:
- `destination` - Name of the remote server.

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging import logging
from typing import Dict, Iterable, List, Optional, Tuple, Type from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type
from typing_extensions import Literal from typing_extensions import Literal
@ -36,17 +36,19 @@ from synapse.http.servlet import (
parse_integer_from_args, parse_integer_from_args,
parse_string_from_args, parse_string_from_args,
) )
from synapse.server import HomeServer
from synapse.types import JsonDict, ThirdPartyInstanceID from synapse.types import JsonDict, ThirdPartyInstanceID
from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.ratelimitutils import FederationRateLimiter
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class TransportLayerServer(JsonResource): class TransportLayerServer(JsonResource):
"""Handles incoming federation HTTP requests""" """Handles incoming federation HTTP requests"""
def __init__(self, hs: HomeServer, servlet_groups: Optional[List[str]] = None): def __init__(self, hs: "HomeServer", servlet_groups: Optional[List[str]] = None):
"""Initialize the TransportLayerServer """Initialize the TransportLayerServer
Will by default register all servlets. For custom behaviour, pass in Will by default register all servlets. For custom behaviour, pass in
@ -113,7 +115,7 @@ class PublicRoomList(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,
@ -203,7 +205,7 @@ class FederationGroupsRenewAttestaionServlet(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,
@ -251,7 +253,7 @@ class OpenIdUserInfo(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,
@ -297,7 +299,7 @@ DEFAULT_SERVLET_GROUPS: Dict[str, Iterable[Type[BaseFederationServlet]]] = {
def register_servlets( def register_servlets(
hs: HomeServer, hs: "HomeServer",
resource: HttpServer, resource: HttpServer,
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,

View File

@ -15,7 +15,7 @@
import functools import functools
import logging import logging
import re import re
from typing import Any, Awaitable, Callable, Optional, Tuple, cast from typing import TYPE_CHECKING, Any, Awaitable, Callable, Optional, Tuple, cast
from synapse.api.errors import Codes, FederationDeniedError, SynapseError from synapse.api.errors import Codes, FederationDeniedError, SynapseError
from synapse.api.urls import FEDERATION_V1_PREFIX from synapse.api.urls import FEDERATION_V1_PREFIX
@ -29,11 +29,13 @@ from synapse.logging.opentracing import (
start_active_span_follows_from, start_active_span_follows_from,
whitelisted_homeserver, whitelisted_homeserver,
) )
from synapse.server import HomeServer
from synapse.types import JsonDict from synapse.types import JsonDict
from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.ratelimitutils import FederationRateLimiter
from synapse.util.stringutils import parse_and_validate_server_name from synapse.util.stringutils import parse_and_validate_server_name
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -46,7 +48,7 @@ class NoAuthenticationError(AuthenticationError):
class Authenticator: class Authenticator:
def __init__(self, hs: HomeServer): def __init__(self, hs: "HomeServer"):
self._clock = hs.get_clock() self._clock = hs.get_clock()
self.keyring = hs.get_keyring() self.keyring = hs.get_keyring()
self.server_name = hs.hostname self.server_name = hs.hostname
@ -114,11 +116,11 @@ class Authenticator:
# alive # alive
retry_timings = await self.store.get_destination_retry_timings(origin) retry_timings = await self.store.get_destination_retry_timings(origin)
if retry_timings and retry_timings.retry_last_ts: if retry_timings and retry_timings.retry_last_ts:
run_in_background(self._reset_retry_timings, origin) run_in_background(self.reset_retry_timings, origin)
return origin return origin
async def _reset_retry_timings(self, origin: str) -> None: async def reset_retry_timings(self, origin: str) -> None:
try: try:
logger.info("Marking origin %r as up", origin) logger.info("Marking origin %r as up", origin)
await self.store.set_destination_retry_timings(origin, None, 0, 0) await self.store.set_destination_retry_timings(origin, None, 0, 0)
@ -227,7 +229,7 @@ class BaseFederationServlet:
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,

View File

@ -12,7 +12,17 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging import logging
from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union from typing import (
TYPE_CHECKING,
Dict,
List,
Mapping,
Optional,
Sequence,
Tuple,
Type,
Union,
)
from typing_extensions import Literal from typing_extensions import Literal
@ -30,11 +40,13 @@ from synapse.http.servlet import (
parse_string_from_args, parse_string_from_args,
parse_strings_from_args, parse_strings_from_args,
) )
from synapse.server import HomeServer
from synapse.types import JsonDict from synapse.types import JsonDict
from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.ratelimitutils import FederationRateLimiter
from synapse.util.versionstring import get_version_string from synapse.util.versionstring import get_version_string
if TYPE_CHECKING:
from synapse.server import HomeServer
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
issue_8631_logger = logging.getLogger("synapse.8631_debug") issue_8631_logger = logging.getLogger("synapse.8631_debug")
@ -47,7 +59,7 @@ class BaseFederationServerServlet(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,
@ -596,7 +608,7 @@ class FederationSpaceSummaryServlet(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,
@ -670,7 +682,7 @@ class FederationRoomHierarchyServlet(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,
@ -706,7 +718,7 @@ class RoomComplexityServlet(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,

View File

@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import Dict, List, Tuple, Type from typing import TYPE_CHECKING, Dict, List, Tuple, Type
from synapse.api.errors import SynapseError from synapse.api.errors import SynapseError
from synapse.federation.transport.server._base import ( from synapse.federation.transport.server._base import (
@ -19,10 +19,12 @@ from synapse.federation.transport.server._base import (
BaseFederationServlet, BaseFederationServlet,
) )
from synapse.handlers.groups_local import GroupsLocalHandler from synapse.handlers.groups_local import GroupsLocalHandler
from synapse.server import HomeServer
from synapse.types import JsonDict, get_domain_from_id from synapse.types import JsonDict, get_domain_from_id
from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.ratelimitutils import FederationRateLimiter
if TYPE_CHECKING:
from synapse.server import HomeServer
class BaseGroupsLocalServlet(BaseFederationServlet): class BaseGroupsLocalServlet(BaseFederationServlet):
"""Abstract base class for federation servlet classes which provides a groups local handler. """Abstract base class for federation servlet classes which provides a groups local handler.
@ -32,7 +34,7 @@ class BaseGroupsLocalServlet(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,

View File

@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import Dict, List, Tuple, Type from typing import TYPE_CHECKING, Dict, List, Tuple, Type
from typing_extensions import Literal from typing_extensions import Literal
@ -22,10 +22,12 @@ from synapse.federation.transport.server._base import (
BaseFederationServlet, BaseFederationServlet,
) )
from synapse.http.servlet import parse_string_from_args from synapse.http.servlet import parse_string_from_args
from synapse.server import HomeServer
from synapse.types import JsonDict, get_domain_from_id from synapse.types import JsonDict, get_domain_from_id
from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.ratelimitutils import FederationRateLimiter
if TYPE_CHECKING:
from synapse.server import HomeServer
class BaseGroupsServerServlet(BaseFederationServlet): class BaseGroupsServerServlet(BaseFederationServlet):
"""Abstract base class for federation servlet classes which provides a groups server handler. """Abstract base class for federation servlet classes which provides a groups server handler.
@ -35,7 +37,7 @@ class BaseGroupsServerServlet(BaseFederationServlet):
def __init__( def __init__(
self, self,
hs: HomeServer, hs: "HomeServer",
authenticator: Authenticator, authenticator: Authenticator,
ratelimiter: FederationRateLimiter, ratelimiter: FederationRateLimiter,
server_name: str, server_name: str,

View File

@ -41,7 +41,8 @@ from synapse.rest.admin.event_reports import (
EventReportsRestServlet, EventReportsRestServlet,
) )
from synapse.rest.admin.federation import ( from synapse.rest.admin.federation import (
DestinationsRestServlet, DestinationResetConnectionRestServlet,
DestinationRestServlet,
ListDestinationsRestServlet, ListDestinationsRestServlet,
) )
from synapse.rest.admin.groups import DeleteGroupAdminRestServlet from synapse.rest.admin.groups import DeleteGroupAdminRestServlet
@ -267,7 +268,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
ListRegistrationTokensRestServlet(hs).register(http_server) ListRegistrationTokensRestServlet(hs).register(http_server)
NewRegistrationTokenRestServlet(hs).register(http_server) NewRegistrationTokenRestServlet(hs).register(http_server)
RegistrationTokenRestServlet(hs).register(http_server) RegistrationTokenRestServlet(hs).register(http_server)
DestinationsRestServlet(hs).register(http_server) DestinationResetConnectionRestServlet(hs).register(http_server)
DestinationRestServlet(hs).register(http_server)
ListDestinationsRestServlet(hs).register(http_server) ListDestinationsRestServlet(hs).register(http_server)
# Some servlets only get registered for the main process. # Some servlets only get registered for the main process.

View File

@ -16,6 +16,7 @@ from http import HTTPStatus
from typing import TYPE_CHECKING, Tuple from typing import TYPE_CHECKING, Tuple
from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.api.errors import Codes, NotFoundError, SynapseError
from synapse.federation.transport.server import Authenticator
from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.http.servlet import RestServlet, parse_integer, parse_string
from synapse.http.site import SynapseRequest from synapse.http.site import SynapseRequest
from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
@ -90,7 +91,7 @@ class ListDestinationsRestServlet(RestServlet):
return HTTPStatus.OK, response return HTTPStatus.OK, response
class DestinationsRestServlet(RestServlet): class DestinationRestServlet(RestServlet):
"""Get details of a destination. """Get details of a destination.
This needs user to have administrator access in Synapse. This needs user to have administrator access in Synapse.
@ -145,3 +146,44 @@ class DestinationsRestServlet(RestServlet):
} }
return HTTPStatus.OK, response return HTTPStatus.OK, response
class DestinationResetConnectionRestServlet(RestServlet):
"""Reset destinations' connection timeouts and wake it up.
This needs user to have administrator access in Synapse.
POST /_synapse/admin/v1/federation/destinations/<destination>/reset_connection
{}
returns:
200 OK otherwise an error.
"""
PATTERNS = admin_patterns(
"/federation/destinations/(?P<destination>[^/]+)/reset_connection$"
)
def __init__(self, hs: "HomeServer"):
self._auth = hs.get_auth()
self._store = hs.get_datastore()
self._authenticator = Authenticator(hs)
async def on_POST(
self, request: SynapseRequest, destination: str
) -> Tuple[int, JsonDict]:
await assert_requester_is_admin(self._auth, request)
if not await self._store.is_destination_known(destination):
raise NotFoundError("Unknown destination")
retry_timings = await self._store.get_destination_retry_timings(destination)
if not (retry_timings and retry_timings.retry_last_ts):
raise SynapseError(
HTTPStatus.BAD_REQUEST,
"The retry timing does not need to be reset for this destination.",
)
# reset timings and wake up
await self._authenticator.reset_retry_timings(destination)
return HTTPStatus.OK, {}

View File

@ -43,11 +43,15 @@ class FederationTestCase(unittest.HomeserverTestCase):
@parameterized.expand( @parameterized.expand(
[ [
("/_synapse/admin/v1/federation/destinations",), ("GET", "/_synapse/admin/v1/federation/destinations"),
("/_synapse/admin/v1/federation/destinations/dummy",), ("GET", "/_synapse/admin/v1/federation/destinations/dummy"),
(
"POST",
"/_synapse/admin/v1/federation/destinations/dummy/reset_connection",
),
] ]
) )
def test_requester_is_no_admin(self, url: str) -> None: def test_requester_is_no_admin(self, method: str, url: str) -> None:
""" """
If the user is not a server admin, an error 403 is returned. If the user is not a server admin, an error 403 is returned.
""" """
@ -56,7 +60,7 @@ class FederationTestCase(unittest.HomeserverTestCase):
other_user_tok = self.login("user", "pass") other_user_tok = self.login("user", "pass")
channel = self.make_request( channel = self.make_request(
"GET", method,
url, url,
content={}, content={},
access_token=other_user_tok, access_token=other_user_tok,
@ -120,6 +124,16 @@ class FederationTestCase(unittest.HomeserverTestCase):
self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.json_body) self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.json_body)
self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
# invalid destination
channel = self.make_request(
"POST",
self.url + "/dummy/reset_connection",
access_token=self.admin_user_tok,
)
self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.json_body)
self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
def test_limit(self) -> None: def test_limit(self) -> None:
""" """
Testing list of destinations with limit Testing list of destinations with limit
@ -444,6 +458,39 @@ class FederationTestCase(unittest.HomeserverTestCase):
self.assertIsNone(channel.json_body["failure_ts"]) self.assertIsNone(channel.json_body["failure_ts"])
self.assertIsNone(channel.json_body["last_successful_stream_ordering"]) self.assertIsNone(channel.json_body["last_successful_stream_ordering"])
def test_destination_reset_connection(self) -> None:
"""Reset timeouts and wake up destination."""
self._create_destination("sub0.example.com", 100, 100, 100)
channel = self.make_request(
"POST",
self.url + "/sub0.example.com/reset_connection",
access_token=self.admin_user_tok,
)
self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body)
retry_timings = self.get_success(
self.store.get_destination_retry_timings("sub0.example.com")
)
self.assertIsNone(retry_timings)
def test_destination_reset_connection_not_required(self) -> None:
"""Try to reset timeouts of a destination with no timeouts and get an error."""
self._create_destination("sub0.example.com", None, 0, 0)
channel = self.make_request(
"POST",
self.url + "/sub0.example.com/reset_connection",
access_token=self.admin_user_tok,
)
self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body)
self.assertEqual(
"The retry timing does not need to be reset for this destination.",
channel.json_body["error"],
)
def _create_destination( def _create_destination(
self, self,
destination: str, destination: str,