Merge pull request #1992 from matrix-org/rav/fix_media_loop

Fix routing loop when fetching remote media
This commit is contained in:
Richard van der Hoff 2017-03-14 23:40:35 +00:00 committed by GitHub
commit f2ed64eaaf
4 changed files with 121 additions and 24 deletions

View File

@ -15,6 +15,7 @@
"""Contains exceptions and error codes.""" """Contains exceptions and error codes."""
import json
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -50,27 +51,35 @@ class Codes(object):
class CodeMessageException(RuntimeError): class CodeMessageException(RuntimeError):
"""An exception with integer code and message string attributes.""" """An exception with integer code and message string attributes.
Attributes:
code (int): HTTP error code
msg (str): string describing the error
"""
def __init__(self, code, msg): def __init__(self, code, msg):
super(CodeMessageException, self).__init__("%d: %s" % (code, msg)) super(CodeMessageException, self).__init__("%d: %s" % (code, msg))
self.code = code self.code = code
self.msg = msg self.msg = msg
self.response_code_message = None
def error_dict(self): def error_dict(self):
return cs_error(self.msg) return cs_error(self.msg)
class SynapseError(CodeMessageException): class SynapseError(CodeMessageException):
"""A base error which can be caught for all synapse events.""" """A base exception type for matrix errors which have an errcode and error
message (as well as an HTTP status code).
Attributes:
errcode (str): Matrix error code e.g 'M_FORBIDDEN'
"""
def __init__(self, code, msg, errcode=Codes.UNKNOWN): def __init__(self, code, msg, errcode=Codes.UNKNOWN):
"""Constructs a synapse error. """Constructs a synapse error.
Args: Args:
code (int): The integer error code (an HTTP response code) code (int): The integer error code (an HTTP response code)
msg (str): The human-readable error message. msg (str): The human-readable error message.
err (str): The error code e.g 'M_FORBIDDEN' errcode (str): The matrix error code e.g 'M_FORBIDDEN'
""" """
super(SynapseError, self).__init__(code, msg) super(SynapseError, self).__init__(code, msg)
self.errcode = errcode self.errcode = errcode
@ -81,6 +90,39 @@ class SynapseError(CodeMessageException):
self.errcode, self.errcode,
) )
@classmethod
def from_http_response_exception(cls, err):
"""Make a SynapseError based on an HTTPResponseException
This is useful when a proxied request has failed, and we need to
decide how to map the failure onto a matrix error to send back to the
client.
An attempt is made to parse the body of the http response as a matrix
error. If that succeeds, the errcode and error message from the body
are used as the errcode and error message in the new synapse error.
Otherwise, the errcode is set to M_UNKNOWN, and the error message is
set to the reason code from the HTTP response.
Args:
err (HttpResponseException):
Returns:
SynapseError:
"""
# try to parse the body as json, to get better errcode/msg, but
# default to M_UNKNOWN with the HTTP status as the error text
try:
j = json.loads(err.response)
except ValueError:
j = {}
errcode = j.get('errcode', Codes.UNKNOWN)
errmsg = j.get('error', err.msg)
res = SynapseError(err.code, errmsg, errcode)
return res
class RegistrationError(SynapseError): class RegistrationError(SynapseError):
"""An error raised when a registration event fails.""" """An error raised when a registration event fails."""
@ -106,13 +148,11 @@ class UnrecognizedRequestError(SynapseError):
class NotFoundError(SynapseError): class NotFoundError(SynapseError):
"""An error indicating we can't find the thing you asked for""" """An error indicating we can't find the thing you asked for"""
def __init__(self, *args, **kwargs): def __init__(self, msg="Not found", errcode=Codes.NOT_FOUND):
if "errcode" not in kwargs:
kwargs["errcode"] = Codes.NOT_FOUND
super(NotFoundError, self).__init__( super(NotFoundError, self).__init__(
404, 404,
"Not found", msg,
**kwargs errcode=errcode
) )
@ -173,7 +213,6 @@ class LimitExceededError(SynapseError):
errcode=Codes.LIMIT_EXCEEDED): errcode=Codes.LIMIT_EXCEEDED):
super(LimitExceededError, self).__init__(code, msg, errcode) super(LimitExceededError, self).__init__(code, msg, errcode)
self.retry_after_ms = retry_after_ms self.retry_after_ms = retry_after_ms
self.response_code_message = "Too Many Requests"
def error_dict(self): def error_dict(self):
return cs_error( return cs_error(
@ -243,6 +282,19 @@ class FederationError(RuntimeError):
class HttpResponseException(CodeMessageException): class HttpResponseException(CodeMessageException):
"""
Represents an HTTP-level failure of an outbound request
Attributes:
response (str): body of response
"""
def __init__(self, code, msg, response): def __init__(self, code, msg, response):
self.response = response """
Args:
code (int): HTTP status code
msg (str): reason phrase from HTTP response status line
response (str): body of response
"""
super(HttpResponseException, self).__init__(code, msg) super(HttpResponseException, self).__init__(code, msg)
self.response = response

View File

@ -108,6 +108,12 @@ class MatrixFederationHttpClient(object):
query_bytes=b"", retry_on_dns_fail=True, query_bytes=b"", retry_on_dns_fail=True,
timeout=None, long_retries=False): timeout=None, long_retries=False):
""" Creates and sends a request to the given url """ Creates and sends a request to the given url
Returns:
Deferred: resolves with the http response object on success.
Fails with ``HTTPRequestException``: if we get an HTTP response
code >= 300.
""" """
headers_dict[b"User-Agent"] = [self.version_string] headers_dict[b"User-Agent"] = [self.version_string]
headers_dict[b"Host"] = [destination] headers_dict[b"Host"] = [destination]
@ -408,8 +414,11 @@ class MatrixFederationHttpClient(object):
output_stream (file): File to write the response body to. output_stream (file): File to write the response body to.
args (dict): Optional dictionary used to create the query string. args (dict): Optional dictionary used to create the query string.
Returns: Returns:
A (int,dict) tuple of the file length and a dict of the response Deferred: resolves with an (int,dict) tuple of the file length and
headers. a dict of the response headers.
Fails with ``HTTPRequestException`` if we get an HTTP response code
>= 300
""" """
encoded_args = {} encoded_args = {}
@ -419,7 +428,7 @@ class MatrixFederationHttpClient(object):
encoded_args[k] = [v.encode("UTF-8") for v in vs] encoded_args[k] = [v.encode("UTF-8") for v in vs]
query_bytes = urllib.urlencode(encoded_args, True) query_bytes = urllib.urlencode(encoded_args, True)
logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) logger.debug("Query bytes: %s Retry DNS: %s", query_bytes, retry_on_dns_fail)
def body_callback(method, url_bytes, headers_dict): def body_callback(method, url_bytes, headers_dict):
self.sign_request(destination, method, url_bytes, headers_dict) self.sign_request(destination, method, url_bytes, headers_dict)

View File

@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import synapse.http.servlet
from ._base import parse_media_id, respond_with_file, respond_404 from ._base import parse_media_id, respond_with_file, respond_404
from twisted.web.resource import Resource from twisted.web.resource import Resource
@ -81,6 +82,17 @@ class DownloadResource(Resource):
@defer.inlineCallbacks @defer.inlineCallbacks
def _respond_remote_file(self, request, server_name, media_id, name): def _respond_remote_file(self, request, server_name, media_id, name):
# don't forward requests for remote media if allow_remote is false
allow_remote = synapse.http.servlet.parse_boolean(
request, "allow_remote", default=True)
if not allow_remote:
logger.info(
"Rejecting request for remote media %s/%s due to allow_remote",
server_name, media_id,
)
respond_404(request)
return
media_info = yield self.media_repo.get_remote_media(server_name, media_id) media_info = yield self.media_repo.get_remote_media(server_name, media_id)
media_type = media_info["media_type"] media_type = media_info["media_type"]

View File

@ -13,22 +13,23 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from twisted.internet import defer, threads
import twisted.internet.error
import twisted.web.http
from twisted.web.resource import Resource
from .upload_resource import UploadResource from .upload_resource import UploadResource
from .download_resource import DownloadResource from .download_resource import DownloadResource
from .thumbnail_resource import ThumbnailResource from .thumbnail_resource import ThumbnailResource
from .identicon_resource import IdenticonResource from .identicon_resource import IdenticonResource
from .preview_url_resource import PreviewUrlResource from .preview_url_resource import PreviewUrlResource
from .filepath import MediaFilePaths from .filepath import MediaFilePaths
from twisted.web.resource import Resource
from .thumbnailer import Thumbnailer from .thumbnailer import Thumbnailer
from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.util.stringutils import random_string from synapse.util.stringutils import random_string
from synapse.api.errors import SynapseError from synapse.api.errors import SynapseError, HttpResponseException, \
NotFoundError
from twisted.internet import defer, threads
from synapse.util.async import Linearizer from synapse.util.async import Linearizer
from synapse.util.stringutils import is_ascii from synapse.util.stringutils import is_ascii
@ -157,11 +158,34 @@ class MediaRepository(object):
try: try:
length, headers = yield self.client.get_file( length, headers = yield self.client.get_file(
server_name, request_path, output_stream=f, server_name, request_path, output_stream=f,
max_size=self.max_upload_size, max_size=self.max_upload_size, args={
# tell the remote server to 404 if it doesn't
# recognise the server_name, to make sure we don't
# end up with a routing loop.
"allow_remote": "false",
}
) )
except Exception as e: except twisted.internet.error.DNSLookupError as e:
logger.warn("Failed to fetch remoted media %r", e) logger.warn("HTTP error fetching remote media %s/%s: %r",
raise SynapseError(502, "Failed to fetch remoted media") server_name, media_id, e)
raise NotFoundError()
except HttpResponseException as e:
logger.warn("HTTP error fetching remote media %s/%s: %s",
server_name, media_id, e.response)
if e.code == twisted.web.http.NOT_FOUND:
raise SynapseError.from_http_response_exception(e)
raise SynapseError(502, "Failed to fetch remote media")
except SynapseError:
logger.exception("Failed to fetch remote media %s/%s",
server_name, media_id)
raise
except Exception:
logger.exception("Failed to fetch remote media %s/%s",
server_name, media_id)
raise SynapseError(502, "Failed to fetch remote media")
media_type = headers["Content-Type"][0] media_type = headers["Content-Type"][0]
time_now_ms = self.clock.time_msec() time_now_ms = self.clock.time_msec()