2021-08-31 18:37:07 -04:00
|
|
|
# Copyright 2021 The Matrix.org Foundation C.I.C.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
import logging
|
|
|
|
from typing import TYPE_CHECKING, Optional
|
|
|
|
|
|
|
|
import attr
|
|
|
|
|
|
|
|
from synapse.http.client import SimpleHttpClient
|
|
|
|
|
|
|
|
if TYPE_CHECKING:
|
|
|
|
from synapse.server import HomeServer
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
@attr.s(slots=True, auto_attribs=True)
class OEmbedResult:
    """The fields of an oEmbed response that are kept after parsing."""

    # Either HTML content or URL must be provided.
    html: Optional[str]
    url: Optional[str]
    # Value of the response's "title" field, if it had one.
    title: Optional[str]
    # Number of seconds to cache the content, or None if the response did not
    # specify a cache age.
    cache_age: Optional[int]
|
|
|
|
|
|
|
|
|
|
|
|
class OEmbedError(Exception):
    """Signals that an oEmbed object could not be processed."""
|
|
|
|
|
|
|
|
|
|
|
|
class OEmbedProvider:
    """
    A helper for accessing oEmbed content.

    It can be used to check if a URL should be accessed via oEmbed and for
    requesting/parsing oEmbed content.
    """

    def __init__(self, hs: "HomeServer", client: SimpleHttpClient):
        """
        Args:
            hs: The homeserver; its ``config.oembed.oembed_patterns`` entries
                describe the known oEmbed endpoints.
            client: The HTTP client used to query oEmbed endpoints.
        """
        # Maps each URL pattern object to the API endpoint which serves it.
        self._oembed_patterns = {}
        for oembed_endpoint in hs.config.oembed.oembed_patterns:
            api_endpoint = oembed_endpoint.api_endpoint

            # Only JSON is supported at the moment. This could be declared in
            # the formats field. Otherwise, if the endpoint ends in .xml assume
            # it doesn't support JSON.
            if (
                oembed_endpoint.formats is not None
                and "json" not in oembed_endpoint.formats
            ) or api_endpoint.endswith(".xml"):
                logger.info(
                    "Ignoring oEmbed endpoint due to not supporting JSON: %s",
                    api_endpoint,
                )
                continue

            # Iterate through each URL pattern and point it to the endpoint.
            for pattern in oembed_endpoint.url_patterns:
                self._oembed_patterns[pattern] = api_endpoint
        self._client = client

    def get_oembed_url(self, url: str) -> Optional[str]:
        """
        Check whether the URL should be downloaded as oEmbed content instead.

        Args:
            url: The URL to check.

        Returns:
            A URL to use instead or None if the original URL should be used.
        """
        # The first pattern (in registration order) that matches the whole URL
        # wins.
        for url_pattern, endpoint in self._oembed_patterns.items():
            if url_pattern.fullmatch(url):
                return endpoint

        # No match.
        return None

    async def get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult:
        """
        Request content from an oEmbed endpoint.

        Args:
            endpoint: The oEmbed API endpoint.
            url: The URL to pass to the API.

        Returns:
            An object representing the metadata returned.

        Raises:
            OEmbedError if fetching or parsing of the oEmbed information fails.
        """
        # Downloading and parsing are guarded separately so that the log line
        # and the raised error say which of the two stages actually failed.
        try:
            logger.debug("Trying to get oEmbed content for url '%s'", url)

            # Note that only the JSON format is supported, some endpoints want
            # this in the URL, others want it as an argument.
            endpoint = endpoint.replace("{format}", "json")

            result = await self._client.get_json(
                endpoint,
                # TODO Specify max height / width.
                args={"url": url, "format": "json"},
            )
        except Exception as e:
            # Trap any exception and surface it as an OEmbedError.
            # FIXME: pass through 404s and other error messages nicely
            logger.warning("Error downloading oEmbed metadata from %s: %r", url, e)
            raise OEmbedError("Error downloading oEmbed metadata") from e

        try:
            return self._parse_oembed_response(result)
        except OEmbedError as e:
            # Trap OEmbedErrors first so we can directly re-raise them.
            logger.warning("Error parsing oEmbed metadata from %s: %r", url, e)
            raise
        except Exception as e:
            # Anything unexpected (e.g. a non-numeric cache age) is still
            # reported to callers as an OEmbedError.
            logger.warning("Error parsing oEmbed metadata from %s: %r", url, e)
            raise OEmbedError("Error parsing oEmbed metadata") from e

    @staticmethod
    def _parse_oembed_response(result: object) -> OEmbedResult:
        """
        Convert a decoded oEmbed JSON response into an OEmbedResult.

        Args:
            result: The decoded JSON body returned by the oEmbed endpoint.

        Returns:
            The parsed result, with either its HTML or its URL populated from
            the response.

        Raises:
            OEmbedError if the response is not a JSON object, has a version
            other than "1.0", or carries no usable content.
        """
        if not isinstance(result, dict):
            raise OEmbedError("Invalid oEmbed response: expected a JSON object")

        # Ensure there's a version of 1.0.
        version = result.get("version")
        if version != "1.0":
            raise OEmbedError("Invalid version: %s" % (version,))

        oembed_type = result.get("type")

        # Ensure the cache age is None or an int.
        raw_cache_age = result.get("cache_age")
        cache_age: Optional[int]
        if raw_cache_age:
            cache_age = int(raw_cache_age)
        elif raw_cache_age == 0:
            # A genuine zero (0, 0.0) is kept rather than treated as absent.
            cache_age = 0
        else:
            # Missing, null or otherwise empty (e.g. "") values mean "unset".
            cache_age = None

        oembed_result = OEmbedResult(None, None, result.get("title"), cache_age)

        # HTML content.
        if oembed_type == "rich":
            oembed_result.html = result.get("html")
            return oembed_result

        if oembed_type == "photo":
            oembed_result.url = result.get("url")
            return oembed_result

        # TODO Handle link and video types.

        # For any other type fall back to the thumbnail, if there is one.
        if "thumbnail_url" in result:
            oembed_result.url = result["thumbnail_url"]
            return oembed_result

        raise OEmbedError("Incompatible oEmbed information.")
|