mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-02-07 07:25:23 -05:00
Request JSON for oEmbed requests (and ignore XML-only providers). (#10759)
This adds the format to the request arguments / URL to ensure that JSON data is returned (which is all that Synapse supports). This also adds additional error checking / filtering to the configuration file to ignore XML-only providers.
This commit is contained in:
parent
aacdce8fc0
commit
580a15e039
1
changelog.d/10759.feature
Normal file
1
changelog.d/10759.feature
Normal file
@ -0,0 +1 @@
|
|||||||
|
Allow configuration of the oEmbed URLs used for URL previews.
|
@ -13,7 +13,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Any, Dict, Iterable, List, Pattern
|
from typing import Any, Dict, Iterable, List, Optional, Pattern
|
||||||
from urllib import parse as urlparse
|
from urllib import parse as urlparse
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
@ -31,6 +31,8 @@ class OEmbedEndpointConfig:
|
|||||||
api_endpoint: str
|
api_endpoint: str
|
||||||
# The patterns to match.
|
# The patterns to match.
|
||||||
url_patterns: List[Pattern]
|
url_patterns: List[Pattern]
|
||||||
|
# The supported formats.
|
||||||
|
formats: Optional[List[str]]
|
||||||
|
|
||||||
|
|
||||||
class OembedConfig(Config):
|
class OembedConfig(Config):
|
||||||
@ -93,11 +95,22 @@ class OembedConfig(Config):
|
|||||||
# might have multiple patterns to match.
|
# might have multiple patterns to match.
|
||||||
for endpoint in provider["endpoints"]:
|
for endpoint in provider["endpoints"]:
|
||||||
api_endpoint = endpoint["url"]
|
api_endpoint = endpoint["url"]
|
||||||
|
|
||||||
|
# The API endpoint must be an HTTP(S) URL.
|
||||||
|
results = urlparse.urlparse(api_endpoint)
|
||||||
|
if results.scheme not in {"http", "https"}:
|
||||||
|
raise ConfigError(
|
||||||
|
f"Unsupported oEmbed scheme ({results.scheme}) for endpoint {api_endpoint}",
|
||||||
|
config_path,
|
||||||
|
)
|
||||||
|
|
||||||
patterns = [
|
patterns = [
|
||||||
self._glob_to_pattern(glob, config_path)
|
self._glob_to_pattern(glob, config_path)
|
||||||
for glob in endpoint["schemes"]
|
for glob in endpoint["schemes"]
|
||||||
]
|
]
|
||||||
yield OEmbedEndpointConfig(api_endpoint, patterns)
|
yield OEmbedEndpointConfig(
|
||||||
|
api_endpoint, patterns, endpoint.get("formats")
|
||||||
|
)
|
||||||
|
|
||||||
def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
|
def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
|
||||||
"""
|
"""
|
||||||
@ -114,9 +127,12 @@ class OembedConfig(Config):
|
|||||||
"""
|
"""
|
||||||
results = urlparse.urlparse(glob)
|
results = urlparse.urlparse(glob)
|
||||||
|
|
||||||
# Ensure the scheme does not have wildcards (and is a sane scheme).
|
# The scheme must be HTTP(S) (and cannot contain wildcards).
|
||||||
if results.scheme not in {"http", "https"}:
|
if results.scheme not in {"http", "https"}:
|
||||||
raise ConfigError(f"Insecure oEmbed scheme: {results.scheme}", config_path)
|
raise ConfigError(
|
||||||
|
f"Unsupported oEmbed scheme ({results.scheme}) for pattern: {glob}",
|
||||||
|
config_path,
|
||||||
|
)
|
||||||
|
|
||||||
pattern = urlparse.urlunparse(
|
pattern = urlparse.urlunparse(
|
||||||
[
|
[
|
||||||
|
@ -49,8 +49,24 @@ class OEmbedProvider:
|
|||||||
def __init__(self, hs: "HomeServer", client: SimpleHttpClient):
|
def __init__(self, hs: "HomeServer", client: SimpleHttpClient):
|
||||||
self._oembed_patterns = {}
|
self._oembed_patterns = {}
|
||||||
for oembed_endpoint in hs.config.oembed.oembed_patterns:
|
for oembed_endpoint in hs.config.oembed.oembed_patterns:
|
||||||
|
api_endpoint = oembed_endpoint.api_endpoint
|
||||||
|
|
||||||
|
# Only JSON is supported at the moment. This could be declared in
|
||||||
|
# the formats field. Otherwise, if the endpoint ends in .xml assume
|
||||||
|
# it doesn't support JSON.
|
||||||
|
if (
|
||||||
|
oembed_endpoint.formats is not None
|
||||||
|
and "json" not in oembed_endpoint.formats
|
||||||
|
) or api_endpoint.endswith(".xml"):
|
||||||
|
logger.info(
|
||||||
|
"Ignoring oEmbed endpoint due to not supporting JSON: %s",
|
||||||
|
api_endpoint,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Iterate through each URL pattern and point it to the endpoint.
|
||||||
for pattern in oembed_endpoint.url_patterns:
|
for pattern in oembed_endpoint.url_patterns:
|
||||||
self._oembed_patterns[pattern] = oembed_endpoint.api_endpoint
|
self._oembed_patterns[pattern] = api_endpoint
|
||||||
self._client = client
|
self._client = client
|
||||||
|
|
||||||
def get_oembed_url(self, url: str) -> Optional[str]:
|
def get_oembed_url(self, url: str) -> Optional[str]:
|
||||||
@ -86,11 +102,15 @@ class OEmbedProvider:
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
logger.debug("Trying to get oEmbed content for url '%s'", url)
|
logger.debug("Trying to get oEmbed content for url '%s'", url)
|
||||||
|
|
||||||
|
# Note that only the JSON format is supported, some endpoints want
|
||||||
|
# this in the URL, others want it as an argument.
|
||||||
|
endpoint = endpoint.replace("{format}", "json")
|
||||||
|
|
||||||
result = await self._client.get_json(
|
result = await self._client.get_json(
|
||||||
endpoint,
|
endpoint,
|
||||||
# TODO Specify max height / width.
|
# TODO Specify max height / width.
|
||||||
# Note that only the JSON format is supported.
|
args={"url": url, "format": "json"},
|
||||||
args={"url": url},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Ensure there's a version of 1.0.
|
# Ensure there's a version of 1.0.
|
||||||
|
@ -92,7 +92,15 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
|||||||
url_patterns=[
|
url_patterns=[
|
||||||
re.compile(r"http://twitter\.com/.+/status/.+"),
|
re.compile(r"http://twitter\.com/.+/status/.+"),
|
||||||
],
|
],
|
||||||
)
|
formats=None,
|
||||||
|
),
|
||||||
|
OEmbedEndpointConfig(
|
||||||
|
api_endpoint="http://www.hulu.com/api/oembed.{format}",
|
||||||
|
url_patterns=[
|
||||||
|
re.compile(r"http://www\.hulu\.com/watch/.+"),
|
||||||
|
],
|
||||||
|
formats=["json"],
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
return hs
|
return hs
|
||||||
@ -656,3 +664,48 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
|||||||
channel.json_body,
|
channel.json_body,
|
||||||
{"og:title": None, "og:description": "Content Preview"},
|
{"og:title": None, "og:description": "Content Preview"},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_oembed_format(self):
|
||||||
|
"""Test an oEmbed endpoint which requires the format in the URL."""
|
||||||
|
self.lookups["www.hulu.com"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"version": "1.0",
|
||||||
|
"type": "rich",
|
||||||
|
"html": "<div>Content Preview</div>",
|
||||||
|
}
|
||||||
|
end_content = json.dumps(result).encode("utf-8")
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
"preview_url?url=http://www.hulu.com/watch/12345",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
client = self.reactor.tcpClients[0][2].buildProtocol(None)
|
||||||
|
server = AccumulatingProtocol()
|
||||||
|
server.makeConnection(FakeTransport(client, self.reactor))
|
||||||
|
client.makeConnection(FakeTransport(server, self.reactor))
|
||||||
|
client.dataReceived(
|
||||||
|
(
|
||||||
|
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
|
||||||
|
b'Content-Type: application/json; charset="utf8"\r\n\r\n'
|
||||||
|
)
|
||||||
|
% (len(end_content),)
|
||||||
|
+ end_content
|
||||||
|
)
|
||||||
|
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
# The {format} should have been turned into json.
|
||||||
|
self.assertIn(b"/api/oembed.json", server.data)
|
||||||
|
# A URL parameter of format=json should be provided.
|
||||||
|
self.assertIn(b"format=json", server.data)
|
||||||
|
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
|
self.assertEqual(
|
||||||
|
channel.json_body,
|
||||||
|
{"og:title": None, "og:description": "Content Preview"},
|
||||||
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user