mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-05-04 10:34:52 -04:00
Allow configuration of the oEmbed URLs. (#10714)
This adds configuration options (under an `oembed` section) to configure which URLs are matched to use oEmbed for URL previews.
This commit is contained in:
parent
287918e2d4
commit
e2481dbe93
8 changed files with 458 additions and 247 deletions
180
synapse/config/oembed.py
Normal file
180
synapse/config/oembed.py
Normal file
|
@ -0,0 +1,180 @@
|
|||
# Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import json
|
||||
import re
|
||||
from typing import Any, Dict, Iterable, List, Pattern
|
||||
from urllib import parse as urlparse
|
||||
|
||||
import attr
|
||||
import pkg_resources
|
||||
|
||||
from synapse.types import JsonDict
|
||||
|
||||
from ._base import Config, ConfigError
|
||||
from ._util import validate_config
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, frozen=True, slots=True)
class OEmbedEndpointConfig:
    """An oEmbed API endpoint paired with the URL patterns it is used for."""

    # URL of the oEmbed API endpoint to fetch.
    api_endpoint: str
    # Compiled regular expressions to match URLs against.
    url_patterns: List[Pattern]
|
||||
|
||||
|
||||
class OembedConfig(Config):
    """oEmbed Configuration

    Reads the ``oembed`` config section and builds the list of oEmbed API
    endpoints, each with the compiled URL patterns that select it.
    """

    section = "oembed"

    def read_config(self, config, **kwargs):
        """Populate ``self.oembed_patterns`` from the ``oembed`` section.

        Args:
            config: The parsed configuration mapping.
            **kwargs: Accepted but unused by this section.
        """
        # A missing (or null) section behaves like an empty one.
        oembed_config: Dict[str, Any] = config.get("oembed") or {}

        # A list of patterns which will be used.
        self.oembed_patterns: List[OEmbedEndpointConfig] = list(
            self._parse_and_validate_providers(oembed_config)
        )

    def _parse_and_validate_providers(
        self, oembed_config: dict
    ) -> Iterable[OEmbedEndpointConfig]:
        """Extract and parse the oEmbed providers from the configured JSON files.

        The packaged ``res/providers.json`` is read first (unless
        ``disable_default_providers`` is set), followed by each file listed
        under ``additional_providers``, in order.

        Args:
            oembed_config: The contents of the ``oembed`` config section.

        Returns:
            A generator which yields the OEmbedEndpointConfig objects.

        Raises:
            ConfigError: If an additional providers file cannot be read or
                does not contain valid JSON, or if a glob uses a scheme other
                than HTTP / HTTPS.
        """
        # Whether to use the packaged providers.json file.
        if not oembed_config.get("disable_default_providers"):
            # Close the resource stream as soon as it has been parsed.
            with pkg_resources.resource_stream(
                "synapse", "res/providers.json"
            ) as stream:
                providers = json.load(stream)
            yield from self._parse_and_validate_provider(
                providers, config_path=("oembed",)
            )

        # The JSON files which include additional provider information.
        for i, file in enumerate(oembed_config.get("additional_providers") or []):
            config_path = ("oembed", "additional_providers", f"<item {i}>")

            # Report unreadable / malformed files as configuration errors
            # which point at the offending entry. The file is opened in
            # binary mode so that the JSON decoder detects the encoding
            # itself rather than depending on the locale.
            try:
                with open(file, "rb") as f:
                    providers = json.load(f)
            except (OSError, ValueError) as e:
                raise ConfigError(
                    f"Unable to load oEmbed providers from {file!r}: {e}",
                    config_path,
                ) from e

            yield from self._parse_and_validate_provider(
                providers, config_path=config_path
            )

    def _parse_and_validate_provider(
        self, providers: List[JsonDict], config_path: Iterable[str]
    ) -> Iterable[OEmbedEndpointConfig]:
        """Validate one providers document and yield its endpoints.

        Args:
            providers: The decoded contents of a providers JSON file.
            config_path: Where in the config this document came from; passed
                on to validation and to any error raised for a bad glob.

        Returns:
            A generator yielding one OEmbedEndpointConfig per API endpoint.
        """
        # Ensure it is the proper form.
        # NOTE(review): presumably validate_config raises on a schema
        # violation -- confirm against its definition.
        validate_config(
            _OEMBED_PROVIDER_SCHEMA,
            providers,
            config_path=config_path,
        )

        # Parse it and yield each result.
        for provider in providers:
            # Each provider might have multiple API endpoints, each of which
            # might have multiple patterns to match.
            for endpoint in provider["endpoints"]:
                api_endpoint = endpoint["url"]
                patterns = [
                    self._glob_to_pattern(glob, config_path)
                    for glob in endpoint["schemes"]
                ]
                yield OEmbedEndpointConfig(api_endpoint, patterns)

    def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
        """
        Convert the glob into a sane regular expression to match against. The
        rules followed will be slightly different for the domain portion vs.
        the rest.

        1. The scheme must be one of HTTP / HTTPS (and have no globs).
        2. The domain can have globs, but we limit it to characters that can
           reasonably be a domain part.
           TODO: This does not attempt to handle Unicode domain names.
           TODO: The domain should not allow wildcard TLDs.
        3. Other parts allow a glob to be any one, or more, characters.

        Args:
            glob: The URL glob, e.g. ``https://*.example.com/watch?v=*``.
            config_path: Where in the config the glob came from, used for
                error reporting.

        Returns:
            The compiled (unanchored) regular expression.

        Raises:
            ConfigError: If the scheme is not ``http`` or ``https``.
        """
        results = urlparse.urlparse(glob)

        # Ensure the scheme does not have wildcards (and is a sane scheme).
        if results.scheme not in {"http", "https"}:
            raise ConfigError(f"Insecure oEmbed scheme: {results.scheme}", config_path)

        # Escape each component, then turn the (escaped) globs into regex.
        netloc = re.escape(results.netloc).replace(r"\*", "[a-zA-Z0-9_-]+")
        path, params, query, fragment = (
            re.escape(part).replace(r"\*", ".+") for part in results[2:]
        )

        # Reassemble the URL by hand instead of via urlunparse: the latter
        # joins the query with a bare "?", which is a quantifier in a regular
        # expression, so globs containing a query string could never match
        # the URLs they describe.
        if path and not path.startswith("/"):
            path = "/" + path
        pattern = f"{results.scheme}://{netloc}{path}"
        if params:
            pattern += ";" + params
        if query:
            pattern += r"\?" + query
        if fragment:
            pattern += "#" + fragment

        return re.compile(pattern)

    def generate_config_section(self, **kwargs):
        """Return the commented sample YAML for the ``oembed`` section."""
        # NOTE(review): the common leading indentation of this template is
        # assumed to be stripped by the caller (e.g. textwrap.dedent) -- confirm.
        return """\
        # oEmbed allows for easier embedding content from a website. It can be
        # used for generating URLs previews of services which support it.
        #
        oembed:
          # A default list of oEmbed providers is included with Synapse.
          #
          # Uncomment the following to disable using these default oEmbed URLs.
          # Defaults to 'false'.
          #
          #disable_default_providers: true

          # Additional files with oEmbed configuration (each should be in the
          # form of providers.json).
          #
          # By default, this list is empty (so only the default providers.json
          # is used).
          #
          #additional_providers:
          #  - oembed/my_providers.json
        """
|
||||
|
||||
|
||||
_OEMBED_PROVIDER_SCHEMA = {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provider_name": {"type": "string"},
|
||||
"provider_url": {"type": "string"},
|
||||
"endpoints": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"schemes": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
},
|
||||
"url": {"type": "string"},
|
||||
"formats": {"type": "array", "items": {"type": "string"}},
|
||||
"discovery": {"type": "boolean"},
|
||||
},
|
||||
"required": ["schemes", "url"],
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": ["provider_name", "provider_url", "endpoints"],
|
||||
},
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue