Add type hints to media rest resources. (#9093)
parent 0dd2649c12
commit d34c6e1279
13 changed files with 286 additions and 165 deletions

synapse/rest/media/v1/preview_url_resource.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import datetime
 import errno
 import fnmatch
@@ -23,12 +23,13 @@ import re
 import shutil
 import sys
 import traceback
-from typing import Dict, Optional
+from typing import TYPE_CHECKING, Any, Dict, Generator, Iterable, Optional, Union
 from urllib import parse as urlparse

 import attr

 from twisted.internet.error import DNSLookupError
+from twisted.web.http import Request

 from synapse.api.errors import Codes, SynapseError
 from synapse.http.client import SimpleHttpClient
@@ -41,6 +42,7 @@ from synapse.http.servlet import parse_integer, parse_string
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.media.v1._base import get_filename_from_headers
+from synapse.rest.media.v1.media_storage import MediaStorage
 from synapse.util import json_encoder
 from synapse.util.async_helpers import ObservableDeferred
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -48,6 +50,12 @@ from synapse.util.stringutils import random_string

 from ._base import FileInfo

+if TYPE_CHECKING:
+    from lxml import etree
+
+    from synapse.app.homeserver import HomeServer
+    from synapse.rest.media.v1.media_repository import MediaRepository
+
 logger = logging.getLogger(__name__)

 _charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
@@ -119,7 +127,12 @@ class OEmbedError(Exception):
 class PreviewUrlResource(DirectServeJsonResource):
     isLeaf = True

-    def __init__(self, hs, media_repo, media_storage):
+    def __init__(
+        self,
+        hs: "HomeServer",
+        media_repo: "MediaRepository",
+        media_storage: MediaStorage,
+    ):
         super().__init__()

         self.auth = hs.get_auth()
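Note: the `if TYPE_CHECKING:` block and the quoted "HomeServer" / "MediaRepository" annotations above are the standard forward-reference pattern for imports that are only needed by the type checker. A minimal sketch of the same pattern, using a hypothetical myapp.server module rather than Synapse's own classes:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by the type checker (e.g. mypy); at runtime this branch
    # is skipped, so it cannot create an import cycle or slow down startup.
    from myapp.server import Server  # hypothetical module

class Resource:
    def __init__(self, server: "Server") -> None:
        # "Server" is a string forward reference: the type checker resolves
        # it, but nothing is looked up when the module is imported.
        self._server = server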
@@ -166,11 +179,11 @@ class PreviewUrlResource(DirectServeJsonResource):
             self._start_expire_url_cache_data, 10 * 1000
         )

-    async def _async_render_OPTIONS(self, request):
+    async def _async_render_OPTIONS(self, request: Request) -> None:
         request.setHeader(b"Allow", b"OPTIONS, GET")
         respond_with_json(request, 200, {}, send_cors=True)

-    async def _async_render_GET(self, request):
+    async def _async_render_GET(self, request: Request) -> None:

         # XXX: if get_user_by_req fails, what should we do in an async render?
         requester = await self.auth.get_user_by_req(request)
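Note: annotating the render methods with twisted.web.http.Request and an explicit None return is what lets mypy check their callers. A runnable sketch of the idea, with FakeRequest as a hypothetical stand-in for Twisted's Request class:

import asyncio

class FakeRequest:
    # Hypothetical stand-in for twisted.web.http.Request, for this sketch only.
    def setHeader(self, name: bytes, value: bytes) -> None:
        print(name.decode(), value.decode())

async def render_options(request: FakeRequest) -> None:
    # With the parameter and return types spelled out, mypy can flag callers
    # that pass the wrong object or try to use a (nonexistent) return value.
    request.setHeader(b"Allow", b"OPTIONS, GET")

asyncio.run(render_options(FakeRequest()))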
@@ -450,7 +463,7 @@ class PreviewUrlResource(DirectServeJsonResource):
             logger.warning("Error downloading oEmbed metadata from %s: %r", url, e)
             raise OEmbedError() from e

-    async def _download_url(self, url: str, user):
+    async def _download_url(self, url: str, user: str) -> Dict[str, Any]:
         # TODO: we should probably honour robots.txt... except in practice
         # we're most likely being explicitly triggered by a human rather than a
         # bot, so are we really a robot?
@@ -580,7 +593,7 @@ class PreviewUrlResource(DirectServeJsonResource):
             "expire_url_cache_data", self._expire_url_cache_data
         )

-    async def _expire_url_cache_data(self):
+    async def _expire_url_cache_data(self) -> None:
         """Clean up expired url cache content, media and thumbnails.
         """
         # TODO: Delete from backup media store
@@ -676,7 +689,9 @@ class PreviewUrlResource(DirectServeJsonResource):
             logger.debug("No media removed from url cache")


-def decode_and_calc_og(body, media_uri, request_encoding=None) -> Dict[str, str]:
+def decode_and_calc_og(
+    body: bytes, media_uri: str, request_encoding: Optional[str] = None
+) -> Dict[str, Optional[str]]:
     # If there's no body, nothing useful is going to be found.
     if not body:
         return {}
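Note: widening the return type from Dict[str, str] to Dict[str, Optional[str]] reflects that Open Graph values can legitimately be None, so typed callers have to handle that case. A small sketch with a hypothetical caller (render_preview is not Synapse code):

from typing import Dict, Optional

def render_preview(og: Dict[str, Optional[str]]) -> str:
    # Hypothetical consumer of the OG dictionary: every value has to be
    # checked for None before use, which is what the annotation enforces.
    title = og.get("og:title") or "(no title)"
    description = og.get("og:description") or ""
    return f"{title}\n{description}"

print(render_preview({"og:title": "Example", "og:description": None}))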
@@ -697,7 +712,7 @@ def decode_and_calc_og(body, media_uri, request_encoding=None) -> Dict[str, str]
     return og


-def _calc_og(tree, media_uri):
+def _calc_og(tree, media_uri: str) -> Dict[str, Optional[str]]:
     # suck our tree into lxml and define our OG response.

     # if we see any image URLs in the OG response, then spider them
@@ -801,7 +816,9 @@ def _calc_og(tree, media_uri):
             for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE)
         )
         og["og:description"] = summarize_paragraphs(text_nodes)
-    else:
+    elif og["og:description"]:
+        # This must be a non-empty string at this point.
+        assert isinstance(og["og:description"], str)
         og["og:description"] = summarize_paragraphs([og["og:description"]])

     # TODO: delete the url downloads to stop diskfilling,
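Note: the assert added above exists for the type checker. The truthiness check on og["og:description"] rules out None at runtime, but mypy does not narrow the type of a dict subscript, so the isinstance assert is what convinces it that the value really is a str. A standalone sketch of the same idiom (first_words is a hypothetical stand-in for summarize_paragraphs):

from typing import Dict, Iterable, Optional

def first_words(paragraphs: Iterable[str], limit: int = 5) -> Optional[str]:
    # Only accepts real strings, like summarize_paragraphs.
    for text in paragraphs:
        return " ".join(text.split()[:limit])
    return None

og: Dict[str, Optional[str]] = {"og:description": "A short description of the page"}

if og["og:description"]:
    # The check rules out None and "", but the subscript expression is still
    # typed Optional[str]; the assert narrows it to str for the call below.
    assert isinstance(og["og:description"], str)
    og["og:description"] = first_words([og["og:description"]])

print(og)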
@@ -809,7 +826,9 @@ def _calc_og(tree, media_uri):
     return og


-def _iterate_over_text(tree, *tags_to_ignore):
+def _iterate_over_text(
+    tree, *tags_to_ignore: Iterable[Union[str, "etree.Comment"]]
+) -> Generator[str, None, None]:
     """Iterate over the tree returning text nodes in a depth first fashion,
     skipping text nodes inside certain tags.
     """
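Note: Generator[str, None, None] reads as "yields str, accepts nothing via send(), returns nothing". A minimal runnable sketch (iterate_text is hypothetical, not a reimplementation of _iterate_over_text):

from typing import Generator, Iterable, Optional

def iterate_text(nodes: Iterable[Optional[str]]) -> Generator[str, None, None]:
    # Generator[YieldType, SendType, ReturnType]: yields str, is never sent
    # values, and returns None when exhausted.
    for node in nodes:
        if node is not None and node.strip():
            yield node

print(list(iterate_text(["hello", None, "  ", "world"])))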
@@ -843,32 +862,32 @@ def _iterate_over_text(tree, *tags_to_ignore):
         )


-def _rebase_url(url, base):
-    base = list(urlparse.urlparse(base))
-    url = list(urlparse.urlparse(url))
-    if not url[0]:  # fix up schema
-        url[0] = base[0] or "http"
-    if not url[1]:  # fix up hostname
-        url[1] = base[1]
-    if not url[2].startswith("/"):
-        url[2] = re.sub(r"/[^/]+$", "/", base[2]) + url[2]
-    return urlparse.urlunparse(url)
+def _rebase_url(url: str, base: str) -> str:
+    base_parts = list(urlparse.urlparse(base))
+    url_parts = list(urlparse.urlparse(url))
+    if not url_parts[0]:  # fix up schema
+        url_parts[0] = base_parts[0] or "http"
+    if not url_parts[1]:  # fix up hostname
+        url_parts[1] = base_parts[1]
+    if not url_parts[2].startswith("/"):
+        url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts[2]) + url_parts[2]
+    return urlparse.urlunparse(url_parts)


-def _is_media(content_type):
-    if content_type.lower().startswith("image/"):
-        return True
+def _is_media(content_type: str) -> bool:
+    return content_type.lower().startswith("image/")


-def _is_html(content_type):
+def _is_html(content_type: str) -> bool:
     content_type = content_type.lower()
-    if content_type.startswith("text/html") or content_type.startswith(
+    return content_type.startswith("text/html") or content_type.startswith(
         "application/xhtml"
-    ):
-        return True
+    )


-def summarize_paragraphs(text_nodes, min_size=200, max_size=500):
+def summarize_paragraphs(
+    text_nodes: Iterable[str], min_size: int = 200, max_size: int = 500
+) -> Optional[str]:
     # Try to get a summary of between 200 and 500 words, respecting
     # first paragraph and then word boundaries.
     # TODO: Respect sentences?
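Note: the typed _rebase_url above fills in whatever parts of a URL are missing (scheme, host, or a leading path) from a base URL, which is how relative image references in a page get resolved. A self-contained usage sketch of the same logic, renamed rebase_url since Synapse's helper is private:

import re
from urllib import parse as urlparse

def rebase_url(url: str, base: str) -> str:
    # Same rebasing logic as the helper in the diff above.
    base_parts = list(urlparse.urlparse(base))
    url_parts = list(urlparse.urlparse(url))
    if not url_parts[0]:  # missing scheme
        url_parts[0] = base_parts[0] or "http"
    if not url_parts[1]:  # missing hostname
        url_parts[1] = base_parts[1]
    if not url_parts[2].startswith("/"):  # relative path
        url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts[2]) + url_parts[2]
    return urlparse.urlunparse(url_parts)

# A relative image reference from an OG tag, resolved against the page URL.
print(rebase_url("images/logo.png", "https://example.com/blog/post.html"))
# -> https://example.com/blog/images/logo.png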