Support rendering previews with data: URLs in them (#11767)

Images which are data URLs will no longer break URL
previews and will properly be "downloaded" and
thumbnailed.
This commit is contained in:
Patrick Cloke 2022-01-24 08:58:18 -05:00 committed by GitHub
parent c3040dd5cc
commit 807efd26ae
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 299 additions and 74 deletions

View file

@ -321,14 +321,33 @@ def _iterate_over_text(
def rebase_url(url: str, base: str) -> str:
    """
    Resolves a potentially relative `url` against an absolute `base` URL.

    URLs which already carry a hostname, and `data:` URLs (which never have
    one), are returned without being rebased onto `base`.

    For example:

        >>> rebase_url("subpage", "https://example.com/foo/")
        'https://example.com/foo/subpage'
        >>> rebase_url("sibling", "https://example.com/foo")
        'https://example.com/sibling'
        >>> rebase_url("/bar", "https://example.com/foo/")
        'https://example.com/bar'
        >>> rebase_url("https://alice.com/a/", "https://example.com/foo/")
        'https://alice.com/a/'
        >>> rebase_url("data:text/plain,hello", "https://example.com/foo/")
        'data:text/plain,hello'

    Args:
        url: The URL to resolve; may be absolute, scheme-relative,
            host-relative or path-relative.
        base: The URL that `url` was found at.

    Returns:
        The resolved URL as a string.
    """
    base_parts = urlparse.urlparse(base)
    # Convert the parsed URL to a list for (potential) modification.
    # Indices: 0 = scheme, 1 = netloc, 2 = path.
    url_parts = list(urlparse.urlparse(url))

    # Add a scheme, if one does not exist.
    if not url_parts[0]:
        url_parts[0] = base_parts.scheme or "http"

    # Fix up the hostname, if this is not a data URL.
    if url_parts[0] != "data" and not url_parts[1]:
        url_parts[1] = base_parts.netloc
        # If the path does not start with a /, nest it under the base path's
        # last directory. This only applies when the hostname was inherited:
        # a URL with its own hostname (or a data URL) keeps its path verbatim.
        if not url_parts[2].startswith("/"):
            url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts.path) + url_parts[2]

    return urlparse.urlunparse(url_parts)