Merge remote-tracking branch 'upstream/release-v1.46'

This commit is contained in:
Tulir Asokan 2021-11-02 15:35:09 +02:00
commit 8d54d3bbbf
6 changed files with 49 additions and 6 deletions

View file

@@ -718,9 +718,12 @@ def decode_body(
if not body:
return None
# The idea here is that multiple encodings are tried until one works.
# Unfortunately the result is never used and then LXML will decode the string
# again with the found encoding.
for encoding in get_html_media_encodings(body, content_type):
try:
body_str = body.decode(encoding)
body.decode(encoding)
except Exception:
pass
else:
@@ -732,11 +735,11 @@ def decode_body(
from lxml import etree
# Create an HTML parser.
parser = etree.HTMLParser(recover=True, encoding="utf-8")
parser = etree.HTMLParser(recover=True, encoding=encoding)
# Attempt to parse the body. Returns None if the body was successfully
# parsed, but no tree was found.
return etree.fromstring(body_str, parser)
return etree.fromstring(body, parser)
def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: