Support underscores (in addition to hyphens) for charset detection. (#10410)

This commit is contained in:
sri-vidyut 2021-07-28 02:29:42 +09:00 committed by GitHub
parent 5b22d5ee03
commit 8e1febc6a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 2 deletions

View file

@@ -325,6 +325,19 @@ class MediaEncodingTestCase(unittest.TestCase):
)
self.assertEqual(encoding, "ascii")
def test_meta_charset_underscores(self):
    """A meta charset value that contains an underscore is returned as written."""
    # HTML document whose <meta charset> names an encoding with an underscore.
    html_body = b"""
<html>
<head><meta charset="Shift_JIS">
</head>
</html>
"""
    detected = get_html_media_encoding(html_body, "text/html")
    self.assertEqual(detected, "Shift_JIS")
def test_xml_encoding(self):
"""A character encoding is found via the meta tag."""
encoding = get_html_media_encoding(