Support underscores (in addition to hyphens) for charset detection. (#10410)

This commit is contained in:
sri-vidyut 2021-07-28 02:29:42 +09:00 committed by GitHub
parent 5b22d5ee03
commit 8e1febc6a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 2 deletions

View file

@ -58,9 +58,11 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
_charset_match = re.compile(br'<\s*meta[^>]*charset\s*=\s*"?([a-z0-9-]+)"?', flags=re.I)
_charset_match = re.compile(
br'<\s*meta[^>]*charset\s*=\s*"?([a-z0-9_-]+)"?', flags=re.I
)
_xml_encoding_match = re.compile(
br'\s*<\s*\?\s*xml[^>]*encoding="([a-z0-9-]+)"', flags=re.I
br'\s*<\s*\?\s*xml[^>]*encoding="([a-z0-9_-]+)"', flags=re.I
)
_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)