Move HTML parsing to a separate file for URL previews. (#11566)

* Splits the logic for parsing HTML from the resource handling code.
* Fixes a circular import in the oEmbed code (which uses the HTML parsing code).
* Renames some of the HTML parsing methods to:
  * Make it clear which methods are "internal" to the module.
  * Clarify what the methods do.
This commit is contained in:
Patrick Cloke 2021-12-13 12:55:07 -05:00 committed by GitHub
parent 5305a5e881
commit eb39da6782
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 432 additions and 401 deletions

View file

@@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, List, Optional
import attr
+from synapse.rest.media.v1.preview_html import parse_html_description
from synapse.types import JsonDict
from synapse.util import json_decoder
@@ -245,8 +246,6 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) ->
if video_urls:
open_graph_response["og:video"] = video_urls[0]
-from synapse.rest.media.v1.preview_url_resource import _calc_description
-description = _calc_description(tree)
+description = parse_html_description(tree)
if description:
open_graph_response["og:description"] = description