Fetch images when previewing Twitter URLs. (#11985)

This is done by including "bot" in the User-Agent, which some sites use
to decide whether to include additional Open Graph information.
This commit is contained in:
AndrewRyanChama 2022-02-22 04:11:39 -08:00 committed by GitHub
parent 79c18e0709
commit 066171643b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 4 deletions

View File

@@ -0,0 +1 @@
Fetch images when previewing Twitter URLs. Contributed by @AndrewRyanChama.

View File

@@ -5,8 +5,6 @@
"endpoints": [ "endpoints": [
{ {
"schemes": [ "schemes": [
"https://twitter.com/*/status/*",
"https://*.twitter.com/*/status/*",
"https://twitter.com/*/moments/*", "https://twitter.com/*/moments/*",
"https://*.twitter.com/*/moments/*" "https://*.twitter.com/*/moments/*"
], ],

View File

@@ -402,7 +402,15 @@ class PreviewUrlResource(DirectServeJsonResource):
url, url,
output_stream=output_stream, output_stream=output_stream,
max_size=self.max_spider_size, max_size=self.max_spider_size,
headers={"Accept-Language": self.url_preview_accept_language}, headers={
b"Accept-Language": self.url_preview_accept_language,
# Use a custom user agent for the preview because some sites will only return
# Open Graph metadata to crawler user agents. Omit the Synapse version
# string to avoid leaking information.
b"User-Agent": [
"Synapse (bot; +https://github.com/matrix-org/synapse)"
],
},
is_allowed_content_type=_is_previewable, is_allowed_content_type=_is_previewable,
) )
except SynapseError: except SynapseError: