mirror of
https://git.anonymousland.org/anonymousland/synapse.git
synced 2025-12-15 23:03:52 -05:00
Fix URL preview bugs (type error when loading cache from db, content-type including quotes) (#4157)
This commit is contained in:
parent
c8ba79327b
commit
b3708830b8
5 changed files with 187 additions and 10 deletions
|
|
@@ -12,6 +12,7 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import cgi
|
||||
import datetime
|
||||
import errno
|
||||
|
|
@@ -24,6 +25,7 @@ import shutil
|
|||
import sys
|
||||
import traceback
|
||||
|
||||
import six
|
||||
from six import string_types
|
||||
from six.moves import urllib_parse as urlparse
|
||||
|
||||
|
|
@@ -98,7 +100,7 @@ class PreviewUrlResource(Resource):
|
|||
# XXX: if get_user_by_req fails, what should we do in an async render?
|
||||
requester = yield self.auth.get_user_by_req(request)
|
||||
url = parse_string(request, "url")
|
||||
if "ts" in request.args:
|
||||
if b"ts" in request.args:
|
||||
ts = parse_integer(request, "ts")
|
||||
else:
|
||||
ts = self.clock.time_msec()
|
||||
|
|
@@ -180,7 +182,12 @@ class PreviewUrlResource(Resource):
|
|||
cache_result["expires_ts"] > ts and
|
||||
cache_result["response_code"] / 100 == 2
|
||||
):
|
||||
defer.returnValue(cache_result["og"])
|
||||
# It may be stored as text in the database, not as bytes (such as
|
||||
# PostgreSQL). If so, encode it back before handing it on.
|
||||
og = cache_result["og"]
|
||||
if isinstance(og, six.text_type):
|
||||
og = og.encode('utf8')
|
||||
defer.returnValue(og)
|
||||
return
|
||||
|
||||
media_info = yield self._download_url(url, user)
|
||||
|
|
@@ -213,14 +220,17 @@ class PreviewUrlResource(Resource):
|
|||
elif _is_html(media_info['media_type']):
|
||||
# TODO: somehow stop a big HTML tree from exploding synapse's RAM
|
||||
|
||||
file = open(media_info['filename'])
|
||||
body = file.read()
|
||||
file.close()
|
||||
with open(media_info['filename'], 'rb') as file:
|
||||
body = file.read()
|
||||
|
||||
# clobber the encoding from the content-type, or default to utf-8
|
||||
# XXX: this overrides any <meta/> or XML charset headers in the body
|
||||
# which may pose problems, but so far seems to work okay.
|
||||
match = re.match(r'.*; *charset=(.*?)(;|$)', media_info['media_type'], re.I)
|
||||
match = re.match(
|
||||
r'.*; *charset="?(.*?)"?(;|$)',
|
||||
media_info['media_type'],
|
||||
re.I
|
||||
)
|
||||
encoding = match.group(1) if match else "utf-8"
|
||||
|
||||
og = decode_and_calc_og(body, media_info['uri'], encoding)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue