Fix potential thumbnail memory leaks. (#12932)

This commit is contained in:
Erik Johnston 2022-06-01 11:57:49 +01:00 committed by GitHub
parent 2e8763ec96
commit 5949ab86f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 137 deletions

1
changelog.d/12932.bugfix Normal file
View File

@ -0,0 +1 @@
Fix potential memory leak when generating thumbnails.

View File

@ -587,15 +587,16 @@ class MediaRepository:
)
return None
t_byte_source = await defer_to_thread(
self.hs.get_reactor(),
self._generate_thumbnail,
thumbnailer,
t_width,
t_height,
t_method,
t_type,
)
with thumbnailer:
t_byte_source = await defer_to_thread(
self.hs.get_reactor(),
self._generate_thumbnail,
thumbnailer,
t_width,
t_height,
t_method,
t_type,
)
if t_byte_source:
try:
@ -657,15 +658,16 @@ class MediaRepository:
)
return None
t_byte_source = await defer_to_thread(
self.hs.get_reactor(),
self._generate_thumbnail,
thumbnailer,
t_width,
t_height,
t_method,
t_type,
)
with thumbnailer:
t_byte_source = await defer_to_thread(
self.hs.get_reactor(),
self._generate_thumbnail,
thumbnailer,
t_width,
t_height,
t_method,
t_type,
)
if t_byte_source:
try:
@ -749,119 +751,134 @@ class MediaRepository:
)
return None
m_width = thumbnailer.width
m_height = thumbnailer.height
with thumbnailer:
m_width = thumbnailer.width
m_height = thumbnailer.height
if m_width * m_height >= self.max_image_pixels:
logger.info(
"Image too large to thumbnail %r x %r > %r",
m_width,
m_height,
self.max_image_pixels,
)
return None
if thumbnailer.transpose_method is not None:
m_width, m_height = await defer_to_thread(
self.hs.get_reactor(), thumbnailer.transpose
)
# We deduplicate the thumbnail sizes by ignoring the cropped versions if
# they have the same dimensions of a scaled one.
thumbnails: Dict[Tuple[int, int, str], str] = {}
for requirement in requirements:
if requirement.method == "crop":
thumbnails.setdefault(
(requirement.width, requirement.height, requirement.media_type),
requirement.method,
if m_width * m_height >= self.max_image_pixels:
logger.info(
"Image too large to thumbnail %r x %r > %r",
m_width,
m_height,
self.max_image_pixels,
)
elif requirement.method == "scale":
t_width, t_height = thumbnailer.aspect(
requirement.width, requirement.height
return None
if thumbnailer.transpose_method is not None:
m_width, m_height = await defer_to_thread(
self.hs.get_reactor(), thumbnailer.transpose
)
t_width = min(m_width, t_width)
t_height = min(m_height, t_height)
thumbnails[
(t_width, t_height, requirement.media_type)
] = requirement.method
# Now we generate the thumbnails for each dimension, store it
for (t_width, t_height, t_type), t_method in thumbnails.items():
# Generate the thumbnail
if t_method == "crop":
t_byte_source = await defer_to_thread(
self.hs.get_reactor(), thumbnailer.crop, t_width, t_height, t_type
)
elif t_method == "scale":
t_byte_source = await defer_to_thread(
self.hs.get_reactor(), thumbnailer.scale, t_width, t_height, t_type
)
else:
logger.error("Unrecognized method: %r", t_method)
continue
if not t_byte_source:
continue
file_info = FileInfo(
server_name=server_name,
file_id=file_id,
url_cache=url_cache,
thumbnail=ThumbnailInfo(
width=t_width,
height=t_height,
method=t_method,
type=t_type,
),
)
with self.media_storage.store_into_file(file_info) as (f, fname, finish):
try:
await self.media_storage.write_to_file(t_byte_source, f)
await finish()
finally:
t_byte_source.close()
t_len = os.path.getsize(fname)
# Write to database
if server_name:
# Multiple remote media download requests can race (when
# using multiple media repos), so this may throw a violation
# constraint exception. If it does we'll delete the newly
# generated thumbnail from disk (as we're in the ctx
# manager).
#
# However: we've already called `finish()` so we may have
# also written to the storage providers. This is preferable
# to the alternative where we call `finish()` *after* this,
# where we could end up having an entry in the DB but fail
# to write the files to the storage providers.
try:
await self.store.store_remote_media_thumbnail(
server_name,
media_id,
file_id,
t_width,
t_height,
t_type,
t_method,
t_len,
)
except Exception as e:
thumbnail_exists = await self.store.get_remote_media_thumbnail(
server_name,
media_id,
t_width,
t_height,
t_type,
)
if not thumbnail_exists:
raise e
else:
await self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len
# We deduplicate the thumbnail sizes by ignoring the cropped versions if
# they have the same dimensions of a scaled one.
thumbnails: Dict[Tuple[int, int, str], str] = {}
for requirement in requirements:
if requirement.method == "crop":
thumbnails.setdefault(
(requirement.width, requirement.height, requirement.media_type),
requirement.method,
)
elif requirement.method == "scale":
t_width, t_height = thumbnailer.aspect(
requirement.width, requirement.height
)
t_width = min(m_width, t_width)
t_height = min(m_height, t_height)
thumbnails[
(t_width, t_height, requirement.media_type)
] = requirement.method
# Now we generate the thumbnails for each dimension, store it
for (t_width, t_height, t_type), t_method in thumbnails.items():
# Generate the thumbnail
if t_method == "crop":
t_byte_source = await defer_to_thread(
self.hs.get_reactor(),
thumbnailer.crop,
t_width,
t_height,
t_type,
)
elif t_method == "scale":
t_byte_source = await defer_to_thread(
self.hs.get_reactor(),
thumbnailer.scale,
t_width,
t_height,
t_type,
)
else:
logger.error("Unrecognized method: %r", t_method)
continue
if not t_byte_source:
continue
file_info = FileInfo(
server_name=server_name,
file_id=file_id,
url_cache=url_cache,
thumbnail=ThumbnailInfo(
width=t_width,
height=t_height,
method=t_method,
type=t_type,
),
)
with self.media_storage.store_into_file(file_info) as (
f,
fname,
finish,
):
try:
await self.media_storage.write_to_file(t_byte_source, f)
await finish()
finally:
t_byte_source.close()
t_len = os.path.getsize(fname)
# Write to database
if server_name:
# Multiple remote media download requests can race (when
# using multiple media repos), so this may throw a violation
# constraint exception. If it does we'll delete the newly
# generated thumbnail from disk (as we're in the ctx
# manager).
#
# However: we've already called `finish()` so we may have
# also written to the storage providers. This is preferable
# to the alternative where we call `finish()` *after* this,
# where we could end up having an entry in the DB but fail
# to write the files to the storage providers.
try:
await self.store.store_remote_media_thumbnail(
server_name,
media_id,
file_id,
t_width,
t_height,
t_type,
t_method,
t_len,
)
except Exception as e:
thumbnail_exists = (
await self.store.get_remote_media_thumbnail(
server_name,
media_id,
t_width,
t_height,
t_type,
)
)
if not thumbnail_exists:
raise e
else:
await self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len
)
return {"width": m_width, "height": m_height}

View File

@ -14,7 +14,8 @@
# limitations under the License.
import logging
from io import BytesIO
from typing import Tuple
from types import TracebackType
from typing import Optional, Tuple, Type
from PIL import Image
@ -45,6 +46,9 @@ class Thumbnailer:
Image.MAX_IMAGE_PIXELS = max_image_pixels
def __init__(self, input_path: str):
# Have we closed the image?
self._closed = False
try:
self.image = Image.open(input_path)
except OSError as e:
@ -89,7 +93,8 @@ class Thumbnailer:
# Safety: `transpose` takes an int rather than e.g. an IntEnum.
# self.transpose_method is set above to be a value in
# EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values.
self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type]
with self.image:
self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type]
self.width, self.height = self.image.size
self.transpose_method = None
# We don't need EXIF any more
@ -122,9 +127,11 @@ class Thumbnailer:
# If the image has transparency, use RGBA instead.
if self.image.mode in ["1", "L", "P"]:
if self.image.info.get("transparency", None) is not None:
self.image = self.image.convert("RGBA")
with self.image:
self.image = self.image.convert("RGBA")
else:
self.image = self.image.convert("RGB")
with self.image:
self.image = self.image.convert("RGB")
return self.image.resize((width, height), Image.ANTIALIAS)
def scale(self, width: int, height: int, output_type: str) -> BytesIO:
@ -133,8 +140,8 @@ class Thumbnailer:
Returns:
BytesIO: the bytes of the encoded image ready to be written to disk
"""
scaled = self._resize(width, height)
return self._encode_image(scaled, output_type)
with self._resize(width, height) as scaled:
return self._encode_image(scaled, output_type)
def crop(self, width: int, height: int, output_type: str) -> BytesIO:
"""Rescales and crops the image to the given dimensions preserving
@ -151,18 +158,21 @@ class Thumbnailer:
BytesIO: the bytes of the encoded image ready to be written to disk
"""
if width * self.height > height * self.width:
scaled_width = width
scaled_height = (width * self.height) // self.width
scaled_image = self._resize(width, scaled_height)
crop_top = (scaled_height - height) // 2
crop_bottom = height + crop_top
cropped = scaled_image.crop((0, crop_top, width, crop_bottom))
crop = (0, crop_top, width, crop_bottom)
else:
scaled_width = (height * self.width) // self.height
scaled_image = self._resize(scaled_width, height)
scaled_height = height
crop_left = (scaled_width - width) // 2
crop_right = width + crop_left
cropped = scaled_image.crop((crop_left, 0, crop_right, height))
return self._encode_image(cropped, output_type)
crop = (crop_left, 0, crop_right, height)
with self._resize(scaled_width, scaled_height) as scaled_image:
with scaled_image.crop(crop) as cropped:
return self._encode_image(cropped, output_type)
def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO:
output_bytes_io = BytesIO()
@ -171,3 +181,42 @@ class Thumbnailer:
output_image = output_image.convert("RGB")
output_image.save(output_bytes_io, fmt, quality=80)
return output_bytes_io
def close(self) -> None:
"""Closes the underlying image file.
Once closed no other functions can be called.
Can be called multiple times.
"""
if self._closed:
return
self._closed = True
# Since we run this on the finalizer then we need to handle `__init__`
# raising an exception before it can define `self.image`.
image = getattr(self, "image", None)
if image is None:
return
image.close()
def __enter__(self) -> "Thumbnailer":
"""Make `Thumbnailer` a context manager that calls `close` on
`__exit__`.
"""
return self
def __exit__(
self,
type: Optional[Type[BaseException]],
value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> None:
self.close()
def __del__(self) -> None:
# Make sure we actually do close the image, rather than leak data.
self.close()