Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.
This commit is contained in:
Dan Brown 2021-10-28 15:54:00 +01:00
parent 3166541002
commit 4f55fe2f8e
No known key found for this signature in database
GPG Key ID: 46D9F943C24A2EF9
3 changed files with 62 additions and 60 deletions

View File

@ -86,30 +86,13 @@ class PageContent
$body = $container->childNodes->item(0);
$childNodes = $body->childNodes;
$xPath = new DOMXPath($doc);
$imageRepo = app()->make(ImageRepo::class);
// Get all img elements with image data blobs
$imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
foreach ($imageNodes as $imageNode) {
$imageSrc = $imageNode->getAttribute('src');
[$dataDefinition, $base64ImageData] = explode(',', $imageSrc, 2);
$extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 'png');
// Validate extension
if (!$imageRepo->imageExtensionSupported($extension)) {
$imageNode->setAttribute('src', '');
continue;
}
// Save image from data with a random name
$imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
try {
$image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $this->page->id);
$imageNode->setAttribute('src', $image->url);
} catch (ImageUploadException $exception) {
$imageNode->setAttribute('src', '');
}
$newUrl = $this->base64ImageUriToUploadedImageUrl($imageSrc);
$imageNode->setAttribute('src', $newUrl);
}
// Generate inner html as a string
@ -126,34 +109,57 @@ class PageContent
*/
protected function extractBase64ImagesFromMarkdown(string $markdown)
{
$imageRepo = app()->make(ImageRepo::class);
$matches = [];
preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);
foreach ($matches[1] as $base64Match) {
[$dataDefinition, $base64ImageData] = explode(',', $base64Match, 2);
$extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 'png');
// Validate extension
if (!$imageRepo->imageExtensionSupported($extension)) {
$markdown = str_replace($base64Match, '', $markdown);
continue;
}
// Save image from data with a random name
$imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
try {
$image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $this->page->id);
$markdown = str_replace($base64Match, $image->url, $markdown);
} catch (ImageUploadException $exception) {
$markdown = str_replace($base64Match, '', $markdown);
}
$newUrl = $this->base64ImageUriToUploadedImageUrl($base64Match);
$markdown = str_replace($base64Match, $newUrl, $markdown);
}
return $markdown;
}
/**
* Parse the given base64 image URI and return the URL to the created image instance.
* Returns an empty string if the parsed URI is invalid or causes an error upon upload.
*/
protected function base64ImageUriToUploadedImageUrl(string $uri): string
{
$imageRepo = app()->make(ImageRepo::class);
$imageInfo = $this->parseBase64ImageUri($uri);
// Validate extension and content
if (empty($imageInfo['data']) || !$imageRepo->imageExtensionSupported($imageInfo['extension'])) {
return '';
}
// Save image from data with a random name
$imageName = 'embedded-image-' . Str::random(8) . '.' . $imageInfo['extension'];
try {
$image = $imageRepo->saveNewFromData($imageName, $imageInfo['data'], 'gallery', $this->page->id);
} catch (ImageUploadException $exception) {
return '';
}
return $image->url;
}
/**
* Parse a base64 image URI into the data and extension.
* @return array{extension: array, data: string}
*/
protected function parseBase64ImageUri(string $uri): array
{
[$dataDefinition, $base64ImageData] = explode(',', $uri, 2);
$extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? '');
return [
'extension' => $extension,
'data' => base64_decode($base64ImageData) ?: '',
];
}
/**
* Formats a page's html to be tagged correctly within the system.
*/

View File

@ -35,10 +35,12 @@ class ImageRepo
/**
* Check if the given image extension is supported by BookStack.
* The extension must not be altered in this function. This check should provide a guarantee
* that the provided extension is safe to use for the image to be saved.
*/
public function imageExtensionSupported(string $extension): bool
{
return in_array(trim($extension, ". \t\n\r\0\x0B"), static::$supportedExtensions);
return in_array($extension, static::$supportedExtensions);
}
/**

View File

@ -596,31 +596,25 @@ class PageContentTest extends TestCase
public function test_base64_images_within_html_blanked_if_not_supported_extension_for_extract()
{
$this->asEditor();
$page = Page::query()->first();
// Relevant to https://github.com/BookStackApp/BookStack/issues/3010 and other cases
$extensions = [
'jiff', 'pngr', 'png ', ' png', '.png', 'png.', 'p.ng', ',png',
'data:image/png', ',data:image/png',
];
$this->put($page->getUrl(), [
'name' => $page->name, 'summary' => '',
'html' => '<p>test<img src="data:image/jiff;base64,' . $this->base64Jpeg . '"/></p>',
]);
foreach ($extensions as $extension) {
$this->asEditor();
$page = Page::query()->first();
$page->refresh();
$this->assertStringContainsString('<img src=""', $page->html);
}
$this->put($page->getUrl(), [
'name' => $page->name, 'summary' => '',
'html' => '<p>test<img src="data:image/' . $extension . ';base64,' . $this->base64Jpeg . '"/></p>',
]);
// Relevant to https://github.com/BookStackApp/BookStack/issues/3010
public function test_base64_images_within_html_blanked_if_extension_incorrect_but_prefix_matches_correct_extension()
{
$this->asEditor();
$page = Page::query()->first();
$page->refresh();
$this->assertStringContainsString('<img src=""', $page->html);
}
$this->put($page->getUrl(), [
'name' => $page->name, 'summary' => '',
'html' => '<p>test<img src="data:image/pngr;base64,' . $this->base64Jpeg . '"/></p>',
]);
$page->refresh();
$this->assertStringContainsString('<img src=""', $page->html);
}
public function test_base64_images_get_extracted_from_markdown_page_content()