Prevented PCRE limit issues in markdown base64 extraction

For #3249
This commit is contained in:
Dan Brown 2022-02-06 07:51:38 +00:00
parent 43f32f6d5a
commit d29a2a647a
No known key found for this signature in database
GPG Key ID: 46D9F943C24A2EF9
2 changed files with 57 additions and 4 deletions

View File

@ -109,15 +109,35 @@ class PageContent
/**
* Convert all inline base64 content to uploaded image files.
* Regex is only used to locate the start of each data URI definition;
* the rest of the data URI is then read by manually looping over the content.
* Attempting to capture a whole data URI using regex can cause PHP
* PCRE limits to be hit with larger, multi-MB files.
*/
protected function extractBase64ImagesFromMarkdown(string $markdown)
{
$matches = [];
preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);
$contentLength = strlen($markdown);
$replacements = [];
preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);
foreach ($matches[1] as $base64Match) {
$newUrl = $this->base64ImageUriToUploadedImageUrl($base64Match);
$markdown = str_replace($base64Match, $newUrl, $markdown);
foreach ($matches[1] as $base64MatchPair) {
[$dataUri, $index] = $base64MatchPair;
for ($i = strlen($dataUri) + $index; $i < $contentLength; $i++) {
$char = $markdown[$i];
if ($char === ')' || $char === ' ' || $char === "\n" || $char === '"') {
break;
}
$dataUri .= $char;
}
$newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri);
$replacements[] = [$dataUri, $newUrl];
}
foreach ($replacements as [$dataUri, $newUrl]) {
$markdown = str_replace($dataUri, $newUrl, $markdown);
}
return $markdown;

View File

@ -657,6 +657,39 @@ class PageContentTest extends TestCase
$this->deleteImage($imagePath);
}
/**
 * Ensure base64 images embedded in markdown are still extracted to uploaded
 * image files when the PCRE backtrack and recursion limits are far smaller
 * than the embedded data URIs.
 */
public function test_markdown_base64_extract_not_limited_by_pcre_limits()
{
    // Remember the current limits so they can be put back afterwards.
    $pcreBacktrackLimit = ini_get('pcre.backtrack_limit');
    $pcreRecursionLimit = ini_get('pcre.recursion_limit');

    $this->asEditor();
    $page = Page::query()->first();

    ini_set('pcre.backtrack_limit', '500');
    ini_set('pcre.recursion_limit', '500');

    try {
        // 5000 bytes base64-encode to several thousand characters,
        // well beyond the 500 limits configured above.
        $content = str_repeat('a', 5000);
        $base64Content = base64_encode($content);

        $this->put($page->getUrl(), [
            'name' => $page->name, 'summary' => '',
            'markdown' => 'test ![test](data:image/jpeg;base64,' . $base64Content . ') ![test](data:image/jpeg;base64,' . $base64Content . ')',
        ]);

        $page->refresh();
        $this->assertStringMatchesFormat('<p%A>test <img src="http://localhost/uploads/images/gallery/%A.jpeg" alt="test"> <img src="http://localhost/uploads/images/gallery/%A.jpeg" alt="test">%A</p>%A', $page->html);

        // Use the first image source in the rendered HTML to check the stored file content.
        $matches = [];
        preg_match('/src="http:\/\/localhost(.*?)"/', $page->html, $matches);
        $imagePath = $matches[1];
        $imageFile = public_path($imagePath);
        $this->assertEquals($content, file_get_contents($imageFile));

        $this->deleteImage($imagePath);
    } finally {
        // Always restore the limits, even when an assertion above fails,
        // so the lowered values cannot leak into tests that run later.
        ini_set('pcre.backtrack_limit', $pcreBacktrackLimit);
        ini_set('pcre.recursion_limit', $pcreRecursionLimit);
    }
}
public function test_base64_images_within_markdown_blanked_if_not_supported_extension_for_extract()
{
$this->asEditor();