diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 2c9586265..fbee2ccb6 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -1,17 +1,16 @@ saveBase64Images($this->page, $html); + $html = $this->extractBase64Images($this->page, $html); $this->page->html = $this->formatHtml($html); $this->page->text = $this->toPlainText(); $this->page->markdown = ''; @@ -69,45 +68,40 @@ class PageContent /** * Convert all base64 image data to saved images */ - public function saveBase64Images(Page $page, string $htmlText): string + public function extractBase64Images(Page $page, string $htmlText): string { - if ($htmlText == '') { + if (empty($htmlText) || strpos($htmlText, 'data:image') === false) { return $htmlText; } - libxml_use_internal_errors(true); - $doc = new DOMDocument(); - $doc->loadHTML(mb_convert_encoding($htmlText, 'HTML-ENTITIES', 'UTF-8')); + $doc = $this->loadDocumentFromHtml($htmlText); $container = $doc->documentElement; $body = $container->childNodes->item(0); $childNodes = $body->childNodes; $xPath = new DOMXPath($doc); + $imageRepo = app()->make(ImageRepo::class); + $allowedExtensions = ['jpg', 'jpeg', 'png', 'gif', 'webp']; // Get all img elements with image data blobs $imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]'); - foreach($imageNodes as $imageNode) { + foreach ($imageNodes as $imageNode) { $imageSrc = $imageNode->getAttribute('src'); + [$dataDefinition, $base64ImageData] = array_pad(explode(',', $imageSrc, 2), 2, ''); + $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 
'png'); - # Parse base64 data - $result = preg_match('"data:image/[a-zA-Z]*(;base64,[a-zA-Z0-9+/\\= ]*)"', $imageSrc, $matches); + // Blank the src when there is no data after the comma or the extension is not in the allowed list + if ($base64ImageData === '' || !in_array($extension, $allowedExtensions, true)) { + $imageNode->setAttribute('src', ''); + continue; + } - if($result === 1) { - $base64ImageData = $matches[1]; - - $image = new Image(); - $imageService = app()->make(ImageService::class); - $permissionService = app(PermissionService::class); - $imageRepo = new ImageRepo(new Image(), $imageService, $permissionService, $page); - - # Use existing saveDrawing method used for Drawio diagrams - $image = $imageRepo->saveDrawing($base64ImageData, $page->id); - - // Create a new img element with the saved image URI - $newNode = $doc->createElement('img'); - $newNode->setAttribute('src', $image->path); - - // Replace the old img element - $imageNode->parentNode->replaceChild($newNode, $imageNode); + // Save image from data with a random name; non-strict base64_decode is used so embedded whitespace is ignored + $imageName = 'embedded-image-' . Str::random(8) . '.' . 
$extension; + try { + $image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $page->id); + $imageNode->setAttribute('src', $image->path); + } catch (ImageUploadException $exception) { + $imageNode->setAttribute('src', ''); } } @@ -125,14 +119,11 @@ class PageContent */ protected function formatHtml(string $htmlText): string { - if ($htmlText == '') { + if (empty($htmlText)) { return $htmlText; } - libxml_use_internal_errors(true); - $doc = new DOMDocument(); - $doc->loadHTML(mb_convert_encoding($htmlText, 'HTML-ENTITIES', 'UTF-8')); - + $doc = $this->loadDocumentFromHtml($htmlText); $container = $doc->documentElement; $body = $container->childNodes->item(0); $childNodes = $body->childNodes; @@ -171,7 +162,7 @@ class PageContent protected function updateLinks(DOMXPath $xpath, string $old, string $new) { $old = str_replace('"', '', $old); - $matchingLinks = $xpath->query('//body//*//*[@href="'.$old.'"]'); + $matchingLinks = $xpath->query('//body//*//*[@href="' . $old . '"]'); foreach ($matchingLinks as $domElem) { $domElem->setAttribute('href', $new); } @@ -224,7 +215,7 @@ class PageContent /** * Render the page for viewing */ - public function render(bool $blankIncludes = false) : string + public function render(bool $blankIncludes = false): string { $content = $this->page->html; @@ -250,9 +241,7 @@ class PageContent return []; } - libxml_use_internal_errors(true); - $doc = new DOMDocument(); - $doc->loadHTML(mb_convert_encoding($htmlContent, 'HTML-ENTITIES', 'UTF-8')); + $doc = $this->loadDocumentFromHtml($htmlContent); $xPath = new DOMXPath($doc); $headers = $xPath->query("//h1|//h2|//h3|//h4|//h5|//h6"); @@ -292,7 +281,7 @@ class PageContent /** * Remove any page include tags within the given HTML. 
*/ - protected function blankPageIncludes(string $html) : string + protected function blankPageIncludes(string $html): string { return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html); } @@ -300,7 +289,7 @@ class PageContent /** * Parse any include tags "{{@#section}}" to be part of the page. */ - protected function parsePageIncludes(string $html) : string + protected function parsePageIncludes(string $html): string { $matches = []; preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches); @@ -343,9 +332,7 @@ class PageContent protected function fetchSectionOfPage(Page $page, string $sectionId): string { $topLevelTags = ['table', 'ul', 'ol']; - $doc = new DOMDocument(); - libxml_use_internal_errors(true); - $doc->loadHTML(mb_convert_encoding(''.$page->html.'', 'HTML-ENTITIES', 'UTF-8')); + $doc = $this->loadDocumentFromHtml('' . $page->html . ''); // Search included content for the id given and blank out if not exists. $matchingElem = $doc->getElementById($sectionId); @@ -368,4 +355,15 @@ class PageContent return $innerContent; } + + /** + * Create and load a DOMDocument from the given UTF-8 html content (non-ASCII characters are passed to libxml as numeric entities). + */ + protected function loadDocumentFromHtml(string $html): DOMDocument + { + libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8')); + return $doc; + } } diff --git a/app/Http/Controllers/Api/PageApiController.php b/app/Http/Controllers/Api/PageApiController.php index a6db05833..fd4a16eff 100644 --- a/app/Http/Controllers/Api/PageApiController.php +++ b/app/Http/Controllers/Api/PageApiController.php @@ -60,6 +60,8 @@ class PageApiController extends ApiController * * Any HTML content provided should be kept to a single-block depth of plain HTML * elements to remain compatible with the BookStack front-end and editors. + * Any images included via base64 data URIs will be extracted and saved as gallery + * images against the page during upload. 
*/ public function create(Request $request) { diff --git a/app/Uploads/ImageRepo.php b/app/Uploads/ImageRepo.php index e6f766824..ef249c58b 100644 --- a/app/Uploads/ImageRepo.php +++ b/app/Uploads/ImageRepo.php @@ -130,6 +130,17 @@ class ImageRepo return $image; } + /** + * Save a new image from an existing image data string via the image service, then call loadThumbs() on the result. + * @throws ImageUploadException + */ + public function saveNewFromData(string $imageName, string $imageData, string $type, int $uploadedTo = 0) + { + $image = $this->imageService->saveNew($imageName, $imageData, $type, $uploadedTo); + $this->loadThumbs($image); + return $image; + } + /** * Save a drawing the the database. * @throws ImageUploadException diff --git a/tests/Entity/PageContentTest.php b/tests/Entity/PageContentTest.php index 6d5200794..670557b0c 100644 --- a/tests/Entity/PageContentTest.php +++ b/tests/Entity/PageContentTest.php @@ -3,9 +3,13 @@ use BookStack\Entities\Tools\PageContent; use BookStack\Entities\Models\Page; use Tests\TestCase; +use Tests\Uploads\UsesImages; class PageContentTest extends TestCase { + use UsesImages; + + protected $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k='; public function test_page_includes() { @@ -479,4 +483,64 @@ class PageContentTest extends TestCase $pageView = $this->get($page->getUrl()); $pageView->assertElementExists('.page-content p > s'); } + + public function test_base64_images_get_extracted_from_page_content() + { + $this->asEditor(); + $page = Page::query()->first(); + + $this->put($page->getUrl(), [ + 'name' => $page->name, 'summary' => '', + 'html' => '

test

', + ]); + + $page->refresh(); + $this->assertStringMatchesFormat('%Atest%A

%A', $page->html); + + $matches = []; + preg_match('/src="(.*?)"/', $page->html, $matches); + $imagePath = $matches[1]; + $imageFile = public_path($imagePath); + $this->assertEquals(base64_decode($this->base64Jpeg), file_get_contents($imageFile)); + + $this->deleteImage($imagePath); + } + + public function test_base64_images_get_extracted_when_containing_whitespace() + { + $this->asEditor(); + $page = Page::query()->first(); + + $base64PngWithWhitespace = "iVBORw0KGg\noAAAANSUhE\tUgAAAAEAAAA BCA YAAAAfFcSJAAA\n\t ACklEQVR4nGMAAQAABQAB"; + $base64PngWithoutWhitespace = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQAB'; + $this->put($page->getUrl(), [ + 'name' => $page->name, 'summary' => '', + 'html' => '

test

', + ]); + + $page->refresh(); + $this->assertStringMatchesFormat('%Atest%A

%A', $page->html); + + $matches = []; + preg_match('/src="(.*?)"/', $page->html, $matches); + $imagePath = $matches[1]; + $imageFile = public_path($imagePath); + $this->assertEquals(base64_decode($base64PngWithoutWhitespace), file_get_contents($imageFile)); + + $this->deleteImage($imagePath); + } + + public function test_base64_images_blanked_if_not_supported_extension_for_extract() + { + $this->asEditor(); + $page = Page::query()->first(); + + $this->put($page->getUrl(), [ + 'name' => $page->name, 'summary' => '', + 'html' => '

test

', + ]); + + $page->refresh(); + $this->assertStringContainsString('html); + } }