mirror of
https://github.com/BookStackApp/BookStack.git
synced 2024-10-01 05:36:00 +00:00
Reviewed base64 image upload support
- Added test cases to cover. - Altered parsing logic to be a little less reliant on regex. - Added new image repo method for creating from data. - Added extension validation and additional type support. - Done some cleanup of common operations within PageContent. - Added message to API docs/method to mention image usage. For #2700 and #2631.
This commit is contained in:
parent
40ca50e44f
commit
39928e1c63
@ -1,17 +1,16 @@
|
|||||||
<?php namespace BookStack\Entities\Tools;
|
<?php namespace BookStack\Entities\Tools;
|
||||||
|
|
||||||
use BookStack\Auth\Permissions\PermissionService;
|
|
||||||
use BookStack\Entities\Models\Page;
|
use BookStack\Entities\Models\Page;
|
||||||
use BookStack\Entities\Tools\Markdown\CustomStrikeThroughExtension;
|
use BookStack\Entities\Tools\Markdown\CustomStrikeThroughExtension;
|
||||||
|
use BookStack\Exceptions\ImageUploadException;
|
||||||
use BookStack\Facades\Theme;
|
use BookStack\Facades\Theme;
|
||||||
use BookStack\Theming\ThemeEvents;
|
use BookStack\Theming\ThemeEvents;
|
||||||
use BookStack\Util\HtmlContentFilter;
|
use BookStack\Util\HtmlContentFilter;
|
||||||
use BookStack\Uploads\Image;
|
|
||||||
use BookStack\Uploads\ImageRepo;
|
use BookStack\Uploads\ImageRepo;
|
||||||
use BookStack\Uploads\ImageService;
|
|
||||||
use DOMDocument;
|
use DOMDocument;
|
||||||
use DOMNodeList;
|
use DOMNodeList;
|
||||||
use DOMXPath;
|
use DOMXPath;
|
||||||
|
use Illuminate\Support\Str;
|
||||||
use League\CommonMark\CommonMarkConverter;
|
use League\CommonMark\CommonMarkConverter;
|
||||||
use League\CommonMark\Environment;
|
use League\CommonMark\Environment;
|
||||||
use League\CommonMark\Extension\Table\TableExtension;
|
use League\CommonMark\Extension\Table\TableExtension;
|
||||||
@ -35,7 +34,7 @@ class PageContent
|
|||||||
*/
|
*/
|
||||||
public function setNewHTML(string $html)
|
public function setNewHTML(string $html)
|
||||||
{
|
{
|
||||||
$html = $this->saveBase64Images($this->page, $html);
|
$html = $this->extractBase64Images($this->page, $html);
|
||||||
$this->page->html = $this->formatHtml($html);
|
$this->page->html = $this->formatHtml($html);
|
||||||
$this->page->text = $this->toPlainText();
|
$this->page->text = $this->toPlainText();
|
||||||
$this->page->markdown = '';
|
$this->page->markdown = '';
|
||||||
@ -69,45 +68,40 @@ class PageContent
|
|||||||
/**
|
/**
|
||||||
* Convert all base64 image data to saved images
|
* Convert all base64 image data to saved images
|
||||||
*/
|
*/
|
||||||
public function saveBase64Images(Page $page, string $htmlText): string
|
public function extractBase64Images(Page $page, string $htmlText): string
|
||||||
{
|
{
|
||||||
if ($htmlText == '') {
|
if (empty($htmlText) || strpos($htmlText, 'data:image') === false) {
|
||||||
return $htmlText;
|
return $htmlText;
|
||||||
}
|
}
|
||||||
|
|
||||||
libxml_use_internal_errors(true);
|
$doc = $this->loadDocumentFromHtml($htmlText);
|
||||||
$doc = new DOMDocument();
|
|
||||||
$doc->loadHTML(mb_convert_encoding($htmlText, 'HTML-ENTITIES', 'UTF-8'));
|
|
||||||
$container = $doc->documentElement;
|
$container = $doc->documentElement;
|
||||||
$body = $container->childNodes->item(0);
|
$body = $container->childNodes->item(0);
|
||||||
$childNodes = $body->childNodes;
|
$childNodes = $body->childNodes;
|
||||||
$xPath = new DOMXPath($doc);
|
$xPath = new DOMXPath($doc);
|
||||||
|
$imageRepo = app()->make(ImageRepo::class);
|
||||||
|
$allowedExtensions = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
|
||||||
|
|
||||||
// Get all img elements with image data blobs
|
// Get all img elements with image data blobs
|
||||||
$imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
|
$imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
|
||||||
foreach($imageNodes as $imageNode) {
|
foreach ($imageNodes as $imageNode) {
|
||||||
$imageSrc = $imageNode->getAttribute('src');
|
$imageSrc = $imageNode->getAttribute('src');
|
||||||
|
[$dataDefinition, $base64ImageData] = explode(',', $imageSrc, 2);
|
||||||
|
$extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 'png');
|
||||||
|
|
||||||
# Parse base64 data
|
// Validate extension
|
||||||
$result = preg_match('"data:image/[a-zA-Z]*(;base64,[a-zA-Z0-9+/\\= ]*)"', $imageSrc, $matches);
|
if (!in_array($extension, $allowedExtensions)) {
|
||||||
|
$imageNode->setAttribute('src', '');
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if($result === 1) {
|
// Save image from data with a random name
|
||||||
$base64ImageData = $matches[1];
|
$imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
|
||||||
|
try {
|
||||||
$image = new Image();
|
$image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $page->id);
|
||||||
$imageService = app()->make(ImageService::class);
|
$imageNode->setAttribute('src', $image->path);
|
||||||
$permissionService = app(PermissionService::class);
|
} catch (ImageUploadException $exception) {
|
||||||
$imageRepo = new ImageRepo(new Image(), $imageService, $permissionService, $page);
|
$imageNode->setAttribute('src', '');
|
||||||
|
|
||||||
# Use existing saveDrawing method used for Drawio diagrams
|
|
||||||
$image = $imageRepo->saveDrawing($base64ImageData, $page->id);
|
|
||||||
|
|
||||||
// Create a new img element with the saved image URI
|
|
||||||
$newNode = $doc->createElement('img');
|
|
||||||
$newNode->setAttribute('src', $image->path);
|
|
||||||
|
|
||||||
// Replace the old img element
|
|
||||||
$imageNode->parentNode->replaceChild($newNode, $imageNode);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,14 +119,11 @@ class PageContent
|
|||||||
*/
|
*/
|
||||||
protected function formatHtml(string $htmlText): string
|
protected function formatHtml(string $htmlText): string
|
||||||
{
|
{
|
||||||
if ($htmlText == '') {
|
if (empty($htmlText)) {
|
||||||
return $htmlText;
|
return $htmlText;
|
||||||
}
|
}
|
||||||
|
|
||||||
libxml_use_internal_errors(true);
|
$doc = $this->loadDocumentFromHtml($htmlText);
|
||||||
$doc = new DOMDocument();
|
|
||||||
$doc->loadHTML(mb_convert_encoding($htmlText, 'HTML-ENTITIES', 'UTF-8'));
|
|
||||||
|
|
||||||
$container = $doc->documentElement;
|
$container = $doc->documentElement;
|
||||||
$body = $container->childNodes->item(0);
|
$body = $container->childNodes->item(0);
|
||||||
$childNodes = $body->childNodes;
|
$childNodes = $body->childNodes;
|
||||||
@ -171,7 +162,7 @@ class PageContent
|
|||||||
protected function updateLinks(DOMXPath $xpath, string $old, string $new)
|
protected function updateLinks(DOMXPath $xpath, string $old, string $new)
|
||||||
{
|
{
|
||||||
$old = str_replace('"', '', $old);
|
$old = str_replace('"', '', $old);
|
||||||
$matchingLinks = $xpath->query('//body//*//*[@href="'.$old.'"]');
|
$matchingLinks = $xpath->query('//body//*//*[@href="' . $old . '"]');
|
||||||
foreach ($matchingLinks as $domElem) {
|
foreach ($matchingLinks as $domElem) {
|
||||||
$domElem->setAttribute('href', $new);
|
$domElem->setAttribute('href', $new);
|
||||||
}
|
}
|
||||||
@ -224,7 +215,7 @@ class PageContent
|
|||||||
/**
|
/**
|
||||||
* Render the page for viewing
|
* Render the page for viewing
|
||||||
*/
|
*/
|
||||||
public function render(bool $blankIncludes = false) : string
|
public function render(bool $blankIncludes = false): string
|
||||||
{
|
{
|
||||||
$content = $this->page->html;
|
$content = $this->page->html;
|
||||||
|
|
||||||
@ -250,9 +241,7 @@ class PageContent
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
libxml_use_internal_errors(true);
|
$doc = $this->loadDocumentFromHtml($htmlContent);
|
||||||
$doc = new DOMDocument();
|
|
||||||
$doc->loadHTML(mb_convert_encoding($htmlContent, 'HTML-ENTITIES', 'UTF-8'));
|
|
||||||
$xPath = new DOMXPath($doc);
|
$xPath = new DOMXPath($doc);
|
||||||
$headers = $xPath->query("//h1|//h2|//h3|//h4|//h5|//h6");
|
$headers = $xPath->query("//h1|//h2|//h3|//h4|//h5|//h6");
|
||||||
|
|
||||||
@ -292,7 +281,7 @@ class PageContent
|
|||||||
/**
|
/**
|
||||||
* Remove any page include tags within the given HTML.
|
* Remove any page include tags within the given HTML.
|
||||||
*/
|
*/
|
||||||
protected function blankPageIncludes(string $html) : string
|
protected function blankPageIncludes(string $html): string
|
||||||
{
|
{
|
||||||
return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html);
|
return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html);
|
||||||
}
|
}
|
||||||
@ -300,7 +289,7 @@ class PageContent
|
|||||||
/**
|
/**
|
||||||
* Parse any include tags "{{@<page_id>#section}}" to be part of the page.
|
* Parse any include tags "{{@<page_id>#section}}" to be part of the page.
|
||||||
*/
|
*/
|
||||||
protected function parsePageIncludes(string $html) : string
|
protected function parsePageIncludes(string $html): string
|
||||||
{
|
{
|
||||||
$matches = [];
|
$matches = [];
|
||||||
preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);
|
preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);
|
||||||
@ -343,9 +332,7 @@ class PageContent
|
|||||||
protected function fetchSectionOfPage(Page $page, string $sectionId): string
|
protected function fetchSectionOfPage(Page $page, string $sectionId): string
|
||||||
{
|
{
|
||||||
$topLevelTags = ['table', 'ul', 'ol'];
|
$topLevelTags = ['table', 'ul', 'ol'];
|
||||||
$doc = new DOMDocument();
|
$doc = $this->loadDocumentFromHtml('<body>' . $page->html . '</body>');
|
||||||
libxml_use_internal_errors(true);
|
|
||||||
$doc->loadHTML(mb_convert_encoding('<body>'.$page->html.'</body>', 'HTML-ENTITIES', 'UTF-8'));
|
|
||||||
|
|
||||||
// Search included content for the id given and blank out if not exists.
|
// Search included content for the id given and blank out if not exists.
|
||||||
$matchingElem = $doc->getElementById($sectionId);
|
$matchingElem = $doc->getElementById($sectionId);
|
||||||
@ -368,4 +355,15 @@ class PageContent
|
|||||||
|
|
||||||
return $innerContent;
|
return $innerContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create and load a DOMDocument from the given html content.
|
||||||
|
*/
|
||||||
|
protected function loadDocumentFromHtml(string $html): DOMDocument
|
||||||
|
{
|
||||||
|
libxml_use_internal_errors(true);
|
||||||
|
$doc = new DOMDocument();
|
||||||
|
$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
|
||||||
|
return $doc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -60,6 +60,8 @@ class PageApiController extends ApiController
|
|||||||
*
|
*
|
||||||
* Any HTML content provided should be kept to a single-block depth of plain HTML
|
* Any HTML content provided should be kept to a single-block depth of plain HTML
|
||||||
* elements to remain compatible with the BookStack front-end and editors.
|
* elements to remain compatible with the BookStack front-end and editors.
|
||||||
|
* Any images included via base64 data URIs will be extracted and saved as gallery
|
||||||
|
* images against the page during upload.
|
||||||
*/
|
*/
|
||||||
public function create(Request $request)
|
public function create(Request $request)
|
||||||
{
|
{
|
||||||
|
@ -130,6 +130,17 @@ class ImageRepo
|
|||||||
return $image;
|
return $image;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save a new image from an existing image data string.
|
||||||
|
* @throws ImageUploadException
|
||||||
|
*/
|
||||||
|
public function saveNewFromData(string $imageName, string $imageData, string $type, int $uploadedTo = 0)
|
||||||
|
{
|
||||||
|
$image = $this->imageService->saveNew($imageName, $imageData, $type, $uploadedTo);
|
||||||
|
$this->loadThumbs($image);
|
||||||
|
return $image;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Save a drawing to the database.
|
* Save a drawing to the database.
|
||||||
* @throws ImageUploadException
|
* @throws ImageUploadException
|
||||||
|
@ -3,9 +3,13 @@
|
|||||||
use BookStack\Entities\Tools\PageContent;
|
use BookStack\Entities\Tools\PageContent;
|
||||||
use BookStack\Entities\Models\Page;
|
use BookStack\Entities\Models\Page;
|
||||||
use Tests\TestCase;
|
use Tests\TestCase;
|
||||||
|
use Tests\Uploads\UsesImages;
|
||||||
|
|
||||||
class PageContentTest extends TestCase
|
class PageContentTest extends TestCase
|
||||||
{
|
{
|
||||||
|
use UsesImages;
|
||||||
|
|
||||||
|
protected $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=';
|
||||||
|
|
||||||
public function test_page_includes()
|
public function test_page_includes()
|
||||||
{
|
{
|
||||||
@ -479,4 +483,64 @@ class PageContentTest extends TestCase
|
|||||||
$pageView = $this->get($page->getUrl());
|
$pageView = $this->get($page->getUrl());
|
||||||
$pageView->assertElementExists('.page-content p > s');
|
$pageView->assertElementExists('.page-content p > s');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function test_base64_images_get_extracted_from_page_content()
|
||||||
|
{
|
||||||
|
$this->asEditor();
|
||||||
|
$page = Page::query()->first();
|
||||||
|
|
||||||
|
$this->put($page->getUrl(), [
|
||||||
|
'name' => $page->name, 'summary' => '',
|
||||||
|
'html' => '<p>test<img src="data:image/jpeg;base64,'.$this->base64Jpeg.'"/></p>',
|
||||||
|
]);
|
||||||
|
|
||||||
|
$page->refresh();
|
||||||
|
$this->assertStringMatchesFormat('%A<p%A>test<img src="/uploads/images/gallery/%A.jpeg">%A</p>%A', $page->html);
|
||||||
|
|
||||||
|
$matches = [];
|
||||||
|
preg_match('/src="(.*?)"/', $page->html, $matches);
|
||||||
|
$imagePath = $matches[1];
|
||||||
|
$imageFile = public_path($imagePath);
|
||||||
|
$this->assertEquals(base64_decode($this->base64Jpeg), file_get_contents($imageFile));
|
||||||
|
|
||||||
|
$this->deleteImage($imagePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function test_base64_images_get_extracted_when_containing_whitespace()
|
||||||
|
{
|
||||||
|
$this->asEditor();
|
||||||
|
$page = Page::query()->first();
|
||||||
|
|
||||||
|
$base64PngWithWhitespace = "iVBORw0KGg\noAAAANSUhE\tUgAAAAEAAAA BCA YAAAAfFcSJAAA\n\t ACklEQVR4nGMAAQAABQAB";
|
||||||
|
$base64PngWithoutWhitespace = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQAB';
|
||||||
|
$this->put($page->getUrl(), [
|
||||||
|
'name' => $page->name, 'summary' => '',
|
||||||
|
'html' => '<p>test<img src="data:image/png;base64,'.$base64PngWithWhitespace.'"/></p>',
|
||||||
|
]);
|
||||||
|
|
||||||
|
$page->refresh();
|
||||||
|
$this->assertStringMatchesFormat('%A<p%A>test<img src="/uploads/images/gallery/%A.png">%A</p>%A', $page->html);
|
||||||
|
|
||||||
|
$matches = [];
|
||||||
|
preg_match('/src="(.*?)"/', $page->html, $matches);
|
||||||
|
$imagePath = $matches[1];
|
||||||
|
$imageFile = public_path($imagePath);
|
||||||
|
$this->assertEquals(base64_decode($base64PngWithoutWhitespace), file_get_contents($imageFile));
|
||||||
|
|
||||||
|
$this->deleteImage($imagePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function test_base64_images_blanked_if_not_supported_extension_for_extract()
|
||||||
|
{
|
||||||
|
$this->asEditor();
|
||||||
|
$page = Page::query()->first();
|
||||||
|
|
||||||
|
$this->put($page->getUrl(), [
|
||||||
|
'name' => $page->name, 'summary' => '',
|
||||||
|
'html' => '<p>test<img src="data:image/jiff;base64,'.$this->base64Jpeg.'"/></p>',
|
||||||
|
]);
|
||||||
|
|
||||||
|
$page->refresh();
|
||||||
|
$this->assertStringContainsString('<img src=""', $page->html);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user