BookStack/app/Entities/Tools/PageContent.php

<?php

namespace BookStack\Entities\Tools;

use BookStack\Entities\Models\Page;
use BookStack\Entities\Queries\PageQueries;
use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
use BookStack\Exceptions\ImageUploadException;
use BookStack\Facades\Theme;
use BookStack\Theming\ThemeEvents;
use BookStack\Uploads\ImageRepo;
use BookStack\Uploads\ImageService;
use BookStack\Users\Models\User;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlDocument;
use BookStack\Util\WebSafeMimeSniffer;
use Closure;
use DOMElement;
use DOMNode;
use DOMNodeList;
use Illuminate\Support\Str;

class PageContent
{
    /**
     * Page query helper, used when resolving the pages referenced by include tags.
     */
    protected PageQueries $pageQueries;

    /**
     * @param Page $page The page whose content this tool reads and updates.
     */
    public function __construct(
        protected Page $page
    ) {
        $container = app();
        $this->pageQueries = $container->make(PageQueries::class);
    }

    /**
     * Replace the content of the page using the provided HTML.
     * Embedded base64 images are extracted first, then the formatted HTML and
     * its plain-text form are stored and any markdown source is cleared.
     */
    public function setNewHTML(string $html, User $updater): void
    {
        $withoutInlineImages = $this->extractBase64ImagesFromHtml($html, $updater);
        $formatted = $this->formatHtml($withoutInlineImages);

        // The html must be assigned before the text, since the plain-text
        // form is produced by rendering the page's current html.
        $this->page->html = $formatted;
        $this->page->text = $this->toPlainText();
        $this->page->markdown = '';
    }

    /**
     * Replace the content of the page using the provided Markdown.
     * Embedded base64 images are extracted first; the resulting markdown is
     * stored alongside the HTML converted from it and the plain-text form.
     */
    public function setNewMarkdown(string $markdown, User $updater): void
    {
        $storedMarkdown = $this->extractBase64ImagesFromMarkdown($markdown, $updater);
        $this->page->markdown = $storedMarkdown;

        $converter = new MarkdownToHtml($storedMarkdown);
        $convertedHtml = $converter->convert();

        // The html must be assigned before the text, since the plain-text
        // form is produced by rendering the page's current html.
        $this->page->html = $this->formatHtml($convertedHtml);
        $this->page->text = $this->toPlainText();
    }

    /**
     * Convert all base64 image data within the given HTML to saved images.
     * Each img element whose src is a "data:image" URI has that src swapped
     * for the URL of the uploaded image, or for an empty string where the
     * data could not be uploaded.
     *
     * @param string $htmlText HTML that may contain base64 data-uri images.
     * @param User   $updater  The user the images are created on behalf of.
     *
     * @return string The HTML with data-uri image sources replaced. Content without
     *                any "data:image" text is returned untouched.
     */
    protected function extractBase64ImagesFromHtml(string $htmlText, User $updater): string
    {
        if (empty($htmlText) || !str_contains($htmlText, 'data:image')) {
            return $htmlText;
        }

        $doc = new HtmlDocument($htmlText);

        // Get all img elements which may hold image data blobs
        $imageNodes = $doc->queryXPath('//img[contains(@src, \'data:image\')]');
        /** @var DOMElement $imageNode */
        foreach ($imageNodes as $imageNode) {
            $imageSrc = $imageNode->getAttribute('src');

            // The xpath above also matches normal URLs that merely contain "data:image"
            // (within a query string, for example). Those are not data URIs, so they
            // must be left alone rather than being blanked by a failed extraction.
            if (!str_starts_with(ltrim($imageSrc), 'data:image')) {
                continue;
            }

            $newUrl = $this->base64ImageUriToUploadedImageUrl($imageSrc, $updater);
            $imageNode->setAttribute('src', $newUrl);
        }

        return $doc->getBodyInnerHtml();
    }

    /**
     * Convert all inline base64 content to uploaded image files.
     * Regex is used to locate the start of data-uri definitions then
     * the remainder of each data uri is located by scanning for its terminator.
     * Attempting to capture the whole data uri using regex can cause PHP
     * PCRE limits to be hit with larger, multi-MB, files.
     *
     * @param string $markdown Markdown that may contain base64 data-uri images.
     * @param User   $updater  The user the images are created on behalf of.
     *
     * @return string The markdown with each data uri swapped for its uploaded image URL,
     *                or for an empty string where the data could not be uploaded.
     */
    protected function extractBase64ImagesFromMarkdown(string $markdown, User $updater): string
    {
        $matches = [];
        preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);

        // Upload result for each distinct data uri, keyed by the uri itself.
        // Identical images are uploaded only once: replacement is string based,
        // so only the first URL created for a given uri would ever be used.
        $urlsByDataUri = [];

        foreach ($matches[1] ?? [] as [$uriPrefix, $prefixOffset]) {
            // The uri runs from the matched prefix up to the first character
            // that terminates a markdown link target.
            $payloadStart = $prefixOffset + strlen($uriPrefix);
            $payloadLength = strcspn($markdown, ") \n\"", $payloadStart);
            $dataUri = $uriPrefix . substr($markdown, $payloadStart, $payloadLength);

            $urlsByDataUri[$dataUri] ??= $this->base64ImageUriToUploadedImageUrl($dataUri, $updater);
        }

        // Replace the longest uris first so that a uri which happens to be a
        // prefix of another cannot partially overwrite the longer one.
        uksort($urlsByDataUri, static fn (string $a, string $b): int => strlen($b) <=> strlen($a));

        return str_replace(array_keys($urlsByDataUri), array_values($urlsByDataUri), $markdown);
    }

    /**
     * Parse the given base64 image URI and return the URL to the created image instance.
     * Returns an empty string if the user may not create images, if the parsed URI
     * is invalid, or if the upload causes an error.
     *
     * Checks run from cheapest to most expensive, so that a request which will be
     * rejected anyway does not pay for decoding or inspecting the image data.
     *
     * @param string $uri     The full data uri, in the form "data:image/<ext>;base64,<data>".
     * @param User   $updater The user the image is created on behalf of.
     */
    protected function base64ImageUriToUploadedImageUrl(string $uri, User $updater): string
    {
        // Validate user has permission to create images
        if (!$updater->can('image-create-all')) {
            return '';
        }

        $imageInfo = $this->parseBase64ImageUri($uri);
        $imageData = $imageInfo['data'];
        $extension = $imageInfo['extension'];

        // Validate extension and content
        if (empty($imageData) || !ImageService::isExtensionSupported($extension)) {
            return '';
        }

        // Validate that the content is not over our upload limit
        $uploadLimitBytes = (config('app.upload_limit') * 1000000);
        if (strlen($imageData) > $uploadLimitBytes) {
            return '';
        }

        // Validate content looks like an image via sniffing mime type
        $mimeSniffer = new WebSafeMimeSniffer();
        $mime = $mimeSniffer->sniff($imageData);
        if (!str_starts_with($mime, 'image/')) {
            return '';
        }

        // Save image from data with a random name
        $imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
        $imageRepo = app()->make(ImageRepo::class);

        try {
            $image = $imageRepo->saveNewFromData($imageName, $imageData, 'gallery', $this->page->id);
        } catch (ImageUploadException $exception) {
            return '';
        }

        return $image->url;
    }

    /**
     * Parse a base64 image URI into the data and extension.
     * The extension is the lower-cased subtype from the part before the first comma
     * ("png" for "data:image/png;base64"), and the data is the decoded content
     * following that comma. Either value is an empty string where it cannot be found,
     * including when the URI contains no comma at all.
     *
     * @return array{extension: string, data: string}
     */
    protected function parseBase64ImageUri(string $uri): array
    {
        // Pad the split result so that a URI without a comma provides an empty
        // payload instead of triggering an undefined array key warning.
        [$dataDefinition, $base64ImageData] = array_pad(explode(',', $uri, 2), 2, '');
        $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? '');

        return [
            'extension' => $extension,
            'data'      => base64_decode($base64ImageData) ?: '',
        ];
    }

    /**
     * Formats a page's html to be tagged correctly within the system.
     * Ensures elements carry unique ids, updates links which pointed at any
     * ids that were changed, then converts non-breaking space characters
     * into "&nbsp;" entities in the resulting HTML string.
     *
     * @return string The formatted body HTML. Empty input is returned as given.
     */
    protected function formatHtml(string $htmlText): string
    {
        if (empty($htmlText)) {
            return $htmlText;
        }

        $doc = new HtmlDocument($htmlText);

        // Map to hold used ID references
        $idMap = [];
        // Map to hold changing ID references
        $changeMap = [];

        $this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
        $this->updateLinks($doc, $changeMap);

        // Generate inner html as a string & perform required string-level tweaks.
        // The non-breaking space (U+00A0) is written as an explicit UTF-8 byte escape,
        // since a literal one is indistinguishable from a normal space when reading
        // the code and is easily normalised away by editors or tooling.
        $html = $doc->getBodyInnerHtml();
        $html = str_replace("\xc2\xa0", '&nbsp;', $html);

        return $html;
    }

    /**
     * For the given DOMNode, traverse its children recursively and update IDs
     * where required (Top-level, headers & elements with IDs).
     * Will update the provided $changeMap array with changes made, where keys are the old
     * ids and the corresponding values are the new ids. Elements that had no id
     * beforehand are never recorded, since nothing could have referenced them.
     *
     * @param DOMNode $element   Node whose children are to be processed.
     * @param int     $depth     Depth of those children, where 0 is for the children of the initial node.
     * @param array   $idMap     Ids in use so far, as keys. Added to as ids are kept or assigned.
     * @param array   $changeMap Old id => new id, for each existing id that was replaced.
     */
    protected function updateIdsRecursively(DOMNode $element, int $depth, array &$idMap, array &$changeMap): void
    {
        /* @var DOMNode $child */
        foreach ($element->childNodes as $child) {
            $requiresId = $child instanceof DOMElement && (
                $depth === 0
                || in_array($child->nodeName, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], true)
                || $child->getAttribute('id')
            );

            if ($requiresId) {
                [$oldId, $newId] = $this->setUniqueId($child, $idMap);

                // Only record a change when an existing id was replaced, and that old id
                // is not still in use by another element. Recording an empty old id would
                // cause empty "#" links to be re-pointed when links are updated.
                if ($oldId !== '' && $newId && $newId !== $oldId && !isset($idMap[$oldId])) {
                    $changeMap[$oldId] = $newId;
                }
            }

            if ($child->hasChildNodes()) {
                $this->updateIdsRecursively($child, $depth + 1, $idMap, $changeMap);
            }
        }
    }

    /**
     * Update all the fragment links in the given document to apply the required
     * changes within the given $changeMap array.
     * Only href values which start with "#" are treated as links to an id; other
     * hrefs are left untouched even if their text matches a changed id.
     *
     * @param HtmlDocument $doc       Document containing the links to update.
     * @param array        $changeMap Old id => new id, as built while updating ids.
     */
    protected function updateLinks(HtmlDocument $doc, array $changeMap): void
    {
        if (empty($changeMap)) {
            return;
        }

        $links = $doc->queryXPath('//body//*//*[@href]');
        /** @var DOMElement $domElem */
        foreach ($links as $domElem) {
            $href = $domElem->getAttribute('href');
            if (!str_starts_with($href, '#')) {
                continue;
            }

            $targetId = ltrim($href, '#');
            if ($targetId === '') {
                continue;
            }

            $newId = $changeMap[$targetId] ?? null;
            if ($newId) {
                $domElem->setAttribute('href', '#' . $newId);
            }
        }
    }

    /**
     * Ensure the given DOMElement has an id that is unique within $idMap.
     * An existing id is kept when it starts with "bkmrk" and is not yet in the map.
     * Otherwise a new id is built from the element's text content, with a numeric
     * suffix appended where needed to avoid ids already in the map.
     * The map is updated with whichever id the element ends up with.
     *
     * @return array The pair [old_id, new_id], or two empty strings for non-element nodes.
     */
    protected function setUniqueId(DOMNode $element, array &$idMap): array
    {
        if (!($element instanceof DOMElement)) {
            return ['', ''];
        }

        $currentId = $element->getAttribute('id');

        // Keep an id already in the expected format, unless another element took it first.
        $canKeepCurrentId = str_starts_with($currentId, 'bkmrk') && !isset($idMap[$currentId]);
        if ($canKeepCurrentId) {
            $idMap[$currentId] = true;

            return [$currentId, $currentId];
        }

        // Build the id from the content so that the same content
        // always results in the same id being generated.
        $slug = preg_replace('/\s+/', '-', trim($element->nodeValue));
        $baseId = 'bkmrk-' . mb_substr(strtolower($slug), 0, 20);

        $candidateId = urlencode($baseId);
        for ($suffix = 1; isset($idMap[$candidateId]); $suffix++) {
            $candidateId = urlencode($baseId . '-' . $suffix);
        }

        $element->setAttribute('id', $candidateId);
        $idMap[$candidateId] = true;

        return [$currentId, $candidateId];
    }

    /**
     * Get a plain-text form of this page's content: the page is rendered with
     * includes blanked, then tags are stripped and HTML entities decoded.
     */
    protected function toPlainText(): string
    {
        $renderedHtml = $this->render(true);
        $withoutTags = strip_tags($renderedHtml);

        return html_entity_decode($withoutTags);
    }

    /**
     * Render the page's stored HTML for viewing.
     * Include tags are parsed, ids are re-checked where includes added content,
     * and scripts are removed unless content scripts are allowed by configuration.
     *
     * @param bool $blankIncludes When true, include tags are given empty content
     *                            instead of the content of the referenced page.
     */
    public function render(bool $blankIncludes = false): string
    {
        $storedHtml = $this->page->html ?? '';
        if (empty($storedHtml)) {
            return $storedHtml;
        }

        $doc = new HtmlDocument($storedHtml);
        $parser = new PageIncludeParser($doc, $this->getContentProviderClosure($blankIncludes));

        // Parse includes for up to three passes, stopping early
        // as soon as a pass reports that no nodes were added.
        $passes = 0;
        do {
            $nodesAdded = $parser->parse();
            $passes++;
        } while ($passes < 3 && $nodesAdded !== 0);

        // A second pass only happens when the first pass added nodes,
        // in which case the ids within the document are made unique again.
        if ($passes > 1) {
            $usedIds = [];
            $changedIds = [];
            $this->updateIdsRecursively($doc->getBody(), 0, $usedIds, $changedIds);
        }

        if (!config('app.allow_content_scripts')) {
            HtmlContentFilter::removeScriptsFromDocument($doc);
        }

        return $doc->getBodyInnerHtml();
    }

    /**
     * Build the closure that supplies the content for each page include tag.
     * With $blankIncludes set, every tag is given empty content. Otherwise the
     * content comes from the visible page referenced by the tag, which listeners
     * of the PAGE_INCLUDE_PARSE theme event are able to replace.
     */
    protected function getContentProviderClosure(bool $blankIncludes): Closure
    {
        $hostPage = $this->page;
        $pageQueries = $this->pageQueries;

        return function (PageIncludeTag $tag) use ($blankIncludes, $hostPage, $pageQueries): PageIncludeContent {
            if ($blankIncludes) {
                return PageIncludeContent::fromHtmlAndTag('', $tag);
            }

            $includedPage = $pageQueries->findVisibleById($tag->getPageId());
            $includedContent = PageIncludeContent::fromHtmlAndTag($includedPage->html ?? '', $tag);

            if (!Theme::hasListeners(ThemeEvents::PAGE_INCLUDE_PARSE)) {
                return $includedContent;
            }

            // Listeners receive copies of the pages rather than the originals.
            $themeReplacement = Theme::dispatch(
                ThemeEvents::PAGE_INCLUDE_PARSE,
                $tag->tagContent,
                $includedContent->toHtml(),
                clone $hostPage,
                $includedPage ? (clone $includedPage) : null,
            );

            if ($themeReplacement === null) {
                return $includedContent;
            }

            return PageIncludeContent::fromInlineHtml(strval($themeReplacement));
        };
    }

    /**
     * Build a navigation menu from the headers (h1 to h6) found in the given HTML.
     *
     * @return array A list of header details, or an empty array where
     *               there is no content or no headers are found.
     */
    public function getNavigation(string $htmlContent): array
    {
        if (empty($htmlContent)) {
            return [];
        }

        $headers = (new HtmlDocument($htmlContent))->queryXPath('//h1|//h2|//h3|//h4|//h5|//h6');
        if ($headers->count() === 0) {
            return [];
        }

        return $this->headerNodesToLevelList($headers);
    }

    /**
     * Convert a DOMNodeList of headers into an array of readable header details.
     * Each entry holds the node name, level, link to the header id and the header text
     * (limited to 100 characters). Headers without text are dropped, and levels are
     * shifted so that the most significant header level in use becomes level 1.
     */
    protected function headerNodesToLevelList(DOMNodeList $nodeList): array
    {
        $entries = collect($nodeList)
            ->map(function (DOMElement $header) {
                // Non-breaking spaces are treated as normal spaces so they are trimmed away
                $cleanText = trim(str_replace("\xc2\xa0", ' ', $header->nodeValue));

                return [
                    'nodeName' => strtolower($header->nodeName),
                    'level'    => (int) str_replace('h', '', $header->nodeName),
                    'link'     => '#' . $header->getAttribute('id'),
                    'text'     => mb_substr($cleanText, 0, 100),
                ];
            })
            ->filter(fn ($entry) => $entry['text'] !== '');

        // Shift headers if only smaller headers have been used
        $levelShift = $entries->pluck('level')->min() - 1;

        return $entries
            ->map(fn ($entry) => array_replace($entry, ['level' => $entry['level'] - $levelShift]))
            ->toArray();
    }
}
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								<?php
 								namespace BookStack\Entities\Tools;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
-												Fixed some mis-refactoring and split search service

Search service broken into index and runner tools.

											
										
										
											2020-11-21 19:17:45 -05:00
+								use BookStack\Entities\Models\Page;
-												Queries: Extracted static page,chapter,shelf queries to classes

											
										
										
											2024-02-07 16:58:27 -05:00
+								use BookStack\Entities\Queries\PageQueries;
-												Added core editor switching functionality

											
										
										
											2022-04-18 12:39:28 -04:00
+								use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new image repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								use BookStack\Exceptions\ImageUploadException;
-												Includes: Added back support for parse theme event

Managed to do this in an API-compatible way although resulting output may
differ due to new dom handling in general, although user content is used
inline to remain as compatible as possible.

											
										
										
											2023-11-27 16:38:43 -05:00
+								use BookStack\Facades\Theme;
 								use BookStack\Theming\ThemeEvents;
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								use BookStack\Uploads\ImageRepo;
-												Cleaned up logic within ImageRepo

- Moved out extension check to ImageService as that seems more relevant.
- Updated models to use static-style references instead of facade to align with common modern usage within the app.
- Updated custom image_extension validation rule to use shared logic in image service.

											
										
										
											2021-10-31 20:24:42 -04:00
+								use BookStack\Uploads\ImageService;
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								use BookStack\Users\Models\User;
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								use BookStack\Util\HtmlContentFilter;
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old deprecated mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								use BookStack\Util\HtmlDocument;
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								use BookStack\Util\WebSafeMimeSniffer;
-												Includes: Added back support for parse theme event

Managed to do this in an API-compatible way although resulting output may
differ due to new dom handling in general, although user content is used
inline to remain as compatible as possible.

											
										
										
											2023-11-27 16:38:43 -05:00
+								use Closure;
-												Applied another round of static analysis updates

											
										
										
											2021-11-22 18:33:55 -05:00
+								use DOMElement;
 								use DOMNode;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								use DOMNodeList;
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new image repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								use Illuminate\Support\Str;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
 								class PageContent
 								{
-												Queries: Extracted static page,chapter,shelf queries to classes

											
										
										
											2024-02-07 16:58:27 -05:00
+								    protected PageQueries $pageQueries;
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers its
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								    public function __construct(
 								        protected Page $page
 								    ) {
-												Queries: Extracted static page,chapter,shelf queries to classes

											
										
										
											2024-02-07 16:58:27 -05:00
+								        $this->pageQueries = app()->make(PageQueries::class);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    }
 								    /**
 								     * Update the content of the page with new provided HTML.
 								     */
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								    public function setNewHTML(string $html, User $updater): void
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    {
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								        $html = $this->extractBase64ImagesFromHtml($html, $updater);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        $this->page->html = $this->formatHtml($html);
-												Moved decode and updated page plaintext decode test

											
										
										
											2020-09-19 10:13:18 -04:00
+								        $this->page->text = $this->toPlainText();
-												Started pages API

											
										
										
											2020-11-22 09:56:19 -05:00
+								        $this->page->markdown = '';
 								    }
 								    /**
 								     * Update the content of the page with new provided Markdown content.
 								     */
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								    public function setNewMarkdown(string $markdown, User $updater): void
-												Started pages API

											
										
										
											2020-11-22 09:56:19 -05:00
+								    {
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								        $markdown = $this->extractBase64ImagesFromMarkdown($markdown, $updater);
-												Started pages API

											
										
										
											2020-11-22 09:56:19 -05:00
+								        $this->page->markdown = $markdown;
-												Added core editor switching functionality

											
										
										
											2022-04-18 12:39:28 -04:00
+								        $html = (new MarkdownToHtml($markdown))->convert();
-												Started pages API

											
										
										
											2020-11-22 09:56:19 -05:00
+								        $this->page->html = $this->formatHtml($html);
 								        $this->page->text = $this->toPlainText();
 								    }
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								    /**
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								     * Convert all base64 image data to saved images.
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								     */
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								    protected function extractBase64ImagesFromHtml(string $htmlText, User $updater): string
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								    {
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers its
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        if (empty($htmlText) || !str_contains($htmlText, 'data:image')) {
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								            return $htmlText;
 								        }
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old deprecated mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								        $doc = new HtmlDocument($htmlText);
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
 								        // Get all img elements with image data blobs
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old deprecated mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								        $imageNodes = $doc->queryXPath('//img[contains(@src, \'data:image\')]');
-												PHPStan: Fixed larastan loading and address some level2 issues

											
										
										
											2023-12-10 09:58:05 -05:00
+								        /** @var DOMElement $imageNode */
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new image repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								        foreach ($imageNodes as $imageNode) {
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								            $imageSrc = $imageNode->getAttribute('src');
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								            $newUrl = $this->base64ImageUriToUploadedImageUrl($imageSrc, $updater);
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case among others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								            $imageNode->setAttribute('src', $newUrl);
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								        }
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old deprecated mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								        return $doc->getBodyInnerHtml();
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								    }
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								    /**
 								     * Convert all inline base64 content to uploaded image files.
-												Prevented PCRE limit issues in markdown base64 extraction

For #3249

											
										
										
											2022-02-06 02:51:38 -05:00
+								     * Regex is used to locate the start of data-uri definitions then
 								     * manual looping over content is done to parse the whole data uri.
 								     * Attempting to capture the whole data uri using regex can cause PHP
 								     * PCRE limits to be hit with larger, multi-MB, files.
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								     */
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								    protected function extractBase64ImagesFromMarkdown(string $markdown, User $updater): string
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								    {
 								        $matches = [];
-												Prevented PCRE limit issues in markdown base64 extraction

For #3249

											
										
										
											2022-02-06 02:51:38 -05:00
+								        $contentLength = strlen($markdown);
 								        $replacements = [];
 								        preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);
 								        foreach ($matches[1] as $base64MatchPair) {
 								            [$dataUri, $index] = $base64MatchPair;
 								            for ($i = strlen($dataUri) + $index; $i < $contentLength; $i++) {
 								                $char = $markdown[$i];
 								                if ($char === ')' || $char === ' ' || $char === "\n" || $char === '"') {
 								                    break;
 								                }
 								                $dataUri .= $char;
 								            }
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								            $newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri, $updater);
-												Prevented PCRE limit issues in markdown base64 extraction

For #3249

											
										
										
											2022-02-06 02:51:38 -05:00
+								            $replacements[] = [$dataUri, $newUrl];
 								        }
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
-												Prevented PCRE limit issues in markdown base64 extraction

For #3249

											
										
										
											2022-02-06 02:51:38 -05:00
+								        foreach ($replacements as [$dataUri, $newUrl]) {
 								            $markdown = str_replace($dataUri, $newUrl, $markdown);
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								        }
 								        return $markdown;
 								    }
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case among others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								    /**
 								     * Parse the given base64 image URI and return the URL to the created image instance.
 								     * Returns an empty string if the parsed URI is invalid or causes an error upon upload.
 								     */
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								    protected function base64ImageUriToUploadedImageUrl(string $uri, User $updater): string
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								    {
 								        $imageRepo = app()->make(ImageRepo::class);
 								        $imageInfo = $this->parseBase64ImageUri($uri);
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								        // Validate user has permission to create images
 								        if (!$updater->can('image-create-all')) {
 								            return '';
 								        }
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								        // Validate extension and content
-												Cleaned up logic within ImageRepo

- Moved out extension check to ImageService as that seems more relevant.
- Updated models to use static-style references instead of facade to align with common modern usage within the app.
- Updated custom image_extension validation rule to use shared logic in image service.

											
										
										
											2021-10-31 20:24:42 -04:00
+								        if (empty($imageInfo['data']) || !ImageService::isExtensionSupported($imageInfo['extension'])) {
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								            return '';
 								        }
-												Images: Prevented base64 extraction without permission

Also added content sniffing as an extra check.
Added tests to cover.

											
										
										
											2023-11-20 08:32:31 -05:00
+								        // Validate content looks like an image via sniffing mime type
 								        $mimeSniffer = new WebSafeMimeSniffer();
 								        $mime = $mimeSniffer->sniff($imageInfo['data']);
 								        if (!str_starts_with($mime, 'image/')) {
 								            return '';
 								        }
-												Added an env configurable file upload size limit

Replaces the old suggestion of setting JS head 'window.uploadLimit'
variable. This new env option will be used by back-end validation and
front-end libs/logic too.

Limits already likely exist within prod environments at a PHP and
webserver level but this allows an app-level limit and centralises the
option on the BookStack side into the .env

Closes #3033

											
										
										
											2021-11-14 17:03:22 -05:00
+								        // Validate that the content is not over our upload limit
 								        $uploadLimitBytes = (config('app.upload_limit') * 1000000);
 								        if (strlen($imageInfo['data']) > $uploadLimitBytes) {
 								            return '';
 								        }
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								        // Save image from data with a random name
 								        $imageName = 'embedded-image-' . Str::random(8) . '.' . $imageInfo['extension'];
 								        try {
 								            $image = $imageRepo->saveNewFromData($imageName, $imageInfo['data'], 'gallery', $this->page->id);
 								        } catch (ImageUploadException $exception) {
 								            return '';
 								        }
 								        return $image->url;
 								    }
 								    /**
 								     * Parse a base64 image URI into the data and extension.
-												Applied latest styleci changes

											
										
										
											2021-11-01 09:26:02 -04:00
+								     *
-												Applied another set of static analysis improvements

											
										
										
											2021-11-20 09:03:56 -05:00
+								     * @return array{extension: string, data: string}
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								     */
 								    protected function parseBase64ImageUri(string $uri): array
 								    {
 								        [$dataDefinition, $base64ImageData] = explode(',', $uri, 2);
 								        $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? '');
-												Applied latest styleci changes

											
										
										
											2021-11-01 09:26:02 -04:00
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								        return [
 								            'extension' => $extension,
-												Applied latest styleci changes

											
										
										
											2021-11-01 09:26:02 -04:00
+								            'data'      => base64_decode($base64ImageData) ?: '',
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								        ];
 								    }
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    /**
 								     * Formats a page's html to be tagged correctly within the system.
 								     */
 								    protected function formatHtml(string $htmlText): string
 								    {
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new image repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								        if (empty($htmlText)) {
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								            return $htmlText;
 								        }
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old deprecated mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								        $doc = new HtmlDocument($htmlText);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers its
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        // Map to hold used ID references
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        $idMap = [];
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        // Map to hold changing ID references
 								        $changeMap = [];
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old depreacted mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								        $this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
 								        $this->updateLinks($doc, $changeMap);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old depreacted mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								        // Generate inner html as a string & perform required string-level tweaks
 								        $html = $doc->getBodyInnerHtml();
-												Updated strategy for empty newline sections

- For some reason, TinyMCE would handle empty paragraphs with a '&nbsp'
  by default but this would be removed when the paragraph had an
  attribute. This was fine in the old editor.
- This changes the approach to use '<br>' tags within elements
  for "spaced emptiness".
- For compatibility with any existing empty paragraphs, I updated the
  styles to show default height for empty paragraph sections.
- This also makes changes to help preserve encoded &nbsp; html tags
  since they were getting converted along the journey.

Related to #3302

											
										
										
											2022-03-01 12:26:06 -05:00
+								        $html = str_replace(' ', '&nbsp;', $html);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        return $html;
 								    }
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								    /**
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers its
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								     * For the given DOMNode, traverse its children recursively and update IDs
 								     * where required (Top-level, headers & elements with IDs).
 								     * Will update the provided $changeMap array with changes made, where keys are the old
 								     * ids and the corresponding values are the new ids.
 								     */
 								    protected function updateIdsRecursively(DOMNode $element, int $depth, array &$idMap, array &$changeMap): void
 								    {
 								        $headerTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
 								        /** @var DOMNode $node */
 								        foreach ($element->childNodes as $node) {
 								            if ($node instanceof DOMElement) {
 								                // An element gets an ID when it is a direct child of the starting node,
 								                // is a header tag, or already carries a truthy id attribute.
 								                $isTopLevel = $depth === 0;
 								                $isHeader = in_array($node->nodeName, $headerTags);
 								                if ($isTopLevel || $isHeader || $node->getAttribute('id')) {
 								                    [$previousId, $assignedId] = $this->setUniqueId($node, $idMap);
 								                    $idChanged = $assignedId && $assignedId !== $previousId;
 								                    // Record old => new only when the old ID is not present in the ID map.
 								                    if ($idChanged && !isset($idMap[$previousId])) {
 								                        $changeMap[$previousId] = $assignedId;
 								                    }
 								                }
 								            }
 								            // Descend into any nested content, one level deeper.
 								            if ($node->hasChildNodes()) {
 								                $this->updateIdsRecursively($node, $depth + 1, $idMap, $changeMap);
 								            }
 								        }
 								    }
 								    /**
 								     * Update all links in the given document to apply the required changes within the
 								     * given $changeMap array.
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								     */
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old depreacted mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								    protected function updateLinks(HtmlDocument $doc, array $changeMap): void
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								    {
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers its
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        if (empty($changeMap)) {
 								            return;
 								        }
-												HTML: Aligned and standardised DOMDocument usage

Adds a thin wrapper for DOMDocument to simplify and align usage within
all areas of BookStack.
Also means we move away from old depreacted mb_convert_encoding usage.

Closes #4638

											
										
										
											2023-11-14 10:46:32 -05:00
+								        $links = $doc->queryXPath('//body//*//*[@href]');
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        /** @var DOMElement $domElem */
 								        foreach ($links as $domElem) {
 								            $href = ltrim($domElem->getAttribute('href'), '#');
 								            $newHref = $changeMap[$href] ?? null;
 								            if ($newHref) {
 								                $domElem->setAttribute('href', '#' . $newHref);
 								            }
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								        }
 								    }
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    /**
 								     * Set a unique id on the given DOMElement.
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								     * A map of existing IDs should be passed in to check for current existence,
 								     * and this will be updated with any new IDs set upon elements.
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								     * Returns a pair of strings in the format [old_id, new_id].
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								     */
-												Applied another round of static analysis updates

											
										
										
											2021-11-22 18:33:55 -05:00
+								    protected function setUniqueId(DOMNode $element, array &$idMap): array
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    {
-												Applied another round of static analysis updates

											
										
										
											2021-11-22 18:33:55 -05:00
+								        if (!$element instanceof DOMElement) {
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								            return ['', ''];
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        }
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								        // Stop if there's an existing valid id that has not already been used.
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        $existingId = $element->getAttribute('id');
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        if (str_starts_with($existingId, 'bkmrk') && !isset($idMap[$existingId])) {
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								            $idMap[$existingId] = true;
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								            return [$existingId, $existingId];
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        }
-												Applied another set of static analysis improvements

											
										
										
											2021-11-20 09:03:56 -05:00
+								        // Create a unique id for the element
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        // Uses the content as a basis to ensure output is the same every time
 								        // the same content is passed through.
 								        $contentId = 'bkmrk-' . mb_substr(strtolower(preg_replace('/\s+/', '-', trim($element->nodeValue))), 0, 20);
 								        $newId = urlencode($contentId);
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        $loopIndex = 1;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
 								        while (isset($idMap[$newId])) {
 								            $newId = urlencode($contentId . '-' . $loopIndex);
 								            $loopIndex++;
 								        }
 								        $element->setAttribute('id', $newId);
 								        $idMap[$newId] = true;
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								        return [$existingId, $newId];
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    }
 								    /**
 								     * Get a plain-text visualisation of this page.
 								     */
 								    protected function toPlainText(): string
 								    {
 								        $html = $this->render(true);
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
-												Moved decode and updated page plaintext decode test

											
										
										
											2020-09-19 10:13:18 -04:00
+								        return html_entity_decode(strip_tags($html));
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    }
 								    /**
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								     * Render the page for viewing.
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								     */
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new iamge repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								    public function render(bool $blankIncludes = false): string
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    {
-												Includes: Switched page to new system

- Added mulit-level depth parsing.
- Updating usage of HTML doc in page content to be efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.

											
										
										
											2023-11-27 14:54:47 -05:00
+								        $html = $this->page->html ?? '';
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
-												Includes: Switched page to new system

- Added mulit-level depth parsing.
- Updating usage of HTML doc in page content to be efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.

											
										
										
											2023-11-27 14:54:47 -05:00
+								        if (empty($html)) {
 								            return $html;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        }
-												Includes: Switched page to new system

- Added mulit-level depth parsing.
- Updating usage of HTML doc in page content to be efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.

											
										
										
											2023-11-27 14:54:47 -05:00
+								        $doc = new HtmlDocument($html);
-												Includes: Added back support for parse theme event

Managed to do this in an API-compatible way although resuling output may
differ due to new dom handling in general, although user content is used
inline to remain as comptable as possible.

											
										
										
											2023-11-27 16:38:43 -05:00
+								        $contentProvider = $this->getContentProviderClosure($blankIncludes);
-												Includes: Switched page to new system

- Added mulit-level depth parsing.
- Updating usage of HTML doc in page content to be efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.

											
										
										
											2023-11-27 14:54:47 -05:00
+								        $parser = new PageIncludeParser($doc, $contentProvider);
-												Includes: Added back support for parse theme event

Managed to do this in an API-compatible way although resuling output may
differ due to new dom handling in general, although user content is used
inline to remain as comptable as possible.

											
										
										
											2023-11-27 16:38:43 -05:00
+								        $nodesAdded = 1;
 								        for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) {
-												Includes: Switched page to new system

- Added mulit-level depth parsing.
- Updating usage of HTML doc in page content to be efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.

											
										
										
											2023-11-27 14:54:47 -05:00
+								            $nodesAdded = $parser->parse();
 								        }
-												Includes: Added ID de-duplicating and more thorough clean-up

											
										
										
											2023-11-27 15:16:27 -05:00
+								        if ($includeDepth > 1) {
 								            $idMap = [];
 								            $changeMap = [];
 								            $this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
 								        }
-												Includes: Switched page to new system

- Added mulit-level depth parsing.
- Updating usage of HTML doc in page content to be efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.

											
										
										
											2023-11-27 14:54:47 -05:00
+								        if (!config('app.allow_content_scripts')) {
 								            HtmlContentFilter::removeScriptsFromDocument($doc);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        }
-												Includes: Switched page to new system

- Added mulit-level depth parsing.
- Updating usage of HTML doc in page content to be efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.

											
										
										
											2023-11-27 14:54:47 -05:00
+								        return $doc->getBodyInnerHtml();
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    }
-												Includes: Added back support for parse theme event

Managed to do this in an API-compatible way although resulting output may
differ due to new dom handling in general, although user content is used
inline to remain as compatible as possible.

											
										
										
											2023-11-27 16:38:43 -05:00
/**
 * Build the closure that supplies content for each page include tag.
 *
 * The returned closure yields empty content when $blankIncludes is set.
 * Otherwise it looks up the referenced page via findVisibleById() and
 * builds content from that page's HTML (an empty string when no page is
 * found). If any PAGE_INCLUDE_PARSE theme listeners exist they are
 * dispatched, and a non-null result replaces the content as inline HTML.
 *
 * @param bool $blankIncludes Whether includes should resolve to empty content.
 */
protected function getContentProviderClosure(bool $blankIncludes): Closure
{
    $hostPage = $this->page;
    $pageQueries = $this->pageQueries;

    return function (PageIncludeTag $tag) use ($blankIncludes, $hostPage, $pageQueries): PageIncludeContent {
        if ($blankIncludes) {
            return PageIncludeContent::fromHtmlAndTag('', $tag);
        }

        $includedPage = $pageQueries->findVisibleById($tag->getPageId());
        $resolved = PageIncludeContent::fromHtmlAndTag($includedPage->html ?? '', $tag);

        // Without theme listeners the resolved content is used as-is.
        if (!Theme::hasListeners(ThemeEvents::PAGE_INCLUDE_PARSE)) {
            return $resolved;
        }

        // Listeners receive clones of both pages.
        $override = Theme::dispatch(
            ThemeEvents::PAGE_INCLUDE_PARSE,
            $tag->tagContent,
            $resolved->toHtml(),
            clone $hostPage,
            $includedPage ? (clone $includedPage) : null,
        );

        if ($override === null) {
            return $resolved;
        }

        return PageIncludeContent::fromInlineHtml(strval($override));
    };
}
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    /**
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								     * Parse the headers on the page to get a navigation menu.
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								     */
 								    public function getNavigation(string $htmlContent): array
 								    {
 								        // Nothing to parse, so there are no navigation entries.
 								        if (empty($htmlContent)) {
 								            return [];
 								        }

 								        // Collect every heading element (h1 through h6) found in the content.
 								        $document = new HtmlDocument($htmlContent);
 								        $headingNodes = $document->queryXPath('//h1|//h2|//h3|//h4|//h5|//h6');
 								        if ($headingNodes->count() === 0) {
 								            return [];
 								        }

 								        // Convert the matched heading nodes into the level-normalised list.
 								        return $this->headerNodesToLevelList($headingNodes);
 								    }
 								    /**
 								     * Build a list of navigation entries from a set of heading nodes.
 								     *
 								     * Each entry holds the lower-cased tag name ('nodeName'), the numeric
 								     * heading level ('level'), an anchor built from the element id ('link')
 								     * and the heading text ('text'), cut to at most 100 characters.
 								     * Headings with no remaining text are left out, and levels are shifted
 								     * so that the smallest level in use becomes level 1.
 								     */
 								    protected function headerNodesToLevelList(DOMNodeList $nodeList): array
 								    {
 								        $entries = collect($nodeList)
 								            ->map(function (DOMElement $heading) {
 								                $tagName = $heading->nodeName;

 								                // Swap non-breaking spaces (UTF-8 bytes C2 A0) for plain spaces
 								                // so they are trimmed/shown as normal spacing, then cap the length.
 								                $label = str_replace("\xc2\xa0", ' ', $heading->nodeValue);
 								                $label = mb_substr(trim($label), 0, 100);

 								                return [
 								                    'nodeName' => strtolower($tagName),
 								                    'level'    => intval(str_replace('h', '', $tagName)),
 								                    'link'     => '#' . $heading->getAttribute('id'),
 								                    'text'     => $label,
 								                ];
 								            })
 								            // Drop headings that have no text left to display.
 								            ->filter(fn ($entry) => $entry['text'] !== '');

 								        // Amount to shift by so the smallest used heading level becomes 1.
 								        $offset = $entries->min('level') - 1;

 								        return $entries
 								            ->map(function ($entry) use ($offset) {
 								                $entry['level'] -= $offset;

 								                return $entry;
 								            })
 								            ->toArray();
 								    }
 								}