BookStack/app/Entities/Tools/PageContent.php

<?php

namespace BookStack\Entities\Tools;

use BookStack\Entities\Models\Page;
use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
use BookStack\Exceptions\ImageUploadException;
use BookStack\Facades\Theme;
use BookStack\Theming\ThemeEvents;
use BookStack\Uploads\ImageRepo;
use BookStack\Uploads\ImageService;
use BookStack\Util\HtmlContentFilter;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMNodeList;
use DOMXPath;
use Illuminate\Support\Str;

/**
 * Handles updating, formatting and rendering the content of a single page.
 *
 * Covers conversion of embedded base64 images to uploaded images,
 * assignment of unique element ids, page-include tag parsing and
 * navigation generation from headers.
 */
class PageContent
{
    public function __construct(
        protected Page $page
    ) {
    }

    /**
     * Update the content of the page with new provided HTML.
     * Embedded base64 images are extracted, the HTML is formatted,
     * the plain-text copy is refreshed and any stored markdown is cleared.
     */
    public function setNewHTML(string $html): void
    {
        $html = $this->extractBase64ImagesFromHtml($html);
        $this->page->html = $this->formatHtml($html);
        // The text is generated from the page html so must be set after it.
        $this->page->text = $this->toPlainText();
        $this->page->markdown = '';
    }

    /**
     * Update the content of the page with new provided Markdown content.
     * The markdown is stored (after base64 image extraction) alongside
     * the formatted HTML and plain-text generated from it.
     */
    public function setNewMarkdown(string $markdown): void
    {
        $markdown = $this->extractBase64ImagesFromMarkdown($markdown);
        $this->page->markdown = $markdown;
        $html = (new MarkdownToHtml($markdown))->convert();
        $this->page->html = $this->formatHtml($html);
        // The text is generated from the page html so must be set after it.
        $this->page->text = $this->toPlainText();
    }

    /**
     * Convert all base64 image data to saved images.
     * Returns the HTML with each matching img "src" swapped for the URL
     * of the saved image, or an empty string where the image was rejected.
     */
    protected function extractBase64ImagesFromHtml(string $htmlText): string
    {
        // Cheap pre-check so content without embedded images skips DOM parsing.
        if (empty($htmlText) || !str_contains($htmlText, 'data:image')) {
            return $htmlText;
        }

        $doc = $this->loadDocumentFromHtml($htmlText);
        $body = $doc->documentElement->childNodes->item(0);
        $xPath = new DOMXPath($doc);

        // Get all img elements with image data blobs
        $imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
        /** @var DOMElement $imageNode */
        foreach ($imageNodes as $imageNode) {
            $imageSrc = $imageNode->getAttribute('src');
            $newUrl = $this->base64ImageUriToUploadedImageUrl($imageSrc);
            $imageNode->setAttribute('src', $newUrl);
        }

        // Generate inner html as a string
        $html = '';
        foreach ($body->childNodes as $childNode) {
            $html .= $doc->saveHTML($childNode);
        }

        return $html;
    }

    /**
     * Convert all inline base64 content to uploaded image files.
     * Regex is used to locate the start of data-uri definitions then
     * the remainder of each data uri is read up to its terminating character.
     * Attempting to capture the whole data uri using regex can cause PHP
     * PCRE limits to be hit with larger, multi-MB, files.
     */
    protected function extractBase64ImagesFromMarkdown(string $markdown): string
    {
        $matches = [];
        $replacements = [];
        preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);

        foreach ($matches[1] as [$uriPrefix, $prefixOffset]) {
            // The data runs from the end of the matched prefix up to the first
            // closing bracket, space, newline or double quote (or end of content).
            $dataStart = $prefixOffset + strlen($uriPrefix);
            $dataLength = strcspn($markdown, ") \n\"", $dataStart);
            $dataUri = $uriPrefix . substr($markdown, $dataStart, $dataLength);

            $newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri);
            $replacements[] = [$dataUri, $newUrl];
        }

        // Replacements are applied after scanning so captured offsets stay valid.
        foreach ($replacements as [$dataUri, $newUrl]) {
            $markdown = str_replace($dataUri, $newUrl, $markdown);
        }

        return $markdown;
    }

    /**
     * Parse the given base64 image URI and return the URL to the created image instance.
     * Returns an empty string if the parsed URI is invalid or causes an error upon upload.
     */
    protected function base64ImageUriToUploadedImageUrl(string $uri): string
    {
        $imageInfo = $this->parseBase64ImageUri($uri);

        // Validate extension and content
        if (empty($imageInfo['data']) || !ImageService::isExtensionSupported($imageInfo['extension'])) {
            return '';
        }

        // Validate that the content is not over our upload limit
        $uploadLimitBytes = (config('app.upload_limit') * 1000000);
        if (strlen($imageInfo['data']) > $uploadLimitBytes) {
            return '';
        }

        // Save image from data with a random name
        $imageName = 'embedded-image-' . Str::random(8) . '.' . $imageInfo['extension'];

        try {
            // The repo is only resolved once the data has passed validation.
            $imageRepo = app()->make(ImageRepo::class);
            $image = $imageRepo->saveNewFromData($imageName, $imageInfo['data'], 'gallery', $this->page->id);
        } catch (ImageUploadException $exception) {
            return '';
        }

        return $image->url;
    }

    /**
     * Parse a base64 image URI into the data and extension.
     * The extension is lower-cased and the data is the decoded content.
     * Both are empty strings where they cannot be found or decoded.
     *
     * @return array{extension: string, data: string}
     */
    protected function parseBase64ImageUri(string $uri): array
    {
        // Pad so a URI lacking a comma yields empty data instead of an undefined index.
        [$dataDefinition, $base64ImageData] = array_pad(explode(',', $uri, 2), 2, '');
        $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? '');

        return [
            'extension' => $extension,
            'data'      => base64_decode($base64ImageData) ?: '',
        ];
    }

    /**
     * Formats a page's html to be tagged correctly within the system.
     * Ensures elements carry unique ids, updates in-page links to any
     * changed ids and encodes non-breaking spaces as entities.
     */
    protected function formatHtml(string $htmlText): string
    {
        if (empty($htmlText)) {
            return $htmlText;
        }

        $doc = $this->loadDocumentFromHtml($htmlText);
        $body = $doc->documentElement->childNodes->item(0);
        $xPath = new DOMXPath($doc);

        // Map to hold used ID references
        $idMap = [];
        // Map to hold changing ID references
        $changeMap = [];

        $this->updateIdsRecursively($body, 0, $idMap, $changeMap);
        $this->updateLinks($xPath, $changeMap);

        // Generate inner html as a string
        $html = '';
        foreach ($body->childNodes as $childNode) {
            $html .= $doc->saveHTML($childNode);
        }

        // Perform required string-level tweaks.
        // The search value is a UTF-8 non-breaking space, written as explicit
        // bytes so that it cannot be confused with (or converted to) a normal space.
        return str_replace("\xc2\xa0", '&nbsp;', $html);
    }

    /**
     * For the given DOMNode, traverse its children recursively and update IDs
     * where required (Top-level, headers & elements with IDs).
     * Will update the provided $changeMap array with changes made, where keys are the old
     * ids and the corresponding values are the new ids.
     */
    protected function updateIdsRecursively(DOMNode $element, int $depth, array &$idMap, array &$changeMap): void
    {
        /* @var DOMNode $child */
        foreach ($element->childNodes as $child) {
            if ($child instanceof DOMElement && ($depth === 0 || in_array($child->nodeName, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], true) || $child->getAttribute('id'))) {
                [$oldId, $newId] = $this->setUniqueId($child, $idMap);

                // Only record a change when there was a previous id that links could
                // reference, when that id has not been retained by another element,
                // and when no earlier element has already claimed the change.
                $idChanged = $newId && $oldId !== '' && $newId !== $oldId;
                if ($idChanged && !isset($idMap[$oldId]) && !isset($changeMap[$oldId])) {
                    $changeMap[$oldId] = $newId;
                }
            }

            if ($child->hasChildNodes()) {
                $this->updateIdsRecursively($child, $depth + 1, $idMap, $changeMap);
            }
        }
    }

    /**
     * Update all the links in the given xpath to apply required changes within the
     * given $changeMap array (old id => new id).
     * Only same-page fragment links (Those starting with "#") are altered.
     */
    protected function updateLinks(DOMXPath $xpath, array $changeMap): void
    {
        if (empty($changeMap)) {
            return;
        }

        $links = $xpath->query('//body//*//*[@href]');
        /** @var DOMElement $domElem */
        foreach ($links as $domElem) {
            $href = $domElem->getAttribute('href');
            // Non-fragment hrefs are URLs, not id references, so must be left alone.
            if (!str_starts_with($href, '#')) {
                continue;
            }

            $newHref = $changeMap[ltrim($href, '#')] ?? null;
            if ($newHref) {
                $domElem->setAttribute('href', '#' . $newHref);
            }
        }
    }

    /**
     * Set a unique id on the given DOMElement.
     * A map for existing ID's should be passed in to check for current existence,
     * and this will be updated with any new IDs set upon elements.
     * Returns a pair of strings in the format [old_id, new_id].
     *
     * @return array{0: string, 1: string}
     */
    protected function setUniqueId(DOMNode $element, array &$idMap): array
    {
        if (!$element instanceof DOMElement) {
            return ['', ''];
        }

        // Stop if there's an existing valid id that has not already been used.
        $existingId = $element->getAttribute('id');
        if (str_starts_with($existingId, 'bkmrk') && !isset($idMap[$existingId])) {
            $idMap[$existingId] = true;

            return [$existingId, $existingId];
        }

        // Create a unique id for the element
        // Uses the content as a basis to ensure output is the same every time
        // the same content is passed through.
        $contentId = 'bkmrk-' . mb_substr(strtolower(preg_replace('/\s+/', '-', trim($element->nodeValue))), 0, 20);
        $newId = urlencode($contentId);
        $loopIndex = 1;

        // Append an incrementing suffix until the id is not already in use.
        while (isset($idMap[$newId])) {
            $newId = urlencode($contentId . '-' . $loopIndex);
            $loopIndex++;
        }

        $element->setAttribute('id', $newId);
        $idMap[$newId] = true;

        return [$existingId, $newId];
    }

    /**
     * Get a plain-text visualisation of this page.
     * Built from the rendered page html with include tags blanked,
     * tags stripped and HTML entities decoded.
     */
    protected function toPlainText(): string
    {
        $html = $this->render(true);

        return html_entity_decode(strip_tags($html));
    }

    /**
     * Render the page for viewing.
     * Scripts are removed unless the "app.allow_content_scripts" config option is set.
     * Include tags are either removed (when $blankIncludes is true) or parsed
     * over a fixed number of passes so includes within included content are
     * also handled, without the potential for endless looping.
     */
    public function render(bool $blankIncludes = false): string
    {
        $content = $this->page->html ?? '';

        if (!config('app.allow_content_scripts')) {
            $content = HtmlContentFilter::removeScripts($content);
        }

        if ($blankIncludes) {
            $content = $this->blankPageIncludes($content);
        } else {
            // Three passes in total.
            for ($includeDepth = 0; $includeDepth < 3; $includeDepth++) {
                $content = $this->parsePageIncludes($content);
            }
        }

        return $content;
    }

    /**
     * Parse the headers on the page to get a navigation menu.
     * Returns an array of header details, as provided by headerNodesToLevelList(),
     * or an empty array where there's no content.
     */
    public function getNavigation(string $htmlContent): array
    {
        if (empty($htmlContent)) {
            return [];
        }

        $doc = $this->loadDocumentFromHtml($htmlContent);
        $xPath = new DOMXPath($doc);
        $headers = $xPath->query('//h1|//h2|//h3|//h4|//h5|//h6');

        return $headers ? $this->headerNodesToLevelList($headers) : [];
    }

    /**
     * Convert a DOMNodeList into an array of readable header attributes
     * with levels normalised to the lower header level.
     * Headers without text are excluded and text is limited to 100 characters.
     *
     * @return array<array{nodeName: string, level: int, link: string, text: string}>
     */
    protected function headerNodesToLevelList(DOMNodeList $nodeList): array
    {
        $tree = collect($nodeList)->map(function (DOMElement $header) {
            // Non-breaking spaces are removed so headers containing only those count as empty.
            $text = trim(str_replace("\xc2\xa0", '', $header->nodeValue));
            $text = mb_substr($text, 0, 100);

            return [
                'nodeName' => strtolower($header->nodeName),
                'level'    => intval(str_replace('h', '', $header->nodeName)),
                'link'     => '#' . $header->getAttribute('id'),
                'text'     => $text,
            ];
        })->filter(function ($header) {
            return mb_strlen($header['text']) > 0;
        });

        // Shift headers if only smaller headers have been used
        $levelChange = ($tree->pluck('level')->min() - 1);
        $tree = $tree->map(function ($header) use ($levelChange) {
            $header['level'] -= ($levelChange);

            return $header;
        });

        return $tree->toArray();
    }

    /**
     * Remove any page include tags within the given HTML.
     */
    protected function blankPageIncludes(string $html): string
    {
        // preg_replace provides null upon error, in which case the content is left as-is.
        return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html) ?? $html;
    }

    /**
     * Parse any include tags "{{@<page_id>#section}}" to be part of the page.
     * Each tag is replaced by the content of the referenced page (or section of it),
     * or by an empty string where no matching page is found. A non-null result from
     * the "PAGE_INCLUDE_PARSE" theme event takes precedence over that replacement.
     */
    protected function parsePageIncludes(string $html): string
    {
        $matches = [];
        preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);

        foreach ($matches[1] as $index => $includeId) {
            $fullMatch = $matches[0][$index];
            $splitInclude = explode('#', $includeId, 2);

            // Get page id from reference.
            // The pattern above ensures the reference starts with a digit.
            $pageId = intval($splitInclude[0]);

            // Find page to use, and default replacement to empty string for non-matches.
            // NOTE(review): visible() presumably limits results to pages the current user can view.
            /** @var ?Page $matchedPage */
            $matchedPage = Page::visible()->find($pageId);
            $replacement = '';

            if ($matchedPage && count($splitInclude) === 1) {
                // If we only have page id, just insert all page html and continue.
                $replacement = $matchedPage->html ?? '';
            } elseif ($matchedPage && count($splitInclude) > 1) {
                // Otherwise, if our include tag defines a section, load that specific content
                $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);
                $replacement = trim($innerContent);
            }

            $themeReplacement = Theme::dispatch(
                ThemeEvents::PAGE_INCLUDE_PARSE,
                $includeId,
                $replacement,
                clone $this->page,
                $matchedPage ? (clone $matchedPage) : null,
            );

            // Perform the content replacement
            $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);
        }

        return $html;
    }

    /**
     * Fetch the content from a specific section of the given page.
     * Provides the inner content of the element with the given id, or the whole
     * element for table, list & pre elements. An empty string is provided if
     * no element with the given id exists.
     */
    protected function fetchSectionOfPage(Page $page, string $sectionId): string
    {
        $topLevelTags = ['table', 'ul', 'ol', 'pre'];
        $doc = $this->loadDocumentFromHtml($page->html ?? '');

        // Search included content for the id given and blank out if not exists.
        $matchingElem = $doc->getElementById($sectionId);
        if ($matchingElem === null) {
            return '';
        }

        // Otherwise replace the content with the found content
        // Checks if the top-level wrapper should be included by matching on tag types
        $innerContent = '';
        $isTopLevel = in_array(strtolower($matchingElem->nodeName), $topLevelTags, true);
        if ($isTopLevel) {
            $innerContent .= $doc->saveHTML($matchingElem);
        } else {
            foreach ($matchingElem->childNodes as $childNode) {
                $innerContent .= $doc->saveHTML($childNode);
            }
        }
        libxml_clear_errors();

        return $innerContent;
    }

    /**
     * Create and load a DOMDocument from the given html content.
     * The content is wrapped in a body element, with a UTF-8 encoding
     * declaration prepended, before being loaded.
     */
    protected function loadDocumentFromHtml(string $html): DOMDocument
    {
        // Collect parse errors internally instead of them being emitted as warnings.
        libxml_use_internal_errors(true);
        $doc = new DOMDocument();
        $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
        $doc->loadHTML($html);
        // Discard the collected errors so they do not build up across loads.
        libxml_clear_errors();

        return $doc;
    }
}
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								<?php
 								namespace BookStack\Entities\Tools;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
-												Fixed some mis-refactoring and split search service

Search service broken into index and runner tools.

											
										
										
											2020-11-21 19:17:45 -05:00
+								use BookStack\Entities\Models\Page;
-												Added core editor switching functionality

											
										
										
											2022-04-18 12:39:28 -04:00
+								use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new iamge repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								use BookStack\Exceptions\ImageUploadException;
-												Added "page_include_parse" theme event

For custom control of include tag parsing.

											
										
										
											2022-09-05 11:40:42 -04:00
+								use BookStack\Facades\Theme;
 								use BookStack\Theming\ThemeEvents;
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								use BookStack\Uploads\ImageRepo;
-												Cleaned up logic within ImageRepo

- Moved out extension check to ImageService as that seems more relevant.
- Updated models to use static-style references instead of facade to align with common modern usage within the app.
- Updated custom image_extension validation rule to use shared logic in image service.

											
										
										
											2021-10-31 20:24:42 -04:00
+								use BookStack\Uploads\ImageService;
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								use BookStack\Util\HtmlContentFilter;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								use DOMDocument;
-												Applied another round of static analysis updates

											
										
										
											2021-11-22 18:33:55 -05:00
+								use DOMElement;
 								use DOMNode;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								use DOMNodeList;
 								use DOMXPath;
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new iamge repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								use Illuminate\Support\Str;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
 								class PageContent
 								{
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								    public function __construct(
 								        protected Page $page
 								    ) {
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    }
 								    /**
 								     * Update the content of the page with new provided HTML.
 								     */
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								    public function setNewHTML(string $html): void
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    {
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								        $html = $this->extractBase64ImagesFromHtml($html);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        $this->page->html = $this->formatHtml($html);
-												Moved decode and updated page plaintext decode test

											
										
										
											2020-09-19 10:13:18 -04:00
+								        $this->page->text = $this->toPlainText();
-												Started pages API

											
										
										
											2020-11-22 09:56:19 -05:00
+								        $this->page->markdown = '';
 								    }
 								    /**
 								     * Update the content of the page with new provided Markdown content.
 								     */
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								    public function setNewMarkdown(string $markdown): void
-												Started pages API

											
										
										
											2020-11-22 09:56:19 -05:00
+								    {
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								        $markdown = $this->extractBase64ImagesFromMarkdown($markdown);
-												Started pages API

											
										
										
											2020-11-22 09:56:19 -05:00
+								        $this->page->markdown = $markdown;
-												Added core editor switching functionality

											
										
										
											2022-04-18 12:39:28 -04:00
+								        $html = (new MarkdownToHtml($markdown))->convert();
-												Started pages API

											
										
										
											2020-11-22 09:56:19 -05:00
+								        $this->page->html = $this->formatHtml($html);
 								        $this->page->text = $this->toPlainText();
 								    }
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								    /**
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								     * Convert all base64 image data to saved images.
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								     */
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								    protected function extractBase64ImagesFromHtml(string $htmlText): string
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								    {
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        if (empty($htmlText) || !str_contains($htmlText, 'data:image')) {
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								            return $htmlText;
 								        }
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new image repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								        $doc = $this->loadDocumentFromHtml($htmlText);
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								        $container = $doc->documentElement;
 								        $body = $container->childNodes->item(0);
 								        $childNodes = $body->childNodes;
 								        $xPath = new DOMXPath($doc);
 								        // Get all img elements with image data blobs
 								        $imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new iamge repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								        foreach ($imageNodes as $imageNode) {
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								            $imageSrc = $imageNode->getAttribute('src');
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case among others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								            $newUrl = $this->base64ImageUriToUploadedImageUrl($imageSrc);
 								            $imageNode->setAttribute('src', $newUrl);
-												Add base64 image support

											
										
										
											2021-04-20 19:41:21 -04:00
+								        }
 								        // Generate inner html as a string
 								        $html = '';
 								        foreach ($childNodes as $childNode) {
 								            $html .= $doc->saveHTML($childNode);
 								        }
 								        return $html;
 								    }
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								    /**
 								     * Convert all inline base64 content to uploaded image files.
-												Prevented PCRE limit issues in markdown base64 extraction

For #3249

											
										
										
											2022-02-06 02:51:38 -05:00
+								     * Regex is used to locate the start of data-uri definitions then
 								     * manual looping over content is done to parse the whole data uri.
 								     * Attempting to capture the whole data uri using regex can cause PHP
 								     * PCRE limits to be hit with larger, multi-MB, files.
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								     */
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								    protected function extractBase64ImagesFromMarkdown(string $markdown): string
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								    {
 								        $matches = [];
-												Prevented PCRE limit issues in markdown base64 extraction

For #3249

											
										
										
											2022-02-06 02:51:38 -05:00
+								        $contentLength = strlen($markdown);
 								        $replacements = [];
 								        preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);
 								        foreach ($matches[1] as $base64MatchPair) {
 								            [$dataUri, $index] = $base64MatchPair;
 								            for ($i = strlen($dataUri) + $index; $i < $contentLength; $i++) {
 								                $char = $markdown[$i];
 								                if ($char === ')' || $char === ' ' || $char === "\n" || $char === '"') {
 								                    break;
 								                }
 								                $dataUri .= $char;
 								            }
 								            $newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri);
 								            $replacements[] = [$dataUri, $newUrl];
 								        }
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
-												Prevented PCRE limit issues in markdown base64 extraction

For #3249

											
										
										
											2022-02-06 02:51:38 -05:00
+								        foreach ($replacements as [$dataUri, $newUrl]) {
 								            $markdown = str_replace($dataUri, $newUrl, $markdown);
-												Added base64 image extraction to markdown page content

- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

											
										
										
											2021-10-18 06:42:50 -04:00
+								        }
 								        return $markdown;
 								    }
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								    /**
 								     * Parse the given base64 image URI and return the URL to the created image instance.
 								     * Returns an empty string if the parsed URI is invalid or causes an error upon upload.
 								     */
 								    protected function base64ImageUriToUploadedImageUrl(string $uri): string
 								    {
 								        $imageRepo = app()->make(ImageRepo::class);
 								        $imageInfo = $this->parseBase64ImageUri($uri);
 								        // Validate extension and content
-												Cleaned up logic within ImageRepo

- Moved out extension check to ImageService as that seems more relevant.
- Updated models to use static-style references instead of facade to align with common modern usage within the app.
- Updated custom image_extension validation rule to use shared logic in image service.

											
										
										
											2021-10-31 20:24:42 -04:00
+								        if (empty($imageInfo['data']) || !ImageService::isExtensionSupported($imageInfo['extension'])) {
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								            return '';
 								        }
-												Added an env configurable file upload size limit

Replaces the old suggestion of setting JS head 'window.uploadLimit'
variable. This new env option will be used by back-end validation and
front-end libs/logic too.

Limits already likely exist within prod environments at a PHP and
webserver level but this allows an app-level limit and centralises the
option on the BookStack side into the .env

Closes #3033

											
										
										
											2021-11-14 17:03:22 -05:00
+								        // Validate that the content is not over our upload limit
 								        $uploadLimitBytes = (config('app.upload_limit') * 1000000);
 								        if (strlen($imageInfo['data']) > $uploadLimitBytes) {
 								            return '';
 								        }
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								        // Save image from data with a random name
 								        $imageName = 'embedded-image-' . Str::random(8) . '.' . $imageInfo['extension'];
 								        try {
 								            $image = $imageRepo->saveNewFromData($imageName, $imageInfo['data'], 'gallery', $this->page->id);
 								        } catch (ImageUploadException $exception) {
 								            return '';
 								        }
 								        return $image->url;
 								    }
 								    /**
 								     * Parse a base64 image URI into the data and extension.
-												Applied latest styleci changes

											
										
										
											2021-11-01 09:26:02 -04:00
+								     *
-												Applied another set of static analysis improvements

											
										
										
											2021-11-20 09:03:56 -05:00
+								     * @return array{extension: string, data: string}
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								     */
 								    protected function parseBase64ImageUri(string $uri): array
 								    {
 								        [$dataDefinition, $base64ImageData] = explode(',', $uri, 2);
 								        $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? '');
-												Applied latest styleci changes

											
										
										
											2021-11-01 09:26:02 -04:00
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								        return [
 								            'extension' => $extension,
-												Applied latest styleci changes

											
										
										
											2021-11-01 09:26:02 -04:00
+								            'data'      => base64_decode($base64ImageData) ?: '',
-												Made further changes to page image extraction validation

Fixes #3019
Increased testing to cover the failing case amoung others.

											
										
										
											2021-10-28 10:54:00 -04:00
+								        ];
 								    }
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    /**
 								     * Formats a page's html to be tagged correctly within the system.
 								     */
 								    protected function formatHtml(string $htmlText): string
 								    {
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new iamge repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								        if (empty($htmlText)) {
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								            return $htmlText;
 								        }
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new iamge repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
+								        $doc = $this->loadDocumentFromHtml($htmlText);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        $container = $doc->documentElement;
 								        $body = $container->childNodes->item(0);
 								        $childNodes = $body->childNodes;
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								        $xPath = new DOMXPath($doc);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        // Map to hold used ID references
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        $idMap = [];
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        // Map to hold changing ID references
 								        $changeMap = [];
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        $this->updateIdsRecursively($body, 0, $idMap, $changeMap);
 								        $this->updateLinks($xPath, $changeMap);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
 								        // Generate inner html as a string
 								        $html = '';
 								        foreach ($childNodes as $childNode) {
 								            $html .= $doc->saveHTML($childNode);
 								        }
-												Updated strategy for empty newline sections

- For some reason, TinyMCE would handle empty paragraphs with a '&nbsp'
  by default but this would be removed when the paragraph had an
  attribute. This was fine in the old editor.
- This changes the approach to use '<br>' tags within elements
  for "spaced emptiness".
- For compatibility with any existing empty paragraphs, I updated the
  styles to show default height for empty paragraph sections.
- This also makes changes to help preserve encoded &nbsp; html tags
  since they were getting converted along the journey.

Related to #3302

											
										
										
											2022-03-01 12:26:06 -05:00
+								        // Perform required string-level tweaks
 								        $html = str_replace(' ', '&nbsp;', $html);
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        return $html;
 								    }
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								    /**
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								     * For the given DOMNode, traverse its children recursively and update IDs
 								     * where required (Top-level, headers & elements with IDs).
 								     * Will update the provided $changeMap array with changes made, where keys are the old
 								     * ids and the corresponding values are the new ids.
 								     */
 								    protected function updateIdsRecursively(DOMNode $element, int $depth, array &$idMap, array &$changeMap): void
 								    {
 								        /** @var DOMNode $child */
 								        foreach ($element->childNodes as $child) {
 								            // Ids are only (re)assigned on elements that are direct children of the
 								            // starting node (depth 0), are headers, or already carry an id attribute.
 								            $requiresId = $child instanceof DOMElement
 								                && ($depth === 0
 								                    || in_array($child->nodeName, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], true)
 								                    || $child->getAttribute('id'));
 								            if ($requiresId) {
 								                [$oldId, $newId] = $this->setUniqueId($child, $idMap);
 								                // Record a change only when a real previous id was replaced and that
 								                // previous id is not still in use by another element (present in $idMap).
 								                // An empty old id is skipped: updateLinks() strips the leading '#' from each
 								                // href before looking it up in $changeMap, so an empty key would cause bare
 								                // '#' (or empty) hrefs to be rewritten to point at an unrelated element.
 								                if ($oldId !== '' && $newId && $newId !== $oldId && !isset($idMap[$oldId])) {
 								                    $changeMap[$oldId] = $newId;
 								                }
 								            }
 								            // Descend into any node with children, whether or not it was given an id itself.
 								            if ($child->hasChildNodes()) {
 								                $this->updateIdsRecursively($child, $depth + 1, $idMap, $changeMap);
 								            }
 								        }
 								    }
 								    /**
 								     * Update all the links in the given xpath to apply the required changes within the
 								     * given $changeMap array.
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								     */
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								    protected function updateLinks(DOMXPath $xpath, array $changeMap): void
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								    {
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        if (empty($changeMap)) {
 								            return;
 								        }
 								        $links = $xpath->query('//body//*//*[@href]');
 								        /** @var DOMElement $domElem */
 								        foreach ($links as $domElem) {
 								            $href = ltrim($domElem->getAttribute('href'), '#');
 								            $newHref = $changeMap[$href] ?? null;
 								            if ($newHref) {
 								                $domElem->setAttribute('href', '#' . $newHref);
 								            }
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								        }
 								    }
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    /**
 								     * Set a unique id on the given DOMElement.
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								     * A map for existing ID's should be passed in to check for current existence,
 								     * and this will be updated with any new IDs set upon elements.
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								     * Returns a pair of strings in the format [old_id, new_id].
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								     */
-												Applied another round of static analysis updates

											
										
										
											2021-11-22 18:33:55 -05:00
+								    protected function setUniqueId(DOMNode $element, array &$idMap): array
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    {
-												Applied another round of static analysis updates

											
										
										
											2021-11-22 18:33:55 -05:00
+								        if (!$element instanceof DOMElement) {
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								            return ['', ''];
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        }
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								        // Stop if there's an existing valid id that has not already been used.
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        $existingId = $element->getAttribute('id');
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        if (str_starts_with($existingId, 'bkmrk') && !isset($idMap[$existingId])) {
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								            $idMap[$existingId] = true;
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								            return [$existingId, $existingId];
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        }
-												Applied another set of static analysis improvements

											
										
										
											2021-11-20 09:03:56 -05:00
+								        // Create a unique id for the element
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								        // Uses the content as a basis to ensure output is the same every time
 								        // the same content is passed through.
 								        $contentId = 'bkmrk-' . mb_substr(strtolower(preg_replace('/\s+/', '-', trim($element->nodeValue))), 0, 20);
 								        $newId = urlencode($contentId);
-												Made page-save HTML formatting much more efficient

Replaced the existing xpath-heavy system with a more manual traversal
approach. Fixes following slow areas of old system:
- Old system would repeat ID-setting action for elements (Headers could
  be processed up to three times).
- Old system had a few very open xpath queries for headers.
- Old system would update links on every ID change, which triggers it's
  own xpath query for links, leading to exponential scaling issues.

New system only does one xpath query for links when changes are needed.
Added test to cover.

For #3932

											
										
										
											2023-02-22 09:32:40 -05:00
+								        $loopIndex = 1;
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
 								        while (isset($idMap[$newId])) {
 								            $newId = urlencode($contentId . '-' . $loopIndex);
 								            $loopIndex++;
 								        }
 								        $element->setAttribute('id', $newId);
 								        $idMap[$newId] = true;
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
-												Updated page content related links on content id changes

For #2278

											
										
										
											2020-09-28 17:26:50 -04:00
+								        return [$existingId, $newId];
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level akward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issued caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								    }
 								    /**
 								     * Get a plain-text visualisation of this page.
 								     *
 								     * Renders the page via render(true), strips all tags from the result
 								     * and then decodes HTML entities.
 								     */
 								    protected function toPlainText(): string
 								    {
 								        $html = $this->render(true);

 								        return html_entity_decode(strip_tags($html));
 								    }
 								    /**
 								     * Render the page for viewing.
 								     *
 								     * Script content is passed through HtmlContentFilter::removeScripts() unless the
 								     * 'app.allow_content_scripts' config option is truthy.
 								     *
 								     * @param bool $blankIncludes When true the content is passed through blankPageIncludes()
 								     *                            instead of having its includes parsed.
 								     */
 								    public function render(bool $blankIncludes = false): string
 								    {
 								        $content = $this->page->html ?? '';

 								        if (!config('app.allow_content_scripts')) {
 								            $content = HtmlContentFilter::removeScripts($content);
 								        }

 								        if ($blankIncludes) {
 								            return $this->blankPageIncludes($content);
 								        }

 								        // Repeat the include pass so that includes brought in by included content
 								        // are handled too, limited to three passes in total.
 								        for ($includeDepth = 0; $includeDepth < 3; $includeDepth++) {
 								            $content = $this->parsePageIncludes($content);
 								        }

 								        return $content;
 								    }
 								    /**
 								     * Parse the headers on the page to get a navigation menu.
 								     *
 								     * @param string $htmlContent HTML to scan for h1-h6 elements.
 								     *
 								     * @return array Result of headerNodesToLevelList() for the headers found;
 								     *               an empty array when the content is empty or the query fails.
 								     */
 								    public function getNavigation(string $htmlContent): array
 								    {
 								        if (empty($htmlContent)) {
 								            return [];
 								        }

 								        $doc = $this->loadDocumentFromHtml($htmlContent);
 								        $xPath = new DOMXPath($doc);
 								        $headers = $xPath->query('//h1|//h2|//h3|//h4|//h5|//h6');

 								        // DOMXPath::query() gives false on failure, so guard before converting.
 								        return $headers ? $this->headerNodesToLevelList($headers) : [];
 								    }
 								    /**
 								     * Convert a DOMNodeList into an array of readable header attributes
 								     * with levels normalised to the lower header level.
 								     */
 								    protected function headerNodesToLevelList(DOMNodeList $nodeList): array
 								    {
-												Applied another round of static analysis updates

											
										
										
											2021-11-22 18:33:55 -05:00
+								        $tree = collect($nodeList)->map(function (DOMElement $header) {
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								            $text = trim(str_replace("\xc2\xa0", '', $header->nodeValue));
 								            $text = mb_substr($text, 0, 100);
 								            return [
 								                'nodeName' => strtolower($header->nodeName),
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
+								                'level'    => intval(str_replace('h', '', $header->nodeName)),
 								                'link'     => '#' . $header->getAttribute('id'),
 								                'text'     => $text,
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								            ];
 								        })->filter(function ($header) {
 								            return mb_strlen($header['text']) > 0;
 								        });
 								        // Shift headers if only smaller headers have been used
 								        $levelChange = ($tree->pluck('level')->min() - 1);
 								        $tree = $tree->map(function ($header) use ($levelChange) {
 								            $header['level'] -= ($levelChange);
-												Apply fixes from StyleCI

											
										
										
											2021-06-26 11:23:15 -04:00
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								            return $header;
 								        });
 								        return $tree->toArray();
 								    }
 								    /**
 								     * Strip every page include tag out of the given HTML.
 								     *
 								     * A tag is anything of the form "{{@<ref>}}" where the reference starts
 								     * with a digit (optionally preceded by one whitespace character), for
 								     * example "{{@12}}" or "{{@ 12#section}}". Matched tags are replaced
 								     * with an empty string; all other content is returned untouched.
 								     */
 								    protected function blankPageIncludes(string $html): string
 								    {
 								        $includeTagPattern = "/{{@\s?([0-9].*?)}}/";

 								        return preg_replace($includeTagPattern, '', $html);
 								    }
 								    /**
 								     * Parse any include tags "{{@<page_id>#section}}" to be part of the page.
 								     *
 								     * For every tag found in the given HTML:
 								     *  - The referenced page is looked up via Page::visible().
 								     *  - With only a page id, the whole HTML of that page is used.
 								     *  - With a "#section" suffix, only that section of the page is used.
 								     *  - When no page is found, the tag is replaced with an empty string.
 								     * A PAGE_INCLUDE_PARSE theme event is dispatched for each tag; when it
 								     * returns a non-null value that value is used as the replacement instead.
 								     *
 								     * Returns the HTML with all include tags replaced.
 								     */
 								    protected function parsePageIncludes(string $html): string
 								    {
 								        $matches = [];
 								        preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);

 								        foreach ($matches[1] as $index => $includeId) {
 								            $fullMatch = $matches[0][$index];
 								            $splitInclude = explode('#', $includeId, 2);

 								            // Get page id from reference. The pattern above guarantees the
 								            // reference starts with a digit and intval() always returns an
 								            // integer, so no further validity check is needed here.
 								            $pageId = intval($splitInclude[0]);

 								            // Find page to use, and default replacement to empty string for non-matches.
 								            /** @var ?Page $matchedPage */
 								            $matchedPage = Page::visible()->find($pageId);
 								            $replacement = '';

 								            if ($matchedPage) {
 								                if (count($splitInclude) === 1) {
 								                    // If we only have page id, just insert all page html.
 								                    $replacement = $matchedPage->html;
 								                } else {
 								                    // Otherwise, our include tag defines a section, so load that specific content.
 								                    $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);
 								                    $replacement = trim($innerContent);
 								                }
 								            }

 								            // Allow the theme system to override the replacement content.
 								            // Clones are passed so listeners cannot alter the pages in use here.
 								            $themeReplacement = Theme::dispatch(
 								                ThemeEvents::PAGE_INCLUDE_PARSE,
 								                $includeId,
 								                $replacement,
 								                clone $this->page,
 								                $matchedPage ? (clone $matchedPage) : null,
 								            );

 								            // Perform the content replacement
 								            $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);
 								        }

 								        return $html;
 								    }
 								    /**
 								     * Fetch the content from a specific section of the given page.
 								     *
 								     * The section is the element within the page HTML whose id matches the
 								     * given section id. An empty string is returned when no such element
 								     * exists. For table, ul, ol and pre elements the element itself is
 								     * returned (wrapper included); for any other element only its child
 								     * nodes are returned.
 								     */
 								    protected function fetchSectionOfPage(Page $page, string $sectionId): string
 								    {
 								        $topLevelTags = ['table', 'ul', 'ol', 'pre'];
 								        $doc = $this->loadDocumentFromHtml($page->html);

 								        // Search included content for the id given and blank out if not exists.
 								        $matchingElem = $doc->getElementById($sectionId);
 								        if ($matchingElem === null) {
 								            // Document loading collects libxml errors internally, so clear
 								            // them on this path too rather than leaving them buffered.
 								            libxml_clear_errors();

 								            return '';
 								        }

 								        // Otherwise replace the content with the found content
 								        // Checks if the top-level wrapper should be included by matching on tag types
 								        $innerContent = '';
 								        $isTopLevel = in_array(strtolower($matchingElem->nodeName), $topLevelTags, true);
 								        if ($isTopLevel) {
 								            $innerContent .= $doc->saveHTML($matchingElem);
 								        } else {
 								            foreach ($matchingElem->childNodes as $childNode) {
 								                $innerContent .= $doc->saveHTML($childNode);
 								            }
 								        }

 								        libxml_clear_errors();

 								        return $innerContent;
 								    }
-												Reviewed base64 image upload support

- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new image repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

											
										
										
											2021-06-02 16:34:34 -04:00
 								    /**
 								     * Build a DOMDocument from the provided HTML content.
 								     *
 								     * The content is wrapped in a body element and prefixed with a UTF-8
 								     * xml encoding declaration before being loaded. libxml is switched to
 								     * internal error handling before the load takes place.
 								     */
 								    protected function loadDocumentFromHtml(string $html): DOMDocument
 								    {
 								        libxml_use_internal_errors(true);

 								        $wrappedHtml = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
 								        $document = new DOMDocument();
 								        $document->loadHTML($wrappedHtml);

 								        return $document;
 								    }
-												Entity Repo & Controller Refactor (#1690)

* Started mass-refactoring of the current entity repos

* Rewrote book tree logic

- Now does two simple queries instead of one really complex one.
- Extracted logic into its own class.
- Remove model-level awkward union field listing.
- Logic now more readable than being large separate query and
compilation functions.

* Extracted and split book sort logic

* Finished up Book controller/repo organisation

* Refactored bookshelves controllers and repo parts

* Fixed issues found via phpunit

* Refactored Chapter controller

* Updated Chapter export controller

* Started Page controller/repo refactor

* Refactored another chunk of PageController

* Completed initial pagecontroller refactor pass

* Fixed tests and continued reduction of old repos

* Removed old page remove and further reduced entity repo

* Removed old entity repo, split out page controller

* Ran phpcbf and split out some page content methods

* Tidied up some EntityProvider elements

* Fixed issues caused by viewservice change

											
										
										
											2019-10-05 07:55:01 -04:00
+								}