From 344b3a3615f1f39f92ebae9c791dccc423baf61d Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 16 Aug 2022 13:23:53 +0100 Subject: [PATCH] Added system to extract model references from HTML content For the start of a managed cross-linking system. --- app/Util/CrossLinking/CrossLinkParser.php | 103 ++++++++++++++++++ .../ModelResolvers/BookLinkModelResolver.php | 26 +++++ .../BookshelfLinkModelResolver.php | 26 +++++ .../ChapterLinkModelResolver.php | 27 +++++ .../ModelResolvers/CrossLinkModelResolver.php | 13 +++ .../ModelResolvers/PageLinkModelResolver.php | 27 +++++ .../PagePermalinkModelResolver.php | 25 +++++ tests/Util/CrossLinkParserTest.php | 41 +++++++ 8 files changed, 288 insertions(+) create mode 100644 app/Util/CrossLinking/CrossLinkParser.php create mode 100644 app/Util/CrossLinking/ModelResolvers/BookLinkModelResolver.php create mode 100644 app/Util/CrossLinking/ModelResolvers/BookshelfLinkModelResolver.php create mode 100644 app/Util/CrossLinking/ModelResolvers/ChapterLinkModelResolver.php create mode 100644 app/Util/CrossLinking/ModelResolvers/CrossLinkModelResolver.php create mode 100644 app/Util/CrossLinking/ModelResolvers/PageLinkModelResolver.php create mode 100644 app/Util/CrossLinking/ModelResolvers/PagePermalinkModelResolver.php create mode 100644 tests/Util/CrossLinkParserTest.php diff --git a/app/Util/CrossLinking/CrossLinkParser.php b/app/Util/CrossLinking/CrossLinkParser.php new file mode 100644 index 000000000..774024d52 --- /dev/null +++ b/app/Util/CrossLinking/CrossLinkParser.php @@ -0,0 +1,103 @@ +modelResolvers = $modelResolvers; + } + + /** + * Extract the models linked to from anchors within the given HTML content, de-duplicated by model class and id. + * + * @return Model[] + */ + public function extractLinkedModels(string $html): array + { + $models = []; + + $links = $this->getLinksFromContent($html); + + foreach ($links as $link) { + $model = $this->linkToModel($link); + if (!is_null($model)) { + $models[get_class($model) . ':' . 
$model->id] = $model; + } + } + + return array_values($models); + } + + /** + * Get the href value of every anchor that carries an href attribute within the given HTML. + * + * @return string[] + */ + protected function getLinksFromContent(string $html): array + { + $links = []; + + $html = '' . $html . ''; + libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); + + $xPath = new DOMXPath($doc); + $anchors = $xPath->query('//a[@href]'); + + /** @var \DOMElement $anchor */ + foreach ($anchors as $anchor) { + $links[] = $anchor->getAttribute('href'); + } + + return $links; + } + + /** + * Attempt to resolve the given link to a model using the instance model resolvers, trying each in order and returning the first non-null result (null when none match). + */ + protected function linkToModel(string $link): ?Model + { + foreach ($this->modelResolvers as $resolver) { + $model = $resolver->resolve($link); + if (!is_null($model)) { + return $model; + } + } + + return null; + } + + /** + * Create a new instance with a pre-defined set of model resolvers, specifically for the + * default set of entities within BookStack. 
+ */ + public static function createWithEntityResolvers(): self + { + return new static([ + new PagePermalinkModelResolver(), + new PageLinkModelResolver(), + new ChapterLinkModelResolver(), + new BookLinkModelResolver(), + new BookshelfLinkModelResolver(), + ]); + } + +} \ No newline at end of file diff --git a/app/Util/CrossLinking/ModelResolvers/BookLinkModelResolver.php b/app/Util/CrossLinking/ModelResolvers/BookLinkModelResolver.php new file mode 100644 index 000000000..f2ee284cd --- /dev/null +++ b/app/Util/CrossLinking/ModelResolvers/BookLinkModelResolver.php @@ -0,0 +1,26 @@ +where('slug', '=', $bookSlug)->first(); + + return $model; + } +} \ No newline at end of file diff --git a/app/Util/CrossLinking/ModelResolvers/BookshelfLinkModelResolver.php b/app/Util/CrossLinking/ModelResolvers/BookshelfLinkModelResolver.php new file mode 100644 index 000000000..53cb89e3f --- /dev/null +++ b/app/Util/CrossLinking/ModelResolvers/BookshelfLinkModelResolver.php @@ -0,0 +1,26 @@ +where('slug', '=', $shelfSlug)->first(); + + return $model; + } +} \ No newline at end of file diff --git a/app/Util/CrossLinking/ModelResolvers/ChapterLinkModelResolver.php b/app/Util/CrossLinking/ModelResolvers/ChapterLinkModelResolver.php new file mode 100644 index 000000000..55afd183c --- /dev/null +++ b/app/Util/CrossLinking/ModelResolvers/ChapterLinkModelResolver.php @@ -0,0 +1,27 @@ +whereSlugs($bookSlug, $chapterSlug)->first(); + + return $model; + } +} \ No newline at end of file diff --git a/app/Util/CrossLinking/ModelResolvers/CrossLinkModelResolver.php b/app/Util/CrossLinking/ModelResolvers/CrossLinkModelResolver.php new file mode 100644 index 000000000..073764c66 --- /dev/null +++ b/app/Util/CrossLinking/ModelResolvers/CrossLinkModelResolver.php @@ -0,0 +1,13 @@ +whereSlugs($bookSlug, $pageSlug)->first(); + + return $model; + } +} \ No newline at end of file diff --git a/app/Util/CrossLinking/ModelResolvers/PagePermalinkModelResolver.php 
b/app/Util/CrossLinking/ModelResolvers/PagePermalinkModelResolver.php new file mode 100644 index 000000000..9b31f5013 --- /dev/null +++ b/app/Util/CrossLinking/ModelResolvers/PagePermalinkModelResolver.php @@ -0,0 +1,25 @@ +find($id); + + return $model; + } +} \ No newline at end of file diff --git a/tests/Util/CrossLinkParserTest.php b/tests/Util/CrossLinkParserTest.php new file mode 100644 index 000000000..f8ad59db2 --- /dev/null +++ b/tests/Util/CrossLinkParserTest.php @@ -0,0 +1,41 @@ +getEachEntityType(); + $otherPage = Page::query()->where('id', '!=', $entities['page']->id)->first(); + + $html = ' +Page Permalink +Page Link +Chapter Link +Book Link +Shelf Link +Settings Link + '; + + $parser = CrossLinkParser::createWithEntityResolvers(); + $results = $parser->extractLinkedModels($html); + + $this->assertCount(5, $results); + $this->assertEquals(get_class($otherPage), get_class($results[0])); + $this->assertEquals($otherPage->id, $results[0]->id); + $this->assertEquals(get_class($entities['page']), get_class($results[1])); + $this->assertEquals($entities['page']->id, $results[1]->id); + $this->assertEquals(get_class($entities['chapter']), get_class($results[2])); + $this->assertEquals($entities['chapter']->id, $results[2]->id); + $this->assertEquals(get_class($entities['book']), get_class($results[3])); + $this->assertEquals($entities['book']->id, $results[3]->id); + $this->assertEquals(get_class($entities['bookshelf']), get_class($results[4])); + $this->assertEquals($entities['bookshelf']->id, $results[4]->id); + } +}