diff --git a/app/Actions/Tag.php b/app/Actions/Tag.php index db9328b7d..609c299ad 100644 --- a/app/Actions/Tag.php +++ b/app/Actions/Tag.php @@ -6,6 +6,12 @@ use BookStack\Model; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Relations\MorphTo; +/** + * @property int $id + * @property string $name + * @property string $value + * @property int $order + */ class Tag extends Model { use HasFactory; diff --git a/app/Console/Commands/RegenerateSearch.php b/app/Console/Commands/RegenerateSearch.php index 50e81a2b8..62ee88fc0 100644 --- a/app/Console/Commands/RegenerateSearch.php +++ b/app/Console/Commands/RegenerateSearch.php @@ -2,6 +2,7 @@ namespace BookStack\Console\Commands; +use BookStack\Entities\Models\Entity; use BookStack\Entities\Tools\SearchIndex; use Illuminate\Console\Command; use Illuminate\Support\Facades\DB; @@ -22,6 +23,9 @@ class RegenerateSearch extends Command */ protected $description = 'Re-index all content for searching'; + /** + * @var SearchIndex + */ protected $searchIndex; /** @@ -45,8 +49,13 @@ class RegenerateSearch extends Command DB::setDefaultConnection($this->option('database')); } - $this->searchIndex->indexAllEntities(); + $this->searchIndex->indexAllEntities(function (Entity $model, int $processed, int $total) { + $this->info('Indexed ' . class_basename($model) . ' entries (' . $processed . '/' . $total . 
')'); + }); + DB::setDefaultConnection($connection); - $this->comment('Search index regenerated'); + $this->line('Search index regenerated!'); + + return static::SUCCESS; } } diff --git a/app/Entities/Models/Book.php b/app/Entities/Models/Book.php index 982df5c90..735d25a99 100644 --- a/app/Entities/Models/Book.php +++ b/app/Entities/Models/Book.php @@ -24,7 +24,7 @@ class Book extends Entity implements HasCoverImage { use HasFactory; - public $searchFactor = 2; + public $searchFactor = 1.2; protected $fillable = ['name', 'description']; protected $hidden = ['restricted', 'pivot', 'image_id', 'deleted_at']; diff --git a/app/Entities/Models/Bookshelf.php b/app/Entities/Models/Bookshelf.php index 8fe9dbe41..e4d9775b7 100644 --- a/app/Entities/Models/Bookshelf.php +++ b/app/Entities/Models/Bookshelf.php @@ -13,7 +13,7 @@ class Bookshelf extends Entity implements HasCoverImage protected $table = 'bookshelves'; - public $searchFactor = 3; + public $searchFactor = 1.2; protected $fillable = ['name', 'description', 'image_id']; diff --git a/app/Entities/Models/Chapter.php b/app/Entities/Models/Chapter.php index 0e2917af3..224ded935 100644 --- a/app/Entities/Models/Chapter.php +++ b/app/Entities/Models/Chapter.php @@ -16,7 +16,7 @@ class Chapter extends BookChild { use HasFactory; - public $searchFactor = 1.3; + public $searchFactor = 1.2; protected $fillable = ['name', 'description', 'priority', 'book_id']; protected $hidden = ['restricted', 'pivot', 'deleted_at']; diff --git a/app/Entities/Models/Entity.php b/app/Entities/Models/Entity.php index f5f9d91f0..4c4e55bb8 100644 --- a/app/Entities/Models/Entity.php +++ b/app/Entities/Models/Entity.php @@ -238,20 +238,12 @@ abstract class Entity extends Model implements Sluggable, Favouritable, Viewable return mb_substr($this->name, 0, $length - 3) . '...'; } - /** - * Get the body text of this entity. - */ - public function getText(): string - { - return $this->{$this->textField} ?? 
''; - } - /** * Get an excerpt of this entity's descriptive content to the specified length. */ public function getExcerpt(int $length = 100): string { - $text = $this->getText(); + $text = $this->{$this->textField} ?? ''; if (mb_strlen($text) > $length) { $text = mb_substr($text, 0, $length - 3) . '...'; diff --git a/app/Entities/Models/Page.php b/app/Entities/Models/Page.php index 27d5dc6a4..c28b9a305 100644 --- a/app/Entities/Models/Page.php +++ b/app/Entities/Models/Page.php @@ -3,13 +3,13 @@ namespace BookStack\Entities\Models; use BookStack\Entities\Tools\PageContent; +use BookStack\Facades\Permissions; use BookStack\Uploads\Attachment; use Illuminate\Database\Eloquent\Builder; use Illuminate\Database\Eloquent\Collection; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Relations\BelongsTo; use Illuminate\Database\Eloquent\Relations\HasMany; -use Permissions; /** * Class Page. @@ -64,10 +64,8 @@ class Page extends BookChild /** * Check if this page has a chapter. 
- * - * @return bool */ - public function hasChapter() + public function hasChapter(): bool { return $this->chapter()->count() > 0; } diff --git a/app/Entities/Repos/PageRepo.php b/app/Entities/Repos/PageRepo.php index ffa06d459..98fe4ef55 100644 --- a/app/Entities/Repos/PageRepo.php +++ b/app/Entities/Repos/PageRepo.php @@ -157,8 +157,8 @@ class PageRepo */ public function publishDraft(Page $draft, array $input): Page { - $this->baseRepo->update($draft, $input); $this->updateTemplateStatusAndContentFromInput($draft, $input); + $this->baseRepo->update($draft, $input); $draft->draft = false; $draft->revision_count = 1; diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index cc0b32d6a..d748c1695 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -2,26 +2,31 @@ namespace BookStack\Entities\Tools; +use BookStack\Actions\Tag; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; +use BookStack\Entities\Models\Page; use BookStack\Entities\Models\SearchTerm; +use DOMDocument; +use DOMNode; use Illuminate\Support\Collection; class SearchIndex { /** - * @var SearchTerm + * A list of delimiter characters used to break-up parsed content into terms for indexing. 
+ * + * @var string */ - protected $searchTerm; + public static $delimiters = " \n\t.,!?:;()[]{}<>`'\""; /** * @var EntityProvider */ protected $entityProvider; - public function __construct(SearchTerm $searchTerm, EntityProvider $entityProvider) + public function __construct(EntityProvider $entityProvider) { - $this->searchTerm = $searchTerm; $this->entityProvider = $entityProvider; } @@ -31,14 +36,8 @@ class SearchIndex public function indexEntity(Entity $entity) { $this->deleteEntityTerms($entity); - $nameTerms = $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor); - $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor); - $terms = array_merge($nameTerms, $bodyTerms); - foreach ($terms as $index => $term) { - $terms[$index]['entity_type'] = $entity->getMorphClass(); - $terms[$index]['entity_id'] = $entity->id; - } - $this->searchTerm->newQuery()->insert($terms); + $terms = $this->entityToTermDataArray($entity); + SearchTerm::query()->insert($terms); } /** @@ -46,40 +45,54 @@ class SearchIndex * * @param Entity[] $entities */ - protected function indexEntities(array $entities) + public function indexEntities(array $entities) { $terms = []; foreach ($entities as $entity) { - $nameTerms = $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor); - $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor); - foreach (array_merge($nameTerms, $bodyTerms) as $term) { - $term['entity_id'] = $entity->id; - $term['entity_type'] = $entity->getMorphClass(); - $terms[] = $term; - } + $entityTerms = $this->entityToTermDataArray($entity); + array_push($terms, ...$entityTerms); } $chunkedTerms = array_chunk($terms, 500); foreach ($chunkedTerms as $termChunk) { - $this->searchTerm->newQuery()->insert($termChunk); + SearchTerm::query()->insert($termChunk); } } /** * Delete and re-index the terms for all entities in the system. 
+     * Can take a callback which is used for reporting progress.
+     * Callback receives three arguments:
+     * - An instance of the model being processed
+     * - The number that have been processed so far.
+     * - The total number of that model to be processed (matches the rows actually indexed).
+     *
+     * @param callable(Entity, int, int)|null $progressCallback
      */
-    public function indexAllEntities()
+    public function indexAllEntities(?callable $progressCallback = null)
     {
-        $this->searchTerm->newQuery()->truncate();
+        SearchTerm::query()->truncate();
 
         foreach ($this->entityProvider->all() as $entityModel) {
-            $selectFields = ['id', 'name', $entityModel->textField];
+            $indexContentField = $entityModel instanceof Page ? 'html' : 'description';
+            $selectFields = ['id', 'name', $indexContentField];
+            $total = $entityModel->newQuery()->count(); // Same scope as the chunked query below, so progress can reach the total.
+            $chunkSize = 250;
+            $processed = 0;
+
+            $chunkCallback = function (Collection $entities) use ($progressCallback, &$processed, $total, $chunkSize, $entityModel) {
+                $this->indexEntities($entities->all());
+                $processed = min($processed + $chunkSize, $total);
+
+                if (is_callable($progressCallback)) {
+                    $progressCallback($entityModel, $processed, $total);
+                }
+            };
+
             $entityModel->newQuery()
-                ->withTrashed()
                 ->select($selectFields)
-                ->chunk(1000, function (Collection $entities) {
-                    $this->indexEntities($entities->all());
-                });
+                ->with(['tags:id,name,value,entity_id,entity_type'])
+                ->chunk($chunkSize, $chunkCallback);
         }
     }
 
@@ -92,12 +105,97 @@ class SearchIndex
     }
 
     /**
-     * Create a scored term array from the given text.
+     * Create a scored term array from the given text, where the keys are the terms
+     * and the values are their scores.
+     *
+     * @return array<string, float>
      */
-    protected function generateTermArrayFromText(string $text, int $scoreAdjustment = 1): array
+    protected function generateTermScoreMapFromText(string $text, float $scoreAdjustment = 1): array
+    {
+        $termMap = $this->textToTermCountMap($text);
+
+        foreach ($termMap as $term => $count) {
+            $termMap[$term] = $count * $scoreAdjustment; // Adjustment may be fractional (e.g. an entity searchFactor of 1.2).
+        }
+
+        return $termMap;
+    }
+
+    /**
+     * Create a scored term array from the given HTML, where the keys are the terms
+     * and the values are their scores. Terms in top-level h1-h6 elements are weighted higher.
+     *
+     * @return array<string, int|float>
+     */
+    protected function generateTermScoreMapFromHtml(string $html): array
+    {
+        if ($html === '') {
+            return [];
+        }
+
+        $scoresByTerm = [];
+        $elementScoreAdjustmentMap = [
+            'h1' => 10,
+            'h2' => 5,
+            'h3' => 4,
+            'h4' => 3,
+            'h5' => 2,
+            'h6' => 1.5,
+        ];
+
+        $html = '<body>' . $html . '</body>'; // Explicit body wrapper so item(0) below resolves to the content container.
+        libxml_use_internal_errors(true);
+        $doc = new DOMDocument();
+        $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
+
+        $topElems = $doc->documentElement->childNodes->item(0)->childNodes;
+        /** @var DOMNode $child */
+        foreach ($topElems as $child) {
+            $nodeName = $child->nodeName;
+            $termCounts = $this->textToTermCountMap(trim($child->textContent));
+            foreach ($termCounts as $term => $count) {
+                $scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1);
+                $scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange;
+            }
+        }
+
+        return $scoresByTerm;
+    }
+
+    /**
+     * Create a scored term map from the given set of entity tags.
+ * + * @param Tag[] $tags + * + * @return array<string, int|float> + */ + protected function generateTermScoreMapFromTags(array $tags): array + { + $scoreMap = []; + $names = []; + $values = []; + + foreach ($tags as $tag) { + $names[] = $tag->name; + $values[] = $tag->value; + } + + $nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3); + $valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5); + + return $this->mergeTermScoreMaps($nameMap, $valueMap); + } + + /** + * For the given text, return an array where the keys are the unique term words + * and the values are the frequency of that term. + * + * @return array<string, int> + */ + protected function textToTermCountMap(string $text): array { $tokenMap = []; // {TextToken => OccurrenceCount} - $splitChars = " \n\t.,!?:;()[]{}<>`'\""; + $splitChars = static::$delimiters; $token = strtok($text, $splitChars); while ($token !== false) { @@ -108,14 +206,61 @@ class SearchIndex $token = strtok($splitChars); } - $terms = []; - foreach ($tokenMap as $token => $count) { - $terms[] = [ - 'term' => $token, - 'score' => $count * $scoreAdjustment, + return $tokenMap; + } + + /** + * For the given entity, generate an array of term data details. + * Is the raw term data, not instances of SearchTerm models. + * + * @return array{term: string, score: float, entity_id: int, entity_type: string}[] + */ + protected function entityToTermDataArray(Entity $entity): array + { + $nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor); + $tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all()); + + if ($entity instanceof Page) { + $bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html); + } else { + $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description ?? 
'', $entity->searchFactor); + } + + $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap); + + $dataArray = []; + $entityId = $entity->id; + $entityType = $entity->getMorphClass(); + foreach ($mergedScoreMap as $term => $score) { + $dataArray[] = [ + 'term' => $term, + 'score' => $score, + 'entity_type' => $entityType, + 'entity_id' => $entityId, ]; } - return $terms; + return $dataArray; + } + + /** + * For the given term score maps, merge their contents by term + * while summing the scores of any term present in more than one map. + * + * @param array[] ...$scoreMaps + * + * @return array<string, int|float> + */ + protected function mergeTermScoreMaps(...$scoreMaps): array + { + $mergedMap = []; + + foreach ($scoreMaps as $scoreMap) { + foreach ($scoreMap as $term => $score) { + $mergedMap[$term] = ($mergedMap[$term] ?? 0) + $score; + } + } + + return $mergedMap; } } diff --git a/app/Entities/Tools/SearchOptions.php b/app/Entities/Tools/SearchOptions.php index 39074fb38..99271058e 100644 --- a/app/Entities/Tools/SearchOptions.php +++ b/app/Entities/Tools/SearchOptions.php @@ -57,15 +57,22 @@ class SearchOptions $instance = new SearchOptions(); $inputs = $request->only(['search', 'types', 'filters', 'exact', 'tags']); - $instance->searches = explode(' ', $inputs['search'] ?? []); - $instance->exacts = array_filter($inputs['exact'] ?? []); + + $parsedStandardTerms = static::parseStandardTermString($inputs['search'] ?? ''); + $instance->searches = $parsedStandardTerms['terms']; + $instance->exacts = $parsedStandardTerms['exacts']; + + array_push($instance->exacts, ...array_filter($inputs['exact'] ?? [])); + $instance->tags = array_filter($inputs['tags'] ?? []); + foreach (($inputs['filters'] ?? []) as $filterKey => $filterVal) { if (empty($filterVal)) { continue; } $instance->filters[$filterKey] = $filterVal === 'true' ? 
'' : $filterVal; } + if (isset($inputs['types']) && count($inputs['types']) < 4) { $instance->filters['type'] = implode('|', $inputs['types']); } @@ -102,11 +109,9 @@ class SearchOptions } // Parse standard terms - foreach (explode(' ', trim($searchString)) as $searchTerm) { - if ($searchTerm !== '') { - $terms['searches'][] = $searchTerm; - } - } + $parsedStandardTerms = static::parseStandardTermString($searchString); + array_push($terms['searches'], ...$parsedStandardTerms['terms']); + array_push($terms['exacts'], ...$parsedStandardTerms['exacts']); // Split filter values out $splitFilters = []; @@ -119,6 +124,33 @@ class SearchOptions return $terms; } + /** + * Parse a standard search term string into individual search terms and + * extract any exact terms searches to be made. + * + * @return array{terms: array, exacts: array} + */ + protected static function parseStandardTermString(string $termString): array + { + $terms = explode(' ', $termString); + $indexDelimiters = SearchIndex::$delimiters; + $parsed = [ + 'terms' => [], + 'exacts' => [], + ]; + + foreach ($terms as $searchTerm) { + if ($searchTerm === '') { + continue; + } + + $parsedList = (strpbrk($searchTerm, $indexDelimiters) === false) ? 'terms' : 'exacts'; + $parsed[$parsedList][] = $searchTerm; + } + + return $parsed; + } + /** * Encode this instance to a search string. */ diff --git a/app/Entities/Tools/SearchResultsFormatter.php b/app/Entities/Tools/SearchResultsFormatter.php new file mode 100644 index 000000000..31a8f81c9 --- /dev/null +++ b/app/Entities/Tools/SearchResultsFormatter.php @@ -0,0 +1,236 @@ +setSearchPreview($result, $options); + } + } + + /** + * Update the given entity model to set attributes used for previews of the item + * primarily within search result lists. 
+ */ + protected function setSearchPreview(Entity $entity, SearchOptions $options) + { + $textProperty = $entity->textField; + $textContent = $entity->$textProperty; + $terms = array_merge($options->exacts, $options->searches); + + $originalContentByNewAttribute = [ + 'preview_name' => $entity->name, + 'preview_content' => $textContent, + ]; + + foreach ($originalContentByNewAttribute as $attributeName => $content) { + $targetLength = ($attributeName === 'preview_name') ? 0 : 260; + $matchRefs = $this->getMatchPositions($content, $terms); + $mergedRefs = $this->sortAndMergeMatchPositions($matchRefs); + $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content, $targetLength); + $entity->setAttribute($attributeName, new HtmlString($formatted)); + } + + $tags = $entity->relationLoaded('tags') ? $entity->tags->all() : []; + $this->highlightTagsContainingTerms($tags, $terms); + } + + /** + * Highlight tags which match the given terms. + * + * @param Tag[] $tags + * @param string[] $terms + */ + protected function highlightTagsContainingTerms(array $tags, array $terms): void + { + foreach ($tags as $tag) { + $tagName = strtolower($tag->name); + $tagValue = strtolower($tag->value); + + foreach ($terms as $term) { + $termLower = strtolower($term); + + if (strpos($tagName, $termLower) !== false) { + $tag->setAttribute('highlight_name', true); + } + + if (strpos($tagValue, $termLower) !== false) { + $tag->setAttribute('highlight_value', true); + } + } + } + } + + /** + * Get positions of the given terms within the given text. + * Is in the array format of [int $startIndex => int $endIndex] where the indexes + * are positions within the provided text. 
+     *
+     * @return array<int, int>
+     */
+    protected function getMatchPositions(string $text, array $terms): array
+    {
+        $matchRefs = [];
+        $text = strtolower($text);
+
+        foreach ($terms as $term) {
+            $offset = 0;
+            $term = strtolower($term);
+            $pos = strpos($text, $term, $offset);
+            while ($term !== '' && $pos !== false) { // An empty term would match at every offset without ever advancing.
+                $end = $pos + strlen($term);
+                $matchRefs[$pos] = max($matchRefs[$pos] ?? 0, $end); // Keep the longest match when terms share a start.
+                $offset = $end;
+                $pos = strpos($text, $term, $offset);
+            }
+        }
+
+        return $matchRefs;
+    }
+
+    /**
+     * Sort the given match positions before merging them where they're
+     * adjacent or where they overlap.
+     *
+     * @param array<int, int> $matchPositions
+     *
+     * @return array<int, int>
+     */
+    protected function sortAndMergeMatchPositions(array $matchPositions): array
+    {
+        ksort($matchPositions);
+        $mergedRefs = [];
+        $lastStart = 0;
+        $lastEnd = 0;
+
+        foreach ($matchPositions as $start => $end) {
+            if ($start > $lastEnd) {
+                $mergedRefs[$start] = $end;
+                $lastStart = $start;
+                $lastEnd = $end;
+            } elseif ($end > $lastEnd) {
+                $mergedRefs[$lastStart] = $end;
+                $lastEnd = $end;
+            }
+        }
+
+        return $mergedRefs;
+    }
+
+    /**
+     * Format the given original text, returning a version where terms are highlighted within.
+     * Returned content is in HTML text format.
+     * A given $targetLength of 0 asserts no target length limit.
+     *
+     * This is a complex function but written to be relatively efficient, going through the term matches in order
+     * so that we're only doing a one-time loop through of the matches. There is no further searching
+     * done within here.
+     */
+    protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText, int $targetLength): string
+    {
+        $maxEnd = strlen($originalText);
+        $fetchAll = ($targetLength === 0);
+        $contextLength = ($fetchAll ? 0 : 32);
+
+        $firstStart = null;
+        $lastEnd = 0;
+        $content = '';
+        $contentTextLength = 0;
+
+        if ($fetchAll) {
+            $targetLength = $maxEnd * 2;
+        }
+
+        foreach ($matchPositions as $start => $end) {
+            // Get our outer text ranges for the added context we want to show upon the result.
+            $contextStart = max($start - $contextLength, 0, $lastEnd);
+            $contextEnd = min($end + $contextLength, $maxEnd);
+
+            // Adjust the start if we're going to be touching the previous match.
+            $startDiff = $start - $lastEnd;
+            if ($startDiff < 0) {
+                $contextStart = $start;
+                // Trims off '$startDiff' number of characters to bring it back to the start
+                // of this current match zone.
+                $content = substr($content, 0, strlen($content) + $startDiff);
+                $contentTextLength += $startDiff;
+            }
+
+            // Add ellipsis between results
+            if (!$fetchAll && $contextStart !== 0 && $contextStart !== $start) {
+                $content .= ' ...';
+                $contentTextLength += 4;
+            } elseif ($fetchAll) {
+                // Or fill in gap since the previous match
+                $fillLength = $contextStart - $lastEnd;
+                $content .= e(substr($originalText, $lastEnd, $fillLength));
+                $contentTextLength += $fillLength;
+            }
+
+            // Add our content including the bolded matching text
+            $content .= e(substr($originalText, $contextStart, $start - $contextStart));
+            $contentTextLength += $start - $contextStart;
+            $content .= '<strong>' . e(substr($originalText, $start, $end - $start)) . '</strong>';
+            $contentTextLength += $end - $start;
+            $content .= e(substr($originalText, $end, $contextEnd - $end));
+            $contentTextLength += $contextEnd - $end;
+
+            // Update our last end position
+            $lastEnd = $contextEnd;
+
+            // Update the first start position if it's not already been set
+            if (is_null($firstStart)) {
+                $firstStart = $contextStart;
+            }
+
+            // Stop if we're near our target
+            if ($contentTextLength >= $targetLength - 10) {
+                break;
+            }
+        }
+
+        // Just copy out the content if we haven't moved along anywhere.
+ if ($lastEnd === 0) { + $content = e(substr($originalText, 0, $targetLength)); + $contentTextLength = $targetLength; + $lastEnd = $targetLength; + } + + // Pad out the end if we're low + $remainder = $targetLength - $contentTextLength; + if ($remainder > 10) { + $padEndLength = min($maxEnd - $lastEnd, $remainder); + $content .= e(substr($originalText, $lastEnd, $padEndLength)); + $lastEnd += $padEndLength; + $contentTextLength += $padEndLength; + } + + // Pad out the start if we're still low + $remainder = $targetLength - $contentTextLength; + $firstStart = $firstStart ?: 0; + if (!$fetchAll && $remainder > 10 && $firstStart !== 0) { + $padStart = max(0, $firstStart - $remainder); + $content = ($padStart === 0 ? '' : '...') . e(substr($originalText, $padStart, $firstStart - $padStart)) . substr($content, 4); + } + + // Add ellipsis if we're not at the end + if ($lastEnd < $maxEnd) { + $content .= '...'; + } + + return $content; + } +} diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index 223494d46..f6da871f4 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -5,13 +5,18 @@ namespace BookStack\Entities\Tools; use BookStack\Auth\Permissions\PermissionService; use BookStack\Auth\User; use BookStack\Entities\EntityProvider; +use BookStack\Entities\Models\BookChild; use BookStack\Entities\Models\Entity; -use Illuminate\Database\Connection; +use BookStack\Entities\Models\Page; +use BookStack\Entities\Models\SearchTerm; use Illuminate\Database\Eloquent\Builder as EloquentBuilder; +use Illuminate\Database\Eloquent\Collection as EloquentCollection; +use Illuminate\Database\Eloquent\Relations\BelongsTo; use Illuminate\Database\Query\Builder; -use Illuminate\Database\Query\JoinClause; use Illuminate\Support\Collection; +use Illuminate\Support\Facades\DB; use Illuminate\Support\Str; +use SplObjectStorage; class SearchRunner { @@ -20,11 +25,6 @@ class SearchRunner */ protected $entityProvider; - 
/** - * @var Connection - */ - protected $db; - /** * @var PermissionService */ @@ -37,11 +37,19 @@ class SearchRunner */ protected $queryOperators = ['<=', '>=', '=', '<', '>', 'like', '!=']; - public function __construct(EntityProvider $entityProvider, Connection $db, PermissionService $permissionService) + /** + * Retain a cache of score adjusted terms for specific search options. + * From PHP>=8 this can be made into a WeakMap instead. + * + * @var SplObjectStorage + */ + protected $termAdjustmentCache; + + public function __construct(EntityProvider $entityProvider, PermissionService $permissionService) { $this->entityProvider = $entityProvider; - $this->db = $db; $this->permissionService = $permissionService; + $this->termAdjustmentCache = new SplObjectStorage(); } /** @@ -69,16 +77,17 @@ class SearchRunner continue; } - $search = $this->searchEntityTable($searchOpts, $entityType, $page, $count, $action); - /** @var int $entityTotal */ - $entityTotal = $this->searchEntityTable($searchOpts, $entityType, $page, $count, $action, true); + $entityModelInstance = $this->entityProvider->get($entityType); + $searchQuery = $this->buildQuery($searchOpts, $entityModelInstance, $action); + $entityTotal = $searchQuery->count(); + $searchResults = $this->getPageOfDataFromQuery($searchQuery, $entityModelInstance, $page, $count); if ($entityTotal > ($page * $count)) { $hasMore = true; } $total += $entityTotal; - $results = $results->merge($search); + $results = $results->merge($searchResults); } return [ @@ -103,7 +112,9 @@ class SearchRunner if (!in_array($entityType, $entityTypes)) { continue; } - $search = $this->buildEntitySearchQuery($opts, $entityType)->where('book_id', '=', $bookId)->take(20)->get(); + + $entityModelInstance = $this->entityProvider->get($entityType); + $search = $this->buildQuery($opts, $entityModelInstance)->where('book_id', '=', $bookId)->take(20)->get(); $results = $results->merge($search); } @@ -116,78 +127,199 @@ class SearchRunner public function 
searchChapter(int $chapterId, string $searchString): Collection { $opts = SearchOptions::fromString($searchString); - $pages = $this->buildEntitySearchQuery($opts, 'page')->where('chapter_id', '=', $chapterId)->take(20)->get(); + $entityModelInstance = $this->entityProvider->get('page'); + $pages = $this->buildQuery($opts, $entityModelInstance)->where('chapter_id', '=', $chapterId)->take(20)->get(); return $pages->sortByDesc('score'); } /** - * Search across a particular entity type. - * Setting getCount = true will return the total - * matching instead of the items themselves. - * - * @return \Illuminate\Database\Eloquent\Collection|int|static[] + * Get a page of result data from the given query based on the provided page parameters. */ - protected function searchEntityTable(SearchOptions $searchOpts, string $entityType = 'page', int $page = 1, int $count = 20, string $action = 'view', bool $getCount = false) + protected function getPageOfDataFromQuery(EloquentBuilder $query, Entity $entityModelInstance, int $page = 1, int $count = 20): EloquentCollection { - $query = $this->buildEntitySearchQuery($searchOpts, $entityType, $action); - if ($getCount) { - return $query->count(); + $relations = ['tags']; + + if ($entityModelInstance instanceof BookChild) { + $relations['book'] = function (BelongsTo $query) { + $query->visible(); + }; } - $query = $query->skip(($page - 1) * $count)->take($count); + if ($entityModelInstance instanceof Page) { + $relations['chapter'] = function (BelongsTo $query) { + $query->visible(); + }; + } - return $query->get(); + return $query->clone() + ->with(array_filter($relations)) + ->skip(($page - 1) * $count) + ->take($count) + ->get(); } /** * Create a search query for an entity. 
*/ - protected function buildEntitySearchQuery(SearchOptions $searchOpts, string $entityType = 'page', string $action = 'view'): EloquentBuilder + protected function buildQuery(SearchOptions $searchOpts, Entity $entityModelInstance, string $action = 'view'): EloquentBuilder { - $entity = $this->entityProvider->get($entityType); - $entitySelect = $entity->newQuery(); + $entityQuery = $entityModelInstance->newQuery(); + + if ($entityModelInstance instanceof Page) { + $entityQuery->select($entityModelInstance::$listAttributes); + } else { + $entityQuery->select(['*']); + } // Handle normal search terms - if (count($searchOpts->searches) > 0) { - $rawScoreSum = $this->db->raw('SUM(score) as score'); - $subQuery = $this->db->table('search_terms')->select('entity_id', 'entity_type', $rawScoreSum); - $subQuery->where('entity_type', '=', $entity->getMorphClass()); - $subQuery->where(function (Builder $query) use ($searchOpts) { - foreach ($searchOpts->searches as $inputTerm) { - $query->orWhere('term', 'like', $inputTerm . '%'); - } - })->groupBy('entity_type', 'entity_id'); - $entitySelect->join($this->db->raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) { - $join->on('id', '=', 'entity_id'); - })->addSelect($entity->getTable() . '.*') - ->selectRaw('s.score') - ->orderBy('score', 'desc'); - $entitySelect->mergeBindings($subQuery); - } + $this->applyTermSearch($entityQuery, $searchOpts, $entityModelInstance); // Handle exact term matching foreach ($searchOpts->exacts as $inputTerm) { - $entitySelect->where(function (EloquentBuilder $query) use ($inputTerm, $entity) { + $entityQuery->where(function (EloquentBuilder $query) use ($inputTerm, $entityModelInstance) { $query->where('name', 'like', '%' . $inputTerm . '%') - ->orWhere($entity->textField, 'like', '%' . $inputTerm . '%'); + ->orWhere($entityModelInstance->textField, 'like', '%' . $inputTerm . 
'%'); }); } // Handle tag searches foreach ($searchOpts->tags as $inputTerm) { - $this->applyTagSearch($entitySelect, $inputTerm); + $this->applyTagSearch($entityQuery, $inputTerm); } // Handle filters foreach ($searchOpts->filters as $filterTerm => $filterValue) { $functionName = Str::camel('filter_' . $filterTerm); if (method_exists($this, $functionName)) { - $this->$functionName($entitySelect, $entity, $filterValue); + $this->$functionName($entityQuery, $entityModelInstance, $filterValue); } } - return $this->permissionService->enforceEntityRestrictions($entity, $entitySelect, $action); + return $this->permissionService->enforceEntityRestrictions($entityModelInstance, $entityQuery, $action); + } + + /** + * For the given search query, apply the queries for handling the regular search terms. + */ + protected function applyTermSearch(EloquentBuilder $entityQuery, SearchOptions $options, Entity $entity): void + { + $terms = $options->searches; + if (count($terms) === 0) { + return; + } + + $scoredTerms = $this->getTermAdjustments($options); + $scoreSelect = $this->selectForScoredTerms($scoredTerms); + + $subQuery = DB::table('search_terms')->select([ + 'entity_id', + 'entity_type', + DB::raw($scoreSelect['statement']), + ]); + + $subQuery->addBinding($scoreSelect['bindings'], 'select'); + + $subQuery->where('entity_type', '=', $entity->getMorphClass()); + $subQuery->where(function (Builder $query) use ($terms) { + foreach ($terms as $inputTerm) { + $query->orWhere('term', 'like', $inputTerm . '%'); + } + }); + $subQuery->groupBy('entity_type', 'entity_id'); + + $entityQuery->joinSub($subQuery, 's', 'id', '=', 'entity_id'); + $entityQuery->addSelect('s.score'); + $entityQuery->orderBy('score', 'desc'); + } + + /** + * Create a select statement, with prepared bindings, for the given + * set of scored search terms. 
+ * + * @param array $scoredTerms + * + * @return array{statement: string, bindings: string[]} + */ + protected function selectForScoredTerms(array $scoredTerms): array + { + // Within this we walk backwards to create the chain of 'if' statements + // so that each previous statement is used in the 'else' condition of + // the next (earlier) to be built. We start at '0' to have no score + // on no match (Should never actually get to this case). + $ifChain = '0'; + $bindings = []; + foreach ($scoredTerms as $term => $score) { + $ifChain = 'IF(term like ?, score * ' . (float) $score . ', ' . $ifChain . ')'; + $bindings[] = $term . '%'; + } + + return [ + 'statement' => 'SUM(' . $ifChain . ') as score', + 'bindings' => array_reverse($bindings), + ]; + } + + /** + * For the terms in the given search options, query their popularity across all + * search terms then provide that back as score adjustment multiplier applicable + * for their rarity. Returns an array of float multipliers, keyed by term. + * + * @return array + */ + protected function getTermAdjustments(SearchOptions $options): array + { + if (isset($this->termAdjustmentCache[$options])) { + return $this->termAdjustmentCache[$options]; + } + + $termQuery = SearchTerm::query()->toBase(); + $whenStatements = []; + $whenBindings = []; + + foreach ($options->searches as $term) { + $whenStatements[] = 'WHEN term LIKE ? THEN ?'; + $whenBindings[] = $term . '%'; + $whenBindings[] = $term; + + $termQuery->orWhere('term', 'like', $term . '%'); + } + + $case = 'CASE ' . implode(' ', $whenStatements) . ' END'; + $termQuery->selectRaw($case . 
' as term', $whenBindings); + $termQuery->selectRaw('COUNT(*) as count'); + $termQuery->groupByRaw($case, $whenBindings); + + $termCounts = $termQuery->pluck('count', 'term')->toArray(); + $adjusted = $this->rawTermCountsToAdjustments($termCounts); + + $this->termAdjustmentCache[$options] = $adjusted; + + return $this->termAdjustmentCache[$options]; + } + + /** + * Convert counts of terms into a relative-count normalised multiplier. + * + * @param array $termCounts + * + * @return array + */ + protected function rawTermCountsToAdjustments(array $termCounts): array + { + if (empty($termCounts)) { + return []; + } + + $multipliers = []; + $max = max(array_values($termCounts)); + + foreach ($termCounts as $term => $count) { + $percent = round($count / $max, 5); + $multipliers[$term] = 1.3 - $percent; + } + + return $multipliers; } /** @@ -238,44 +370,40 @@ class SearchRunner /** * Custom entity search filters. */ - protected function filterUpdatedAfter(EloquentBuilder $query, Entity $model, $input) + protected function filterUpdatedAfter(EloquentBuilder $query, Entity $model, $input): void { try { $date = date_create($input); + $query->where('updated_at', '>=', $date); } catch (\Exception $e) { - return; } - $query->where('updated_at', '>=', $date); } - protected function filterUpdatedBefore(EloquentBuilder $query, Entity $model, $input) + protected function filterUpdatedBefore(EloquentBuilder $query, Entity $model, $input): void { try { $date = date_create($input); + $query->where('updated_at', '<', $date); } catch (\Exception $e) { - return; } - $query->where('updated_at', '<', $date); } - protected function filterCreatedAfter(EloquentBuilder $query, Entity $model, $input) + protected function filterCreatedAfter(EloquentBuilder $query, Entity $model, $input): void { try { $date = date_create($input); + $query->where('created_at', '>=', $date); } catch (\Exception $e) { - return; } - $query->where('created_at', '>=', $date); } protected function 
filterCreatedBefore(EloquentBuilder $query, Entity $model, $input) { try { $date = date_create($input); + $query->where('created_at', '<', $date); } catch (\Exception $e) { - return; } - $query->where('created_at', '<', $date); } protected function filterCreatedBy(EloquentBuilder $query, Entity $model, $input) @@ -352,9 +480,9 @@ class SearchRunner */ protected function sortByLastCommented(EloquentBuilder $query, Entity $model) { - $commentsTable = $this->db->getTablePrefix() . 'comments'; + $commentsTable = DB::getTablePrefix() . 'comments'; $morphClass = str_replace('\\', '\\\\', $model->getMorphClass()); - $commentQuery = $this->db->raw('(SELECT c1.entity_id, c1.entity_type, c1.created_at as last_commented FROM ' . $commentsTable . ' c1 LEFT JOIN ' . $commentsTable . ' c2 ON (c1.entity_id = c2.entity_id AND c1.entity_type = c2.entity_type AND c1.created_at < c2.created_at) WHERE c1.entity_type = \'' . $morphClass . '\' AND c2.created_at IS NULL) as comments'); + $commentQuery = DB::raw('(SELECT c1.entity_id, c1.entity_type, c1.created_at as last_commented FROM ' . $commentsTable . ' c1 LEFT JOIN ' . $commentsTable . ' c2 ON (c1.entity_id = c2.entity_id AND c1.entity_type = c2.entity_type AND c1.created_at < c2.created_at) WHERE c1.entity_type = \'' . $morphClass . '\' AND c2.created_at IS NULL) as comments'); $query->join($commentQuery, $model->getTable() . 
'.id', '=', 'comments.entity_id')->orderBy('last_commented', 'desc'); } diff --git a/app/Http/Controllers/SearchController.php b/app/Http/Controllers/SearchController.php index d12c23b5a..6b2be5a2d 100644 --- a/app/Http/Controllers/SearchController.php +++ b/app/Http/Controllers/SearchController.php @@ -4,8 +4,8 @@ namespace BookStack\Http\Controllers; use BookStack\Entities\Queries\Popular; use BookStack\Entities\Tools\SearchOptions; +use BookStack\Entities\Tools\SearchResultsFormatter; use BookStack\Entities\Tools\SearchRunner; -use BookStack\Entities\Tools\ShelfContext; use BookStack\Entities\Tools\SiblingFetcher; use Illuminate\Http\Request; @@ -14,18 +14,15 @@ class SearchController extends Controller protected $searchRunner; protected $entityContextManager; - public function __construct( - SearchRunner $searchRunner, - ShelfContext $entityContextManager - ) { + public function __construct(SearchRunner $searchRunner) + { $this->searchRunner = $searchRunner; - $this->entityContextManager = $entityContextManager; } /** * Searches all entities. */ - public function search(Request $request) + public function search(Request $request, SearchResultsFormatter $formatter) { $searchOpts = SearchOptions::fromRequest($request); $fullSearchString = $searchOpts->toString(); @@ -35,6 +32,7 @@ class SearchController extends Controller $nextPageLink = url('/search?term=' . urlencode($fullSearchString) . '&page=' . 
($page + 1)); $results = $this->searchRunner->searchEntities($searchOpts, 'all', $page, 20); + $formatter->format($results['results']->all(), $searchOpts); return view('search.all', [ 'entities' => $results['results'], diff --git a/database/seeders/LargeContentSeeder.php b/database/seeders/LargeContentSeeder.php index 2fbf4a5c9..dd9165978 100644 --- a/database/seeders/LargeContentSeeder.php +++ b/database/seeders/LargeContentSeeder.php @@ -5,6 +5,7 @@ namespace Database\Seeders; use BookStack\Auth\Permissions\PermissionService; use BookStack\Auth\Role; use BookStack\Auth\User; +use BookStack\Entities\Models\Book; use BookStack\Entities\Models\Chapter; use BookStack\Entities\Models\Page; use BookStack\Entities\Tools\SearchIndex; @@ -25,12 +26,16 @@ class LargeContentSeeder extends Seeder $editorRole = Role::getRole('editor'); $editorUser->attachRole($editorRole); - $largeBook = \BookStack\Entities\Models\Book::factory()->create(['name' => 'Large book' . Str::random(10), 'created_by' => $editorUser->id, 'updated_by' => $editorUser->id]); + /** @var Book $largeBook */ + $largeBook = Book::factory()->create(['name' => 'Large book' . 
Str::random(10), 'created_by' => $editorUser->id, 'updated_by' => $editorUser->id]); $pages = Page::factory()->count(200)->make(['created_by' => $editorUser->id, 'updated_by' => $editorUser->id]); $chapters = Chapter::factory()->count(50)->make(['created_by' => $editorUser->id, 'updated_by' => $editorUser->id]); + $largeBook->pages()->saveMany($pages); $largeBook->chapters()->saveMany($chapters); - app(PermissionService::class)->buildJointPermissions(); - app(SearchIndex::class)->indexAllEntities(); + $all = array_merge([$largeBook], array_values($pages->all()), array_values($chapters->all())); + + app()->make(PermissionService::class)->buildJointPermissionsForEntity($largeBook); + app()->make(SearchIndex::class)->indexEntities($all); } } diff --git a/resources/sass/_blocks.scss b/resources/sass/_blocks.scss index ef03699f1..ae3e7a441 100644 --- a/resources/sass/_blocks.scss +++ b/resources/sass/_blocks.scss @@ -262,6 +262,10 @@ } } +.tag-name.highlight, .tag-value.highlight { + font-weight: bold; +} + .tag-list div:last-child .tag-item { margin-bottom: 0; } diff --git a/resources/views/entities/list-item-basic.blade.php b/resources/views/entities/list-item-basic.blade.php index 2ec4bee5c..398c33b93 100644 --- a/resources/views/entities/list-item-basic.blade.php +++ b/resources/views/entities/list-item-basic.blade.php @@ -2,7 +2,7 @@ @icon($type)
-

{{ $entity->name }}

+

{{ $entity->preview_name ?? $entity->name }}

{{ $slot ?? '' }}
\ No newline at end of file diff --git a/resources/views/entities/list-item.blade.php b/resources/views/entities/list-item.blade.php index 8b5eb20b0..aa4f6c1e8 100644 --- a/resources/views/entities/list-item.blade.php +++ b/resources/views/entities/list-item.blade.php @@ -3,15 +3,15 @@
@if($showPath ?? false) - @if($entity->book_id) + @if($entity->relationLoaded('book') && $entity->book) {{ $entity->book->getShortName(42) }} - @if($entity->chapter_id) + @if($entity->relationLoaded('chapter') && $entity->chapter) @icon('chevron-right') {{ $entity->chapter->getShortName(42) }} @endif @endif @endif -

{{ $entity->getExcerpt() }}

+

{{ $entity->preview_content ?? $entity->getExcerpt() }}

@if(($showTags ?? false) && $entity->tags->count() > 0) diff --git a/resources/views/entities/tag.blade.php b/resources/views/entities/tag.blade.php index 057c70921..de4750c13 100644 --- a/resources/views/entities/tag.blade.php +++ b/resources/views/entities/tag.blade.php @@ -1,9 +1,9 @@
@if($linked ?? true) - - @if($tag->value) @endif + + @if($tag->value) @endif @else -
@icon('tag'){{ $tag->name }}
- @if($tag->value)
{{$tag->value}}
@endif +
@icon('tag'){{ $tag->name }}
+ @if($tag->value)
{{$tag->value}}
@endif @endif
\ No newline at end of file diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index 8d2ef0fde..6929adc74 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -18,15 +18,17 @@ class EntitySearchTest extends TestCase $search = $this->asEditor()->get('/search?term=' . urlencode($page->name)); $search->assertSee('Search Results'); - $search->assertSee($page->name); + $search->assertSeeText($page->name, true); } public function test_bookshelf_search() { - $shelf = Bookshelf::first(); - $search = $this->asEditor()->get('/search?term=' . urlencode(mb_substr($shelf->name, 0, 3)) . ' {type:bookshelf}'); - $search->assertStatus(200); - $search->assertSee($shelf->name); + /** @var Bookshelf $shelf */ + $shelf = Bookshelf::query()->first(); + + $search = $this->asEditor()->get('/search?term=' . urlencode($shelf->name) . ' {type:bookshelf}'); + $search->assertSee('Search Results'); + $search->assertSeeText($shelf->name, true); } public function test_invalid_page_search() @@ -118,6 +120,18 @@ class EntitySearchTest extends TestCase $exactSearchB->assertStatus(200)->assertDontSee($page->name); } + public function test_search_terms_with_delimiters_are_converted_to_exact_matches() + { + $this->asEditor(); + $page = $this->newPage(['name' => 'Delimiter test', 'html' => '

1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`

']); + $terms = explode(' ', '1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`'); + + foreach ($terms as $term) { + $search = $this->get('/search?term=' . urlencode($term)); + $search->assertSee($page->name); + } + } + public function test_search_filters() { $page = $this->newPage(['name' => 'My new test quaffleachits', 'html' => 'this is about an orange donkey danzorbhsing']); @@ -302,4 +316,99 @@ class EntitySearchTest extends TestCase $search->assertSeeText($page->name); $search->assertSee($page->getUrl()); } + + public function test_search_ranks_common_words_lower() + { + $this->newPage(['name' => 'Test page A', 'html' => '

dog biscuit dog dog

']); + $this->newPage(['name' => 'Test page B', 'html' => '

cat biscuit

']); + + $search = $this->asEditor()->get('/search?term=cat+dog+biscuit'); + $search->assertElementContains('.entity-list > .page', 'Test page A', 1); + $search->assertElementContains('.entity-list > .page', 'Test page B', 2); + + for ($i = 0; $i < 2; $i++) { + $this->newPage(['name' => 'Test page ' . $i, 'html' => '

dog

']); + } + + $search = $this->asEditor()->get('/search?term=cat+dog+biscuit'); + $search->assertElementContains('.entity-list > .page', 'Test page B', 1); + $search->assertElementContains('.entity-list > .page', 'Test page A', 2); + } + + public function test_terms_in_headers_have_an_adjusted_index_score() + { + $page = $this->newPage(['name' => 'Test page A', 'html' => ' +

TermA

+

TermB TermNested

+

TermC

+

TermD

+

TermE

+
TermF
+
TermG
+ ']); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + + $this->assertEquals(1, $scoreByTerm->get('TermA')); + $this->assertEquals(10, $scoreByTerm->get('TermB')); + $this->assertEquals(10, $scoreByTerm->get('TermNested')); + $this->assertEquals(5, $scoreByTerm->get('TermC')); + $this->assertEquals(4, $scoreByTerm->get('TermD')); + $this->assertEquals(3, $scoreByTerm->get('TermE')); + $this->assertEquals(2, $scoreByTerm->get('TermF')); + // Is 1.5 but stored as integer, rounding up + $this->assertEquals(2, $scoreByTerm->get('TermG')); + } + + public function test_name_and_content_terms_are_merged_to_single_score() + { + $page = $this->newPage(['name' => 'TermA', 'html' => ' +

TermA

+ ']); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + + // Scores 40 for being in the name then 1 for being in the content + $this->assertEquals(41, $scoreByTerm->get('TermA')); + } + + public function test_tag_names_and_values_are_indexed_for_search() + { + $page = $this->newPage(['name' => 'PageA', 'html' => '

content

', 'tags' => [ + ['name' => 'Animal', 'value' => 'MeowieCat'], + ['name' => 'SuperImportant'], + ]]); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + $this->assertEquals(5, $scoreByTerm->get('MeowieCat')); + $this->assertEquals(3, $scoreByTerm->get('Animal')); + $this->assertEquals(3, $scoreByTerm->get('SuperImportant')); + } + + public function test_matching_terms_in_search_results_are_highlighted() + { + $this->newPage(['name' => 'My Meowie Cat', 'html' => '

A superimportant page about meowieable animals

', 'tags' => [ + ['name' => 'Animal', 'value' => 'MeowieCat'], + ['name' => 'SuperImportant'], + ]]); + + $search = $this->asEditor()->get('/search?term=SuperImportant+Meowie'); + // Title + $search->assertSee('My Meowie Cat', false); + // Content + $search->assertSee('A superimportant page about meowieable animals', false); + // Tag name + $search->assertElementContains('.tag-name.highlight', 'SuperImportant'); + // Tag value + $search->assertElementContains('.tag-value.highlight', 'MeowieCat'); + } + + public function test_html_entities_in_item_details_remains_escaped_in_search_results() + { + $this->newPage(['name' => 'My TestPageContent', 'html' => '

My supercool <great> TestPageContent page

']); + + $search = $this->asEditor()->get('/search?term=TestPageContent'); + $search->assertSee('My <cool> TestPageContent', false); + $search->assertSee('My supercool <great> TestPageContent page', false); + } }