Merge pull request #3043 from BookStackApp/search_improvements_a

Search Engine Improvement
This commit is contained in:
Dan Brown 2021-11-13 15:13:29 +00:00 committed by GitHub
commit f2ee95ca03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 811 additions and 149 deletions

View File

@ -6,6 +6,12 @@ use BookStack\Model;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Relations\MorphTo;
/**
* @property int $id
* @property string $name
* @property string $value
* @property int $order
*/
class Tag extends Model
{
use HasFactory;

View File

@ -2,6 +2,7 @@
namespace BookStack\Console\Commands;
use BookStack\Entities\Models\Entity;
use BookStack\Entities\Tools\SearchIndex;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
@ -22,6 +23,9 @@ class RegenerateSearch extends Command
*/
protected $description = 'Re-index all content for searching';
/**
* @var SearchIndex
*/
protected $searchIndex;
/**
@ -45,8 +49,13 @@ class RegenerateSearch extends Command
DB::setDefaultConnection($this->option('database'));
}
$this->searchIndex->indexAllEntities();
$this->searchIndex->indexAllEntities(function (Entity $model, int $processed, int $total) {
$this->info('Indexed ' . class_basename($model) . ' entries (' . $processed . '/' . $total . ')');
});
DB::setDefaultConnection($connection);
$this->comment('Search index regenerated');
$this->line('Search index regenerated!');
return static::SUCCESS;
}
}

View File

@ -24,7 +24,7 @@ class Book extends Entity implements HasCoverImage
{
use HasFactory;
public $searchFactor = 2;
public $searchFactor = 1.2;
protected $fillable = ['name', 'description'];
protected $hidden = ['restricted', 'pivot', 'image_id', 'deleted_at'];

View File

@ -13,7 +13,7 @@ class Bookshelf extends Entity implements HasCoverImage
protected $table = 'bookshelves';
public $searchFactor = 3;
public $searchFactor = 1.2;
protected $fillable = ['name', 'description', 'image_id'];

View File

@ -16,7 +16,7 @@ class Chapter extends BookChild
{
use HasFactory;
public $searchFactor = 1.3;
public $searchFactor = 1.2;
protected $fillable = ['name', 'description', 'priority', 'book_id'];
protected $hidden = ['restricted', 'pivot', 'deleted_at'];

View File

@ -238,20 +238,12 @@ abstract class Entity extends Model implements Sluggable, Favouritable, Viewable
return mb_substr($this->name, 0, $length - 3) . '...';
}
/**
* Get the body text of this entity.
*/
public function getText(): string
{
return $this->{$this->textField} ?? '';
}
/**
* Get an excerpt of this entity's descriptive content to the specified length.
*/
public function getExcerpt(int $length = 100): string
{
$text = $this->getText();
$text = $this->{$this->textField} ?? '';
if (mb_strlen($text) > $length) {
$text = mb_substr($text, 0, $length - 3) . '...';

View File

@ -3,13 +3,13 @@
namespace BookStack\Entities\Models;
use BookStack\Entities\Tools\PageContent;
use BookStack\Facades\Permissions;
use BookStack\Uploads\Attachment;
use Illuminate\Database\Eloquent\Builder;
use Illuminate\Database\Eloquent\Collection;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
use Illuminate\Database\Eloquent\Relations\HasMany;
use Permissions;
/**
* Class Page.
@ -64,10 +64,8 @@ class Page extends BookChild
/**
* Check if this page has a chapter.
*
* @return bool
*/
public function hasChapter()
public function hasChapter(): bool
{
return $this->chapter()->count() > 0;
}

View File

@ -157,8 +157,8 @@ class PageRepo
*/
public function publishDraft(Page $draft, array $input): Page
{
$this->baseRepo->update($draft, $input);
$this->updateTemplateStatusAndContentFromInput($draft, $input);
$this->baseRepo->update($draft, $input);
$draft->draft = false;
$draft->revision_count = 1;

View File

@ -2,26 +2,31 @@
namespace BookStack\Entities\Tools;
use BookStack\Actions\Tag;
use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Entities\Models\Page;
use BookStack\Entities\Models\SearchTerm;
use DOMDocument;
use DOMNode;
use Illuminate\Support\Collection;
class SearchIndex
{
/**
* @var SearchTerm
* A list of delimiter characters used to break-up parsed content into terms for indexing.
*
* @var string
*/
protected $searchTerm;
public static $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
/**
* @var EntityProvider
*/
protected $entityProvider;
public function __construct(SearchTerm $searchTerm, EntityProvider $entityProvider)
public function __construct(EntityProvider $entityProvider)
{
$this->searchTerm = $searchTerm;
$this->entityProvider = $entityProvider;
}
@ -31,14 +36,8 @@ class SearchIndex
public function indexEntity(Entity $entity)
{
$this->deleteEntityTerms($entity);
$nameTerms = $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor);
$bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor);
$terms = array_merge($nameTerms, $bodyTerms);
foreach ($terms as $index => $term) {
$terms[$index]['entity_type'] = $entity->getMorphClass();
$terms[$index]['entity_id'] = $entity->id;
}
$this->searchTerm->newQuery()->insert($terms);
$terms = $this->entityToTermDataArray($entity);
SearchTerm::query()->insert($terms);
}
/**
@ -46,40 +45,54 @@ class SearchIndex
*
* @param Entity[] $entities
*/
protected function indexEntities(array $entities)
public function indexEntities(array $entities)
{
$terms = [];
foreach ($entities as $entity) {
$nameTerms = $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor);
$bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor);
foreach (array_merge($nameTerms, $bodyTerms) as $term) {
$term['entity_id'] = $entity->id;
$term['entity_type'] = $entity->getMorphClass();
$terms[] = $term;
}
$entityTerms = $this->entityToTermDataArray($entity);
array_push($terms, ...$entityTerms);
}
$chunkedTerms = array_chunk($terms, 500);
foreach ($chunkedTerms as $termChunk) {
$this->searchTerm->newQuery()->insert($termChunk);
SearchTerm::query()->insert($termChunk);
}
}
/**
* Delete and re-index the terms for all entities in the system.
* Can take a callback which is used for reporting progress.
* Callback receives three arguments:
* - An instance of the model being processed
* - The number that have been processed so far.
* - The total number of that model to be processed.
*
* @param callable(Entity, int, int)|null $progressCallback
*/
public function indexAllEntities()
public function indexAllEntities(?callable $progressCallback = null)
{
$this->searchTerm->newQuery()->truncate();
SearchTerm::query()->truncate();
foreach ($this->entityProvider->all() as $entityModel) {
$selectFields = ['id', 'name', $entityModel->textField];
$indexContentField = $entityModel instanceof Page ? 'html' : 'description';
$selectFields = ['id', 'name', $indexContentField];
$total = $entityModel->newQuery()->withTrashed()->count();
$chunkSize = 250;
$processed = 0;
$chunkCallback = function (Collection $entities) use ($progressCallback, &$processed, $total, $chunkSize, $entityModel) {
$this->indexEntities($entities->all());
$processed = min($processed + $chunkSize, $total);
if (is_callable($progressCallback)) {
$progressCallback($entityModel, $processed, $total);
}
};
$entityModel->newQuery()
->withTrashed()
->select($selectFields)
->chunk(1000, function (Collection $entities) {
$this->indexEntities($entities->all());
});
->with(['tags:id,name,value,entity_id,entity_type'])
->chunk($chunkSize, $chunkCallback);
}
}
@ -92,12 +105,97 @@ class SearchIndex
}
/**
* Create a scored term array from the given text.
* Create a scored term array from the given text, where the keys are the terms
* and the values are their scores.
*
* @returns array<string, int>
*/
protected function generateTermArrayFromText(string $text, int $scoreAdjustment = 1): array
protected function generateTermScoreMapFromText(string $text, int $scoreAdjustment = 1): array
{
$termMap = $this->textToTermCountMap($text);
foreach ($termMap as $term => $count) {
$termMap[$term] = $count * $scoreAdjustment;
}
return $termMap;
}
/**
* Create a scored term array from the given HTML, where the keys are the terms
* and the values are their scores.
*
* @returns array<string, int>
*/
protected function generateTermScoreMapFromHtml(string $html): array
{
if (empty($html)) {
return [];
}
$scoresByTerm = [];
$elementScoreAdjustmentMap = [
'h1' => 10,
'h2' => 5,
'h3' => 4,
'h4' => 3,
'h5' => 2,
'h6' => 1.5,
];
$html = '<body>' . $html . '</body>';
libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
$topElems = $doc->documentElement->childNodes->item(0)->childNodes;
/** @var DOMNode $child */
foreach ($topElems as $child) {
$nodeName = $child->nodeName;
$termCounts = $this->textToTermCountMap(trim($child->textContent));
foreach ($termCounts as $term => $count) {
$scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1);
$scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange;
}
}
return $scoresByTerm;
}
/**
* Create a scored term map from the given set of entity tags.
*
* @param Tag[] $tags
*
* @returns array<string, int>
*/
protected function generateTermScoreMapFromTags(array $tags): array
{
$scoreMap = [];
$names = [];
$values = [];
foreach ($tags as $tag) {
$names[] = $tag->name;
$values[] = $tag->value;
}
$nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3);
$valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5);
return $this->mergeTermScoreMaps($nameMap, $valueMap);
}
/**
* For the given text, return an array where the keys are the unique term words
* and the values are the frequency of that term.
*
* @returns array<string, int>
*/
protected function textToTermCountMap(string $text): array
{
$tokenMap = []; // {TextToken => OccurrenceCount}
$splitChars = " \n\t.,!?:;()[]{}<>`'\"";
$splitChars = static::$delimiters;
$token = strtok($text, $splitChars);
while ($token !== false) {
@ -108,14 +206,61 @@ class SearchIndex
$token = strtok($splitChars);
}
$terms = [];
foreach ($tokenMap as $token => $count) {
$terms[] = [
'term' => $token,
'score' => $count * $scoreAdjustment,
return $tokenMap;
}
/**
* For the given entity, Generate an array of term data details.
* Is the raw term data, not instances of SearchTerm models.
*
* @returns array{term: string, score: float, entity_id: int, entity_type: string}[]
*/
protected function entityToTermDataArray(Entity $entity): array
{
$nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor);
$tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all());
if ($entity instanceof Page) {
$bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html);
} else {
$bodyTermsMap = $this->generateTermScoreMapFromText($entity->description ?? '', $entity->searchFactor);
}
$mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap);
$dataArray = [];
$entityId = $entity->id;
$entityType = $entity->getMorphClass();
foreach ($mergedScoreMap as $term => $score) {
$dataArray[] = [
'term' => $term,
'score' => $score,
'entity_type' => $entityType,
'entity_id' => $entityId,
];
}
return $terms;
return $dataArray;
}
/**
* For the given term data arrays, Merge their contents by term
* while combining any scores.
*
* @param array<string, int>[] ...$scoreMaps
*
* @returns array<string, int>
*/
protected function mergeTermScoreMaps(...$scoreMaps): array
{
$mergedMap = [];
foreach ($scoreMaps as $scoreMap) {
foreach ($scoreMap as $term => $score) {
$mergedMap[$term] = ($mergedMap[$term] ?? 0) + $score;
}
}
return $mergedMap;
}
}

View File

@ -57,15 +57,22 @@ class SearchOptions
$instance = new SearchOptions();
$inputs = $request->only(['search', 'types', 'filters', 'exact', 'tags']);
$instance->searches = explode(' ', $inputs['search'] ?? []);
$instance->exacts = array_filter($inputs['exact'] ?? []);
$parsedStandardTerms = static::parseStandardTermString($inputs['search'] ?? '');
$instance->searches = $parsedStandardTerms['terms'];
$instance->exacts = $parsedStandardTerms['exacts'];
array_push($instance->exacts, ...array_filter($inputs['exact'] ?? []));
$instance->tags = array_filter($inputs['tags'] ?? []);
foreach (($inputs['filters'] ?? []) as $filterKey => $filterVal) {
if (empty($filterVal)) {
continue;
}
$instance->filters[$filterKey] = $filterVal === 'true' ? '' : $filterVal;
}
if (isset($inputs['types']) && count($inputs['types']) < 4) {
$instance->filters['type'] = implode('|', $inputs['types']);
}
@ -102,11 +109,9 @@ class SearchOptions
}
// Parse standard terms
foreach (explode(' ', trim($searchString)) as $searchTerm) {
if ($searchTerm !== '') {
$terms['searches'][] = $searchTerm;
}
}
$parsedStandardTerms = static::parseStandardTermString($searchString);
array_push($terms['searches'], ...$parsedStandardTerms['terms']);
array_push($terms['exacts'], ...$parsedStandardTerms['exacts']);
// Split filter values out
$splitFilters = [];
@ -119,6 +124,33 @@ class SearchOptions
return $terms;
}
/**
* Parse a standard search term string into individual search terms and
* extract any exact terms searches to be made.
*
* @return array{terms: array<string>, exacts: array<string>}
*/
protected static function parseStandardTermString(string $termString): array
{
$terms = explode(' ', $termString);
$indexDelimiters = SearchIndex::$delimiters;
$parsed = [
'terms' => [],
'exacts' => [],
];
foreach ($terms as $searchTerm) {
if ($searchTerm === '') {
continue;
}
$parsedList = (strpbrk($searchTerm, $indexDelimiters) === false) ? 'terms' : 'exacts';
$parsed[$parsedList][] = $searchTerm;
}
return $parsed;
}
/**
* Encode this instance to a search string.
*/

View File

@ -0,0 +1,236 @@
<?php
namespace BookStack\Entities\Tools;
use BookStack\Actions\Tag;
use BookStack\Entities\Models\Entity;
use Illuminate\Support\HtmlString;
class SearchResultsFormatter
{
/**
* For the given array of entities, Prepare the models to be shown in search result
* output. This sets a series of additional attributes.
*
* @param Entity[] $results
*/
public function format(array $results, SearchOptions $options): void
{
foreach ($results as $result) {
$this->setSearchPreview($result, $options);
}
}
/**
* Update the given entity model to set attributes used for previews of the item
* primarily within search result lists.
*/
protected function setSearchPreview(Entity $entity, SearchOptions $options)
{
$textProperty = $entity->textField;
$textContent = $entity->$textProperty;
$terms = array_merge($options->exacts, $options->searches);
$originalContentByNewAttribute = [
'preview_name' => $entity->name,
'preview_content' => $textContent,
];
foreach ($originalContentByNewAttribute as $attributeName => $content) {
$targetLength = ($attributeName === 'preview_name') ? 0 : 260;
$matchRefs = $this->getMatchPositions($content, $terms);
$mergedRefs = $this->sortAndMergeMatchPositions($matchRefs);
$formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content, $targetLength);
$entity->setAttribute($attributeName, new HtmlString($formatted));
}
$tags = $entity->relationLoaded('tags') ? $entity->tags->all() : [];
$this->highlightTagsContainingTerms($tags, $terms);
}
/**
* Highlight tags which match the given terms.
*
* @param Tag[] $tags
* @param string[] $terms
*/
protected function highlightTagsContainingTerms(array $tags, array $terms): void
{
foreach ($tags as $tag) {
$tagName = strtolower($tag->name);
$tagValue = strtolower($tag->value);
foreach ($terms as $term) {
$termLower = strtolower($term);
if (strpos($tagName, $termLower) !== false) {
$tag->setAttribute('highlight_name', true);
}
if (strpos($tagValue, $termLower) !== false) {
$tag->setAttribute('highlight_value', true);
}
}
}
}
/**
* Get positions of the given terms within the given text.
* Is in the array format of [int $startIndex => int $endIndex] where the indexes
* are positions within the provided text.
*
* @return array<int, int>
*/
protected function getMatchPositions(string $text, array $terms): array
{
$matchRefs = [];
$text = strtolower($text);
foreach ($terms as $term) {
$offset = 0;
$term = strtolower($term);
$pos = strpos($text, $term, $offset);
while ($pos !== false) {
$end = $pos + strlen($term);
$matchRefs[$pos] = $end;
$offset = $end;
$pos = strpos($text, $term, $offset);
}
}
return $matchRefs;
}
/**
* Sort the given match positions before merging them where they're
* adjacent or where they overlap.
*
* @param array<int, int> $matchPositions
*
* @return array<int, int>
*/
protected function sortAndMergeMatchPositions(array $matchPositions): array
{
ksort($matchPositions);
$mergedRefs = [];
$lastStart = 0;
$lastEnd = 0;
foreach ($matchPositions as $start => $end) {
if ($start > $lastEnd) {
$mergedRefs[$start] = $end;
$lastStart = $start;
$lastEnd = $end;
} elseif ($end > $lastEnd) {
$mergedRefs[$lastStart] = $end;
$lastEnd = $end;
}
}
return $mergedRefs;
}
/**
* Format the given original text, returning a version where terms are highlighted within.
* Returned content is in HTML text format.
* A given $targetLength of 0 asserts no target length limit.
*
* This is a complex function but written to be relatively efficient, going through the term matches in order
* so that we're only doing a one-time loop through of the matches. There is no further searching
* done within here.
*/
protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText, int $targetLength): string
{
$maxEnd = strlen($originalText);
$fetchAll = ($targetLength === 0);
$contextLength = ($fetchAll ? 0 : 32);
$firstStart = null;
$lastEnd = 0;
$content = '';
$contentTextLength = 0;
if ($fetchAll) {
$targetLength = $maxEnd * 2;
}
foreach ($matchPositions as $start => $end) {
// Get our outer text ranges for the added context we want to show upon the result.
$contextStart = max($start - $contextLength, 0, $lastEnd);
$contextEnd = min($end + $contextLength, $maxEnd);
// Adjust the start if we're going to be touching the previous match.
$startDiff = $start - $lastEnd;
if ($startDiff < 0) {
$contextStart = $start;
// Trims off '$startDiff' number of characters to bring it back to the start
// if this current match zone.
$content = substr($content, 0, strlen($content) + $startDiff);
$contentTextLength += $startDiff;
}
// Add ellipsis between results
if (!$fetchAll && $contextStart !== 0 && $contextStart !== $start) {
$content .= ' ...';
$contentTextLength += 4;
} else if ($fetchAll) {
// Or fill in gap since the previous match
$fillLength = $contextStart - $lastEnd;
$content .= e(substr($originalText, $lastEnd, $fillLength));
$contentTextLength += $fillLength;
}
// Add our content including the bolded matching text
$content .= e(substr($originalText, $contextStart, $start - $contextStart));
$contentTextLength += $start - $contextStart;
$content .= '<strong>' . e(substr($originalText, $start, $end - $start)) . '</strong>';
$contentTextLength += $end - $start;
$content .= e(substr($originalText, $end, $contextEnd - $end));
$contentTextLength += $contextEnd - $end;
// Update our last end position
$lastEnd = $contextEnd;
// Update the first start position if it's not already been set
if (is_null($firstStart)) {
$firstStart = $contextStart;
}
// Stop if we're near our target
if ($contentTextLength >= $targetLength - 10) {
break;
}
}
// Just copy out the content if we haven't moved along anywhere.
if ($lastEnd === 0) {
$content = e(substr($originalText, 0, $targetLength));
$contentTextLength = $targetLength;
$lastEnd = $targetLength;
}
// Pad out the end if we're low
$remainder = $targetLength - $contentTextLength;
if ($remainder > 10) {
$padEndLength = min($maxEnd - $lastEnd, $remainder);
$content .= e(substr($originalText, $lastEnd, $padEndLength));
$lastEnd += $padEndLength;
$contentTextLength += $padEndLength;
}
// Pad out the start if we're still low
$remainder = $targetLength - $contentTextLength;
$firstStart = $firstStart ?: 0;
if (!$fetchAll && $remainder > 10 && $firstStart !== 0) {
$padStart = max(0, $firstStart - $remainder);
$content = ($padStart === 0 ? '' : '...') . e(substr($originalText, $padStart, $firstStart - $padStart)) . substr($content, 4);
}
// Add ellipsis if we're not at the end
if ($lastEnd < $maxEnd) {
$content .= '...';
}
return $content;
}
}

View File

@ -5,13 +5,18 @@ namespace BookStack\Entities\Tools;
use BookStack\Auth\Permissions\PermissionService;
use BookStack\Auth\User;
use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\BookChild;
use BookStack\Entities\Models\Entity;
use Illuminate\Database\Connection;
use BookStack\Entities\Models\Page;
use BookStack\Entities\Models\SearchTerm;
use Illuminate\Database\Eloquent\Builder as EloquentBuilder;
use Illuminate\Database\Eloquent\Collection as EloquentCollection;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\JoinClause;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Str;
use SplObjectStorage;
class SearchRunner
{
@ -20,11 +25,6 @@ class SearchRunner
*/
protected $entityProvider;
/**
* @var Connection
*/
protected $db;
/**
* @var PermissionService
*/
@ -37,11 +37,19 @@ class SearchRunner
*/
protected $queryOperators = ['<=', '>=', '=', '<', '>', 'like', '!='];
public function __construct(EntityProvider $entityProvider, Connection $db, PermissionService $permissionService)
/**
* Retain a cache of score adjusted terms for specific search options.
* From PHP>=8 this can be made into a WeakMap instead.
*
* @var SplObjectStorage
*/
protected $termAdjustmentCache;
public function __construct(EntityProvider $entityProvider, PermissionService $permissionService)
{
$this->entityProvider = $entityProvider;
$this->db = $db;
$this->permissionService = $permissionService;
$this->termAdjustmentCache = new SplObjectStorage();
}
/**
@ -69,16 +77,17 @@ class SearchRunner
continue;
}
$search = $this->searchEntityTable($searchOpts, $entityType, $page, $count, $action);
/** @var int $entityTotal */
$entityTotal = $this->searchEntityTable($searchOpts, $entityType, $page, $count, $action, true);
$entityModelInstance = $this->entityProvider->get($entityType);
$searchQuery = $this->buildQuery($searchOpts, $entityModelInstance, $action);
$entityTotal = $searchQuery->count();
$searchResults = $this->getPageOfDataFromQuery($searchQuery, $entityModelInstance, $page, $count);
if ($entityTotal > ($page * $count)) {
$hasMore = true;
}
$total += $entityTotal;
$results = $results->merge($search);
$results = $results->merge($searchResults);
}
return [
@ -103,7 +112,9 @@ class SearchRunner
if (!in_array($entityType, $entityTypes)) {
continue;
}
$search = $this->buildEntitySearchQuery($opts, $entityType)->where('book_id', '=', $bookId)->take(20)->get();
$entityModelInstance = $this->entityProvider->get($entityType);
$search = $this->buildQuery($opts, $entityModelInstance)->where('book_id', '=', $bookId)->take(20)->get();
$results = $results->merge($search);
}
@ -116,78 +127,199 @@ class SearchRunner
public function searchChapter(int $chapterId, string $searchString): Collection
{
$opts = SearchOptions::fromString($searchString);
$pages = $this->buildEntitySearchQuery($opts, 'page')->where('chapter_id', '=', $chapterId)->take(20)->get();
$entityModelInstance = $this->entityProvider->get('page');
$pages = $this->buildQuery($opts, $entityModelInstance)->where('chapter_id', '=', $chapterId)->take(20)->get();
return $pages->sortByDesc('score');
}
/**
* Search across a particular entity type.
* Setting getCount = true will return the total
* matching instead of the items themselves.
*
* @return \Illuminate\Database\Eloquent\Collection|int|static[]
* Get a page of result data from the given query based on the provided page parameters.
*/
protected function searchEntityTable(SearchOptions $searchOpts, string $entityType = 'page', int $page = 1, int $count = 20, string $action = 'view', bool $getCount = false)
protected function getPageOfDataFromQuery(EloquentBuilder $query, Entity $entityModelInstance, int $page = 1, int $count = 20): EloquentCollection
{
$query = $this->buildEntitySearchQuery($searchOpts, $entityType, $action);
if ($getCount) {
return $query->count();
$relations = ['tags'];
if ($entityModelInstance instanceof BookChild) {
$relations['book'] = function (BelongsTo $query) {
$query->visible();
};
}
$query = $query->skip(($page - 1) * $count)->take($count);
if ($entityModelInstance instanceof Page) {
$relations['chapter'] = function (BelongsTo $query) {
$query->visible();
};
}
return $query->get();
return $query->clone()
->with(array_filter($relations))
->skip(($page - 1) * $count)
->take($count)
->get();
}
/**
* Create a search query for an entity.
*/
protected function buildEntitySearchQuery(SearchOptions $searchOpts, string $entityType = 'page', string $action = 'view'): EloquentBuilder
protected function buildQuery(SearchOptions $searchOpts, Entity $entityModelInstance, string $action = 'view'): EloquentBuilder
{
$entity = $this->entityProvider->get($entityType);
$entitySelect = $entity->newQuery();
$entityQuery = $entityModelInstance->newQuery();
if ($entityModelInstance instanceof Page) {
$entityQuery->select($entityModelInstance::$listAttributes);
} else {
$entityQuery->select(['*']);
}
// Handle normal search terms
if (count($searchOpts->searches) > 0) {
$rawScoreSum = $this->db->raw('SUM(score) as score');
$subQuery = $this->db->table('search_terms')->select('entity_id', 'entity_type', $rawScoreSum);
$subQuery->where('entity_type', '=', $entity->getMorphClass());
$subQuery->where(function (Builder $query) use ($searchOpts) {
foreach ($searchOpts->searches as $inputTerm) {
$query->orWhere('term', 'like', $inputTerm . '%');
}
})->groupBy('entity_type', 'entity_id');
$entitySelect->join($this->db->raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) {
$join->on('id', '=', 'entity_id');
})->addSelect($entity->getTable() . '.*')
->selectRaw('s.score')
->orderBy('score', 'desc');
$entitySelect->mergeBindings($subQuery);
}
$this->applyTermSearch($entityQuery, $searchOpts, $entityModelInstance);
// Handle exact term matching
foreach ($searchOpts->exacts as $inputTerm) {
$entitySelect->where(function (EloquentBuilder $query) use ($inputTerm, $entity) {
$entityQuery->where(function (EloquentBuilder $query) use ($inputTerm, $entityModelInstance) {
$query->where('name', 'like', '%' . $inputTerm . '%')
->orWhere($entity->textField, 'like', '%' . $inputTerm . '%');
->orWhere($entityModelInstance->textField, 'like', '%' . $inputTerm . '%');
});
}
// Handle tag searches
foreach ($searchOpts->tags as $inputTerm) {
$this->applyTagSearch($entitySelect, $inputTerm);
$this->applyTagSearch($entityQuery, $inputTerm);
}
// Handle filters
foreach ($searchOpts->filters as $filterTerm => $filterValue) {
$functionName = Str::camel('filter_' . $filterTerm);
if (method_exists($this, $functionName)) {
$this->$functionName($entitySelect, $entity, $filterValue);
$this->$functionName($entityQuery, $entityModelInstance, $filterValue);
}
}
return $this->permissionService->enforceEntityRestrictions($entity, $entitySelect, $action);
return $this->permissionService->enforceEntityRestrictions($entityModelInstance, $entityQuery, $action);
}
/**
* For the given search query, apply the queries for handling the regular search terms.
*/
protected function applyTermSearch(EloquentBuilder $entityQuery, SearchOptions $options, Entity $entity): void
{
$terms = $options->searches;
if (count($terms) === 0) {
return;
}
$scoredTerms = $this->getTermAdjustments($options);
$scoreSelect = $this->selectForScoredTerms($scoredTerms);
$subQuery = DB::table('search_terms')->select([
'entity_id',
'entity_type',
DB::raw($scoreSelect['statement']),
]);
$subQuery->addBinding($scoreSelect['bindings'], 'select');
$subQuery->where('entity_type', '=', $entity->getMorphClass());
$subQuery->where(function (Builder $query) use ($terms) {
foreach ($terms as $inputTerm) {
$query->orWhere('term', 'like', $inputTerm . '%');
}
});
$subQuery->groupBy('entity_type', 'entity_id');
$entityQuery->joinSub($subQuery, 's', 'id', '=', 'entity_id');
$entityQuery->addSelect('s.score');
$entityQuery->orderBy('score', 'desc');
}
/**
* Create a select statement, with prepared bindings, for the given
* set of scored search terms.
*
* @param array<string, float> $scoredTerms
*
* @return array{statement: string, bindings: string[]}
*/
protected function selectForScoredTerms(array $scoredTerms): array
{
// Within this we walk backwards to create the chain of 'if' statements
// so that each previous statement is used in the 'else' condition of
// the next (earlier) to be built. We start at '0' to have no score
// on no match (Should never actually get to this case).
$ifChain = '0';
$bindings = [];
foreach ($scoredTerms as $term => $score) {
$ifChain = 'IF(term like ?, score * ' . (float) $score . ', ' . $ifChain . ')';
$bindings[] = $term . '%';
}
return [
'statement' => 'SUM(' . $ifChain . ') as score',
'bindings' => array_reverse($bindings),
];
}
/**
* For the terms in the given search options, query their popularity across all
* search terms then provide that back as score adjustment multiplier applicable
* for their rarity. Returns an array of float multipliers, keyed by term.
*
* @return array<string, float>
*/
protected function getTermAdjustments(SearchOptions $options): array
{
if (isset($this->termAdjustmentCache[$options])) {
return $this->termAdjustmentCache[$options];
}
$termQuery = SearchTerm::query()->toBase();
$whenStatements = [];
$whenBindings = [];
foreach ($options->searches as $term) {
$whenStatements[] = 'WHEN term LIKE ? THEN ?';
$whenBindings[] = $term . '%';
$whenBindings[] = $term;
$termQuery->orWhere('term', 'like', $term . '%');
}
$case = 'CASE ' . implode(' ', $whenStatements) . ' END';
$termQuery->selectRaw($case . ' as term', $whenBindings);
$termQuery->selectRaw('COUNT(*) as count');
$termQuery->groupByRaw($case, $whenBindings);
$termCounts = $termQuery->pluck('count', 'term')->toArray();
$adjusted = $this->rawTermCountsToAdjustments($termCounts);
$this->termAdjustmentCache[$options] = $adjusted;
return $this->termAdjustmentCache[$options];
}
/**
* Convert counts of terms into a relative-count normalised multiplier.
*
* @param array<string, int> $termCounts
*
* @return array<string, int>
*/
protected function rawTermCountsToAdjustments(array $termCounts): array
{
if (empty($termCounts)) {
return [];
}
$multipliers = [];
$max = max(array_values($termCounts));
foreach ($termCounts as $term => $count) {
$percent = round($count / $max, 5);
$multipliers[$term] = 1.3 - $percent;
}
return $multipliers;
}
/**
@ -238,44 +370,40 @@ class SearchRunner
/**
* Custom entity search filters.
*/
protected function filterUpdatedAfter(EloquentBuilder $query, Entity $model, $input)
protected function filterUpdatedAfter(EloquentBuilder $query, Entity $model, $input): void
{
try {
$date = date_create($input);
$query->where('updated_at', '>=', $date);
} catch (\Exception $e) {
return;
}
$query->where('updated_at', '>=', $date);
}
protected function filterUpdatedBefore(EloquentBuilder $query, Entity $model, $input)
protected function filterUpdatedBefore(EloquentBuilder $query, Entity $model, $input): void
{
try {
$date = date_create($input);
$query->where('updated_at', '<', $date);
} catch (\Exception $e) {
return;
}
$query->where('updated_at', '<', $date);
}
protected function filterCreatedAfter(EloquentBuilder $query, Entity $model, $input)
protected function filterCreatedAfter(EloquentBuilder $query, Entity $model, $input): void
{
try {
$date = date_create($input);
$query->where('created_at', '>=', $date);
} catch (\Exception $e) {
return;
}
$query->where('created_at', '>=', $date);
}
protected function filterCreatedBefore(EloquentBuilder $query, Entity $model, $input)
{
try {
$date = date_create($input);
$query->where('created_at', '<', $date);
} catch (\Exception $e) {
return;
}
$query->where('created_at', '<', $date);
}
protected function filterCreatedBy(EloquentBuilder $query, Entity $model, $input)
@ -352,9 +480,9 @@ class SearchRunner
*/
protected function sortByLastCommented(EloquentBuilder $query, Entity $model)
{
$commentsTable = $this->db->getTablePrefix() . 'comments';
$commentsTable = DB::getTablePrefix() . 'comments';
$morphClass = str_replace('\\', '\\\\', $model->getMorphClass());
$commentQuery = $this->db->raw('(SELECT c1.entity_id, c1.entity_type, c1.created_at as last_commented FROM ' . $commentsTable . ' c1 LEFT JOIN ' . $commentsTable . ' c2 ON (c1.entity_id = c2.entity_id AND c1.entity_type = c2.entity_type AND c1.created_at < c2.created_at) WHERE c1.entity_type = \'' . $morphClass . '\' AND c2.created_at IS NULL) as comments');
$commentQuery = DB::raw('(SELECT c1.entity_id, c1.entity_type, c1.created_at as last_commented FROM ' . $commentsTable . ' c1 LEFT JOIN ' . $commentsTable . ' c2 ON (c1.entity_id = c2.entity_id AND c1.entity_type = c2.entity_type AND c1.created_at < c2.created_at) WHERE c1.entity_type = \'' . $morphClass . '\' AND c2.created_at IS NULL) as comments');
$query->join($commentQuery, $model->getTable() . '.id', '=', 'comments.entity_id')->orderBy('last_commented', 'desc');
}

View File

@ -4,8 +4,8 @@ namespace BookStack\Http\Controllers;
use BookStack\Entities\Queries\Popular;
use BookStack\Entities\Tools\SearchOptions;
use BookStack\Entities\Tools\SearchResultsFormatter;
use BookStack\Entities\Tools\SearchRunner;
use BookStack\Entities\Tools\ShelfContext;
use BookStack\Entities\Tools\SiblingFetcher;
use Illuminate\Http\Request;
@ -14,18 +14,15 @@ class SearchController extends Controller
protected $searchRunner;
protected $entityContextManager;
public function __construct(
SearchRunner $searchRunner,
ShelfContext $entityContextManager
) {
public function __construct(SearchRunner $searchRunner)
{
$this->searchRunner = $searchRunner;
$this->entityContextManager = $entityContextManager;
}
/**
* Searches all entities.
*/
public function search(Request $request)
public function search(Request $request, SearchResultsFormatter $formatter)
{
$searchOpts = SearchOptions::fromRequest($request);
$fullSearchString = $searchOpts->toString();
@ -35,6 +32,7 @@ class SearchController extends Controller
$nextPageLink = url('/search?term=' . urlencode($fullSearchString) . '&page=' . ($page + 1));
$results = $this->searchRunner->searchEntities($searchOpts, 'all', $page, 20);
$formatter->format($results['results']->all(), $searchOpts);
return view('search.all', [
'entities' => $results['results'],

View File

@ -5,6 +5,7 @@ namespace Database\Seeders;
use BookStack\Auth\Permissions\PermissionService;
use BookStack\Auth\Role;
use BookStack\Auth\User;
use BookStack\Entities\Models\Book;
use BookStack\Entities\Models\Chapter;
use BookStack\Entities\Models\Page;
use BookStack\Entities\Tools\SearchIndex;
@ -25,12 +26,16 @@ class LargeContentSeeder extends Seeder
$editorRole = Role::getRole('editor');
$editorUser->attachRole($editorRole);
$largeBook = \BookStack\Entities\Models\Book::factory()->create(['name' => 'Large book' . Str::random(10), 'created_by' => $editorUser->id, 'updated_by' => $editorUser->id]);
/** @var Book $largeBook */
$largeBook = Book::factory()->create(['name' => 'Large book' . Str::random(10), 'created_by' => $editorUser->id, 'updated_by' => $editorUser->id]);
$pages = Page::factory()->count(200)->make(['created_by' => $editorUser->id, 'updated_by' => $editorUser->id]);
$chapters = Chapter::factory()->count(50)->make(['created_by' => $editorUser->id, 'updated_by' => $editorUser->id]);
$largeBook->pages()->saveMany($pages);
$largeBook->chapters()->saveMany($chapters);
app(PermissionService::class)->buildJointPermissions();
app(SearchIndex::class)->indexAllEntities();
$all = array_merge([$largeBook], array_values($pages->all()), array_values($chapters->all()));
app()->make(PermissionService::class)->buildJointPermissionsForEntity($largeBook);
app()->make(SearchIndex::class)->indexEntities($all);
}
}

View File

@ -262,6 +262,10 @@
}
}
.tag-name.highlight, .tag-value.highlight {
font-weight: bold;
}
.tag-list div:last-child .tag-item {
margin-bottom: 0;
}

View File

@ -2,7 +2,7 @@
<a href="{{ $entity->getUrl() }}" class="{{$type}} {{$type === 'page' && $entity->draft ? 'draft' : ''}} {{$classes ?? ''}} entity-list-item" data-entity-type="{{$type}}" data-entity-id="{{$entity->id}}">
<span role="presentation" class="icon text-{{$type}}">@icon($type)</span>
<div class="content">
<h4 class="entity-list-item-name break-text">{{ $entity->name }}</h4>
<h4 class="entity-list-item-name break-text">{{ $entity->preview_name ?? $entity->name }}</h4>
{{ $slot ?? '' }}
</div>
</a>

View File

@ -3,15 +3,15 @@
<div class="entity-item-snippet">
@if($showPath ?? false)
@if($entity->book_id)
@if($entity->relationLoaded('book') && $entity->book)
<span class="text-book">{{ $entity->book->getShortName(42) }}</span>
@if($entity->chapter_id)
@if($entity->relationLoaded('chapter') && $entity->chapter)
<span class="text-muted entity-list-item-path-sep">@icon('chevron-right')</span> <span class="text-chapter">{{ $entity->chapter->getShortName(42) }}</span>
@endif
@endif
@endif
<p class="text-muted break-text">{{ $entity->getExcerpt() }}</p>
<p class="text-muted break-text">{{ $entity->preview_content ?? $entity->getExcerpt() }}</p>
</div>
@if(($showTags ?? false) && $entity->tags->count() > 0)

View File

@ -1,9 +1,9 @@
<div class="tag-item primary-background-light" data-name="{{ $tag->name }}" data-value="{{ $tag->value }}">
@if($linked ?? true)
<div class="tag-name"><a href="{{ $tag->nameUrl() }}">@icon('tag'){{ $tag->name }}</a></div>
@if($tag->value) <div class="tag-value"><a href="{{ $tag->valueUrl() }}">{{$tag->value}}</a></div> @endif
<div class="tag-name {{ $tag->highlight_name ? 'highlight' : '' }}"><a href="{{ $tag->nameUrl() }}">@icon('tag'){{ $tag->name }}</a></div>
@if($tag->value) <div class="tag-value {{ $tag->highlight_value ? 'highlight' : '' }}"><a href="{{ $tag->valueUrl() }}">{{$tag->value}}</a></div> @endif
@else
<div class="tag-name"><span>@icon('tag'){{ $tag->name }}</span></div>
@if($tag->value) <div class="tag-value"><span>{{$tag->value}}</span></div> @endif
<div class="tag-name {{ $tag->highlight_name ? 'highlight' : '' }}"><span>@icon('tag'){{ $tag->name }}</span></div>
@if($tag->value) <div class="tag-value {{ $tag->highlight_value ? 'highlight' : '' }}"><span>{{$tag->value}}</span></div> @endif
@endif
</div>

View File

@ -18,15 +18,17 @@ class EntitySearchTest extends TestCase
$search = $this->asEditor()->get('/search?term=' . urlencode($page->name));
$search->assertSee('Search Results');
$search->assertSee($page->name);
$search->assertSeeText($page->name, true);
}
public function test_bookshelf_search()
{
$shelf = Bookshelf::first();
$search = $this->asEditor()->get('/search?term=' . urlencode(mb_substr($shelf->name, 0, 3)) . ' {type:bookshelf}');
$search->assertStatus(200);
$search->assertSee($shelf->name);
/** @var Bookshelf $shelf */
$shelf = Bookshelf::query()->first();
$search = $this->asEditor()->get('/search?term=' . urlencode($shelf->name) . ' {type:bookshelf}');
$search->assertSee('Search Results');
$search->assertSeeText($shelf->name, true);
}
public function test_invalid_page_search()
@ -118,6 +120,18 @@ class EntitySearchTest extends TestCase
$exactSearchB->assertStatus(200)->assertDontSee($page->name);
}
public function test_search_terms_with_delimiters_are_converted_to_exact_matches()
{
$this->asEditor();
$page = $this->newPage(['name' => 'Delimiter test', 'html' => '<p>1.1 2,2 3?3 4:4 5;5 (8) &lt;9&gt; "10" \'11\' `12`</p>']);
$terms = explode(' ', '1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`');
foreach ($terms as $term) {
$search = $this->get('/search?term=' . urlencode($term));
$search->assertSee($page->name);
}
}
public function test_search_filters()
{
$page = $this->newPage(['name' => 'My new test quaffleachits', 'html' => 'this is about an orange donkey danzorbhsing']);
@ -302,4 +316,99 @@ class EntitySearchTest extends TestCase
$search->assertSeeText($page->name);
$search->assertSee($page->getUrl());
}
public function test_search_ranks_common_words_lower()
{
$this->newPage(['name' => 'Test page A', 'html' => '<p>dog biscuit dog dog</p>']);
$this->newPage(['name' => 'Test page B', 'html' => '<p>cat biscuit</p>']);
$search = $this->asEditor()->get('/search?term=cat+dog+biscuit');
$search->assertElementContains('.entity-list > .page', 'Test page A', 1);
$search->assertElementContains('.entity-list > .page', 'Test page B', 2);
for ($i = 0; $i < 2; $i++) {
$this->newPage(['name' => 'Test page ' . $i, 'html' => '<p>dog</p>']);
}
$search = $this->asEditor()->get('/search?term=cat+dog+biscuit');
$search->assertElementContains('.entity-list > .page', 'Test page B', 1);
$search->assertElementContains('.entity-list > .page', 'Test page A', 2);
}
public function test_terms_in_headers_have_an_adjusted_index_score()
{
$page = $this->newPage(['name' => 'Test page A', 'html' => '
<p>TermA</p>
<h1>TermB <strong>TermNested</strong></h1>
<h2>TermC</h2>
<h3>TermD</h3>
<h4>TermE</h4>
<h5>TermF</h5>
<h6>TermG</h6>
']);
$scoreByTerm = $page->searchTerms()->pluck('score', 'term');
$this->assertEquals(1, $scoreByTerm->get('TermA'));
$this->assertEquals(10, $scoreByTerm->get('TermB'));
$this->assertEquals(10, $scoreByTerm->get('TermNested'));
$this->assertEquals(5, $scoreByTerm->get('TermC'));
$this->assertEquals(4, $scoreByTerm->get('TermD'));
$this->assertEquals(3, $scoreByTerm->get('TermE'));
$this->assertEquals(2, $scoreByTerm->get('TermF'));
// Is 1.5 but stored as integer, rounding up
$this->assertEquals(2, $scoreByTerm->get('TermG'));
}
public function test_name_and_content_terms_are_merged_to_single_score()
{
$page = $this->newPage(['name' => 'TermA', 'html' => '
<p>TermA</p>
']);
$scoreByTerm = $page->searchTerms()->pluck('score', 'term');
// Scores 40 for being in the name then 1 for being in the content
$this->assertEquals(41, $scoreByTerm->get('TermA'));
}
public function test_tag_names_and_values_are_indexed_for_search()
{
$page = $this->newPage(['name' => 'PageA', 'html' => '<p>content</p>', 'tags' => [
['name' => 'Animal', 'value' => 'MeowieCat'],
['name' => 'SuperImportant'],
]]);
$scoreByTerm = $page->searchTerms()->pluck('score', 'term');
$this->assertEquals(5, $scoreByTerm->get('MeowieCat'));
$this->assertEquals(3, $scoreByTerm->get('Animal'));
$this->assertEquals(3, $scoreByTerm->get('SuperImportant'));
}
public function test_matching_terms_in_search_results_are_highlighted()
{
$this->newPage(['name' => 'My Meowie Cat', 'html' => '<p>A superimportant page about meowieable animals</p>', 'tags' => [
['name' => 'Animal', 'value' => 'MeowieCat'],
['name' => 'SuperImportant'],
]]);
$search = $this->asEditor()->get('/search?term=SuperImportant+Meowie');
// Title
$search->assertSee('My <strong>Meowie</strong> Cat', false);
// Content
$search->assertSee('A <strong>superimportant</strong> page about <strong>meowie</strong>able animals', false);
// Tag name
$search->assertElementContains('.tag-name.highlight', 'SuperImportant');
// Tag value
$search->assertElementContains('.tag-value.highlight', 'MeowieCat');
}
public function test_html_entities_in_item_details_remains_escaped_in_search_results()
{
$this->newPage(['name' => 'My <cool> TestPageContent', 'html' => '<p>My supercool &lt;great&gt; TestPageContent page</p>']);
$search = $this->asEditor()->get('/search?term=TestPageContent');
$search->assertSee('My &lt;cool&gt; <strong>TestPageContent</strong>', false);
$search->assertSee('My supercool &lt;great&gt; <strong>TestPageContent</strong> page', false);
}
}