From e1b8fe45b0271e66adbcc06c7d75ddb1a80b4556 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 8 Nov 2021 11:04:27 +0000 Subject: [PATCH 01/22] Refactored search runner a little to be neater --- app/Entities/Tools/SearchRunner.php | 85 ++++++++++--------------- database/seeders/LargeContentSeeder.php | 10 ++- 2 files changed, 39 insertions(+), 56 deletions(-) diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index 223494d46..dc54649be 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -6,11 +6,12 @@ use BookStack\Auth\Permissions\PermissionService; use BookStack\Auth\User; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; -use Illuminate\Database\Connection; use Illuminate\Database\Eloquent\Builder as EloquentBuilder; +use Illuminate\Database\Eloquent\Collection as EloquentCollection; use Illuminate\Database\Query\Builder; use Illuminate\Database\Query\JoinClause; use Illuminate\Support\Collection; +use Illuminate\Support\Facades\DB; use Illuminate\Support\Str; class SearchRunner @@ -20,11 +21,6 @@ class SearchRunner */ protected $entityProvider; - /** - * @var Connection - */ - protected $db; - /** * @var PermissionService */ @@ -37,10 +33,9 @@ class SearchRunner */ protected $queryOperators = ['<=', '>=', '=', '<', '>', 'like', '!=']; - public function __construct(EntityProvider $entityProvider, Connection $db, PermissionService $permissionService) + public function __construct(EntityProvider $entityProvider, PermissionService $permissionService) { $this->entityProvider = $entityProvider; - $this->db = $db; $this->permissionService = $permissionService; } @@ -69,16 +64,16 @@ class SearchRunner continue; } - $search = $this->searchEntityTable($searchOpts, $entityType, $page, $count, $action); - /** @var int $entityTotal */ - $entityTotal = $this->searchEntityTable($searchOpts, $entityType, $page, $count, $action, true); + $searchQuery = 
$this->buildQuery($searchOpts, $entityType, $action); + $entityTotal = $searchQuery->count(); + $searchResults = $this->getPageOfDataFromQuery($searchQuery, $page, $count); if ($entityTotal > ($page * $count)) { $hasMore = true; } $total += $entityTotal; - $results = $results->merge($search); + $results = $results->merge($searchResults); } return [ @@ -103,7 +98,7 @@ class SearchRunner if (!in_array($entityType, $entityTypes)) { continue; } - $search = $this->buildEntitySearchQuery($opts, $entityType)->where('book_id', '=', $bookId)->take(20)->get(); + $search = $this->buildQuery($opts, $entityType)->where('book_id', '=', $bookId)->take(20)->get(); $results = $results->merge($search); } @@ -116,49 +111,41 @@ class SearchRunner public function searchChapter(int $chapterId, string $searchString): Collection { $opts = SearchOptions::fromString($searchString); - $pages = $this->buildEntitySearchQuery($opts, 'page')->where('chapter_id', '=', $chapterId)->take(20)->get(); + $pages = $this->buildQuery($opts, 'page')->where('chapter_id', '=', $chapterId)->take(20)->get(); return $pages->sortByDesc('score'); } /** - * Search across a particular entity type. - * Setting getCount = true will return the total - * matching instead of the items themselves. - * - * @return \Illuminate\Database\Eloquent\Collection|int|static[] + * Get a page of result data from the given query based on the provided page parameters. 
*/ - protected function searchEntityTable(SearchOptions $searchOpts, string $entityType = 'page', int $page = 1, int $count = 20, string $action = 'view', bool $getCount = false) + protected function getPageOfDataFromQuery(EloquentBuilder $query, int $page = 1, int $count = 20): EloquentCollection { - $query = $this->buildEntitySearchQuery($searchOpts, $entityType, $action); - if ($getCount) { - return $query->count(); - } - - $query = $query->skip(($page - 1) * $count)->take($count); - - return $query->get(); + return $query->clone() + ->skip(($page - 1) * $count) + ->take($count) + ->get(); } /** * Create a search query for an entity. */ - protected function buildEntitySearchQuery(SearchOptions $searchOpts, string $entityType = 'page', string $action = 'view'): EloquentBuilder + protected function buildQuery(SearchOptions $searchOpts, string $entityType = 'page', string $action = 'view'): EloquentBuilder { $entity = $this->entityProvider->get($entityType); $entitySelect = $entity->newQuery(); // Handle normal search terms if (count($searchOpts->searches) > 0) { - $rawScoreSum = $this->db->raw('SUM(score) as score'); - $subQuery = $this->db->table('search_terms')->select('entity_id', 'entity_type', $rawScoreSum); + $rawScoreSum = DB::raw('SUM(score) as score'); + $subQuery = DB::table('search_terms')->select('entity_id', 'entity_type', $rawScoreSum); $subQuery->where('entity_type', '=', $entity->getMorphClass()); $subQuery->where(function (Builder $query) use ($searchOpts) { foreach ($searchOpts->searches as $inputTerm) { $query->orWhere('term', 'like', $inputTerm . '%'); } })->groupBy('entity_type', 'entity_id'); - $entitySelect->join($this->db->raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) { + $entitySelect->join(DB::raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) { $join->on('id', '=', 'entity_id'); })->addSelect($entity->getTable() . 
'.*') ->selectRaw('s.score') @@ -238,44 +225,36 @@ class SearchRunner /** * Custom entity search filters. */ - protected function filterUpdatedAfter(EloquentBuilder $query, Entity $model, $input) + protected function filterUpdatedAfter(EloquentBuilder $query, Entity $model, $input): void { try { $date = date_create($input); - } catch (\Exception $e) { - return; - } - $query->where('updated_at', '>=', $date); + $query->where('updated_at', '>=', $date); + } catch (\Exception $e) {} } - protected function filterUpdatedBefore(EloquentBuilder $query, Entity $model, $input) + protected function filterUpdatedBefore(EloquentBuilder $query, Entity $model, $input): void { try { $date = date_create($input); - } catch (\Exception $e) { - return; - } - $query->where('updated_at', '<', $date); + $query->where('updated_at', '<', $date); + } catch (\Exception $e) {} } - protected function filterCreatedAfter(EloquentBuilder $query, Entity $model, $input) + protected function filterCreatedAfter(EloquentBuilder $query, Entity $model, $input): void { try { $date = date_create($input); - } catch (\Exception $e) { - return; - } - $query->where('created_at', '>=', $date); + $query->where('created_at', '>=', $date); + } catch (\Exception $e) {} } protected function filterCreatedBefore(EloquentBuilder $query, Entity $model, $input) { try { $date = date_create($input); - } catch (\Exception $e) { - return; - } - $query->where('created_at', '<', $date); + $query->where('created_at', '<', $date); + } catch (\Exception $e) {} } protected function filterCreatedBy(EloquentBuilder $query, Entity $model, $input) @@ -352,9 +331,9 @@ class SearchRunner */ protected function sortByLastCommented(EloquentBuilder $query, Entity $model) { - $commentsTable = $this->db->getTablePrefix() . 'comments'; + $commentsTable = DB::getTablePrefix() . 
'comments'; $morphClass = str_replace('\\', '\\\\', $model->getMorphClass()); - $commentQuery = $this->db->raw('(SELECT c1.entity_id, c1.entity_type, c1.created_at as last_commented FROM ' . $commentsTable . ' c1 LEFT JOIN ' . $commentsTable . ' c2 ON (c1.entity_id = c2.entity_id AND c1.entity_type = c2.entity_type AND c1.created_at < c2.created_at) WHERE c1.entity_type = \'' . $morphClass . '\' AND c2.created_at IS NULL) as comments'); + $commentQuery = DB::raw('(SELECT c1.entity_id, c1.entity_type, c1.created_at as last_commented FROM ' . $commentsTable . ' c1 LEFT JOIN ' . $commentsTable . ' c2 ON (c1.entity_id = c2.entity_id AND c1.entity_type = c2.entity_type AND c1.created_at < c2.created_at) WHERE c1.entity_type = \'' . $morphClass . '\' AND c2.created_at IS NULL) as comments'); $query->join($commentQuery, $model->getTable() . '.id', '=', 'comments.entity_id')->orderBy('last_commented', 'desc'); } diff --git a/database/seeders/LargeContentSeeder.php b/database/seeders/LargeContentSeeder.php index 2fbf4a5c9..e8b6d74f6 100644 --- a/database/seeders/LargeContentSeeder.php +++ b/database/seeders/LargeContentSeeder.php @@ -5,6 +5,7 @@ namespace Database\Seeders; use BookStack\Auth\Permissions\PermissionService; use BookStack\Auth\Role; use BookStack\Auth\User; +use BookStack\Entities\Models\Book; use BookStack\Entities\Models\Chapter; use BookStack\Entities\Models\Page; use BookStack\Entities\Tools\SearchIndex; @@ -25,12 +26,15 @@ class LargeContentSeeder extends Seeder $editorRole = Role::getRole('editor'); $editorUser->attachRole($editorRole); - $largeBook = \BookStack\Entities\Models\Book::factory()->create(['name' => 'Large book' . Str::random(10), 'created_by' => $editorUser->id, 'updated_by' => $editorUser->id]); + /** @var Book $largeBook */ + $largeBook = Book::factory()->create(['name' => 'Large book' . 
Str::random(10), 'created_by' => $editorUser->id, 'updated_by' => $editorUser->id]); $pages = Page::factory()->count(200)->make(['created_by' => $editorUser->id, 'updated_by' => $editorUser->id]); $chapters = Chapter::factory()->count(50)->make(['created_by' => $editorUser->id, 'updated_by' => $editorUser->id]); + $largeBook->pages()->saveMany($pages); $largeBook->chapters()->saveMany($chapters); - app(PermissionService::class)->buildJointPermissions(); - app(SearchIndex::class)->indexAllEntities(); + + app()->make(PermissionService::class)->buildJointPermissions(); + app()->make(SearchIndex::class)->indexAllEntities(); } } From 9e0164f4f45cb68f9dccf96db28c8b05ed493be7 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 8 Nov 2021 11:29:25 +0000 Subject: [PATCH 02/22] Further search system refactorings - Moved search term querying to its own method. - Updated Large content seeder to be more performant --- app/Entities/Tools/SearchIndex.php | 2 +- app/Entities/Tools/SearchRunner.php | 56 +++++++++++++++---------- database/seeders/LargeContentSeeder.php | 5 ++- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index cc0b32d6a..541f06994 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -46,7 +46,7 @@ class SearchIndex * * @param Entity[] $entities */ - protected function indexEntities(array $entities) + public function indexEntities(array $entities) { $terms = []; foreach ($entities as $entity) { diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index dc54649be..6296c70e7 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -133,29 +133,14 @@ class SearchRunner protected function buildQuery(SearchOptions $searchOpts, string $entityType = 'page', string $action = 'view'): EloquentBuilder { $entity = $this->entityProvider->get($entityType); - $entitySelect = 
$entity->newQuery(); + $entityQuery = $entity->newQuery(); // Handle normal search terms - if (count($searchOpts->searches) > 0) { - $rawScoreSum = DB::raw('SUM(score) as score'); - $subQuery = DB::table('search_terms')->select('entity_id', 'entity_type', $rawScoreSum); - $subQuery->where('entity_type', '=', $entity->getMorphClass()); - $subQuery->where(function (Builder $query) use ($searchOpts) { - foreach ($searchOpts->searches as $inputTerm) { - $query->orWhere('term', 'like', $inputTerm . '%'); - } - })->groupBy('entity_type', 'entity_id'); - $entitySelect->join(DB::raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) { - $join->on('id', '=', 'entity_id'); - })->addSelect($entity->getTable() . '.*') - ->selectRaw('s.score') - ->orderBy('score', 'desc'); - $entitySelect->mergeBindings($subQuery); - } + $this->applyTermSearch($entityQuery, $searchOpts->searches, $entity); // Handle exact term matching foreach ($searchOpts->exacts as $inputTerm) { - $entitySelect->where(function (EloquentBuilder $query) use ($inputTerm, $entity) { + $entityQuery->where(function (EloquentBuilder $query) use ($inputTerm, $entity) { $query->where('name', 'like', '%' . $inputTerm . '%') ->orWhere($entity->textField, 'like', '%' . $inputTerm . '%'); }); @@ -163,18 +148,47 @@ class SearchRunner // Handle tag searches foreach ($searchOpts->tags as $inputTerm) { - $this->applyTagSearch($entitySelect, $inputTerm); + $this->applyTagSearch($entityQuery, $inputTerm); } // Handle filters foreach ($searchOpts->filters as $filterTerm => $filterValue) { $functionName = Str::camel('filter_' . 
$filterTerm); if (method_exists($this, $functionName)) { - $this->$functionName($entitySelect, $entity, $filterValue); + $this->$functionName($entityQuery, $entity, $filterValue); } } - return $this->permissionService->enforceEntityRestrictions($entity, $entitySelect, $action); + return $this->permissionService->enforceEntityRestrictions($entity, $entityQuery, $action); + } + + /** + * For the given search query, apply the queries for handling the regular search terms. + */ + protected function applyTermSearch(EloquentBuilder $entityQuery, array $terms, Entity $entity): void + { + if (count($terms) === 0) { + return; + } + + $subQuery = DB::table('search_terms')->select([ + 'entity_id', + 'entity_type', + DB::raw('SUM(score) as score'), + ]); + + $subQuery->where('entity_type', '=', $entity->getMorphClass()); + $subQuery->where(function (Builder $query) use ($terms) { + foreach ($terms as $inputTerm) { + $query->orWhere('term', 'like', $inputTerm . '%'); + } + })->groupBy('entity_type', 'entity_id'); + $entityQuery->join(DB::raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) { + $join->on('id', '=', 'entity_id'); + })->addSelect($entity->getTable() . 
'.*') + ->selectRaw('s.score') + ->orderBy('score', 'desc'); + $entityQuery->mergeBindings($subQuery); } /** diff --git a/database/seeders/LargeContentSeeder.php b/database/seeders/LargeContentSeeder.php index e8b6d74f6..dd9165978 100644 --- a/database/seeders/LargeContentSeeder.php +++ b/database/seeders/LargeContentSeeder.php @@ -33,8 +33,9 @@ class LargeContentSeeder extends Seeder $largeBook->pages()->saveMany($pages); $largeBook->chapters()->saveMany($chapters); + $all = array_merge([$largeBook], array_values($pages->all()), array_values($chapters->all())); - app()->make(PermissionService::class)->buildJointPermissions(); - app()->make(SearchIndex::class)->indexAllEntities(); + app()->make(PermissionService::class)->buildJointPermissionsForEntity($largeBook); + app()->make(SearchIndex::class)->indexEntities($all); } } From b0b6f466c18f88ba0474778624195e1719c82532 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 8 Nov 2021 11:41:14 +0000 Subject: [PATCH 03/22] Reduced data retrieved from database on page search --- app/Entities/Tools/SearchRunner.php | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index 6296c70e7..aa129eb3c 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -6,6 +6,7 @@ use BookStack\Auth\Permissions\PermissionService; use BookStack\Auth\User; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; +use BookStack\Entities\Models\Page; use Illuminate\Database\Eloquent\Builder as EloquentBuilder; use Illuminate\Database\Eloquent\Collection as EloquentCollection; use Illuminate\Database\Query\Builder; @@ -135,6 +136,10 @@ class SearchRunner $entity = $this->entityProvider->get($entityType); $entityQuery = $entity->newQuery(); + if ($entity instanceof Page) { + $entityQuery->select($entity::$listAttributes); + } + // Handle normal search terms 
$this->applyTermSearch($entityQuery, $searchOpts->searches, $entity); @@ -178,16 +183,19 @@ class SearchRunner ]); $subQuery->where('entity_type', '=', $entity->getMorphClass()); + $subQuery->where(function (Builder $query) use ($terms) { foreach ($terms as $inputTerm) { $query->orWhere('term', 'like', $inputTerm . '%'); } })->groupBy('entity_type', 'entity_id'); + $entityQuery->join(DB::raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) { - $join->on('id', '=', 'entity_id'); - })->addSelect($entity->getTable() . '.*') - ->selectRaw('s.score') + $join->on('id', '=', 'entity_id'); + }) + ->addSelect(DB::raw('s.score')) ->orderBy('score', 'desc'); + $entityQuery->mergeBindings($subQuery); } From 7405613f8d800999713f14f125bacd1132e14818 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 8 Nov 2021 14:12:40 +0000 Subject: [PATCH 04/22] Added search term score popularity adjustment Adds adjustment of search term 'score' (Used in result ranking) so that a relative 0.3 to 1.3 multiplier is applied based upon relative popularity within the whole database. At this point the term popularity is still done via a prefix match against the search term. Uses a SUM(IF(cond, a, IF(cond, a, ...))) chain to produce the scoring result in the select query. 
--- app/Entities/Tools/SearchRunner.php | 110 +++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 12 deletions(-) diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index aa129eb3c..3dd0e6b6e 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -7,13 +7,14 @@ use BookStack\Auth\User; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; +use BookStack\Entities\Models\SearchTerm; use Illuminate\Database\Eloquent\Builder as EloquentBuilder; use Illuminate\Database\Eloquent\Collection as EloquentCollection; use Illuminate\Database\Query\Builder; -use Illuminate\Database\Query\JoinClause; use Illuminate\Support\Collection; use Illuminate\Support\Facades\DB; use Illuminate\Support\Str; +use SplObjectStorage; class SearchRunner { @@ -34,10 +35,19 @@ class SearchRunner */ protected $queryOperators = ['<=', '>=', '=', '<', '>', 'like', '!=']; + /** + * Retain a cache of score adjusted terms for specific search options. + * From PHP>=8 this can be made into a WeakMap instead. + * + * @var SplObjectStorage + */ + protected $termAdjustmentCache; + public function __construct(EntityProvider $entityProvider, PermissionService $permissionService) { $this->entityProvider = $entityProvider; $this->permissionService = $permissionService; + $this->termAdjustmentCache = new SplObjectStorage(); } /** @@ -138,10 +148,12 @@ class SearchRunner if ($entity instanceof Page) { $entityQuery->select($entity::$listAttributes); + } else { + $entityQuery->select(['*']); } // Handle normal search terms - $this->applyTermSearch($entityQuery, $searchOpts->searches, $entity); + $this->applyTermSearch($entityQuery, $searchOpts, $entity); // Handle exact term matching foreach ($searchOpts->exacts as $inputTerm) { @@ -170,33 +182,107 @@ class SearchRunner /** * For the given search query, apply the queries for handling the regular search terms. 
*/ - protected function applyTermSearch(EloquentBuilder $entityQuery, array $terms, Entity $entity): void + protected function applyTermSearch(EloquentBuilder $entityQuery, SearchOptions $options, Entity $entity): void { + $terms = $options->searches; if (count($terms) === 0) { return; } + $scoredTerms = $this->getTermAdjustments($options); + $scoreSelect = $this->selectForScoredTerms($scoredTerms); + $subQuery = DB::table('search_terms')->select([ 'entity_id', 'entity_type', - DB::raw('SUM(score) as score'), + DB::raw($scoreSelect['statement']), ]); - $subQuery->where('entity_type', '=', $entity->getMorphClass()); + $subQuery->addBinding($scoreSelect['bindings'], 'select'); + $subQuery->where('entity_type', '=', $entity->getMorphClass()); $subQuery->where(function (Builder $query) use ($terms) { foreach ($terms as $inputTerm) { $query->orWhere('term', 'like', $inputTerm . '%'); } - })->groupBy('entity_type', 'entity_id'); + }); + $subQuery->groupBy('entity_type', 'entity_id'); - $entityQuery->join(DB::raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) { - $join->on('id', '=', 'entity_id'); - }) - ->addSelect(DB::raw('s.score')) - ->orderBy('score', 'desc'); + $entityQuery->joinSub($subQuery, 's', 'id', '=', 'entity_id'); + $entityQuery->addSelect('s.score'); + $entityQuery->orderBy('score', 'desc'); + } - $entityQuery->mergeBindings($subQuery); + /** + * Create a select statement, with prepared bindings, for the given + * set of scored search terms. + * @return array{statement: string, bindings: string[]} + */ + protected function selectForScoredTerms(array $scoredTerms): array + { + // Within this we walk backwards to create the chain of 'if' statements + // so that each previous statement is used in the 'else' condition of + // the next (earlier) to be built. We start at '0' to have no score + // on no match (Should never actually get to this case). 
+ $ifChain = '0'; + $bindings = []; + foreach ($scoredTerms as $term => $score) { + $ifChain = 'IF(term like ?, score * ' . (float)$score . ', ' . $ifChain . ')'; + $bindings[] = $term . '%'; + } + + return [ + 'statement' => 'SUM(' . $ifChain . ') as score', + 'bindings' => array_reverse($bindings), + ]; + } + + protected function getTermAdjustments(SearchOptions $options): array + { + if (isset($this->termAdjustmentCache[$options])) { + return $this->termAdjustmentCache[$options]; + } + + $termQuery = SearchTerm::query()->toBase(); + $whenStatements = []; + $whenBindings = []; + + foreach ($options->searches as $term) { + $whenStatements[] = 'WHEN term LIKE ? THEN ?'; + $whenBindings[] = $term . '%'; + $whenBindings[] = $term; + + $termQuery->orWhere('term', 'like', $term . '%'); + } + + $case = 'CASE ' . implode(' ', $whenStatements) . ' END'; + $termQuery->selectRaw( $case . ' as term', $whenBindings); + $termQuery->selectRaw('COUNT(*) as count'); + $termQuery->groupByRaw($case, $whenBindings); + + $termCounts = $termQuery->get()->pluck('count', 'term')->toArray(); + $adjusted = $this->rawTermCountsToAdjustments($termCounts); + + $this->termAdjustmentCache[$options] = $adjusted; + return $this->termAdjustmentCache[$options]; + } + + /** + * Convert counts of terms into a relative-count normalised multiplier. 
+ * @param array $termCounts + * @return array + */ + protected function rawTermCountsToAdjustments(array $termCounts): array + { + $multipliers = []; + $max = max(array_values($termCounts)); + + foreach ($termCounts as $term => $count) { + $percent = round($count / $max, 5); + $multipliers[$term] = 1.3 - $percent; + } + + return $multipliers; } /** From b3e1c7da73a5a5279b84d16b2efd170f4b7702f9 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 8 Nov 2021 15:00:47 +0000 Subject: [PATCH 05/22] Applied styleci fixes and pluck improvement as per larastan --- app/Entities/Tools/SearchRunner.php | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index 3dd0e6b6e..a251fbc4d 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -216,6 +216,7 @@ class SearchRunner /** * Create a select statement, with prepared bindings, for the given * set of scored search terms. + * * @return array{statement: string, bindings: string[]} */ protected function selectForScoredTerms(array $scoredTerms): array @@ -227,13 +228,13 @@ class SearchRunner $ifChain = '0'; $bindings = []; foreach ($scoredTerms as $term => $score) { - $ifChain = 'IF(term like ?, score * ' . (float)$score . ', ' . $ifChain . ')'; + $ifChain = 'IF(term like ?, score * ' . (float) $score . ', ' . $ifChain . ')'; $bindings[] = $term . '%'; } return [ 'statement' => 'SUM(' . $ifChain . ') as score', - 'bindings' => array_reverse($bindings), + 'bindings' => array_reverse($bindings), ]; } @@ -256,24 +257,31 @@ class SearchRunner } $case = 'CASE ' . implode(' ', $whenStatements) . ' END'; - $termQuery->selectRaw( $case . ' as term', $whenBindings); + $termQuery->selectRaw($case . 
' as term', $whenBindings); $termQuery->selectRaw('COUNT(*) as count'); $termQuery->groupByRaw($case, $whenBindings); - $termCounts = $termQuery->get()->pluck('count', 'term')->toArray(); + $termCounts = $termQuery->pluck('count', 'term')->toArray(); $adjusted = $this->rawTermCountsToAdjustments($termCounts); $this->termAdjustmentCache[$options] = $adjusted; + return $this->termAdjustmentCache[$options]; } /** * Convert counts of terms into a relative-count normalised multiplier. + * * @param array $termCounts + * * @return array */ protected function rawTermCountsToAdjustments(array $termCounts): array { + if (empty($termCounts)) { + return []; + } + $multipliers = []; $max = max(array_values($termCounts)); @@ -338,7 +346,8 @@ class SearchRunner try { $date = date_create($input); $query->where('updated_at', '>=', $date); - } catch (\Exception $e) {} + } catch (\Exception $e) { + } } protected function filterUpdatedBefore(EloquentBuilder $query, Entity $model, $input): void @@ -346,7 +355,8 @@ class SearchRunner try { $date = date_create($input); $query->where('updated_at', '<', $date); - } catch (\Exception $e) {} + } catch (\Exception $e) { + } } protected function filterCreatedAfter(EloquentBuilder $query, Entity $model, $input): void @@ -354,7 +364,8 @@ class SearchRunner try { $date = date_create($input); $query->where('created_at', '>=', $date); - } catch (\Exception $e) {} + } catch (\Exception $e) { + } } protected function filterCreatedBefore(EloquentBuilder $query, Entity $model, $input) @@ -362,7 +373,8 @@ class SearchRunner try { $date = date_create($input); $query->where('created_at', '<', $date); - } catch (\Exception $e) {} + } catch (\Exception $e) { + } } protected function filterCreatedBy(EloquentBuilder $query, Entity $model, $input) From bc472ca2d7f0f01b035cb17a414c9e7eef9a5576 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 8 Nov 2021 15:24:49 +0000 Subject: [PATCH 06/22] Improved relation loading during search Relations now loaded during 
back-end query phase instead of being lazy loaded one-by-one within views. Reduced queries in testing from ~60 to ~20. Need to check other areas list-item.php's "showPath" option is used to ensure relations are properly loaded for those listings. --- app/Entities/Tools/SearchRunner.php | 50 ++++++++++++++------ resources/views/entities/list-item.blade.php | 4 +- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index a251fbc4d..3bcd6c054 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -5,11 +5,13 @@ namespace BookStack\Entities\Tools; use BookStack\Auth\Permissions\PermissionService; use BookStack\Auth\User; use BookStack\Entities\EntityProvider; +use BookStack\Entities\Models\BookChild; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; use BookStack\Entities\Models\SearchTerm; use Illuminate\Database\Eloquent\Builder as EloquentBuilder; use Illuminate\Database\Eloquent\Collection as EloquentCollection; +use Illuminate\Database\Eloquent\Relations\BelongsTo; use Illuminate\Database\Query\Builder; use Illuminate\Support\Collection; use Illuminate\Support\Facades\DB; @@ -75,9 +77,10 @@ class SearchRunner continue; } - $searchQuery = $this->buildQuery($searchOpts, $entityType, $action); + $entityModelInstance = $this->entityProvider->get($entityType); + $searchQuery = $this->buildQuery($searchOpts, $entityModelInstance, $action); $entityTotal = $searchQuery->count(); - $searchResults = $this->getPageOfDataFromQuery($searchQuery, $page, $count); + $searchResults = $this->getPageOfDataFromQuery($searchQuery, $entityModelInstance, $page, $count); if ($entityTotal > ($page * $count)) { $hasMore = true; @@ -109,7 +112,9 @@ class SearchRunner if (!in_array($entityType, $entityTypes)) { continue; } - $search = $this->buildQuery($opts, $entityType)->where('book_id', '=', $bookId)->take(20)->get(); + + $entityModelInstance 
= $this->entityProvider->get($entityType); + $search = $this->buildQuery($opts, $entityModelInstance)->where('book_id', '=', $bookId)->take(20)->get(); $results = $results->merge($search); } @@ -122,7 +127,8 @@ class SearchRunner public function searchChapter(int $chapterId, string $searchString): Collection { $opts = SearchOptions::fromString($searchString); - $pages = $this->buildQuery($opts, 'page')->where('chapter_id', '=', $chapterId)->take(20)->get(); + $entityModelInstance = $this->entityProvider->get('page'); + $pages = $this->buildQuery($opts, $entityModelInstance)->where('chapter_id', '=', $chapterId)->take(20)->get(); return $pages->sortByDesc('score'); } @@ -130,9 +136,24 @@ class SearchRunner /** * Get a page of result data from the given query based on the provided page parameters. */ - protected function getPageOfDataFromQuery(EloquentBuilder $query, int $page = 1, int $count = 20): EloquentCollection + protected function getPageOfDataFromQuery(EloquentBuilder $query, Entity $entityModelInstance, int $page = 1, int $count = 20): EloquentCollection { + $relations = ['tags']; + + if ($entityModelInstance instanceof BookChild) { + $relations['book'] = function(BelongsTo $query) { + $query->visible(); + }; + } + + if ($entityModelInstance instanceof Page) { + $relations['chapter'] = function(BelongsTo $query) { + $query->visible(); + }; + } + return $query->clone() + ->with(array_filter($relations)) ->skip(($page - 1) * $count) ->take($count) ->get(); @@ -141,25 +162,24 @@ class SearchRunner /** * Create a search query for an entity. 
*/ - protected function buildQuery(SearchOptions $searchOpts, string $entityType = 'page', string $action = 'view'): EloquentBuilder + protected function buildQuery(SearchOptions $searchOpts, Entity $entityModelInstance, string $action = 'view'): EloquentBuilder { - $entity = $this->entityProvider->get($entityType); - $entityQuery = $entity->newQuery(); + $entityQuery = $entityModelInstance->newQuery(); - if ($entity instanceof Page) { - $entityQuery->select($entity::$listAttributes); + if ($entityModelInstance instanceof Page) { + $entityQuery->select($entityModelInstance::$listAttributes); } else { $entityQuery->select(['*']); } // Handle normal search terms - $this->applyTermSearch($entityQuery, $searchOpts, $entity); + $this->applyTermSearch($entityQuery, $searchOpts, $entityModelInstance); // Handle exact term matching foreach ($searchOpts->exacts as $inputTerm) { - $entityQuery->where(function (EloquentBuilder $query) use ($inputTerm, $entity) { + $entityQuery->where(function (EloquentBuilder $query) use ($inputTerm, $entityModelInstance) { $query->where('name', 'like', '%' . $inputTerm . '%') - ->orWhere($entity->textField, 'like', '%' . $inputTerm . '%'); + ->orWhere($entityModelInstance->textField, 'like', '%' . $inputTerm . '%'); }); } @@ -172,11 +192,11 @@ class SearchRunner foreach ($searchOpts->filters as $filterTerm => $filterValue) { $functionName = Str::camel('filter_' . 
$filterTerm); if (method_exists($this, $functionName)) { - $this->$functionName($entityQuery, $entity, $filterValue); + $this->$functionName($entityQuery, $entityModelInstance, $filterValue); } } - return $this->permissionService->enforceEntityRestrictions($entity, $entityQuery, $action); + return $this->permissionService->enforceEntityRestrictions($entityModelInstance, $entityQuery, $action); } /** diff --git a/resources/views/entities/list-item.blade.php b/resources/views/entities/list-item.blade.php index 8b5eb20b0..c757b0691 100644 --- a/resources/views/entities/list-item.blade.php +++ b/resources/views/entities/list-item.blade.php @@ -3,9 +3,9 @@
@if($showPath ?? false) - @if($entity->book_id) + @if($entity->relationLoaded('book') && $entity->book) {{ $entity->book->getShortName(42) }} - @if($entity->chapter_id) + @if($entity->relationLoaded('chapter') && $entity->chapter) @icon('chevron-right') {{ $entity->chapter->getShortName(42) }} @endif @endif From da17004c3ee95a13afd1ea1b460ac2eae4262e87 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 9 Nov 2021 15:05:02 +0000 Subject: [PATCH 07/22] Added test to cover search frequency rank changes --- tests/Entity/EntitySearchTest.php | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index 8d2ef0fde..c30bb1d99 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -302,4 +302,22 @@ class EntitySearchTest extends TestCase $search->assertSeeText($page->name); $search->assertSee($page->getUrl()); } + + public function test_search_ranks_common_words_lower() + { + $this->newPage(['name' => 'Test page A', 'html' => '

dog biscuit dog dog

']); + $this->newPage(['name' => 'Test page B', 'html' => '

cat biscuit

']); + + $search = $this->asEditor()->get('/search?term=cat+dog+biscuit'); + $search->assertElementContains('.entity-list > .page', 'Test page A', 1); + $search->assertElementContains('.entity-list > .page', 'Test page B', 2); + + for ($i = 0; $i < 2; $i++) { + $this->newPage(['name' => 'Test page ' . $i, 'html' => '

dog

']); + } + + $search = $this->asEditor()->get('/search?term=cat+dog+biscuit'); + $search->assertElementContains('.entity-list > .page', 'Test page B', 1); + $search->assertElementContains('.entity-list > .page', 'Test page A', 2); + } } From 0ddd0528181fde31e9d3a45f3ec5c2efaba44995 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 9 Nov 2021 15:13:15 +0000 Subject: [PATCH 08/22] Added missing comments or types Checked over latest changes for potential SQL injection, all variable usages are either (from trusted sourced AND case) or using parameters/bindings to ensure it's handled at driver/lib level. --- app/Entities/Tools/SearchRunner.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index 3bcd6c054..afe5e9f19 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -237,6 +237,8 @@ class SearchRunner * Create a select statement, with prepared bindings, for the given * set of scored search terms. * + * @param array $scoredTerms + * * @return array{statement: string, bindings: string[]} */ protected function selectForScoredTerms(array $scoredTerms): array @@ -258,6 +260,13 @@ class SearchRunner ]; } + /** + * For the terms in the given search options, query their popularity across all + * search terms then provide that back as score adjustment multiplier applicable + * for their rarity. Returns an array of float multipliers, keyed by term. + * + * @return array + */ protected function getTermAdjustments(SearchOptions $options): array { if (isset($this->termAdjustmentCache[$options])) { From 9f3261398207d3c4d77d20f54ac160f61209c1e1 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Thu, 11 Nov 2021 13:36:49 +0000 Subject: [PATCH 09/22] Refactored search indexer, Increase title/name score boost - Title score boost changed from 5 to 40 (8x increase). 
- Extracted entity parsing to its own function --- app/Entities/Tools/SearchIndex.php | 52 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index 541f06994..16f261a37 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -9,19 +9,14 @@ use Illuminate\Support\Collection; class SearchIndex { - /** - * @var SearchTerm - */ - protected $searchTerm; /** * @var EntityProvider */ protected $entityProvider; - public function __construct(SearchTerm $searchTerm, EntityProvider $entityProvider) + public function __construct(EntityProvider $entityProvider) { - $this->searchTerm = $searchTerm; $this->entityProvider = $entityProvider; } @@ -31,14 +26,8 @@ class SearchIndex public function indexEntity(Entity $entity) { $this->deleteEntityTerms($entity); - $nameTerms = $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor); - $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor); - $terms = array_merge($nameTerms, $bodyTerms); - foreach ($terms as $index => $term) { - $terms[$index]['entity_type'] = $entity->getMorphClass(); - $terms[$index]['entity_id'] = $entity->id; - } - $this->searchTerm->newQuery()->insert($terms); + $terms = $this->entityToTermDataArray($entity); + SearchTerm::query()->insert($terms); } /** @@ -50,18 +39,13 @@ class SearchIndex { $terms = []; foreach ($entities as $entity) { - $nameTerms = $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor); - $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor); - foreach (array_merge($nameTerms, $bodyTerms) as $term) { - $term['entity_id'] = $entity->id; - $term['entity_type'] = $entity->getMorphClass(); - $terms[] = $term; - } + $entityTerms = $this->entityToTermDataArray($entity); + array_push($terms, ...$entityTerms); } $chunkedTerms = 
array_chunk($terms, 500); foreach ($chunkedTerms as $termChunk) { - $this->searchTerm->newQuery()->insert($termChunk); + SearchTerm::query()->insert($termChunk); } } @@ -70,7 +54,7 @@ class SearchIndex */ public function indexAllEntities() { - $this->searchTerm->newQuery()->truncate(); + SearchTerm::query()->truncate(); foreach ($this->entityProvider->all() as $entityModel) { $selectFields = ['id', 'name', $entityModel->textField]; @@ -93,6 +77,8 @@ class SearchIndex /** * Create a scored term array from the given text. + * + * @returns array{term: string, score: float} */ protected function generateTermArrayFromText(string $text, int $scoreAdjustment = 1): array { @@ -118,4 +104,24 @@ class SearchIndex return $terms; } + + /** + * For the given entity, Generate an array of term data details. + * Is the raw term data, not instances of SearchTerm models. + * + * @returns array{term: string, score: float}[] + */ + protected function entityToTermDataArray(Entity $entity): array + { + $nameTerms = $this->generateTermArrayFromText($entity->name, 40 * $entity->searchFactor); + $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor); + $termData = array_merge($nameTerms, $bodyTerms); + + foreach ($termData as $index => $term) { + $termData[$index]['entity_type'] = $entity->getMorphClass(); + $termData[$index]['entity_id'] = $entity->id; + } + + return $termData; + } } From 820be162f5bfb31f69f0122a61755fdd8623275f Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Thu, 11 Nov 2021 14:10:11 +0000 Subject: [PATCH 10/22] Updated regen-search command to show some level of progress --- app/Console/Commands/RegenerateSearch.php | 13 +++++++++-- app/Entities/Tools/SearchIndex.php | 27 ++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/app/Console/Commands/RegenerateSearch.php b/app/Console/Commands/RegenerateSearch.php index 50e81a2b8..62ee88fc0 100644 --- a/app/Console/Commands/RegenerateSearch.php +++ 
b/app/Console/Commands/RegenerateSearch.php @@ -2,6 +2,7 @@ namespace BookStack\Console\Commands; +use BookStack\Entities\Models\Entity; use BookStack\Entities\Tools\SearchIndex; use Illuminate\Console\Command; use Illuminate\Support\Facades\DB; @@ -22,6 +23,9 @@ class RegenerateSearch extends Command */ protected $description = 'Re-index all content for searching'; + /** + * @var SearchIndex + */ protected $searchIndex; /** @@ -45,8 +49,13 @@ class RegenerateSearch extends Command DB::setDefaultConnection($this->option('database')); } - $this->searchIndex->indexAllEntities(); + $this->searchIndex->indexAllEntities(function (Entity $model, int $processed, int $total) { + $this->info('Indexed ' . class_basename($model) . ' entries (' . $processed . '/' . $total . ')'); + }); + DB::setDefaultConnection($connection); - $this->comment('Search index regenerated'); + $this->line('Search index regenerated!'); + + return static::SUCCESS; } } diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index 16f261a37..50e471bc9 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -51,19 +51,36 @@ class SearchIndex /** * Delete and re-index the terms for all entities in the system. + * Can take a callback which is used for reporting progress. + * Callback receives three arguments: + * - An instance of the model being processed + * - The number that have been processed so far. + * - The total number of that model to be processed. 
+ * + * @param callable(Entity, int, int)|null $progressCallback */ - public function indexAllEntities() + public function indexAllEntities(?callable $progressCallback = null) { SearchTerm::query()->truncate(); foreach ($this->entityProvider->all() as $entityModel) { $selectFields = ['id', 'name', $entityModel->textField]; + $total = $entityModel->newQuery()->withTrashed()->count(); + $chunkSize = 250; + $processed = 0; + + $chunkCallback = function (Collection $entities) use ($progressCallback, &$processed, $total, $chunkSize, $entityModel) { + $this->indexEntities($entities->all()); + $processed = min($processed + $chunkSize, $total); + + if (is_callable($progressCallback)) { + $progressCallback($entityModel, $processed, $total); + } + }; + $entityModel->newQuery() - ->withTrashed() ->select($selectFields) - ->chunk(1000, function (Collection $entities) { - $this->indexEntities($entities->all()); - }); + ->chunk($chunkSize, $chunkCallback); } } From f28daa01d9d43d36c12b075bddca92be9e8f85e4 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 12 Nov 2021 13:47:23 +0000 Subject: [PATCH 11/22] Added page content parsing to up-rank header text in search This adds parsing of page content so that headers apply a boost to scores in the search term index. Additionally, this merges title and content terms to reduce the amount of stored terms a little. Includes testing to cover. 
--- app/Entities/Models/Book.php | 2 +- app/Entities/Models/Bookshelf.php | 2 +- app/Entities/Models/Chapter.php | 2 +- app/Entities/Models/Entity.php | 10 +-- app/Entities/Models/Page.php | 6 +- app/Entities/Repos/PageRepo.php | 2 +- app/Entities/Tools/SearchIndex.php | 132 ++++++++++++++++++++++++----- tests/Entity/EntitySearchTest.php | 40 +++++++++ 8 files changed, 158 insertions(+), 38 deletions(-) diff --git a/app/Entities/Models/Book.php b/app/Entities/Models/Book.php index 982df5c90..359f7961c 100644 --- a/app/Entities/Models/Book.php +++ b/app/Entities/Models/Book.php @@ -24,7 +24,7 @@ class Book extends Entity implements HasCoverImage { use HasFactory; - public $searchFactor = 2; + public $searchFactor = 1.5; protected $fillable = ['name', 'description']; protected $hidden = ['restricted', 'pivot', 'image_id', 'deleted_at']; diff --git a/app/Entities/Models/Bookshelf.php b/app/Entities/Models/Bookshelf.php index 8fe9dbe41..b426858c3 100644 --- a/app/Entities/Models/Bookshelf.php +++ b/app/Entities/Models/Bookshelf.php @@ -13,7 +13,7 @@ class Bookshelf extends Entity implements HasCoverImage protected $table = 'bookshelves'; - public $searchFactor = 3; + public $searchFactor = 1.5; protected $fillable = ['name', 'description', 'image_id']; diff --git a/app/Entities/Models/Chapter.php b/app/Entities/Models/Chapter.php index 0e2917af3..f4d1a281d 100644 --- a/app/Entities/Models/Chapter.php +++ b/app/Entities/Models/Chapter.php @@ -16,7 +16,7 @@ class Chapter extends BookChild { use HasFactory; - public $searchFactor = 1.3; + public $searchFactor = 1.5; protected $fillable = ['name', 'description', 'priority', 'book_id']; protected $hidden = ['restricted', 'pivot', 'deleted_at']; diff --git a/app/Entities/Models/Entity.php b/app/Entities/Models/Entity.php index f5f9d91f0..4c4e55bb8 100644 --- a/app/Entities/Models/Entity.php +++ b/app/Entities/Models/Entity.php @@ -238,20 +238,12 @@ abstract class Entity extends Model implements Sluggable, Favouritable, 
Viewable return mb_substr($this->name, 0, $length - 3) . '...'; } - /** - * Get the body text of this entity. - */ - public function getText(): string - { - return $this->{$this->textField} ?? ''; - } - /** * Get an excerpt of this entity's descriptive content to the specified length. */ public function getExcerpt(int $length = 100): string { - $text = $this->getText(); + $text = $this->{$this->textField} ?? ''; if (mb_strlen($text) > $length) { $text = mb_substr($text, 0, $length - 3) . '...'; diff --git a/app/Entities/Models/Page.php b/app/Entities/Models/Page.php index 27d5dc6a4..c28b9a305 100644 --- a/app/Entities/Models/Page.php +++ b/app/Entities/Models/Page.php @@ -3,13 +3,13 @@ namespace BookStack\Entities\Models; use BookStack\Entities\Tools\PageContent; +use BookStack\Facades\Permissions; use BookStack\Uploads\Attachment; use Illuminate\Database\Eloquent\Builder; use Illuminate\Database\Eloquent\Collection; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Relations\BelongsTo; use Illuminate\Database\Eloquent\Relations\HasMany; -use Permissions; /** * Class Page. @@ -64,10 +64,8 @@ class Page extends BookChild /** * Check if this page has a chapter. 
- * - * @return bool */ - public function hasChapter() + public function hasChapter(): bool { return $this->chapter()->count() > 0; } diff --git a/app/Entities/Repos/PageRepo.php b/app/Entities/Repos/PageRepo.php index ffa06d459..98fe4ef55 100644 --- a/app/Entities/Repos/PageRepo.php +++ b/app/Entities/Repos/PageRepo.php @@ -157,8 +157,8 @@ class PageRepo */ public function publishDraft(Page $draft, array $input): Page { - $this->baseRepo->update($draft, $input); $this->updateTemplateStatusAndContentFromInput($draft, $input); + $this->baseRepo->update($draft, $input); $draft->draft = false; $draft->revision_count = 1; diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index 50e471bc9..bde5ef860 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -4,7 +4,10 @@ namespace BookStack\Entities\Tools; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; +use BookStack\Entities\Models\Page; use BookStack\Entities\Models\SearchTerm; +use DOMDocument; +use DOMNode; use Illuminate\Support\Collection; class SearchIndex @@ -64,7 +67,8 @@ class SearchIndex SearchTerm::query()->truncate(); foreach ($this->entityProvider->all() as $entityModel) { - $selectFields = ['id', 'name', $entityModel->textField]; + $indexContentField = $entityModel instanceof Page ? 'html' : 'description'; + $selectFields = ['id', 'name', $indexContentField]; $total = $entityModel->newQuery()->withTrashed()->count(); $chunkSize = 250; $processed = 0; @@ -93,11 +97,70 @@ class SearchIndex } /** - * Create a scored term array from the given text. + * Create a scored term array from the given text, where the keys are the terms + * and the values are their scores. 
* - * @returns array{term: string, score: float} + * @returns array */ - protected function generateTermArrayFromText(string $text, int $scoreAdjustment = 1): array + protected function generateTermScoreMapFromText(string $text, int $scoreAdjustment = 1): array + { + $termMap = $this->textToTermCountMap($text); + + foreach ($termMap as $term => $count) { + $termMap[$term] = $count * $scoreAdjustment; + } + + return $termMap; + } + + /** + * Create a scored term array from the given HTML, where the keys are the terms + * and the values are their scores. + * + * @returns array + */ + protected function generateTermScoreMapFromHtml(string $html): array + { + if (empty($html)) { + return []; + } + + $scoresByTerm = []; + $elementScoreAdjustmentMap = [ + 'h1' => 10, + 'h2' => 5, + 'h3' => 4, + 'h4' => 3, + 'h5' => 2, + 'h6' => 1.5, + ]; + + $html = '' . $html . ''; + libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); + + $topElems = $doc->documentElement->childNodes->item(0)->childNodes; + /** @var DOMNode $child */ + foreach ($topElems as $child) { + $nodeName = $child->nodeName; + $termCounts = $this->textToTermCountMap(trim($child->textContent)); + foreach ($termCounts as $term => $count) { + $scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1); + $scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange; + } + } + + return $scoresByTerm; + } + + /** + * For the given text, return an array where the keys are the unique term words + * and the values are the frequency of that term. 
+ * + * @returns array + */ + protected function textToTermCountMap(string $text): array { $tokenMap = []; // {TextToken => OccurrenceCount} $splitChars = " \n\t.,!?:;()[]{}<>`'\""; @@ -111,34 +174,61 @@ class SearchIndex $token = strtok($splitChars); } - $terms = []; - foreach ($tokenMap as $token => $count) { - $terms[] = [ - 'term' => $token, - 'score' => $count * $scoreAdjustment, - ]; - } - - return $terms; + return $tokenMap; } /** * For the given entity, Generate an array of term data details. * Is the raw term data, not instances of SearchTerm models. * - * @returns array{term: string, score: float}[] + * @returns array{term: string, score: float, entity_id: int, entity_type: string}[] */ protected function entityToTermDataArray(Entity $entity): array { - $nameTerms = $this->generateTermArrayFromText($entity->name, 40 * $entity->searchFactor); - $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor); - $termData = array_merge($nameTerms, $bodyTerms); + $nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor); - foreach ($termData as $index => $term) { - $termData[$index]['entity_type'] = $entity->getMorphClass(); - $termData[$index]['entity_id'] = $entity->id; + if ($entity instanceof Page) { + $bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html); + } else { + $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor); } - return $termData; + $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap); + + $dataArray = []; + $entityId = $entity->id; + $entityType = $entity->getMorphClass(); + foreach ($mergedScoreMap as $term => $score) { + $dataArray[] = [ + 'term' => $term, + 'score' => $score, + 'entity_type' => $entityType, + 'entity_id' => $entityId, + ]; + } + + return $dataArray; + } + + + /** + * For the given term data arrays, Merge their contents by term + * while combining any scores. 
+ * + * @param array[] ...$scoreMaps + * + * @returns array + */ + protected function mergeTermScoreMaps(...$scoreMaps): array + { + $mergedMap = []; + + foreach ($scoreMaps as $scoreMap) { + foreach ($scoreMap as $term => $score) { + $mergedMap[$term] = ($mergedMap[$term] ?? 0) + $score; + } + } + + return $mergedMap; } } diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index c30bb1d99..bd50a13ac 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -7,6 +7,7 @@ use BookStack\Entities\Models\Book; use BookStack\Entities\Models\Bookshelf; use BookStack\Entities\Models\Chapter; use BookStack\Entities\Models\Page; +use BookStack\Entities\Models\SearchTerm; use Tests\TestCase; class EntitySearchTest extends TestCase @@ -320,4 +321,43 @@ class EntitySearchTest extends TestCase $search->assertElementContains('.entity-list > .page', 'Test page B', 1); $search->assertElementContains('.entity-list > .page', 'Test page A', 2); } + + public function test_terms_in_headers_have_an_adjusted_index_score() + { + $page = $this->newPage(['name' => 'Test page A', 'html' => ' +

TermA

+

TermB TermNested

+

TermC

+

TermD

+

TermE

+
TermF
+
TermG
+ ']); + + $entityRelationCols = ['entity_id' => $page->id, 'entity_type' => 'BookStack\\Page']; + $scoreByTerm = SearchTerm::query()->where($entityRelationCols)->pluck('score', 'term'); + + $this->assertEquals(1, $scoreByTerm->get('TermA')); + $this->assertEquals(10, $scoreByTerm->get('TermB')); + $this->assertEquals(10, $scoreByTerm->get('TermNested')); + $this->assertEquals(5, $scoreByTerm->get('TermC')); + $this->assertEquals(4, $scoreByTerm->get('TermD')); + $this->assertEquals(3, $scoreByTerm->get('TermE')); + $this->assertEquals(2, $scoreByTerm->get('TermF')); + // Is 1.5 but stored as integer, rounding up + $this->assertEquals(2, $scoreByTerm->get('TermG')); + } + + public function test_name_and_content_terms_are_merged_to_single_score() + { + $page = $this->newPage(['name' => 'TermA', 'html' => ' +

TermA

+ ']); + + $entityRelationCols = ['entity_id' => $page->id, 'entity_type' => 'BookStack\\Page']; + $scoreByTerm = SearchTerm::query()->where($entityRelationCols)->pluck('score', 'term'); + + // Scores 40 for being in the name then 1 for being in the content + $this->assertEquals(41, $scoreByTerm->get('TermA')); + } } From 99587a0be63556a6915ac2728d8236da2f61c288 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 12 Nov 2021 17:06:01 +0000 Subject: [PATCH 12/22] Added tag values as part of the indexed search terms This allows finding content via tag name/values when just searching using normal search terms. Added testing to cover. Related to #1577 --- app/Actions/Tag.php | 6 ++++++ app/Entities/Models/Book.php | 2 +- app/Entities/Models/Bookshelf.php | 2 +- app/Entities/Models/Chapter.php | 2 +- app/Entities/Tools/SearchIndex.php | 29 ++++++++++++++++++++++++++++- tests/Entity/EntitySearchTest.php | 19 +++++++++++++++---- 6 files changed, 52 insertions(+), 8 deletions(-) diff --git a/app/Actions/Tag.php b/app/Actions/Tag.php index db9328b7d..609c299ad 100644 --- a/app/Actions/Tag.php +++ b/app/Actions/Tag.php @@ -6,6 +6,12 @@ use BookStack\Model; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Relations\MorphTo; +/** + * @property int $id + * @property string $name + * @property string $value + * @property int $order + */ class Tag extends Model { use HasFactory; diff --git a/app/Entities/Models/Book.php b/app/Entities/Models/Book.php index 359f7961c..735d25a99 100644 --- a/app/Entities/Models/Book.php +++ b/app/Entities/Models/Book.php @@ -24,7 +24,7 @@ class Book extends Entity implements HasCoverImage { use HasFactory; - public $searchFactor = 1.5; + public $searchFactor = 1.2; protected $fillable = ['name', 'description']; protected $hidden = ['restricted', 'pivot', 'image_id', 'deleted_at']; diff --git a/app/Entities/Models/Bookshelf.php b/app/Entities/Models/Bookshelf.php index b426858c3..e4d9775b7 100644 ---
a/app/Entities/Models/Bookshelf.php +++ b/app/Entities/Models/Bookshelf.php @@ -13,7 +13,7 @@ class Bookshelf extends Entity implements HasCoverImage protected $table = 'bookshelves'; - public $searchFactor = 1.5; + public $searchFactor = 1.2; protected $fillable = ['name', 'description', 'image_id']; diff --git a/app/Entities/Models/Chapter.php b/app/Entities/Models/Chapter.php index f4d1a281d..224ded935 100644 --- a/app/Entities/Models/Chapter.php +++ b/app/Entities/Models/Chapter.php @@ -16,7 +16,7 @@ class Chapter extends BookChild { use HasFactory; - public $searchFactor = 1.5; + public $searchFactor = 1.2; protected $fillable = ['name', 'description', 'priority', 'book_id']; protected $hidden = ['restricted', 'pivot', 'deleted_at']; diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index bde5ef860..05de341f9 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -2,6 +2,7 @@ namespace BookStack\Entities\Tools; +use BookStack\Actions\Tag; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; @@ -84,6 +85,7 @@ class SearchIndex $entityModel->newQuery() ->select($selectFields) + ->with(['tags:id,name,value,entity_id,entity_type']) ->chunk($chunkSize, $chunkCallback); } } @@ -154,6 +156,30 @@ class SearchIndex return $scoresByTerm; } + /** + * Create a scored term map from the given set of entity tags. 
+ * + * @param Tag[] $tags + * + * @returns array + */ + protected function generateTermScoreMapFromTags(array $tags): array + { + $scoreMap = []; + $names = []; + $values = []; + + foreach($tags as $tag) { + $names[] = $tag->name; + $values[] = $tag->value; + } + + $nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3); + $valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5); + + return $this->mergeTermScoreMaps($nameMap, $valueMap); + } + /** * For the given text, return an array where the keys are the unique term words * and the values are the frequency of that term. @@ -186,6 +212,7 @@ class SearchIndex protected function entityToTermDataArray(Entity $entity): array { $nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor); + $tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all()); if ($entity instanceof Page) { $bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html); @@ -193,7 +220,7 @@ class SearchIndex $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor); } - $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap); + $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap); $dataArray = []; $entityId = $entity->id; diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index bd50a13ac..08fabba0c 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -334,8 +334,7 @@ class EntitySearchTest extends TestCase
TermG
']); - $entityRelationCols = ['entity_id' => $page->id, 'entity_type' => 'BookStack\\Page']; - $scoreByTerm = SearchTerm::query()->where($entityRelationCols)->pluck('score', 'term'); + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); $this->assertEquals(1, $scoreByTerm->get('TermA')); $this->assertEquals(10, $scoreByTerm->get('TermB')); @@ -354,10 +353,22 @@ class EntitySearchTest extends TestCase

TermA

']); - $entityRelationCols = ['entity_id' => $page->id, 'entity_type' => 'BookStack\\Page']; - $scoreByTerm = SearchTerm::query()->where($entityRelationCols)->pluck('score', 'term'); + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); // Scores 40 for being in the name then 1 for being in the content $this->assertEquals(41, $scoreByTerm->get('TermA')); } + + public function test_tag_names_and_values_are_indexed_for_search() + { + $page = $this->newPage(['name' => 'PageA', 'html' => '

content

', 'tags' => [ + ['name' => 'Animal', 'value' => 'MeowieCat'], + ['name' => 'SuperImportant'], + ]]); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + $this->assertEquals(5, $scoreByTerm->get('MeowieCat')); + $this->assertEquals(3, $scoreByTerm->get('Animal')); + $this->assertEquals(3, $scoreByTerm->get('SuperImportant')); + } } From 7d0724e2888f768149b425efcdc185a1c7a4be02 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 12 Nov 2021 18:03:44 +0000 Subject: [PATCH 13/22] Added auto-conversion of search terms to exact values Will occur when a search term contains a character that's used to split content into search terms. Added testing to cover. --- app/Entities/Tools/SearchIndex.php | 8 ++++- app/Entities/Tools/SearchOptions.php | 47 +++++++++++++++++++++++----- tests/Entity/EntitySearchTest.php | 12 +++++++ 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index 05de341f9..3c4b5a247 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -13,6 +13,12 @@ use Illuminate\Support\Collection; class SearchIndex { + /** + * A list of delimiter characters used to break-up parsed content into terms for indexing. 
+ * + * @var string + */ + public static $delimiters = " \n\t.,!?:;()[]{}<>`'\""; /** * @var EntityProvider @@ -189,7 +195,7 @@ class SearchIndex protected function textToTermCountMap(string $text): array { $tokenMap = []; // {TextToken => OccurrenceCount} - $splitChars = " \n\t.,!?:;()[]{}<>`'\""; + $splitChars = static::$delimiters; $token = strtok($text, $splitChars); while ($token !== false) { diff --git a/app/Entities/Tools/SearchOptions.php b/app/Entities/Tools/SearchOptions.php index 39074fb38..9f1b9742d 100644 --- a/app/Entities/Tools/SearchOptions.php +++ b/app/Entities/Tools/SearchOptions.php @@ -57,15 +57,22 @@ class SearchOptions $instance = new SearchOptions(); $inputs = $request->only(['search', 'types', 'filters', 'exact', 'tags']); - $instance->searches = explode(' ', $inputs['search'] ?? []); - $instance->exacts = array_filter($inputs['exact'] ?? []); + + $parsedStandardTerms = static::parseStandardTermString($inputs['search'] ?? ''); + $instance->searches = $parsedStandardTerms['terms']; + $instance->exacts = $parsedStandardTerms['exacts']; + + array_push($instance->exacts, ...array_filter($inputs['exact'] ?? [])); + $instance->tags = array_filter($inputs['tags'] ?? []); + foreach (($inputs['filters'] ?? []) as $filterKey => $filterVal) { if (empty($filterVal)) { continue; } $instance->filters[$filterKey] = $filterVal === 'true' ? 
'' : $filterVal; } + if (isset($inputs['types']) && count($inputs['types']) < 4) { $instance->filters['type'] = implode('|', $inputs['types']); } @@ -102,11 +109,9 @@ class SearchOptions } // Parse standard terms - foreach (explode(' ', trim($searchString)) as $searchTerm) { - if ($searchTerm !== '') { - $terms['searches'][] = $searchTerm; - } - } + $parsedStandardTerms = static::parseStandardTermString($searchString); + array_push($terms['searches'], ...$parsedStandardTerms['terms']); + array_push($terms['exacts'], ...$parsedStandardTerms['exacts']); // Split filter values out $splitFilters = []; @@ -119,6 +124,34 @@ class SearchOptions return $terms; } + + /** + * Parse a standard search term string into individual search terms and + * extract any exact terms searches to be made. + * + * @return array{terms: array, exacts: array} + */ + protected static function parseStandardTermString(string $termString): array + { + $terms = explode(' ', $termString); + $indexDelimiters = SearchIndex::$delimiters; + $parsed = [ + 'terms' => [], + 'exacts' => [], + ]; + + foreach ($terms as $searchTerm) { + if ($searchTerm === '') { + continue; + } + + $parsedList = (strpbrk($searchTerm, $indexDelimiters) === false) ? 'terms' : 'exacts'; + $parsed[$parsedList][] = $searchTerm; + } + + return $parsed; + } + /** * Encode this instance to a search string. */ diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index 08fabba0c..f69dba211 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -119,6 +119,18 @@ class EntitySearchTest extends TestCase $exactSearchB->assertStatus(200)->assertDontSee($page->name); } + public function test_search_terms_with_delimiters_are_converted_to_exact_matches() + { + $this->asEditor(); + $page = $this->newPage(['name' => 'Delimiter test', 'html' => '

1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`

']); + $terms = explode(' ', '1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`'); + + foreach ($terms as $term) { + $search = $this->get('/search?term=' . urlencode($term)); + $search->assertSee($page->name); + } + } + public function test_search_filters() { $page = $this->newPage(['name' => 'My new test quaffleachits', 'html' => 'this is about an orange donkey danzorbhsing']); From f30b937bb02eea92c078ea9644e3b70bd63974d8 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 12 Nov 2021 22:57:50 +0000 Subject: [PATCH 14/22] Added search result preview text highlighting Created a new class to manage formatting of content for search results. Turned out to be quite a complex task. This only does the preview text so far, not titles or tags. Not yet tested. --- app/Entities/Tools/SearchResultsFormatter.php | 173 ++++++++++++++++++ app/Http/Controllers/SearchController.php | 11 +- resources/views/entities/list-item.blade.php | 2 +- 3 files changed, 178 insertions(+), 8 deletions(-) create mode 100644 app/Entities/Tools/SearchResultsFormatter.php diff --git a/app/Entities/Tools/SearchResultsFormatter.php b/app/Entities/Tools/SearchResultsFormatter.php new file mode 100644 index 000000000..aaa5c129d --- /dev/null +++ b/app/Entities/Tools/SearchResultsFormatter.php @@ -0,0 +1,173 @@ +setSearchPreview($result, $options); + } + } + + /** + * Update the given entity model to set attributes used for previews of the item + * primarily within search result lists. 
+ */ + protected function setSearchPreview(Entity $entity, SearchOptions $options) + { + $textProperty = $entity->textField; + $textContent = $entity->$textProperty; + $terms = array_merge($options->exacts, $options->searches); + + $matchRefs = $this->getMatchPositions($textContent, $terms); + $mergedRefs = $this->sortAndMergeMatchPositions($matchRefs); + $content = $this->formatTextUsingMatchPositions($mergedRefs, $textContent); + + $entity->setAttribute('preview_content', new HtmlString($content)); + } + + /** + * Get positions of the given terms within the given text. + * Is in the array format of [int $startIndex => int $endIndex] where the indexes + * are positions within the provided text. + * + * @return array + */ + protected function getMatchPositions(string $text, array $terms): array + { + $matchRefs = []; + $text = strtolower($text); + + foreach ($terms as $term) { + $offset = 0; + $term = strtolower($term); + $pos = strpos($text, $term, $offset); + while ($pos !== false) { + $end = $pos + strlen($term); + $matchRefs[$pos] = $end; + $offset = $end; + $pos = strpos($text, $term, $offset); + } + } + + return $matchRefs; + } + + /** + * Sort the given match positions before merging them where they're + * adjacent or where they overlap. + * + * @param array $matchPositions + * @return array + */ + protected function sortAndMergeMatchPositions(array $matchPositions): array + { + ksort($matchPositions); + $mergedRefs = []; + $lastStart = 0; + $lastEnd = 0; + + foreach ($matchPositions as $start => $end) { + if ($start > $lastEnd) { + $mergedRefs[$start] = $end; + $lastStart = $start; + $lastEnd = $end; + } else if ($end > $lastEnd) { + $mergedRefs[$lastStart] = $end; + $lastEnd = $end; + } + } + + return $mergedRefs; + } + + /** + * Format the given original text, returning a version where terms are highlighted within. + * Returned content is in HTML text format. 
+ */ + protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText): string + { + $contextRange = 32; + $targetLength = 260; + $maxEnd = strlen($originalText); + $lastEnd = 0; + $firstStart = null; + $content = ''; + + foreach ($matchPositions as $start => $end) { + // Get our outer text ranges for the added context we want to show upon the result. + $contextStart = max($start - $contextRange, 0, $lastEnd); + $contextEnd = min($end + $contextRange, $maxEnd); + + // Adjust the start if we're going to be touching the previous match. + $startDiff = $start - $lastEnd; + if ($startDiff < 0) { + $contextStart = $start; + $content = substr($content, 0, strlen($content) + $startDiff); + } + + // Add ellipsis between results + if ($contextStart !== 0 && $contextStart !== $start) { + $content .= ' ...'; + } + + // Add our content including the bolded matching text + $content .= e(substr($originalText, $contextStart, $start - $contextStart)); + $content .= '' . e(substr($originalText, $start, $end - $start)) . ''; + $content .= e(substr($originalText, $end, $contextEnd - $end)); + + // Update our last end position + $lastEnd = $contextEnd; + + // Update the first start position if it's not already been set + if (is_null($firstStart)) { + $firstStart = $contextStart; + } + + // Stop if we're near our target + if (strlen($content) >= $targetLength - 10) { + break; + } + } + + // Just copy out the content if we haven't moved along anywhere. 
+ if ($lastEnd === 0) { + $content = e(substr($originalText, 0, $targetLength)); + $lastEnd = $targetLength; + } + + // Pad out the end if we're low + $remainder = $targetLength - strlen($content); + if ($remainder > 10) { + $content .= e(substr($originalText, $lastEnd, $remainder)); + $lastEnd += $remainder; + } + + // Pad out the start if we're still low + $remainder = $targetLength - strlen($content); + $firstStart = $firstStart ?: 0; + if ($remainder > 10 && $firstStart !== 0) { + $padStart = max(0, $firstStart - $remainder); + $content = ($padStart === 0 ? '' : '...') . e(substr($originalText, $padStart, $firstStart - $padStart)) . substr($content, 4); + } + + // Add ellipsis if we're not at the end + if ($lastEnd < $maxEnd) { + $content .= '...'; + } + + return $content; + } + +} \ No newline at end of file diff --git a/app/Http/Controllers/SearchController.php b/app/Http/Controllers/SearchController.php index d12c23b5a..040c04ece 100644 --- a/app/Http/Controllers/SearchController.php +++ b/app/Http/Controllers/SearchController.php @@ -4,8 +4,8 @@ namespace BookStack\Http\Controllers; use BookStack\Entities\Queries\Popular; use BookStack\Entities\Tools\SearchOptions; +use BookStack\Entities\Tools\SearchResultsFormatter; use BookStack\Entities\Tools\SearchRunner; -use BookStack\Entities\Tools\ShelfContext; use BookStack\Entities\Tools\SiblingFetcher; use Illuminate\Http\Request; @@ -14,18 +14,14 @@ class SearchController extends Controller protected $searchRunner; protected $entityContextManager; - public function __construct( - SearchRunner $searchRunner, - ShelfContext $entityContextManager - ) { + public function __construct(SearchRunner $searchRunner) { $this->searchRunner = $searchRunner; - $this->entityContextManager = $entityContextManager; } /** * Searches all entities. 
*/ - public function search(Request $request) + public function search(Request $request, SearchResultsFormatter $formatter) { $searchOpts = SearchOptions::fromRequest($request); $fullSearchString = $searchOpts->toString(); @@ -35,6 +31,7 @@ class SearchController extends Controller $nextPageLink = url('/search?term=' . urlencode($fullSearchString) . '&page=' . ($page + 1)); $results = $this->searchRunner->searchEntities($searchOpts, 'all', $page, 20); + $formatter->format($results['results']->all(), $searchOpts); return view('search.all', [ 'entities' => $results['results'], diff --git a/resources/views/entities/list-item.blade.php b/resources/views/entities/list-item.blade.php index c757b0691..aa4f6c1e8 100644 --- a/resources/views/entities/list-item.blade.php +++ b/resources/views/entities/list-item.blade.php @@ -11,7 +11,7 @@ @endif @endif -

{{ $entity->getExcerpt() }}

+

{{ $entity->preview_content ?? $entity->getExcerpt() }}

@if(($showTags ?? false) && $entity->tags->count() > 0) From ab4e99bb187fb4273dcad2fa3c731ba46e49a585 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 13 Nov 2021 12:44:27 +0000 Subject: [PATCH 15/22] Added name highlighting in search results --- app/Entities/Tools/SearchResultsFormatter.php | 14 ++++++++++---- resources/views/entities/list-item-basic.blade.php | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/app/Entities/Tools/SearchResultsFormatter.php b/app/Entities/Tools/SearchResultsFormatter.php index aaa5c129d..24dc820a4 100644 --- a/app/Entities/Tools/SearchResultsFormatter.php +++ b/app/Entities/Tools/SearchResultsFormatter.php @@ -30,11 +30,17 @@ class SearchResultsFormatter $textContent = $entity->$textProperty; $terms = array_merge($options->exacts, $options->searches); - $matchRefs = $this->getMatchPositions($textContent, $terms); - $mergedRefs = $this->sortAndMergeMatchPositions($matchRefs); - $content = $this->formatTextUsingMatchPositions($mergedRefs, $textContent); + $originalContentByNewAttribute = [ + 'preview_name' => $entity->name, + 'preview_content' => $textContent, + ]; - $entity->setAttribute('preview_content', new HtmlString($content)); + foreach ($originalContentByNewAttribute as $attributeName => $content) { + $matchRefs = $this->getMatchPositions($content, $terms); + $mergedRefs = $this->sortAndMergeMatchPositions($matchRefs); + $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content); + $entity->setAttribute($attributeName, new HtmlString($formatted)); + } } /** diff --git a/resources/views/entities/list-item-basic.blade.php b/resources/views/entities/list-item-basic.blade.php index 2ec4bee5c..398c33b93 100644 --- a/resources/views/entities/list-item-basic.blade.php +++ b/resources/views/entities/list-item-basic.blade.php @@ -2,7 +2,7 @@ @icon($type)
-

{{ $entity->name }}

+

{{ $entity->preview_name ?? $entity->name }}

{{ $slot ?? '' }}
\ No newline at end of file From 339518e2a6ad1cee717d821afe9238d0ac9792ed Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 13 Nov 2021 13:02:32 +0000 Subject: [PATCH 16/22] Added tag highlighting in search Using basic match of name or value containing a general term. --- app/Entities/Tools/SearchResultsFormatter.php | 29 +++++++++++++++++++ resources/sass/_blocks.scss | 4 +++ resources/views/entities/tag.blade.php | 8 ++--- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/app/Entities/Tools/SearchResultsFormatter.php b/app/Entities/Tools/SearchResultsFormatter.php index 24dc820a4..1ddee5830 100644 --- a/app/Entities/Tools/SearchResultsFormatter.php +++ b/app/Entities/Tools/SearchResultsFormatter.php @@ -2,6 +2,7 @@ namespace BookStack\Entities\Tools; +use BookStack\Actions\Tag; use BookStack\Entities\Models\Entity; use Illuminate\Support\HtmlString; @@ -41,6 +42,34 @@ class SearchResultsFormatter $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content); $entity->setAttribute($attributeName, new HtmlString($formatted)); } + + $tags = $entity->relationLoaded('tags') ? $entity->tags->all() : []; + $this->highlightTagsContainingTerms($tags, $terms); + } + + /** + * Highlight tags which match the given terms. 
+ * @param Tag[] $tags + * @param string[] $terms + */ + protected function highlightTagsContainingTerms(array $tags, array $terms): void + { + foreach ($tags as $tag) { + $tagName = strtolower($tag->name); + $tagValue = strtolower($tag->value); + + foreach ($terms as $term) { + $termLower = strtolower($term); + + if (strpos($tagName, $termLower) !== false) { + $tag->setAttribute('highlight_name', true); + } + + if (strpos($tagValue, $termLower) !== false) { + $tag->setAttribute('highlight_value', true); + } + } + } } /** diff --git a/resources/sass/_blocks.scss b/resources/sass/_blocks.scss index ef03699f1..ae3e7a441 100644 --- a/resources/sass/_blocks.scss +++ b/resources/sass/_blocks.scss @@ -262,6 +262,10 @@ } } +.tag-name.highlight, .tag-value.highlight { + font-weight: bold; +} + .tag-list div:last-child .tag-item { margin-bottom: 0; } diff --git a/resources/views/entities/tag.blade.php b/resources/views/entities/tag.blade.php index 057c70921..de4750c13 100644 --- a/resources/views/entities/tag.blade.php +++ b/resources/views/entities/tag.blade.php @@ -1,9 +1,9 @@
@if($linked ?? true) - - @if($tag->value) @endif + + @if($tag->value) @endif @else -
@icon('tag'){{ $tag->name }}
- @if($tag->value)
{{$tag->value}}
@endif +
@icon('tag'){{ $tag->name }}
+ @if($tag->value)
{{$tag->value}}
@endif @endif
\ No newline at end of file From 63d8d72d7ecdba31903bee4c2295f2d0a2149e0d Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 13 Nov 2021 13:26:11 +0000 Subject: [PATCH 17/22] Added testing to cover search result highlighting --- tests/Entity/EntitySearchTest.php | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index f69dba211..95d474629 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -383,4 +383,32 @@ class EntitySearchTest extends TestCase $this->assertEquals(3, $scoreByTerm->get('Animal')); $this->assertEquals(3, $scoreByTerm->get('SuperImportant')); } + + public function test_matching_terms_in_search_results_are_highlighted() + { + $this->newPage(['name' => 'My Meowie Cat', 'html' => '

A superimportant page about meowieable animals

', 'tags' => [ + ['name' => 'Animal', 'value' => 'MeowieCat'], + ['name' => 'SuperImportant'], + ]]); + + $search = $this->asEditor()->get('/search?term=SuperImportant+Meowie'); + // Title + $search->assertSee('My Meowie Cat', false); + // Content + $search->assertSee('A superimportant page about meowieable animals', false); + // Tag name + $search->assertElementContains('.tag-name.highlight', 'SuperImportant'); + // Tag value + $search->assertElementContains('.tag-value.highlight', 'MeowieCat'); + } + + public function test_html_entities_in_item_details_remains_escaped_in_search_results() + { + $this->newPage(['name' => 'My TestPageContent', 'html' => '

My supercool <great> TestPageContent page

']); + + $search = $this->asEditor()->get('/search?term=TestPageContent'); + $search->assertSee('My <cool> TestPageContent', false); + $search->assertSee('My supercool <great> TestPageContent page', false); + } + } From 2633b94deb760dc2f183a612b68e311f57795ce5 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 13 Nov 2021 13:28:17 +0000 Subject: [PATCH 18/22] Applied StyleCI changes --- app/Entities/Tools/SearchIndex.php | 9 ++++----- app/Entities/Tools/SearchOptions.php | 3 +-- app/Entities/Tools/SearchResultsFormatter.php | 15 ++++++++------- app/Entities/Tools/SearchRunner.php | 6 +++--- app/Http/Controllers/SearchController.php | 3 ++- tests/Entity/EntitySearchTest.php | 2 -- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index 3c4b5a247..702606be9 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -175,7 +175,7 @@ class SearchIndex $names = []; $values = []; - foreach($tags as $tag) { + foreach ($tags as $tag) { $names[] = $tag->name; $values[] = $tag->value; } @@ -233,17 +233,16 @@ class SearchIndex $entityType = $entity->getMorphClass(); foreach ($mergedScoreMap as $term => $score) { $dataArray[] = [ - 'term' => $term, - 'score' => $score, + 'term' => $term, + 'score' => $score, 'entity_type' => $entityType, - 'entity_id' => $entityId, + 'entity_id' => $entityId, ]; } return $dataArray; } - /** * For the given term data arrays, Merge their contents by term * while combining any scores. diff --git a/app/Entities/Tools/SearchOptions.php b/app/Entities/Tools/SearchOptions.php index 9f1b9742d..99271058e 100644 --- a/app/Entities/Tools/SearchOptions.php +++ b/app/Entities/Tools/SearchOptions.php @@ -124,7 +124,6 @@ class SearchOptions return $terms; } - /** * Parse a standard search term string into individual search terms and * extract any exact terms searches to be made. 
@@ -136,7 +135,7 @@ class SearchOptions $terms = explode(' ', $termString); $indexDelimiters = SearchIndex::$delimiters; $parsed = [ - 'terms' => [], + 'terms' => [], 'exacts' => [], ]; diff --git a/app/Entities/Tools/SearchResultsFormatter.php b/app/Entities/Tools/SearchResultsFormatter.php index 1ddee5830..2898520ab 100644 --- a/app/Entities/Tools/SearchResultsFormatter.php +++ b/app/Entities/Tools/SearchResultsFormatter.php @@ -8,10 +8,10 @@ use Illuminate\Support\HtmlString; class SearchResultsFormatter { - /** * For the given array of entities, Prepare the models to be shown in search result * output. This sets a series of additional attributes. + * * @param Entity[] $results */ public function format(array $results, SearchOptions $options): void @@ -32,7 +32,7 @@ class SearchResultsFormatter $terms = array_merge($options->exacts, $options->searches); $originalContentByNewAttribute = [ - 'preview_name' => $entity->name, + 'preview_name' => $entity->name, 'preview_content' => $textContent, ]; @@ -49,7 +49,8 @@ class SearchResultsFormatter /** * Highlight tags which match the given terms. - * @param Tag[] $tags + * + * @param Tag[] $tags * @param string[] $terms */ protected function highlightTagsContainingTerms(array $tags, array $terms): void @@ -104,6 +105,7 @@ class SearchResultsFormatter * adjacent or where they overlap. * * @param array $matchPositions + * * @return array */ protected function sortAndMergeMatchPositions(array $matchPositions): array @@ -118,7 +120,7 @@ class SearchResultsFormatter $mergedRefs[$start] = $end; $lastStart = $start; $lastEnd = $end; - } else if ($end > $lastEnd) { + } elseif ($end > $lastEnd) { $mergedRefs[$lastStart] = $end; $lastEnd = $end; } @@ -194,7 +196,7 @@ class SearchResultsFormatter $firstStart = $firstStart ?: 0; if ($remainder > 10 && $firstStart !== 0) { $padStart = max(0, $firstStart - $remainder); - $content = ($padStart === 0 ? '' : '...') . e(substr($originalText, $padStart, $firstStart - $padStart)) . 
substr($content, 4); + $content = ($padStart === 0 ? '' : '...') . e(substr($originalText, $padStart, $firstStart - $padStart)) . substr($content, 4); } // Add ellipsis if we're not at the end @@ -204,5 +206,4 @@ class SearchResultsFormatter return $content; } - -} \ No newline at end of file +} diff --git a/app/Entities/Tools/SearchRunner.php b/app/Entities/Tools/SearchRunner.php index afe5e9f19..f6da871f4 100644 --- a/app/Entities/Tools/SearchRunner.php +++ b/app/Entities/Tools/SearchRunner.php @@ -141,13 +141,13 @@ class SearchRunner $relations = ['tags']; if ($entityModelInstance instanceof BookChild) { - $relations['book'] = function(BelongsTo $query) { + $relations['book'] = function (BelongsTo $query) { $query->visible(); }; } if ($entityModelInstance instanceof Page) { - $relations['chapter'] = function(BelongsTo $query) { + $relations['chapter'] = function (BelongsTo $query) { $query->visible(); }; } @@ -310,7 +310,7 @@ class SearchRunner if (empty($termCounts)) { return []; } - + $multipliers = []; $max = max(array_values($termCounts)); diff --git a/app/Http/Controllers/SearchController.php b/app/Http/Controllers/SearchController.php index 040c04ece..6b2be5a2d 100644 --- a/app/Http/Controllers/SearchController.php +++ b/app/Http/Controllers/SearchController.php @@ -14,7 +14,8 @@ class SearchController extends Controller protected $searchRunner; protected $entityContextManager; - public function __construct(SearchRunner $searchRunner) { + public function __construct(SearchRunner $searchRunner) + { $this->searchRunner = $searchRunner; } diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index 95d474629..f935f1331 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -7,7 +7,6 @@ use BookStack\Entities\Models\Book; use BookStack\Entities\Models\Bookshelf; use BookStack\Entities\Models\Chapter; use BookStack\Entities\Models\Page; -use BookStack\Entities\Models\SearchTerm; use Tests\TestCase; 
class EntitySearchTest extends TestCase @@ -410,5 +409,4 @@ class EntitySearchTest extends TestCase $search->assertSee('My <cool> TestPageContent', false); $search->assertSee('My supercool <great> TestPageContent page', false); } - } From 221458ccfd70e1b2c6791018792732b247e55d65 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 13 Nov 2021 13:43:41 +0000 Subject: [PATCH 19/22] Fixed failing tests due to search highlighting changes --- app/Entities/Tools/SearchIndex.php | 2 +- tests/Entity/EntitySearchTest.php | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index 702606be9..d748c1695 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -223,7 +223,7 @@ class SearchIndex if ($entity instanceof Page) { $bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html); } else { - $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor); + $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description ?? '', $entity->searchFactor); } $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap); diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index f935f1331..0ef46d3a7 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -18,15 +18,20 @@ class EntitySearchTest extends TestCase $search = $this->asEditor()->get('/search?term=' . urlencode($page->name)); $search->assertSee('Search Results'); - $search->assertSee($page->name); + + $title = strip_tags($search->getElementHtml('.entity-list-item-name')); + $this->assertEquals($page->name, $title); } public function test_bookshelf_search() { - $shelf = Bookshelf::first(); + /** @var Bookshelf $shelf */ + $shelf = Bookshelf::query()->first(); $search = $this->asEditor()->get('/search?term=' . urlencode(mb_substr($shelf->name, 0, 3)) . 
' {type:bookshelf}'); $search->assertStatus(200); - $search->assertSee($shelf->name); + + $title = strip_tags($search->getElementHtml('.entity-list-item-name')); + $this->assertEquals($shelf->name, $title); } public function test_invalid_page_search() From 755dc99c724d63ed7feb9ccc1ff607c2579a9f75 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 13 Nov 2021 14:37:40 +0000 Subject: [PATCH 20/22] Made further tweaks to search results formatting - Updated page names to not be limited to a certain length. - Added better start/end fill logic. - Prevented tags from being counted towards the target content length desired from the formatter. --- app/Entities/Tools/SearchResultsFormatter.php | 46 +++++++++++++++---- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/app/Entities/Tools/SearchResultsFormatter.php b/app/Entities/Tools/SearchResultsFormatter.php index 2898520ab..a30c96003 100644 --- a/app/Entities/Tools/SearchResultsFormatter.php +++ b/app/Entities/Tools/SearchResultsFormatter.php @@ -37,9 +37,10 @@ class SearchResultsFormatter ]; foreach ($originalContentByNewAttribute as $attributeName => $content) { + $targetLength = ($attributeName === 'preview_name') ? 0 : 260; $matchRefs = $this->getMatchPositions($content, $terms); $mergedRefs = $this->sortAndMergeMatchPositions($matchRefs); - $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content); + $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content, $targetLength); $entity->setAttribute($attributeName, new HtmlString($formatted)); } @@ -132,15 +133,25 @@ class SearchResultsFormatter /** * Format the given original text, returning a version where terms are highlighted within. * Returned content is in HTML text format. + * A given $targetLength of 0 asserts no target length limit. + * + * This is a complex function but written to be relatively efficient, going through the term matches in order + * so that we're only doing a one-time loop through of the matches. 
There is no further searching + * done within here. */ - protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText): string + protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText, int $targetLength): string { $contextRange = 32; - $targetLength = 260; $maxEnd = strlen($originalText); $lastEnd = 0; $firstStart = null; + $fetchAll = ($targetLength === 0); $content = ''; + $contentTextLength = 0; + + if ($fetchAll) { + $targetLength = $maxEnd * 2; + } foreach ($matchPositions as $start => $end) { // Get our outer text ranges for the added context we want to show upon the result. @@ -151,18 +162,30 @@ class SearchResultsFormatter $startDiff = $start - $lastEnd; if ($startDiff < 0) { $contextStart = $start; + // Trims off '$startDiff' number of characters to bring it back to the start + // of this current match zone. $content = substr($content, 0, strlen($content) + $startDiff); + $contentTextLength += $startDiff; } // Add ellipsis between results - if ($contextStart !== 0 && $contextStart !== $start) { + if (!$fetchAll && $contextStart !== 0 && $contextStart !== $start) { $content .= ' ...'; + $contentTextLength += 4; + } else if ($fetchAll) { + // Or fill in gap since the previous match + $fillLength = $contextStart - $lastEnd; + $content .= e(substr($originalText, $lastEnd, $fillLength)); + $contentTextLength += $fillLength; } // Add our content including the bolded matching text $content .= e(substr($originalText, $contextStart, $start - $contextStart)); + $contentTextLength += $start - $contextStart; $content .= '' . e(substr($originalText, $start, $end - $start)) .
''; + $contentTextLength += $end - $start; $content .= e(substr($originalText, $end, $contextEnd - $end)); + $contentTextLength += $contextEnd - $end; // Update our last end position $lastEnd = $contextEnd; @@ -173,7 +196,7 @@ class SearchResultsFormatter } // Stop if we're near our target - if (strlen($content) >= $targetLength - 10) { + if ($contentTextLength >= $targetLength - 10) { break; } } @@ -181,20 +204,23 @@ class SearchResultsFormatter // Just copy out the content if we haven't moved along anywhere. if ($lastEnd === 0) { $content = e(substr($originalText, 0, $targetLength)); + $contentTextLength = $targetLength; $lastEnd = $targetLength; } // Pad out the end if we're low - $remainder = $targetLength - strlen($content); + $remainder = $targetLength - $contentTextLength; if ($remainder > 10) { - $content .= e(substr($originalText, $lastEnd, $remainder)); - $lastEnd += $remainder; + $padEndLength = min($maxEnd - $lastEnd, $remainder); + $content .= e(substr($originalText, $lastEnd, $padEndLength)); + $lastEnd += $padEndLength; + $contentTextLength += $padEndLength; } // Pad out the start if we're still low - $remainder = $targetLength - strlen($content); + $remainder = $targetLength - $contentTextLength; $firstStart = $firstStart ?: 0; - if ($remainder > 10 && $firstStart !== 0) { + if (!$fetchAll && $remainder > 10 && $firstStart !== 0) { $padStart = max(0, $firstStart - $remainder); $content = ($padStart === 0 ? '' : '...') . e(substr($originalText, $padStart, $firstStart - $padStart)) . 
substr($content, 4); } From 21d3620ef07e5a405c9aaeb118ced312aece6a85 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 13 Nov 2021 14:51:59 +0000 Subject: [PATCH 21/22] Attempted to make test a bit less flaky --- tests/Entity/EntitySearchTest.php | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index 0ef46d3a7..6929adc74 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -18,20 +18,17 @@ class EntitySearchTest extends TestCase $search = $this->asEditor()->get('/search?term=' . urlencode($page->name)); $search->assertSee('Search Results'); - - $title = strip_tags($search->getElementHtml('.entity-list-item-name')); - $this->assertEquals($page->name, $title); + $search->assertSeeText($page->name, true); } public function test_bookshelf_search() { /** @var Bookshelf $shelf */ $shelf = Bookshelf::query()->first(); - $search = $this->asEditor()->get('/search?term=' . urlencode(mb_substr($shelf->name, 0, 3)) . ' {type:bookshelf}'); - $search->assertStatus(200); - $title = strip_tags($search->getElementHtml('.entity-list-item-name')); - $this->assertEquals($shelf->name, $title); + $search = $this->asEditor()->get('/search?term=' . urlencode($shelf->name) . 
' {type:bookshelf}'); + $search->assertSee('Search Results'); + $search->assertSeeText($shelf->name, true); } public function test_invalid_page_search() From fc7bd57dc804921cc89f7add1ce07dd29c3736d1 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 13 Nov 2021 15:04:04 +0000 Subject: [PATCH 22/22] Fixed occurrences of altered titles in search results --- app/Entities/Tools/SearchResultsFormatter.php | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/app/Entities/Tools/SearchResultsFormatter.php b/app/Entities/Tools/SearchResultsFormatter.php index a30c96003..31a8f81c9 100644 --- a/app/Entities/Tools/SearchResultsFormatter.php +++ b/app/Entities/Tools/SearchResultsFormatter.php @@ -141,11 +141,12 @@ class SearchResultsFormatter */ protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText, int $targetLength): string { - $contextRange = 32; $maxEnd = strlen($originalText); - $lastEnd = 0; - $firstStart = null; $fetchAll = ($targetLength === 0); + $contextLength = ($fetchAll ? 0 : 32); + + $firstStart = null; + $lastEnd = 0; $content = ''; $contentTextLength = 0; @@ -155,8 +156,8 @@ class SearchResultsFormatter foreach ($matchPositions as $start => $end) { // Get our outer text ranges for the added context we want to show upon the result. - $contextStart = max($start - $contextRange, 0, $lastEnd); - $contextEnd = min($end + $contextRange, $maxEnd); + $contextStart = max($start - $contextLength, 0, $lastEnd); + $contextEnd = min($end + $contextLength, $maxEnd); // Adjust the start if we're going to be touching the previous match. $startDiff = $start - $lastEnd;