Fixed issue where text after line breaks was not indexed

Line breaks were previously effectively removed during indexing, so the
words on either side were joined together, breaking prefix matching.
Added a test to cover this.
For #3508
This commit is contained in:
Dan Brown 2022-06-20 23:47:42 +01:00
parent df94b73e29
commit 0c6f598d91
No known key found for this signature in database
GPG Key ID: 46D9F943C24A2EF9
2 changed files with 13 additions and 0 deletions

View File

@ -147,6 +147,8 @@ class SearchIndex
];
$html = '<body>' . $html . '</body>';
$html = str_ireplace(['<br>', '<br />', '<br/>'], "\n", $html);
libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));

View File

@ -423,6 +423,17 @@ class EntitySearchTest extends TestCase
$search->assertSee('My supercool &lt;great&gt; <strong>TestPageContent</strong> page', false);
}
/**
 * Creates a page whose terms are separated only by <br> tags, then
 * searches for two of those terms and expects the page URL to be
 * present in the search response.
 */
public function test_words_adjacent_to_lines_breaks_can_be_matched_with_normal_terms()
{
    // The literal is kept flush-left so its content (including the
    // leading and trailing newlines) is unchanged.
    $pageHtml = '
<p>TermA<br>TermB<br>TermC</p>
';
    $page = $this->newPage(['name' => 'TermA', 'html' => $pageHtml]);

    // Search for the terms that follow a line break in the page content.
    $searchUrl = '/search?term=' . urlencode('TermB TermC');
    $resp = $this->asEditor()->get($searchUrl);

    // Second argument false: compare against the raw (unescaped) URL.
    $resp->assertSee($page->getUrl(), false);
}
public function test_searches_with_user_filters_adds_them_into_advanced_search_form()
{
$resp = $this->asEditor()->get('/search?term=' . urlencode('test {updated_by:me} {created_by:dan}'));