From 13d09b6010c45b3f09034729fc250de90c1a1e82 Mon Sep 17 00:00:00 2001 From: Jared Hancock <jared@osticket.com> Date: Fri, 22 May 2015 09:24:05 -0500 Subject: [PATCH] search: Fix several small issues with search indexing * Reindexing did not properly flush the last batch of items to the search index; therefore, reindexing would always miss the last few items. * Creating a new HTML thread entry with inline images resulted in empty search content. * HTML tag stripping in HtmlThreadBody::getSearchable() would result in missing white space between some words, resulting in poor searchable content. --- include/class.search.php | 2 ++ include/class.thread.php | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/class.search.php b/include/class.search.php index 47254dd40..d4616307a 100644 --- a/include/class.search.php +++ b/include/class.search.php @@ -606,6 +606,8 @@ class MysqlSearchBackend extends SearchBackend { // FILES ------------------------------------ // Flush non-full batch of records + $this->__index(null, true); + if (!$this->_reindexed) { // Stop rebuilding the index $this->getConfig()->set('reindex', 0); diff --git a/include/class.thread.php b/include/class.thread.php index c44cc3659..46c018282 100644 --- a/include/class.thread.php +++ b/include/class.thread.php @@ -1178,6 +1178,9 @@ class ThreadEntry { .' 
WHERE `id`='.db_input($entry->getId()); if (!db_query($sql) || !db_affected_rows()) return false; + + // Set the $entry here for search indexing + $entry->ht['body'] = $body; } // Email message id @@ -1532,9 +1535,14 @@ class HtmlThreadBody extends ThreadBody { } function getSearchable() { - // <br> -> \n - $body = preg_replace(array('`<br(\s*)?/?>`i', '`</div>`i'), "\n", $this->body); - $body = Format::htmldecode(Format::striptags($body)); + // Replace tag chars with spaces (to ensure words are separated) + $body = Format::html($this->body, array('hook_tag' => function($el, $attributes=0) { + static $non_ws = array('wbr' => 1); + return (isset($non_ws[$el])) ? '' : ' '; + })); + // Collapse multiple white-spaces + $body = html_entity_decode($body, ENT_QUOTES); + $body = preg_replace('`\s+`u', ' ', $body); return Format::searchable($body); } -- GitLab