From 13d09b6010c45b3f09034729fc250de90c1a1e82 Mon Sep 17 00:00:00 2001
From: Jared Hancock <jared@osticket.com>
Date: Fri, 22 May 2015 09:24:05 -0500
Subject: [PATCH] search: Fix several small issues with search indexing

* Reindexing did not properly flush the last batch of items to the search
  index; therefore, reindexing would always miss the last few items.
* Creating a new HTML thread entry with inline images resulted in empty
  search content.
* HTML tag stripping in HtmlThreadBody::getSearchable() would result in
  missing white space between some words, resulting in poor searchable
  content
---
 include/class.search.php |  2 ++
 include/class.thread.php | 14 +++++++++++---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/class.search.php b/include/class.search.php
index 47254dd40..d4616307a 100644
--- a/include/class.search.php
+++ b/include/class.search.php
@@ -606,6 +606,8 @@ class MysqlSearchBackend extends SearchBackend {
         // FILES ------------------------------------
 
         // Flush non-full batch of records
+        $this->__index(null, true);
+
         if (!$this->_reindexed) {
             // Stop rebuilding the index
             $this->getConfig()->set('reindex', 0);
diff --git a/include/class.thread.php b/include/class.thread.php
index c44cc3659..46c018282 100644
--- a/include/class.thread.php
+++ b/include/class.thread.php
@@ -1178,6 +1178,9 @@ class ThreadEntry {
                 .' WHERE `id`='.db_input($entry->getId());
             if (!db_query($sql) || !db_affected_rows())
                 return false;
+
+            // Set the $entry here for search indexing
+            $entry->ht['body'] = $body;
         }
 
         // Email message id
@@ -1532,9 +1535,14 @@ class HtmlThreadBody extends ThreadBody {
     }
 
     function getSearchable() {
-        // <br> -> \n
-        $body = preg_replace(array('`<br(\s*)?/?>`i', '`</div>`i'), "\n", $this->body);
-        $body = Format::htmldecode(Format::striptags($body));
+        // Replace tag chars with spaces (to ensure words are separated)
+        $body = Format::html($this->body, array('hook_tag' => function($el, $attributes=0) {
+            static $non_ws = array('wbr' => 1);
+            return (isset($non_ws[$el])) ? '' : ' ';
+        }));
+        // Decode HTML entities, then collapse multiple white-spaces
+        $body = html_entity_decode($body, ENT_QUOTES);
+        $body = preg_replace('`\s+`u', ' ', $body);
         return Format::searchable($body);
     }
 
-- 
GitLab