From 795fcca0ce8f78de4a3d25c5e262e294218f35f5 Mon Sep 17 00:00:00 2001 From: Jared Hancock <jared@osticket.com> Date: Mon, 1 Jul 2013 13:33:15 -0500 Subject: [PATCH] Use a failsafe encoding for unrecognized charsets If the body of an email message is written and encoded with an unrecognized charset (like 'iso', for instance), then use the original text, assumed to be 8-bit encoded. This patch will create idiosyncrasies, where text might be transcoded to utf-8 incorrectly, but it should eliminate instances where email message bodies are missing due to incorrect charset labels. --- include/class.format.php | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/class.format.php b/include/class.format.php index 7a339e429..10859d15a 100644 --- a/include/class.format.php +++ b/include/class.format.php @@ -41,18 +41,23 @@ class Format { if(!$charset && function_exists('mb_detect_encoding')) $charset = mb_detect_encoding($text); - //Cleanup - junk - if($charset && in_array(trim($charset), array('default','x-user-defined'))) + // Cleanup - incorrect, bogus, or ambiguous charsets + if($charset && in_array(strtolower(trim($charset)), + array('default','x-user-defined','iso'))) $charset = 'ISO-8859-1'; + $original = $text; if(function_exists('iconv') && $charset) $text = iconv($charset, $encoding.'//IGNORE', $text); elseif(function_exists('mb_convert_encoding') && $charset && $encoding) $text = mb_convert_encoding($text, $encoding, $charset); elseif(!strcasecmp($encoding, 'utf-8')) //forced blind utf8 encoding. $text = function_exists('imap_utf8')?imap_utf8($text):utf8_encode($text); - - return $text; + + // If $text is false, then we have a (likely) invalid charset, use + // the original text and assume 8-bit (latin-1 / iso-8859-1) + // encoding + return (!$text && $original) ? $original : $text; } //Wrapper for utf-8 encoding. -- GitLab