diff --git a/include/class.charset.php b/include/class.charset.php
new file mode 100644
index 0000000000000000000000000000000000000000..5a4f28ad7ce7a14afc1c7647771f5417a7e908d1
--- /dev/null
+++ b/include/class.charset.php
@@ -0,0 +1,51 @@
+<?php
+/*********************************************************************
+    class.charset.php
+
+    Charset util class
+
+    Copyright (c) 2015 osTicket
+    http://www.osticket.com
+
+    Released under the GNU General Public License WITHOUT ANY WARRANTY.
+    See LICENSE.TXT for details.
+
+    vim: expandtab sw=4 ts=4 sts=4:
+**********************************************************************/
+
+class Charset {
+
+    /**
+     * Map an invalid, ambiguous or aliased charset name to a usable one.
+     *
+     * Declared static because it is invoked as Charset::normalize().
+     * Matching is case-insensitive. Names that are not recognised here
+     * are returned unchanged.
+     * Thanks in part to https://github.com/mikel/mail/commit/88457e
+     *
+     * @param string $charset Charset name to clean up
+     * @return string Normalized charset name
+     */
+    static function normalize($charset) {
+
+        $match = array();
+        switch (true) {
+        // Windows charsets - force correct format
+        case preg_match('`^Windows-?(\d+)$`i', $charset, $match):
+            return 'Windows-'.$match[1];
+        // ks_c_5601-1987: Korean alias for cp949
+        case preg_match('`^ks_c_5601-1987`i', $charset):
+            return 'cp949';
+        // Incorrect, bogus, ambiguous or empty charsets
+        // ISO-8859-1 is assumed. The names are matched in full (note the
+        // `$` anchor) so real names such as iso-8859-15 are left alone
+        case preg_match('`^(default|x-user-defined|iso|us-ascii)$`i', $charset):
+        case preg_match('`^\s*$`', $charset):
+            return 'ISO-8859-1';
+        }
+
+        // Anything else is passed through untouched
+        return $charset;
+    }
+}
+?>
diff --git a/include/class.format.php b/include/class.format.php index a124a577f60a0cc88beff06dce0c3953fa8d96d6..aaff16b8bcd863d76d95fe25f57c46b77b2d3135 100644 --- a/include/class.format.php +++ b/include/class.format.php @@ -14,6 +14,7 @@ vim: expandtab sw=4 ts=4 sts=4: **********************************************************************/ +include_once INCLUDE_DIR.'class.charset.php'; class Format { @@ -47,12 +48,8 @@ class Format { if (!$charset && function_exists('mb_detect_encoding')) $charset = mb_detect_encoding($text); - // Cleanup - incorrect, bogus, or ambiguous charsets - // ISO-8859-1 is assumed for empty charset. 
- if (!$charset || in_array(strtolower(trim($charset)), - array('default','x-user-defined','iso','us-ascii'))) - $charset = 'ISO-8859-1'; - + // Normalize bogus or ambiguous charsets + $charset = Charset::normalize(trim($charset)); $original = $text; if (function_exists('iconv')) $text = iconv($charset, $encoding.'//IGNORE', $text);