Skip to content
Snippets Groups Projects
Commit 6d5d19fa authored by Peter Rotich's avatar Peter Rotich
Browse files

charset: Normalize charset

This pull request adds a cleanup utility for bogus and invalid charsets, most
of them introduced by a nameless company out of Redmond, WA.
parent d700d4eb
Branches
Tags
No related merge requests found
<?php
/*********************************************************************
class.charset.php
Charset util class
Copyright (c) 2015 osTicket
http://www.osticket.com
Released under the GNU General Public License WITHOUT ANY WARRANTY.
See LICENSE.TXT for details.
vim: expandtab sw=4 ts=4 sts=4:
**********************************************************************/
class Charset {

    /**
     * Clean up an invalid, bogus or ambiguous charset name.
     *
     * Thanks in part to https://github.com/mikel/mail/commit/88457e
     *
     * Declared static because callers invoke it as Charset::normalize();
     * a static call to a non-static method is an Error on PHP 8.
     *
     * Charset names are matched case-insensitively.
     *
     * @param string|null $charset Charset name as received (may be empty).
     * @return string 'Windows-NNNN' for Windows code pages, 'cp949' for the
     *      Korean ks_c_5601-1987 alias, 'ISO-8859-1' for empty or bogus
     *      names; otherwise the input is returned unchanged.
     */
    public static function normalize($charset) {
        // Treat NULL like an empty charset; avoids handing NULL to preg_match
        $charset = (string) $charset;

        $match = array();
        switch (true) {
        // Windows charsets - force correct format
        case preg_match('`^Windows-?(\d+)$`i', $charset, $match):
            return 'Windows-'.$match[1];
        // ks_c_5601-1987: Korean alias for cp949
        case preg_match('`^ks_c_5601-1987`i', $charset):
            return 'cp949';
        // Incorrect, bogus, ambiguous or empty charsets
        // ISO-8859-1 is assumed. The pattern is anchored at both ends so
        // that valid names which merely start with one of these tokens
        // (e.g. iso-8859-2) are left alone.
        case preg_match('`^(default|x-user-defined|iso|us-ascii)$`i', $charset):
        case preg_match('`^\s*$`', $charset):
            return 'ISO-8859-1';
        }

        // Nothing to fix - hand the charset back as is
        return $charset;
    }
}
?>
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
vim: expandtab sw=4 ts=4 sts=4: vim: expandtab sw=4 ts=4 sts=4:
**********************************************************************/ **********************************************************************/
include_once INCLUDE_DIR.'class.charset.php';
class Format { class Format {
...@@ -47,12 +48,8 @@ class Format { ...@@ -47,12 +48,8 @@ class Format {
if (!$charset && function_exists('mb_detect_encoding')) if (!$charset && function_exists('mb_detect_encoding'))
$charset = mb_detect_encoding($text); $charset = mb_detect_encoding($text);
// Cleanup - incorrect, bogus, or ambiguous charsets // Normalize bogus or ambiguous charsets
// ISO-8859-1 is assumed for empty charset. $charset = Charset::normalize(trim($charset));
if (!$charset || in_array(strtolower(trim($charset)),
array('default','x-user-defined','iso','us-ascii')))
$charset = 'ISO-8859-1';
$original = $text; $original = $text;
if (function_exists('iconv')) if (function_exists('iconv'))
$text = iconv($charset, $encoding.'//IGNORE', $text); $text = iconv($charset, $encoding.'//IGNORE', $text);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment