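<?php
/*********************************************************************
    Charset util class

    Copyright (c) 2015 osTicket

    Released under the GNU General Public License WITHOUT ANY WARRANTY.
    See LICENSE.TXT for details.

    vim: expandtab sw=4 ts=4 sts=4:
**********************************************************************/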
class Charset {

    // Canonical name of the UTF-8 charset; used by utf8() as the target
    // encoding.
    const UTF8 = 'utf-8';

    /**
     * Clean up invalid, aliased or ambiguous charset names.
     * (The original comment credits an external source; the reference
     * itself is missing from this file.)
     *
     * @param string|null $charset Charset name as received (may be empty)
     * @return string Normalized charset name; names that match none of
     *      the known patterns are returned unchanged.
     */
    static function normalize($charset) {
        // Treat null / false as an empty charset instead of passing a
        // non-string to preg_match()
        $charset = (string) $charset;

        $match = array();
        switch (true) {
        // Windows charsets - force correct format
        case preg_match('`^Windows-?(\d+)$`i', $charset, $match):
            return 'Windows-'.$match[1];
        // ks_c_5601-1987: Korean alias for cp949
        case preg_match('`^ks_c_5601-1987`i', $charset):
            return 'cp949';
        // ISO charsets - force correct format (e.g. iso8859-1 => ISO-8859-1).
        // A bare "iso" does not match here and is handled as bogus below.
        case preg_match('`^iso-?(\S+)$`i', $charset, $match):
            return 'ISO-'.$match[1];
        // GBK superseded gb2312 and is backward compatible
        case preg_match('`^gb2312`i', $charset):
            return 'GBK';
        // Incorrect, bogus, ambiguous or empty charsets
        // ISO-8859-1 is assumed
        case preg_match('`^(default|x-user-defined|iso|us-ascii)$`i', $charset):
        case preg_match('`^\s*$`', $charset):
            return 'ISO-8859-1';
        }

        // Nothing to fix up - hand the name back as-is
        return $charset;
    }

    /**
     * Translate characters ($text) from one encoding ($from) to another
     * ($to).
     *
     * Uses iconv when available, then mbstring, and finally utf8_encode()
     * for the ISO-8859-1 => UTF-8 case only.
     *
     * @param string $text Text to convert
     * @param string|null $from Source charset; when empty it is
     *      auto-detected if mbstring is available
     * @param string $to Target charset
     * @return string Converted text, or the original text when the
     *      conversion produced nothing usable.
     */
    static function transcode($text, $from, $to) {

        // Try auto-detecting charset/encoding
        if (!$from && function_exists('mb_detect_encoding'))
            $from = mb_detect_encoding($text);

        // Normalize bogus or ambiguous charsets. The casts keep trim()
        // from receiving null / false when no charset is known.
        $from = self::normalize(trim((string) $from));
        $to = self::normalize(trim((string) $to));

        $original = $text;
        if (function_exists('iconv'))
            $text = iconv($from, $to.'//IGNORE', $text);
        elseif (function_exists('mb_convert_encoding'))
            $text = mb_convert_encoding($text, $to, $from);
        elseif (!strcasecmp($to, 'utf-8')
                && function_exists('utf8_encode')
                && !strcasecmp($from, 'ISO-8859-1'))
            $text = utf8_encode($text);

        // If $text is false, then we have a (likely) invalid charset, use
        // the original text and assume 8-bit (latin-1 / iso-8859-1)
        // encoding
        return (!$text && $original) ? $original : $text;
    }

    /**
     * Wrapper for utf-8 transcoding.
     *
     * Declared static because it only delegates to self::transcode();
     * calling it through an instance keeps working.
     *
     * @param string $text Text to convert
     * @param string|null $charset Source charset; empty means auto-detect
     * @return string Text converted to UTF-8
     */
    static function utf8($text, $charset=null) {
        return self::transcode($text, $charset, self::UTF8);
    }
}
/**
 * Stream filter which transcodes the data passing through a stream from
 * one charset ($from) to another ($to) using Charset::transcode().
 *
 * The charset pair is selected from the filter name in onCreate().
 */
class transcode_filter extends php_user_filter {
    // Source charset of the stream data; assigned in onCreate()
    var $from;
    // Target charset of the stream data; assigned in onCreate()
    var $to;

    /**
     * Transcode every bucket in the incoming brigade and pass it on.
     *
     * NOTE(review): each bucket is converted independently, so a
     * multi-byte sequence split across two buckets would presumably not
     * convert cleanly -- confirm against expected stream sizes.
     *
     * @param resource $in Incoming bucket brigade
     * @param resource $out Outgoing bucket brigade
     * @param int $consumed Incremented by the length of each bucket handled
     * @param bool $closing Unused here
     * @return int Always PSFS_PASS_ON
     */
    // Keeps the untyped signature without a deprecation notice on
    // PHP 8.1+; older PHP versions read the next line as a comment.
    #[\ReturnTypeWillChange]
    function filter($in, $out, &$consumed, $closing) {
        while ($bucket = stream_bucket_make_writeable($in)) {
            $bucket->data = Charset::transcode($bucket->data, $this->from,
                $this->to);
            $consumed += $bucket->datalen;
            stream_bucket_append($out, $bucket);
        }
        return PSFS_PASS_ON;
    }

    /**
     * Configure the charset pair from the name the filter was attached
     * under.
     *
     * @return bool True for a recognised filter name, false otherwise
     */
    #[\ReturnTypeWillChange]
    function onCreate() {
        switch ($this->filtername) {
        case 'transcode.utf8-ascii':
            $this->from = 'utf-8';
            $this->to = 'ISO-8859-1';
            break;
        default:
            // Unknown transcode.* variant - refuse to create the filter
            return false;
        }
        return true;
    }
}
// Register transcode_filter as the handler class for every stream filter
// whose name matches 'transcode.*'; the class's onCreate() inspects the
// concrete filter name.
stream_filter_register('transcode.*', 'transcode_filter');