Newer
Older
<?php
/*********************************************************************
class.charset.php
Charset util class
Copyright (c) 2015 osTicket
http://www.osticket.com
Released under the GNU General Public License WITHOUT ANY WARRANTY.
See LICENSE.TXT for details.
vim: expandtab sw=4 ts=4 sts=4:
**********************************************************************/
class Charset {
// Cleanup invalid charsets
// Thanks in part to https://github.com/mikel/mail/commit/88457e
static function normalize($charset) {
$match = array();
switch (true) {
// Windows charsets - force correct format
case preg_match('`^Windows-?(\d+)$`i', $charset, $match):
return 'Windows-'.$match[1];
// ks_c_5601-1987: Korean alias for cp949
case preg_match('`^ks_c_5601-1987`i', $charset):
case preg_match('`^iso-?(\S+)$`i', $charset, $match):
// GBK superceded gb2312 and is backward compatible
case preg_match('`^gb2312`i', $charset):
return 'GBK';
// Incorrect, bogus, ambiguous or empty charsets
// ISO-8859-1 is assumed
case preg_match('`^(default|x-user-defined|iso|us-ascii)$`i', $charset):
case preg_match('`^\s*$`', $charset):
return 'ISO-8859-1';
}
// Hmmmm
return $charset;
}
// Translate characters ($text) from one encoding ($from) to another ($to)
static function transcode($text, $from, $to) {
//Try auto-detecting charset/encoding
if (!$from && function_exists('mb_detect_encoding'))
$from = mb_detect_encoding($text);
// Normalize bogus or ambiguous charsets
$from = self::normalize(trim($from));
$to = self::normalize(trim($to));
$original = $text;
if (function_exists('iconv'))
$text = iconv($from, $to.'//IGNORE', $text);
elseif (function_exists('mb_convert_encoding'))
$text = mb_convert_encoding($text, $to, $from);
elseif (!strcasecmp($to, 'utf-8')
&& function_exists('utf8_encode')
&& !strcasecmp($from, 'ISO-8859-1'))
$text = utf8_encode($text);
// If $text is false, then we have a (likely) invalid charset, use
// the original text and assume 8-bit (latin-1 / iso-8859-1)
// encoding
return (!$text && $original) ? $original : $text;
}
//Wrapper for utf-8 transcoding.
function utf8($text, $charset=null) {
return self::transcode($text, $charset, self::UTF8);
}
class transcode_filter extends php_user_filter {
var $from;
var $to;
function filter($in, $out, &$consumed, $closing) {
while ($bucket = stream_bucket_make_writeable($in)) {
$bucket->data = Charset::transcode($bucket->data, $this->from,
$this->to);
$consumed += $bucket->datalen;
stream_bucket_append($out, $bucket);
}
return PSFS_PASS_ON;
}
function onCreate() {
switch ($this->filtername) {
case 'transcode.utf8-ascii':
$this->from ='utf-8';
$this->to = 'ISO-8859-1';
break;
default:
return false;
}
return true;
}
}
stream_filter_register('transcode.*', 'transcode_filter');