Skip to content
Snippets Groups Projects
class.charset.php 2.64 KiB
Newer Older
  • Learn to ignore specific revisions
  • Peter Rotich's avatar
    Peter Rotich committed
    <?php
    /*********************************************************************
        class.charset.php
    
        Charset util class
    
        Copyright (c) 2015 osTicket
        http://www.osticket.com
    
        Released under the GNU General Public License WITHOUT ANY WARRANTY.
        See LICENSE.TXT for details.
    
        vim: expandtab sw=4 ts=4 sts=4:
    **********************************************************************/
    
    class Charset {
    
    
        const UTF8 = 'utf-8';
    
    
    Peter Rotich's avatar
    Peter Rotich committed
        // Cleanup invalid charsets
        // Thanks in part to https://github.com/mikel/mail/commit/88457e
    
        static function normalize($charset) {
    
    Peter Rotich's avatar
    Peter Rotich committed
    
            $match = array();
            switch (true) {
            // Windows charsets - force correct format
    
            case preg_match('`^Windows-?(\d+)$`i', $charset, $match):
    
    Peter Rotich's avatar
    Peter Rotich committed
                return 'Windows-'.$match[1];
            // ks_c_5601-1987: Korean alias for cp949
    
            case preg_match('`^ks_c_5601-1987`i', $charset):
    
    Peter Rotich's avatar
    Peter Rotich committed
                return 'cp949';
    
            case preg_match('`^iso-?(\S+)$`i', $charset, $match):
    
    Przemek Grondek's avatar
    Przemek Grondek committed
                return "ISO-".$match[1];
    
            // GBK superceded gb2312 and is backward compatible
            case preg_match('`^gb2312`i', $charset):
                return 'GBK';
    
    Peter Rotich's avatar
    Peter Rotich committed
            // Incorrect, bogus, ambiguous or empty charsets
            // ISO-8859-1 is assumed
    
            case preg_match('`^(default|x-user-defined|iso|us-ascii)$`i', $charset):
    
    Peter Rotich's avatar
    Peter Rotich committed
            case preg_match('`^\s*$`', $charset):
                return 'ISO-8859-1';
            }
    
            // Hmmmm
            return $charset;
        }
    
    
        // Translate characters ($text) from one encoding ($from) to another ($to)
        static function transcode($text, $from, $to) {
    
            //Try auto-detecting charset/encoding
            if (!$from && function_exists('mb_detect_encoding'))
                $from = mb_detect_encoding($text);
    
            // Normalize bogus or ambiguous charsets
            $from = self::normalize(trim($from));
            $to = self::normalize(trim($to));
    
            $original = $text;
            if (function_exists('iconv'))
                $text = iconv($from, $to.'//IGNORE', $text);
            elseif (function_exists('mb_convert_encoding'))
                $text = mb_convert_encoding($text, $to, $from);
            elseif (!strcasecmp($to, 'utf-8')
                    && function_exists('utf8_encode')
                    && !strcasecmp($from, 'ISO-8859-1'))
                $text = utf8_encode($text);
    
            // If $text is false, then we have a (likely) invalid charset, use
            // the original text and assume 8-bit (latin-1 / iso-8859-1)
            // encoding
            return (!$text && $original) ? $original : $text;
        }
    
        //Wrapper for utf-8 transcoding.
        function utf8($text, $charset=null) {
            return self::transcode($text, $charset, self::UTF8);
        }