Skip to content
Snippets Groups Projects
Commit b03d52f8 authored by Peter Rotich's avatar Peter Rotich
Browse files

Move Format::encode to Charset::transcode

parent 6d5d19fa
No related branches found
No related tags found
No related merge requests found
......@@ -345,7 +345,7 @@ class ApiXmlDataParser extends XmlDataParser {
unset($value[":text"]);
}
if (isset($value['encoding']))
$value['body'] = Format::utf8encode($value['body'], $value['encoding']);
$value['body'] = Charset::utf8($value['body'], $value['encoding']);
if (!strcasecmp($value['type'], 'text/html'))
$value = new HtmlThreadBody($value['body']);
......
......@@ -15,9 +15,11 @@
class Charset {
const UTF8 = 'utf-8';
// Cleanup invalid charsets
// Thanks in part to https://github.com/mikel/mail/commit/88457e
function normalize($charset) {
static function normalize($charset) {
$match = array();
switch (true) {
......@@ -37,5 +39,37 @@ class Charset {
// Hmmmm
return $charset;
}
// Translate characters ($text) from one encoding ($from) to another ($to)
/**
 * Translate characters in $text from one encoding to another.
 *
 * The first available converter is used: iconv (with //IGNORE appended
 * to the target charset), then mb_convert_encoding, then utf8_encode
 * (only for an ISO-8859-1 to UTF-8 conversion). If none is available the
 * text is returned untouched.
 *
 * @param string $text Text to convert.
 * @param string|null $from Source charset; when empty it is auto-detected
 *      with mb_detect_encoding(), if that function exists.
 * @param string $to Target charset.
 * @return string The converted text, or the original text when the
 *      conversion failed or produced nothing from non-empty input.
 */
static function transcode($text, $from, $to) {
    // Try auto-detecting charset/encoding when the caller gave none
    if (!$from && function_exists('mb_detect_encoding'))
        $from = mb_detect_encoding($text);

    // Normalize bogus or ambiguous charsets. $from may still be null or
    // false at this point (nothing supplied / detection failed), so cast
    // before trim() rather than handing it a non-string.
    $from = self::normalize(trim((string) $from));
    $to = self::normalize(trim((string) $to));

    $original = $text;
    if (function_exists('iconv'))
        $text = iconv($from, $to.'//IGNORE', $text);
    elseif (function_exists('mb_convert_encoding'))
        $text = mb_convert_encoding($text, $to, $from);
    elseif (!strcasecmp($to, 'utf-8')
            && function_exists('utf8_encode')
            && !strcasecmp($from, 'ISO-8859-1'))
        $text = utf8_encode($text);

    // A false/null result means we have a (likely) invalid charset; use
    // the original text and assume 8-bit (latin-1 / iso-8859-1) encoding.
    // The checks are explicit rather than truthiness-based so that the
    // string "0" is handled like any other text: a failed conversion of
    // "0" or "" must hand back the original instead of false, and a
    // converted result of "0" must not be mistaken for a failure.
    if ($text === false || $text === null)
        return $original;

    // An empty result from non-empty input is treated as a failed
    // conversion as well.
    if ($text === '' && (string) $original !== '')
        return $original;

    return $text;
}
//Wrapper for utf-8 transcoding.
/**
 * Transcode $text to UTF-8.
 *
 * Declared static because it is invoked as Charset::utf8(...) and only
 * delegates to the static transcode().
 *
 * @param string $text Text to convert.
 * @param string|null $charset Charset of $text; passed through to
 *      transcode() as the source charset.
 * @return string Whatever transcode() returns for a UTF-8 target.
 */
static function utf8($text, $charset=null) {
    return self::transcode($text, $charset, self::UTF8);
}
}
?>
......@@ -41,43 +41,13 @@ class Format {
return $size;
}
/* Encode text into the desired encoding, taking the charset into account when available. */
/**
 * Convert $text from $charset into $encoding.
 *
 * Uses iconv (with //IGNORE appended to the target), then
 * mb_convert_encoding, then utf8_encode (ISO-8859-1 to UTF-8 only),
 * whichever is available first. Declared static because it is invoked
 * as Format::encode(...).
 *
 * @param string $text Text to convert.
 * @param string|null $charset Source charset; auto-detected with
 *      mb_detect_encoding() when empty and that function exists.
 * @param string $encoding Target encoding, 'utf-8' by default.
 * @return string The converted text, or the original text when the
 *      conversion failed or produced nothing from non-empty input.
 */
static function encode($text, $charset=null, $encoding='utf-8') {
    // Try auto-detecting charset/encoding when the caller gave none
    if (!$charset && function_exists('mb_detect_encoding'))
        $charset = mb_detect_encoding($text);

    // Normalize bogus or ambiguous charsets. $charset may still be null
    // or false here, so cast before trim().
    $charset = Charset::normalize(trim((string) $charset));

    $original = $text;
    if (function_exists('iconv'))
        $text = iconv($charset, $encoding.'//IGNORE', $text);
    elseif (function_exists('mb_convert_encoding'))
        $text = mb_convert_encoding($text, $encoding, $charset);
    elseif (!strcasecmp($encoding, 'utf-8')
            && function_exists('utf8_encode')
            && !strcasecmp($charset, 'ISO-8859-1'))
        $text = utf8_encode($text);

    // A false/null result means we have a (likely) invalid charset; use
    // the original text and assume 8-bit (latin-1 / iso-8859-1) encoding.
    // Explicit checks (not truthiness) so that "0" and "" inputs come
    // back as strings rather than false, and a converted "0" is kept.
    if ($text === false || $text === null)
        return $original;

    // An empty result from non-empty input also counts as a failure.
    if ($text === '' && (string) $original !== '')
        return $original;

    return $text;
}
//Wrapper for utf-8 encoding.
/**
 * Encode $text as UTF-8 by delegating to Format::encode().
 *
 * Declared static because it is invoked statically
 * (Format::utf8encode(...) / self::utf8encode(...)).
 *
 * @param string $text Text to convert.
 * @param string|null $charset Charset of $text, passed through to
 *      Format::encode().
 * @return string Whatever Format::encode() returns for a 'utf-8' target.
 */
static function utf8encode($text, $charset=null) {
    return Format::encode($text, $charset, 'utf-8');
}
function mimedecode($text, $encoding='UTF-8') {
if(function_exists('imap_mime_header_decode')
&& ($parts = imap_mime_header_decode($text))) {
$str ='';
foreach ($parts as $part)
$str.= Format::encode($part->text, $part->charset, $encoding);
$str.= Charset::transcode($part->text, $part->charset, $encoding);
$text = $str;
} elseif($text[0] == '=' && function_exists('iconv_mime_decode')) {
......@@ -102,7 +72,7 @@ class Format {
$filename, $match))
// XXX: Currently we don't care about the language component.
// The encoding hint is sufficient.
return self::utf8encode(urldecode($match[3]), $match[1]);
return Charset::utf8(urldecode($match[3]), $match[1]);
else
return $filename;
}
......@@ -552,7 +522,7 @@ class Format {
$contents = base64_decode($contents);
}
if ($output_encoding && $charset)
$contents = Format::encode($contents, $charset, $output_encoding);
$contents = Charset::transcode($contents, $charset, $output_encoding);
return array(
'data' => $contents,
......
......@@ -206,7 +206,7 @@ class MailFetcher {
//Convert text to desired encoding..defaults to utf8
function mime_encode($text, $charset=null, $encoding='utf-8') { //Thank in part to afterburner
return Format::encode($text, $charset, $encoding);
return Charset::transcode($text, $charset, $encoding);
}
function mailbox_encode($mailbox) {
......@@ -240,7 +240,7 @@ class MailFetcher {
if (function_exists('mb_detect_encoding'))
if (($src_enc = mb_detect_encoding($text))
&& (strcasecmp($src_enc, 'ASCII') !== 0))
return Format::encode($text, $src_enc, $encoding);
return Charset::transcode($text, $src_enc, $encoding);
// Handle ASCII text and RFC-2047 encoding
$str = '';
......
......@@ -335,7 +335,7 @@ class Mail_Parse {
$content = $struct->body;
//Encode to desired encoding - ONLY if charset is known??
if (isset($struct->ctype_parameters['charset']))
$content = Format::encode($content,
$content = Charset::transcode($content,
$struct->ctype_parameters['charset'], $this->charset);
return $content;
......@@ -358,7 +358,7 @@ class Mail_Parse {
function mime_encode($text, $charset=null, $encoding='utf-8') {
return Format::encode($text, $charset, $encoding);
return Charset::transcode($text, $charset, $encoding);
}
function getAttachments($part=null){
......
......@@ -576,7 +576,7 @@ class Translation extends gettext_reader implements Serializable {
if (!$this->encode)
return $string;
return Format::encode($string, 'utf-8', $this->charset);
return Charset::transcode($string, 'utf-8', $this->charset);
}
static function buildHashFile($mofile, $outfile=false, $return=false) {
......@@ -626,9 +626,8 @@ class Translation extends gettext_reader implements Serializable {
}
if ($charset && strcasecmp($charset, 'utf-8') !== 0) {
foreach ($table as $orig=>$trans) {
// Format::encode defaults to UTF-8 output
$table[Format::encode($orig, $charset)] =
Format::encode($trans, $charset);
$table[Charset::utf8($orig, $charset)] =
Charset::utf8($trans, $charset);
unset($table[$orig]);
}
}
......
......@@ -341,7 +341,7 @@ class TnefAttributeStreamReader extends TnefStreamReader {
/* Read and truncate to length. */
$text = substr($this->_getx($datalen), 0, $length);
if ($type == self::TypeUnicode) {
$text = Format::encode($text, 'ucs2');
$text = Charset::utf8($text, 'ucs2');
}
return $text;
......@@ -543,7 +543,7 @@ class TnefMessage extends AbstractTnefObject {
// Transcode it
if ($encoding && $charset)
$body = Format::encode($body, $charset, $encoding);
$body = Charset::transcode($body, $charset, $encoding);
return $body;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment