From 4da0324b407692160d21e7c1ccb0272b13f16d91 Mon Sep 17 00:00:00 2001 From: JediKev <kevin@enhancesoft.com> Date: Thu, 27 Jun 2019 12:41:57 -0500 Subject: [PATCH] issue: ISO-8859-8-i Charset Issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This addresses an issue where emails with `ISO-8859-8-i` character-sets appear as "(empty)" in the system. This is due to `ISO-8859-8-i` not being a valid character-set for `iconv()`. When you pass `ISO-8859-8-i` to `iconv()` you will receive an error similar to `iconv(): Wrong charset, conversion from 'ISO-8859-8-i' to 'UTF-8//IGNORE' is not allowed`. I don’t know why it's not a valid character-set for `iconv()` but the trailing `-i` is used to say "keep the text in logical order instead of visual order". Logical order just means to keep the text in true right-to-left format instead of transcoding the characters to left-to-right format. This adds a new case to the `Charset::normalize()` switch statement to match against `ISO-XXXX-X-i`. If a character set matches the criteria we will remove the trailing `-i` and set the charset to `ISO-XXXX-X`. This charset format is valid in `iconv()` which will return the correctly formatted email instead of "(empty)". --- include/class.charset.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/class.charset.php b/include/class.charset.php index d33c3abd1..dc1caa04f 100644 --- a/include/class.charset.php +++ b/include/class.charset.php @@ -29,7 +29,8 @@ class Charset { // ks_c_5601-1987: Korean alias for cp949 case preg_match('`^ks_c_5601-1987`i', $charset): return 'cp949'; - case preg_match('`^iso-?(\S+)$`i', $charset, $match): + // Remove trailing junk from ISO charset + case preg_match('`^iso-?(\S+[^i])(-i)?$`i', $charset, $match): return "ISO-".$match[1]; // GBK superceded gb2312 and is backward compatible case preg_match('`^gb2312`i', $charset): -- GitLab