From 4da0324b407692160d21e7c1ccb0272b13f16d91 Mon Sep 17 00:00:00 2001
From: JediKev <kevin@enhancesoft.com>
Date: Thu, 27 Jun 2019 12:41:57 -0500
Subject: [PATCH] issue: ISO-8859-8-i Charset Issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This addresses an issue where emails with `ISO-8859-8-i` character-sets
appear as "(empty)" in the system. This is due to `ISO-8859-8-i` not being a
valid character-set for `iconv()`. When you pass `ISO-8859-8-i` to `iconv()`
you will receive an error similar to `iconv(): Wrong charset, conversion
from 'ISO-8859-8-i' to 'UTF-8//IGNORE' is not allowed`. I don't know why
it's not a valid character-set for `iconv()`, but the trailing `-i` means
the text is stored in logical order instead of visual order. Logical order
means the characters are stored in the order they are read, with the
right-to-left layout applied at display time; visual order means they are
stored pre-arranged in left-to-right display order.

This updates the existing ISO case in the `Charset::normalize()` switch
statement so that it also matches `ISO-XXXX-X-i`. If a character set matches,
we remove the trailing `-i` and set the charset to `ISO-XXXX-X`. For
ISO-8859-8 both names use the same byte-to-character table, so only the
ordering hint is dropped. This charset format is valid in `iconv()`, which
will return the correctly decoded email instead of "(empty)".
---
 include/class.charset.php | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/class.charset.php b/include/class.charset.php
index d33c3abd1..dc1caa04f 100644
--- a/include/class.charset.php
+++ b/include/class.charset.php
@@ -29,7 +29,8 @@ class Charset {
         // ks_c_5601-1987: Korean alias for cp949
         case preg_match('`^ks_c_5601-1987`i', $charset):
             return 'cp949';
-        case preg_match('`^iso-?(\S+)$`i', $charset, $match):
+        // Remove trailing junk from ISO charset
+        case preg_match('`^iso-?(\S+[^i])(-i)?$`i', $charset, $match):
             return "ISO-".$match[1];
         // GBK superceded gb2312 and is backward compatible
         case preg_match('`^gb2312`i', $charset):
-- 
GitLab