From 3dcc0583c11d364124b5c98ee93913f5b7c93f8d Mon Sep 17 00:00:00 2001 From: Jared Hancock <jared@osticket.com> Date: Tue, 4 Mar 2014 17:17:08 -0600 Subject: [PATCH] html2text: Fix Unicode character munging This issue was previously addressed for htmLawed, where the UTF-8 encoding of a non-breaking space, 0xc2a0, would be collapsed to 0xa0, which would very likely create an invalid UTF-8 sequence, and the text block would be dropped or truncated. Fix html2text by collapsing only an explicit set of ASCII whitespace characters ([ \r\n\t\f]) and compiling the pattern with the /u (UTF-8) modifier. --- include/html2text.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/html2text.php b/include/html2text.php index c44997614..48b0b3ee1 100644 --- a/include/html2text.php +++ b/include/html2text.php @@ -201,7 +201,7 @@ class HtmlInlineElement { case 'normal': default: if ($after_block) $more = ltrim($more); - $more = preg_replace('/\s+/m', ' ', $more); + $more = preg_replace('/[ \r\n\t\f]+/mu', ' ', $more); } } elseif ($c instanceof HtmlInlineElement) { -- GitLab