From 72f85d315d24589a939691c0651bec904eddc1fc Mon Sep 17 00:00:00 2001
From: igor <igor.markin@vereign.com>
Date: Thu, 24 Dec 2020 10:57:17 +0300
Subject: [PATCH] Properly handle spaces in plain part

---
 .../outlook-outlook/chrome-chrome/07/received.eml |  2 +-
 .../outlook-outlook/chrome-chrome/08/received.eml |  2 +-
 .../outlook-outlook/chrome-chrome/15/received.eml |  2 +-
 src/PlainNormalizer/PlainNormalizer.ts            | 15 +++++++++++----
 src/utils.ts                                      |  4 ++++
 5 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/__tests__/files/outlook-outlook/chrome-chrome/07/received.eml b/__tests__/files/outlook-outlook/chrome-chrome/07/received.eml
index 68c2e4f..5b3539a 100644
--- a/__tests__/files/outlook-outlook/chrome-chrome/07/received.eml
+++ b/__tests__/files/outlook-outlook/chrome-chrome/07/received.eml
@@ -205,7 +205,7 @@ Content-Type: text/plain; charset="iso-8859-1"
 Content-Transfer-Encoding: quoted-printable
 
 Stephan Morphis has shared OneDrive files with you. To view them, click the=
-links below.
+ links below.
 <https://1drv.ms/u/s!Au0bzLX4nSlWiDaBztIacMtR0TFd>
 [https://r1.res.office365.com/owa/prem/images/dc-generic_20.png]<https://1d=
 rv.ms/u/s!Au0bzLX4nSlWiDaBztIacMtR0TFd>
diff --git a/__tests__/files/outlook-outlook/chrome-chrome/08/received.eml b/__tests__/files/outlook-outlook/chrome-chrome/08/received.eml
index 2dbf245..94f09f9 100644
--- a/__tests__/files/outlook-outlook/chrome-chrome/08/received.eml
+++ b/__tests__/files/outlook-outlook/chrome-chrome/08/received.eml
@@ -206,7 +206,7 @@ Content-Type: text/plain; charset="iso-8859-1"
 Content-Transfer-Encoding: quoted-printable
 
 Stephan Morphis has shared OneDrive files with you. To view them, click the=
-links below.
+ links below.
 <https://1drv.ms/u/s!Au0bzLX4nSlWiDaBztIacMtR0TFd>
 [https://r1.res.office365.com/owa/prem/images/dc-generic_20.png]<https://1d=
 rv.ms/u/s!Au0bzLX4nSlWiDaBztIacMtR0TFd>
diff --git a/__tests__/files/outlook-outlook/chrome-chrome/15/received.eml b/__tests__/files/outlook-outlook/chrome-chrome/15/received.eml
index d788cca..8f7b852 100644
--- a/__tests__/files/outlook-outlook/chrome-chrome/15/received.eml
+++ b/__tests__/files/outlook-outlook/chrome-chrome/15/received.eml
@@ -210,7 +210,7 @@ Content-Type: text/plain; charset="iso-8859-1"
 Content-Transfer-Encoding: quoted-printable
 
 Stephan Morphis has shared OneDrive files with you. To view them, click the=
-links below.
+ links below.
 <https://1drv.ms/u/s!Au0bzLX4nSlWlj-_pxZ0QmCUigL4>
 [https://r1.res.office365.com/owa/prem/images/dc-png_20.png]<https://1drv.m=
 s/u/s!Au0bzLX4nSlWlj-_pxZ0QmCUigL4>
diff --git a/src/PlainNormalizer/PlainNormalizer.ts b/src/PlainNormalizer/PlainNormalizer.ts
index ba47353..f230a11 100644
--- a/src/PlainNormalizer/PlainNormalizer.ts
+++ b/src/PlainNormalizer/PlainNormalizer.ts
@@ -1,14 +1,16 @@
 // this is a Node module. require is a must to work across different envs
 const URL = require("url-parse");
 
-import { removeSpacesAndLinebreaks } from "../utils";
+import { normalizeTextSpacings } from "../utils";
 
 export const normalizePlainPart = (text: string): string => {
+  text = cleanupHiddenCharacters(text);
   text = removeListBullets(text);
-  text = removeSpacesAndLinebreaks(text);
   text = removeQRCodes(text);
+  text = normalizeTextSpacings(text);
   text = patchOutlookSafelinksWrappers(text);
-  return text;
+  // normalizeTextSpacings collapses whitespace runs to single spaces, so the ends may still carry one; trim them.
+  return text.trim();
 };
 
 const patchOutlookSafelinksWrappers = (text: string) => {
@@ -29,10 +31,15 @@ const patchOutlookSafelinksWrappers = (text: string) => {
 
 const removeQRCodes = (s: string): string => {
   return s
-    .replace(/\[(image:)*qrcode.png]\s*<https:\/\/.+?>/g, "")
+    .replace(/\[(image:\s*)*qrcode\.png]\s*<https:\/\/.+?>/g, "") // "image:" prefix with or without trailing whitespace; literal dot
     .replace(/<https:\/\/.+?>\s*\[(image: )*qrcode.png]/g, "");
 };
 
 const removeListBullets = (s: string): string => {
-  return s.replace("\n[o§]\n+/g", "");
+  return s.replace(/\n[o§]\n+/g, ""); // regex literal (was a string, so nothing matched): drop lone "o"/"§" bullet lines
 };
+
+export const cleanupHiddenCharacters = (s: string): string => {
+  // Drop every zero-width space (U+200B) from the input.
+  return s.replace(/\u200B+/g, "");
+};
diff --git a/src/utils.ts b/src/utils.ts
index c8bb6bb..51805e0 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -3,3 +3,7 @@ export const removeSpacesAndLinebreaks = (s: string): string => {
 
   return s.replace(removeSymbols, "").trim();
 };
+
+export const normalizeTextSpacings = (s: string): string =>
+  // Collapse each whitespace run to one space; \s already covers \r, \n and \v.
+  s.replace(/\s+/g, " ");
-- 
GitLab