diff --git a/__tests__/files/outlook-outlook/macos-macos/README.md b/__tests__/files/outlook-outlook/macos-macos/README.md index eab5e638cad7441cfdf1d87e407143a2463be59b..13dce39dccf2ef4af3dc1fe500a5f59b2d647c4c 100644 --- a/__tests__/files/outlook-outlook/macos-macos/README.md +++ b/__tests__/files/outlook-outlook/macos-macos/README.md @@ -8,7 +8,7 @@ | 06 | Multiple MIME attachments and text | ok | ok | ok | | 07 | Multiple Drive attachments and text | ok | ok | ok | | 08 | Complex email with formatted text, embedded images, MIME and GDrive attachments | fail | ok | fail | -| 09 | 2 replies with test case 01 | fail | ok | ok | +| 09 | 2 replies with test case 01 | fail | ok | fail | | 10 | 2 replies with test case 02 | - | - | - | | 11 | 2 replies with test case 03 | - | - | - | | 12 | 2 replies with test case 04 | - | - | - | diff --git a/__tests__/htmltext-outlook-outlook.test.ts b/__tests__/htmltext-outlook-outlook.test.ts index 55209d1a606db5694f0763ff92af69724adbfe1a..125c8dd68d541f8e2e5f79a420b72903e2e6847b 100644 --- a/__tests__/htmltext-outlook-outlook.test.ts +++ b/__tests__/htmltext-outlook-outlook.test.ts @@ -14,6 +14,6 @@ describe("[Pseudo PLAIN] Outlook-Outlook normalization", () => { ); describe("Chrome-Chrome", describeFunction("chrome-chrome")); - describe("MacOS-MacOS", describeFunction("macos-macos", null)); + describe("MacOS-MacOS", describeFunction("macos-macos", ["09", "s"])); describe("Windows-Windows", describeFunction("windows-windows")); }); diff --git a/__tests__/utils.ts b/__tests__/utils.ts index f009c7e5a45ef0acaa7f103295dc99cae7b72f09..c5fbc6a1a964e7b78240db160d9d53f6d612e0b4 100644 --- a/__tests__/utils.ts +++ b/__tests__/utils.ts @@ -10,7 +10,7 @@ import { DOM } from "@vereign/dom"; import { diffStringsUnified } from "jest-diff"; expect.extend({ - toContainWithDiff(target, source) { + toEqualWithDiff(target, source) { let pass = true; try { expect(target).toEqual(source); @@ -134,7 +134,7 @@ export const createDescribeHtmlTestCases = ( // eslint-disable-next-line // @ts-ignore // console.log(receivedHtml); - expect(receivedHtml).toContainWithDiff(sentHtml); + expect(receivedHtml).toEqualWithDiff(sentHtml); }); }; @@ -165,7 +165,7 @@ export const createDescribePlainTestCases = (testsPath: string) => ( // expect(receivedPlain.length).toBeGreaterThan(0); // eslint-disable-next-line // @ts-ignore - expect(receivedPlain).toContainWithDiff(sentPlain); + expect(receivedPlain).toEqualWithDiff(sentPlain); }); }; @@ -204,18 +204,20 @@ export const createDescribePseudoPlainTestCases = ( testCasePath ); - HTMLNormalizer.normalizeVendorHtml(receivedHtmlDocument, vendor); HTMLNormalizer.normalizeVendorHtml(sentHtmlDocument, vendor); - - const normalizedReceivedPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart( - receivedHtmlDocument - ); + HTMLNormalizer.normalizeVendorHtml(receivedHtmlDocument, vendor); const normalizedSentPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart( sentHtmlDocument ); - expect(normalizedReceivedPseudoPlainText).toEqual( + const normalizedReceivedPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart( + receivedHtmlDocument + ); + + // eslint-disable-next-line + // @ts-ignore + expect(normalizedReceivedPseudoPlainText).toEqualWithDiff( normalizedSentPseudoPlainText ); }); @@ -230,7 +232,7 @@ export const getDOMDocuments = ( const sentMime = getMime(`${testCasePath}/${SENT_EML_NAME}`); const receivedMime = getMime(`${testCasePath}/${RECEIVED_EML_NAME}`); - const sentDOM = new DOM(sentMime.getHTML()); + const sentDOM = new JSDOM(sentMime.getHTML()); const receivedDOM = new JSDOM(receivedMime.getHTML()); return { diff --git a/src/HTMLNormalizer/HTMLNormalizer.ts b/src/HTMLNormalizer/HTMLNormalizer.ts index 4584ab17a1de9f06ecee55e3ed46692ab4ef659f..ce2f1d3e84e413c530add95f471b66edb7295f74 100644 --- a/src/HTMLNormalizer/HTMLNormalizer.ts +++ b/src/HTMLNormalizer/HTMLNormalizer.ts @@ -12,7 +12,7 @@ import { pruneOutlookElement, } from "./strategies/outlook"; import { EMAIL_VENDORS } from "../constants"; -import { removeSpacesAndLinebreaks } from "../utils"; +import { normalizeTextSpacings, removeSpacesAndLinebreaks } from "../utils"; import { amendGmailNodes, cleanupGMailElementAttributes, @@ -87,7 +87,9 @@ export const extractPseudoPlainPart = ( document: HTMLDocument /*vendor: string*/ ): string => { - const textContent = PlainNormalizer.normalizePlain(document.body.textContent); + const normalizedTextContent = PlainNormalizer.normalizePlain( + document.body.textContent + ); // const anchors = document.getElementsByTagName("a"); // const images = document.getElementsByTagName("img"); @@ -103,7 +105,7 @@ export const extractPseudoPlainPart = ( // // meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort(); // console.log(meaningfulAttributes); - return textContent; + return normalizedTextContent; }; export const printHtmlChildren = ( @@ -169,11 +171,15 @@ export const printHtmlNode = ( }); if (node.firstChild) { - result += ">"; - result += "\n"; const printout = printHtmlChildren(node, printFunction, depth + 1); - result += printout; - result += "</" + node.nodeName + ">"; + if (printout.trim().length === 0) { + result += "/>"; + } else { + result += ">"; + result += "\n"; + result += printout; + result += "</" + node.nodeName + ">"; + } } else { result += "/>"; } @@ -211,7 +217,7 @@ export const pruneHtmlNode = ( toBeRemoved = true; break; case TEXT_NODE: { - const trimmedText = node.textContent.trim(); + const trimmedText = normalizeTextSpacings(node.textContent); if (trimmedText === "") { toBeRemoved = true; } else { diff --git a/src/HTMLNormalizer/strategies/outlook.ts b/src/HTMLNormalizer/strategies/outlook.ts index 045c4d466c19fb63db70787e179bf58e2258a04c..835481239abceaa49fca82d056acc396ad48fd02 100644 --- a/src/HTMLNormalizer/strategies/outlook.ts +++ b/src/HTMLNormalizer/strategies/outlook.ts @@ -78,7 +78,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => { * Remove Word o:p paragraphs */ const ops = document.getElementsByTagName("o:p"); - Array.from(ops).forEach((op) => op.parentNode.removeChild(op)); + unwindTags(Array.from(ops)); /** * Remove empty paragraphs @@ -91,7 +91,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => { if (p.childNodes.length === 1 && p.childNodes[0].nodeType === TEXT_NODE) { const text = p.childNodes[0].textContent; - if (!text.replace(/ /g, "").trim()) { + if (!text.replace(/\u00A0/g, "").trim()) { p.parentNode.removeChild(p); } } @@ -173,6 +173,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => { spansDepths[depth].forEach((span) => { let child = span.firstChild; const parent = span.parentNode; + while (child) { parent.insertBefore(child.cloneNode(true), span); diff --git a/src/utils.ts b/src/utils.ts index 51805e0b55168c2ecf577d0d154f94b940ebb7a3..3723902a1ebaa16ca517bffe227b0f90eafd1f78 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -5,5 +5,5 @@ export const removeSpacesAndLinebreaks = (s: string): string => { }; export const normalizeTextSpacings = (s: string): string => { - return s.replace(/[\r\n\v\s]+/g, " "); + return s.replace(/[\r\n\v\s\u00A0]+/g, " "); };