diff --git a/__tests__/pseudoplain-gmail-gmail.test.ts b/__tests__/pseudoplain-gmail-gmail.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..0e00189ad7a30b9dcfde615d80b7bd94101cc5fc
--- /dev/null
+++ b/__tests__/pseudoplain-gmail-gmail.test.ts
@@ -0,0 +1,19 @@
+import { EMAIL_VENDORS } from "../src";
+
+import * as path from "path";
+import { describe } from "@jest/globals";
+import { createDescribePseudoPlainTestCases } from "./utils";
+
+const TESTS_GLOBAL_PATH = "/files/gmail-gmail";
+const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`);
+
+// Compares the pseudo-plain text of the sent and received HTML for every case
+// directory under files/gmail-gmail, using the Gmail vendor normalization.
+describe("[Pseudo PLAIN] GMail-GMail", () => {
+  const describeFunction = createDescribePseudoPlainTestCases(
+    testsPath,
+    EMAIL_VENDORS.GMAIL
+  );
+
+  describe("One", describeFunction("one"));
+});
diff --git a/__tests__/pseudoplain-gmail-outlook.test.ts b/__tests__/pseudoplain-gmail-outlook.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..46b3ce90a57fe8a463bb8aec7e7e1ee6fba306f3
--- /dev/null
+++ b/__tests__/pseudoplain-gmail-outlook.test.ts
@@ -0,0 +1,25 @@
+import { describe } from "@jest/globals";
+import { createDescribePseudoPlainTestCases } from "./utils";
+import { EMAIL_VENDORS } from "../src";
+import * as path from "path";
+
+const TESTS_GLOBAL_PATH = "/files/gmail-outlook";
+
+const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`);
+
+// Compares the pseudo-plain text of the sent and received HTML for every case
+// directory under files/gmail-outlook, using the Gmail vendor normalization.
+describe("[Pseudo PLAIN] Gmail-Outlook normalization", () => {
+  const describeFunction = createDescribePseudoPlainTestCases(
+    testsPath,
+    EMAIL_VENDORS.GMAIL
+  );
+  describe(
+    "One",
+    describeFunction("one", [
+      "21forward", // missing file
+      "23forward", // missing file
+      "24forward", // missing file
+    ])
+  );
+});
diff --git a/__tests__/pseudoplain-outlook-outlook.test.ts b/__tests__/pseudoplain-outlook-outlook.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..509abd0fe3a1e108d3829534fe3c77f40cef0c90
--- /dev/null
+++ b/__tests__/pseudoplain-outlook-outlook.test.ts
@@
-0,0 +1,43 @@
+import { describe } from "@jest/globals";
+import { createDescribePseudoPlainTestCases } from "./utils";
+import { EMAIL_VENDORS } from "../src";
+
+const path = require("path");
+
+const TESTS_GLOBAL_PATH = "/files/outlook-outlook";
+const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`);
+
+describe("[Pseudo PLAIN] Outlook-Outlook normalization", () => {
+  const describeFunction = createDescribePseudoPlainTestCases(
+    testsPath,
+    EMAIL_VENDORS.OUTLOOK
+  );
+
+  // Third argument is an allow-list filter (e.g. ["01"]): names of the case directories to run exclusively.
+  describe("Emails Chrome", describeFunction("chrome", undefined, []));
+
+  describe(
+    "Emails Edge",
+    describeFunction("edge", [
+      "08", // Files are missing for test case
+      "10", // Files are missing for test case
+    ])
+  );
+
+  describe("Emails Safari", describeFunction("safari", ["08"]));
+
+  // Does not work at all
+  describe(
+    "Emails MacOS",
+    describeFunction("macos", [
+      "23", // has special character
+    ])
+  );
+
+  describe(
+    "Emails Windows",
+    describeFunction("windows", [
+      "10", // missing files
+    ])
+  );
+});
diff --git a/__tests__/utils.ts b/__tests__/utils.ts
index 7a0e7b901caccb3cd579f1746b92f0f20c267119..ef875f21f5fc9472fed58d6451b6a0d19f10988b 100644
--- a/__tests__/utils.ts
+++ b/__tests__/utils.ts
@@ -8,6 +8,7 @@ const RECEIVED_PLAIN_NAME = "r_plainContent.data";
 import { PlainNormalizer, HTMLNormalizer } from "../src";
 import { expect, test } from "@jest/globals";
 import { DOM } from "@vereign/dom";
+//import { diffStringsUnified } from "jest-diff";
 
 export const getNormalizedPlain = (
   testCasePath: string
@@ -111,12 +112,21 @@ export const createDescribeHtmlTestCases = (
 
 export const createDescribePlainTestCases = (testsPath: string) => (
   casesName: string,
-  failingCases: Array<string> = []
+  failingCases: Array<string> = [],
+  casesToCheckOnly?: Array<string>
 ) => (): void => {
   const testsCasesPath = testsPath + "/" + casesName;
-  const testCasesDirs = getTestCasesDirs(testsCasesPath).filter(
-    (dir) => !failingCases.includes(dir)
-  );
+  let testCasesDirs = getTestCasesDirs(testsCasesPath);
+
+  if (casesToCheckOnly && casesToCheckOnly.length) {
+    testCasesDirs = testCasesDirs.filter((dir) =>
+      casesToCheckOnly.includes(dir)
+    );
+  }
+
+  if (failingCases && failingCases.length) {
+    testCasesDirs = testCasesDirs.filter((dir) => !failingCases.includes(dir));
+  }
 
   test.each(testCasesDirs)("Case %s", (dirName: string) => {
     const testCasePath = testsCasesPath + "/" + dirName;
@@ -127,5 +137,98 @@ export const createDescribePlainTestCases = (testsPath: string) => (
     // expect(sentPlain.length).toBeGreaterThan(0);
     // expect(receivedPlain.length).toBeGreaterThan(0);
     expect(receivedPlain).toContain(sentPlain);
+    // const diff = diffStringsUnified(sentPlain, receivedPlain);
+    // console.log(diff);
   });
 };
+
+/**
+ * Builds a factory of jest `describe` callbacks that, for each case directory,
+ * normalize the sent and received HTML for `vendor` and expect their
+ * pseudo-plain text to be equal.
+ */
+export const createDescribePseudoPlainTestCases = (
+  testsPath: string,
+  vendor: string
+) =>
+  /**
+   * @param casesGroupName - name of the folder with cases
+   * @param failingCases - cases known to fail; they are skipped until fixed
+   * @param casesToCheckOnly - optional allow-list: when non-empty, only these cases run
+   */
+  (
+    casesGroupName: string,
+    failingCases?: Array<string>,
+    casesToCheckOnly?: Array<string>
+  ) => (): void => {
+    const testsCasesPath = testsPath + "/" + casesGroupName;
+    let testCasesDirs = getTestCasesDirs(testsCasesPath);
+
+    if (casesToCheckOnly && casesToCheckOnly.length) {
+      testCasesDirs = testCasesDirs.filter((dir) =>
+        casesToCheckOnly.includes(dir)
+      );
+    }
+
+    if (failingCases && failingCases.length) {
+      testCasesDirs = testCasesDirs.filter(
+        (dir) => !failingCases.includes(dir)
+      );
+    }
+
+    test.each(testCasesDirs)("Case %s", (dirName: string) => {
+      const testCasePath = testsCasesPath + "/" + dirName;
+      const { sentHtmlDocument, receivedHtmlDocument } = getDOMDocuments(
+        testCasePath
+      );
+
+      HTMLNormalizer.normalizeVendorHtml(receivedHtmlDocument, vendor);
+      HTMLNormalizer.normalizeVendorHtml(sentHtmlDocument, vendor);
+
+      const normalizedReceivedPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart(
+        receivedHtmlDocument
+      );
+
+      const normalizedSentPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart(
+        sentHtmlDocument
+      );
+
+      expect(normalizedReceivedPseudoPlainText).toEqual(
+        normalizedSentPseudoPlainText
+      );
+
+      // const diff = diffStringsUnified(
+      //   normalizedReceivedPseudoPlainText,
+      //   normalizedSentPseudoPlainText
+      // );
+      // console.log(diff);
+    });
+  };
+
+/**
+ * Reads the sent and received HTML fixtures of a test case and parses each
+ * into a DOM document.
+ * @param testCasePath - directory containing the sent/received HTML files
+ * @returns the parsed documents of the sent and the received message
+ */
+export const getDOMDocuments = (
+  testCasePath: string
+): {
+  sentHtmlDocument: HTMLDocument;
+  receivedHtmlDocument: HTMLDocument;
+} => {
+  const sentHtml = fs
+    .readFileSync(`${testCasePath}/${SENT_HTML_NAME}`)
+    .toString();
+  const receivedHtml = fs
+    .readFileSync(`${testCasePath}/${RECEIVED_HTML_NAME}`)
+    .toString();
+
+  const sentDOM = new DOM(sentHtml);
+  const receivedDOM = new DOM(receivedHtml);
+
+  return {
+    sentHtmlDocument: sentDOM.window.document,
+    receivedHtmlDocument: receivedDOM.window.document,
+  };
+};
diff --git
a/dist/HTMLNormalizer/HTMLNormalizer.d.ts b/dist/HTMLNormalizer/HTMLNormalizer.d.ts
index 14512588371174dcb28897c7dca926e5ff64f13f..62ff3a55b69e5319892d8de8f7712d8bc19a7176 100644
--- a/dist/HTMLNormalizer/HTMLNormalizer.d.ts
+++ b/dist/HTMLNormalizer/HTMLNormalizer.d.ts
@@ -1,4 +1,5 @@
 export declare const normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string;
+export declare const extractPseudoPlainPart: (document: HTMLDocument) => string;
 export declare const printHtmlChildren: (node: Node, printFunction: (node: Node) => string, depth: number) => string;
 export declare const printHtmlNode: (node: Node, printFunction: (node: Node) => string, depth: number) => string;
 export declare const cleanupHtmlNodeAttributes: (node: Node, cleanupElementAttributes: (element: HTMLElement) => void) => void;
diff --git a/dist/HTMLNormalizer/HTMLNormalizer.js b/dist/HTMLNormalizer/HTMLNormalizer.js
index 68dc062b75b86094cc0fbeb7158641480a0f1311..1fdd9d92969cc0029c87b36379160a54a3963009 100644
--- a/dist/HTMLNormalizer/HTMLNormalizer.js
+++ b/dist/HTMLNormalizer/HTMLNormalizer.js
@@ -1,11 +1,12 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.normalizeVendorHtml = void 0;
+exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.extractPseudoPlainPart = exports.normalizeVendorHtml = void 0;
 const constants_1 = require("../constants");
 const outlook_1 = require("./strategies/outlook");
 const constants_2 = require("../constants");
 const utils_1 = require("../utils");
 const gmail_1 = require("./strategies/gmail");
+const index_1 = require("../index");
 const nodesAmendingFunctions = {
     [constants_2.EMAIL_VENDORS.GMAIL]: gmail_1.amendGmailNodes,
     [constants_2.EMAIL_VENDORS.OUTLOOK]: outlook_1.amendOutlookNodes,
@@ -49,6 +50,27 @@ const normalizeVendorHtml = (document, vendor) => {
     return exports.printHtmlChildren(mimeBody, vendorPrintFunction, 0);
 };
 exports.normalizeVendorHtml = normalizeVendorHtml;
+const extractPseudoPlainPart = (document
+/*vendor: string*/
+) => {
+    const textContent = index_1.PlainNormalizer.normalizePlain(document.body.textContent);
+    // const anchors = document.getElementsByTagName("a");
+    // const images = document.getElementsByTagName("img");
+    // let meaningfulAttributes = [];
+    //
+    // Array.from(anchors).forEach((a) => {
+    //   meaningfulAttributes.push(a.getAttribute("href"));
+    // });
+    // Array.from(images).forEach((img) => {
+    //   meaningfulAttributes.push(img.getAttribute("src"));
+    //   meaningfulAttributes.push(img.getAttribute("alt"));
+    // });
+    //
+    // meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort();
+    // console.log(meaningfulAttributes);
+    return textContent;
+};
+exports.extractPseudoPlainPart = extractPseudoPlainPart;
 const printHtmlChildren = (node, printFunction, depth) => {
     let child = node.firstChild;
     if (!child) {
diff --git a/dist/HTMLNormalizer/index.d.ts b/dist/HTMLNormalizer/index.d.ts
index 41ae9326611d9ad3288f6f5fe3ef3d42726d367f..7bea28c1205f5bb6bd23639914e640cedaaed170 100644
--- a/dist/HTMLNormalizer/index.d.ts
+++ b/dist/HTMLNormalizer/index.d.ts
@@ -1,4 +1,5 @@
 declare const _default: {
     normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string;
+    extractPseudoPlainPart: (document: HTMLDocument) => string;
 };
 export default _default;
diff --git a/dist/HTMLNormalizer/index.js b/dist/HTMLNormalizer/index.js
index 10347750ec3e33b39101a866e87ad04b098c1880..3d76acbbaa9ad2760df4c70687bd541bdac19eea 100644
--- a/dist/HTMLNormalizer/index.js
+++ b/dist/HTMLNormalizer/index.js
@@ -3,4 +3,5 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const HTMLNormalizer_1 = require("./HTMLNormalizer");
 exports.default = {
     normalizeVendorHtml: HTMLNormalizer_1.normalizeVendorHtml,
+    extractPseudoPlainPart: HTMLNormalizer_1.extractPseudoPlainPart,
 };
diff --git a/dist/PlainNormalizer/PlainNormalizer.js b/dist/PlainNormalizer/PlainNormalizer.js
index e79c6a0a2a9d639118ce5bd1762b3455b75dae14..342abdd0887e6066cd81d5c2515291417666585d 100644
--- a/dist/PlainNormalizer/PlainNormalizer.js
+++ b/dist/PlainNormalizer/PlainNormalizer.js
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.normalizePlainPart = void 0;
 const utils_1 = require("../utils");
 const normalizePlainPart = (text) => {
+    text = removeListBullets(text);
     text = utils_1.removeSpacesAndLinebreaks(text);
     return removeQRCodes(text);
 };
@@ -12,3 +13,6 @@ const removeQRCodes = (s) => {
         .replace(/\[qrcode.png]\s*<https:\/\/[\w./?=\-&]+>/g, "")
         .replace(/<https:\/\/[\w./?=\-&]+>\s*\[qrcode.png]/g, "");
 };
+const removeListBullets = (s) => {
+    return s.replace(/\r?\n[o§](?:\r?\n)+/g, "");
+};
diff --git a/dist/utils.js b/dist/utils.js
index 6a64db988795a1ffa219666a6fa6c8faac878a56..3030ed27ee0506f9f3c3d118b607339781fb0520 100644
--- a/dist/utils.js
+++ b/dist/utils.js
@@ -2,8 +2,7 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.removeSpacesAndLinebreaks = void 0;
 const removeSpacesAndLinebreaks = (s) => {
-    const regexNewlines = new RegExp(/[\r\n\v]+/g);
-    const regexSpaces = new RegExp(/\s+|\u200B/g);
-    return s.replace(regexNewlines, "").replace(regexSpaces, "");
+    const removeSymbols = new RegExp(/[\r\n\v\s\u200B]+/g);
+    return s.replace(removeSymbols, "").trim();
 };
 exports.removeSpacesAndLinebreaks = removeSpacesAndLinebreaks;
diff --git a/package.json b/package.json
index 5805b72051fb21f31a605de843d07ed8acdae5ff..6831c4de178f66f35857b2f50a15608b6c282ad5 100644
--- a/package.json
+++ b/package.json
@@ -18,6 +18,7 @@
     "eslint": "^7.7.0",
     "husky": "^4.2.5",
     "jest": "^26.4.2",
+    "jest-diff": "^26.6.2",
     "lint-staged": "^10.2.13",
     "prettier": "^2.1.1",
     "typescript": "^4.0.2"
diff --git
a/src/HTMLNormalizer/HTMLNormalizer.ts b/src/HTMLNormalizer/HTMLNormalizer.ts
index 3eb2bc8c077fb86929f8b7ae405d5b47cfa2e8a2..4fbe7c7e3b5af040c15f619eac2b252ca5b17840 100644
--- a/src/HTMLNormalizer/HTMLNormalizer.ts
+++ b/src/HTMLNormalizer/HTMLNormalizer.ts
@@ -18,6 +18,9 @@ import {
   cleanupGMailElementAttributes,
   pruneGmailElement,
 } from "./strategies/gmail";
+// NOTE(review): "../index" appears to re-export HTMLNormalizer as well, which makes this
+// import circular — consider importing the PlainNormalizer module directly; confirm its path.
+import { PlainNormalizer } from "../index";
 
 const nodesAmendingFunctions = {
   [EMAIL_VENDORS.GMAIL]: amendGmailNodes,
@@ -79,6 +82,36 @@ export const normalizeVendorHtml = (
   return printHtmlChildren(mimeBody, vendorPrintFunction, 0);
 };
 
+/**
+ * Extracts the "pseudo plain" text of an HTML message: the text content of
+ * the document body, passed through the plain-text normalizer.
+ *
+ * @param document - HTML document to read the body text from
+ * @returns the normalized body text
+ */
+export const extractPseudoPlainPart = (
+  document: HTMLDocument
+  /*vendor: string*/
+): string => {
+  // textContent is typed `string | null`; fall back to an empty string.
+  const textContent = PlainNormalizer.normalizePlain(document.body.textContent || "");
+
+  // const anchors = document.getElementsByTagName("a");
+  // const images = document.getElementsByTagName("img");
+  // let meaningfulAttributes = [];
+  //
+  // Array.from(anchors).forEach((a) => {
+  //   meaningfulAttributes.push(a.getAttribute("href"));
+  // });
+  // Array.from(images).forEach((img) => {
+  //   meaningfulAttributes.push(img.getAttribute("src"));
+  //   meaningfulAttributes.push(img.getAttribute("alt"));
+  // });
+  //
+  // meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort();
+  return textContent;
+};
+
 export const printHtmlChildren = (
   node: Node,
   printFunction: (node: Node) => string,
diff --git a/src/HTMLNormalizer/index.ts b/src/HTMLNormalizer/index.ts
index 5a404bb97ec11626d161e4eb38928b5572c2c1f9..63caf0692ceae3bb8615175c74eed612c64ad924 100644
--- a/src/HTMLNormalizer/index.ts
+++ b/src/HTMLNormalizer/index.ts
@@ -1,5 +1,6 @@
-import { normalizeVendorHtml } from "./HTMLNormalizer";
+import { normalizeVendorHtml, extractPseudoPlainPart } from "./HTMLNormalizer";
 
 export default {
   normalizeVendorHtml,
+  extractPseudoPlainPart,
 };
diff --git a/src/PlainNormalizer/PlainNormalizer.ts
b/src/PlainNormalizer/PlainNormalizer.ts
index dc4cd689f48597cbc64770bfb3cfe4aea486a139..16a59c9c4a4e9c51e90c521158c8ce468ad40628 100644
--- a/src/PlainNormalizer/PlainNormalizer.ts
+++ b/src/PlainNormalizer/PlainNormalizer.ts
@@ -1,6 +1,11 @@
 import { removeSpacesAndLinebreaks } from "../utils";
 
+/**
+ * Normalizes a plain-text part: drops list-bullet lines, strips all
+ * whitespace, then removes QR-code image references.
+ */
 export const normalizePlainPart = (text: string): string => {
+  text = removeListBullets(text);
   text = removeSpacesAndLinebreaks(text);
   return removeQRCodes(text);
 };
@@ -10,3 +15,12 @@ const removeQRCodes = (s: string): string => {
     .replace(/\[qrcode.png]\s*<https:\/\/[\w./?=\-&]+>/g, "")
     .replace(/<https:\/\/[\w./?=\-&]+>\s*\[qrcode.png]/g, "");
 };
+
+/**
+ * Removes lines consisting of a single list-bullet glyph ("o" or "§"),
+ * together with the line breaks around them (LF or CRLF).
+ */
+const removeListBullets = (s: string): string => {
+  // Must be a RegExp literal: a string pattern is matched literally by replace().
+  return s.replace(/\r?\n[o§](?:\r?\n)+/g, "");
+};
diff --git a/src/utils.ts b/src/utils.ts
index c25376fc137c9374ef76d2115f5638d03f2865a7..c8bb6bb80bbf0e10a5b9c8bbbe95b69b179bdfcf 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -1,6 +1,7 @@
+/**
+ * Strips every whitespace character (spaces, tabs, line breaks) and every
+ * zero-width space (U+200B) from the string.
+ */
 export const removeSpacesAndLinebreaks = (s: string): string => {
-  const regexNewlines = new RegExp(/[\r\n\v]+/g);
-  const regexSpaces = new RegExp(/\s+|\u200B/g);
-
-  return s.replace(regexNewlines, "").replace(regexSpaces, "");
+  return s.replace(/[\s\u200B]+/g, "");
 };