diff --git a/dist/services/MimeVerificationService/MimeVerificationService.js b/dist/services/MimeVerificationService/MimeVerificationService.js index 71e7152326755f0b5943294b0197c6287a256f4f..120b643361249c0f1ccdd991b2bcd7fee2eca551 100644 --- a/dist/services/MimeVerificationService/MimeVerificationService.js +++ b/dist/services/MimeVerificationService/MimeVerificationService.js @@ -18,12 +18,13 @@ const index_1 = require("./index"); const index_2 = require("../../index"); const VerificationError_1 = require("../VerificationService/VerificationError"); const stringUtils_1 = require("../../utils/stringUtils"); -const ELEMENT_NODE = 1; -const TEXT_NODE = 3; -const DOCUMENT_NODE = 9; -const vendorProcessingFunctions = { - [StatusesService_1.EMAIL_VENDORS.GMAIL]: normalizationStrategies_1.processGmailElement, - [StatusesService_1.EMAIL_VENDORS.OUTLOOK]: normalizationStrategies_1.processOutlookElement, +const utils_1 = require("./utils"); +const vendorPruningFunctions = { + [StatusesService_1.EMAIL_VENDORS.GMAIL]: normalizationStrategies_1.pruneGmailElement, + [StatusesService_1.EMAIL_VENDORS.OUTLOOK]: normalizationStrategies_1.pruneOutlookElement, +}; +const vendorPrintingFunctions = { + [StatusesService_1.EMAIL_VENDORS.OUTLOOK]: utils_1.printOutlookElement, }; // Load JSDOM dynamically for Node env only, because built CRA is crashing with it let JSDOM; @@ -97,100 +98,17 @@ const normalizeVendorHtml = (htmlString, vendor) => { document = window.document; } const mimeBody = document.firstChild.lastChild; - const elementProcessingFunction = vendorProcessingFunctions[vendor]; - if (!elementProcessingFunction) { + const elementPruningFunction = vendorPruningFunctions[vendor]; + if (!elementPruningFunction) { throw new Error(`Vendor "${vendor}" is not supported. Please, develop a normalization strategy for it.`); } - pruneHtmlNode(document, elementProcessingFunction); - return printHtmlChildren(mimeBody, 0); + utils_1.pruneHtmlNode(document, elementPruningFunction); + const vendorPrintFunction = vendorPrintingFunctions[vendor]; + return utils_1.printHtmlChildren(mimeBody, vendorPrintFunction, 0); }; const normalizePlainPart = (text) => { return stringUtils_1.removeSpacesAndLinebreaks(text); }; -const printHtmlChildren = (node, depth) => { - let child = node.firstChild; - if (!child) { - return ""; - } - if (child == node.lastChild && child.nodeType == TEXT_NODE) { - return printHtmlNode(child, depth); - } - else { - let result = ""; - while (child) { - result = result.concat(printHtmlNode(child, depth)); - child = child.nextSibling; - } - return result; - } -}; -const printHtmlNode = (node, depth) => { - let result = ""; - switch (node.nodeType) { - case TEXT_NODE: - result += "<TEXT>"; - result += stringUtils_1.removeSpacesAndLinebreaks(node.textContent); - result += "</TEXT>"; - result += "\n"; - break; - case DOCUMENT_NODE: - result += printHtmlChildren(node, depth); - break; - case ELEMENT_NODE: - result += "<" + node.nodeName; - Array.from(node.attributes) - .sort((a, b) => a.name.localeCompare(b.name)) - .forEach((attribute) => { - result += ` ${attribute.name}`; - if (attribute.value) { - result += `="${common_1.escapeHtmlString(attribute.value)}"`; - } - }); - if (node.firstChild) { - result += ">"; - result += "\n"; - result += printHtmlChildren(node, depth + 1); - result += "</" + node.nodeName + ">"; - } - else { - result += "/>"; - } - result += "\n"; - } - return result; -}; -const pruneHtmlNode = (node, pruneElement) => { - let toBeRemoved = false; - switch (node.nodeType) { - case node.COMMENT_NODE: - case node.DOCUMENT_TYPE_NODE: - toBeRemoved = true; - break; - case node.TEXT_NODE: { - const trimmedText = node.textContent.trim(); - if (trimmedText === "") { - toBeRemoved = true; - } - else { - node.textContent = trimmedText; - } - break; - } - case node.ELEMENT_NODE: - toBeRemoved = pruneElement(node); - } - if (toBeRemoved) { - return true; - } - const childrenToRemove = []; - let child = node.firstChild; - while (child) { - pruneHtmlNode(child, pruneElement) && childrenToRemove.push(child); - child = child.nextSibling; - } - childrenToRemove.forEach((child) => node.removeChild(child)); - return false; -}; exports.default = { normalizeVendorHtml, getMimeHtmlAndPlainParts, diff --git a/dist/services/MimeVerificationService/normalizationStrategies.d.ts b/dist/services/MimeVerificationService/normalizationStrategies.d.ts index 08113b4cfd63b1af06fde0fd13171e9dee649167..89e97ce976d3d267d0cf1428b1d154b8eaf5654c 100644 --- a/dist/services/MimeVerificationService/normalizationStrategies.d.ts +++ b/dist/services/MimeVerificationService/normalizationStrategies.d.ts @@ -1,6 +1,6 @@ -export declare const processGmailElement: (element: HTMLElement) => boolean; +export declare const pruneGmailElement: (element: HTMLElement) => boolean; /** * Returns true if element should be completely removed * @param element */ -export declare const processOutlookElement: (element: HTMLElement) => boolean; +export declare const pruneOutlookElement: (element: HTMLElement) => boolean; diff --git a/dist/services/MimeVerificationService/normalizationStrategies.js b/dist/services/MimeVerificationService/normalizationStrategies.js index aa40890a20de01aebf836b47683de7c06b27dec4..4345de823fb75d7fbf00c05ca0dd03597a2f19b5 100644 --- a/dist/services/MimeVerificationService/normalizationStrategies.js +++ b/dist/services/MimeVerificationService/normalizationStrategies.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.processOutlookElement = exports.processGmailElement = void 0; +exports.pruneOutlookElement = exports.pruneGmailElement = void 0; const DUMMY_QR_CODE_ID = "dummyQrCode"; const deleteAttributesWithPrefix = (element, prefix) => { for (const attribute of element.attributes) { @@ -17,7 +17,7 @@ const isDummyQrCode = (element) => { return true; } }; -exports.processGmailElement = (element) => { +exports.pruneGmailElement = (element) => { if (isDummyQrCode(element)) { return true; } @@ -35,10 +35,14 @@ exports.processGmailElement = (element) => { * Returns true if element should be completely removed * @param element */ -exports.processOutlookElement = (element) => { +exports.pruneOutlookElement = (element) => { if (isDummyQrCode(element)) { return true; } + // Remove Outlook generic <o:p> tags + if (element.nodeName.toLowerCase().startsWith("o:")) { + return true; + } if (element.attributes.length > 0) { deleteDataAttributes(element); // TODO Prepare for demo. fix later. the value at style attribute is changed (additional spaces and semicolon) diff --git a/src/services/MimeVerificationService/MimeVerificationService.ts b/src/services/MimeVerificationService/MimeVerificationService.ts index 310a8f07ab3eaf9d366c6c5c0cd1255f564adfab..913e4bf36a5239c575d3ee27b96322a8e83b8db0 100644 --- a/src/services/MimeVerificationService/MimeVerificationService.ts +++ b/src/services/MimeVerificationService/MimeVerificationService.ts @@ -1,23 +1,24 @@ import { - processGmailElement, - processOutlookElement, + pruneGmailElement, + pruneOutlookElement, } from "./normalizationStrategies"; import MailParser, { fixNewLines } from "../MailParser/MailParser"; import CryptoService from "../CryptoService"; -import { arrayBufferToBase64, escapeHtmlString } from "../../utils/common"; +import { arrayBufferToBase64 } from "../../utils/common"; import { EMAIL_VENDORS } from "../StatusesService"; import MimeVerificationService from "./index"; import { RollingHash } from "../../index"; import VerificationError from "../VerificationService/VerificationError"; import { removeSpacesAndLinebreaks } from "../../utils/stringUtils"; +import { printHtmlChildren, printOutlookElement, pruneHtmlNode } from "./utils"; -const ELEMENT_NODE = 1; -const TEXT_NODE = 3; -const DOCUMENT_NODE = 9; +const vendorPruningFunctions = { + [EMAIL_VENDORS.GMAIL]: pruneGmailElement, + [EMAIL_VENDORS.OUTLOOK]: pruneOutlookElement, +}; -const vendorProcessingFunctions = { - [EMAIL_VENDORS.GMAIL]: processGmailElement, - [EMAIL_VENDORS.OUTLOOK]: processOutlookElement, +const vendorPrintingFunctions = { + [EMAIL_VENDORS.OUTLOOK]: printOutlookElement, }; // Load JSDOM dynamically for Node env only, because built CRA is crashing with it @@ -167,121 +168,25 @@ const normalizeVendorHtml = (htmlString: string, vendor: string): string => { const mimeBody = document.firstChild.lastChild; - const elementProcessingFunction = vendorProcessingFunctions[vendor]; + const elementPruningFunction = vendorPruningFunctions[vendor]; - if (!elementProcessingFunction) { + if (!elementPruningFunction) { throw new Error( `Vendor "${vendor}" is not supported. Please, develop a normalization strategy for it.` ); } - pruneHtmlNode(document, elementProcessingFunction); + pruneHtmlNode(document, elementPruningFunction); + + const vendorPrintFunction = vendorPrintingFunctions[vendor]; - return printHtmlChildren(mimeBody, 0); + return printHtmlChildren(mimeBody, vendorPrintFunction, 0); }; const normalizePlainPart = (text: string): string => { return removeSpacesAndLinebreaks(text); }; -const printHtmlChildren = (node: Node, depth: number): string => { - let child = node.firstChild; - if (!child) { - return ""; - } - - if (child == node.lastChild && child.nodeType == TEXT_NODE) { - return printHtmlNode(child, depth); - } else { - let result = ""; - while (child) { - result = result.concat(printHtmlNode(child, depth)); - child = child.nextSibling; - } - - return result; - } -}; - -const printHtmlNode = (node: Node, depth: number): string => { - let result = ""; - - switch (node.nodeType) { - case TEXT_NODE: - result += "<TEXT>"; - result += removeSpacesAndLinebreaks(node.textContent); - result += "</TEXT>"; - result += "\n"; - break; - case DOCUMENT_NODE: - result += printHtmlChildren(node, depth); - break; - case ELEMENT_NODE: - result += "<" + node.nodeName; - Array.from((node as HTMLElement).attributes) - .sort((a, b) => a.name.localeCompare(b.name)) - .forEach((attribute) => { - result += ` ${attribute.name}`; - if (attribute.value) { - result += `="${escapeHtmlString(attribute.value)}"`; - } - }); - - if (node.firstChild) { - result += ">"; - result += "\n"; - result += printHtmlChildren(node, depth + 1); - result += "</" + node.nodeName + ">"; - } else { - result += "/>"; - } - result += "\n"; - } - - return result; -}; - -const pruneHtmlNode = ( - node: Node, - pruneElement: (element: HTMLElement) => boolean -): boolean => { - let toBeRemoved = false; - - switch (node.nodeType) { - case node.COMMENT_NODE: - case node.DOCUMENT_TYPE_NODE: - toBeRemoved = true; - break; - case node.TEXT_NODE: { - const trimmedText = node.textContent.trim(); - if (trimmedText === "") { - toBeRemoved = true; - } else { - node.textContent = trimmedText; - } - break; - } - case node.ELEMENT_NODE: - toBeRemoved = pruneElement(node as HTMLElement); - } - - if (toBeRemoved) { - return true; - } - - const childrenToRemove = []; - let child = node.firstChild; - - while (child) { - pruneHtmlNode(child, pruneElement) && childrenToRemove.push(child); - child = child.nextSibling; - } - - childrenToRemove.forEach((child) => node.removeChild(child)); - - return false; -}; - export default { normalizeVendorHtml, getMimeHtmlAndPlainParts, diff --git a/src/services/MimeVerificationService/constants.ts b/src/services/MimeVerificationService/constants.ts new file mode 100644 index 0000000000000000000000000000000000000000..be3c40ceffbc88422101e57db3345596895918b0 --- /dev/null +++ b/src/services/MimeVerificationService/constants.ts @@ -0,0 +1,3 @@ +export const ELEMENT_NODE = 1; +export const TEXT_NODE = 3; +export const DOCUMENT_NODE = 9; diff --git a/src/services/MimeVerificationService/normalizationStrategies.ts b/src/services/MimeVerificationService/normalizationStrategies.ts index 0413209fc865cca0dd135e9256f8b3834a0aef31..bc32690f016749ac0ec5d31a4687d557735e1af5 100644 --- a/src/services/MimeVerificationService/normalizationStrategies.ts +++ b/src/services/MimeVerificationService/normalizationStrategies.ts @@ -18,7 +18,7 @@ const isDummyQrCode = (element: HTMLElement): boolean => { } }; -export const processGmailElement = (element: HTMLElement): boolean => { +export const pruneGmailElement = (element: HTMLElement): boolean => { if (isDummyQrCode(element)) { return true; } @@ -40,11 +40,16 @@ export const processGmailElement = (element: HTMLElement): boolean => { * Returns true if element should be completely removed * @param element */ -export const processOutlookElement = (element: HTMLElement): boolean => { +export const pruneOutlookElement = (element: HTMLElement): boolean => { if (isDummyQrCode(element)) { return true; } + // Remove Outlook generic <o:p> tags + if (element.nodeName.toLowerCase().startsWith("o:")) { + return true; + } + if (element.attributes.length > 0) { deleteDataAttributes(element); diff --git a/src/services/MimeVerificationService/utils.ts b/src/services/MimeVerificationService/utils.ts new file mode 100644 index 0000000000000000000000000000000000000000..f4bc6726ba4c59dfde0777875dd8495cde7ea038 --- /dev/null +++ b/src/services/MimeVerificationService/utils.ts @@ -0,0 +1,124 @@ +import { removeSpacesAndLinebreaks } from "../../utils/stringUtils"; +import { escapeHtmlString } from "../.."; +import { DOCUMENT_NODE, ELEMENT_NODE, TEXT_NODE } from "./constants"; + +export const printHtmlChildren = ( + node: Node, + printFunction: (node: Node) => string, + depth: number +): string => { + let child = node.firstChild; + if (!child) { + return ""; + } + + if (child == node.lastChild && child.nodeType == TEXT_NODE) { + return printHtmlNode(child, printFunction, depth); + } else { + let result = ""; + while (child) { + result = result.concat(printHtmlNode(child, printFunction, depth)); + child = child.nextSibling; + } + + return result; + } +}; + +export const printHtmlNode = ( + node: Node, + printFunction: (node: Node) => string, + depth: number +): string => { + let result = ""; + + if (printFunction) { + result = printFunction(node); + if (result) { + return result; + } + } + + switch (node.nodeType) { + case TEXT_NODE: + result += "<TEXT>"; + result += removeSpacesAndLinebreaks(node.textContent); + result += "</TEXT>"; + result += "\n"; + break; + case DOCUMENT_NODE: + result += printHtmlChildren(node, printFunction, depth); + break; + case ELEMENT_NODE: + result += "<" + node.nodeName; + Array.from((node as HTMLElement).attributes) + .sort((a, b) => a.name.localeCompare(b.name)) + .forEach((attribute) => { + result += ` ${attribute.name}`; + if (attribute.value) { + result += `="${escapeHtmlString(attribute.value)}"`; + } + }); + + if (node.firstChild) { + result += ">"; + result += "\n"; + result += printHtmlChildren(node, printFunction, depth + 1); + result += "</" + node.nodeName + ">"; + } else { + result += "/>"; + } + result += "\n"; + } + + return result; +}; + +export const pruneHtmlNode = ( + node: Node, + pruneElement: (element: HTMLElement) => boolean +): boolean => { + let toBeRemoved = false; + + switch (node.nodeType) { + case node.COMMENT_NODE: + case node.DOCUMENT_TYPE_NODE: + toBeRemoved = true; + break; + case node.TEXT_NODE: { + const trimmedText = node.textContent.trim(); + if (trimmedText === "") { + toBeRemoved = true; + } else { + node.textContent = trimmedText; + } + break; + } + case node.ELEMENT_NODE: + toBeRemoved = pruneElement(node as HTMLElement); + } + + if (toBeRemoved) { + return true; + } + + const childrenToRemove = []; + let child = node.firstChild; + + while (child) { + pruneHtmlNode(child, pruneElement) && childrenToRemove.push(child); + child = child.nextSibling; + } + + childrenToRemove.forEach((child) => node.removeChild(child)); + + return false; +}; + +export const printOutlookElement = (node: Node): string => { + if (node.nodeType === ELEMENT_NODE) { + if ((node as HTMLElement).classList.contains("WordSection1")) { + return printHtmlChildren(node, null, 0); + } + } +};