Skip to content
Snippets Groups Projects
Commit 65871ff7 authored by Igor Markin's avatar Igor Markin
Browse files

Merge branch 'rework-with-parse-5' into 'master'

Rework with parse 5

See merge request !5
parents 942b7c3b 0e85e918
No related branches found
No related tags found
1 merge request!5Rework with parse 5
Showing
with 149 additions and 66 deletions
......@@ -15,3 +15,8 @@ If you are going to create a new test case, please follow the structure inside t
_Important note: The MIME normaliser does not cover all provided test cases.
Some of them are explicitly ignored to indicate which tests have to be fixed. Please refer to the `failingCases` arrays inside the test files._
Tests use both JSDOM and the custom Vereign [DOM parser](https://code.vereign.com/code/js-toolbox/gsdom).
The Vereign DOM parser has been developed to support MIME normalisation in the Google Add-on and is a must for testing.
Whenever you develop new normalisation logic, ensure that the Vereign DOM parser supports the functions you use.
......@@ -49,6 +49,9 @@ describe("[HTML] GMail-Outlook", () => {
"25reply",
"26reply",
"27reply",
"21forward", // missing files
"23forward", // missing files
"24forward", // missing files
])
);
});
......@@ -17,12 +17,15 @@ describe("Outlook emails HTML normalization", () => {
"Emails Edge",
describeFunction("edge", [
"21", // This case has a src mismatch for the same image. Reproduce this case again
"08", // Files are missing for test case
"10", // Files are missing for test case
])
);
describe(
"Emails Safari",
describeFunction("safari", [
"04", // This case contains <section> tag which is ignored by Outlook, and it also inserts a plenty of empty divs
"04", // This case contains <section> tag which is ignored by Outlook, and it also inserts a plenty of empty divs,
"08",
])
);
......@@ -48,6 +51,7 @@ describe("Outlook emails HTML normalization", () => {
"25",
"26",
"28",
"10", // missing files
])
);
});
......@@ -22,6 +22,9 @@ describe("[Plain] Gmail-Outlook normalization", () => {
"25reply",
"26reply",
"27reply",
"21forward", // missing file
"23forward", // missing file
"24forward", // missing file
])
);
});
......@@ -9,8 +9,26 @@ const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`);
describe("Outlook emails Plain normalization", () => {
const describeFunction = createDescribePlainTestCases(testsPath);
describe("Emails Chrome", describeFunction("chrome"));
describe("Emails Edge", describeFunction("edge", ["21"]));
describe("Emails Safari", describeFunction("safari"));
describe(
"Emails Edge",
describeFunction("edge", [
"21",
"08", // missing files
"10", // missing files
])
);
describe(
"Emails Safari",
describeFunction("safari", [
"08", // missing files
])
);
describe("Emails MacOS", describeFunction("macos", ["21", "23", "25"]));
describe("Emails Windows", describeFunction("windows", ["25"]));
describe(
"Emails Windows",
describeFunction("windows", [
"25",
"10", // missing file
])
);
});
......@@ -7,6 +7,7 @@ const SENT_PLAIN_NAME = "s_plainContent.data";
const RECEIVED_PLAIN_NAME = "r_plainContent.data";
import { PlainNormalizer, HTMLNormalizer } from "../src";
import { expect, test } from "@jest/globals";
import { DOM } from "@vereign/dom";
export const getNormalizedPlain = (
testCasePath: string
......@@ -50,7 +51,7 @@ export const getNormalizedHtml = (
.readFileSync(`${testCasePath}/${RECEIVED_HTML_NAME}`)
.toString();
const sentDOM = new JSDOM(sentHtml);
const sentDOM = new DOM(sentHtml);
const receivedDOM = new JSDOM(receivedHtml);
const sentNormalizedHtml = HTMLNormalizer.normalizeVendorHtml(
......@@ -75,23 +76,31 @@ export const createDescribeHtmlTestCases = (
/**
* @param casesGroupName - name of the folder with cases
* @param failingCases - a list of cases that are failing and ignored. Pending to be fixed
* @param casesToCheckOnly - a filter to use if you want to check specific cases
*/
(casesGroupName: string, failingCases: Array<string> = []) => (): void => {
(
casesGroupName: string,
failingCases?: Array<string>,
casesToCheckOnly?: Array<string>
) => (): void => {
const testsCasesPath = testsPath + "/" + casesGroupName;
const testCasesDirs = getTestCasesDirs(testsCasesPath).filter(
(dir) => !failingCases.includes(dir)
);
let testCasesDirs = getTestCasesDirs(testsCasesPath);
if (casesToCheckOnly && casesToCheckOnly.length) {
testCasesDirs = testCasesDirs.filter((dir) =>
casesToCheckOnly.includes(dir)
);
}
if (failingCases && failingCases.length) {
testCasesDirs = testCasesDirs.filter(
(dir) => !failingCases.includes(dir)
);
}
test.each(testCasesDirs)("Case %s", (dirName: string) => {
const testCasePath = testsCasesPath + "/" + dirName;
let normalizedHtmls;
try {
normalizedHtmls = getNormalizedHtml(testCasePath, vendor);
} catch (e) {
console.log(`Invalid test case: ${casesGroupName}/${dirName}`);
return;
}
const normalizedHtmls = getNormalizedHtml(testCasePath, vendor);
const { sentHtml, receivedHtml } = normalizedHtmls;
// expect(receivedHtml.length).toBeGreaterThan(0);
......@@ -111,13 +120,7 @@ export const createDescribePlainTestCases = (testsPath: string) => (
test.each(testCasesDirs)("Case %s", (dirName: string) => {
const testCasePath = testsCasesPath + "/" + dirName;
let normalizedPlain;
try {
normalizedPlain = getNormalizedPlain(testCasePath);
} catch (e) {
console.log(`Invalid test case: ${casesName}/${dirName}`);
return;
}
const normalizedPlain = getNormalizedPlain(testCasePath);
const { sentPlain, receivedPlain } = normalizedPlain;
......
......@@ -102,7 +102,8 @@ const printHtmlNode = (node, printFunction, depth) => {
if (node.firstChild) {
result += ">";
result += "\n";
result += exports.printHtmlChildren(node, printFunction, depth + 1);
const printout = exports.printHtmlChildren(node, printFunction, depth + 1);
result += printout;
result += "</" + node.nodeName + ">";
}
else {
......@@ -115,7 +116,7 @@ const printHtmlNode = (node, printFunction, depth) => {
};
exports.printHtmlNode = printHtmlNode;
const cleanupHtmlNodeAttributes = (node, cleanupElementAttributes) => {
if (node.nodeType === node.ELEMENT_NODE) {
if (node.nodeType === constants_1.ELEMENT_NODE) {
cleanupElementAttributes(node);
}
let child = node.firstChild;
......@@ -128,11 +129,11 @@ exports.cleanupHtmlNodeAttributes = cleanupHtmlNodeAttributes;
const pruneHtmlNode = (node, pruneElement) => {
let toBeRemoved = false;
switch (node.nodeType) {
case node.COMMENT_NODE:
case node.DOCUMENT_TYPE_NODE:
case constants_1.COMMENT_NODE:
case constants_1.DOCUMENT_TYPE_NODE:
toBeRemoved = true;
break;
case node.TEXT_NODE: {
case constants_1.TEXT_NODE: {
const trimmedText = node.textContent.trim();
if (trimmedText === "") {
toBeRemoved = true;
......@@ -142,7 +143,7 @@ const pruneHtmlNode = (node, pruneElement) => {
}
break;
}
case node.ELEMENT_NODE:
case constants_1.ELEMENT_NODE:
toBeRemoved = pruneElement(node);
}
if (toBeRemoved) {
......@@ -169,7 +170,7 @@ const escapeHtmlString = (string) => {
let html = "";
let index = 0;
let lastIndex = 0;
for (let index = match.index; index < str.length; index++) {
for (index = match.index; index < str.length; index++) {
switch (str.charCodeAt(index)) {
case 34: // "
escape = "&quot;";
......
......@@ -34,16 +34,12 @@ const pruneElement = (element) => {
if (isDummyQrCode(element)) {
return true;
}
if (element.nodeName.toLowerCase() === "div" &&
element.childNodes.length === 0) {
return true;
}
return !!exports.ELEMENT_TYPES_TO_REMOVE[element.nodeName.toLowerCase()];
};
exports.pruneElement = pruneElement;
const cloneAnchorFromPane = (a, pane) => {
try {
const url = new URL(a.href);
const url = new URL(a.getAttribute("href"));
// If this is external url
if (url.host && url.protocol) {
pane.parentNode.insertBefore(a.cloneNode(false), pane);
......
......@@ -13,8 +13,8 @@ const amendGmailNodes = (document) => {
*/
const attachmentsPanes = Array.from(document.getElementsByClassName("gmail_chip"));
attachmentsPanes.forEach((pane) => {
const as = pane.querySelectorAll("a");
as.forEach((a) => {
const as = pane.getElementsByTagName("a");
Array.from(as).forEach((a) => {
common_1.cloneAnchorFromPane(a, pane);
});
});
......
export declare const unwindTags: (node: Element | Document, tagName: string) => void;
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.unwindTags = void 0;
const unwindTags = (node, tagName) => {
const tags = node.getElementsByTagName(tagName);
//Sort tags by depth to start unwinding the deepest ones, which does not contain nested spans
const tagsDepths = {};
Array.from(tags).forEach((span) => {
let descendant = span;
let parent = descendant.parentNode;
let depth = 0;
while (parent && descendant !== parent) {
descendant = parent;
parent = descendant.parentNode;
depth++;
}
if (!tagsDepths[depth]) {
tagsDepths[depth] = [];
}
tagsDepths[depth].push(span);
});
Object.keys(tagsDepths)
.sort((a, b) => parseInt(b) - parseInt(a))
.forEach((depth) => {
tagsDepths[depth].forEach((span) => {
let child = span.firstChild;
const parent = span.parentNode;
while (child) {
parent.insertBefore(child.cloneNode(true), span);
child = child.nextSibling;
}
span.parentNode.removeChild(span);
});
});
};
exports.unwindTags = unwindTags;
......@@ -31,7 +31,7 @@ const removeQrCodeNodes = (document) => {
let toRemove = [];
let child = node.firstChild;
while (child) {
if (child.nodeType == child.ELEMENT_NODE) {
if (child.nodeType == constants_1.ELEMENT_NODE) {
toRemove = [...toRemove, ...remove(child)];
const childElement = child;
const id = childElement.getAttribute("id");
......
export declare const ELEMENT_NODE = 1;
export declare const COMMENT_NODE = 8;
export declare const DOCUMENT_TYPE_NODE = 10;
export declare const TEXT_NODE = 3;
export declare const DOCUMENT_NODE = 9;
export declare const EMAIL_VENDORS: {
......
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.EMAIL_VENDORS = exports.DOCUMENT_NODE = exports.TEXT_NODE = exports.ELEMENT_NODE = void 0;
exports.EMAIL_VENDORS = exports.DOCUMENT_NODE = exports.TEXT_NODE = exports.DOCUMENT_TYPE_NODE = exports.COMMENT_NODE = exports.ELEMENT_NODE = void 0;
exports.ELEMENT_NODE = 1;
exports.COMMENT_NODE = 8;
exports.DOCUMENT_TYPE_NODE = 10;
exports.TEXT_NODE = 3;
exports.DOCUMENT_NODE = 9;
exports.EMAIL_VENDORS = {
......
import { DOCUMENT_NODE, ELEMENT_NODE, TEXT_NODE } from "../constants";
import {
COMMENT_NODE,
DOCUMENT_NODE,
DOCUMENT_TYPE_NODE,
ELEMENT_NODE,
TEXT_NODE,
} from "../constants";
import {
amendOutlookNodes,
cleanupOutlookElementAttributes,
printOutlookElement,
pruneOutlookElement
pruneOutlookElement,
} from "./strategies/outlook";
import {EMAIL_VENDORS} from "../constants";
import {removeSpacesAndLinebreaks} from "../utils";
import {amendGmailNodes, cleanupGMailElementAttributes, pruneGmailElement} from "./strategies/gmail";
import { EMAIL_VENDORS } from "../constants";
import { removeSpacesAndLinebreaks } from "../utils";
import {
amendGmailNodes,
cleanupGMailElementAttributes,
pruneGmailElement,
} from "./strategies/gmail";
const nodesAmendingFunctions = {
[EMAIL_VENDORS.GMAIL]: amendGmailNodes,
......@@ -28,7 +38,10 @@ const vendorPrintingFunctions = {
[EMAIL_VENDORS.OUTLOOK]: printOutlookElement,
};
export const normalizeVendorHtml = (document: HTMLDocument, vendor: string): string => {
export const normalizeVendorHtml = (
document: HTMLDocument,
vendor: string
): string => {
const mimeBody = document.body;
const amendNodesFunction = nodesAmendingFunctions[vendor];
......@@ -52,8 +65,7 @@ export const normalizeVendorHtml = (document: HTMLDocument, vendor: string): str
/**
* Cleanup unnecessary attributes of nodes
*/
const elementAttributesCleanupFunction =
attributesCleanupFunctions[vendor];
const elementAttributesCleanupFunction = attributesCleanupFunctions[vendor];
if (elementAttributesCleanupFunction) {
cleanupHtmlNodeAttributes(document, elementAttributesCleanupFunction);
......@@ -132,7 +144,8 @@ export const printHtmlNode = (
if (node.firstChild) {
result += ">";
result += "\n";
result += printHtmlChildren(node, printFunction, depth + 1);
const printout = printHtmlChildren(node, printFunction, depth + 1);
result += printout;
result += "</" + node.nodeName + ">";
} else {
result += "/>";
......@@ -148,7 +161,7 @@ export const cleanupHtmlNodeAttributes = (
node: Node,
cleanupElementAttributes: (element: HTMLElement) => void
): void => {
if (node.nodeType === node.ELEMENT_NODE) {
if (node.nodeType === ELEMENT_NODE) {
cleanupElementAttributes(node as HTMLElement);
}
......@@ -166,11 +179,11 @@ export const pruneHtmlNode = (
let toBeRemoved = false;
switch (node.nodeType) {
case node.COMMENT_NODE:
case node.DOCUMENT_TYPE_NODE:
case COMMENT_NODE:
case DOCUMENT_TYPE_NODE:
toBeRemoved = true;
break;
case node.TEXT_NODE: {
case TEXT_NODE: {
const trimmedText = node.textContent.trim();
if (trimmedText === "") {
toBeRemoved = true;
......@@ -179,7 +192,7 @@ export const pruneHtmlNode = (
}
break;
}
case node.ELEMENT_NODE:
case ELEMENT_NODE:
toBeRemoved = pruneElement(node as HTMLElement);
}
......@@ -215,7 +228,7 @@ export const escapeHtmlString = (string: string): string => {
let index = 0;
let lastIndex = 0;
for (let index = match.index; index < str.length; index++) {
for (index = match.index; index < str.length; index++) {
switch (str.charCodeAt(index)) {
case 34: // "
escape = "&quot;";
......
......@@ -36,13 +36,6 @@ export const pruneElement = (element: HTMLElement): boolean => {
return true;
}
if (
element.nodeName.toLowerCase() === "div" &&
element.childNodes.length === 0
) {
return true;
}
return !!ELEMENT_TYPES_TO_REMOVE[element.nodeName.toLowerCase()];
};
......@@ -51,7 +44,7 @@ export const cloneAnchorFromPane = (
pane: HTMLElement
): void => {
try {
const url = new URL(a.href);
const url = new URL(a.getAttribute("href"));
// If this is external url
if (url.host && url.protocol) {
pane.parentNode.insertBefore(a.cloneNode(false), pane);
......
......@@ -20,8 +20,8 @@ export const amendGmailNodes = (document: HTMLDocument): void => {
);
attachmentsPanes.forEach((pane) => {
const as = pane.querySelectorAll("a");
as.forEach((a) => {
const as = pane.getElementsByTagName("a");
Array.from(as).forEach((a) => {
cloneAnchorFromPane(a, pane as HTMLElement);
});
});
......
......@@ -36,7 +36,7 @@ const removeQrCodeNodes = (document: HTMLDocument) => {
let child = node.firstChild;
while (child) {
if (child.nodeType == child.ELEMENT_NODE) {
if (child.nodeType == ELEMENT_NODE) {
toRemove = [...toRemove, ...remove(child as Element)];
const childElement = child as Element;
......
/**
 * Numeric node-type codes that the HTML normaliser compares against
 * `node.nodeType`. The values are the same as the standard DOM
 * `Node.*_NODE` constants.
 *
 * NOTE(review): these are presumably module-level constants so that the
 * checks do not depend on the DOM implementation exposing `*_NODE`
 * properties on node instances (e.g. the custom Vereign DOM parser) — confirm.
 */
// Element node, e.g. <div> or <a>.
export const ELEMENT_NODE = 1;
// Comment node (<!-- ... -->).
export const COMMENT_NODE = 8;
// Document type node (<!DOCTYPE ...>).
export const DOCUMENT_TYPE_NODE = 10;
// Text node.
export const TEXT_NODE = 3;
// The document node itself.
export const DOCUMENT_NODE = 9;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment