Skip to content
Snippets Groups Projects
Commit 0b2da4fb authored by Igor Markin's avatar Igor Markin
Browse files

Merge branch 'master' of code.vereign.com:code/js-toolbox/mime-normalizer

parents af50a145 e0534077
Branches
Tags
No related merge requests found
Showing with 221 additions and 101 deletions
......@@ -5,6 +5,13 @@ Library implements normalisation of HTML and plain parts of a MIME message.
Normalisation strategies documentation:
https://code.vereign.com/light/documentation/-/blob/master/Validation.md#normalization-strategies
## Prerequisites
- [Node.JS](https://nodejs.org/en/) ^10.12.0 || >=12.0.0+
- [yarn](https://yarnpkg.com/getting-started/install) package manager
## Initialize project
`yarn install`
## Testing
HTML and text files with test cases are provided in `__tests__/files`.
......
import { EMAIL_VENDORS } from "../src";
const path = require("path");
import { describe } from "@jest/globals";
import { createDescribePseudoPlainTestCases } from "./utils";
const TESTS_GLOBAL_PATH = "/files/gmail-gmail";
const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`);
// Pseudo-plain suite for the GMail -> GMail fixtures located under `testsPath`.
describe("[Pseudo PLAIN] GMail-GMail", () => {
  // The factory is bound to the fixture root and the GMail vendor, then
  // immediately applied to the "one" cases group.
  describe(
    "One",
    createDescribePseudoPlainTestCases(testsPath, EMAIL_VENDORS.GMAIL)("one")
  );
});
import { describe } from "@jest/globals";
import { createDescribePseudoPlainTestCases } from "./utils";
import { EMAIL_VENDORS } from "../src";
const path = require("path");
const TESTS_GLOBAL_PATH = "/files/gmail-outlook";
const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`);
// Pseudo-plain suite for the Gmail -> Outlook fixtures located under `testsPath`.
describe("[Pseudo PLAIN] Gmail-Outlook normalization", () => {
  // Cases excluded from the run because their fixture files are missing.
  const casesWithMissingFiles = ["21forward", "23forward", "24forward"];

  const describeGmailGroup = createDescribePseudoPlainTestCases(
    testsPath,
    EMAIL_VENDORS.GMAIL
  );

  describe("One", describeGmailGroup("one", casesWithMissingFiles));
});
import { diffStringsUnified } from "jest-diff";
import { describe, test } from "@jest/globals";
import { getDOMDocuments, getTestCasesDirs } from "./utils";
import { describe } from "@jest/globals";
import { createDescribePseudoPlainTestCases } from "./utils";
import { EMAIL_VENDORS } from "../src";
const path = require("path");
const TESTS_GLOBAL_PATH = "/files/outlook-outlook";
const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`);
/**
 * Builds a factory of jest `describe` bodies for the fixtures under `testsPath`.
 * Each generated test only prints a unified diff between the received and the
 * sent document body text; it makes no assertion.
 */
const createDescribePseudoPlainTestCases = (testsPath: string) =>
  /**
   * @param casesGroupName - name of the folder with cases
   * @param failingCases - cases that are known to fail and are skipped for now
   * @param casesToCheckOnly - when non-empty, only these cases are run
   */
  (
    casesGroupName: string,
    failingCases?: Array<string>,
    casesToCheckOnly?: Array<string>
  ) => (): void => {
    const groupPath = `${testsPath}/${casesGroupName}`;

    // An empty or missing list means "no filtering" for that list.
    const allowList =
      casesToCheckOnly && casesToCheckOnly.length ? casesToCheckOnly : undefined;
    const denyList =
      failingCases && failingCases.length ? failingCases : undefined;

    const selectedDirs = getTestCasesDirs(groupPath).filter(
      (dir) =>
        (!allowList || allowList.includes(dir)) &&
        (!denyList || !denyList.includes(dir))
    );

    test.each(selectedDirs)("Case %s", (dirName: string) => {
      const documents = getDOMDocuments(`${groupPath}/${dirName}`);
      const receivedText = documents.receivedHtmlDocument.body.textContent;
      const sentText = documents.sentHtmlDocument.body.textContent;

      console.log(diffStringsUnified(receivedText, sentText));
    });
  };
/**
 * Pseudo-plain suites for the Outlook -> Outlook fixtures located under
 * `testsPath`, one nested `describe` per sending client.
 *
 * `describeFunction(group, failingCases?, casesToCheckOnly?)`:
 *  - `failingCases` lists case directories that are skipped;
 *  - `casesToCheckOnly` is an optional filter for debugging a subset,
 *    e.g. `describeFunction("chrome", undefined, ["01"])`.
 */
describe("[Pseudo PLAIN] Outlook-Outlook normalization", () => {
  // Declared exactly once: a second `const describeFunction` in the same
  // scope is a redeclaration error.
  const describeFunction = createDescribePseudoPlainTestCases(
    testsPath,
    EMAIL_VENDORS.OUTLOOK
  );

  // No exclusions and no filter, so the optional arguments are simply omitted
  // (previously `null, []`, which selects the same set of cases).
  describe("Emails Chrome", describeFunction("chrome"));

  describe(
    "Emails Edge",
    describeFunction("edge", [
      "08", // Files are missing for test case
      "10", // Files are missing for test case
    ])
  );

  describe("Emails Safari", describeFunction("safari", ["08"]));

  // NOTE(review): this group carried a "Does not work at all" remark while
  // being enabled with a single exclusion - confirm the remark is stale.
  describe(
    "Emails MacOS",
    describeFunction("macos", [
      "23", // has special character
    ])
  );

  describe(
    "Emails Windows",
    describeFunction("windows", [
      "10", // missing files
    ])
  );
});
......@@ -8,6 +8,7 @@ const RECEIVED_PLAIN_NAME = "r_plainContent.data";
import { PlainNormalizer, HTMLNormalizer } from "../src";
import { expect, test } from "@jest/globals";
import { DOM } from "@vereign/dom";
//import { diffStringsUnified } from "jest-diff";
export const getNormalizedPlain = (
testCasePath: string
......@@ -111,12 +112,21 @@ export const createDescribeHtmlTestCases = (
export const createDescribePlainTestCases = (testsPath: string) => (
casesName: string,
failingCases: Array<string> = []
failingCases: Array<string> = [],
casesToCheckOnly?: Array<string>
) => (): void => {
const testsCasesPath = testsPath + "/" + casesName;
const testCasesDirs = getTestCasesDirs(testsCasesPath).filter(
(dir) => !failingCases.includes(dir)
);
let testCasesDirs = getTestCasesDirs(testsCasesPath);
if (casesToCheckOnly && casesToCheckOnly.length) {
testCasesDirs = testCasesDirs.filter((dir) =>
casesToCheckOnly.includes(dir)
);
}
if (failingCases && failingCases.length) {
testCasesDirs = testCasesDirs.filter((dir) => !failingCases.includes(dir));
}
test.each(testCasesDirs)("Case %s", (dirName: string) => {
const testCasePath = testsCasesPath + "/" + dirName;
......@@ -127,9 +137,69 @@ export const createDescribePlainTestCases = (testsPath: string) => (
// expect(sentPlain.length).toBeGreaterThan(0);
// expect(receivedPlain.length).toBeGreaterThan(0);
expect(receivedPlain).toContain(sentPlain);
// const diff = diffStringsUnified(sentPlain, receivedPlain);
// console.log(diff);
});
};
/**
 * Builds a factory of jest `describe` bodies that compare the pseudo-plain
 * text of the sent and the received HTML document of every selected case.
 *
 * @param testsPath - root folder that contains the case groups
 * @param vendor - vendor identifier handed to `HTMLNormalizer.normalizeVendorHtml`
 */
export const createDescribePseudoPlainTestCases = (
  testsPath: string,
  vendor: string
) =>
  /**
   * @param casesGroupName - name of the folder with cases
   * @param failingCases - cases that are known to fail and are skipped for now
   * @param casesToCheckOnly - when non-empty, only these cases are run
   */
  (
    casesGroupName: string,
    failingCases?: Array<string>,
    casesToCheckOnly?: Array<string>
  ) => (): void => {
    const groupPath = `${testsPath}/${casesGroupName}`;

    // An empty or missing list means "no filtering" for that list.
    const allowList =
      casesToCheckOnly && casesToCheckOnly.length ? casesToCheckOnly : undefined;
    const denyList =
      failingCases && failingCases.length ? failingCases : undefined;

    const selectedDirs = getTestCasesDirs(groupPath).filter(
      (dir) =>
        (!allowList || allowList.includes(dir)) &&
        (!denyList || !denyList.includes(dir))
    );

    test.each(selectedDirs)("Case %s", (dirName: string) => {
      const documents = getDOMDocuments(`${groupPath}/${dirName}`);
      const received = documents.receivedHtmlDocument;
      const sent = documents.sentHtmlDocument;

      // Vendor normalization is applied to both documents (received first)
      // before any text is extracted; the returned strings are not used here.
      HTMLNormalizer.normalizeVendorHtml(received, vendor);
      HTMLNormalizer.normalizeVendorHtml(sent, vendor);

      const receivedPseudoPlain = HTMLNormalizer.extractPseudoPlainPart(received);
      const sentPseudoPlain = HTMLNormalizer.extractPseudoPlainPart(sent);

      expect(receivedPseudoPlain).toEqual(sentPseudoPlain);

      // Debugging aid (disabled):
      // console.log(diffStringsUnified(receivedPseudoPlain, sentPseudoPlain));
    });
  };
export const getDOMDocuments = (
testCasePath: string
): {
......@@ -143,7 +213,7 @@ export const getDOMDocuments = (
.readFileSync(`${testCasePath}/${RECEIVED_HTML_NAME}`)
.toString();
const sentDOM = new JSDOM(sentHtml);
const sentDOM = new DOM(sentHtml);
const receivedDOM = new JSDOM(receivedHtml);
return {
......
export declare const normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string;
export declare const extractPseudoPlainPart: (document: HTMLDocument) => string;
export declare const printHtmlChildren: (node: Node, printFunction: (node: Node) => string, depth: number) => string;
export declare const printHtmlNode: (node: Node, printFunction: (node: Node) => string, depth: number) => string;
export declare const cleanupHtmlNodeAttributes: (node: Node, cleanupElementAttributes: (element: HTMLElement) => void) => void;
......
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.normalizeVendorHtml = void 0;
exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.extractPseudoPlainPart = exports.normalizeVendorHtml = void 0;
const constants_1 = require("../constants");
const outlook_1 = require("./strategies/outlook");
const constants_2 = require("../constants");
const utils_1 = require("../utils");
const gmail_1 = require("./strategies/gmail");
const index_1 = require("../index");
const nodesAmendingFunctions = {
[constants_2.EMAIL_VENDORS.GMAIL]: gmail_1.amendGmailNodes,
[constants_2.EMAIL_VENDORS.OUTLOOK]: outlook_1.amendOutlookNodes,
......@@ -49,6 +50,27 @@ const normalizeVendorHtml = (document, vendor) => {
return exports.printHtmlChildren(mimeBody, vendorPrintFunction, 0);
};
exports.normalizeVendorHtml = normalizeVendorHtml;
/**
 * Returns the document body's `textContent` after it has been passed through
 * `PlainNormalizer.normalizePlain`.
 *
 * A `vendor` parameter is sketched in the signature but is not active yet.
 */
const extractPseudoPlainPart = (document
/*vendor: string*/
) => {
    // Disabled draft: also collect the meaningful attributes of links and images.
    //
    // const anchors = document.getElementsByTagName("a");
    // const images = document.getElementsByTagName("img");
    // let meaningfulAttributes = [];
    //
    // Array.from(anchors).forEach((a) => {
    //   meaningfulAttributes.push(a.getAttribute("href"));
    // });
    // Array.from(images).forEach((img) => {
    //   meaningfulAttributes.push(img.getAttribute("src"));
    //   meaningfulAttributes.push(img.getAttribute("alt"));
    // });
    //
    // meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort();
    // console.log(meaningfulAttributes);
    return index_1.PlainNormalizer.normalizePlain(document.body.textContent);
};
const printHtmlChildren = (node, printFunction, depth) => {
let child = node.firstChild;
if (!child) {
......
/**
 * Shape of the module's default export: the two HTML normalizer entry points.
 */
declare const _default: {
/** Takes a document and a vendor identifier and returns a string. */
normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string;
/** Takes a document and returns a string. */
extractPseudoPlainPart: (document: HTMLDocument) => string;
};
export default _default;
......@@ -3,4 +3,5 @@ Object.defineProperty(exports, "__esModule", { value: true });
const HTMLNormalizer_1 = require("./HTMLNormalizer");
// Default export: re-exposes the two entry points implemented in ./HTMLNormalizer.
exports.default = {
normalizeVendorHtml: HTMLNormalizer_1.normalizeVendorHtml,
extractPseudoPlainPart: HTMLNormalizer_1.extractPseudoPlainPart,
};
......@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
exports.normalizePlainPart = void 0;
const utils_1 = require("../utils");
/**
 * Normalizes a plain-text part in three steps, applied in this order:
 * list bullets, then spaces and line breaks, then QR-code references.
 */
const normalizePlainPart = (text) => {
    const withoutBullets = removeListBullets(text);
    const compacted = utils_1.removeSpacesAndLinebreaks(withoutBullets);
    return removeQRCodes(compacted);
};
......@@ -12,3 +13,6 @@ const removeQRCodes = (s) => {
.replace(/\[qrcode.png]\s*<https:\/\/[\w./?=\-&]+>/g, "")
.replace(/<https:\/\/[\w./?=\-&]+>\s*\[qrcode.png]/g, "");
};
/**
 * Removes list bullets that sit alone on their own line.
 *
 * A bullet is a single `o` or `§` character preceded by a line break and
 * followed by one or more line breaks; the bullet is removed together with
 * those line breaks.
 *
 * The pattern has to be a RegExp literal: a string pattern is matched
 * verbatim (including the text "/g"), so it never matched a bullet.
 */
const removeListBullets = (s) => {
    return s.replace(/\n[o§]\n+/g, "");
};
......@@ -2,8 +2,7 @@
Object.defineProperty(exports, "__esModule", { value: true });
exports.removeSpacesAndLinebreaks = void 0;
/**
 * Returns `s` with every whitespace character and every zero-width space
 * (U+200B) removed.
 *
 * The function previously contained two `return` statements, the second of
 * which was unreachable. A single pass is enough: `\s` already matches
 * `\r`, `\n` and `\v`, and no leading or trailing whitespace can survive
 * the replacement, so no `trim()` is needed afterwards.
 */
const removeSpacesAndLinebreaks = (s) => {
    return s.replace(/[\s\u200B]+/g, "");
};
exports.removeSpacesAndLinebreaks = removeSpacesAndLinebreaks;
......@@ -18,6 +18,7 @@ import {
cleanupGMailElementAttributes,
pruneGmailElement,
} from "./strategies/gmail";
import { PlainNormalizer } from "../index";
const nodesAmendingFunctions = {
[EMAIL_VENDORS.GMAIL]: amendGmailNodes,
......@@ -79,6 +80,29 @@ export const normalizeVendorHtml = (
return printHtmlChildren(mimeBody, vendorPrintFunction, 0);
};
/**
 * Returns the document body's `textContent` after it has been passed through
 * `PlainNormalizer.normalizePlain`.
 *
 * A `vendor` parameter is sketched in the signature but is not active yet.
 *
 * @param document - document whose body text is extracted
 * @returns the normalized text
 */
export const extractPseudoPlainPart = (
  document: HTMLDocument
  /*vendor: string*/
): string => {
  // Disabled draft: also collect the meaningful attributes of links and images.
  //
  // const anchors = document.getElementsByTagName("a");
  // const images = document.getElementsByTagName("img");
  // let meaningfulAttributes = [];
  //
  // Array.from(anchors).forEach((a) => {
  //   meaningfulAttributes.push(a.getAttribute("href"));
  // });
  // Array.from(images).forEach((img) => {
  //   meaningfulAttributes.push(img.getAttribute("src"));
  //   meaningfulAttributes.push(img.getAttribute("alt"));
  // });
  //
  // meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort();
  // console.log(meaningfulAttributes);
  return PlainNormalizer.normalizePlain(document.body.textContent);
};
export const printHtmlChildren = (
node: Node,
printFunction: (node: Node) => string,
......
import { normalizeVendorHtml } from "./HTMLNormalizer";
import { normalizeVendorHtml, extractPseudoPlainPart } from "./HTMLNormalizer";
// Default export: the two entry points imported from ./HTMLNormalizer.
export default {
normalizeVendorHtml,
extractPseudoPlainPart,
};
import { removeSpacesAndLinebreaks } from "../utils";
/**
 * Normalizes a plain-text part in three steps, applied in this order:
 * list bullets, then spaces and line breaks, then QR-code references.
 *
 * @param text - raw plain-text part
 * @returns the normalized text
 */
export const normalizePlainPart = (text: string): string => {
  const withoutBullets = removeListBullets(text);
  const compacted = removeSpacesAndLinebreaks(withoutBullets);
  return removeQRCodes(compacted);
};
......@@ -10,3 +11,7 @@ const removeQRCodes = (s: string): string => {
.replace(/\[qrcode.png]\s*<https:\/\/[\w./?=\-&]+>/g, "")
.replace(/<https:\/\/[\w./?=\-&]+>\s*\[qrcode.png]/g, "");
};
/**
 * Removes list bullets that sit alone on their own line.
 *
 * A bullet is a single `o` or `§` character preceded by a line break and
 * followed by one or more line breaks; the bullet is removed together with
 * those line breaks.
 *
 * The pattern has to be a RegExp literal: a string pattern is matched
 * verbatim (including the text "/g"), so it never matched a bullet.
 *
 * @param s - text that may contain bullet-only lines
 * @returns `s` without the bullet lines
 */
const removeListBullets = (s: string): string => {
  return s.replace(/\n[o§]\n+/g, "");
};
/**
 * Returns `s` with every whitespace character and every zero-width space
 * (U+200B) removed.
 *
 * The function previously contained two `return` statements, the second of
 * which was unreachable, and a regex that was never used. A single pass is
 * enough: `\s` already matches `\r`, `\n` and `\v`, and no leading or
 * trailing whitespace can survive the replacement, so no `trim()` is needed.
 *
 * @param s - text to compact
 * @returns `s` without whitespace and zero-width spaces
 */
export const removeSpacesAndLinebreaks = (s: string): string => {
  return s.replace(/[\s\u200B]+/g, "");
};
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment