Skip to content
Snippets Groups Projects
Commit 64a5bed6 authored by Igor Markin's avatar Igor Markin
Browse files

Wrap up pseudoplain normalisation

parent a6d5e63a
Branches
Tags
1 merge request: !6 Implement pseudo plain parsing
import { diffStringsUnified } from "jest-diff"; import { describe } from "@jest/globals";
import { describe, test } from "@jest/globals"; import { createDescribePseudoPlainTestCases } from "./utils";
import { getDOMDocuments, getTestCasesDirs } from "./utils";
import { PlainNormalizer } from "../src";
import { amendOutlookNodes } from "../src/HTMLNormalizer/strategies/outlook";
const path = require("path"); const path = require("path");
const TESTS_GLOBAL_PATH = "/files/outlook-outlook"; const TESTS_GLOBAL_PATH = "/files/outlook-outlook";
const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`); const testsPath = path.resolve(__dirname, `.${TESTS_GLOBAL_PATH}`);
/**
 * Creates a factory of Jest `describe` bodies for pseudo-plain test cases
 * located under `testsPath`.
 *
 * The returned factory takes:
 * - `casesGroupName` - name of the folder with cases
 * - `failingCases` - a list of cases that are failing and ignored. Pending to be fixed
 * - `casesToCheckOnly` - a filter to use if you want to check specific cases
 *
 * and yields a callback that registers one `test` per remaining case
 * directory. Each test amends the Outlook nodes of the received and sent
 * documents, normalizes the text content of both bodies and logs a unified
 * diff of the two results. No assertion is made.
 */
const createDescribePseudoPlainTestCases = (testsPath: string) => (
  casesGroupName: string,
  failingCases?: Array<string>,
  casesToCheckOnly?: Array<string>
) => (): void => {
  const groupPath = testsPath + "/" + casesGroupName;

  // Empty or missing lists mean "no filtering" for the respective rule.
  const applyOnlyFilter = Boolean(casesToCheckOnly && casesToCheckOnly.length);
  const applyFailingFilter = Boolean(failingCases && failingCases.length);

  const selectedDirs = getTestCasesDirs(groupPath).filter((dir) => {
    if (applyOnlyFilter && !casesToCheckOnly.includes(dir)) {
      return false;
    }
    if (applyFailingFilter && failingCases.includes(dir)) {
      return false;
    }
    return true;
  });

  test.each(selectedDirs)("Case %s", (dirName: string) => {
    const documents = getDOMDocuments(groupPath + "/" + dirName);

    // Received document is processed before the sent one, as in the original flow.
    amendOutlookNodes(documents.receivedHtmlDocument);
    amendOutlookNodes(documents.sentHtmlDocument);

    const receivedText = PlainNormalizer.normalizePlain(
      documents.receivedHtmlDocument.body.textContent
    );
    const sentText = PlainNormalizer.normalizePlain(
      documents.sentHtmlDocument.body.textContent
    );

    console.log(diffStringsUnified(receivedText, sentText));
  });
};
describe("[Pseudo PLAIN] Outlook-Outlook normalization", () => { describe("[Pseudo PLAIN] Outlook-Outlook normalization", () => {
const describeFunction = createDescribePseudoPlainTestCases(testsPath); const describeFunction = createDescribePseudoPlainTestCases(testsPath);
// ["01"] - is a filter. Pass here names of directories with test cases you want to check // ["01"] - is a filter. Pass here names of directories with test cases you want to check
//describe("Emails Chrome", describeFunction("chrome", null, [])); describe("Emails Chrome", describeFunction("chrome", null, []));
//describe("Emails Edge", describeFunction("edge", [])); describe(
"Emails Edge",
//describe("Emails Safari", describeFunction("safari", ["08"])); describeFunction("edge", [
"08", // Files are missing for test case
"10", // Files are missing for test case
])
);
//Does not work at all describe("Emails Safari", describeFunction("safari", ["08"]));
describe("Emails MacOS", describeFunction("macos", []));
//describe("Emails Windows", describeFunction("windows", [])); //Does not work at all
describe(
"Emails MacOS",
describeFunction("macos", [
"23", // has special character
])
);
describe(
"Emails Windows",
describeFunction("windows", [
"10", // missing files
])
);
}); });
...@@ -5,7 +5,7 @@ const SENT_HTML_NAME = "s_htmlContent.html"; ...@@ -5,7 +5,7 @@ const SENT_HTML_NAME = "s_htmlContent.html";
const RECEIVED_HTML_NAME = "r_htmlContent.html"; const RECEIVED_HTML_NAME = "r_htmlContent.html";
const SENT_PLAIN_NAME = "s_plainContent.data"; const SENT_PLAIN_NAME = "s_plainContent.data";
const RECEIVED_PLAIN_NAME = "r_plainContent.data"; const RECEIVED_PLAIN_NAME = "r_plainContent.data";
import { PlainNormalizer, HTMLNormalizer } from "../src"; import { PlainNormalizer, HTMLNormalizer, EMAIL_VENDORS } from "../src";
import { expect, test } from "@jest/globals"; import { expect, test } from "@jest/globals";
import { DOM } from "@vereign/dom"; import { DOM } from "@vereign/dom";
...@@ -130,6 +130,72 @@ export const createDescribePlainTestCases = (testsPath: string) => ( ...@@ -130,6 +130,72 @@ export const createDescribePlainTestCases = (testsPath: string) => (
}); });
}; };
/**
 * Creates a factory of Jest `describe` bodies for pseudo-plain test cases
 * located under `testsPath`.
 *
 * The returned factory takes:
 * - `casesGroupName` - name of the folder with cases
 * - `failingCases` - a list of cases that are failing and ignored. Pending to be fixed
 * - `casesToCheckOnly` - a filter to use if you want to check specific cases
 *
 * and yields a callback that registers one `test` per remaining case
 * directory. Each test normalizes the received and sent HTML documents as
 * Outlook HTML, extracts their pseudo-plain parts and expects the received
 * text to contain the sent text.
 */
export const createDescribePseudoPlainTestCases = (testsPath: string) => (
  casesGroupName: string,
  failingCases?: Array<string>,
  casesToCheckOnly?: Array<string>
) => (): void => {
  const groupPath = testsPath + "/" + casesGroupName;

  // Empty or missing lists mean "no filtering" for the respective rule.
  const applyOnlyFilter = Boolean(casesToCheckOnly && casesToCheckOnly.length);
  const applyFailingFilter = Boolean(failingCases && failingCases.length);

  const selectedDirs = getTestCasesDirs(groupPath).filter((dir) => {
    if (applyOnlyFilter && !casesToCheckOnly.includes(dir)) {
      return false;
    }
    if (applyFailingFilter && failingCases.includes(dir)) {
      return false;
    }
    return true;
  });

  test.each(selectedDirs)("Case %s", (dirName: string) => {
    const { sentHtmlDocument, receivedHtmlDocument } = getDOMDocuments(
      groupPath + "/" + dirName
    );

    // Order matters only for parity with the original flow: both documents
    // are normalized first (received, then sent), then both are extracted.
    const documents = [receivedHtmlDocument, sentHtmlDocument];
    documents.forEach((doc) => {
      HTMLNormalizer.normalizeVendorHtml(doc, EMAIL_VENDORS.OUTLOOK);
    });
    const [receivedText, sentText] = documents.map((doc) =>
      HTMLNormalizer.extractPseudoPlainPart(doc, EMAIL_VENDORS.OUTLOOK)
    );

    expect(receivedText).toContain(sentText);
  });
};
export const getDOMDocuments = ( export const getDOMDocuments = (
testCasePath: string testCasePath: string
): { ): {
......
...@@ -18,6 +18,7 @@ import { ...@@ -18,6 +18,7 @@ import {
cleanupGMailElementAttributes, cleanupGMailElementAttributes,
pruneGmailElement, pruneGmailElement,
} from "./strategies/gmail"; } from "./strategies/gmail";
import { PlainNormalizer } from "../index";
const nodesAmendingFunctions = { const nodesAmendingFunctions = {
[EMAIL_VENDORS.GMAIL]: amendGmailNodes, [EMAIL_VENDORS.GMAIL]: amendGmailNodes,
...@@ -79,6 +80,13 @@ export const normalizeVendorHtml = ( ...@@ -79,6 +80,13 @@ export const normalizeVendorHtml = (
return printHtmlChildren(mimeBody, vendorPrintFunction, 0); return printHtmlChildren(mimeBody, vendorPrintFunction, 0);
}; };
/**
 * Extracts the "pseudo plain" part of an HTML document: the text content of
 * its body passed through `PlainNormalizer.normalizePlain`.
 *
 * @param document - parsed HTML document to read the body text from
 * @param _vendor - email vendor identifier. Accepted so that callers passing
 *   a vendor type-check; it is not used by the extraction yet.
 * @returns the normalized text; a body with no text content is treated as ""
 */
export const extractPseudoPlainPart = (
  document: HTMLDocument,
  _vendor?: string
): string => {
  // `textContent` is typed `string | null`, so fall back to an empty string
  // to always hand `normalizePlain` a real string.
  return PlainNormalizer.normalizePlain(document.body.textContent || "");
};
export const printHtmlChildren = ( export const printHtmlChildren = (
node: Node, node: Node,
printFunction: (node: Node) => string, printFunction: (node: Node) => string,
......
import { normalizeVendorHtml } from "./HTMLNormalizer"; import { normalizeVendorHtml, extractPseudoPlainPart } from "./HTMLNormalizer";
export default { export default {
normalizeVendorHtml, normalizeVendorHtml,
extractPseudoPlainPart,
}; };
...@@ -13,9 +13,5 @@ const removeQRCodes = (s: string): string => { ...@@ -13,9 +13,5 @@ const removeQRCodes = (s: string): string => {
}; };
/**
 * Removes list bullet marker lines from text.
 *
 * A marker is a line consisting solely of `o` or `§`. The newline before the
 * marker, the marker itself and every newline directly after it are removed,
 * so the text before and after the marker is joined.
 *
 * @param s - text to clean
 * @returns `s` with all bullet marker lines removed
 */
const removeListBullets = (s: string): string => {
  // The pattern must be a RegExp literal with the `g` flag. Passing it as a
  // string (e.g. "\n[o§]\n+/g") makes `replace` look for that literal text,
  // which never occurs in real input, so nothing would be removed.
  return s.replace(/\n[o§]\n+/g, "");
};
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment