Skip to content
Snippets Groups Projects
Commit 9ac662eb authored by Igor Markin
Browse files

Fix html text bugs

parent 95160b7c
Branches
Tags
1 merge request: !17 Populate attachments before normalisation
......@@ -8,7 +8,7 @@
| 06 | Multiple MIME attachments and text | ok | ok | ok |
| 07 | Multiple Drive attachments and text | ok | ok | ok |
| 08 | Complex email with formatted text, embedded images, MIME and GDrive attachments | fail | ok | fail |
| 09 | 2 replies with test case 01 | fail | ok | ok |
| 09 | 2 replies with test case 01 | fail | ok | fail |
| 10 | 2 replies with test case 02 | - | - | - |
| 11 | 2 replies with test case 03 | - | - | - |
| 12 | 2 replies with test case 04 | - | - | - |
......
......@@ -14,6 +14,6 @@ describe("[Pseudo PLAIN] Outlook-Outlook normalization", () => {
);
describe("Chrome-Chrome", describeFunction("chrome-chrome"));
describe("MacOS-MacOS", describeFunction("macos-macos", null));
describe("MacOS-MacOS", describeFunction("macos-macos", ["09", "s"]));
describe("Windows-Windows", describeFunction("windows-windows"));
});
......@@ -10,7 +10,7 @@ import { DOM } from "@vereign/dom";
import { diffStringsUnified } from "jest-diff";
expect.extend({
toContainWithDiff(target, source) {
toEqualWithDiff(target, source) {
let pass = true;
try {
expect(target).toEqual(source);
......@@ -134,7 +134,7 @@ export const createDescribeHtmlTestCases = (
// eslint-disable-next-line
// @ts-ignore
// console.log(receivedHtml);
expect(receivedHtml).toContainWithDiff(sentHtml);
expect(receivedHtml).toEqualWithDiff(sentHtml);
});
};
......@@ -165,7 +165,7 @@ export const createDescribePlainTestCases = (testsPath: string) => (
// expect(receivedPlain.length).toBeGreaterThan(0);
// eslint-disable-next-line
// @ts-ignore
expect(receivedPlain).toContainWithDiff(sentPlain);
expect(receivedPlain).toEqualWithDiff(sentPlain);
});
};
......@@ -204,18 +204,20 @@ export const createDescribePseudoPlainTestCases = (
testCasePath
);
HTMLNormalizer.normalizeVendorHtml(receivedHtmlDocument, vendor);
HTMLNormalizer.normalizeVendorHtml(sentHtmlDocument, vendor);
const normalizedReceivedPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart(
receivedHtmlDocument
);
HTMLNormalizer.normalizeVendorHtml(receivedHtmlDocument, vendor);
const normalizedSentPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart(
sentHtmlDocument
);
expect(normalizedReceivedPseudoPlainText).toEqual(
const normalizedReceivedPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart(
receivedHtmlDocument
);
// eslint-disable-next-line
// @ts-ignore
expect(normalizedReceivedPseudoPlainText).toEqualWithDiff(
normalizedSentPseudoPlainText
);
});
......@@ -230,7 +232,7 @@ export const getDOMDocuments = (
const sentMime = getMime(`${testCasePath}/${SENT_EML_NAME}`);
const receivedMime = getMime(`${testCasePath}/${RECEIVED_EML_NAME}`);
const sentDOM = new DOM(sentMime.getHTML());
const sentDOM = new JSDOM(sentMime.getHTML());
const receivedDOM = new JSDOM(receivedMime.getHTML());
return {
......
......@@ -12,7 +12,7 @@ import {
pruneOutlookElement,
} from "./strategies/outlook";
import { EMAIL_VENDORS } from "../constants";
import { removeSpacesAndLinebreaks } from "../utils";
import { normalizeTextSpacings, removeSpacesAndLinebreaks } from "../utils";
import {
amendGmailNodes,
cleanupGMailElementAttributes,
......@@ -87,7 +87,9 @@ export const extractPseudoPlainPart = (
document: HTMLDocument
/*vendor: string*/
): string => {
const textContent = PlainNormalizer.normalizePlain(document.body.textContent);
const normalizedTextContent = PlainNormalizer.normalizePlain(
document.body.textContent
);
// const anchors = document.getElementsByTagName("a");
// const images = document.getElementsByTagName("img");
......@@ -103,7 +105,7 @@ export const extractPseudoPlainPart = (
//
// meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort();
// console.log(meaningfulAttributes);
return textContent;
return normalizedTextContent;
};
export const printHtmlChildren = (
......@@ -169,11 +171,15 @@ export const printHtmlNode = (
});
if (node.firstChild) {
result += ">";
result += "\n";
const printout = printHtmlChildren(node, printFunction, depth + 1);
result += printout;
result += "</" + node.nodeName + ">";
if (printout.trim().length === 0) {
result += "/>";
} else {
result += ">";
result += "\n";
result += printout;
result += "</" + node.nodeName + ">";
}
} else {
result += "/>";
}
......@@ -211,7 +217,7 @@ export const pruneHtmlNode = (
toBeRemoved = true;
break;
case TEXT_NODE: {
const trimmedText = node.textContent.trim();
const trimmedText = normalizeTextSpacings(node.textContent);
if (trimmedText === "") {
toBeRemoved = true;
} else {
......
......@@ -78,7 +78,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => {
* Remove Word o:p paragraphs
*/
const ops = document.getElementsByTagName("o:p");
Array.from(ops).forEach((op) => op.parentNode.removeChild(op));
unwindTags(Array.from(ops));
/**
* Remove empty paragraphs
......@@ -91,7 +91,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => {
if (p.childNodes.length === 1 && p.childNodes[0].nodeType === TEXT_NODE) {
const text = p.childNodes[0].textContent;
if (!text.replace(/&nbsp;/g, "").trim()) {
if (!text.replace(/\u00A0/g, "").trim()) {
p.parentNode.removeChild(p);
}
}
......@@ -173,6 +173,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => {
spansDepths[depth].forEach((span) => {
let child = span.firstChild;
const parent = span.parentNode;
while (child) {
parent.insertBefore(child.cloneNode(true), span);
......
......@@ -5,5 +5,5 @@ export const removeSpacesAndLinebreaks = (s: string): string => {
};
/**
 * Collapses every run of whitespace in `s` into a single ASCII space.
 *
 * The JavaScript `\s` class already covers carriage return, line feed,
 * vertical tab and the no-break space (U+00A0), so listing them explicitly
 * in the character class is redundant.
 *
 * Leading and trailing whitespace is collapsed to one space, not removed;
 * a whitespace-only input therefore yields " ", not "".
 *
 * @param s - Text whose whitespace should be normalised.
 * @returns The text with each whitespace run replaced by one space.
 */
export const normalizeTextSpacings = (s: string): string => {
  return s.replace(/\s+/g, " ");
};
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment