Skip to content
Snippets Groups Projects
Commit 9ac662eb authored by Igor Markin
Browse files

Fix html text bugs

parent 95160b7c
No related branches found
No related tags found
1 merge request: !17 "Populate attachments before normalisation"
......@@ -8,7 +8,7 @@
| 06 | Multiple MIME attachments and text | ok | ok | ok |
| 07 | Multiple Drive attachments and text | ok | ok | ok |
| 08 | Complex email with formatted text, embedded images, MIME and GDrive attachments | fail | ok | fail |
| 09 | 2 replies with test case 01 | fail | ok | ok |
| 09 | 2 replies with test case 01 | fail | ok | fail |
| 10 | 2 replies with test case 02 | - | - | - |
| 11 | 2 replies with test case 03 | - | - | - |
| 12 | 2 replies with test case 04 | - | - | - |
......
......@@ -14,6 +14,6 @@ describe("[Pseudo PLAIN] Outlook-Outlook normalization", () => {
);
describe("Chrome-Chrome", describeFunction("chrome-chrome"));
describe("MacOS-MacOS", describeFunction("macos-macos", null));
describe("MacOS-MacOS", describeFunction("macos-macos", ["09", "s"]));
describe("Windows-Windows", describeFunction("windows-windows"));
});
......@@ -10,7 +10,7 @@ import { DOM } from "@vereign/dom";
import { diffStringsUnified } from "jest-diff";
expect.extend({
toContainWithDiff(target, source) {
toEqualWithDiff(target, source) {
let pass = true;
try {
expect(target).toEqual(source);
......@@ -134,7 +134,7 @@ export const createDescribeHtmlTestCases = (
// eslint-disable-next-line
// @ts-ignore
// console.log(receivedHtml);
expect(receivedHtml).toContainWithDiff(sentHtml);
expect(receivedHtml).toEqualWithDiff(sentHtml);
});
};
......@@ -165,7 +165,7 @@ export const createDescribePlainTestCases = (testsPath: string) => (
// expect(receivedPlain.length).toBeGreaterThan(0);
// eslint-disable-next-line
// @ts-ignore
expect(receivedPlain).toContainWithDiff(sentPlain);
expect(receivedPlain).toEqualWithDiff(sentPlain);
});
};
......@@ -204,18 +204,20 @@ export const createDescribePseudoPlainTestCases = (
testCasePath
);
HTMLNormalizer.normalizeVendorHtml(receivedHtmlDocument, vendor);
HTMLNormalizer.normalizeVendorHtml(sentHtmlDocument, vendor);
const normalizedReceivedPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart(
receivedHtmlDocument
);
HTMLNormalizer.normalizeVendorHtml(receivedHtmlDocument, vendor);
const normalizedSentPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart(
sentHtmlDocument
);
expect(normalizedReceivedPseudoPlainText).toEqual(
const normalizedReceivedPseudoPlainText = HTMLNormalizer.extractPseudoPlainPart(
receivedHtmlDocument
);
// eslint-disable-next-line
// @ts-ignore
expect(normalizedReceivedPseudoPlainText).toEqualWithDiff(
normalizedSentPseudoPlainText
);
});
......@@ -230,7 +232,7 @@ export const getDOMDocuments = (
const sentMime = getMime(`${testCasePath}/${SENT_EML_NAME}`);
const receivedMime = getMime(`${testCasePath}/${RECEIVED_EML_NAME}`);
const sentDOM = new DOM(sentMime.getHTML());
const sentDOM = new JSDOM(sentMime.getHTML());
const receivedDOM = new JSDOM(receivedMime.getHTML());
return {
......
......@@ -12,7 +12,7 @@ import {
pruneOutlookElement,
} from "./strategies/outlook";
import { EMAIL_VENDORS } from "../constants";
import { removeSpacesAndLinebreaks } from "../utils";
import { normalizeTextSpacings, removeSpacesAndLinebreaks } from "../utils";
import {
amendGmailNodes,
cleanupGMailElementAttributes,
......@@ -87,7 +87,9 @@ export const extractPseudoPlainPart = (
document: HTMLDocument
/*vendor: string*/
): string => {
const textContent = PlainNormalizer.normalizePlain(document.body.textContent);
const normalizedTextContent = PlainNormalizer.normalizePlain(
document.body.textContent
);
// const anchors = document.getElementsByTagName("a");
// const images = document.getElementsByTagName("img");
......@@ -103,7 +105,7 @@ export const extractPseudoPlainPart = (
//
// meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort();
// console.log(meaningfulAttributes);
return textContent;
return normalizedTextContent;
};
export const printHtmlChildren = (
......@@ -169,11 +171,15 @@ export const printHtmlNode = (
});
if (node.firstChild) {
result += ">";
result += "\n";
const printout = printHtmlChildren(node, printFunction, depth + 1);
result += printout;
result += "</" + node.nodeName + ">";
if (printout.trim().length === 0) {
result += "/>";
} else {
result += ">";
result += "\n";
result += printout;
result += "</" + node.nodeName + ">";
}
} else {
result += "/>";
}
......@@ -211,7 +217,7 @@ export const pruneHtmlNode = (
toBeRemoved = true;
break;
case TEXT_NODE: {
const trimmedText = node.textContent.trim();
const trimmedText = normalizeTextSpacings(node.textContent);
if (trimmedText === "") {
toBeRemoved = true;
} else {
......
......@@ -78,7 +78,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => {
* Remove Word o:p paragraphs
*/
const ops = document.getElementsByTagName("o:p");
Array.from(ops).forEach((op) => op.parentNode.removeChild(op));
unwindTags(Array.from(ops));
/**
* Remove empty paragraphs
......@@ -91,7 +91,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => {
if (p.childNodes.length === 1 && p.childNodes[0].nodeType === TEXT_NODE) {
const text = p.childNodes[0].textContent;
if (!text.replace(/&nbsp;/g, "").trim()) {
if (!text.replace(/\u00A0/g, "").trim()) {
p.parentNode.removeChild(p);
}
}
......@@ -173,6 +173,7 @@ export const amendOutlookNodes = (document: HTMLDocument): void => {
spansDepths[depth].forEach((span) => {
let child = span.firstChild;
const parent = span.parentNode;
while (child) {
parent.insertBefore(child.cloneNode(true), span);
......
......@@ -5,5 +5,5 @@ export const removeSpacesAndLinebreaks = (s: string): string => {
};
/**
 * Collapses every run of whitespace in `s` into a single ASCII space.
 *
 * Carriage returns, line feeds, vertical tabs and non-breaking spaces
 * (U+00A0) are listed explicitly next to `\s` to make the intent visible,
 * although the JavaScript `\s` class already matches all of them.
 * The string is not trimmed: leading or trailing whitespace is reduced to
 * one space rather than removed.
 *
 * @param s - Text whose spacing should be normalized.
 * @returns `s` with each whitespace run replaced by one space.
 */
export const normalizeTextSpacings = (s: string): string => {
  return s.replace(/[\r\n\v\s\u00A0]+/g, " ");
};
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment