From 5cef614c4f87d42ef5d2917ea6d9175910e112ec Mon Sep 17 00:00:00 2001
From: igor <igor.markin@vereign.com>
Date: Wed, 2 Dec 2020 17:28:15 +0300
Subject: [PATCH] Fixes for DOM gmail parser

---
 __tests__/utils.ts                      | 32 +++++++++++--------
 package.json                            |  3 +-
 src/HTMLNormalizer/HTMLNormalizer.ts    | 41 ++++++++++++++++---------
 src/HTMLNormalizer/strategies/common.ts |  2 +-
 src/HTMLNormalizer/strategies/gmail.ts  |  4 +--
 src/constants.ts                        |  2 ++
 yarn.lock                               | 17 ++++++++++
 7 files changed, 70 insertions(+), 31 deletions(-)

diff --git a/__tests__/utils.ts b/__tests__/utils.ts
index 1d60560..a4e76a0 100644
--- a/__tests__/utils.ts
+++ b/__tests__/utils.ts
@@ -1,4 +1,5 @@
 import { JSDOM } from "jsdom";
+import { DOM } from "@vereign/dom";
 const fs = require("fs");
 
 const SENT_HTML_NAME = "s_htmlContent.html";
@@ -50,7 +51,7 @@ export const getNormalizedHtml = (
     .readFileSync(`${testCasePath}/${RECEIVED_HTML_NAME}`)
     .toString();
 
-  const sentDOM = new JSDOM(sentHtml);
+  const sentDOM = new DOM(sentHtml);
   const receivedDOM = new JSDOM(receivedHtml);
 
   const sentNormalizedHtml = HTMLNormalizer.normalizeVendorHtml(
@@ -76,22 +77,27 @@ export const createDescribeHtmlTestCases = (
    * @param casesGroupName - name of the folder with cases
    * @param failingCases - a list of cases that are failing and ignored. Pending to be fixed
    */
-  (casesGroupName: string, failingCases: Array<string> = []) => (): void => {
+  (
+    casesGroupName: string,
+    failingCases?: Array<string>,
+    casesToCheckOnly?: Array<string>
+  ) => (): void => {
     const testsCasesPath = testsPath + "/" + casesGroupName;
-    const testCasesDirs = getTestCasesDirs(testsCasesPath).filter(
-      (dir) => !failingCases.includes(dir)
-    );
+    let testCasesDirs = getTestCasesDirs(testsCasesPath);
+
+    if (casesToCheckOnly) {
+      testCasesDirs = testCasesDirs.filter((dir) =>
+        casesToCheckOnly.includes(dir)
+      );
+    }
+
+    if (failingCases) {
+      testCasesDirs = testCasesDirs.filter((d) => !failingCases.includes(d));
+    }
 
     test.each(testCasesDirs)("Case %s", (dirName: string) => {
       const testCasePath = testsCasesPath + "/" + dirName;
-      let normalizedHtmls;
-      try {
-        normalizedHtmls = getNormalizedHtml(testCasePath, vendor);
-      } catch (e) {
-        console.log(`Invalid test case: ${casesGroupName}/${dirName}`);
-        return;
-      }
-
+      const normalizedHtmls = getNormalizedHtml(testCasePath, vendor);
       const { sentHtml, receivedHtml } = normalizedHtmls;
 
       // expect(receivedHtml.length).toBeGreaterThan(0);
diff --git a/package.json b/package.json
index 1fcd574..becd0b5 100644
--- a/package.json
+++ b/package.json
@@ -19,7 +19,8 @@
     "jest": "^26.4.2",
     "lint-staged": "^10.2.13",
     "prettier": "^2.1.1",
-    "typescript": "^4.0.2"
+    "typescript": "^4.0.2",
+    "@vereign/dom": "git+ssh://git@code.vereign.com:code/js-toolbox/gsdom.git#rework-with-parse-5"
   },
   "scripts": {
     "prepare": "yarn build",
diff --git a/src/HTMLNormalizer/HTMLNormalizer.ts b/src/HTMLNormalizer/HTMLNormalizer.ts
index 754d67b..291336c 100644
--- a/src/HTMLNormalizer/HTMLNormalizer.ts
+++ b/src/HTMLNormalizer/HTMLNormalizer.ts
@@ -1,13 +1,23 @@
-import { DOCUMENT_NODE, ELEMENT_NODE, TEXT_NODE } from "../constants";
+import {
+  COMMENT_NODE,
+  DOCUMENT_NODE,
+  DOCUMENT_TYPE_NODE,
+  ELEMENT_NODE,
+  TEXT_NODE,
+} from "../constants";
 import {
   amendOutlookNodes,
   cleanupOutlookElementAttributes,
   printOutlookElement,
-  pruneOutlookElement
+  pruneOutlookElement,
 } from "./strategies/outlook";
-import {EMAIL_VENDORS} from "../constants";
-import {removeSpacesAndLinebreaks} from "../utils";
-import {amendGmailNodes, cleanupGMailElementAttributes, pruneGmailElement} from "./strategies/gmail";
+import { EMAIL_VENDORS } from "../constants";
+import { removeSpacesAndLinebreaks } from "../utils";
+import {
+  amendGmailNodes,
+  cleanupGMailElementAttributes,
+  pruneGmailElement,
+} from "./strategies/gmail";
 
 const nodesAmendingFunctions = {
   [EMAIL_VENDORS.GMAIL]: amendGmailNodes,
@@ -28,7 +38,10 @@ const vendorPrintingFunctions = {
   [EMAIL_VENDORS.OUTLOOK]: printOutlookElement,
 };
 
-export const normalizeVendorHtml = (document: HTMLDocument, vendor: string): string => {
+export const normalizeVendorHtml = (
+  document: HTMLDocument,
+  vendor: string
+): string => {
   const mimeBody = document.body;
 
   const amendNodesFunction = nodesAmendingFunctions[vendor];
@@ -52,8 +65,7 @@ export const normalizeVendorHtml = (document: HTMLDocument, vendor: string): str
   /**
    * Cleanup unnecessary attributes of nodes
    */
-  const elementAttributesCleanupFunction =
-    attributesCleanupFunctions[vendor];
+  const elementAttributesCleanupFunction = attributesCleanupFunctions[vendor];
 
   if (elementAttributesCleanupFunction) {
     cleanupHtmlNodeAttributes(document, elementAttributesCleanupFunction);
@@ -132,7 +144,8 @@ export const printHtmlNode = (
       if (node.firstChild) {
         result += ">";
         result += "\n";
-        result += printHtmlChildren(node, printFunction, depth + 1);
+        const printout = printHtmlChildren(node, printFunction, depth + 1);
+        result += printout;
         result += "</" + node.nodeName + ">";
       } else {
         result += "/>";
@@ -148,7 +161,7 @@ export const cleanupHtmlNodeAttributes = (
   node: Node,
   cleanupElementAttributes: (element: HTMLElement) => void
 ): void => {
-  if (node.nodeType === node.ELEMENT_NODE) {
+  if (node.nodeType === ELEMENT_NODE) {
     cleanupElementAttributes(node as HTMLElement);
   }
 
@@ -166,8 +179,8 @@ export const pruneHtmlNode = (
   let toBeRemoved = false;
 
   switch (node.nodeType) {
-    case node.COMMENT_NODE:
-    case node.DOCUMENT_TYPE_NODE:
+    case COMMENT_NODE:
+    case DOCUMENT_TYPE_NODE:
       toBeRemoved = true;
       break;
     case node.TEXT_NODE: {
@@ -179,7 +192,7 @@ export const pruneHtmlNode = (
       }
       break;
     }
-    case node.ELEMENT_NODE:
+    case ELEMENT_NODE:
       toBeRemoved = pruneElement(node as HTMLElement);
   }
 
@@ -215,7 +228,7 @@ export const escapeHtmlString = (string: string): string => {
   let index = 0;
   let lastIndex = 0;
 
-  for (let index = match.index; index < str.length; index++) {
+  for (index = match.index; index < str.length; index++) {
     switch (str.charCodeAt(index)) {
       case 34: // "
         escape = "&quot;";
diff --git a/src/HTMLNormalizer/strategies/common.ts b/src/HTMLNormalizer/strategies/common.ts
index 55c701e..d07e8dc 100644
--- a/src/HTMLNormalizer/strategies/common.ts
+++ b/src/HTMLNormalizer/strategies/common.ts
@@ -51,7 +51,7 @@ export const cloneAnchorFromPane = (
   pane: HTMLElement
 ): void => {
   try {
-    const url = new URL(a.href);
+    const url = new URL(a.getAttribute("href"));
     // If this is external url
     if (url.host && url.protocol) {
       pane.parentNode.insertBefore(a.cloneNode(false), pane);
diff --git a/src/HTMLNormalizer/strategies/gmail.ts b/src/HTMLNormalizer/strategies/gmail.ts
index ee4aa87..fa82935 100644
--- a/src/HTMLNormalizer/strategies/gmail.ts
+++ b/src/HTMLNormalizer/strategies/gmail.ts
@@ -20,8 +20,8 @@ export const amendGmailNodes = (document: HTMLDocument): void => {
   );
 
   attachmentsPanes.forEach((pane) => {
-    const as = pane.querySelectorAll("a");
-    as.forEach((a) => {
+    const as = pane.getElementsByTagName("a");
+    Array.from(as).forEach((a) => {
       cloneAnchorFromPane(a, pane as HTMLElement);
     });
   });
diff --git a/src/constants.ts b/src/constants.ts
index a639830..4e81cd9 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -1,4 +1,6 @@
 export const ELEMENT_NODE = 1;
+export const COMMENT_NODE = 8;
+export const DOCUMENT_TYPE_NODE = 10;
 export const TEXT_NODE = 3;
 export const DOCUMENT_NODE = 9;
 
diff --git a/yarn.lock b/yarn.lock
index 24d4a89..ee55475 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1168,6 +1168,11 @@
   resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.0.tgz#2f8bb441434d163b35fb8ffdccd7138927ffb8c0"
   integrity sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA==
 
+"@types/parse5@^5.0.3":
+  version "5.0.3"
+  resolved "https://registry.yarnpkg.com/@types/parse5/-/parse5-5.0.3.tgz#e7b5aebbac150f8b5fdd4a46e7f0bd8e65e19109"
+  integrity sha512-kUNnecmtkunAoQ3CnjmMkzNU/gtxG8guhi+Fk2U/kOpIKjIMKnXGp4IJCgQJrXSgMsWYimYG4TGjz/UzbGEBTw==
+
 "@types/prettier@^2.0.0":
   version "2.1.5"
   resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.1.5.tgz#b6ab3bba29e16b821d84e09ecfaded462b816b00"
@@ -1250,6 +1255,13 @@
   dependencies:
     eslint-visitor-keys "^1.1.0"
 
+"@vereign/dom@git+ssh://git@code.vereign.com:code/js-toolbox/gsdom.git#rework-with-parse-5":
+  version "1.0.0"
+  resolved "git+ssh://git@code.vereign.com:code/js-toolbox/gsdom.git#b14f5560fc58bd3c1f0095b67166b1771442a1cb"
+  dependencies:
+    "@types/parse5" "^5.0.3"
+    parse5 "^6.0.1"
+
 abab@^2.0.3:
   version "2.0.5"
   resolved "https://registry.yarnpkg.com/abab/-/abab-2.0.5.tgz#c0b678fb32d60fc1219c784d6a826fe385aeb79a"
@@ -3857,6 +3869,11 @@ parse5@5.1.1:
   resolved "https://registry.yarnpkg.com/parse5/-/parse5-5.1.1.tgz#f68e4e5ba1852ac2cadc00f4555fff6c2abb6178"
   integrity sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==
 
+parse5@^6.0.1:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b"
+  integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==
+
 pascalcase@^0.1.1:
   version "0.1.1"
   resolved "https://registry.yarnpkg.com/pascalcase/-/pascalcase-0.1.1.tgz#b363e55e8006ca6fe21784d2db22bd15d7917f14"
-- 
GitLab