From 27cd21f6aec7135a2f8f4d7ee3f84ac419e11a56 Mon Sep 17 00:00:00 2001 From: igor <igor.markin@vereign.com> Date: Tue, 8 Dec 2020 19:17:12 +0300 Subject: [PATCH] Add build --- dist/HTMLNormalizer/HTMLNormalizer.d.ts | 1 + dist/HTMLNormalizer/HTMLNormalizer.js | 24 +++++++++++++++++++++++- dist/HTMLNormalizer/index.d.ts | 1 + dist/HTMLNormalizer/index.js | 1 + dist/PlainNormalizer/PlainNormalizer.js | 4 ++++ dist/utils.js | 5 ++--- 6 files changed, 32 insertions(+), 4 deletions(-) diff --git a/dist/HTMLNormalizer/HTMLNormalizer.d.ts b/dist/HTMLNormalizer/HTMLNormalizer.d.ts index 1451258..62ff3a5 100644 --- a/dist/HTMLNormalizer/HTMLNormalizer.d.ts +++ b/dist/HTMLNormalizer/HTMLNormalizer.d.ts @@ -1,4 +1,5 @@ export declare const normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string; +export declare const extractPseudoPlainPart: (document: HTMLDocument) => string; export declare const printHtmlChildren: (node: Node, printFunction: (node: Node) => string, depth: number) => string; export declare const printHtmlNode: (node: Node, printFunction: (node: Node) => string, depth: number) => string; export declare const cleanupHtmlNodeAttributes: (node: Node, cleanupElementAttributes: (element: HTMLElement) => void) => void; diff --git a/dist/HTMLNormalizer/HTMLNormalizer.js b/dist/HTMLNormalizer/HTMLNormalizer.js index 68dc062..1fdd9d9 100644 --- a/dist/HTMLNormalizer/HTMLNormalizer.js +++ b/dist/HTMLNormalizer/HTMLNormalizer.js @@ -1,11 +1,12 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.normalizeVendorHtml = void 0; +exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.extractPseudoPlainPart = exports.normalizeVendorHtml = void 0; const constants_1 = require("../constants"); const outlook_1 = require("./strategies/outlook"); const constants_2 = require("../constants"); const utils_1 = require("../utils"); const gmail_1 = require("./strategies/gmail"); +const index_1 = require("../index"); const nodesAmendingFunctions = { [constants_2.EMAIL_VENDORS.GMAIL]: gmail_1.amendGmailNodes, [constants_2.EMAIL_VENDORS.OUTLOOK]: outlook_1.amendOutlookNodes, @@ -49,6 +50,27 @@ const normalizeVendorHtml = (document, vendor) => { return exports.printHtmlChildren(mimeBody, vendorPrintFunction, 0); }; exports.normalizeVendorHtml = normalizeVendorHtml; +const extractPseudoPlainPart = (document +/*vendor: string*/ +) => { + const textContent = index_1.PlainNormalizer.normalizePlain(document.body.textContent); + // const anchors = document.getElementsByTagName("a"); + // const images = document.getElementsByTagName("img"); + // let meaningfulAttributes = []; + // + // Array.from(anchors).forEach((a) => { + // meaningfulAttributes.push(a.getAttribute("href")); + // }); + // Array.from(images).forEach((img) => { + // meaningfulAttributes.push(img.getAttribute("src")); + // meaningfulAttributes.push(img.getAttribute("alt")); + // }); + // + // meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort(); + // console.log(meaningfulAttributes); + return textContent; +}; +exports.extractPseudoPlainPart = extractPseudoPlainPart; const printHtmlChildren = (node, printFunction, depth) => { let child = node.firstChild; if (!child) { diff --git a/dist/HTMLNormalizer/index.d.ts b/dist/HTMLNormalizer/index.d.ts index 41ae932..7bea28c 100644 --- a/dist/HTMLNormalizer/index.d.ts +++ b/dist/HTMLNormalizer/index.d.ts @@ -1,4 +1,5 @@ declare const _default: { normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string; + extractPseudoPlainPart: (document: HTMLDocument) => string; }; export default _default; diff --git a/dist/HTMLNormalizer/index.js b/dist/HTMLNormalizer/index.js index 1034775..3d76acb 100644 --- a/dist/HTMLNormalizer/index.js +++ b/dist/HTMLNormalizer/index.js @@ -3,4 +3,5 @@ Object.defineProperty(exports, "__esModule", { value: true }); const HTMLNormalizer_1 = require("./HTMLNormalizer"); exports.default = { normalizeVendorHtml: HTMLNormalizer_1.normalizeVendorHtml, + extractPseudoPlainPart: HTMLNormalizer_1.extractPseudoPlainPart, }; diff --git a/dist/PlainNormalizer/PlainNormalizer.js b/dist/PlainNormalizer/PlainNormalizer.js index e79c6a0..342abdd 100644 --- a/dist/PlainNormalizer/PlainNormalizer.js +++ b/dist/PlainNormalizer/PlainNormalizer.js @@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.normalizePlainPart = void 0; const utils_1 = require("../utils"); const normalizePlainPart = (text) => { + text = removeListBullets(text); text = utils_1.removeSpacesAndLinebreaks(text); return removeQRCodes(text); }; @@ -12,3 +13,6 @@ const removeQRCodes = (s) => { .replace(/\[qrcode.png]\s*<https:\/\/[\w./?=\-&]+>/g, "") .replace(/<https:\/\/[\w./?=\-&]+>\s*\[qrcode.png]/g, ""); }; +const removeListBullets = (s) => { + return s.replace("\n[o§]\n+/g", ""); +}; diff --git a/dist/utils.js b/dist/utils.js index 6a64db9..3030ed2 100644 --- a/dist/utils.js +++ b/dist/utils.js @@ -2,8 +2,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.removeSpacesAndLinebreaks = void 0; const removeSpacesAndLinebreaks = (s) => { - const regexNewlines = new RegExp(/[\r\n\v]+/g); - const regexSpaces = new RegExp(/\s+|\u200B/g); - return s.replace(regexNewlines, "").replace(regexSpaces, ""); + const removeSymbols = new RegExp(/[\r\n\v\s\u200B]+/g); + return s.replace(removeSymbols, "").trim(); }; exports.removeSpacesAndLinebreaks = removeSpacesAndLinebreaks; -- GitLab