diff --git a/dist/HTMLNormalizer/HTMLNormalizer.d.ts b/dist/HTMLNormalizer/HTMLNormalizer.d.ts index 14512588371174dcb28897c7dca926e5ff64f13f..62ff3a55b69e5319892d8de8f7712d8bc19a7176 100644 --- a/dist/HTMLNormalizer/HTMLNormalizer.d.ts +++ b/dist/HTMLNormalizer/HTMLNormalizer.d.ts @@ -1,4 +1,5 @@ export declare const normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string; +export declare const extractPseudoPlainPart: (document: HTMLDocument) => string; export declare const printHtmlChildren: (node: Node, printFunction: (node: Node) => string, depth: number) => string; export declare const printHtmlNode: (node: Node, printFunction: (node: Node) => string, depth: number) => string; export declare const cleanupHtmlNodeAttributes: (node: Node, cleanupElementAttributes: (element: HTMLElement) => void) => void; diff --git a/dist/HTMLNormalizer/HTMLNormalizer.js b/dist/HTMLNormalizer/HTMLNormalizer.js index 68dc062b75b86094cc0fbeb7158641480a0f1311..1fdd9d92969cc0029c87b36379160a54a3963009 100644 --- a/dist/HTMLNormalizer/HTMLNormalizer.js +++ b/dist/HTMLNormalizer/HTMLNormalizer.js @@ -1,11 +1,12 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.normalizeVendorHtml = void 0; +exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.extractPseudoPlainPart = exports.normalizeVendorHtml = void 0; const constants_1 = require("../constants"); const outlook_1 = require("./strategies/outlook"); const constants_2 = require("../constants"); const utils_1 = require("../utils"); const gmail_1 = require("./strategies/gmail"); +const index_1 = require("../index"); const nodesAmendingFunctions = { [constants_2.EMAIL_VENDORS.GMAIL]: gmail_1.amendGmailNodes, [constants_2.EMAIL_VENDORS.OUTLOOK]: 
outlook_1.amendOutlookNodes, @@ -49,6 +50,27 @@ const normalizeVendorHtml = (document, vendor) => { return exports.printHtmlChildren(mimeBody, vendorPrintFunction, 0); }; exports.normalizeVendorHtml = normalizeVendorHtml; +const extractPseudoPlainPart = (document +/* vendor: string -- parameter commented out, only the document is accepted. Returns document.body.textContent passed through PlainNormalizer.normalizePlain. NOTE(review): that name is resolved through ../index at call time -- confirm it is exported. */ +) => { + const textContent = index_1.PlainNormalizer.normalizePlain(document.body.textContent); + // const anchors = document.getElementsByTagName("a"); + // const images = document.getElementsByTagName("img"); + // let meaningfulAttributes = []; + // NOTE: every line of this commented-out block is disabled; it sketches collecting anchor href and image src/alt values, dropping empty ones and sorting them. + // Array.from(anchors).forEach((a) => { + // meaningfulAttributes.push(a.getAttribute("href")); + // }); + // Array.from(images).forEach((img) => { + // meaningfulAttributes.push(img.getAttribute("src")); + // meaningfulAttributes.push(img.getAttribute("alt")); + // }); + // + // meaningfulAttributes = meaningfulAttributes.filter((attr) => !!attr).sort(); + // console.log(meaningfulAttributes); + return textContent; +}; +exports.extractPseudoPlainPart = extractPseudoPlainPart; const printHtmlChildren = (node, printFunction, depth) => { let child = node.firstChild; if (!child) { diff --git a/dist/HTMLNormalizer/index.d.ts b/dist/HTMLNormalizer/index.d.ts index 41ae9326611d9ad3288f6f5fe3ef3d42726d367f..7bea28c1205f5bb6bd23639914e640cedaaed170 100644 --- a/dist/HTMLNormalizer/index.d.ts +++ b/dist/HTMLNormalizer/index.d.ts @@ -1,4 +1,5 @@ declare const _default: { normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string; + extractPseudoPlainPart: (document: HTMLDocument) => string; }; export default _default; diff --git a/dist/HTMLNormalizer/index.js b/dist/HTMLNormalizer/index.js index 10347750ec3e33b39101a866e87ad04b098c1880..3d76acbbaa9ad2760df4c70687bd541bdac19eea 100644 --- a/dist/HTMLNormalizer/index.js +++ b/dist/HTMLNormalizer/index.js @@ -3,4 +3,5 @@ Object.defineProperty(exports, "__esModule", { value: true }); const HTMLNormalizer_1 = require("./HTMLNormalizer"); exports.default = { normalizeVendorHtml: 
HTMLNormalizer_1.normalizeVendorHtml, + extractPseudoPlainPart: HTMLNormalizer_1.extractPseudoPlainPart, }; diff --git a/dist/PlainNormalizer/PlainNormalizer.js b/dist/PlainNormalizer/PlainNormalizer.js index e79c6a0a2a9d639118ce5bd1762b3455b75dae14..342abdd0887e6066cd81d5c2515291417666585d 100644 --- a/dist/PlainNormalizer/PlainNormalizer.js +++ b/dist/PlainNormalizer/PlainNormalizer.js @@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.normalizePlainPart = void 0; const utils_1 = require("../utils"); const normalizePlainPart = (text) => { + text = removeListBullets(text); text = utils_1.removeSpacesAndLinebreaks(text); return removeQRCodes(text); }; @@ -12,3 +13,7 @@ const removeQRCodes = (s) => { .replace(/\[qrcode.png]\s*<https:\/\/[\w./?=\-&]+>/g, "") .replace(/<https:\/\/[\w./?=\-&]+>\s*\[qrcode.png]/g, ""); }; +/** Strips list bullets: a newline, then "o" or "§", then one or more newlines. The pattern has to be a RegExp literal; a string pattern is matched as literal text and would never fire. */ +const removeListBullets = (s) => { + return s.replace(/\n[o§]\n+/g, ""); +}; diff --git a/dist/utils.js b/dist/utils.js index 6a64db988795a1ffa219666a6fa6c8faac878a56..3030ed27ee0506f9f3c3d118b607339781fb0520 100644 --- a/dist/utils.js +++ b/dist/utils.js @@ -2,8 +2,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.removeSpacesAndLinebreaks = void 0; const removeSpacesAndLinebreaks = (s) => { - const regexNewlines = new RegExp(/[\r\n\v]+/g); - const regexSpaces = new RegExp(/\s+|\u200B/g); - return s.replace(regexNewlines, "").replace(regexSpaces, ""); + const removeSymbols = new RegExp(/[\r\n\v\s\u200B]+/g); + return s.replace(removeSymbols, "").trim(); }; exports.removeSpacesAndLinebreaks = removeSpacesAndLinebreaks;