Skip to content
Snippets Groups Projects
Commit 27cd21f6 authored by Igor Markin
Browse files

Add build

parent 4cf49312
No related branches found
No related tags found
1 merge request: !6 "Implement pseudo plain parsing"
/**
 * Renders `document` to a string using the handling selected by `vendor`.
 * NOTE(review): only the last statement of the implementation is visible in this
 * diff (it returns `printHtmlChildren(mimeBody, vendorPrintFunction, 0)`); confirm
 * the accepted `vendor` values against the constants module.
 */
export declare const normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string;
/**
 * Returns the document body's text content after it has been passed through
 * `PlainNormalizer.normalizePlain`.
 */
export declare const extractPseudoPlainPart: (document: HTMLDocument) => string;
/**
 * Presumably prints every child of `node` with `printFunction` at the given
 * `depth` - the implementation body is not visible here; TODO confirm.
 */
export declare const printHtmlChildren: (node: Node, printFunction: (node: Node) => string, depth: number) => string;
/**
 * Presumably prints a single `node` with `printFunction` at the given `depth` -
 * the implementation body is not visible here; TODO confirm.
 */
export declare const printHtmlNode: (node: Node, printFunction: (node: Node) => string, depth: number) => string;
/**
 * Presumably applies `cleanupElementAttributes` to the elements reachable from
 * `node` - the implementation body is not visible here; TODO confirm traversal.
 */
export declare const cleanupHtmlNodeAttributes: (node: Node, cleanupElementAttributes: (element: HTMLElement) => void) => void;
......
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.normalizeVendorHtml = void 0;
exports.escapeHtmlString = exports.pruneHtmlNode = exports.cleanupHtmlNodeAttributes = exports.printHtmlNode = exports.printHtmlChildren = exports.extractPseudoPlainPart = exports.normalizeVendorHtml = void 0;
const constants_1 = require("../constants");
const outlook_1 = require("./strategies/outlook");
const constants_2 = require("../constants");
const utils_1 = require("../utils");
const gmail_1 = require("./strategies/gmail");
const index_1 = require("../index");
const nodesAmendingFunctions = {
[constants_2.EMAIL_VENDORS.GMAIL]: gmail_1.amendGmailNodes,
[constants_2.EMAIL_VENDORS.OUTLOOK]: outlook_1.amendOutlookNodes,
......@@ -49,6 +50,27 @@ const normalizeVendorHtml = (document, vendor) => {
return exports.printHtmlChildren(mimeBody, vendorPrintFunction, 0);
};
exports.normalizeVendorHtml = normalizeVendorHtml;
/**
 * Builds a "pseudo plain" part from an HTML document by taking the text
 * content of its body and running it through the plain-text normalizer.
 *
 * @param document HTML document whose `body.textContent` is read.
 * @returns The value returned by `PlainNormalizer.normalizePlain`.
 */
const extractPseudoPlainPart = (document) => {
    // TODO: an earlier draft also took a `vendor: string` argument and gathered
    // the `href` of every <a> plus the `src`/`alt` of every <img>, dropped the
    // empty values and sorted the rest. That code is disabled; only the body
    // text contributes to the result for now.
    return index_1.PlainNormalizer.normalizePlain(document.body.textContent);
};
exports.extractPseudoPlainPart = extractPseudoPlainPart;
const printHtmlChildren = (node, printFunction, depth) => {
let child = node.firstChild;
if (!child) {
......
/**
 * Shape of the module's default export: an object bundling the two HTML
 * normalizer entry points.
 */
declare const _default: {
/** Takes an HTML document and a vendor identifier and returns a string. */
normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string;
/** Takes an HTML document and returns its normalized body text as a string. */
extractPseudoPlainPart: (document: HTMLDocument) => string;
};
export default _default;
......@@ -3,4 +3,5 @@ Object.defineProperty(exports, "__esModule", { value: true });
const HTMLNormalizer_1 = require("./HTMLNormalizer");
// Default export: re-exposes the two entry points implemented in
// ./HTMLNormalizer as a single object.
exports.default = {
normalizeVendorHtml: HTMLNormalizer_1.normalizeVendorHtml,
extractPseudoPlainPart: HTMLNormalizer_1.extractPseudoPlainPart,
};
......@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
exports.normalizePlainPart = void 0;
const utils_1 = require("../utils");
/**
 * Normalizes a plain-text part in three ordered steps: list bullets are
 * removed first, then spaces and line breaks, and finally QR-code
 * placeholders. The order matters because the bullet step runs before the
 * whitespace step strips the newlines.
 *
 * @param text Raw plain-text part.
 * @returns The text after all three clean-up passes.
 */
const normalizePlainPart = (text) => {
    const withoutBullets = removeListBullets(text);
    const compacted = utils_1.removeSpacesAndLinebreaks(withoutBullets);
    return removeQRCodes(compacted);
};
......@@ -12,3 +13,6 @@ const removeQRCodes = (s) => {
.replace(/\[qrcode.png]\s*<https:\/\/[\w./?=\-&]+>/g, "")
.replace(/<https:\/\/[\w./?=\-&]+>\s*\[qrcode.png]/g, "");
};
/**
 * Strips list-bullet lines from plain text: a newline, a single bullet
 * character (`o` or `§`), and the run of newlines that follows it.
 *
 * The pattern must be a RegExp literal. Passing it as a string makes
 * `String.prototype.replace` search for that exact text (including the
 * trailing "/g"), which never occurs, so no bullet was ever removed.
 *
 * @param s Plain text that may contain bullet-only lines.
 * @returns `s` with every bullet line removed.
 */
const removeListBullets = (s) => {
    return s.replace(/\n[o§]\n+/g, "");
};
......@@ -2,8 +2,7 @@
Object.defineProperty(exports, "__esModule", { value: true });
exports.removeSpacesAndLinebreaks = void 0;
/**
 * Removes every whitespace character and every zero-width space (U+200B)
 * from `s`.
 *
 * @param s Input text.
 * @returns `s` with all whitespace and U+200B characters deleted.
 */
const removeSpacesAndLinebreaks = (s) => {
    // `\s` already matches \r, \n and \v, so a separate newline pass is not
    // needed. U+200B is not part of `\s` and has to be listed explicitly.
    // No trailing trim() is required: nothing trimmable survives the replace.
    return s.replace(/[\s\u200B]+/g, "");
};
exports.removeSpacesAndLinebreaks = removeSpacesAndLinebreaks;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment