Skip to content
Snippets Groups Projects
Commit 58dc94bc authored by Igor Markin's avatar Igor Markin
Browse files

Add build

parent 3d01e55d
No related branches found
No related tags found
1 merge request: !22 "Rework normalisation logic to handle new seal templates"
export declare const normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string;
/**
 * Options controlling seal removal from the HTML part.
 */
export interface SealRemovalOptions {
/** Forwarded to the common normalisation step, which removes the first element whose `id` attribute contains this value. */
sealContainerId: string;
}
export declare const normalizeVendorHtml: (document: HTMLDocument, vendor: string, sealRemovalOptions?: SealRemovalOptions) => string;
export declare const extractPseudoPlainPart: (document: HTMLDocument) => string;
export declare const pruneHtmlNode: (node: Node) => boolean;
export declare const printHtmlChildren: (node: Node, depth: number) => string;
......
......@@ -12,7 +12,7 @@ const documentNormalizationFunctions = {
[constants_2.EMAIL_VENDORS.GMAIL]: gmail_1.normalizeGmailDocument,
[constants_2.EMAIL_VENDORS.OUTLOOK]: outlook_1.normalizeOutlookDocument,
};
const normalizeVendorHtml = (document, vendor) => {
const normalizeVendorHtml = (document, vendor, sealRemovalOptions) => {
/**
* Remove unnecessary nodes
*/
......@@ -20,7 +20,7 @@ const normalizeVendorHtml = (document, vendor) => {
/**
* Apply document normalisations
*/
common_1.normalizeDocumentCommon(document.body);
common_1.normalizeDocumentCommon(document.body, sealRemovalOptions?.sealContainerId);
const normalizeDocument = documentNormalizationFunctions[vendor];
if (normalizeDocument) {
normalizeDocument(document);
......
declare const _default: {
normalizeVendorHtml: (document: HTMLDocument, vendor: string) => string;
normalizeVendorHtml: (document: HTMLDocument, vendor: string, sealRemovalOptions?: import("./HTMLNormalizer").SealRemovalOptions) => string;
extractPseudoPlainPart: (document: HTMLDocument) => string;
};
export default _default;
......@@ -18,7 +18,10 @@ exports.ATTRIBUTES_TO_KEEP = {
href: true,
value: true,
};
const normalizeDocumentCommon = (body) => {
const normalizeDocumentCommon = (body, sealContainerId) => {
if (sealContainerId) {
removeSeal(body, sealContainerId);
}
/**
* Unwind Outlook safelink wrappers
*/
......@@ -50,6 +53,20 @@ const normalizeDocumentCommon = (body) => {
}
};
exports.normalizeDocumentCommon = normalizeDocumentCommon;
const removeSeal = (rootNode, sealContainerId) => {
const queue = [rootNode];
while (queue.length) {
const element = queue.shift();
const id = element.getAttribute("id");
if (id?.includes(sealContainerId)) {
element.parentNode.removeChild(element);
break;
}
for (const node of element.childNodes) {
node.nodeType === 1 && queue.push(node);
}
}
};
/**
* Decides whether node should be removed
* @param element
......
......@@ -2,31 +2,7 @@
Object.defineProperty(exports, "__esModule", { value: true });
exports.normalizeGmailDocument = void 0;
const common_1 = require("./common");
const constants_1 = require("../../constants");
const qrCodeContainerIds = { vereignWrapperLink: 1 };
/**
 * Removes every element under `document.body` whose `id` attribute contains
 * one of the keys of `qrCodeContainerIds`.
 *
 * Matches are collected first (descendants before their ancestors) and only
 * then detached, so the tree is not mutated while it is being walked.
 *
 * @param document document whose body is scanned
 */
const removeQrCodeNodes = (document) => {
  const hasQrCodeId = (element) => {
    const id = element.getAttribute("id");
    return (id &&
      Object.keys(qrCodeContainerIds).find((possibleId) => id.includes(possibleId)));
  };
  const collect = (node, matches) => {
    for (let child = node.firstChild; child; child = child.nextSibling) {
      if (child.nodeType != constants_1.ELEMENT_NODE) {
        continue;
      }
      // Descend first so nested matches precede the enclosing one.
      collect(child, matches);
      if (hasQrCodeId(child)) {
        matches.push(child);
      }
    }
    return matches;
  };
  for (const element of collect(document.body, [])) {
    element.parentNode.removeChild(element);
  }
};
const normalizeGmailDocument = (document) => {
removeQrCodeNodes(document);
/**
* Look for attachments panes and extract <a> tags from them
*/
......
......@@ -4,35 +4,7 @@ exports.normalizeOutlookDocument = void 0;
const constants_1 = require("../../constants");
const common_1 = require("./common");
const nodesAmendingFunctions_1 = require("./nodesAmendingFunctions");
const qrCodeContainerIds = {
"test-for-us": 1,
};
/**
 * Removes the parent of every element under `document.body` whose `id`
 * attribute contains one of the keys of `qrCodeContainerIds`.
 *
 * Parents are collected first (deepest first) and detached afterwards. Each
 * parent is recorded once: when two matching elements share a parent, a
 * second removal attempt would otherwise dereference a null `parentNode`.
 *
 * @param document document whose body is scanned
 */
const removeQrCodeNodes = (document) => {
  const possibleIds = Object.keys(qrCodeContainerIds);
  const parentsToRemove = [];
  const collect = (node) => {
    for (let child = node.firstChild; child; child = child.nextSibling) {
      if (child.nodeType != constants_1.ELEMENT_NODE) {
        continue;
      }
      collect(child);
      const id = child.getAttribute("id");
      if (id && possibleIds.find((possibleId) => id.includes(possibleId))) {
        const parent = child.parentNode;
        if (parentsToRemove.indexOf(parent) === -1) {
          parentsToRemove.push(parent);
        }
      }
    }
  };
  collect(document.body);
  parentsToRemove.forEach((element) => {
    // Skip nodes that are already detached from the tree.
    element.parentNode?.removeChild(element);
  });
};
const normalizeOutlookDocument = (document) => {
/**
* Remove QR code entries
*/
removeQrCodeNodes(document);
/**
* Unwind Word o:p paragraphs
*/
......
export declare const normalizePlainPart: (text: string) => string;
/**
 * Options controlling seal removal from the plain-text part.
 */
export interface SealRemovalOptions {
/** URL of the seal; used to find and strip the `[alt] <url>` / `<url> [alt]` seal reference from the text. */
sealUrl: string;
}
export declare const normalizePlainPart: (text: string, sealRemovalOptions?: SealRemovalOptions) => string;
export declare const cleanupHiddenCharacters: (s: string) => string;
......@@ -4,9 +4,11 @@ exports.cleanupHiddenCharacters = exports.normalizePlainPart = void 0;
// this is a Node module. require is a must to work across different envs
const URL = require("url-parse");
const utils_1 = require("../utils");
const normalizePlainPart = (text) => {
const normalizePlainPart = (text, sealRemovalOptions) => {
text = exports.cleanupHiddenCharacters(text);
text = removeQRCodes(text);
if (sealRemovalOptions) {
text = removeSeal(text, sealRemovalOptions.sealUrl);
}
text = utils_1.normalizeTextSpacings(text);
text = patchOutlookSafelinksWrappers(text);
return text.trim();
......@@ -25,14 +27,17 @@ const patchOutlookSafelinksWrappers = (text) => {
};
/**
* Function removes seal from the plain text.
* Function has to support the next possible file names of the seal:
* qrcode.png, qrcode-xxx.png, seal-xxx.png
* @param s
* @param plain
* @param sealUrl
*/
const removeQRCodes = (s) => {
  // Image placeholder followed by the link: [image: qrcode-xxx.png] <https://...>
  const imageThenLink = /\[(image:\s)*(qrcode|seal).*?.png]\s*<https:\/\/.+(vereign\.com|vrgnservices\.com).*?>/g;
  // Link followed by the image placeholder: <https://...> [image: qrcode-xxx.png]
  const linkThenImage = /<https:\/\/.+(vereign\.com|vrgnservices\.com).*?>\s*\[(image: )*(qrcode|seal).*?.png]/g;
  const withoutImageThenLink = s.replace(imageThenLink, "");
  return withoutImageThenLink.replace(linkThenImage, "");
};
/**
 * Removes the seal reference from the plain text.
 *
 * Handles both orderings of the image placeholder and the link:
 * `[<image-alt>] <<seal-url>>` and `<<seal-url>> [<image-alt>]`.
 * Only the first occurrence of each ordering is removed.
 *
 * @param plain plain text to clean up
 * @param sealUrl URL of the seal, matched literally
 * @returns the text without the seal reference
 */
const removeSeal = (plain, sealUrl) => {
  // URLs routinely contain regex metacharacters (".", "?", "+", ...); escape
  // them so the URL is matched literally instead of being read as a pattern.
  const escapedUrl = sealUrl.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // For cases [<image-alt>]<<seal-url>>
  const sealRegex = new RegExp(`\\[.+?]\\s*<${escapedUrl}>`);
  // For cases <<seal-url>>[<image-alt>]
  const sealRegexReversed = new RegExp(`<${escapedUrl}>\\s*\\[.+?]`);
  return plain.replace(sealRegex, "").replace(sealRegexReversed, "");
};
const cleanupHiddenCharacters = (s) => {
const removeSymbols = new RegExp(/[\u200B]+/g);
......
declare const _default: {
normalizePlain: (text: string) => string;
normalizePlain: (text: string, sealRemovalOptions?: import("./PlainNormalizer").SealRemovalOptions) => string;
};
export default _default;
......@@ -21,7 +21,7 @@ const documentNormalizationFunctions = {
[EMAIL_VENDORS.OUTLOOK]: normalizeOutlookDocument,
};
interface SealRemovalOptions {
export interface SealRemovalOptions {
sealContainerId: string;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment