diff --git a/dist/HTMLNormalizer/strategies/common.js b/dist/HTMLNormalizer/strategies/common.js index 738dc47f61c13b4520fe731bc0855883e0196cb9..a3c0fdb2461dce759a173c896a0c9000e4daa596 100644 --- a/dist/HTMLNormalizer/strategies/common.js +++ b/dist/HTMLNormalizer/strategies/common.js @@ -30,6 +30,25 @@ const amendNodes = (document) => { anchor.setAttribute("href", url.query["url"]); } } + /** + * Unwind Gmail "googleusercontent" wrappers + */ + const images = document.getElementsByTagName("img"); + for (const image of images) { + let url; + try { + url = new URL(image.getAttribute("src")); + } + catch (e) { + url = null; + } + if (url && url.host.includes("googleusercontent.com")) { + const originalUrl = url.hash.slice(1, url.hash.length); + if (originalUrl) { + image.setAttribute("src", originalUrl); + } + } + } }; exports.amendNodes = amendNodes; /** diff --git a/dist/PlainNormalizer/PlainNormalizer.js b/dist/PlainNormalizer/PlainNormalizer.js index 33762f5561a843e399d40551c2d23a3809d42208..a30288b0bad49d16dbeac81429be53bf9d8ea8f1 100644 --- a/dist/PlainNormalizer/PlainNormalizer.js +++ b/dist/PlainNormalizer/PlainNormalizer.js @@ -7,8 +7,9 @@ const utils_1 = require("../utils"); const normalizePlainPart = (text) => { text = removeListBullets(text); text = utils_1.removeSpacesAndLinebreaks(text); + text = removeQRCodes(text); text = patchOutlookSafelinksWrappers(text); - return removeQRCodes(text); + return text; }; exports.normalizePlainPart = normalizePlainPart; const patchOutlookSafelinksWrappers = (text) => { @@ -24,8 +25,8 @@ const patchOutlookSafelinksWrappers = (text) => { }; const removeQRCodes = (s) => { return s - .replace(/\[(image:)*qrcode.png]\s*<https:\/\/[\w./?=\-&]+>/g, "") - .replace(/<https:\/\/[\w./?=\-&]+>\s*\[(image: )*qrcode.png]/g, ""); + .replace(/\[(image:)*qrcode.png]\s*<https:\/\/.+?>/g, "") + .replace(/<https:\/\/.+?>\s*\[(image: )*qrcode.png]/g, ""); }; const removeListBullets = (s) => { return s.replace("\n[o§]\n+/g", "");