Skip to content
Snippets Groups Projects
PlainNormalizer.ts 1.53 KiB
Newer Older
  • Learn to ignore specific revisions
    // url-parse is a Node module; `require` is needed so it works across different environments.
    const URL = require("url-parse");
    
    
    import { normalizeTextSpacings } from "../utils";
    
    /**
     * Options that enable seal removal in `normalizePlainPart`.
     */
    export interface SealRemovalOptions {
      /** URL of the seal; forwarded to `removeSeal` to locate the seal in the text. */
      sealUrl: string;
    }
    
    /**
     * Normalizes the plain-text part of a message.
     *
     * Steps, in order: strip hidden characters, unwrap Outlook safelinks
     * wrappers, optionally remove the seal, normalize spacings (delegated to
     * `normalizeTextSpacings` from "../utils") and trim the result.
     *
     * @param text - Raw plain-text content.
     * @param sealRemovalOptions - When provided, the seal identified by
     *   `sealUrl` is removed from the text; when omitted, seal removal is skipped.
     * @returns The normalized, trimmed text.
     */
    export const normalizePlainPart = (
      text: string,
      sealRemovalOptions?: SealRemovalOptions
    ): string => {
      // Work on a local copy instead of reassigning the parameter.
      let normalized = cleanupHiddenCharacters(text);

      normalized = patchOutlookSafelinksWrappers(normalized);

      if (sealRemovalOptions) {
        normalized = removeSeal(normalized, sealRemovalOptions.sealUrl);
      }

      normalized = normalizeTextSpacings(normalized);

      return normalized.trim();
    };
    /**
     * Replaces Outlook "safelinks" wrapper links of the form
     * `<https://…safelinks.protection.outlook.com…>` with the original link
     * taken from the wrapper's `url` query parameter.
     *
     * A wrapper without a `url` query parameter is left untouched rather than
     * being rewritten to `<undefined>`.
     *
     * @param text - Plain text that may contain wrapped links.
     * @returns The text with every recognised wrapper replaced by `<originalUrl>`.
     */
    const patchOutlookSafelinksWrappers = (text: string): string => {
      // `[^<>]` keeps each match inside a single <...> pair, so an earlier,
      // unrelated `<https:...>` link cannot be swallowed together with a later
      // safelink on the same line.
      const safelinkPattern =
        /<https:[^<>]+?safelinks\.protection\.outlook\.com[^<>]+?>/g;

      // A replacer function is used instead of a replacement string so that
      // `$` sequences inside the recovered URL (e.g. `$&`, `$1`) are inserted
      // literally instead of being interpreted as replacement patterns.
      return text.replace(safelinkPattern, (link) => {
        // Strip the surrounding angle brackets; `true` asks url-parse to parse
        // the query string into an object.
        const url = new URL(link.slice(1, -1), true);
        const originalUrl = url.query["url"];

        return originalUrl ? `<${originalUrl}>` : link;
      });
    };
    
    
    Igor Markin's avatar
    Igor Markin committed
    /**
     * Removes the seal from the plain text.
     *
     * Two layouts are handled: `[image-alt]<seal-url>` and the reversed
     * `<seal-url>[image-alt]`, with optional whitespace between the two parts.
     * Only the first occurrence of each layout is removed.
     *
     * @param plain - Plain text that may contain the seal.
     * @param sealUrl - URL of the seal; matched literally.
     * @returns The text with the seal removed.
     */
    const removeSeal = (plain: string, sealUrl: string): string => {
      // Escape regex metacharacters so that characters such as `.`, `?` and `+`
      // in the URL are matched literally.
      const escapedUrl = sealUrl.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

      // Alt text: one or more characters up to the first closing bracket on the
      // same line, so a match cannot start at an earlier, unrelated `[...]`.
      const altText = "\\[[^\\]\\r\\n]+?]";

      // For cases [<image-alt>]<<seal-url>>
      const sealRegex = new RegExp(`${altText}\\s*<${escapedUrl}>`);

      // For cases <<seal-url>>[<image-alt>]
      const sealRegexReversed = new RegExp(`<${escapedUrl}>\\s*${altText}`);

      return plain.replace(sealRegex, "").replace(sealRegexReversed, "");
    };
    Gospodin Bodurov's avatar
    Gospodin Bodurov committed
    
    
    /**
     * Strips every zero-width space (U+200B) from the given string.
     *
     * @param s - Input text.
     * @returns The text without any U+200B characters.
     */
    export const cleanupHiddenCharacters = (s: string): string =>
      // Splitting on the character and re-joining drops all of its occurrences.
      s.split("\u200B").join("");