import {
  BASE_URL,
  COREMEDIA_BASE_URL,
  COREMEDIA_PREVIEW_BASE_URL
} from '@/configs/env/public';
import { InvalidStateError } from '@/utils/errors';
import { escapeRegEx } from '@/utils/regex-utils';
import { sanitize, addHook } from 'isomorphic-dompurify';

/**
 * Allow list of URL prefixes that sanitized markup may reference.
 *
 * The list is read by three functions in this module:
 * - `sanitizeCSSURL` compares each entry against CSS `url(...)` values.
 * - `validateURLs` throws if any entry is empty (e.g. an unset env value).
 * - `sanitizeMarkup` joins the entries, in this order, into the regular
 *   expression handed to DOMPurify as `ALLOWED_URI_REGEXP`.
 *
 * Entries are compared as plain string prefixes; nothing here enforces a
 * host boundary after the prefix.
 */
const allowedBaseURLs = [
  // Values imported from the public env config. They may be empty when the
  // environment is misconfigured, which is what `validateURLs` guards against.
  BASE_URL,
  COREMEDIA_BASE_URL,
  COREMEDIA_PREVIEW_BASE_URL,
  // Hard-coded external hosts.
  'https://www.deckers.com',
  'https://bs.sanuk.com',
  'https://www.youtube.com',
  'https://dms.deckers.com',
  'https://ahnu.co',
  // Any value starting with a slash.
  // NOTE(review): as a prefix this also matches protocol-relative URLs such
  // as `//other-host.example` — confirm that is acceptable.
  '/',
  // For Accessibility page.
  'https://www.essentialaccessibility.com',
  // Values using the `mailto:` scheme.
  'mailto:'
];

/**
 * Filters a single CSS `url(...)` value against `allowedBaseURLs`.
 *
 * The value is kept only when an allow-list entry begins at the position of
 * the first `http` substring inside it; otherwise it is replaced by an empty
 * URL.
 *
 * NOTE(review): because the comparison is anchored on the first `http`, the
 * `'/'` and `'mailto:'` entries can never match here, so relative CSS URLs are
 * always blanked. The match is also a bare prefix check, so a value such as
 * `url("https://www.deckers.com.evil.example/x.png")` is accepted. Confirm
 * both behaviours are intended.
 * @param url - The full CSS url value, e.g. `url("https://www.example.com")`.
 * @returns `url` unchanged when it is allowed, otherwise the literal `url("")`.
 */
const sanitizeCSSURL = (url: string): string => {
  const blockedValue = 'url("")';
  const schemeIndex = url.indexOf('http');

  // With no `http` in the value, no entry can line up with it.
  if (schemeIndex === -1) {
    return blockedValue;
  }

  const isOnAllowList = allowedBaseURLs.some((candidate) => {
    // Skip empty/undefined entries instead of searching for them.
    if (!candidate) {
      return false;
    }

    // The first occurrence of the entry must sit exactly where `http` starts.
    return url.indexOf(candidate) === schemeIndex;
  });

  return isOnAllowList ? url : blockedValue;
};

/**
 * Validates that every entry of `baseURLs` is a non-empty string. This gives
 * better error visibility when configured values (for example environment
 * variables) do not come through as expected.
 * @param baseURLs - The list to validate. Defaults to the module-level
 * `allowedBaseURLs`, so existing zero-argument calls behave as before.
 * @throws `InvalidStateError` If any entry is not a string or is an empty string.
 * @example ```ts
 * validateURLs(['https://example.com']); // OK.
 * validateURLs([undefined, 'https://example2.com']); // Throws.
 * ```
 */
const validateURLs = (
  baseURLs: readonly unknown[] = allowedBaseURLs
): void => {
  for (const baseURL of baseURLs) {
    if (typeof baseURL !== 'string' || baseURL === '') {
      throw new InvalidStateError(
        `\`baseURL\` was not an acceptable value. Found "${String(baseURL)}", ` +
          `but expected a non-empty string. These URLs are used ` +
          `for security, so they may not be empty values.\n` +
          `\`allowedBaseURLs\` was ${JSON.stringify(baseURLs, null, '  ')}`
      );
    }
  }
};

/**
 * Whether the DOMPurify `afterSanitizeAttributes` hook used by
 * `sanitizeMarkup` has already been installed. DOMPurify keeps every hook
 * passed to `addHook` until it is removed, so registering on each call would
 * stack one more identical hook per invocation.
 */
let hasRegisteredLinkTargetHook = false;

/**
 * Installs, at most once, the hook that forces every element with a non-empty
 * `target` to open in a new tab without exposing `window.opener`.
 */
const registerLinkTargetHook = (): void => {
  if (hasRegisteredLinkTargetHook) {
    return;
  }

  addHook('afterSanitizeAttributes', function (node) {
    // set all elements owning target to target=_blank
    if ('target' in node && node.target !== '') {
      node.setAttribute('target', '_blank');
      node.setAttribute('rel', 'noopener noreferrer');
    }
  });

  hasRegisteredLinkTargetHook = true;
};

/**
 * Sanitizes the provided markup string. Note that we simply never should
 * inject into the page unvalidated user input. Doing so is in and of itself
 * a "code smell". This function is _not_ for that purpose. This function is
 * to sanitize markup that comes from third-party services and entered by
 * internal users. It's to control anybody within our organization from
 * getting cute and inadvertently opening us to a security vulnerability.
 *
 * Processing order:
 * 1. `validateURLs()` ensures the allow list has no empty entries.
 * 2. Every CSS `url(...)` token is passed through `sanitizeCSSURL`.
 * 3. The result is sanitized by DOMPurify with the allow list as
 *    `ALLOWED_URI_REGEXP`.
 * @param unsafeMarkup - A string of markup which may contain malicious code.
 * @returns The provided markup string with malicious code removed.
 * @throws `InvalidStateError` If the allow list contains an empty entry.
 * @see https://github.com/cure53/DOMPurify
 * @example ```ts
 * const maliciousMarkup = '<script>alert("You have been hacked!");</script>';
 * const safeMarkup = sanitizeMarkup(maliciousMarkup); // ''
 * ```
 */
export const sanitizeMarkup = (unsafeMarkup: string): string => {
  // Validate that all URLs are in the expected format.
  validateURLs();

  // This regex will be used to match the base URLs to our site and third-parties.
  // The base URL is everything before the pathname. For example, the base URL for
  // "https://www.example.com/foo/bar" is "https://www.example.com".
  // We need to escape any characters that have special meaning in a regex so
  // that we can match the literal URL.
  const allowedURLsRegEx = new RegExp(
    `^(${allowedBaseURLs.map((baseURL) => escapeRegEx(baseURL!)).join('|')})`
  );

  // Strips CSS urls that are not on the allowed list. The body of the token is
  // matched lazily (`.*?`) so that each `url(...)` on a line is checked on its
  // own. A greedy match would span from the first `url(` to the last `)` on
  // the line and let a disallowed URL through behind an allowed one.
  const markupWithSafeCSSURLs = unsafeMarkup.replace(
    /(url\s*\(\s*("|')?)\s*.*?\s*(\2\s*\))/gi,
    (cssURL) => sanitizeCSSURL(cssURL)
  );

  // Hooks are global to the DOMPurify instance, so register only once.
  registerLinkTargetHook();

  return sanitize(markupWithSafeCSSURLs, {
    // Removes any `{{}}` instances for React.
    SAFE_FOR_TEMPLATES: true,
    // Always treat the markup as HTML specifically.
    USE_PROFILES: { html: true },
    // Only URLs beginning with the allowed values are accepted.
    ALLOWED_URI_REGEXP: allowedURLsRegEx,
    // Allows style tags to be used in the fragment.
    FORCE_BODY: true,
    // Allow links without sanitization.
    // NOTE(review): this exempts `href` from `ALLOWED_URI_REGEXP`; the
    // DOMPurify docs flag this option as an XSS risk — confirm it is intended.
    ADD_URI_SAFE_ATTR: ['href']
  });
};

/**
 * Unescapes double-escaped HTML entities in a string without parsing the input
 * as HTML. An `&amp;` is collapsed to `&` only when it is immediately followed
 * by the remainder of a character reference:
 * - a named reference (`&amp;lt;`, `&amp;frac12;`),
 * - a decimal reference (`&amp;#39;`), or
 * - a hexadecimal reference (`&amp;#x27;`, `&amp;#x2F;`).
 *
 * A plain `&amp;` that is not followed by such a reference is left untouched.
 * @param unsafeMarkup - A string of markup which may contain escaped HTML entities or
 * other possibly encoded or escaped characters, such as HTML tags.
 * @returns The provided markup string with escaped HTML entities unescaped. `&amp;amp;`
 * becomes `&amp;`, `&amp;lt;` becomes `&lt;`, and `&amp;#x27;` becomes `&#x27;`.
 */
export const unescapeHTMLEntities = (unsafeMarkup: string): string =>
  unsafeMarkup.replace(
    // Hex digits include a-f, and entity names may contain digits after the
    // first letter (e.g. `frac12`, `sup2`).
    /&amp;((?:#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z][a-zA-Z0-9]*);)/g,
    '&$1'
  );

/**
 * A map of HTML entities by their well-known names to their numeric representations.
 * For example, `&quot;` (quotation mark) is `&#34;`.
 *
 * Every value must be a well-formed numeric reference (`&#NNN;` or `&#xHHH;`);
 * anything else cannot be converted by `htmlEntitiesToUnicodeEscapes`.
 */
const KNOWN_HTML_ENTITIES = {
  '&quot;': '&#34;',
  '&num;': '&#35;',
  '&dollar;': '&#36;',
  '&percnt;': '&#37;',
  '&amp;': '&#38;',
  '&apos;': '&#39;',
  '&lpar;': '&#40;',
  '&rpar;': '&#41;',
  '&ast;': '&#42;',
  '&plus;': '&#43;',
  '&comma;': '&#44;',
  '&minus;': '&#8722;',
  '&period;': '&#46;',
  '&sol;': '&#47;',
  '&colon;': '&#58;',
  '&semi;': '&#59;',
  '&lt;': '&#60;',
  '&equals;': '&#61;',
  '&gt;': '&#62;',
  '&quest;': '&#63;',
  '&commat;': '&#64;',
  '&lsqb;': '&#91;',
  '&bsol;': '&#92;',
  '&rsqb;': '&#93;',
  '&Hat;': '&#94;',
  '&lowbar;': '&#95;',
  '&grave;': '&#96;',
  '&lcub;': '&#123;',
  '&verbar;': '&#124;',
  '&rcub;': '&#125;',
  '&nbsp;': '&#160;',
  '&iexcl;': '&#161;',
  '&cent;': '&#162;',
  '&pound;': '&#163;',
  '&curren;': '&#164;',
  '&yen;': '&#165;',
  '&#x20B9;': '&#x20B9;',
  '&brvbar;': '&#166;',
  '&sect;': '&#167;',
  '&uml;': '&#168;',
  '&copy;': '&#169;',
  '&ordf;': '&#170;',
  '&laquo;': '&#171;',
  '&not;': '&#172;',
  '&shy;': '&#173;',
  '&reg;': '&#174;',
  '&macr;': '&#175;',
  '&deg;': '&#176;',
  '&plusmn;': '&#177;',
  '&sup2;': '&#178;',
  '&sup3;': '&#179;',
  '&acute;': '&#180;',
  '&micro;': '&#181;',
  '&para;': '&#182;',
  '&middot;': '&#183;',
  '&cedil;': '&#184;',
  '&sup1;': '&#185;',
  '&ordm;': '&#186;',
  '&raquo;': '&#187;',
  '&frac14;': '&#188;',
  '&frac12;': '&#189;',
  '&frac34;': '&#190;',
  '&iquest;': '&#191;',
  '&Agrave;': '&#192;',
  '&Aacute;': '&#193;',
  '&Acirc;': '&#194;',
  '&Atilde;': '&#195;',
  '&Auml;': '&#196;',
  '&Aring;': '&#197;',
  '&AElig;': '&#198;',
  '&Ccedil;': '&#199;',
  '&Egrave;': '&#200;',
  '&Eacute;': '&#201;',
  '&Ecirc;': '&#202;',
  '&Euml;': '&#203;',
  '&Igrave;': '&#204;',
  '&Iacute;': '&#205;',
  '&Icirc;': '&#206;',
  '&Iuml;': '&#207;',
  '&ETH;': '&#208;',
  '&Ntilde;': '&#209;',
  '&Ograve;': '&#210;',
  '&Oacute;': '&#211;',
  '&Ocirc;': '&#212;',
  '&Otilde;': '&#213;',
  '&Ouml;': '&#214;',
  '&times;': '&#215;',
  '&Oslash;': '&#216;',
  '&Ugrave;': '&#217;',
  '&Uacute;': '&#218;',
  '&Ucirc;': '&#219;',
  '&Uuml;': '&#220;',
  '&Yacute;': '&#221;',
  '&THORN;': '&#222;',
  '&szlig;': '&#223;',
  '&agrave;': '&#224;',
  '&aacute;': '&#225;',
  '&acirc;': '&#226;',
  '&atilde;': '&#227;',
  '&auml;': '&#228;',
  '&aring;': '&#229;',
  '&aelig;': '&#230;',
  '&ccedil;': '&#231;',
  '&egrave;': '&#232;',
  '&eacute;': '&#233;',
  '&ecirc;': '&#234;',
  '&euml;': '&#235;',
  '&igrave;': '&#236;',
  '&iacute;': '&#237;',
  '&icirc;': '&#238;',
  '&iuml;': '&#239;',
  '&eth;': '&#240;',
  '&ntilde;': '&#241;',
  '&ograve;': '&#242;',
  '&oacute;': '&#243;',
  '&ocirc;': '&#244;',
  '&otilde;': '&#245;',
  '&ouml;': '&#246;',
  '&divide;': '&#247;',
  '&oslash;': '&#248;',
  '&ugrave;': '&#249;',
  '&uacute;': '&#250;',
  '&ucirc;': '&#251;',
  '&uuml;': '&#252;',
  '&yacute;': '&#253;',
  '&thorn;': '&#254;',
  '&yuml;': '&#255;',
  '&OElig;': '&#338;',
  '&oelig;': '&#339;',
  '&Scaron;': '&#352;',
  '&scaron;': '&#353;',
  '&Yuml;': '&#376;',
  '&fnof;': '&#402;',
  '&circ;': '&#710;',
  '&tilde;': '&#732;',
  '&Alpha;': '&#913;',
  '&Beta;': '&#914;',
  '&Gamma;': '&#915;',
  '&Delta;': '&#916;',
  '&Epsilon;': '&#917;',
  '&Zeta;': '&#918;',
  '&Eta;': '&#919;',
  '&Theta;': '&#920;',
  '&Iota;': '&#921;',
  '&Kappa;': '&#922;',
  '&Lambda;': '&#923;',
  '&Mu;': '&#924;',
  '&Nu;': '&#925;',
  '&Xi;': '&#926;',
  '&Omicron;': '&#927;',
  '&Pi;': '&#928;',
  '&Rho;': '&#929;',
  '&Sigma;': '&#931;',
  '&Tau;': '&#932;',
  '&Upsilon;': '&#933;',
  '&Phi;': '&#934;',
  '&Chi;': '&#935;',
  '&Psi;': '&#936;',
  '&Omega;': '&#937;',
  '&alpha;': '&#945;',
  '&beta;': '&#946;',
  '&gamma;': '&#947;',
  '&delta;': '&#948;',
  '&epsilon;': '&#949;',
  '&zeta;': '&#950;',
  '&eta;': '&#951;',
  '&theta;': '&#952;',
  '&iota;': '&#953;',
  '&kappa;': '&#954;',
  '&lambda;': '&#955;',
  '&mu;': '&#956;',
  '&nu;': '&#957;',
  '&xi;': '&#958;',
  '&omicron;': '&#959;',
  '&pi;': '&#960;',
  '&rho;': '&#961;',
  '&sigmaf;': '&#962;',
  '&sigma;': '&#963;',
  '&tau;': '&#964;',
  '&upsilon;': '&#965;',
  '&phi;': '&#966;',
  '&chi;': '&#967;',
  '&psi;': '&#968;',
  '&omega;': '&#969;',
  '&thetasym;': '&#977;',
  '&upsih;': '&#978;',
  '&piv;': '&#982;',
  '&ensp;': '&#8194;',
  '&emsp;': '&#8195;',
  '&thinsp;': '&#8201;',
  '&zwnj;': '&#8204;',
  '&zwj;': '&#8205;',
  '&lrm;': '&#8206;',
  '&rlm;': '&#8207;',
  '&ndash;': '&#8211;',
  '&mdash;': '&#8212;',
  '&lsquo;': '&#8216;',
  '&rsquo;': '&#8217;',
  '&sbquo;': '&#8218;',
  '&ldquo;': '&#8220;',
  '&rdquo;': '&#8221;',
  '&bdquo;': '&#8222;',
  '&dagger;': '&#8224;',
  '&Dagger;': '&#8225;',
  '&permil;': '&#8240;',
  '&lsaquo;': '&#8249;',
  '&rsaquo;': '&#8250;',
  '&bull;': '&#8226;',
  '&hellip;': '&#8230;',
  '&prime;': '&#8242;',
  '&Prime;': '&#8243;',
  '&oline;': '&#8254;',
  '&frasl;': '&#8260;',
  '&weierp;': '&#8472;',
  '&image;': '&#8465;',
  '&real;': '&#8476;',
  '&trade;': '&#8482;',
  '&alefsym;': '&#8501;',
  '&larr;': '&#8592;',
  '&uarr;': '&#8593;',
  '&rarr;': '&#8594;',
  '&darr;': '&#8595;',
  '&harr;': '&#8596;',
  '&crarr;': '&#8629;',
  '&lArr;': '&#8656;',
  '&uArr;': '&#8657;',
  '&rArr;': '&#8658;',
  '&dArr;': '&#8659;',
  '&hArr;': '&#8660;',
  '&forall;': '&#8704;',
  '&part;': '&#8706;',
  '&exist;': '&#8707;',
  '&empty;': '&#8709;',
  '&nabla;': '&#8711;',
  '&isin;': '&#8712;',
  '&notin;': '&#8713;',
  '&ni;': '&#8715;',
  '&prod;': '&#8719;',
  '&sum;': '&#8721;',
  '&lowast;': '&#8727;',
  '&radic;': '&#8730;',
  '&prop;': '&#8733;',
  '&infin;': '&#8734;',
  '&ang;': '&#8736;',
  '&and;': '&#8743;',
  '&or;': '&#8744;',
  '&cap;': '&#8745;',
  '&cup;': '&#8746;',
  '&int;': '&#8747;',
  '&there4;': '&#8756;',
  '&sim;': '&#8764;',
  '&cong;': '&#8773;',
  '&asymp;': '&#8776;',
  '&ne;': '&#8800;',
  '&equiv;': '&#8801;',
  '&le;': '&#8804;',
  '&ge;': '&#8805;',
  '&sub;': '&#8834;',
  '&sup;': '&#8835;',
  '&nsub;': '&#8836;',
  '&sube;': '&#8838;',
  '&supe;': '&#8839;',
  '&oplus;': '&#8853;',
  '&otimes;': '&#8855;',
  '&perp;': '&#8869;',
  '&sdot;': '&#8901;',
  '&lceil;': '&#8968;',
  '&rceil;': '&#8969;',
  '&lfloor;': '&#8970;',
  '&rfloor;': '&#8971;',
  '&lang;': '&#9001;',
  '&rang;': '&#9002;',
  '&loz;': '&#9674;',
  '&spades;': '&#9824;',
  '&clubs;': '&#9827;',
  '&hearts;': '&#9829;',
  '&diams;': '&#9830;'
} as const;

/**
 * Exact-case lookup of `KNOWN_HTML_ENTITIES`. Entity names are case-sensitive
 * (`&Agrave;` and `&agrave;` are different characters), so this is always
 * consulted first.
 */
const EXACT_HTML_ENTITIES: ReadonlyMap<string, string> = new Map<
  string,
  string
>(Object.entries(KNOWN_HTML_ENTITIES));

/**
 * Case-insensitive fallback lookup, used only when the exact spelling is not
 * in the table (e.g. `&NBSP;`). When two names differ only by case, the one
 * that appears later in `KNOWN_HTML_ENTITIES` wins.
 */
const LOWERCASE_HTML_ENTITIES = new Map<string, string>();
for (const [entityName, numericEntity] of Object.entries(KNOWN_HTML_ENTITIES)) {
  LOWERCASE_HTML_ENTITIES.set(entityName.toLowerCase(), numericEntity);
}

/**
 * Matches a complete character reference: decimal (`&#38;`), hexadecimal
 * (`&#x26;`) or named (`&amp;`, `&frac12;`). Names may contain digits after
 * the first letter.
 */
const HTML_ENTITY_PATTERN = /&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);/gi;

/** Splits a numeric reference into its hexadecimal (1) or decimal (2) digits. */
const NUMERIC_ENTITY_PATTERN = /^&#(?:x([0-9a-f]+)|(\d+));$/i;

/**
 * Resolves an entity to its numeric form when it is a known named entity;
 * otherwise returns the entity unchanged.
 */
const toNumericEntity = (entity: string): string =>
  EXACT_HTML_ENTITIES.get(entity) ??
  LOWERCASE_HTML_ENTITIES.get(entity.toLowerCase()) ??
  entity;

/**
 * Converts a numeric reference to `\uXXXX` text, or returns `undefined` when
 * the input is not a numeric reference or is above U+10FFFF.
 */
const numericEntityToUnicodeEscape = (
  numericEntity: string
): string | undefined => {
  const match = NUMERIC_ENTITY_PATTERN.exec(numericEntity);
  const hexDigits = match?.[1];
  const decimalDigits = match?.[2];

  let codePoint: number;
  let bmpHexDigits: string;

  if (hexDigits !== undefined) {
    codePoint = parseInt(hexDigits, 16);
    // Keep the author's digit casing, but drop surplus leading zeros so the
    // escape is always exactly four digits.
    bmpHexDigits = hexDigits.replace(/^0+/, '');
  } else if (decimalDigits !== undefined) {
    codePoint = parseInt(decimalDigits, 10);
    bmpHexDigits = codePoint.toString(16);
  } else {
    return undefined;
  }

  if (codePoint > 0x10ffff) {
    return undefined;
  }

  if (codePoint <= 0xffff) {
    return `\\u${bmpHexDigits.padStart(4, '0')}`;
  }

  // `\uXXXX` holds exactly four hex digits, so code points beyond the Basic
  // Multilingual Plane are written as a UTF-16 surrogate pair.
  const offset = codePoint - 0x10000;
  const highSurrogate = 0xd800 + (offset >> 10);
  const lowSurrogate = 0xdc00 + (offset & 0x3ff);

  return `\\u${highSurrogate.toString(16)}\\u${lowSurrogate.toString(16)}`;
};

/**
 * Converts HTML entities to their unicode escape equivalents. For example, `&amp;` becomes `\u0026`.
 *
 * Entities are recognized in any of the 3 formats:
 * 1. Named entities, like `&amp;` (looked up in `KNOWN_HTML_ENTITIES`, exact
 *    case first, then case-insensitively)
 * 2. Decimal entities, like `&#38;`
 * 3. Hexadecimal entities, like `&#x26;`
 *
 * Code points above U+FFFF are emitted as a surrogate pair
 * (`&#x1F600;` becomes `\ud83d\ude00`). Unknown named entities and numeric
 * references above U+10FFFF are returned unchanged.
 * @param htmlString - The HTML string to convert. The HTML is not being parsed, so it's possible
 * that a string may be replaced that is a part of another escape or included in a
 * JavaScript string. This could create a syntax error when actually parsing the HTML. In
 * most cases this is likely fine since we're usually using this to sanitize HTML.
 * @returns The HTML string with HTML entities converted to unicode escapes.
 */
export const htmlEntitiesToUnicodeEscapes = (htmlString: string): string =>
  htmlString.replace(
    HTML_ENTITY_PATTERN,
    // Fall back to the original text rather than dropping or mangling an
    // entity that cannot be converted.
    (entity) => numericEntityToUnicodeEscape(toNumericEntity(entity)) ?? entity
  );
