// escape.js (4.5 KB)
  /**
   * Replacement entities for the five ASCII characters handled by the XML
   * escapers in this file, keyed by UTF-16 code unit.
   *
   * The values must be the entity references themselves: mapping a character
   * to its own literal form would leave the output unescaped.
   */
  const xmlCodeMap = new Map([
      [34, "&quot;"], // "
      [38, "&amp;"], // &
      [39, "&apos;"], // '
      [60, "&lt;"], // <
      [62, "&gt;"], // >
  ]);
  /**
   * `charCodeAt`-based stand-in for `String.prototype.codePointAt`.
   *
   * A high surrogate is combined with the following code unit only when that
   * unit really is a low surrogate. A lone high surrogate (at the end of the
   * string or followed by anything else) is returned as-is, which is what the
   * native `codePointAt` does for in-range indices. An out-of-range `index`
   * yields NaN, because that is what `charCodeAt` returns there.
   *
   * @see http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
   *
   * @param input String to read from.
   * @param index Index of the UTF-16 code unit to start at.
   * @returns The code point starting at `index`.
   */
  function codePointFromCharCodes(input, index) {
      const lead = input.charCodeAt(index);
      // 0xfc00 keeps the six marker bits; 0xd800 identifies a high surrogate.
      if ((lead & 0xfc00) !== 0xd800) {
          return lead;
      }
      const trail = input.charCodeAt(index + 1);
      // Past the end `trail` is NaN, and `NaN & 0xfc00` is 0, so this also
      // covers a high surrogate that is the last code unit of the string.
      if ((trail & 0xfc00) !== 0xdc00) {
          return lead;
      }
      return (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000;
  }
  // For compatibility with node < 4, we wrap `codePointAt`
  export const getCodePoint =
      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
      String.prototype.codePointAt == null
          ? codePointFromCharCodes
          : (input, index) => input.codePointAt(index);
  /**
   * Bitset for ASCII characters that need to be escaped in XML.
   *
   * Covers the code units 32..63: bit `code - 32` is set for each character
   * that needs escaping. The expression evaluates to 1342177476.
   */
  export const XML_BITSET_VALUE =
      (1 << (34 - 32)) | // "
      (1 << (38 - 32)) | // &
      (1 << (39 - 32)) | // '
      (1 << (60 - 32)) | // <
      (1 << (62 - 32)); // >
  /**
   * Encodes all non-ASCII characters, as well as characters not valid in XML
   * documents using XML entities. Uses a fast bitset scan instead of RegExp.
   *
   * If a character has no equivalent entity, a numeric hexadecimal reference
   * (eg. `&#xfc;`) will be used.
   *
   * @param input String to encode.
   * @returns The encoded string. When nothing had to be escaped, `input`
   * itself is returned.
   */
  export function encodeXML(input) {
      let out;
      let last = 0;
      const { length } = input;
      for (let index = 0; index < length; index++) {
          const char = input.charCodeAt(index);
          // Check for ASCII chars that don't need escaping. `>>>` only uses
          // the low five bits of its shift count, so the explicit range
          // checks stop code units outside 32..63 from aliasing onto the
          // bitset.
          if (char < 0x80 &&
              (((XML_BITSET_VALUE >>> char) & 1) === 0 || char >= 64 || char < 32)) {
              continue;
          }
          // The first escape seeds the output with everything before it;
          // later escapes append the clean run since the previous one.
          if (out === undefined) {
              out = input.substring(0, index);
          }
          else if (last !== index) {
              out += input.substring(last, index);
          }
          if (char < 64) {
              // Known replacement
              out += xmlCodeMap.get(char);
              last = index + 1;
              continue;
          }
          // Non-ASCII: encode as numeric entity (handle surrogate pair)
          const cp = getCodePoint(input, index);
          out += `&#x${cp.toString(16)};`;
          if (cp !== char) {
              index++; // Skip trailing surrogate
          }
          last = index + 1;
      }
      if (out === undefined) {
          return input;
      }
      if (last < length) {
          // `substring` rather than the deprecated `substr`, matching the
          // calls above.
          out += input.substring(last);
      }
      return out;
  }
  /**
   * Alias of `encodeXML`: escapes the XML special characters with their
   * entities and every non-ASCII character with a numeric hexadecimal
   * reference (eg. `&#xfc;`).
   *
   * Have a look at `escapeUTF8` if you want a more concise output at the expense
   * of reduced transportability.
   *
   * @param data String to escape.
   */
  export const escape = encodeXML;
  /**
   * Builds an escaping function from a pattern and a replacement table.
   *
   * @param regex Regular expression matching the single characters to
   * replace. It must carry the `g` flag, since the scan relies on `exec`
   * advancing through the input.
   * @param map Map from a matched character's UTF-16 code unit to the text
   * that replaces it.
   *
   * @returns Function that takes a string and returns it with every match of
   * `regex` replaced by its entry in `map`.
   */
  function getEscaper(regex, map) {
      return function escape(data) {
          let escaped = "";
          let cursor = 0;
          for (let hit = regex.exec(data); hit; hit = regex.exec(data)) {
              const { index } = hit;
              // Carry over the untouched text between the previous match and
              // this one.
              if (cursor !== index) {
                  escaped += data.substring(cursor, index);
              }
              // The pattern only matches characters that are keys of `map`.
              escaped += map.get(hit[0].charCodeAt(0));
              // Matches are exactly one code unit long.
              cursor = index + 1;
          }
          return escaped + data.substring(cursor);
      };
  }
  /**
   * Escapes only the characters `"`, `&`, `'`, `<` and `>`, replacing each
   * with the value `xmlCodeMap` holds for it. All other characters, non-ASCII
   * ones included, are copied through unchanged.
   *
   * Note that the output will be character-set dependent.
   *
   * @param data String to escape.
   */
  export const escapeUTF8 =
      /* #__PURE__ */ getEscaper(/["&'<>]/g, xmlCodeMap);
  /**
   * Escapes a string for use as an HTML attribute value: `"`, `&` and the
   * no-break space (U+00A0) are replaced by `&quot;`, `&amp;` and `&nbsp;`,
   * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
   *
   * @param data String to escape.
   */
  export const escapeAttribute = /* #__PURE__ */ getEscaper(/["&\u00A0]/g, new Map([
      [0x22, "&quot;"], // "
      [0x26, "&amp;"], // &
      [0xa0, "&nbsp;"], // no-break space
  ]));
  /**
   * Escapes a string for use as HTML text content: `&`, `<`, `>` and the
   * no-break space (U+00A0) are replaced by `&amp;`, `&lt;`, `&gt;` and
   * `&nbsp;`,
   * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
   *
   * @param data String to escape.
   */
  export const escapeText = /* #__PURE__ */ getEscaper(/[&<>\u00A0]/g, new Map([
      [0x26, "&amp;"], // &
      [0x3c, "&lt;"], // <
      [0x3e, "&gt;"], // >
      [0xa0, "&nbsp;"], // no-break space
  ]));
  134. //# sourceMappingURL=escape.js.map