// escape.js 4.7 KB
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.escapeText = exports.escapeAttribute = exports.escapeUTF8 = exports.escape = exports.XML_BITSET_VALUE = exports.getCodePoint = void 0;
  4. exports.encodeXML = encodeXML;
  5. const xmlCodeMap = new Map([
  6. [34, """],
  7. [38, "&"],
  8. [39, "'"],
  9. [60, "<"],
  10. [62, ">"],
  11. ]);
  12. // For compatibility with node < 4, we wrap `codePointAt`
  13. exports.getCodePoint =
  14. // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
  15. String.prototype.codePointAt == null
  16. ? (c, index) => (c.charCodeAt(index) & 64512) === 55296
  17. ? (c.charCodeAt(index) - 55296) * 1024 +
  18. c.charCodeAt(index + 1) -
  19. 56320 +
  20. 65536
  21. : c.charCodeAt(index)
  22. : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  23. (input, index) => input.codePointAt(index);
  24. /**
  25. * Bitset for ASCII characters that need to be escaped in XML.
  26. */
  27. exports.XML_BITSET_VALUE = 1342177476; // 32..63 -> 34 ("),38 (&),39 ('),60 (<),62 (>)
  28. /**
  29. * Encodes all non-ASCII characters, as well as characters not valid in XML
  30. * documents using XML entities. Uses a fast bitset scan instead of RegExp.
  31. *
  32. * If a character has no equivalent entity, a numeric hexadecimal reference
  33. * (eg. `&#xfc;`) will be used.
  34. */
  35. function encodeXML(input) {
  36. let out;
  37. let last = 0;
  38. const { length } = input;
  39. for (let index = 0; index < length; index++) {
  40. const char = input.charCodeAt(index);
  41. // Check for ASCII chars that don't need escaping
  42. if (char < 0x80 &&
  43. (((exports.XML_BITSET_VALUE >>> char) & 1) === 0 || char >= 64 || char < 32)) {
  44. continue;
  45. }
  46. if (out === undefined)
  47. out = input.substring(0, index);
  48. else if (last !== index)
  49. out += input.substring(last, index);
  50. if (char < 64) {
  51. // Known replacement
  52. out += xmlCodeMap.get(char);
  53. last = index + 1;
  54. continue;
  55. }
  56. // Non-ASCII: encode as numeric entity (handle surrogate pair)
  57. const cp = (0, exports.getCodePoint)(input, index);
  58. out += `&#x${cp.toString(16)};`;
  59. if (cp !== char)
  60. index++; // Skip trailing surrogate
  61. last = index + 1;
  62. }
  63. if (out === undefined)
  64. return input;
  65. if (last < length)
  66. out += input.substr(last);
  67. return out;
  68. }
  69. /**
  70. * Encodes all non-ASCII characters, as well as characters not valid in XML
  71. * documents using numeric hexadecimal reference (eg. `&#xfc;`).
  72. *
  73. * Have a look at `escapeUTF8` if you want a more concise output at the expense
  74. * of reduced transportability.
  75. *
  76. * @param data String to escape.
  77. */
  78. exports.escape = encodeXML;
  79. /**
  80. * Creates a function that escapes all characters matched by the given regular
  81. * expression using the given map of characters to escape to their entities.
  82. *
  83. * @param regex Regular expression to match characters to escape.
  84. * @param map Map of characters to escape to their entities.
  85. *
  86. * @returns Function that escapes all characters matched by the given regular
  87. * expression using the given map of characters to escape to their entities.
  88. */
  89. function getEscaper(regex, map) {
  90. return function escape(data) {
  91. let match;
  92. let lastIndex = 0;
  93. let result = "";
  94. while ((match = regex.exec(data))) {
  95. if (lastIndex !== match.index) {
  96. result += data.substring(lastIndex, match.index);
  97. }
  98. // We know that this character will be in the map.
  99. result += map.get(match[0].charCodeAt(0));
  100. // Every match will be of length 1
  101. lastIndex = match.index + 1;
  102. }
  103. return result + data.substring(lastIndex);
  104. };
  105. }
  106. /**
  107. * Encodes all characters not valid in XML documents using XML entities.
  108. *
  109. * Note that the output will be character-set dependent.
  110. *
  111. * @param data String to escape.
  112. */
  113. exports.escapeUTF8 = getEscaper(/["&'<>]/g, xmlCodeMap);
  114. /**
  115. * Encodes all characters that have to be escaped in HTML attributes,
  116. * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
  117. *
  118. * @param data String to escape.
  119. */
  120. exports.escapeAttribute =
  121. /* #__PURE__ */ getEscaper(/["&\u00A0]/g, new Map([
  122. [34, "&quot;"],
  123. [38, "&amp;"],
  124. [160, "&nbsp;"],
  125. ]));
  126. /**
  127. * Encodes all characters that have to be escaped in HTML text,
  128. * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
  129. *
  130. * @param data String to escape.
  131. */
  132. exports.escapeText = getEscaper(/[&<>\u00A0]/g, new Map([
  133. [38, "&amp;"],
  134. [60, "&lt;"],
  135. [62, "&gt;"],
  136. [160, "&nbsp;"],
  137. ]));
//# sourceMappingURL=escape.js.map