char-source.js 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. /**
  2. * @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token.
  3. * @author Francesco Trotta
  4. */
  5. "use strict";
  /**
   * A single code unit obtained by evaluating a JavaScript token such as a string
   * literal or a template token, together with the source text it originates from.
   */
  class CodeUnit {

      /**
       * @param {number} start Index at which the originating source text begins.
       * @param {string} source The source text that produces this code unit.
       */
      constructor(start, source) {
          this.start = start;
          this.source = source;
      }

      /**
       * Index just past the originating source text (`start` plus `length`).
       * @returns {number} The end index.
       */
      get end() {
          const { start, length } = this;

          return start + length;
      }

      /**
       * Number of characters in the originating source text.
       * @returns {number} The length of `source`.
       */
      get length() {
          const { source } = this;

          return source.length;
      }
  }
  /**
   * A cursor over a source text: it remembers the position from which the next
   * characters will be read.
   */
  class TextReader {

      /**
       * @param {string} source The text to read from. Reading starts at index 0.
       */
      constructor(source) {
          this.source = source;
          this.pos = 0;
      }

      /**
       * Moves the reading position forward by the specified number of characters.
       * @param {number} length Number of characters to advance.
       * @returns {void}
       */
      advance(length) {
          this.pos = this.pos + length;
      }

      /**
       * Reads characters from the source without moving the reading position.
       * @param {number} [offset=0] The offset where reading starts, relative to the current position.
       * @param {number} [length=1] Number of characters to read.
       * @returns {string} A substring of source characters; shorter than `length` when the source ends first.
       */
      read(offset = 0, length = 1) {
          const { source, pos } = this;
          const from = pos + offset;
          const to = from + length;

          return source.slice(from, to);
      }
  }
  /**
   * Maps the character that follows the backslash in a single-character escape
   * sequence to the code unit that the escape sequence produces.
   * The object has no prototype, so looking up an arbitrary character never
   * finds an inherited property.
   */
  const SIMPLE_ESCAPE_SEQUENCES = Object.assign(Object.create(null), {
      b: "\b",
      f: "\f",
      n: "\n",
      r: "\r",
      t: "\t",
      v: "\v",
  });
  /**
   * Reads a fixed-length run of hexadecimal digits and converts it to a code unit.
   * The reader is moved past the digits.
   * @param {TextReader} reader The reader should be positioned on the first hexadecimal digit.
   * @param {number} length The number of hexadecimal digits.
   * @returns {string} A code unit.
   */
  function readHexSequence(reader, length) {
      const hexDigits = reader.read(0, length);

      reader.advance(length);

      return String.fromCharCode(Number.parseInt(hexDigits, 16));
  }
  /**
   * Reads the part of a Unicode escape sequence that follows the "u".
   * Both the braced form (`{` hex digits `}`) and the form with exactly four
   * hexadecimal digits are supported. The reader is moved past the sequence.
   * @param {TextReader} reader The reader should be positioned after the "u".
   * @returns {string} The character designated by the escape sequence: one code unit, or two
   * when the braced form designates a code point that needs a surrogate pair.
   */
  function readUnicodeSequence(reader) {

      // Sticky, so the braced form is only recognized exactly at the current position.
      const bracedForm = /\{(?<hexDigits>[\dA-F]+)\}/iuy;

      bracedForm.lastIndex = reader.pos;
      const bracedMatch = bracedForm.exec(reader.source);

      if (!bracedMatch) {

          // Not the braced form: the "u" is followed by four hexadecimal digits.
          return readHexSequence(reader, 4);
      }

      const { hexDigits } = bracedMatch.groups;

      reader.pos = bracedForm.lastIndex;

      return String.fromCodePoint(Number.parseInt(hexDigits, 16));
  }
  /**
   * Reads an octal escape sequence.
   * The first octal digit has already been consumed by the caller, so reading
   * starts one character before the current position.
   * @param {TextReader} reader The reader should be positioned after the first octal digit.
   * @param {number} maxLength The maximum number of octal digits.
   * @returns {string} A code unit.
   */
  function readOctalSequence(reader, maxLength) {
      const candidate = reader.read(-1, maxLength);
      const octalDigits = candidate.match(/^[0-7]+/u)[0];

      // The first digit is already behind the reader; skip only the remaining ones.
      reader.advance(octalDigits.length - 1);

      return String.fromCharCode(Number.parseInt(octalDigits, 8));
  }
  /**
   * Reads an escape sequence or line continuation and moves the reader past it.
   * @param {TextReader} reader The reader should be positioned on the backslash.
   * @returns {string} A string of zero, one or two code units: empty for a line continuation,
   * otherwise the character(s) designated by the escape sequence.
   */
  function readEscapeSequenceOrLineContinuation(reader) {
      const escaped = reader.read(1);

      // Consume the backslash together with the character that follows it.
      reader.advance(2);

      const simpleUnit = SIMPLE_ESCAPE_SEQUENCES[escaped];

      if (simpleUnit) {
          return simpleUnit;
      }

      if (escaped === "x") {
          return readHexSequence(reader, 2);
      }

      if (escaped === "u") {
          return readUnicodeSequence(reader);
      }

      if (escaped === "\r") {

          // A line feed right after the carriage return belongs to the same line continuation.
          if (reader.read() === "\n") {
              reader.advance(1);
          }
          return "";
      }

      if (escaped === "\n" || escaped === "\u2028" || escaped === "\u2029") {
          return "";
      }

      // `escaped` holds at most one character, so these range checks match single digits only.
      if (escaped >= "0" && escaped <= "3") {
          return readOctalSequence(reader, 3);
      }

      if (escaped >= "4" && escaped <= "7") {
          return readOctalSequence(reader, 2);
      }

      // Any other escaped character stands for itself.
      return escaped;
  }
  /**
   * Reads an escape sequence or line continuation and generates one `CodeUnit` per code unit it
   * produces. Every generated element refers to the whole text of the escape sequence, so two
   * code units produced by the same escape sequence share the same start and source.
   * @param {TextReader} reader The reader should be positioned on the backslash.
   * @returns {Generator<CodeUnit>} Zero, one or two `CodeUnit` elements.
   */
  function* mapEscapeSequenceOrLineContinuation(reader) {
      const start = reader.pos;
      const { length: producedLength } = readEscapeSequenceOrLineContinuation(reader);
      const source = reader.source.slice(start, reader.pos);

      // A line continuation produces nothing; anything longer than one code unit yields two elements.
      const unitCount = Math.min(producedLength, 2);

      for (let index = 0; index < unitCount; index++) {
          yield new CodeUnit(start, source);
      }
  }
  /**
   * Parses a string literal.
   *
   * The first character of `source` is taken as the quote; reading continues until the same
   * character is found again. Each code unit of the resulting string value is reported together
   * with the position and text of the source characters that produce it.
   *
   * If `source` ends before a closing quote is found, parsing stops at the end of the text and
   * the code units read up to that point are returned.
   * @param {string} source The string literal to parse, including the delimiting quotes.
   * @returns {CodeUnit[]} A list of code units produced by the string literal.
   */
  function parseStringLiteral(source) {
      const reader = new TextReader(source);
      const quote = reader.read();

      reader.advance(1);
      const codeUnits = [];

      for (;;) {
          const char = reader.read();

          // An empty read means the end of the source was reached: without this check an
          // unterminated literal would keep appending empty code units forever.
          if (char === quote || char === "") {
              break;
          }
          if (char === "\\") {
              codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
          } else {
              codeUnits.push(new CodeUnit(reader.pos, char));
              reader.advance(1);
          }
      }
      return codeUnits;
  }
  /**
   * Parses a template token.
   *
   * The first character of `source` (the opening `` ` `` or `}`) is skipped; reading continues
   * until a `` ` `` or a `${` is found. Each code unit of the resulting value is reported together
   * with the position and text of the source characters that produce it. A carriage return
   * immediately followed by a line feed is reported as one code unit whose source is `"\r\n"`.
   *
   * If `source` ends before a closing `` ` `` or `${` is found, parsing stops at the end of the
   * text and the code units read up to that point are returned.
   * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`.
   * @returns {CodeUnit[]} A list of code units produced by the template token.
   */
  function parseTemplateToken(source) {
      const reader = new TextReader(source);

      reader.advance(1);
      const codeUnits = [];

      for (;;) {
          const char = reader.read();

          // An empty read means the end of the source was reached: without this check an
          // unterminated token would keep appending empty code units forever.
          if (char === "" || char === "`" || (char === "$" && reader.read(1) === "{")) {
              break;
          }
          if (char === "\\") {
              codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
          } else {

              // CR LF is kept together so that a single code unit maps to both characters.
              const unitSource = char === "\r" && reader.read(1) === "\n" ? "\r\n" : char;

              codeUnits.push(new CodeUnit(reader.pos, unitSource));
              reader.advance(unitSource.length);
          }
      }
      return codeUnits;
  }
  // Only the two parsing entry points are exported.
  module.exports = { parseStringLiteral, parseTemplateToken };