no-control-regex.js 3.7 KB

  /**
   * @fileoverview Rule to forbid control characters from regular expressions.
   * @author Nicholas C. Zakas
   */
  5. "use strict";
  6. const RegExpValidator = require("@eslint-community/regexpp").RegExpValidator;
  /**
   * Singleton that gathers the control characters appearing in a regular
   * expression pattern. It hands itself to a `RegExpValidator` as the handler
   * object, so the `on*` methods below are the hooks invoked during validation.
   */
  const collector = new (class {
    constructor() {
      this._source = "";
      this._controlChars = [];
      this._validator = new RegExpValidator(this);
    }

    /**
     * Hook fired when the validator starts parsing a pattern; discards anything
     * collected so far.
     *
     * A single `validatePattern` call may parse the pattern twice, so resetting
     * only in `collectControlChars` is not enough. For example, `/(?<a>\x1f)/`
     * is parsed twice: per Annex B, when the regex contains a `GroupName` and
     * the `u` flag is not used, `ParseText` is called twice.
     * See https://tc39.es/ecma262/2023/multipage/additional-ecmascript-features-for-web-browsers.html#sec-parsepattern-annexb
     * @returns {void}
     */
    onPatternEnter() {
      this._controlChars = [];
    }

    /**
     * Hook fired for a character of the pattern. Records the character when its
     * code point lies in 0x00-0x1f and it is written in the source either raw
     * or as a `\x` / `\u` escape; any other spelling is not recorded.
     * @param {number} start Start offset of the character in the pattern source.
     * @param {number} end End offset of the character in the pattern source.
     * @param {number} cp Code point of the character.
     * @returns {void}
     */
    onCharacter(start, end, cp) {
      const isControlCodePoint = cp >= 0x00 && cp <= 0x1f;

      if (!isControlCodePoint) {
        return;
      }

      const isRawCharacter = this._source.codePointAt(start) === cp;
      const rawText = this._source.slice(start, end);
      const isHexOrUnicodeEscape =
        rawText.startsWith("\\x") || rawText.startsWith("\\u");

      if (isRawCharacter || isHexOrUnicodeEscape) {
        // Always reported as `\x` followed by two lowercase hex digits.
        const hexDigits = cp.toString(16).padStart(2, "0");

        this._controlChars.push(`\\x${hexDigits}`);
      }
    }

    /**
     * Validates the given pattern and returns the control characters seen in it.
     * @param {string} regexpStr Regular expression pattern source.
     * @param {string | null} flags Regular expression flags; any non-string value
     *      is treated as "no flags".
     * @returns {string[]} The collected control characters, each formatted as
     *      `\xNN`. Characters recorded before a syntax error are still returned.
     */
    collectControlChars(regexpStr, flags) {
      const flagText = typeof flags === "string" ? flags : "";
      const validatorOptions = {
        unicode: flagText.includes("u"),
        unicodeSets: flagText.includes("v"),
      };

      this._source = regexpStr;
      this._controlChars = [];

      try {
        // Validation is what triggers the `onCharacter` hook.
        this._validator.validatePattern(
          regexpStr,
          undefined,
          undefined,
          validatorOptions,
        );
      } catch {
        // Ignore syntax errors in RegExp.
      }

      return this._controlChars;
    }
  })();
  //------------------------------------------------------------------------------
  // Rule Definition
  //------------------------------------------------------------------------------
  55. /** @type {import('../types').Rule.RuleModule} */
  56. module.exports = {
  57. meta: {
  58. type: "problem",
  59. docs: {
  60. description: "Disallow control characters in regular expressions",
  61. recommended: true,
  62. url: "https://eslint.org/docs/latest/rules/no-control-regex",
  63. },
  64. schema: [],
  65. messages: {
  66. unexpected:
  67. "Unexpected control character(s) in regular expression: {{controlChars}}.",
  68. },
  69. },
  70. create(context) {
  71. /**
  72. * Get the regex expression
  73. * @param {ASTNode} node `Literal` node to evaluate
  74. * @returns {{ pattern: string, flags: string | null } | null} Regex if found (the given node is either a regex literal
  75. * or a string literal that is the pattern argument of a RegExp constructor call). Otherwise `null`. If flags cannot be determined,
  76. * the `flags` property will be `null`.
  77. * @private
  78. */
  79. function getRegExp(node) {
  80. if (node.regex) {
  81. return node.regex;
  82. }
  83. if (
  84. typeof node.value === "string" &&
  85. (node.parent.type === "NewExpression" ||
  86. node.parent.type === "CallExpression") &&
  87. node.parent.callee.type === "Identifier" &&
  88. node.parent.callee.name === "RegExp" &&
  89. node.parent.arguments[0] === node
  90. ) {
  91. const pattern = node.value;
  92. const flags =
  93. node.parent.arguments.length > 1 &&
  94. node.parent.arguments[1].type === "Literal" &&
  95. typeof node.parent.arguments[1].value === "string"
  96. ? node.parent.arguments[1].value
  97. : null;
  98. return { pattern, flags };
  99. }
  100. return null;
  101. }
  102. return {
  103. Literal(node) {
  104. const regExp = getRegExp(node);
  105. if (regExp) {
  106. const { pattern, flags } = regExp;
  107. const controlCharacters = collector.collectControlChars(
  108. pattern,
  109. flags,
  110. );
  111. if (controlCharacters.length > 0) {
  112. context.report({
  113. node,
  114. messageId: "unexpected",
  115. data: {
  116. controlChars: controlCharacters.join(", "),
  117. },
  118. });
  119. }
  120. }
  121. },
  122. };
  123. },
  124. };