diff --git a/package.json b/package.json index b4d8bb7..a179025 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "watch": "warun \"{src,test}/**/*.mjs\" -- npm run -s test:mocha" }, "dependencies": { + "@eslint-community/regexpp": "^4.9.1", "eslint-visitor-keys": "^3.4.3" }, "devDependencies": { diff --git a/src/get-static-value.mjs b/src/get-static-value.mjs index 074f298..68305d2 100644 --- a/src/get-static-value.mjs +++ b/src/get-static-value.mjs @@ -1,6 +1,7 @@ /* globals globalThis, global, self, window */ import { findVariable } from "./find-variable.mjs" +import { isSafeRegex } from "./safe-regex.mjs" const globalObject = typeof globalThis !== "undefined" @@ -13,6 +14,8 @@ const globalObject = ? global : {} +class DangerousCallError extends Error {} + const builtinNames = Object.freeze( new Set([ "Array", @@ -169,6 +172,14 @@ const callPassThrough = new Set([ Object.preventExtensions, Object.seal, ]) +/** @type {ReadonlyMap>} */ +const callReplacement = new Map([ + checkArgs(String.prototype.match, checkSafeSearchValue), + checkArgs(String.prototype.matchAll, checkSafeSearchValue), + checkArgs(String.prototype.replace, checkSafeSearchValue), + checkArgs(String.prototype.replaceAll, checkSafeSearchValue), + checkArgs(String.prototype.split, checkSafeSearchValue), +]) /** @type {ReadonlyArray]>} */ const getterAllowed = [ @@ -190,6 +201,47 @@ const getterAllowed = [ [Set, new Set(["size"])], ] +/** + * @typedef {(thisArg: T, args: unknown[], original: (this: T, ...args: unknown[]) => R) => R} ReplaceFn + * @template T + * @template R + */ + +/** + * A helper function that creates an entry for the given function. 
+ * @param {T} fn + * @param {(args: unknown[]) => void} checkFn + * @returns {[T, ReplaceFn>]} + * @template {Function} T + */ +function checkArgs(fn, checkFn) { + return [ + fn, + (thisArg, args) => { + checkFn(args) + return fn.apply(thisArg, args) + }, + ] +} + +/** + * Checks that the first argument is either a string or a safe regex. + * @param {unknown[]} args + */ +function checkSafeSearchValue(args) { + const searchValue = args[0] + if (typeof searchValue === "string") { + // strings are always safe search values + return + } + if (searchValue instanceof RegExp && isSafeRegex(searchValue)) { + // we verified that the regex is safe + return + } + // we were unable to verify that the search value is safe, + throw new DangerousCallError() +} + /** * Get the property descriptor. * @param {object} object The object to get. @@ -249,6 +301,34 @@ function getElementValues(nodeList, initialScope) { return valueList } +/** + * Calls the given function if it is one of the allowed functions. + * @param {Function} func The function to call. + * @param {unknown} thisArg The `this` arg of the function. Use `undefined` when calling a free function. + * @param {unknown[]} args + */ +function callFunction(func, thisArg, args) { + if (callAllowed.has(func)) { + return { value: func.apply(thisArg, args) } + } + if (callPassThrough.has(func)) { + return { value: args[0] } + } + + const replacement = callReplacement.get(func) + if (replacement) { + try { + return { value: replacement(thisArg, args, func) } + } catch (error) { + if (!(error instanceof DangerousCallError)) { + throw error + } + } + } + + return null +} + /** * Returns whether the given variable is never written to after initialization. 
* @param {import("eslint").Scope.Variable} variable @@ -363,12 +443,11 @@ const operations = Object.freeze({ if (property != null) { const receiver = object.value const methodName = property.value - if (callAllowed.has(receiver[methodName])) { - return { value: receiver[methodName](...args) } - } - if (callPassThrough.has(receiver[methodName])) { - return { value: args[0] } - } + return callFunction( + receiver[methodName], + receiver, + args, + ) } } } else { @@ -378,12 +457,7 @@ const operations = Object.freeze({ return { value: undefined, optional: true } } const func = callee.value - if (callAllowed.has(func)) { - return { value: func(...args) } - } - if (callPassThrough.has(func)) { - return { value: args[0] } - } + return callFunction(func, undefined, args) } } } diff --git a/src/safe-regex.mjs b/src/safe-regex.mjs new file mode 100644 index 0000000..2ab66d5 --- /dev/null +++ b/src/safe-regex.mjs @@ -0,0 +1,243 @@ +import { RegExpParser } from "@eslint-community/regexpp" + +/** + * Returns whether the given regex will execute in O(n) (with a decently small + * constant factor) on any string. This is a conservative check. If the check + * returns `true`, then the regex is guaranteed to be safe. + * @param {RegExp | string} regex + * @returns {boolean} + */ +export function isSafeRegex(regex) { + try { + const parser = new RegExpParser() + const ast = parser.parseLiteral(regex.toString()) + const paths = maxPossiblePaths(ast.pattern, "ltr") + return paths < 100 + } catch { + // can't parse regex, or there are some elements we don't support + return false + } +} + +/** + * @typedef {import("@eslint-community/regexpp").AST} AST + */ + +/** + * Returns the maximum number of possible paths through a given regex node. + * @param {import("@eslint-community/regexpp/ast").Element + * | import("@eslint-community/regexpp/ast").Alternative + * | import("@eslint-community/regexpp/ast").Pattern + * } n + * @param {"ltr" | "rtl"} direction The matching direction. 
+ * @returns {number} + */ +// eslint-disable-next-line complexity +export function maxPossiblePaths(n, direction) { + switch (n.type) { + case "Alternative": { + let elements = n.elements + if (direction === "rtl") { + elements = [...elements].reverse() + } + let paths = 1 + for (const e of elements) { + paths *= maxPossiblePaths(e, direction) + if (paths === 0 || paths === Infinity) { + return paths + } + } + return paths + } + + case "Assertion": { + if (n.kind === "lookahead" || n.kind === "lookbehind") { + const d = n.kind === "lookahead" ? "ltr" : "rtl" + let paths = 0 + for (const e of n.alternatives) { + paths += maxPossiblePaths(e, d) + } + return paths + } + // built-in assertions are always constant + return 1 + } + + case "Backreference": + return 1 + + case "Character": + case "CharacterSet": + case "CharacterClass": + case "ExpressionCharacterClass": + return getStringsInCharacters(n) + (hasNoCharacters(n) ? 0 : 1) + + case "Quantifier": { + if (n.max === 0) { + return 1 + } + const inner = maxPossiblePaths(n.element, direction) + if (inner === 0) { + return n.min === 0 ? 1 : 0 + } + if (n.max === Infinity) { + return Infinity + } + if (inner === Infinity) { + return inner + } + const constant = inner ** n.min + if (n.min === n.max) { + return constant + } + // The {n,m} case (n!=m) is bit harder. + // Example: (a|b){2,4} is equivalent to (a|b){2}(a|b){0,2} + // To get the maximum possible paths of any x{0,p}, we first note + // that this is the same as x{0}|x|xx|xxx|...|x{p}. So the max + // paths of x{0,p} is the sum of the max paths of x{0}, x{1}, ..., x{p}. + // Let y=maxPossiblePaths(x). Then maxPossiblePaths(x{0,p}) = + // = 1 + y + y^2 + y^3 + ... 
y^p + // = ceil(y^(p+1)/(y-1))-1 (if y>=2) + // = p+1 (if y=1) + // = 1 (if y=0) + const p = n.max - n.min + let e + if (inner < 2) { + e = p * inner + 1 + } else { + e = Math.ceil(inner ** (p + 1) / (inner - 1)) - 1 + } + return constant * e + } + + case "CapturingGroup": + case "Group": + case "Pattern": { + let paths = 0 + for (const e of n.alternatives) { + paths += maxPossiblePaths(e, direction) + if (paths === Infinity) { + return paths + } + } + return paths + } + + default: + return assertNever(n) + } +} + +/** + * Returns the worst-case (=maximum) number of string (length!=1) elements in the given character element. + * @param {import("@eslint-community/regexpp/ast").CharacterClassElement + * | import("@eslint-community/regexpp/ast").ExpressionCharacterClass["expression"] + * | import("@eslint-community/regexpp/ast").CharacterSet + * | import("@eslint-community/regexpp/ast").CharacterClass + * } n + * @returns {number} + * + * @typedef {import("@eslint-community/regexpp").AST} AST + */ +function getStringsInCharacters(n) { + switch (n.type) { + case "Character": + case "CharacterClassRange": + return 0 + + case "CharacterSet": + // since we can't know how many strings the set contains, we + // just assume 1000 + return n.kind === "property" && n.strings ? 1000 : 0 + + case "ClassStringDisjunction": + return n.alternatives.filter((a) => a.elements.length !== 1).length + + case "CharacterClass": + if (n.negate || !n.unicodeSets) { + return 0 + } + return n.elements.reduce((a, b) => a + getStringsInCharacters(b), 0) + + case "ExpressionCharacterClass": + if (n.negate) { + return 0 + } + return getStringsInCharacters(n.expression) + + case "ClassIntersection": + return Math.min( + getStringsInCharacters(n.left), + getStringsInCharacters(n.right), + ) + case "ClassSubtraction": + return getStringsInCharacters(n.left) + + default: + return assertNever(n) + } +} + +/** + * Returns `true` if the given element does not contain any single-character + * elements. 
If `false` is returned, then the given element might still contain + * single-character elements. + * @param {import("@eslint-community/regexpp/ast").CharacterClassElement + * | import("@eslint-community/regexpp/ast").ExpressionCharacterClass["expression"] + * | import("@eslint-community/regexpp/ast").CharacterSet + * | import("@eslint-community/regexpp/ast").CharacterClass + * } n + * @returns {boolean} + * + * @typedef {import("@eslint-community/regexpp").AST} AST + */ +function hasNoCharacters(n) { + switch (n.type) { + case "Character": + case "CharacterClassRange": + return false + + case "CharacterSet": + // while not exactly true, we'll just assume that character sets + // always contain at least one character + return false + + case "ClassStringDisjunction": + return n.alternatives.every((a) => a.elements.length !== 1) + + case "CharacterClass": + if (n.negate) { + // since we can't know whether the elements contain all + // characters, we have to assume that [^not all] will + // contain at least some + return false + } + return n.elements.every(hasNoCharacters) + + case "ExpressionCharacterClass": + if (n.negate) { + // since we can't know whether the expression contains all + // characters, we have to assume that [^not all] will + // contain at least some + return false + } + return hasNoCharacters(n.expression) + + case "ClassIntersection": + return hasNoCharacters(n.left) || hasNoCharacters(n.right) + case "ClassSubtraction": + return hasNoCharacters(n.left) + + default: + return assertNever(n) + } +} + +/** + * A function that should never be called. 
+ * @param {never} value + * @returns {never} + */ +function assertNever(value) { + throw new Error(`Unexpected value: ${value}`) +} diff --git a/test/get-static-value.mjs b/test/get-static-value.mjs index b898f32..393f437 100644 --- a/test/get-static-value.mjs +++ b/test/get-static-value.mjs @@ -189,6 +189,31 @@ describe("The 'getStaticValue' function", () => { { code: "' foo '.charAt(4)", expected: { value: "o" } }, { code: "' foo '.charCodeAt(400)", expected: { value: NaN } }, { code: "' foo '.repeat(1e12)", expected: null }, + { code: "'abcdef'.replace('a', 'x')", expected: { value: "xbcdef" } }, + { code: "'abcdef'.replace(/a/, 'x')", expected: { value: "xbcdef" } }, + { code: "'abcdef'.replace(/a+/, 'x')", expected: null }, + { + code: "'abcdef'.match('a')", + expected: { value: "abcdef".match("a") }, + }, + { + code: "'abcdef'.match(/a/gu)", + expected: { value: "abcdef".match(/a/gu) }, + }, + { code: "'abcdef'.match('[')", expected: null }, + { code: "'abcdef'.match(/a+/g)", expected: null }, + { code: "'abab'.split('a')", expected: { value: ["", "b", "b"] } }, + { code: "'abab'.split(/a/)", expected: { value: ["", "b", "b"] } }, + { + code: "'abab'.split(/(?:a|a)/)", + expected: { value: ["", "b", "b"] }, + }, + { code: "'abab'.split(/a+/)", expected: null }, + { code: "'abab'.split(/(?:a|a)+/)", expected: null }, + { + code: "'abab'.split(/(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)b/)", + expected: null, + }, { code: "-1", expected: { value: -1 } }, { code: "+'1'", expected: { value: 1 } }, { code: "!0", expected: { value: true } }, diff --git a/test/safe-regex.mjs b/test/safe-regex.mjs new file mode 100644 index 0000000..2154c37 --- /dev/null +++ b/test/safe-regex.mjs @@ -0,0 +1,93 @@ +import { RegExpParser } from "@eslint-community/regexpp" +import assert from "assert" +import { isSafeRegex, maxPossiblePaths } from "../src/safe-regex.mjs" + +describe("isSafeRegex", () => { + const maxPaths = { + [String.raw`/[]/`]: 0, 
+ [String.raw`/[]+/`]: 0, + [String.raw`/[]a+/`]: 0, + [String.raw`/(?<=cb+[])a/`]: 0, + [String.raw`/[\w&&\q{foo|bar}]/v`]: 0, + + [String.raw`/a/`]: 1, + [String.raw`/[a]/`]: 1, + [String.raw`/foobar/`]: 1, + [String.raw`/\bfoobar\b/`]: 1, + [String.raw`/^foobar$/`]: 1, + [String.raw`/^foobar$/u`]: 1, + [String.raw`/^foobar$/v`]: 1, + [String.raw`/\p{ASCII}/v`]: 1, + [String.raw`/[abcA-Z\d\w\p{ASCII}]/`]: 1, + [String.raw`/[abcA-Z\d\w\p{ASCII}]/u`]: 1, + [String.raw`/[abcA-Z\d\w\p{ASCII}]/v`]: 1, + [String.raw`/[abcA-Z\d\w\p{ASCII}\q{f|g|h}]/v`]: 1, + [String.raw`/[^abcA-Z\d\w\p{ASCII}\q{f|g|h}]/v`]: 1, + [String.raw`/a{100}/v`]: 1, + [String.raw`/[]*/v`]: 1, + [String.raw`/[]?/v`]: 1, + [String.raw`/[]{0,100}/v`]: 1, + [String.raw`/(?:a*a*a*a*){0}/`]: 1, + [String.raw`/(a)b\1/v`]: 1, + [String.raw`/a(?!foo)/`]: 1, + [String.raw`/[^[a-b]&&\w]/v`]: 1, + [String.raw`/[\w&&\d]/v`]: 1, + [String.raw`/[^\p{ASCII}--\w]/v`]: 1, + [String.raw`/[\w&&[\d\q{foo|bar}]]/v`]: 1, + + [String.raw`/a|b/`]: 2, + [String.raw`/a|a/`]: 2, + [String.raw`/a?/`]: 2, + [String.raw`/a??/`]: 2, + [String.raw`/[\q{foo|bar}]/v`]: 2, + [String.raw`/[\q{foo|}]/v`]: 2, + [String.raw`/[\q{foo}\w]/v`]: 2, + [String.raw`/[\q{}\w]/v`]: 2, + [String.raw`/(a|b)c\1/v`]: 2, + [String.raw`/[[\p{ASCII}\q{foo}]--\w]/v`]: 2, + + [String.raw`/a{2,4}/v`]: 3, + [String.raw`/(a|b){2,4}/v`]: 28, + [String.raw`/(a|b|c){2,4}/v`]: 117, + [String.raw`/(a|b|c)(a|b|c)((a|b|c)((a|b|c)|)|)/v`]: 117, + + [String.raw`/(a|b){10}/v`]: 2 ** 10, + [String.raw`/(a|b|c|d|e){10}/v`]: 5 ** 10, + + [String.raw`/^\p{RGI_Emoji}$/v`]: 1001, + + [String.raw`/(a+)b\1/`]: Infinity, + [String.raw`/(?:a|a)+b/`]: Infinity, + [String.raw`/b+$/`]: Infinity, + [String.raw`/b+[]/`]: Infinity, + [String.raw`/b+$|foo/`]: Infinity, + [String.raw`/foo|b+$/`]: Infinity, + [String.raw`/(?:a+){3}/`]: Infinity, + [String.raw`/(a|b|c|d|e){1000}/v`]: Infinity, + } + + it("should be false for invalid regexes", () => { + const actual = 
isSafeRegex("/foo[a-/u") + assert.deepStrictEqual(actual, false) + }) + + // it("should be true for safe regexes", () => { + // for (const [regex, paths] of Object.entries(maxPaths)) { + // if (paths < 100) { + // const actual = isSafeRegex(regex) + // assert.deepStrictEqual(actual, true) + // } + // } + // }) + + describe("maxPaths", () => { + for (const [regex, paths] of Object.entries(maxPaths)) { + it(regex, () => { + const parser = new RegExpParser() + const ast = parser.parseLiteral(regex.toString()) + const actual = maxPossiblePaths(ast.pattern, "ltr") + assert.deepStrictEqual(actual, paths) + }) + } + }) +})