From 57982c59914af1f270629cf95adb45068774b3e2 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sun, 19 Mar 2023 23:27:59 +0100 Subject: [PATCH 1/5] feat(getStaticValue): string regex functions --- src/get-static-value.mjs | 131 ++++++++++++++++++++++++++++++++++---- test/get-static-value.mjs | 15 +++++ 2 files changed, 134 insertions(+), 12 deletions(-) diff --git a/src/get-static-value.mjs b/src/get-static-value.mjs index 46616f2..5262184 100644 --- a/src/get-static-value.mjs +++ b/src/get-static-value.mjs @@ -13,6 +13,8 @@ const globalObject = ? global : {} +class DangerousCallError extends Error {} + const builtinNames = Object.freeze( new Set([ "Array", @@ -169,6 +171,14 @@ const callPassThrough = new Set([ Object.preventExtensions, Object.seal, ]) +/** @type {ReadonlyMap>} */ +const callReplacement = new Map([ + checkArgs(String.prototype.match, checkSafeSearchValue), + checkArgs(String.prototype.matchAll, checkSafeSearchValue), + checkArgs(String.prototype.replace, checkSafeSearchValue), + checkArgs(String.prototype.replaceAll, checkSafeSearchValue), + checkArgs(String.prototype.split, checkSafeSearchValue), +]) /** @type {ReadonlyArray]>} */ const getterAllowed = [ @@ -188,6 +198,81 @@ const getterAllowed = [ ], ] +/** + * @typedef {(thisArg: T, args: unknown[], original: (this: T, ...args: unknown[]) => R) => R} ReplaceFn + * @template T + * @template R + */ + +/** + * A helper function that creates an entry for the given function. + * @param {T} fn + * @param {(args: unknown[]) => void} checkFn + * @returns {[T, ReplaceFn>]} + * @template {Function} T + */ +function checkArgs(fn, checkFn) { + return [ + fn, + (thisArg, args) => { + checkFn(args) + return fn.apply(thisArg, args) + }, + ] +} + +/** + * Checks that the first argument is either a string or a safe regex. 
+ * @param {unknown[]} args + */ +function checkSafeSearchValue(args) { + const searchValue = args[0] + if (typeof searchValue === "string") { + // strings are always safe search values + return + } + if (searchValue instanceof RegExp && isSafeRegex(searchValue)) { + // we verified that the regex is safe + return + } + // we were unable to verify that the search value is safe, + throw new DangerousCallError() +} + +/** + * Returns whether the given regex will execute in O(n) (with a decently small + * constant factor) on any string. + * @param {RegExp} regex + * @returns {boolean} + */ +function isSafeRegex(regex) { + let pattern = regex.source + + // replace all escape sequences with some arbitrary character + pattern = pattern.replace(/\\./gu, "a") + // replace all character classes with some arbitrary character + pattern = pattern.replace(/\[[^\]]*\]/gu, "a") + + // in the following check, we have to account for neither escapes nor character classes + if (/[+*{}]/u.test(pattern)) { + // contains (potentially) unbound quantifiers, e.g. /a*/ + // this can be exploited for up to exponential backtracking + return false + } + + // collect the number of branches in the regex + // here, a branch is a non-constant quantifier of disjunction + const branches = (pattern.match(/\||[^(]\?/gu) || []).length + + // with n branches, it is possible to cause 2^n backtracking steps + // E.g. /^(a|a)(a|a)(a|a)(a|a)$/ has 4 branches and takes around 16 steps to reject "aaaab" + if (branches > 10) { + return false + } + + return true +} + /** * Get the property descriptor. * @param {object} object The object to get. @@ -247,6 +332,34 @@ function getElementValues(nodeList, initialScope) { return valueList } +/** + * Calls the given function if it is one of the allowed functions. + * @param {Function} func The function to call. + * @param {unknown} thisArg The `this` arg of the function. Use `undefined` when calling a free function. 
+ * @param {unknown[]} args + */ +function callFunction(func, thisArg, args) { + if (callAllowed.has(func)) { + return { value: func.apply(thisArg, args) } + } + if (callPassThrough.has(func)) { + return { value: args[0] } + } + + const replacement = callReplacement.get(func) + if (replacement) { + try { + return { value: replacement(thisArg, args, func) } + } catch (error) { + if (!(error instanceof DangerousCallError)) { + throw error + } + } + } + + return null +} + const operations = Object.freeze({ ArrayExpression(node, initialScope) { const elements = getElementValues(node.elements, initialScope) @@ -344,12 +457,11 @@ const operations = Object.freeze({ if (property != null) { const receiver = object.value const methodName = property.value - if (callAllowed.has(receiver[methodName])) { - return { value: receiver[methodName](...args) } - } - if (callPassThrough.has(receiver[methodName])) { - return { value: args[0] } - } + return callFunction( + receiver[methodName], + receiver, + args, + ) } } } else { @@ -359,12 +471,7 @@ const operations = Object.freeze({ return { value: undefined, optional: true } } const func = callee.value - if (callAllowed.has(func)) { - return { value: func(...args) } - } - if (callPassThrough.has(func)) { - return { value: args[0] } - } + return callFunction(func, undefined, args) } } } diff --git a/test/get-static-value.mjs b/test/get-static-value.mjs index a60cb8d..75dfab5 100644 --- a/test/get-static-value.mjs +++ b/test/get-static-value.mjs @@ -178,6 +178,21 @@ describe("The 'getStaticValue' function", () => { { code: "' foo '.charAt(4)", expected: { value: "o" } }, { code: "' foo '.charCodeAt(400)", expected: { value: NaN } }, { code: "' foo '.repeat(1e12)", expected: null }, + { code: "'abcdef'.replace('a', 'x')", expected: { value: "xbcdef" } }, + { code: "'abcdef'.replace(/a/, 'x')", expected: { value: "xbcdef" } }, + { code: "'abcdef'.replace(/a+/, 'x')", expected: null }, + { + code: "'abcdef'.match('a')", + expected: { 
value: "abcdef".match("a") }, + }, + { + code: "'abcdef'.match(/a/gu)", + expected: { value: "abcdef".match(/a/gu) }, + }, + { code: "'abcdef'.match(/a+/g)", expected: null }, + { code: "'abab'.split('a')", expected: { value: ["", "b", "b"] } }, + { code: "'abab'.split(/a/)", expected: { value: ["", "b", "b"] } }, + { code: "'abab'.split(/a+/)", expected: null }, { code: "-1", expected: { value: -1 } }, { code: "+'1'", expected: { value: 1 } }, { code: "!0", expected: { value: true } }, From 97e1d87d07055ecbca46ebe45f911cc934672686 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sun, 19 Mar 2023 23:52:34 +0100 Subject: [PATCH 2/5] Typo --- src/get-static-value.mjs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/get-static-value.mjs b/src/get-static-value.mjs index 5262184..4cf91aa 100644 --- a/src/get-static-value.mjs +++ b/src/get-static-value.mjs @@ -254,6 +254,7 @@ function isSafeRegex(regex) { pattern = pattern.replace(/\[[^\]]*\]/gu, "a") // in the following check, we have to account for neither escapes nor character classes + if (/[+*{}]/u.test(pattern)) { // contains (potentially) unbound quantifiers, e.g. 
/a*/ // this can be exploited for up to exponential backtracking @@ -261,7 +262,7 @@ function isSafeRegex(regex) { } // collect the number of branches in the regex - // here, a branch is a non-constant quantifier of disjunction + // here, a branch is a non-constant quantifier or disjunction const branches = (pattern.match(/\||[^(]\?/gu) || []).length // with n branches, it is possible to cause 2^n backtracking steps From 8bc6ceaff2e3d87283461551a5c1850d6ee2658b Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sat, 30 Sep 2023 19:10:18 +0200 Subject: [PATCH 3/5] code cov --- test/get-static-value.mjs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/get-static-value.mjs b/test/get-static-value.mjs index 60233e0..c6a80df 100644 --- a/test/get-static-value.mjs +++ b/test/get-static-value.mjs @@ -203,7 +203,16 @@ describe("The 'getStaticValue' function", () => { { code: "'abcdef'.match(/a+/g)", expected: null }, { code: "'abab'.split('a')", expected: { value: ["", "b", "b"] } }, { code: "'abab'.split(/a/)", expected: { value: ["", "b", "b"] } }, + { + code: "'abab'.split(/(?:a|a)/)", + expected: { value: ["", "b", "b"] }, + }, { code: "'abab'.split(/a+/)", expected: null }, + { code: "'abab'.split(/(?:a|a)+/)", expected: null }, + { + code: "'abab'.split(/(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)(?:a|a)b/)", + expected: null, + }, { code: "-1", expected: { value: -1 } }, { code: "+'1'", expected: { value: 1 } }, { code: "!0", expected: { value: true } }, From 34325f82b52a800bcfb432cff5218d5e53c02211 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sat, 30 Sep 2023 19:19:19 +0200 Subject: [PATCH 4/5] code cov --- test/get-static-value.mjs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/get-static-value.mjs b/test/get-static-value.mjs index c6a80df..393f437 100644 --- a/test/get-static-value.mjs +++ b/test/get-static-value.mjs @@ -200,6 +200,7 @@ describe("The 'getStaticValue' function", () => { code: 
"'abcdef'.match(/a/gu)", expected: { value: "abcdef".match(/a/gu) }, }, + { code: "'abcdef'.match('[')", expected: null }, { code: "'abcdef'.match(/a+/g)", expected: null }, { code: "'abab'.split('a')", expected: { value: ["", "b", "b"] } }, { code: "'abab'.split(/a/)", expected: { value: ["", "b", "b"] } }, From 342b43c33763d235b657118ffac0d33f4eccc745 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Sun, 15 Oct 2023 19:12:15 +0200 Subject: [PATCH 5/5] Proper implementation of `isSafeRegex` --- package.json | 1 + src/get-static-value.mjs | 36 +----- src/safe-regex.mjs | 243 +++++++++++++++++++++++++++++++++++++++ test/safe-regex.mjs | 93 +++++++++++++++ 4 files changed, 338 insertions(+), 35 deletions(-) create mode 100644 src/safe-regex.mjs create mode 100644 test/safe-regex.mjs diff --git a/package.json b/package.json index b4d8bb7..a179025 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "watch": "warun \"{src,test}/**/*.mjs\" -- npm run -s test:mocha" }, "dependencies": { + "@eslint-community/regexpp": "^4.9.1", "eslint-visitor-keys": "^3.4.3" }, "devDependencies": { diff --git a/src/get-static-value.mjs b/src/get-static-value.mjs index 67ee9c0..68305d2 100644 --- a/src/get-static-value.mjs +++ b/src/get-static-value.mjs @@ -1,6 +1,7 @@ /* globals globalThis, global, self, window */ import { findVariable } from "./find-variable.mjs" +import { isSafeRegex } from "./safe-regex.mjs" const globalObject = typeof globalThis !== "undefined" @@ -241,41 +242,6 @@ function checkSafeSearchValue(args) { throw new DangerousCallError() } -/** - * Returns whether the given regex will execute in O(n) (with a decently small - * constant factor) on any string. 
- * @param {RegExp} regex - * @returns {boolean} - */ -function isSafeRegex(regex) { - let pattern = regex.source - - // replace all escape sequences with some arbitrary character - pattern = pattern.replace(/\\./gu, "a") - // replace all character classes with some arbitrary character - pattern = pattern.replace(/\[[^\]]*\]/gu, "a") - - // in the following check, we have to account for neither escapes nor character classes - - if (/[+*{}]/u.test(pattern)) { - // contains (potentially) unbound quantifiers, e.g. /a*/ - // this can be exploited for up to exponential backtracking - return false - } - - // collect the number of branches in the regex - // here, a branch is a non-constant quantifier or disjunction - const branches = (pattern.match(/\||[^(]\?/gu) || []).length - - // with n branches, it is possible to cause 2^n backtracking steps - // E.g. /^(a|a)(a|a)(a|a)(a|a)$/ has 4 branches and takes around 16 steps to reject "aaaab" - if (branches > 10) { - return false - } - - return true -} - /** * Get the property descriptor. * @param {object} object The object to get. diff --git a/src/safe-regex.mjs b/src/safe-regex.mjs new file mode 100644 index 0000000..2ab66d5 --- /dev/null +++ b/src/safe-regex.mjs @@ -0,0 +1,243 @@ +import { RegExpParser } from "@eslint-community/regexpp" + +/** + * Returns whether the given regex will execute in O(n) (with a decently small + * constant factor) on any string. This is a conservative check. If the check + * returns `true`, then the regex is guaranteed to be safe. 
+ * @param {RegExp | string} regex + * @returns {boolean} + */ +export function isSafeRegex(regex) { + try { + const parser = new RegExpParser() + const ast = parser.parseLiteral(regex.toString()) + const paths = maxPossiblePaths(ast.pattern, "ltr") + return paths < 100 + } catch { + // can't parse regex, or there are some elements we don't support + return false + } +} + +/** + * @typedef {import("@eslint-community/regexpp").AST} AST + */ + +/** + * Returns the maximum number of possible paths through a given regex node. + * @param {import("@eslint-community/regexpp/ast").Element + * | import("@eslint-community/regexpp/ast").Alternative + * | import("@eslint-community/regexpp/ast").Pattern + * } n + * @param {"ltr" | "rtl"} direction The matching direction. + * @returns {number} + */ +// eslint-disable-next-line complexity +export function maxPossiblePaths(n, direction) { + switch (n.type) { + case "Alternative": { + let elements = n.elements + if (direction === "rtl") { + elements = [...elements].reverse() + } + let paths = 1 + for (const e of elements) { + paths *= maxPossiblePaths(e, direction) + if (paths === 0 || paths === Infinity) { + return paths + } + } + return paths + } + + case "Assertion": { + if (n.kind === "lookahead" || n.kind === "lookbehind") { + const d = n.kind === "lookahead" ? "ltr" : "rtl" + let paths = 0 + for (const e of n.alternatives) { + paths += maxPossiblePaths(e, d) + } + return paths + } + // built-in assertions are always constant + return 1 + } + + case "Backreference": + return 1 + + case "Character": + case "CharacterSet": + case "CharacterClass": + case "ExpressionCharacterClass": + return getStringsInCharacters(n) + (hasNoCharacters(n) ? 0 : 1) + + case "Quantifier": { + if (n.max === 0) { + return 1 + } + const inner = maxPossiblePaths(n.element, direction) + if (inner === 0) { + return n.min === 0 ? 
1 : 0 + } + if (n.max === Infinity) { + return Infinity + } + if (inner === Infinity) { + return inner + } + const constant = inner ** n.min + if (n.min === n.max) { + return constant + } + // The {n,m} case (n!=m) is a bit harder. + // Example: (a|b){2,4} is equivalent to (a|b){2}(a|b){0,2} + // To get the maximum possible paths of any x{0,p}, we first note + // that this is the same as x{0}|x|xx|xxx|...|x{p}. So the max + // paths of x{0,p} is the sum of the max paths of x{0}, x{1}, ..., x{p}. + // Let y=maxPossiblePaths(x). Then maxPossiblePaths(x{0,p}) = + // = 1 + y + y^2 + y^3 + ... + y^p + // = ceil(y^(p+1)/(y-1))-1 (if y>=2) + // = p+1 (if y=1) + // = 1 (if y=0) + const p = n.max - n.min + let e + if (inner < 2) { + e = p * inner + 1 + } else { + e = Math.ceil(inner ** (p + 1) / (inner - 1)) - 1 + } + return constant * e + } + + case "CapturingGroup": + case "Group": + case "Pattern": { + let paths = 0 + for (const e of n.alternatives) { + paths += maxPossiblePaths(e, direction) + if (paths === Infinity) { + return paths + } + } + return paths + } + + default: + return assertNever(n) + } +} + +/** + * Returns the worst-case (=maximum) number of string (length!=1) elements in the given character element. + * @param {import("@eslint-community/regexpp/ast").CharacterClassElement + * | import("@eslint-community/regexpp/ast").ExpressionCharacterClass["expression"] + * | import("@eslint-community/regexpp/ast").CharacterSet + * | import("@eslint-community/regexpp/ast").CharacterClass + * } n + * @returns {number} + * + * @typedef {import("@eslint-community/regexpp").AST} AST + */ +function getStringsInCharacters(n) { + switch (n.type) { + case "Character": + case "CharacterClassRange": + return 0 + + case "CharacterSet": + // since we can't know how many strings the set contains, we + // just assume 1000 + return n.kind === "property" && n.strings ?
1000 : 0 + + case "ClassStringDisjunction": + return n.alternatives.filter((a) => a.elements.length !== 1).length + + case "CharacterClass": + if (n.negate || !n.unicodeSets) { + return 0 + } + return n.elements.reduce((a, b) => a + getStringsInCharacters(b), 0) + + case "ExpressionCharacterClass": + if (n.negate) { + return 0 + } + return getStringsInCharacters(n.expression) + + case "ClassIntersection": + return Math.min( + getStringsInCharacters(n.left), + getStringsInCharacters(n.right), + ) + case "ClassSubtraction": + return getStringsInCharacters(n.left) + + default: + return assertNever(n) + } +} + +/** + * Returns `true` if the given element does not contain any single-character + * elements. If `false` is returned, then the given element might still contain + * single-character elements. + * @param {import("@eslint-community/regexpp/ast").CharacterClassElement + * | import("@eslint-community/regexpp/ast").ExpressionCharacterClass["expression"] + * | import("@eslint-community/regexpp/ast").CharacterSet + * | import("@eslint-community/regexpp/ast").CharacterClass + * } n + * @returns {boolean} + * + * @typedef {import("@eslint-community/regexpp").AST} AST + */ +function hasNoCharacters(n) { + switch (n.type) { + case "Character": + case "CharacterClassRange": + return false + + case "CharacterSet": + // while not exactly true, we'll just assume that character sets + // always contain at least one character + return false + + case "ClassStringDisjunction": + return n.alternatives.every((a) => a.elements.length !== 1) + + case "CharacterClass": + if (n.negate) { + // since we can't know whether the elements contain all + // characters, we have to assume that [^not all] will + // contain at least some + return false + } + return n.elements.every(hasNoCharacters) + + case "ExpressionCharacterClass": + if (n.negate) { + // since we can't know whether the expression contains all + // characters, we have to assume that [^not all] will + // contain at
least some + return false + } + return hasNoCharacters(n.expression) + + case "ClassIntersection": + return hasNoCharacters(n.left) || hasNoCharacters(n.right) + case "ClassSubtraction": + return hasNoCharacters(n.left) + + default: + return assertNever(n) + } +} + +/** + * A function that should never be called. + * @param {never} value + * @returns {never} + */ +function assertNever(value) { + throw new Error(`Unexpected value: ${value}`) +} diff --git a/test/safe-regex.mjs b/test/safe-regex.mjs new file mode 100644 index 0000000..2154c37 --- /dev/null +++ b/test/safe-regex.mjs @@ -0,0 +1,93 @@ +import { RegExpParser } from "@eslint-community/regexpp" +import assert from "assert" +import { isSafeRegex, maxPossiblePaths } from "../src/safe-regex.mjs" + +describe("isSafeRegex", () => { + const maxPaths = { + [String.raw`/[]/`]: 0, + [String.raw`/[]+/`]: 0, + [String.raw`/[]a+/`]: 0, + [String.raw`/(?<=cb+[])a/`]: 0, + [String.raw`/[\w&&\q{foo|bar}]/v`]: 0, + + [String.raw`/a/`]: 1, + [String.raw`/[a]/`]: 1, + [String.raw`/foobar/`]: 1, + [String.raw`/\bfoobar\b/`]: 1, + [String.raw`/^foobar$/`]: 1, + [String.raw`/^foobar$/u`]: 1, + [String.raw`/^foobar$/v`]: 1, + [String.raw`/\p{ASCII}/v`]: 1, + [String.raw`/[abcA-Z\d\w\p{ASCII}]/`]: 1, + [String.raw`/[abcA-Z\d\w\p{ASCII}]/u`]: 1, + [String.raw`/[abcA-Z\d\w\p{ASCII}]/v`]: 1, + [String.raw`/[abcA-Z\d\w\p{ASCII}\q{f|g|h}]/v`]: 1, + [String.raw`/[^abcA-Z\d\w\p{ASCII}\q{f|g|h}]/v`]: 1, + [String.raw`/a{100}/v`]: 1, + [String.raw`/[]*/v`]: 1, + [String.raw`/[]?/v`]: 1, + [String.raw`/[]{0,100}/v`]: 1, + [String.raw`/(?:a*a*a*a*){0}/`]: 1, + [String.raw`/(a)b\1/v`]: 1, + [String.raw`/a(?!foo)/`]: 1, + [String.raw`/[^[a-b]&&\w]/v`]: 1, + [String.raw`/[\w&&\d]/v`]: 1, + [String.raw`/[^\p{ASCII}--\w]/v`]: 1, + [String.raw`/[\w&&[\d\q{foo|bar}]]/v`]: 1, + + [String.raw`/a|b/`]: 2, + [String.raw`/a|a/`]: 2, + [String.raw`/a?/`]: 2, + [String.raw`/a??/`]: 2, + [String.raw`/[\q{foo|bar}]/v`]: 2, + [String.raw`/[\q{foo|}]/v`]: 
2, + [String.raw`/[\q{foo}\w]/v`]: 2, + [String.raw`/[\q{}\w]/v`]: 2, + [String.raw`/(a|b)c\1/v`]: 2, + [String.raw`/[[\p{ASCII}\q{foo}]--\w]/v`]: 2, + + [String.raw`/a{2,4}/v`]: 3, + [String.raw`/(a|b){2,4}/v`]: 28, + [String.raw`/(a|b|c){2,4}/v`]: 117, + [String.raw`/(a|b|c)(a|b|c)((a|b|c)((a|b|c)|)|)/v`]: 117, + + [String.raw`/(a|b){10}/v`]: 2 ** 10, + [String.raw`/(a|b|c|d|e){10}/v`]: 5 ** 10, + + [String.raw`/^\p{RGI_Emoji}$/v`]: 1001, + + [String.raw`/(a+)b\1/`]: Infinity, + [String.raw`/(?:a|a)+b/`]: Infinity, + [String.raw`/b+$/`]: Infinity, + [String.raw`/b+[]/`]: Infinity, + [String.raw`/b+$|foo/`]: Infinity, + [String.raw`/foo|b+$/`]: Infinity, + [String.raw`/(?:a+){3}/`]: Infinity, + [String.raw`/(a|b|c|d|e){1000}/v`]: Infinity, + } + + it("should be false for invalid regexes", () => { + const actual = isSafeRegex("/foo[a-/u") + assert.deepStrictEqual(actual, false) + }) + + // it("should be true for safe regexes", () => { + // for (const [regex, paths] of Object.entries(maxPaths)) { + // if (paths < 100) { + // const actual = isSafeRegex(regex) + // assert.deepStrictEqual(actual, true) + // } + // } + // }) + + describe("maxPaths", () => { + for (const [regex, paths] of Object.entries(maxPaths)) { + it(regex, () => { + const parser = new RegExpParser() + const ast = parser.parseLiteral(regex.toString()) + const actual = maxPossiblePaths(ast.pattern, "ltr") + assert.deepStrictEqual(actual, paths) + }) + } + }) +})