Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions python/ql/lib/semmle/python/regexp/RegexTreeView.qll
Original file line number Diff line number Diff line change
Expand Up @@ -964,7 +964,7 @@ module Impl implements RegexTreeViewSig {
* ```
*/
class RegExpPositiveLookahead extends RegExpLookahead {
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end, _, _) }

override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
}
Expand All @@ -979,7 +979,7 @@ module Impl implements RegexTreeViewSig {
* ```
*/
additional class RegExpNegativeLookahead extends RegExpLookahead {
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end, _, _) }

override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
}
Expand All @@ -1006,7 +1006,7 @@ module Impl implements RegexTreeViewSig {
* ```
*/
class RegExpPositiveLookbehind extends RegExpLookbehind {
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end, _, _) }

override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
}
Expand All @@ -1021,7 +1021,7 @@ module Impl implements RegexTreeViewSig {
* ```
*/
additional class RegExpNegativeLookbehind extends RegExpLookbehind {
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end, _, _) }

override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
}
Expand Down
70 changes: 46 additions & 24 deletions python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll
Original file line number Diff line number Diff line change
Expand Up @@ -554,9 +554,9 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.negativeAssertionGroup(start, end)
or
this.positiveLookaheadAssertionGroup(start, end)
this.positiveLookaheadAssertionGroup(start, end, _, _)
or
this.positiveLookbehindAssertionGroup(start, end)
this.positiveLookbehindAssertionGroup(start, end, _, _)
}

/** Holds if an empty group is found between `start` and `end`. */
Expand All @@ -572,15 +572,15 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.negativeAssertionGroup(start, end)
or
this.positiveLookaheadAssertionGroup(start, end)
this.positiveLookaheadAssertionGroup(start, end, _, _)
}

private predicate emptyMatchAtEndGroup(int start, int end) {
this.emptyGroup(start, end)
or
this.negativeAssertionGroup(start, end)
or
this.positiveLookbehindAssertionGroup(start, end)
this.positiveLookbehindAssertionGroup(start, end, _, _)
}

private predicate negativeAssertionGroup(int start, int end) {
Expand All @@ -593,32 +593,40 @@ class RegExp extends Expr instanceof StringLiteral {
)
}

/** Holds if a negative lookahead is found between `start` and `end` */
predicate negativeLookaheadAssertionGroup(int start, int end) {
exists(int in_start | this.negative_lookahead_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
/**
* Holds if a negative lookahead is found between `start` and `end`, with contents
* between `in_start` and `in_end`.
*/
predicate negativeLookaheadAssertionGroup(int start, int end, int in_start, int in_end) {
this.negative_lookahead_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
}

/** Holds if a negative lookbehind is found between `start` and `end` */
predicate negativeLookbehindAssertionGroup(int start, int end) {
exists(int in_start | this.negative_lookbehind_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
/**
* Holds if a negative lookbehind is found between `start` and `end`, with contents
* between `in_start` and `in_end`.
*/
predicate negativeLookbehindAssertionGroup(int start, int end, int in_start, int in_end) {
this.negative_lookbehind_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
}

/** Holds if a positive lookahead is found between `start` and `end` */
predicate positiveLookaheadAssertionGroup(int start, int end) {
exists(int in_start | this.lookahead_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
/**
* Holds if a positive lookahead is found between `start` and `end`, with contents
* between `in_start` and `in_end`.
*/
predicate positiveLookaheadAssertionGroup(int start, int end, int in_start, int in_end) {
this.lookahead_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
}

/** Holds if a positive lookbehind is found between `start` and `end` */
predicate positiveLookbehindAssertionGroup(int start, int end) {
exists(int in_start | this.lookbehind_assertion_start(start, in_start) |
this.groupContents(start, end, in_start, _)
)
/**
* Holds if a positive lookbehind is found between `start` and `end`, with contents
* between `in_start` and `in_end`.
*/
predicate positiveLookbehindAssertionGroup(int start, int end, int in_start, int in_end) {
this.lookbehind_assertion_start(start, in_start) and
this.groupContents(start, end, in_start, in_end)
}

private predicate group_start(int start, int end) {
Expand Down Expand Up @@ -1049,6 +1057,13 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.alternationOption(x, y, start, end)
)
or
// Lookbehind assertions can potentially match the start of the string
(
this.positiveLookbehindAssertionGroup(_, _, start, _) or
this.negativeLookbehindAssertionGroup(_, _, start, _)
) and
this.item(start, end)
}

/** A part of the regex that may match the end of the string. */
Expand All @@ -1074,6 +1089,13 @@ class RegExp extends Expr instanceof StringLiteral {
or
this.alternationOption(x, y, start, end)
)
or
// Lookahead assertions can potentially match the end of the string
(
this.positiveLookaheadAssertionGroup(_, _, _, end) or
this.negativeLookaheadAssertionGroup(_, _, _, end)
) and
this.item(start, end)
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
category: minorAnalysis
---

- The queries that check for unmatchable `$` and `^` in regular expressions did not account correctly for occurrences inside lookahead and lookbehind assertions. These occurrences are now handled correctly, eliminating this source of false positives.
2 changes: 2 additions & 0 deletions python/ql/test/library-tests/regex/FirstLast.expected
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
| (?!not-this)^[A-Z_]+$ | first | 12 | 13 |
| (?!not-this)^[A-Z_]+$ | first | 13 | 19 |
| (?!not-this)^[A-Z_]+$ | first | 13 | 20 |
| (?!not-this)^[A-Z_]+$ | last | 3 | 11 |
| (?!not-this)^[A-Z_]+$ | last | 13 | 19 |
| (?!not-this)^[A-Z_]+$ | last | 13 | 20 |
| (?!not-this)^[A-Z_]+$ | last | 20 | 21 |
Expand Down Expand Up @@ -101,6 +102,7 @@
| ^[A-Z_]+$(?<!not-this) | first | 0 | 1 |
| ^[A-Z_]+$(?<!not-this) | first | 1 | 7 |
| ^[A-Z_]+$(?<!not-this) | first | 1 | 8 |
| ^[A-Z_]+$(?<!not-this) | first | 13 | 21 |
| ^[A-Z_]+$(?<!not-this) | last | 1 | 7 |
| ^[A-Z_]+$(?<!not-this) | last | 1 | 8 |
| ^[A-Z_]+$(?<!not-this) | last | 8 | 9 |
Expand Down
10 changes: 9 additions & 1 deletion python/ql/test/query-tests/Expressions/Regex/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,12 @@
re.compile(r"[\u0000-\uFFFF]")

#Allow unicode names
re.compile(r"[\N{degree sign}\N{EM DASH}]")
re.compile(r"[\N{degree sign}\N{EM DASH}]")

#Lookahead assertions. None of these are unmatchable dollars:
re.compile(r"^(?=a$)[ab]")
re.compile(r"^(?!a$)[ab]")

#Lookbehind assertions. None of these are unmatchable carets:
re.compile(r"(?<=^a)a")
re.compile(r"(?<!^a)a")