From c042630ed8f94c32106d92a6a8deb192dabe558d Mon Sep 17 00:00:00 2001 From: Paul Wankadia Date: Fri, 22 Dec 2023 13:27:19 +0000 Subject: [PATCH] Report `kRegexpBadPerlOp` for look-behind assertions. Fixes #468. Change-Id: I9a72db0bbb9a04e5081cc1f0f94399476b63d1c3 Reviewed-on: https://code-review.googlesource.com/c/re2/+/62330 Reviewed-by: Perry Lorier Reviewed-by: Paul Wankadia --- re2/parse.cc | 11 +++++++++++ re2/testing/parse_test.cc | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/re2/parse.cc b/re2/parse.cc index 46335bdb1..904599280 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -2070,6 +2070,17 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) { return false; } + // Check for look-around assertions. This is NOT because we support them! ;) + // As per https://github.com/google/re2/issues/468, we really want to report + // kRegexpBadPerlOp (not kRegexpBadNamedCapture) for look-behind assertions. + // Additionally, it would be nice to report not "(?<", but "(?<=" or "(?<!". + if ((t.size() > 3 && (t[2] == '=' || t[2] == '!')) || + (t.size() > 4 && t[2] == '<' && (t[3] == '=' || t[3] == '!'))) { + status_->set_code(kRegexpBadPerlOp); + status_->set_error_arg(absl::string_view(t.data(), t[2] == '<' ? 4 : 3)); + return false; + } + // Check for named captures, first introduced in Python's regexp library. // As usual, there are three slightly different syntaxes: // diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc index acb949c4f..7684b62a4 100644 --- a/re2/testing/parse_test.cc +++ b/re2/testing/parse_test.cc @@ -532,4 +532,30 @@ TEST(NamedCaptures, ErrorArgs) { EXPECT_EQ(status.error_arg(), "(?"); } +// Test that look-around error args are correct. 
+TEST(LookAround, ErrorArgs) { + RegexpStatus status; + Regexp* re; + + re = Regexp::Parse("(?=foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?="); + + re = Regexp::Parse("(?!foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?!"); + + re = Regexp::Parse("(?<=foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?<="); + + re = Regexp::Parse("(?