From b088651dc25d420f917555f192805758865f9a2d Mon Sep 17 00:00:00 2001
From: firewave
Date: Thu, 27 Feb 2025 15:13:21 +0100
Subject: [PATCH] Regex: added `std::regex` implementation [skip ci]

Adds `StdRegex`, a `std::regex`-based implementation of the `Regex`
interface, next to the existing `PcreRegex`. `Regex::create()` takes a
new `Regex::Type` parameter to select the implementation and reports
"unknown regular expression type" for any other value.

The new command-line option `--regex=<pcre|std>` selects the
implementation used to compile `--rule` and `--rule-file` patterns.
PCRE stays the default when the option is not given, so existing
invocations keep working. See the TODO comments in the rule handling
regarding the interaction with the order of the options.

The "pcre_compile failed: "-style prefixes are dropped from the PCRE
error messages so that both implementations report errors in the same
shape. The regex tests now run once per implementation.
---
 cli/cmdlineparser.cpp | 30 +++++++++++++++--
 lib/regex.cpp         | 76 ++++++++++++++++++++++++++++++++++++++-----
 lib/regex.h           |  9 ++++-
 test/testregex.cpp    | 29 ++++++++++++++---
 4 files changed, 128 insertions(+), 16 deletions(-)

diff --git a/cli/cmdlineparser.cpp b/cli/cmdlineparser.cpp
index b9006711ce1..55ae8ab6d1d 100644
--- a/cli/cmdlineparser.cpp
+++ b/cli/cmdlineparser.cpp
@@ -398,6 +398,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
 
     bool executorAuto = true;
 
+#ifdef HAVE_RULES
+    Regex::Type regexType = Regex::Type::Pcre;
+#endif
+
     for (int i = 1; i < argc; i++) {
         if (argv[i][0] != '-') {
             mPathNames.emplace_back(Path::fromNativeSeparators(Path::removeQuotationMarks(argv[i])));
@@ -1207,6 +1211,26 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
             else if (std::strcmp(argv[i], "-q") == 0 || std::strcmp(argv[i], "--quiet") == 0)
                 mSettings.quiet = true;
 
+            // Regular expression implementation to use for the rules
+            else if (std::strncmp(argv[i], "--regex=", 8) == 0) {
+#ifdef HAVE_RULES
+                const std::string type = 8 + argv[i];
+                if (type == "pcre") {
+                    regexType = Regex::Type::Pcre;
+                }
+                else if (type == "std") {
+                    regexType = Regex::Type::Std;
+                }
+                else {
+                    mLogger.printError("unknown regex type '" + type + "'.");
+                    return Result::Fail;
+                }
+#else
+                mLogger.printError("Option --regex cannot be used as Cppcheck has not been built with rules support.");
+                return Result::Fail;
+#endif
+            }
+
             // Output relative paths
             else if (std::strcmp(argv[i], "-rp") == 0 || std::strcmp(argv[i], "--relative-paths") == 0)
                 mSettings.relativePaths = true;
@@ -1276,8 +1300,9 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
                     return Result::Fail;
                 }
 
+                // TODO: the type breaks the left-to-right processing
                 std::string regex_err;
-                auto regex = Regex::create(rule.pattern, regex_err);
+                auto regex = Regex::create(rule.pattern, regexType, regex_err);
                 if (!regex) {
                     mLogger.printError("failed to compile rule pattern '" + rule.pattern + "' (" + regex_err + ").");
                     return Result::Fail;
@@ -1360,8 +1385,9 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
                             return Result::Fail;
                         }
 
+                        // TODO: the type breaks the left-to-right processing
                         std::string regex_err;
-                        auto regex = Regex::create(rule.pattern, regex_err);
+                        auto regex = Regex::create(rule.pattern, regexType, regex_err);
                         if (!regex) {
                             mLogger.printError("unable to load rule-file '" + ruleFile + "' - pattern '" + rule.pattern + "' failed to compile (" + regex_err + ").");
                             return Result::Fail;
diff --git a/lib/regex.cpp b/lib/regex.cpp
index 6ede1406434..e8726637f91 100644
--- a/lib/regex.cpp
+++ b/lib/regex.cpp
@@ -20,6 +20,7 @@
 
 #include "regex.h"
 
+#include <regex>
 #include <utility>
 
 #ifdef _WIN32
@@ -188,15 +189,15 @@ namespace {
     std::string PcreRegex::compile()
     {
         if (mRe)
-            return "pcre_compile failed: regular expression has already been compiled";
+            return "regular expression has already been compiled";
 
         const char *pcreCompileErrorStr = nullptr;
         int erroffset = 0;
         pcre * const re = pcre_compile(mPattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
         if (!re) {
             if (pcreCompileErrorStr)
-                return "pcre_compile failed: " + std::string(pcreCompileErrorStr);
-            return "pcre_compile failed: unknown error";
+                return pcreCompileErrorStr;
+            return "unknown error";
         }
 
         // Optimize the regex, but only if PCRE_CONFIG_JIT is available
@@ -209,7 +210,7 @@ namespace {
         if (pcreStudyErrorStr) {
             // pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
             pcre_free(re);
-            return "pcre_study failed: " + std::string(pcreStudyErrorStr);
+            return std::string(pcreStudyErrorStr) + " (pcre_study)";
         }
         mExtra = pcreExtra;
 #endif
@@ -222,7 +223,7 @@ namespace {
     std::string PcreRegex::match(const std::string& str, const MatchFn& match) const
     {
         if (!mRe)
-            return "pcre_exec failed: regular expression has not been compiled yet";
+            return "regular expression has not been compiled yet";
 
         int pos = 0;
         int ovector[30]= {0};
@@ -231,7 +232,7 @@ namespace {
             if (pcreExecRet == PCRE_ERROR_NOMATCH)
                 return "";
             if (pcreExecRet < 0) {
-                return "pcre_exec failed (pos: " + std::to_string(pos) + "): " + pcreErrorCodeToString(pcreExecRet);
+                return std::string(pcreErrorCodeToString(pcreExecRet)) + " (pos: " + std::to_string(pos) + ")";
             }
             const auto pos1 = static_cast<unsigned int>(ovector[0]);
             const auto pos2 = static_cast<unsigned int>(ovector[1]);
@@ -246,10 +247,69 @@ namespace {
     }
 }
 
-std::shared_ptr<Regex> Regex::create(std::string pattern, std::string& err)
+namespace {
+    class StdRegex : public Regex
+    {
+    public:
+        explicit StdRegex(std::string pattern)
+            : mPattern(std::move(pattern))
+        {}
+
+        std::string compile()
+        {
+            if (mCompiled)
+                return "regular expression has already been compiled";
+
+            try {
+                mRegex = std::regex(mPattern);
+            } catch(const std::exception& e) {
+                return e.what();
+            }
+            mCompiled = true;
+            return "";
+        }
+
+        std::string match(const std::string& str, const MatchFn& matchFn) const override
+        {
+            if (!mCompiled)
+                return "regular expression has not been compiled yet";
+
+            auto I = std::sregex_iterator(str.cbegin(), str.cend(), mRegex);
+            const auto E = std::sregex_iterator();
+            while (I != E)
+            {
+                const std::smatch& match = *I;
+                matchFn(match.position(), match.position() + match.length());
+                ++I;
+            }
+            return "";
+        }
+
+    private:
+        std::string mPattern;
+        std::regex mRegex;
+        bool mCompiled{};
+    };
+}
+
+template<typename T>
+static T* createAndCompileRegex(std::string pattern, std::string& err)
 {
-    auto* regex = new PcreRegex(std::move(pattern));
+    T* regex = new T(std::move(pattern));
     err = regex->compile();
+    return regex;
+}
+
+std::shared_ptr<Regex> Regex::create(std::string pattern, Type type, std::string& err)
+{
+    Regex* regex = nullptr;
+    if (type == Type::Pcre)
+        regex = createAndCompileRegex<PcreRegex>(std::move(pattern), err);
+    else if (type == Type::Std)
+        regex = createAndCompileRegex<StdRegex>(std::move(pattern), err);
+    else {
+        err = "unknown regular expression type";
+    }
     if (!err.empty()) {
         delete regex;
         return nullptr;
diff --git a/lib/regex.h b/lib/regex.h
index 9f264314efe..f9fe2e817c2 100644
--- a/lib/regex.h
+++ b/lib/regex.h
@@ -37,7 +37,14 @@ class CPPCHECKLIB Regex
     using MatchFn = std::function<void (int pos1, int pos2)>;
     virtual std::string match(const std::string& str, const MatchFn& matchFn) const = 0;
 
-    static std::shared_ptr<Regex> create(std::string pattern, std::string& err);
+    enum class Type
+    {
+        Unknown = 0,
+        Pcre = 1,
+        Std = 2
+    };
+
+    static std::shared_ptr<Regex> create(std::string pattern, Type type, std::string& err);
 };
 
 #endif // HAVE_RULES
diff --git a/test/testregex.cpp b/test/testregex.cpp
index 3809b19796e..54567dc8165 100644
--- a/test/testregex.cpp
+++ b/test/testregex.cpp
@@ -25,11 +25,13 @@
 #include <string>
 #include <utility>
 
-class TestRegEx : public TestFixture {
+class TestRegExBase : public TestFixture {
 public:
-    TestRegEx() : TestFixture("TestRegEx") {}
+    TestRegExBase(const char * const name, Regex::Type type) : TestFixture(name), mType(type) {}
 
 private:
+    Regex::Type mType{};
+
     void run() override {
         TEST_CASE(match);
         TEST_CASE(nomatch);
@@ -43,7 +45,7 @@ class TestRegEx : public TestFixture {
 #define assertRegex(...) assertRegex_(__FILE__, __LINE__, __VA_ARGS__)
     std::shared_ptr<Regex> assertRegex_(const char* file, int line, std::string pattern, const std::string& exp_err = "") const {
         std::string regex_err;
-        auto r = Regex::create(std::move(pattern), regex_err);
+        auto r = Regex::create(std::move(pattern), mType, regex_err);
         if (exp_err.empty())
             ASSERT_LOC(!!r.get(), file, line);
         else
@@ -79,7 +81,13 @@ class TestRegEx : public TestFixture {
     }
 
     void compileError() const {
-        (void)assertRegex("[", "pcre_compile failed: missing terminating ] for character class");
+        std::string exp;
+        if (mType == Regex::Type::Pcre)
+            exp = "missing terminating ] for character class";
+        else if (mType == Regex::Type::Std)
+            exp = "Unexpected character within '[...]' in regular expression";
+
+        (void)assertRegex("[", exp);
     }
 
     void copy() const {
@@ -189,6 +197,17 @@ class TestRegEx : public TestFixture {
 #undef assertRegex
 };
 
-REGISTER_TEST(TestRegEx)
+class TestRegExPcre : public TestRegExBase {
+public:
+    TestRegExPcre() : TestRegExBase("TestRegExPcre", Regex::Type::Pcre) {}
+};
+
+class TestRegExStd : public TestRegExBase {
+public:
+    TestRegExStd() : TestRegExBase("TestRegExStd", Regex::Type::Std) {}
+};
+
+REGISTER_TEST(TestRegExPcre)
+REGISTER_TEST(TestRegExStd)
 
 #endif // HAVE_RULES