From 80e31fd8d49cb6f9bbb98103c2e37470637ecaa0 Mon Sep 17 00:00:00 2001 From: Anil Mahtani <929854+Anilm3@users.noreply.github.com> Date: Thu, 23 Nov 2023 11:06:33 +0000 Subject: [PATCH] Refactor cmake scripts and support LTO (#232) --- .github/workflows/benchmark.yml | 2 +- .github/workflows/fuzz.yml | 4 +- .github/workflows/scripts/llvm.sh | 5 +- .github/workflows/test.yml | 12 +- CMakeLists.txt | 384 ++----------- cmake/objects.cmake | 145 +++++ cmake/package.cmake | 44 ++ cmake/shared.cmake | 83 +++ cmake/static.cmake | 41 ++ docker/libddwaf/build/Dockerfile | 5 +- fuzzing/CMakeLists.txt | 14 +- fuzzing/build.sh | 4 +- fuzzing/scripts/show_coverage.sh | 8 +- perf/CMakeLists.txt | 1 + perf/utils.cpp | 12 + perf/yaml_helpers.cpp | 8 +- src/generator/extract_schema.cpp | 2 +- src/log.hpp | 17 +- src/transformer/lowercase.cpp | 4 +- src/vendor/lua-aho-corasick/.gitignore | 6 - src/vendor/lua-aho-corasick/Makefile | 134 ----- src/vendor/lua-aho-corasick/ac_lua.cxx | 173 ------ src/vendor/lua-aho-corasick/load_ac.lua | 90 --- src/vendor/lua-aho-corasick/mytest.cxx | 200 ------- src/vendor/lua-aho-corasick/tests/Makefile | 65 --- .../lua-aho-corasick/tests/ac_bench.cxx | 519 ------------------ .../lua-aho-corasick/tests/ac_test_aggr.cxx | 135 ----- .../lua-aho-corasick/tests/ac_test_simple.cxx | 275 ---------- .../lua-aho-corasick/tests/dict/README.txt | 1 - .../lua-aho-corasick/tests/dict/dict1.txt | 11 - .../lua-aho-corasick/tests/load_ac_test.lua | 82 --- .../lua-aho-corasick/tests/lua_test.lua | 67 --- .../lua-aho-corasick/tests/test_base.hpp | 60 -- .../lua-aho-corasick/tests/test_bigfile.cxx | 167 ------ .../lua-aho-corasick/tests/test_main.cxx | 33 -- src/vendor/re2/tostring.cc | 8 +- src/version.hpp.in | 2 +- tests/CMakeLists.txt | 13 +- tests/parser_transformers_test.cpp | 4 +- tests/test.hpp | 36 +- tests/test_utils.cpp | 6 +- tests/test_utils.hpp | 34 +- tests/transformer/manager_test.cpp | 22 +- tests/transformer/transformer_utils.hpp | 62 +-- tools/CMakeLists.txt | 17 +- validator/assert.hpp | 14 +- 46 files changed, 518 insertions(+), 2513 deletions(-) create mode 100644 cmake/objects.cmake create mode 100644 cmake/package.cmake create mode 100644 cmake/shared.cmake create mode 100644 cmake/static.cmake delete mode 100644 src/vendor/lua-aho-corasick/.gitignore delete mode 100644 src/vendor/lua-aho-corasick/Makefile delete mode 100644 src/vendor/lua-aho-corasick/ac_lua.cxx delete mode 100644 src/vendor/lua-aho-corasick/load_ac.lua delete mode 100644 src/vendor/lua-aho-corasick/mytest.cxx delete mode 100644 src/vendor/lua-aho-corasick/tests/Makefile delete mode 100644 src/vendor/lua-aho-corasick/tests/ac_bench.cxx delete mode 100644 src/vendor/lua-aho-corasick/tests/ac_test_aggr.cxx delete mode 100644 src/vendor/lua-aho-corasick/tests/ac_test_simple.cxx delete mode 100644 src/vendor/lua-aho-corasick/tests/dict/README.txt delete mode 100644 src/vendor/lua-aho-corasick/tests/dict/dict1.txt delete mode 100644 src/vendor/lua-aho-corasick/tests/load_ac_test.lua delete mode 100644 src/vendor/lua-aho-corasick/tests/lua_test.lua delete mode 100644 src/vendor/lua-aho-corasick/tests/test_base.hpp delete mode 100644 src/vendor/lua-aho-corasick/tests/test_bigfile.cxx delete mode 100644 src/vendor/lua-aho-corasick/tests/test_main.cxx diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 543aae147..a435380be 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -27,7 +27,7 @@ jobs: run: | set -ex mkdir build && cd build - cmake .. -DCMAKE_BUILD_TYPE=Release + cmake .. -DCMAKE_BUILD_TYPE=Release -DLIBDDWAF_ENABLE_LTO=ON VERBOSE=1 make -j benchmark benchcmp - name: Run Benchmark diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 115681395..e3b3bce2e 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -27,8 +27,8 @@ jobs: - name: Install deps run: | DEBIAN_FRONTEND="noninteractive" sudo apt-get -y remove python3-lldb-14 - sudo .github/workflows/scripts/llvm.sh 15 - DEBIAN_FRONTEND="noninteractive" sudo apt-get -y install libfuzzer-15-dev + sudo .github/workflows/scripts/llvm.sh 17 + DEBIAN_FRONTEND="noninteractive" sudo apt-get -y install libfuzzer-17-dev - name: Build run: ./fuzzing/build.sh diff --git a/.github/workflows/scripts/llvm.sh b/.github/workflows/scripts/llvm.sh index 8dbf1bca3..200c0390f 100755 --- a/.github/workflows/scripts/llvm.sh +++ b/.github/workflows/scripts/llvm.sh @@ -20,7 +20,7 @@ usage() { exit 1; } -CURRENT_LLVM_STABLE=16 +CURRENT_LLVM_STABLE=17 BASE_URL="http://apt.llvm.org" # Check for required tools @@ -125,7 +125,8 @@ LLVM_VERSION_PATTERNS[13]="-13" LLVM_VERSION_PATTERNS[14]="-14" LLVM_VERSION_PATTERNS[15]="-15" LLVM_VERSION_PATTERNS[16]="-16" -LLVM_VERSION_PATTERNS[17]="" +LLVM_VERSION_PATTERNS[17]="-17" +LLVM_VERSION_PATTERNS[18]="" if [ ! ${LLVM_VERSION_PATTERNS[$LLVM_VERSION]+_} ]; then echo "This script does not support LLVM version $LLVM_VERSION" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2c94f7edd..c5e4c92e4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -171,16 +171,16 @@ jobs: - name: Install clang-{tidy,format} run: | DEBIAN_FRONTEND="noninteractive" sudo apt-get -y remove python3-lldb-14 - sudo .github/workflows/scripts/llvm.sh 15 - sudo apt-get install -y clang-tidy-15 clang-format-15 + sudo .github/workflows/scripts/llvm.sh 17 + sudo apt-get install -y clang-tidy-17 clang-format-17 - name: CMake env: - CXX: clang++-15 - CC: clang-15 + CXX: clang++-17 + CC: clang-17 run: | - cmake .. -DCMAKE_BUILD_TYPE=Debug -DCLANG_TIDY=/usr/bin/run-clang-tidy-15 \ - -DCLANG_FORMAT=/usr/bin/clang-format-15 + cmake .. -DCMAKE_BUILD_TYPE=Debug -DCLANG_TIDY=/usr/bin/run-clang-tidy-17 \ + -DCLANG_FORMAT=/usr/bin/clang-format-17 working-directory: Debug - name: Build diff --git a/CMakeLists.txt b/CMakeLists.txt index e226fe4e0..109be768e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,401 +1,104 @@ cmake_minimum_required(VERSION 3.14) + file(READ "version" version) +project(libddwaf VERSION "${version}") +message(STATUS "Project version: ${CMAKE_PROJECT_VERSION}") -project(libddwaf) -set(PROJECT_VERSION ${version}) -set(CMAKE_PROJECT_VERSION ${version}) +if(UNIX AND NOT APPLE) + set(LINUX TRUE) +endif() configure_file(src/version.hpp.in ${CMAKE_CURRENT_SOURCE_DIR}/src/version.hpp) -set(CMAKE_OSX_DEPLOYMENT_TARGET "10.12" CACHE STRING "Minimum OS X deployment version") -if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64" OR - CMAKE_OSX_ARCHITECTURES MATCHES "arm64") - set(CPU_TYPE ${CMAKE_OSX_ARCHITECTURES}) -endif() - set(CMAKE_C_STANDARD 99) set(CMAKE_CXX_STANDARD 20) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(LIBDDWAF_SHARED_LINKER_FLAGS "-static-libstdc++" CACHE STRING "Shared library extra linker flags") -set(LIBDDWAF_EXE_LINKER_FLAGS "" CACHE STRING "Executable extra linker flags") -set(LIBDDWAF_PACKAGE_SUFFIX "" CACHE STRING "Suffix for packaging purposes") -if(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") - set(LIBDDWAF_PACKAGE_PROCESSOR ${CMAKE_OSX_ARCHITECTURES} CACHE STRING "Alternative processor for packaging purposes") -else() - set(LIBDDWAF_PACKAGE_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Alternative processor for packaging purposes") +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE) endif() +message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") option(LIBDDWAF_BUILD_SHARED "Build shared library" ON) option(LIBDDWAF_BUILD_STATIC "Build shared library" ON) -option(LIBDDWAF_KEEP_SYMBOL_FILE "Keep the debug symbol file" ON) option(LIBDDWAF_TESTING "Load test subdirectories and targets" ON) option(LIBDDWAF_TEST_COVERAGE "Do coverage if possible" OFF) option(LIBDDWAF_VECTORIZED_TRANSFORMERS "Enable vectorization for transformers" ON) - -if(CMAKE_BUILD_TYPE STREQUAL "") - set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE) -endif() +option(LIBDDWAF_ENABLE_LTO "Enable link-time optimisation" OFF) if(NOT MSVC) - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wall -Wextra -Wno-narrowing") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE} -Wall -Wextra -Wno-narrowing -ggdb") - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Wall -Wextra -Wno-narrowing -ggdb") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -Wextra -Wno-narrowing") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE} -Wall -Wextra -Wno-narrowing -ggdb") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wno-narrowing -ggdb") + if (APPLE) + set(CMAKE_OSX_DEPLOYMENT_TARGET "10.12" CACHE STRING "Minimum OS X deployment version") + endif() + + add_compile_options(-Wall -Wextra -Wno-narrowing) + if (CMAKE_BUILD_TYPE MATCHES Debug OR CMAKE_BUILD_TYPE MATCHES RelWithDebInfo) + add_compile_options(-ggdb) + endif() else() - set(LIBDDWAF_MSVC_RUNTIME_LIBRARY "/MT" CACHE STRING "MSVC RuntimeLibrary") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${LIBDDWAF_MSVC_RUNTIME_LIBRARY}") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE} ${LIBDDWAF_MSVC_RUNTIME_LIBRARY}") - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${LIBDDWAF_MSVC_RUNTIME_LIBRARY}d") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${LIBDDWAF_MSVC_RUNTIME_LIBRARY}") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE} ${LIBDDWAF_MSVC_RUNTIME_LIBRARY}") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${LIBDDWAF_MSVC_RUNTIME_LIBRARY}d") + if (CMAKE_BUILD_TYPE MATCHES Debug) + add_compile_options(/MTd) + else() + add_compile_options(/MT) + endif() - add_definitions(-D_CRT_SECURE_NO_WARNINGS=1 -Dstrdup=_strdup -Dputenv=_putenv) + add_compile_definitions(-D_CRT_SECURE_NO_WARNINGS=1 -Dstrdup=_strdup -Dputenv=_putenv) endif() include(GNUInstallDirs) -add_subdirectory(third_party) - add_library(glibc_compat OBJECT ${libddwaf_SOURCE_DIR}/src/glibc-compat/time64.c) set_target_properties(glibc_compat PROPERTIES POSITION_INDEPENDENT_CODE 1) -set(LIBDDWAF_SOURCE - ${libddwaf_SOURCE_DIR}/src/ruleset_builder.cpp - ${libddwaf_SOURCE_DIR}/src/clock.cpp - ${libddwaf_SOURCE_DIR}/src/parameter.cpp - ${libddwaf_SOURCE_DIR}/src/interface.cpp - ${libddwaf_SOURCE_DIR}/src/context.cpp - ${libddwaf_SOURCE_DIR}/src/context_allocator.cpp - ${libddwaf_SOURCE_DIR}/src/event.cpp - ${libddwaf_SOURCE_DIR}/src/object.cpp - ${libddwaf_SOURCE_DIR}/src/object_store.cpp - ${libddwaf_SOURCE_DIR}/src/collection.cpp - ${libddwaf_SOURCE_DIR}/src/expression.cpp - ${libddwaf_SOURCE_DIR}/src/ruleset_info.cpp - ${libddwaf_SOURCE_DIR}/src/ip_utils.cpp - ${libddwaf_SOURCE_DIR}/src/processor.cpp - ${libddwaf_SOURCE_DIR}/src/iterator.cpp - ${libddwaf_SOURCE_DIR}/src/log.cpp - ${libddwaf_SOURCE_DIR}/src/obfuscator.cpp - ${libddwaf_SOURCE_DIR}/src/utils.cpp - ${libddwaf_SOURCE_DIR}/src/waf.cpp - ${libddwaf_SOURCE_DIR}/src/exclusion/input_filter.cpp - ${libddwaf_SOURCE_DIR}/src/exclusion/object_filter.cpp - ${libddwaf_SOURCE_DIR}/src/exclusion/rule_filter.cpp - ${libddwaf_SOURCE_DIR}/src/generator/extract_schema.cpp - ${libddwaf_SOURCE_DIR}/src/parser/common.cpp - ${libddwaf_SOURCE_DIR}/src/parser/parser.cpp - ${libddwaf_SOURCE_DIR}/src/parser/parser_v1.cpp - ${libddwaf_SOURCE_DIR}/src/parser/parser_v2.cpp - ${libddwaf_SOURCE_DIR}/src/parser/rule_data_parser.cpp - ${libddwaf_SOURCE_DIR}/src/matcher/phrase_match.cpp - ${libddwaf_SOURCE_DIR}/src/matcher/regex_match.cpp - ${libddwaf_SOURCE_DIR}/src/matcher/is_sqli.cpp - ${libddwaf_SOURCE_DIR}/src/matcher/is_xss.cpp - ${libddwaf_SOURCE_DIR}/src/matcher/ip_match.cpp - ${libddwaf_SOURCE_DIR}/src/matcher/exact_match.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/lowercase.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/compress_whitespace.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/normalize_path.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/manager.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/remove_nulls.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/remove_comments.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/shell_unescape.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/unicode_normalize.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/url_basename.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/url_decode.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/url_querystring.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/url_path.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/base64_decode.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/base64_encode.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/css_decode.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/html_entity_decode.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/js_decode.cpp - ${libddwaf_SOURCE_DIR}/src/transformer/common/utf8.cpp - ${libddwaf_SOURCE_DIR}/src/libcxx-compat/monotonic_buffer_resource.cpp - ${libddwaf_SOURCE_DIR}/src/vendor/fmt/format.cc - ${libddwaf_SOURCE_DIR}/src/vendor/radixlib/radixlib.c - ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ac_fast.cxx - ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ac_slow.cxx - ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ac.cxx - ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/xss.c - ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/libinjection_html5.c - ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/libinjection_xss.c - ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/libinjection_sqli.c - ${libddwaf_SOURCE_DIR}/src/vendor/utf8proc/utf8proc.c - ${libddwaf_SOURCE_DIR}/src/vendor/re2/bitstate.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/compile.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/dfa.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/nfa.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/onepass.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/parse.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/perl_groups.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/prog.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/re2.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/regexp.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/simplify.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/stringpiece.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/tostring.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/unicode_casefold.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/unicode_groups.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/util/rune.cc - ${libddwaf_SOURCE_DIR}/src/vendor/re2/util/strutil.cc -) - -add_library(libddwaf_objects OBJECT ${LIBDDWAF_SOURCE}) - -# we need PIC even on the static lib,as it's expected to be linked in a shared lib -set_target_properties(libddwaf_objects PROPERTIES - CXX_STANDARD_REQUIRED YES - CXX_EXTENSIONS NO - POSITION_INDEPENDENT_CODE 1) - - try_compile(STDLIB_MAP_RECURSIVE ${CMAKE_CURRENT_BINARY_DIR} SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/try_rec_map.cpp CXX_STANDARD 20) -if(NOT STDLIB_MAP_RECURSIVE) - target_compile_definitions(libddwaf_objects PRIVATE HAS_NONRECURSIVE_UNORDERED_MAP) -endif() - -if (LIBDDWAF_VECTORIZED_TRANSFORMERS) - target_compile_definitions(libddwaf_objects PRIVATE LIBDDWAF_VECTORIZED_TRANSFORMERS) -endif() - -target_include_directories(libddwaf_objects PUBLIC ${libddwaf_SOURCE_DIR}/include) -target_include_directories(libddwaf_objects PRIVATE ${libddwaf_SOURCE_DIR}/src) -target_include_directories(libddwaf_objects PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor) -target_include_directories(libddwaf_objects PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/) -target_include_directories(libddwaf_objects PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/radixlib/) -target_include_directories(libddwaf_objects PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/) -target_include_directories(libddwaf_objects PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/utf8proc/) -target_include_directories(libddwaf_objects PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/re2/) - -install(FILES ${libddwaf_SOURCE_DIR}/include/ddwaf.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - -target_compile_definitions(libddwaf_objects PRIVATE UTF8PROC_STATIC=1) # System dependencies set(LIBDDWAF_INTERFACE_LIBRARIES "") -set(LIBDDWAF_PRIVATE_LIBRARIES "") - -if (MSVC) - target_compile_definitions(libddwaf_objects PRIVATE NOMINMAX) -endif() - -if(NOT MSVC AND LIBDDWAF_TEST_COVERAGE) - target_compile_options(libddwaf_objects PRIVATE -ggdb --coverage) - list(APPEND LIBDDWAF_PRIVATE_LIBRARIES gcov) -endif() - -if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") -# needed for glibc <2.17. We compile with --as-needed so the library won't -# be linked on more 2.17+, but dependencies might since it's part of the public -# interface of the target +if(LINUX) find_library(LIBPTHREAD pthread) - if (LIBPTHREAD) - list(APPEND LIBDDWAF_INTERFACE_LIBRARIES pthread) - endif() find_library(LIBRT rt) - if (LIBRT) - list(APPEND LIBDDWAF_INTERFACE_LIBRARIES rt) - endif() find_library(LIBDL dl) - if (LIBDL) - list(APPEND LIBDDWAF_INTERFACE_LIBRARIES dl) - endif() # there is also a call to ceilf that may or may not result in an import. # The symbol might live in libm or the normal libc (e.g. musl). In any case, # add the dependency find_library(LIBM m) - if (LIBM) - list(APPEND LIBDDWAF_INTERFACE_LIBRARIES m) - endif() + + set(LIBDDWAF_INTERFACE_LIBRARIES + $<$:pthread> + $<$:rt> + $<$:dl> + $<$:m>) elseif(WIN32) list(APPEND LIBDDWAF_INTERFACE_LIBRARIES ws2_32) endif() -target_link_libraries(libddwaf_objects - PRIVATE ${LIBDDWAF_PRIVATE_LIBRARIES} - INTERFACE ${LIBDDWAF_INTERFACE_LIBRARIES}) +set(LIBDDWAF_PRIVATE_LIBRARIES "") +if(NOT MSVC AND LIBDDWAF_TESTING AND LIBDDWAF_TEST_COVERAGE) + list(APPEND LIBDDWAF_PRIVATE_LIBRARIES gcov) +endif() -if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - target_link_options(libddwaf_objects PRIVATE -Wl,--as-needed) +if (LIBDDWAF_ENABLE_LTO) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto") endif() +include(cmake/objects.cmake) + # Static library if (LIBDDWAF_BUILD_STATIC) - add_library(libddwaf_static STATIC $) - target_link_libraries(libddwaf_static INTERFACE ${LIBDDWAF_INTERFACE_LIBRARIES}) - if (NOT MSVC) - set_target_properties(libddwaf_static PROPERTIES OUTPUT_NAME ddwaf) - else() - set_target_properties(libddwaf_static PROPERTIES OUTPUT_NAME ddwaf_static) - endif() - - install(TARGETS libddwaf_static EXPORT libddwaf-config - DESTINATION ${CMAKE_INSTALL_LIBDIR} - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - - # Post-processing on the static library - if(CMAKE_SYSTEM_NAME STREQUAL Linux) - add_custom_command(TARGET libddwaf_static POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ar_comb - COMMAND ${CMAKE_COMMAND} -E chdir ar_comb ${CMAKE_AR} -x $ - COMMAND ${CMAKE_COMMAND} -E copy $ ar_comb - COMMAND ${CMAKE_AR} -qcs ar_comb/combined${CMAKE_STATIC_LIBRARY_SUFFIX} ar_comb/*.o* - - COMMAND ${CMAKE_COMMAND} -E copy ar_comb/combined${CMAKE_STATIC_LIBRARY_SUFFIX} $ - COMMAND rm -rf ar_comb - WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) - endif() - if(NOT (CMAKE_BUILD_TYPE MATCHES Debug)) - if (CMAKE_SYSTEM_NAME STREQUAL Darwin OR CMAKE_SYSTEM_NAME STREQUAL Linux) - if (NOT CMAKE_STRIP) - find_program(STRIP strip) - if (STRIP STREQUAL "STRIP-NOTFOUND") - message(FATAL_ERROR "strip not found") - endif() - else() - set(STRIP ${CMAKE_STRIP}) - endif() - add_custom_command(TARGET libddwaf_static POST_BUILD - COMMAND ${STRIP} -x -S $ -o $.stripped) - install(FILES $.stripped DESTINATION ${CMAKE_INSTALL_LIBDIR}) - endif() - endif() + include(cmake/static.cmake) endif() # Shared library if (LIBDDWAF_BUILD_SHARED) - execute_process(COMMAND git rev-parse HEAD - WORKING_DIRECTORY ${libddwaf_SOURCE_DIR} - OUTPUT_VARIABLE BUILD_ID - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - message(STATUS "Build id is ${BUILD_ID}") - string(SUBSTRING "${BUILD_ID}" 0 2 BUILD_ID_PREFIX) - string(SUBSTRING "${BUILD_ID}" 2 39 BUILD_ID_SUFFIX) - - add_library(libddwaf_shared SHARED - $ $<$:libddwaf.def>) - set_target_properties(libddwaf_shared PROPERTIES OUTPUT_NAME ddwaf) - - install(TARGETS libddwaf_shared EXPORT libddwaf-config - DESTINATION ${CMAKE_INSTALL_LIBDIR} - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - - if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") - target_link_libraries(libddwaf_shared PRIVATE -Wl,-undefined,error libddwaf_objects) - elseif(NOT MSVC) - target_link_libraries(libddwaf_shared PUBLIC ${LIBDDWAF_INTERFACE_LIBRARIES}) - target_link_libraries(libddwaf_shared PRIVATE - -Wl,--no-undefined - -Wl,-version-script=${libddwaf_SOURCE_DIR}/libddwaf.version - -Wl,--build-id=0x${BUILD_ID} - ${LIBDDWAF_PRIVATE_LIBRARIES} - ${LIBDDWAF_SHARED_LINKER_FLAGS} - glibc_compat) - else() - target_link_libraries(libddwaf_shared - PRIVATE ${LIBDDWAF_PRIVATE_LIBRARIES} - PUBLIC ${LIBDDWAF_INTERFACE_LIBRARIES}) - endif() - - if(NOT (CMAKE_BUILD_TYPE MATCHES Debug)) - if (CMAKE_SYSTEM_NAME STREQUAL Darwin) - # Ensure that dsymutil and strip is present - find_program(DSYMUTIL dsymutil) - if (DSYMUTIL STREQUAL "DSYMUTIL-NOTFOUND") - message(FATAL_ERROR "dsymutil not found") - endif() - find_program(STRIP strip) - if (STRIP STREQUAL "STRIP-NOTFOUND") - message(FATAL_ERROR "strip not found") - endif() - - if(LIBDDWAF_KEEP_SYMBOL_FILE) - set(SYMBOL_FILE $.dwarf) - add_custom_command(TARGET libddwaf_shared POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy $ ${SYMBOL_FILE} - COMMAND ${DSYMUTIL} --flat --minimize ${SYMBOL_FILE} - COMMAND ${STRIP} -S -x $ - COMMAND rm ${SYMBOL_FILE} - COMMAND mv ${SYMBOL_FILE}.dwarf ${SYMBOL_FILE}) - else() - add_custom_command(TARGET libddwaf_shared POST_BUILD - COMMAND ${STRIP} -S -x $) - endif() - elseif(NOT WIN32) - if(LIBDDWAF_KEEP_SYMBOL_FILE) - set(SYMBOL_FILE $.debug) - add_custom_command(TARGET libddwaf_shared POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy $ ${SYMBOL_FILE} - COMMAND ${CMAKE_STRIP} --only-keep-debug ${SYMBOL_FILE} - COMMAND ${CMAKE_STRIP} $) - else() - add_custom_command(TARGET libddwaf_shared POST_BUILD - COMMAND ${CMAKE_STRIP} $) - endif() - endif() - - if(LIBDDWAF_KEEP_SYMBOL_FILE AND NOT WIN32) - install(FILES ${SYMBOL_FILE} - DESTINATION ${CMAKE_INSTALL_LIBDIR}/.build-id/${BUILD_ID_PREFIX} - RENAME ${BUILD_ID_SUFFIX}.debug) - endif() - endif() - - if (MSVC) - install(FILES $ DESTINATION lib OPTIONAL) - endif() + include(cmake/shared.cmake) endif() -# Packaging -install(EXPORT libddwaf-config - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/cmake/libddwaf) -set(CPACK_PACKAGE_VENDOR "libddwaf") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "DataDog WAF Library") -set(CPACK_RESOURCE_FILE_README "${libddwaf_SOURCE_DIR}/README.md") -set(CPACK_GENERATOR "TGZ") -set(CPACK_SOURCE_GENERATOR "TGZ") - -## Package name -execute_process(COMMAND git describe --exact-match --tags HEAD - WORKING_DIRECTORY ${libddwaf_SOURCE_DIR} - OUTPUT_VARIABLE DDWAF_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_QUIET -) - -if (NOT DDWAF_VERSION) - set(DDWAF_VERSION ${CMAKE_PROJECT_VERSION}) - execute_process(COMMAND git rev-parse --short HEAD - WORKING_DIRECTORY ${libddwaf_SOURCE_DIR} - OUTPUT_VARIABLE SHORT_BUILD_ID - OUTPUT_STRIP_TRAILING_WHITESPACE - ) -endif() - -set(CPACK_PACKAGE_FILE_NAME ${CMAKE_PROJECT_NAME}-${DDWAF_VERSION}-${CMAKE_SYSTEM_NAME}-${LIBDDWAF_PACKAGE_PROCESSOR}) -if(LIBDDWAF_PACKAGE_SUFFIX MATCHES ".+") - set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_FILE_NAME}-${LIBDDWAF_PACKAGE_SUFFIX}) -endif() - -if (SHORT_BUILD_ID) - set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_FILE_NAME}-${SHORT_BUILD_ID}) -endif() -string(TOLOWER ${CPACK_PACKAGE_FILE_NAME} CPACK_PACKAGE_FILE_NAME) - -set(CPACK_WARN_ON_ABSOLUTE_INSTALL_DESTINATION TRUE) - -include(CPack) +include(cmake/package.cmake) if (LIBDDWAF_TESTING) + add_subdirectory(third_party EXCLUDE_FROM_ALL) add_subdirectory(tests EXCLUDE_FROM_ALL) add_subdirectory(validator EXCLUDE_FROM_ALL) add_subdirectory(perf EXCLUDE_FROM_ALL) @@ -405,4 +108,3 @@ if (LIBDDWAF_TESTING) include(cmake/clang-tidy.cmake) include(cmake/clang-format.cmake) endif() -# vim: set et: diff --git a/cmake/objects.cmake b/cmake/objects.cmake new file mode 100644 index 000000000..cdc4b0ae8 --- /dev/null +++ b/cmake/objects.cmake @@ -0,0 +1,145 @@ +set(LIBDDWAF_SOURCE + ${libddwaf_SOURCE_DIR}/src/ruleset_builder.cpp + ${libddwaf_SOURCE_DIR}/src/clock.cpp + ${libddwaf_SOURCE_DIR}/src/parameter.cpp + ${libddwaf_SOURCE_DIR}/src/interface.cpp + ${libddwaf_SOURCE_DIR}/src/context.cpp + ${libddwaf_SOURCE_DIR}/src/context_allocator.cpp + ${libddwaf_SOURCE_DIR}/src/event.cpp + ${libddwaf_SOURCE_DIR}/src/object.cpp + ${libddwaf_SOURCE_DIR}/src/object_store.cpp + ${libddwaf_SOURCE_DIR}/src/collection.cpp + ${libddwaf_SOURCE_DIR}/src/expression.cpp + ${libddwaf_SOURCE_DIR}/src/ruleset_info.cpp + ${libddwaf_SOURCE_DIR}/src/ip_utils.cpp + ${libddwaf_SOURCE_DIR}/src/processor.cpp + ${libddwaf_SOURCE_DIR}/src/iterator.cpp + ${libddwaf_SOURCE_DIR}/src/log.cpp + ${libddwaf_SOURCE_DIR}/src/obfuscator.cpp + ${libddwaf_SOURCE_DIR}/src/utils.cpp + ${libddwaf_SOURCE_DIR}/src/waf.cpp + ${libddwaf_SOURCE_DIR}/src/exclusion/input_filter.cpp + ${libddwaf_SOURCE_DIR}/src/exclusion/object_filter.cpp + ${libddwaf_SOURCE_DIR}/src/exclusion/rule_filter.cpp + ${libddwaf_SOURCE_DIR}/src/generator/extract_schema.cpp + ${libddwaf_SOURCE_DIR}/src/parser/common.cpp + ${libddwaf_SOURCE_DIR}/src/parser/parser.cpp + ${libddwaf_SOURCE_DIR}/src/parser/parser_v1.cpp + ${libddwaf_SOURCE_DIR}/src/parser/parser_v2.cpp + ${libddwaf_SOURCE_DIR}/src/parser/rule_data_parser.cpp + ${libddwaf_SOURCE_DIR}/src/matcher/phrase_match.cpp + ${libddwaf_SOURCE_DIR}/src/matcher/regex_match.cpp + ${libddwaf_SOURCE_DIR}/src/matcher/is_sqli.cpp + ${libddwaf_SOURCE_DIR}/src/matcher/is_xss.cpp + ${libddwaf_SOURCE_DIR}/src/matcher/ip_match.cpp + ${libddwaf_SOURCE_DIR}/src/matcher/exact_match.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/lowercase.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/compress_whitespace.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/normalize_path.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/manager.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/remove_nulls.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/remove_comments.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/shell_unescape.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/unicode_normalize.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/url_basename.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/url_decode.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/url_querystring.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/url_path.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/base64_decode.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/base64_encode.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/css_decode.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/html_entity_decode.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/js_decode.cpp + ${libddwaf_SOURCE_DIR}/src/transformer/common/utf8.cpp + ${libddwaf_SOURCE_DIR}/src/libcxx-compat/monotonic_buffer_resource.cpp + ${libddwaf_SOURCE_DIR}/src/vendor/fmt/format.cc + ${libddwaf_SOURCE_DIR}/src/vendor/radixlib/radixlib.c + ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ac_fast.cxx + ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ac_slow.cxx + ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ac.cxx + ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/xss.c + ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/libinjection_html5.c + ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/libinjection_xss.c + ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/libinjection_sqli.c + ${libddwaf_SOURCE_DIR}/src/vendor/utf8proc/utf8proc.c + ${libddwaf_SOURCE_DIR}/src/vendor/re2/bitstate.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/compile.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/dfa.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/nfa.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/onepass.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/parse.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/perl_groups.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/prog.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/re2.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/regexp.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/simplify.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/stringpiece.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/tostring.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/unicode_casefold.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/unicode_groups.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/util/rune.cc + ${libddwaf_SOURCE_DIR}/src/vendor/re2/util/strutil.cc +) + +set(LIBDDWAF_PUBLIC_INCLUDES ${libddwaf_SOURCE_DIR}/include) + +set(LIBDDWAF_PRIVATE_INCLUDES + ${libddwaf_SOURCE_DIR}/src + ${libddwaf_SOURCE_DIR}/src/vendor + ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/ + ${libddwaf_SOURCE_DIR}/src/vendor/radixlib/ + ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ + ${libddwaf_SOURCE_DIR}/src/vendor/utf8proc/ + ${libddwaf_SOURCE_DIR}/src/vendor/re2/) + +function(gen_objects target_name) + add_library(${target_name} OBJECT ${LIBDDWAF_SOURCE}) + +# we need PIC even on the static lib,as it's expected to be linked in a shared lib + set_target_properties(${target_name} PROPERTIES + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO + POSITION_INDEPENDENT_CODE 1) + + if(NOT STDLIB_MAP_RECURSIVE) + target_compile_definitions(${target_name} PRIVATE HAS_NONRECURSIVE_UNORDERED_MAP) + endif() + + if (LIBDDWAF_VECTORIZED_TRANSFORMERS) + target_compile_definitions(${target_name} PRIVATE LIBDDWAF_VECTORIZED_TRANSFORMERS) + endif() + + target_include_directories(${target_name} PUBLIC ${LIBDDWAF_PUBLIC_INCLUDES}) + target_include_directories(${target_name} PRIVATE ${LIBDDWAF_PRIVATE_INCLUDES}) + + target_compile_definitions(${target_name} PRIVATE UTF8PROC_STATIC=1) + if (MSVC) + target_compile_definitions(${target_name} PRIVATE NOMINMAX) + endif() + + target_link_libraries(${target_name} + PRIVATE ${LIBDDWAF_PRIVATE_LIBRARIES} + INTERFACE ${LIBDDWAF_INTERFACE_LIBRARIES}) +endfunction() + +gen_objects(libddwaf_objects) +add_library(libddwaf_shared_objects ALIAS libddwaf_objects) + +if (LIBDDWAF_ENABLE_LTO) + target_compile_options(libddwaf_objects PRIVATE -flto) + + # If LTO is enabled, we can't use objects with -flto to generate a static + # library, as the contents of the object is an intermediate representation. + # This can be solved (in theory) using -ffat-lto-objects, but clang < 18 + # doesn't currently support this, so we need to generate separate objects + # specifically for the static build. + gen_objects(libddwaf_static_objects) +else() + add_library(libddwaf_static_objects ALIAS libddwaf_objects) +endif() + +if(NOT MSVC AND LIBDDWAF_TESTING AND LIBDDWAF_TEST_COVERAGE) + target_compile_options(libddwaf_objects PRIVATE --coverage) +endif() + + diff --git a/cmake/package.cmake b/cmake/package.cmake new file mode 100644 index 000000000..9c39c398c --- /dev/null +++ b/cmake/package.cmake @@ -0,0 +1,44 @@ +# Packaging + +install(FILES ${libddwaf_SOURCE_DIR}/include/ddwaf.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +install(EXPORT libddwaf-config DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/cmake/libddwaf) + +if(APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + set(LIBDDWAF_PACKAGE_PROCESSOR ${CMAKE_OSX_ARCHITECTURES} CACHE STRING "Alternative processor for packaging purposes") +else() + set(LIBDDWAF_PACKAGE_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Alternative processor for packaging purposes") +endif() + +set(CPACK_PACKAGE_VENDOR "libddwaf") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "DataDog WAF Library") +set(CPACK_RESOURCE_FILE_README "${libddwaf_SOURCE_DIR}/README.md") +set(CPACK_GENERATOR "TGZ") +set(CPACK_SOURCE_GENERATOR "TGZ") + +## Package name +execute_process(COMMAND git describe --exact-match --tags HEAD + WORKING_DIRECTORY ${libddwaf_SOURCE_DIR} + OUTPUT_VARIABLE DDWAF_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +if (NOT DDWAF_VERSION) + set(DDWAF_VERSION ${CMAKE_PROJECT_VERSION}) + execute_process(COMMAND git rev-parse --short HEAD + WORKING_DIRECTORY ${libddwaf_SOURCE_DIR} + OUTPUT_VARIABLE SHORT_BUILD_ID + OUTPUT_STRIP_TRAILING_WHITESPACE + ) +endif() + +set(CPACK_PACKAGE_FILE_NAME ${CMAKE_PROJECT_NAME}-${DDWAF_VERSION}-${CMAKE_SYSTEM_NAME}-${LIBDDWAF_PACKAGE_PROCESSOR}) +if (SHORT_BUILD_ID) + set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_FILE_NAME}-${SHORT_BUILD_ID}) +endif() +string(TOLOWER ${CPACK_PACKAGE_FILE_NAME} CPACK_PACKAGE_FILE_NAME) + +set(CPACK_WARN_ON_ABSOLUTE_INSTALL_DESTINATION TRUE) + +include(CPack) + diff --git a/cmake/shared.cmake b/cmake/shared.cmake new file mode 100644 index 000000000..536ca54ce --- /dev/null +++ b/cmake/shared.cmake @@ -0,0 +1,83 @@ +execute_process(COMMAND git rev-parse HEAD + WORKING_DIRECTORY ${libddwaf_SOURCE_DIR} + OUTPUT_VARIABLE BUILD_ID + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +message(STATUS "Build id is ${BUILD_ID}") +string(SUBSTRING "${BUILD_ID}" 0 2 BUILD_ID_PREFIX) +string(SUBSTRING "${BUILD_ID}" 2 39 BUILD_ID_SUFFIX) + +add_library(libddwaf_shared SHARED + $ $<$:libddwaf.def>) +set_target_properties(libddwaf_shared PROPERTIES OUTPUT_NAME ddwaf) + +install(TARGETS libddwaf_shared EXPORT libddwaf-config + DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +if(LINUX) + target_link_libraries(libddwaf_shared PUBLIC ${LIBDDWAF_INTERFACE_LIBRARIES}) + target_link_libraries(libddwaf_shared PRIVATE + $<$:-flto> + -Wl,--no-undefined + -Wl,-version-script=${libddwaf_SOURCE_DIR}/libddwaf.version + -Wl,--build-id=0x${BUILD_ID} + ${LIBDDWAF_PRIVATE_LIBRARIES} + -static-libstdc++ + glibc_compat) + + if(NOT (CMAKE_BUILD_TYPE MATCHES Debug)) + set(SYMBOL_FILE $.debug) + add_custom_command(TARGET libddwaf_shared POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ ${SYMBOL_FILE} + COMMAND ${CMAKE_STRIP} --only-keep-debug ${SYMBOL_FILE} + COMMAND ${CMAKE_STRIP} $) + + install(FILES ${SYMBOL_FILE} + DESTINATION ${CMAKE_INSTALL_LIBDIR}/.build-id/${BUILD_ID_PREFIX} + RENAME ${BUILD_ID_SUFFIX}.debug) + endif() +elseif (APPLE) + target_link_libraries(libddwaf_shared PRIVATE -Wl,-undefined,error libddwaf_shared_objects) + + if(NOT (CMAKE_BUILD_TYPE MATCHES Debug)) + # Ensure that dsymutil and strip is present + find_program(DSYMUTIL dsymutil) + if (DSYMUTIL STREQUAL "DSYMUTIL-NOTFOUND") + message(FATAL_ERROR "dsymutil not found") + endif() + find_program(STRIP strip) + if (STRIP STREQUAL "STRIP-NOTFOUND") + message(FATAL_ERROR "strip not found") + endif() + + set(SYMBOL_FILE $.dwarf) + add_custom_command(TARGET libddwaf_shared POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ ${SYMBOL_FILE} + COMMAND ${DSYMUTIL} --flat --minimize ${SYMBOL_FILE} + COMMAND ${STRIP} -S -x $ + COMMAND rm ${SYMBOL_FILE} + COMMAND mv ${SYMBOL_FILE}.dwarf ${SYMBOL_FILE}) + + install(FILES ${SYMBOL_FILE} + DESTINATION ${CMAKE_INSTALL_LIBDIR}/.build-id/${BUILD_ID_PREFIX} + RENAME ${BUILD_ID_SUFFIX}.debug) + endif() +elseif (MSVC) + target_link_libraries(libddwaf_shared + PRIVATE ${LIBDDWAF_PRIVATE_LIBRARIES} + PUBLIC ${LIBDDWAF_INTERFACE_LIBRARIES}) + + install(FILES $ DESTINATION lib OPTIONAL) +elseif (MINGW) + target_link_libraries(libddwaf_shared PUBLIC ${LIBDDWAF_INTERFACE_LIBRARIES}) + target_link_libraries(libddwaf_shared PRIVATE + $<$:-flto> + -Wl,--no-undefined + -Wl,-version-script=${libddwaf_SOURCE_DIR}/libddwaf.version + -Wl,--build-id=0x${BUILD_ID} + ${LIBDDWAF_PRIVATE_LIBRARIES} + -static-libstdc++ + glibc_compat) +endif() diff --git a/cmake/static.cmake b/cmake/static.cmake new file mode 100644 index 000000000..5fe4c6147 --- /dev/null +++ b/cmake/static.cmake @@ -0,0 +1,41 @@ +add_library(libddwaf_static STATIC $) + +target_link_libraries(libddwaf_static INTERFACE ${LIBDDWAF_INTERFACE_LIBRARIES}) +if (NOT MSVC) + set_target_properties(libddwaf_static PROPERTIES OUTPUT_NAME ddwaf) +else() + set_target_properties(libddwaf_static PROPERTIES OUTPUT_NAME ddwaf_static) +endif() + +install(TARGETS libddwaf_static EXPORT libddwaf-config + DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +# Post-processing on the static library +if(LINUX) + add_dependencies(libddwaf_static glibc_compat) + add_custom_command(TARGET libddwaf_static POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ar_comb + COMMAND ${CMAKE_COMMAND} -E chdir ar_comb ${CMAKE_AR} -x $ + COMMAND ${CMAKE_COMMAND} -E copy $ ar_comb + COMMAND ${CMAKE_AR} -qcs ar_comb/combined${CMAKE_STATIC_LIBRARY_SUFFIX} ar_comb/*.o* + + COMMAND ${CMAKE_COMMAND} -E copy ar_comb/combined${CMAKE_STATIC_LIBRARY_SUFFIX} $ + COMMAND rm -rf ar_comb + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) +endif() + +if(NOT (CMAKE_BUILD_TYPE MATCHES Debug) AND (APPLE OR LINUX)) + if (NOT CMAKE_STRIP) + find_program(STRIP strip) + if (STRIP STREQUAL "STRIP-NOTFOUND") + message(FATAL_ERROR "strip not found") + endif() + else() + set(STRIP ${CMAKE_STRIP}) + endif() + add_custom_command(TARGET libddwaf_static POST_BUILD + COMMAND ${STRIP} -x -S $ -o $.stripped) + install(FILES $.stripped DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + diff --git a/docker/libddwaf/build/Dockerfile b/docker/libddwaf/build/Dockerfile index 034f4df20..310adfbae 100644 --- a/docker/libddwaf/build/Dockerfile +++ b/docker/libddwaf/build/Dockerfile @@ -9,10 +9,11 @@ RUN cd build && cmake \ -G Ninja \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_TOOLCHAIN_FILE=/libddwaf/docker/libddwaf/sysroot/Toolchain.cmake.${ARCH} \ + -DLIBDDWAF_ENABLE_LTO=ON \ ../libddwaf && ninja RUN patchelf --remove-needed $(basename /sysroot/${ARCH}-none-linux-musl/lib/libc.musl-*.so.1) /build/libddwaf.so -RUN ninja -C build waf_test waf_validator +RUN ninja -v -C build waf_test waf_validator RUN cd libddwaf/tests && qemu-$ARCH-static /build/tests/waf_test RUN cd libddwaf/validator && qemu-$ARCH-static /build/validator/waf_validator @@ -26,7 +27,7 @@ RUN cd /build && \ # Strip archive RUN cd /build && stripcmd=$(egrep -o "/usr/bin/[a-z0-9\_-]*-strip" /libddwaf/docker/libddwaf/sysroot/Toolchain.cmake.${ARCH}) ; $stripcmd -x -S libddwaf.a -o libddwaf.a.stripped -RUN cd /build && ninja package && \ +RUN cd /build && ninja -v package && \ new_name=$(ls libddwaf-*.tar.gz | head -n1 | sed "s/\(libddwaf-[\.0-9]*\)-linux-\([_a-zA-Z0-9]*\)\(-\?[a-zA-Z0-9]*.tar.gz\)/\1-\2-linux-musl\3/g") ; \ mv libddwaf-*.tar.gz "$new_name" diff --git a/fuzzing/CMakeLists.txt b/fuzzing/CMakeLists.txt index 4a47f99f3..f4d0d6b68 100644 --- a/fuzzing/CMakeLists.txt +++ b/fuzzing/CMakeLists.txt @@ -7,19 +7,13 @@ set_target_properties(fuzzer PROPERTIES CXX_EXTENSIONS NO) target_include_directories(fuzzer PRIVATE ${libddwaf_SOURCE_DIR}/fuzzing/tools) -target_include_directories(fuzzer PRIVATE ${libddwaf_SOURCE_DIR}/include) -target_include_directories(fuzzer PRIVATE ${libddwaf_SOURCE_DIR}/src) -target_include_directories(fuzzer PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor) -target_include_directories(fuzzer PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src) -target_include_directories(fuzzer PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/radixlib) -target_include_directories(fuzzer PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick) -target_include_directories(fuzzer PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/utf8proc) +target_include_directories(fuzzer PRIVATE ${LIBDDWAF_PUBLIC_INCLUDES} ${LIBDDWAF_PRIVATE_INCLUDES}) + set_target_properties(fuzzer PROPERTIES COMPILE_FLAGS "-fsanitize=fuzzer,address,undefined,leak -fprofile-instr-generate -fcoverage-mapping") set_target_properties(fuzzer PROPERTIES LINK_FLAGS "-fsanitize=fuzzer,address,undefined,leak -fprofile-instr-generate -fcoverage-mapping") -target_link_libraries(fuzzer - PRIVATE ${LIBDDWAF_PRIVATE_LIBRARIES} ${LIBDDWAF_INTERFACE_LIBRARIES} - ${LIBDDWAF_EXE_LINKER_FLAGS} lib_yamlcpp) +target_link_libraries(fuzzer PRIVATE + ${LIBDDWAF_PRIVATE_LIBRARIES} ${LIBDDWAF_INTERFACE_LIBRARIES} lib_yamlcpp) diff --git a/fuzzing/build.sh b/fuzzing/build.sh index 21cb887b8..18c8d116d 100755 --- a/fuzzing/build.sh +++ b/fuzzing/build.sh @@ -1,8 +1,8 @@ #!/bin/bash set -eu -export CC=clang-15 -export CXX=clang++-15 +export CC=clang-17 +export CXX=clang++-17 rm -rf build && mkdir build && cd build diff --git a/fuzzing/scripts/show_coverage.sh b/fuzzing/scripts/show_coverage.sh index 0c410aac2..07e532cc3 100755 --- a/fuzzing/scripts/show_coverage.sh +++ b/fuzzing/scripts/show_coverage.sh @@ -3,13 +3,13 @@ set -eu cd fuzzing -llvm-profdata-15 merge -sparse *.profraw -o default.profdata -llvm-cov-15 show fuzzer -instr-profile=default.profdata -ignore-filename-regex="(vendor|fuzzing|third_party)" -format=html > coverage.html -llvm-cov-15 report -instr-profile default.profdata fuzzer -ignore-filename-regex="(vendor|fuzzing|third_party)" -show-region-summary=false +llvm-profdata-17 merge -sparse *.profraw -o default.profdata +llvm-cov-17 show fuzzer -instr-profile=default.profdata -ignore-filename-regex="(vendor|fuzzing|third_party)" -format=html > coverage.html +llvm-cov-17 report -instr-profile default.profdata fuzzer -ignore-filename-regex="(vendor|fuzzing|third_party)" -show-region-summary=false if [ ! -z ${1:-} ]; then THRESHOLD=$1 - TOTAL=$(llvm-cov-15 report -instr-profile default.profdata fuzzer -ignore-filename-regex="(vendor|fuzzing|third_party)" -show-region-summary=false | grep TOTAL) + TOTAL=$(llvm-cov-17 report -instr-profile default.profdata fuzzer -ignore-filename-regex="(vendor|fuzzing|third_party)" -show-region-summary=false | grep TOTAL) ARRAY=($TOTAL) COVERAGE=$(echo ${ARRAY[3]} | sed -e "s/\.[[:digit:]]*%//g") diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt index 5c92e7b25..d508d173c 100644 --- a/perf/CMakeLists.txt +++ b/perf/CMakeLists.txt @@ -4,6 +4,7 @@ list(REMOVE_ITEM LIBDDWAF_BENCHMARK_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/benchcmp. list(REMOVE_ITEM LIBDDWAF_BENCHMARK_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/benchmerge.cpp) add_executable(benchmark ${LIBDDWAF_BENCHMARK_SOURCE}) +target_compile_options(benchmark PRIVATE $<$:-flto>) target_link_libraries(benchmark PRIVATE libddwaf_objects lib_yamlcpp lib_rapidjson m) target_include_directories(benchmark PRIVATE ${libddwaf_SOURCE_DIR}/src) diff --git a/perf/utils.cpp b/perf/utils.cpp index d1ee4ef3b..993e2a956 100644 --- a/perf/utils.cpp +++ b/perf/utils.cpp @@ -25,6 +25,9 @@ void debug_str_helper(std::string &res, const ddwaf_object &p) case DDWAF_OBJ_INVALID: res += ""; break; + case DDWAF_OBJ_NULL: + res += ""; + break; case DDWAF_OBJ_BOOL: res += p.boolean ? "true" : "false"; break; @@ -34,6 +37,9 @@ void debug_str_helper(std::string &res, const ddwaf_object &p) case DDWAF_OBJ_UNSIGNED: res += std::to_string(p.uintValue); break; + case DDWAF_OBJ_FLOAT: + res += std::to_string(p.f64); + break; case DDWAF_OBJ_STRING: res += '"'; res += std::string_view{p.stringValue, p.nbEntries}; @@ -79,6 +85,9 @@ ddwaf_object object_dup(const ddwaf_object &o) noexcept case DDWAF_OBJ_INVALID: ddwaf_object_invalid(©); break; + case DDWAF_OBJ_NULL: + ddwaf_object_null(©); + break; case DDWAF_OBJ_BOOL: ddwaf_object_bool(©, o.boolean); break; @@ -88,6 +97,9 @@ ddwaf_object object_dup(const ddwaf_object &o) noexcept case DDWAF_OBJ_UNSIGNED: ddwaf_object_unsigned(©, o.uintValue); break; + case DDWAF_OBJ_FLOAT: + ddwaf_object_float(©, o.f64); + break; case DDWAF_OBJ_STRING: ddwaf_object_stringl(©, o.stringValue, o.nbEntries); break; diff --git a/perf/yaml_helpers.cpp b/perf/yaml_helpers.cpp index 8d622f28e..0effd2302 100644 --- a/perf/yaml_helpers.cpp +++ b/perf/yaml_helpers.cpp @@ -5,6 +5,7 @@ // (https://www.datadoghq.com/). Copyright 2022 Datadog, Inc. #include "yaml_helpers.hpp" +#include namespace YAML { @@ -70,6 +71,9 @@ YAML::Emitter &operator<<(YAML::Emitter &out, const ddwaf_object &o) case DDWAF_OBJ_UNSIGNED: out << o.uintValue; break; + case DDWAF_OBJ_FLOAT: + out << o.f64; + break; case DDWAF_OBJ_STRING: out << o.stringValue; break; @@ -87,7 +91,9 @@ YAML::Emitter &operator<<(YAML::Emitter &out, const ddwaf_object &o) out << YAML::EndMap; break; case DDWAF_OBJ_INVALID: - throw std::runtime_error("Invalid object"); + case DDWAF_OBJ_NULL: + out << YAML::Null; + break; } return out; diff --git a/src/generator/extract_schema.cpp b/src/generator/extract_schema.cpp index 94f9e9644..a75c11a44 100644 --- a/src/generator/extract_schema.cpp +++ b/src/generator/extract_schema.cpp @@ -300,7 +300,7 @@ base_node generate_helper(const ddwaf_object *object, std::string_view key, continue; } - std::string_view const key{ + const std::string_view key{ child->parameterName, static_cast(child->parameterNameLength)}; auto schema = generate_helper(child, key, scanners, depth - 1, deadline); diff --git a/src/log.hpp b/src/log.hpp index dd4861f76..cd5e2d73e 100644 --- a/src/log.hpp +++ b/src/log.hpp @@ -51,16 +51,17 @@ constexpr const char *base_name(const char *path) } # define DDWAF_LOG_HELPER(level, function, file, line, fmt_str, ...) \ - { \ - if (ddwaf::logger::valid(level)) { \ - constexpr const char *filename = base_name(file); \ - auto message = ddwaf::fmt::format(fmt_str, ##__VA_ARGS__); \ - ddwaf::logger::log(level, function, filename, line, message.c_str(), message.size()); \ - } \ - } + { \ + if (ddwaf::logger::valid(level)) { \ + constexpr const char *filename = base_name(file); \ + auto message = ddwaf::fmt::format(fmt_str, ##__VA_ARGS__); \ + ddwaf::logger::log( \ + level, function, filename, line, message.c_str(), message.size()); \ + } \ + } # define DDWAF_LOG(level, fmt, ...) \ - DDWAF_LOG_HELPER(level, __func__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) + DDWAF_LOG_HELPER(level, __func__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) #endif #if DDWAF_COMPILE_LOG_LEVEL <= DDWAF_COMPILE_LOG_TRACE diff --git a/src/transformer/lowercase.cpp b/src/transformer/lowercase.cpp index 517265c4d..abab427f8 100644 --- a/src/transformer/lowercase.cpp +++ b/src/transformer/lowercase.cpp @@ -44,7 +44,7 @@ bool lowercase::needs_transform(std::string_view str) const __m128i sse_mask_lower_bound = _mm_set1_epi8('A'); const __m128i sse_mask_upper_bound = _mm_set1_epi8('Z'); - std::size_t const aligned_size = str.size() & ~0xF; + const std::size_t aligned_size = str.size() & ~0xF; __m128i cmp_result_final = _mm_setzero_si128(); for (std::size_t i = 0; i < aligned_size; i += 16) { @@ -76,7 +76,7 @@ bool lowercase::transform_impl(cow_string &str) const __m128i sse_mask_lower_bound = _mm_set1_epi8('A'); const __m128i sse_addition_value = _mm_set1_epi8(0x20); // value to add to convert up to lc - std::size_t const aligned_size = size & ~0xF; + const std::size_t aligned_size = size & ~0xF; for (std::size_t i = 0; i < aligned_size; i += 16) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) diff --git a/src/vendor/lua-aho-corasick/.gitignore b/src/vendor/lua-aho-corasick/.gitignore deleted file mode 100644 index 04fd22152..000000000 --- a/src/vendor/lua-aho-corasick/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -*.d -*.o -*.a -*.so -*_dep.txt -tests/testinput diff --git a/src/vendor/lua-aho-corasick/Makefile b/src/vendor/lua-aho-corasick/Makefile deleted file mode 100644 index 647166491..000000000 --- a/src/vendor/lua-aho-corasick/Makefile +++ /dev/null @@ -1,134 +0,0 @@ -OS := $(shell uname) - -ifeq ($(OS), Darwin) - SO_EXT := dylib -else - SO_EXT := so -endif - -############################################################################# -# -# Binaries we are going to build -# -############################################################################# -# -C_SO_NAME = libac.$(SO_EXT) -LUA_SO_NAME = ahocorasick.$(SO_EXT) -AR_NAME = libac.a - -############################################################################# -# -# Compile and link flags -# -############################################################################# -PREFIX ?= /usr/local -LUA_VERSION := 5.1 -LUA_INCLUDE_DIR := $(PREFIX)/include/lua$(LUA_VERSION) -SO_TARGET_DIR := $(PREFIX)/lib/lua/$(LUA_VERSION) -LUA_TARGET_DIR := $(PREFIX)/share/lua/$(LUA_VERSION) - -# Available directives: -# -DDEBUG : Turn on debugging support -# -DVERIFY : To verify if the slow-version and fast-version implementations -# get exactly the same result. Note -DVERIFY implies -DDEBUG. -# -COMMON_FLAGS = -O3 #-g -DVERIFY -msse2 -msse3 -msse4.1 -COMMON_FLAGS += -fvisibility=hidden -Wall $(CXXFLAGS) $(MY_CXXFLAGS) $(CPPFLAGS) - -SO_CXXFLAGS = $(COMMON_FLAGS) -fPIC -SO_LFLAGS = $(COMMON_FLAGS) $(LDFLAGS) -AR_CXXFLAGS = $(COMMON_FLAGS) - -# -DVERIFY implies -DDEBUG -ifneq ($(findstring -DVERIFY, $(COMMON_FLAGS)), ) -ifeq ($(findstring -DDEBUG, $(COMMON_FLAGS)), ) - COMMON_FLAGS += -DDEBUG -endif -endif - -AR = ar -AR_FLAGS = cru - -############################################################################# -# -# Divide source codes and objects into several categories -# -############################################################################# -# -SRC_COMMON := ac_fast.cxx ac_slow.cxx -LIBAC_SO_SRC := $(SRC_COMMON) ac.cxx # source for libac.so -LUA_SO_SRC := $(SRC_COMMON) ac_lua.cxx # source for ahocorasick.so -LIBAC_A_SRC := $(LIBAC_SO_SRC) # source for libac.a - -############################################################################# -# -# Make rules -# -############################################################################# -# -.PHONY = all clean test benchmark prepare -all : $(C_SO_NAME) $(LUA_SO_NAME) $(AR_NAME) - --include c_so_dep.txt --include lua_so_dep.txt --include ar_dep.txt - -BUILD_SO_DIR := build_so -BUILD_AR_DIR := build_ar - -$(BUILD_SO_DIR) :; mkdir $@ -$(BUILD_AR_DIR) :; mkdir $@ - -$(BUILD_SO_DIR)/%.o : %.cxx | $(BUILD_SO_DIR) - $(CXX) $< -c $(SO_CXXFLAGS) -I$(LUA_INCLUDE_DIR) -MMD -o $@ - -$(BUILD_AR_DIR)/%.o : %.cxx | $(BUILD_AR_DIR) - $(CXX) $< -c $(AR_CXXFLAGS) -I$(LUA_INCLUDE_DIR) -MMD -o $@ - -ifneq ($(OS), Darwin) -$(C_SO_NAME) : $(addprefix $(BUILD_SO_DIR)/, ${LIBAC_SO_SRC:.cxx=.o}) - $(CXX) $+ -shared -Wl,-soname=$(C_SO_NAME) $(SO_LFLAGS) -o $@ - cat $(addprefix $(BUILD_SO_DIR)/, ${LIBAC_SO_SRC:.cxx=.d}) > c_so_dep.txt - -$(LUA_SO_NAME) : $(addprefix $(BUILD_SO_DIR)/, ${LUA_SO_SRC:.cxx=.o}) - $(CXX) $+ -shared -Wl,-soname=$(LUA_SO_NAME) $(SO_LFLAGS) -o $@ - cat $(addprefix $(BUILD_SO_DIR)/, ${LUA_SO_SRC:.cxx=.d}) > lua_so_dep.txt - -else -$(C_SO_NAME) : $(addprefix $(BUILD_SO_DIR)/, ${LIBAC_SO_SRC:.cxx=.o}) - $(CXX) $+ -shared $(SO_LFLAGS) -o $@ - cat $(addprefix $(BUILD_SO_DIR)/, ${LIBAC_SO_SRC:.cxx=.d}) > c_so_dep.txt - -$(LUA_SO_NAME) : $(addprefix $(BUILD_SO_DIR)/, ${LUA_SO_SRC:.cxx=.o}) - $(CXX) $+ -shared $(SO_LFLAGS) -o $@ -Wl,-undefined,dynamic_lookup - cat $(addprefix $(BUILD_SO_DIR)/, ${LUA_SO_SRC:.cxx=.d}) > lua_so_dep.txt -endif - -$(AR_NAME) : $(addprefix $(BUILD_AR_DIR)/, ${LIBAC_A_SRC:.cxx=.o}) - $(AR) $(AR_FLAGS) $@ $+ - cat $(addprefix $(BUILD_AR_DIR)/, ${LIBAC_A_SRC:.cxx=.d}) > lua_so_dep.txt - -############################################################################# -# -# Misc -# -############################################################################# -# -test : $(C_SO_NAME) - $(MAKE) -C tests && \ - luajit tests/lua_test.lua && \ - luajit tests/load_ac_test.lua - -benchmark: $(C_SO_NAME) - $(MAKE) benchmark -C tests - -clean : - -rm -rf *.o *.d c_so_dep.txt lua_so_dep.txt ar_dep.txt $(TEST) \ - $(C_SO_NAME) $(LUA_SO_NAME) $(TEST) $(BUILD_SO_DIR) $(BUILD_AR_DIR) \ - $(AR_NAME) - make clean -C tests - -install: - install -D -m 755 $(C_SO_NAME) $(DESTDIR)/$(SO_TARGET_DIR)/$(C_SO_NAME) - install -D -m 755 $(LUA_SO_NAME) $(DESTDIR)/$(SO_TARGET_DIR)/$(LUA_SO_NAME) - install -D -m 664 load_ac.lua $(DESTDIR)/$(LUA_TARGET_DIR)/load_ac.lua diff --git a/src/vendor/lua-aho-corasick/ac_lua.cxx b/src/vendor/lua-aho-corasick/ac_lua.cxx deleted file mode 100644 index 5e25ccc36..000000000 --- a/src/vendor/lua-aho-corasick/ac_lua.cxx +++ /dev/null @@ -1,173 +0,0 @@ -// Interface functions for libac.so -// -#include -#include -#include "ac_slow.hpp" -#include "ac_fast.hpp" -#include "ac.h" // for the definition of ac_result_t -#include "ac_util.hpp" - -extern "C" { - #include - #include -} - -#if defined(USE_SLOW_VER) -#error "Not going to implement it" -#endif - -using namespace std; -static const char* tname = "aho-corasick"; - -class BufAlloc : public Buf_Allocator { -public: - BufAlloc(lua_State* L) : _L(L) {} - virtual AC_Buffer* alloc(int sz) { - return (AC_Buffer*)lua_newuserdata (_L, sz); - } - - // Let GC to take care. - virtual void free() {} - -private: - lua_State* _L; -}; - -static bool -_create_helper(lua_State* L, const vector& str_v, - const vector& strlen_v) { - ASSERT(str_v.size() == strlen_v.size()); - - ACS_Constructor acc; - BufAlloc ba(L); - - // Step 1: construct the slow version. - unsigned int strnum = str_v.size(); - const char** str_vect = new const char*[strnum]; - unsigned int* strlen_vect = new unsigned int[strnum]; - - int idx = 0; - for (vector::const_iterator i = str_v.begin(), e = str_v.end(); - i != e; i++) { - str_vect[idx++] = *i; - } - - idx = 0; - for (vector::const_iterator i = strlen_v.begin(), - e = strlen_v.end(); i != e; i++) { - strlen_vect[idx++] = *i; - } - - acc.Construct(str_vect, strlen_vect, idx); - delete[] str_vect; - delete[] strlen_vect; - - // Step 2: convert to fast version - AC_Converter cvt(acc, ba); - return cvt.Convert() != 0; -} - -static ac_result_t -_match_helper(buf_header_t* ac, const char *str, unsigned int len) { - AC_Buffer* buf = (AC_Buffer*)(void*)ac; - ASSERT(ac->magic_num == AC_MAGIC_NUM); - - ac_result_t r = Match(buf, str, len); - return r; -} - -// LUA semantic: -// input: array of strings -// output: userdata containing the AC-graph (i.e. the AC_Buffer). -// -static int -lac_create(lua_State* L) { - // The table of the array must be the 1st argument. - int input_tab = 1; - - luaL_checktype(L, input_tab, LUA_TTABLE); - - // Init the "iteartor". - lua_pushnil(L); - - vector str_v; - vector strlen_v; - - // Loop over the elements - while (lua_next(L, input_tab)) { - size_t str_len; - const char* s = luaL_checklstring(L, -1, &str_len); - str_v.push_back(s); - strlen_v.push_back(str_len); - - // remove the value, but keep the key as the iterator. - lua_pop(L, 1); - } - - // pop the nil value - lua_pop(L, 1); - - if (_create_helper(L, str_v, strlen_v)) { - // The AC graph, as a userdata is already pushed to the stack, hence 1. - return 1; - } - - return 0; -} - -// LUA input: -// arg1: the userdata, representing the AC graph, returned from l_create(). -// arg2: the string to be matched. -// -// LUA return: -// if match, return index range of the match; otherwise nil is returned. -// -static int -lac_match(lua_State* L) { - buf_header_t* ac = (buf_header_t*)lua_touserdata(L, 1); - if (!ac) { - luaL_checkudata(L, 1, tname); - return 0; - } - - size_t len; - const char* str; - #if LUA_VERSION_NUM >= 502 - str = luaL_tolstring(L, 2, &len); - #else - str = lua_tolstring(L, 2, &len); - #endif - if (!str) { - luaL_checkstring(L, 2); - return 0; - } - - ac_result_t r = _match_helper(ac, str, len); - if (r.match_begin != -1) { - lua_pushinteger(L, r.match_begin); - lua_pushinteger(L, r.match_end); - return 2; - } - - return 0; -} - -static const struct luaL_Reg lib_funcs[] = { - { "create", lac_create }, - { "match", lac_match }, - {0, 0} -}; - -extern "C" int AC_EXPORT -luaopen_ahocorasick(lua_State* L) { - luaL_newmetatable(L, tname); - -#if LUA_VERSION_NUM == 501 - luaL_register(L, tname, lib_funcs); -#elif LUA_VERSION_NUM >= 502 - luaL_newlib(L, lib_funcs); -#else - #error "Don't know how to do it right" -#endif - return 1; -} diff --git a/src/vendor/lua-aho-corasick/load_ac.lua b/src/vendor/lua-aho-corasick/load_ac.lua deleted file mode 100644 index eb704465d..000000000 --- a/src/vendor/lua-aho-corasick/load_ac.lua +++ /dev/null @@ -1,90 +0,0 @@ --- Helper wrappring script for loading shared object libac.so (FFI interface) --- from package.cpath instead of LD_LIBRARTY_PATH. --- - -local ffi = require 'ffi' -ffi.cdef[[ - void* ac_create(const char** str_v, unsigned int* strlen_v, - unsigned int v_len); - int ac_match2(void*, const char *str, int len); - void ac_free(void*); -]] - -local _M = {} - -local string_gmatch = string.gmatch -local string_match = string.match - -local ac_lib = nil -local ac_create = nil -local ac_match = nil -local ac_free = nil - ---[[ Find shared object file package.cpath, obviating the need of setting - LD_LIBRARY_PATH -]] -local function find_shared_obj(cpath, so_name) - for k, v in string_gmatch(cpath, "[^;]+") do - local so_path = string_match(k, "(.*/)") - if so_path then - -- "so_path" could be nil. e.g, the dir path component is "." - so_path = so_path .. so_name - - -- Don't get me wrong, the only way to know if a file exist is - -- trying to open it. - local f = io.open(so_path) - if f ~= nil then - io.close(f) - return so_path - end - end - end -end - -function _M.load_ac_lib() - if ac_lib ~= nil then - return ac_lib - else - local so_path = find_shared_obj(package.cpath, "libac.so") - if so_path ~= nil then - ac_lib = ffi.load(so_path) - ac_create = ac_lib.ac_create - ac_match = ac_lib.ac_match2 - ac_free = ac_lib.ac_free - return ac_lib - end - end -end - --- Create an Aho-Corasick instance, and return the instance if it was --- successful. -function _M.create_ac(dict) - local strnum = #dict - if ac_lib == nil then - _M.load_ac_lib() - end - - local str_v = ffi.new("const char *[?]", strnum) - local strlen_v = ffi.new("unsigned int [?]", strnum) - - for i = 1, strnum do - local s = dict[i] - str_v[i - 1] = s - strlen_v[i - 1] = #s - end - - local ac = ac_create(str_v, strlen_v, strnum); - if ac ~= nil then - return ffi.gc(ac, ac_free) - end -end - --- Return nil if str doesn't match the dictionary, else return non-nil. -function _M.match(ac, str) - local r = ac_match(ac, str, #str); - if r >= 0 then - return r - end -end - -return _M diff --git a/src/vendor/lua-aho-corasick/mytest.cxx b/src/vendor/lua-aho-corasick/mytest.cxx deleted file mode 100644 index ef3dc8754..000000000 --- a/src/vendor/lua-aho-corasick/mytest.cxx +++ /dev/null @@ -1,200 +0,0 @@ -#include -#include -#include -#include "ac.h" - -using namespace std; - -///////////////////////////////////////////////////////////////////////// -// -// Test using strings from input files -// -///////////////////////////////////////////////////////////////////////// -// -class BigFileTester { -public: - BigFileTester(const char* filepath); - -private: - void Genector -privaete: - const char* _msg; - int _msg_len; - int _key_num; // number of strings in dictionary - int _key_len_idx; -}; - -///////////////////////////////////////////////////////////////////////// -// -// Simple (yet maybe tricky) testings -// -///////////////////////////////////////////////////////////////////////// -// -typedef struct { - const char* str; - const char* match; -} StrPair; - -typedef struct { - const char* name; - const char** dict; - StrPair* strpairs; - int dict_len; - int strpair_num; -} TestingCase; - -class Tests { -public: - Tests(const char* name, - const char* dict[], int dict_len, - StrPair strpairs[], int strpair_num) { - if (!_tests) - _tests = new vector; - - TestingCase tc; - tc.name = name; - tc.dict = dict; - tc.strpairs = strpairs; - tc.dict_len = dict_len; - tc.strpair_num = strpair_num; - _tests->push_back(tc); - } - - static vector* Get_Tests() { return _tests; } - static void Erase_Tests() { delete _tests; _tests = 0; } - -private: - static vector *_tests; -}; - -vector* Tests::_tests = 0; - -static void -simple_test(void) { - int total = 0; - int fail = 0; - - vector *tests = Tests::Get_Tests(); - if (!tests) - return 0; - - for (vector::iterator i = tests->begin(), e = tests->end(); - i != e; i++) { - TestingCase& t = *i; - fprintf(stdout, ">Testing %s\nDictionary:[ ", t.name); - for (int i = 0, e = t.dict_len, need_break=0; i < e; i++) { - fprintf(stdout, "%s, ", t.dict[i]); - if (need_break++ == 16) { - fputs("\n ", stdout); - need_break = 0; - } - } - fputs("]\n", stdout); - - /* Create the dictionary */ - int dict_len = t.dict_len; - ac_t* ac = ac_create(t.dict, dict_len); - - for (int ii = 0, ee = t.strpair_num; ii < ee; ii++, total++) { - const StrPair& sp = t.strpairs[ii]; - const char *str = sp.str; // the string to be matched - const char *match = sp.match; - - fprintf(stdout, "[%3d] Testing '%s' : ", total, str); - - int len = strlen(str); - ac_result_t r = ac_match(ac, str, len); - int m_b = r.match_begin; - int m_e = r.match_end; - - // The return value per se is insane. - if (m_b > m_e || - ((m_b < 0 || m_e < 0) && (m_b != -1 || m_e != -1))) { - fprintf(stdout, "Insane return value (%d, %d)\n", m_b, m_e); - fail ++; - continue; - } - - // If the string is not supposed to match the dictionary. - if (!match) { - if (m_b != -1 || m_e != -1) { - fail ++; - fprintf(stdout, "Not Supposed to match (%d, %d) \n", - m_b, m_e); - } else - fputs("Pass\n", stdout); - continue; - } - - // The string or its substring is match the dict. - if (m_b >= len || m_b >= len) { - fail ++; - fprintf(stdout, - "Return value >= the length of the string (%d, %d)\n", - m_b, m_e); - continue; - } else { - int mlen = strlen(match); - if ((mlen != m_e - m_b + 1) || - strncmp(str + m_b, match, mlen)) { - fail ++; - fprintf(stdout, "Fail\n"); - } else - fprintf(stdout, "Pass\n"); - } - } - fputs("\n", stdout); - ac_free(ac); - } - - fprintf(stdout, "Total : %d, Fail %d\n", total, fail); - - return fail ? -1 : 0; -} - -int -main (int argc, char** argv) { - int res = simple_test(); - return res; -}; - -/* test 1*/ -const char *dict1[] = {"he", "she", "his", "her"}; -StrPair strpair1[] = { - {"he", "he"}, {"she", "she"}, {"his", "his"}, - {"hers", "he"}, {"ahe", "he"}, {"shhe", "he"}, - {"shis2", "his"}, {"ahhe", "he"} -}; -Tests test1("test 1", - dict1, sizeof(dict1)/sizeof(dict1[0]), - strpair1, sizeof(strpair1)/sizeof(strpair1[0])); - -/* test 2*/ -const char *dict2[] = {"poto", "poto"}; /* duplicated strings*/ -StrPair strpair2[] = {{"The pot had a handle", 0}}; -Tests test2("test 2", dict2, 2, strpair2, 1); - -/* test 3*/ -const char *dict3[] = {"The"}; -StrPair strpair3[] = {{"The pot had a handle", "The"}}; -Tests test3("test 3", dict3, 1, strpair3, 1); - -/* test 4*/ -const char *dict4[] = {"pot"}; -StrPair strpair4[] = {{"The pot had a handle", "pot"}}; -Tests test4("test 4", dict4, 1, strpair4, 1); - -/* test 5*/ -const char *dict5[] = {"pot "}; -StrPair strpair5[] = {{"The pot had a handle", "pot "}}; -Tests test5("test 5", dict5, 1, strpair5, 1); - -/* test 6*/ -const char *dict6[] = {"ot h"}; -StrPair strpair6[] = {{"The pot had a handle", "ot h"}}; -Tests test6("test 6", dict6, 1, strpair6, 1); - -/* test 7*/ -const char *dict7[] = {"andle"}; -StrPair strpair7[] = {{"The pot had a handle", "andle"}}; -Tests test7("test 7", dict7, 1, strpair7, 1); diff --git a/src/vendor/lua-aho-corasick/tests/Makefile b/src/vendor/lua-aho-corasick/tests/Makefile deleted file mode 100644 index 54fd90f48..000000000 --- a/src/vendor/lua-aho-corasick/tests/Makefile +++ /dev/null @@ -1,65 +0,0 @@ -OS := $(shell uname) -ifeq ($(OS), Darwin) - SO_EXT := dylib -else - SO_EXT := so -endif - -.PHONY = all clean test runtest benchmark - -PROGRAM = ac_test -BENCHMARK = ac_bench -all: runtest - -CXXFLAGS = -O3 -g -march=native -Wall -DDEBUG -MYCXXFLAGS = -MMD -I.. $(CXXFLAGS) -%.o : %.cxx - $(CXX) $< -c $(MYCXXFLAGS) - --include dep.cxx -SRC = test_main.cxx ac_test_simple.cxx ac_test_aggr.cxx test_bigfile.cxx - -OBJ = ${SRC:.cxx=.o} - --include test_dep.txt --include bench_dep.txt - -$(PROGRAM) $(BENCHMARK) : testinput/text.tar testinput/image.bin -$(PROGRAM) : $(OBJ) ../libac.$(SO_EXT) - $(CXX) $(OBJ) -L.. -lac -o $@ - -cat *.d > test_dep.txt - -$(BENCHMARK) : ac_bench.o ../libac.$(SO_EXT) - $(CXX) ac_bench.o -L.. -lac -o $@ - -cat *.d > bench_dep.txt - -ifneq ($(OS), Darwin) -runtest:$(PROGRAM) - LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):.. ./$(PROGRAM) testinput/* - -benchmark:$(BENCHMARK) - LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):.. ./ac_bench - -else -runtest:$(PROGRAM) - DYLD_LIBRARY_PATH=$(DYLD_LIBRARY_PATH):.. ./$(PROGRAM) testinput/* - -benchmark:$(BENCHMARK) - DYLD_LIBRARY_PATH=$(DYLD_LIBRARY_PATH):.. ./ac_bench - -endif - -testinput/text.tar: - echo "download testing files (gcc tarball)..." - if [ ! -d testinput ] ; then mkdir testinput; fi - cd testinput && \ - curl ftp://ftp.gnu.org/gnu/gcc/gcc-1.42.tar.gz -o text.tar.gz 2>/dev/null \ - && gzip -d text.tar.gz - -testinput/image.bin: - echo "download testing files.." - if [ ! -d testinput ] ; then mkdir testinput; fi - curl http://www.3dvisionlive.com/sites/default/files/Curiosity_render_hiresb.jpg -o $@ 2>/dev/null - -clean: - -rm -f *.o *.d dep.txt $(PROGRAM) $(BENCHMARK) diff --git a/src/vendor/lua-aho-corasick/tests/ac_bench.cxx b/src/vendor/lua-aho-corasick/tests/ac_bench.cxx deleted file mode 100644 index 486ad3237..000000000 --- a/src/vendor/lua-aho-corasick/tests/ac_bench.cxx +++ /dev/null @@ -1,519 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include "ac.h" -#include "ac_util.hpp" - -using namespace std; - -static bool SomethingWrong = false; - -static int iteration = 300; -static string dict_dir; -static string obj_file_dir; -static bool print_help = false; -static int piece_size = 1024; - -class PatternSet { -public: - PatternSet(const char* filepath); - ~PatternSet() { Cleanup(); } - - int getPatternNum() const { return _pat_num; } - const char** getPatternVector() const { return _patterns; } - unsigned int* getPatternLenVector() const { return _pat_len; } - - const char* getErrMessage() const { return _errmsg; } - static bool isDictFile(const char* filepath) { - if (strncmp(basename(const_cast(filepath)), "dict", 4)) - return false; - return true; - } - -private: - bool ExtractPattern(const char* filepath); - void Cleanup(); - - const char** _patterns; - unsigned int* _pat_len; - char* _mmap; - int _fd; - size_t _mmap_size; - int _pat_num; - - const char* _errmsg; -}; - -bool -PatternSet::ExtractPattern(const char* filepath) { - if (!isDictFile(filepath)) - return false; - - struct stat filestat; - if (stat(filepath, &filestat)) { - _errmsg = "fail to call stat()"; - return false; - } - - if (filestat.st_size > 4096 * 1024) { - /* It doesn't seem to be a dictionary file*/ - _errmsg = "file too big?"; - return false; - } - - _fd = open(filepath, 0); - if (_fd == -1) { - _errmsg = "fail to open dictionary file"; - return false; - } - - _mmap_size = filestat.st_size; - _mmap = (char*)mmap(0, filestat.st_size, PROT_READ|PROT_WRITE, - MAP_PRIVATE, _fd, 0); - if (_mmap == MAP_FAILED) { - _errmsg = "fail to call mmap"; - return false; - } - - const char* pat = _mmap; - vector pat_vect; - vector pat_len_vect; - - for (size_t i = 0, e = filestat.st_size; i < e; i++) { - if (_mmap[i] == '\r' || _mmap[i] == '\n') { - _mmap[i] = '\0'; - int len = _mmap + i - pat; - if (len > 0) { - pat_vect.push_back(pat); - pat_len_vect.push_back(len); - } - pat = _mmap + i + 1; - } - } - - ASSERT(pat_vect.size() == pat_len_vect.size()); - - int pat_num = pat_vect.size(); - if (pat_num > 0) { - const char** p = _patterns = new const char*[pat_num]; - int i = 0; - for (vector::iterator iter = pat_vect.begin(), - iter_e = pat_vect.end(); iter != iter_e; ++iter) { - p[i++] = *iter; - } - - i = 0; - unsigned int* q = _pat_len = new unsigned int[pat_num]; - for (vector::iterator iter = pat_len_vect.begin(), - iter_e = pat_len_vect.end(); iter != iter_e; ++iter) { - q[i++] = *iter; - } - } - - _pat_num = pat_num; - if (pat_num <= 0) { - _errmsg = "no pattern at all"; - return false; - } - - return true; -} - -void -PatternSet::Cleanup() { - if (_mmap != MAP_FAILED) { - munmap(_mmap, _mmap_size); - _mmap = (char*)MAP_FAILED; - _mmap_size = 0; - } - - delete[] _patterns; - delete[] _pat_len; - if (_fd != -1) - close(_fd); - _pat_num = -1; -} - -PatternSet::PatternSet(const char* filepath) { - _patterns = 0; - _pat_len = 0; - _mmap = (char*)MAP_FAILED; - _mmap_size = 0; - _pat_num = -1; - _errmsg = ""; - - if (!ExtractPattern(filepath)) - Cleanup(); -} - -bool -getFilesUnderDir(vector& files, const char* path) { - files.clear(); - - DIR* dir = opendir(path); - if (!dir) - return false; - - string path_dir = path; - path_dir += "/"; - - for (;;) { - struct dirent* entry = readdir(dir); - if (entry) { - string filepath = path_dir + entry->d_name; - struct stat file_stat; - if (stat(filepath.c_str(), &file_stat)) { - closedir(dir); - return false; - } - - if (S_ISREG(file_stat.st_mode)) - files.push_back(filepath); - - continue; - } - - if (errno) { - return false; - } - break; - } - closedir(dir); - return true; -} - -class Timer { -public: - Timer() { - my_clock_gettime(&_start); - _stop = _start; - _acc.tv_sec = 0; - _acc.tv_nsec = 0; - } - - const Timer& operator += (const Timer& that) { - time_t sec = _acc.tv_sec + that._acc.tv_sec; - long nsec = _acc.tv_nsec + that._acc.tv_nsec; - if (nsec > 1000000000) { - nsec -= 1000000000; - sec += 1; - } - _acc.tv_sec = sec; - _acc.tv_nsec = nsec; - return *this; - } - - // return duration in us - size_t getDuration() const { - return _acc.tv_sec * (size_t)1000000 + _acc.tv_nsec/1000; - } - - void Start(bool acc=true) { - my_clock_gettime(&_start); - } - - void Stop() { - my_clock_gettime(&_stop); - struct timespec t = CalcDuration(); - _acc = add_duration(_acc, t); - } - -private: - int my_clock_gettime(struct timespec* t) { -#ifdef __linux - return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t); -#else - struct timeval tv; - int rc = gettimeofday(&tv, 0); - t->tv_sec = tv.tv_sec; - t->tv_nsec = tv.tv_usec * 1000; - return rc; -#endif - } - - struct timespec add_duration(const struct timespec& dur1, - const struct timespec& dur2) { - time_t sec = dur1.tv_sec + dur2.tv_sec; - long nsec = dur1.tv_nsec + dur2.tv_nsec; - if (nsec > 1000000000) { - nsec -= 1000000000; - sec += 1; - } - timespec t; - t.tv_sec = sec; - t.tv_nsec = nsec; - - return t; - } - - struct timespec CalcDuration() const { - timespec diff; - if ((_stop.tv_nsec - _start.tv_nsec)<0) { - diff.tv_sec = _stop.tv_sec - _start.tv_sec - 1; - diff.tv_nsec = 1000000000 + _stop.tv_nsec - _start.tv_nsec; - } else { - diff.tv_sec = _stop.tv_sec - _start.tv_sec; - diff.tv_nsec = _stop.tv_nsec - _start.tv_nsec; - } - return diff; - } - - struct timespec _start; - struct timespec _stop; - struct timespec _acc; -}; - -class Benchmark { -public: - Benchmark(const PatternSet& pat_set, const char* infile): - _pat_set(pat_set), _infile(infile) { - _mmap = (char*)MAP_FAILED; - _file_sz = 0; - _fd = -1; - } - - ~Benchmark() { - if (_mmap != MAP_FAILED) - munmap(_mmap, _file_sz); - if (_fd != -1) - close(_fd); - } - - bool Run(int iteration); - const Timer& getTimer() const { return _timer; } - -private: - const PatternSet& _pat_set; - const char* _infile; - char* _mmap; - int _fd; - size_t _file_sz; // input file size - Timer _timer; -}; - -bool -Benchmark::Run(int iteration) { - if (_pat_set.getPatternNum() <= 0) { - SomethingWrong = true; - return false; - } - - if (_mmap == MAP_FAILED) { - struct stat filestat; - if (stat(_infile, &filestat)) { - SomethingWrong = true; - return false; - } - - if (!S_ISREG(filestat.st_mode)) { - SomethingWrong = true; - return false; - } - - _fd = open(_infile, 0); - if (_fd == -1) - return false; - - _mmap = (char*)mmap(0, filestat.st_size, PROT_READ|PROT_WRITE, - MAP_PRIVATE, _fd, 0); - - if (_mmap == MAP_FAILED) { - SomethingWrong = true; - return false; - } - - _file_sz = filestat.st_size; - } - - ac_t* ac = ac_create(_pat_set.getPatternVector(), - _pat_set.getPatternLenVector(), - _pat_set.getPatternNum()); - if (!ac) { - SomethingWrong = true; - return false; - } - - int piece_num = _file_sz/piece_size; - - _timer.Start(false); - - /* Stupid compiler may not be able to promote piece_size into register. - * Do it manually. - */ - int piece_sz = piece_size; - for (int i = 0; i < iteration; i++) { - size_t match_ofst = 0; - for (int piece_idx = 0; piece_idx < piece_num; piece_idx ++) { - ac_match2(ac, _mmap + match_ofst, piece_sz); - match_ofst += piece_sz; - } - if (match_ofst != _file_sz) - ac_match2(ac, _mmap + match_ofst, _file_sz - match_ofst); - } - _timer.Stop(); - return true; -} - -const char* short_opt = "hd:f:i:p:"; -const struct option long_opts[] = { - {"help", no_argument, 0, 'h'}, - {"iteration", required_argument, 0, 'i'}, - {"dictionary-dir", required_argument, 0, 'd'}, - {"obj-file-dir", required_argument, 0, 'f'}, - {"piece-size", required_argument, 0, 'p'}, -}; - -static void -PrintHelp(const char* prog_name) { - const char* msg = -"Usage %s [OPTIONS]\n" -" -d, --dictionary-dir : specify the dictionary directory (./dict by default)\n" -" -f, --obj-file-dir : specify the object file directory\n" -" (./testinput by default)\n" -" -i, --iteration : Run this many iteration for each pattern match\n" -" -p, --piece-size : The size of 'piece' in byte. The input file is\n" -" divided into pieces, and match function is working\n" -" on one piece at a time. The default size of piece\n" -" is 1k byte.\n"; - - fprintf(stdout, msg, prog_name); -} - -static bool -getOptions(int argc, char** argv) { - bool dict_dir_set = false; - bool objfile_dir_set = false; - int opt_index; - - while (1) { - if (print_help) break; - - int c = getopt_long(argc, argv, short_opt, long_opts, &opt_index); - - if (c == -1) break; - if (c == 0) { c = long_opts[opt_index].val; } - - switch(c) { - case 'h': - print_help = true; - break; - - case 'i': - iteration = atol(optarg); - break; - - case 'd': - dict_dir = optarg; - dict_dir_set = true; - break; - - case 'f': - obj_file_dir = optarg; - objfile_dir_set = true; - break; - - case 'p': - piece_size = atol(optarg); - break; - - case '?': - default: - return false; - } - } - - if (print_help) - return true; - - string basedir(dirname(argv[0])); - if (!dict_dir_set) - dict_dir = basedir + "/dict"; - - if (!objfile_dir_set) - obj_file_dir = basedir + "/testinput"; - - return true; -} - -int -main(int argc, char** argv) { - if (!getOptions(argc, argv)) - return -1; - - if (print_help) { - PrintHelp(argv[0]); - return 0; - } - -#ifndef __linux - fprintf(stdout, "\n!!!WARNING: On this OS, the execution time is measured" - " by gettimeofday(2) which is imprecise!!!\n\n"); -#endif - - fprintf(stdout, "Test with iteration = %d, piece size = %d, and", - iteration, piece_size); - fprintf(stdout, "\n dictionary dir = %s\n object file dir = %s\n\n", - dict_dir.c_str(), obj_file_dir.c_str()); - - vector dict_files; - vector input_files; - - if (!getFilesUnderDir(dict_files, dict_dir.c_str())) { - fprintf(stdout, "fail to find dictionary files\n"); - return -1; - } - - if (!getFilesUnderDir(input_files, obj_file_dir.c_str())) { - fprintf(stdout, "fail to find test input files\n"); - return -1; - } - - for (vector::iterator diter = dict_files.begin(), - diter_e = dict_files.end(); diter != diter_e; ++diter) { - - const char* dict_name = diter->c_str(); - if (!PatternSet::isDictFile(dict_name)) - continue; - - PatternSet ps(dict_name); - if (ps.getPatternNum() <= 0) { - fprintf(stdout, "fail to open dictionary file %s : %s\n", - dict_name, ps.getErrMessage()); - SomethingWrong = true; - continue; - } - - fprintf(stdout, "Using dictionary %s\n", dict_name); - Timer timer; - for (vector::iterator iter = input_files.begin(), - iter_e = input_files.end(); iter != iter_e; ++iter) { - fprintf(stdout, " testing %s ... ", iter->c_str()); - fflush(stdout); - Benchmark bm(ps, iter->c_str()); - bm.Run(iteration); - const Timer& t = bm.getTimer(); - timer += bm.getTimer(); - fprintf(stdout, "elapsed %.3f\n", t.getDuration() / 1000000.0); - } - - fprintf(stdout, - "\n==========================================================\n" - " Total Elapse %.3f\n\n", timer.getDuration() / 1000000.0); - } - - return SomethingWrong ? -1 : 0; -} diff --git a/src/vendor/lua-aho-corasick/tests/ac_test_aggr.cxx b/src/vendor/lua-aho-corasick/tests/ac_test_aggr.cxx deleted file mode 100644 index 4ea02bc8a..000000000 --- a/src/vendor/lua-aho-corasick/tests/ac_test_aggr.cxx +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "ac.h" -#include "ac_util.hpp" -#include "test_base.hpp" - -using namespace std; - -namespace { -class ACBigFileTester : public BigFileTester { -public: - ACBigFileTester(const char* filepath) : BigFileTester(filepath){}; - -private: - virtual buf_header_t* PM_Create(const char** strv, uint32* strlenv, - uint32 vect_len) { - return (buf_header_t*)ac_create(strv, strlenv, vect_len); - } - - virtual void PM_Free(buf_header_t* PM) { ac_free(PM); } - virtual bool Run_Helper(buf_header_t* PM); -}; - -class ACTestAggressive: public ACTestBase { -public: - ACTestAggressive(const vector& files, const char* banner) - : ACTestBase(banner), _files(files) {} - virtual bool Run(); - -private: - void PrintSummary(int total, int fail) { - fprintf(stdout, "Test count : %d, fail: %d\n", total, fail); - fflush(stdout); - } - vector _files; -}; - -} // end of anonymous namespace - -bool -ACBigFileTester::Run_Helper(buf_header_t* PM) { - int fail = 0; - // advance one chunk at a time. - int len = _msg_len; - int chunk_sz = _chunk_sz; - - vector c_style_keys; - for (int i = 0, e = _keys.size(); i != e; i++) { - const char* key = _keys[i].first; - int len = _keys[i].second; - char *t = new char[len+1]; - memcpy(t, key, len); - t[len] = '\0'; - c_style_keys.push_back(t); - } - - for (int ofst = 0, chunk_idx = 0, chunk_num = _chunk_num; - chunk_idx < chunk_num; ofst += chunk_sz, chunk_idx++) { - const char* substring = _msg + ofst; - ac_result_t r = ac_match((ac_t*)(void*)PM, substring , len - ofst); - int m_b = r.match_begin; - int m_e = r.match_end; - - if (m_b < 0 || m_e < 0 || m_e <= m_b || m_e >= len) { - fprintf(stdout, "fail to find match substring[%d:%d])\n", - ofst, len - 1); - fail ++; - continue; - } - - const char* match_str = _msg + len; - int strstr_len = 0; - int key_idx = -1; - - for (int i = 0, e = c_style_keys.size(); i != e; i++) { - const char* key = c_style_keys[i]; - if (const char *m = strstr(substring, key)) { - if (m < match_str) { - match_str = m; - strstr_len = _keys[i].second; - key_idx = i; - } - } - } - ASSERT(key_idx != -1); - if ((match_str - substring != m_b)) { - fprintf(stdout, - "Fail to find match substring[%d:%d])," - " expected to find match at offset %d instead of %d\n", - ofst, len - 1, - (int)(match_str - _msg), ofst + m_b); - fprintf(stdout, "%d vs %d (key idx %d)\n", strstr_len, m_e - m_b + 1, key_idx); - PrintStr(stdout, match_str, strstr_len); - fprintf(stdout, "\n"); - PrintStr(stdout, _msg + ofst + m_b, - m_e - m_b + 1); - fprintf(stdout, "\n"); - fail ++; - } - } - for (vector::iterator i = c_style_keys.begin(), - e = c_style_keys.end(); i != e; i++) { - delete[] *i; - } - - return fail == 0; -} - -bool -ACTestAggressive::Run() { - int fail = 0; - for (vector::iterator i = _files.begin(), e = _files.end(); - i != e; i++) { - ACBigFileTester bft(*i); - if (!bft.Run()) - fail ++; - } - return fail == 0; -} - -bool -Run_AC_Aggressive_Test(const vector& files) { - ACTestAggressive t(files, "AC Aggressive test"); - t.PrintBanner(); - return t.Run(); -} diff --git a/src/vendor/lua-aho-corasick/tests/ac_test_simple.cxx b/src/vendor/lua-aho-corasick/tests/ac_test_simple.cxx deleted file mode 100644 index 3b2d18123..000000000 --- a/src/vendor/lua-aho-corasick/tests/ac_test_simple.cxx +++ /dev/null @@ -1,275 +0,0 @@ -#include -#include -#include -#include - -#include "ac.h" -#include "ac_util.hpp" -#include "test_base.hpp" - -using namespace std; - -namespace { -typedef struct { - const char* str; - const char* match; -} StrPair; - -typedef enum { - MV_FIRST_MATCH = 0, - MV_LEFT_LONGEST = 1, -} MatchVariant; - -typedef struct { - const char* name; - const char** dict; - StrPair* strpairs; - int dict_len; - int strpair_num; - MatchVariant match_variant; -} TestingCase; - -class Tests { -public: - Tests(const char* name, - const char* dict[], int dict_len, - StrPair strpairs[], int strpair_num, - MatchVariant mv = MV_FIRST_MATCH) { - if (!_tests) - _tests = new vector; - - TestingCase tc; - tc.name = name; - tc.dict = dict; - tc.strpairs = strpairs; - tc.dict_len = dict_len; - tc.strpair_num = strpair_num; - tc.match_variant = mv; - _tests->push_back(tc); - } - - static vector* Get_Tests() { return _tests; } - static void Erase_Tests() { delete _tests; _tests = 0; } - -private: - static vector *_tests; -}; - -class LeftLongestTests : public Tests { -public: - LeftLongestTests (const char* name, const char* dict[], int dict_len, - StrPair strpairs[], int strpair_num): - Tests(name, dict, dict_len, strpairs, strpair_num, MV_LEFT_LONGEST) { - } -}; - -vector* Tests::_tests = 0; - -class ACTestSimple: public ACTestBase { -public: - ACTestSimple(const char* banner) : ACTestBase(banner) {} - virtual bool Run(); - -private: - void PrintSummary(int total, int fail) { - fprintf(stdout, "Test count : %d, fail: %d\n", total, fail); - fflush(stdout); - } -}; -} - -bool -ACTestSimple::Run() { - int total = 0; - int fail = 0; - - vector *tests = Tests::Get_Tests(); - if (!tests) { - PrintSummary(0, 0); - return true; - } - - for (vector::iterator i = tests->begin(), e = tests->end(); - i != e; i++) { - TestingCase& t = *i; - int dict_len = t.dict_len; - unsigned int* strlen_v = new unsigned int[dict_len]; - - fprintf(stdout, ">Testing %s\nDictionary:[ ", t.name); - for (int i = 0, need_break=0; i < dict_len; i++) { - const char* s = t.dict[i]; - fprintf(stdout, "%s, ", s); - strlen_v[i] = strlen(s); - if (need_break++ == 16) { - fputs("\n ", stdout); - need_break = 0; - } - } - fputs("]\n", stdout); - - /* Create the dictionary */ - ac_t* ac = ac_create(t.dict, strlen_v, dict_len); - delete[] strlen_v; - - for (int ii = 0, ee = t.strpair_num; ii < ee; ii++, total++) { - const StrPair& sp = t.strpairs[ii]; - const char *str = sp.str; // the string to be matched - const char *match = sp.match; - - fprintf(stdout, "[%3d] Testing '%s' : ", total, str); - - int len = strlen(str); - ac_result_t r; - if (t.match_variant == MV_FIRST_MATCH) - r = ac_match(ac, str, len); - else if (t.match_variant == MV_LEFT_LONGEST) - r = ac_match_longest_l(ac, str, len); - else { - ASSERT(false && "Unknown variant"); - } - - int m_b = r.match_begin; - int m_e = r.match_end; - - // The return value per se is insane. - if (m_b > m_e || - ((m_b < 0 || m_e < 0) && (m_b != -1 || m_e != -1))) { - fprintf(stdout, "Insane return value (%d, %d)\n", m_b, m_e); - fail ++; - continue; - } - - // If the string is not supposed to match the dictionary. - if (!match) { - if (m_b != -1 || m_e != -1) { - fail ++; - fprintf(stdout, "Not Supposed to match (%d, %d) \n", - m_b, m_e); - } else - fputs("Pass\n", stdout); - continue; - } - - // The string or its substring is match the dict. - if (m_b >= len || m_b >= len) { - fail ++; - fprintf(stdout, - "Return value >= the length of the string (%d, %d)\n", - m_b, m_e); - continue; - } else { - int mlen = strlen(match); - if ((mlen != m_e - m_b + 1) || - strncmp(str + m_b, match, mlen)) { - fail ++; - fprintf(stdout, "Fail\n"); - } else - fprintf(stdout, "Pass\n"); - } - } - fputs("\n", stdout); - ac_free(ac); - } - - PrintSummary(total, fail); - return fail == 0; -} - -bool -Run_AC_Simple_Test() { - ACTestSimple t("AC Simple test"); - t.PrintBanner(); - return t.Run(); -} - -////////////////////////////////////////////////////////////////////////////// -// -// Testing cases for first-match variant (i.e. test ac_match()) -// -////////////////////////////////////////////////////////////////////////////// -// - -/* test 1*/ -const char *dict1[] = {"he", "she", "his", "her"}; -StrPair strpair1[] = { - {"he", "he"}, {"she", "she"}, {"his", "his"}, - {"hers", "he"}, {"ahe", "he"}, {"shhe", "he"}, - {"shis2", "his"}, {"ahhe", "he"} -}; -Tests test1("test 1", - dict1, sizeof(dict1)/sizeof(dict1[0]), - strpair1, sizeof(strpair1)/sizeof(strpair1[0])); - -/* test 2*/ -const char *dict2[] = {"poto", "poto"}; /* duplicated strings*/ -StrPair strpair2[] = {{"The pot had a handle", 0}}; -Tests test2("test 2", dict2, 2, strpair2, 1); - -/* test 3*/ -const char *dict3[] = {"The"}; -StrPair strpair3[] = {{"The pot had a handle", "The"}}; -Tests test3("test 3", dict3, 1, strpair3, 1); - -/* test 4*/ -const char *dict4[] = {"pot"}; -StrPair strpair4[] = {{"The pot had a handle", "pot"}}; -Tests test4("test 4", dict4, 1, strpair4, 1); - -/* test 5*/ -const char *dict5[] = {"pot "}; -StrPair strpair5[] = {{"The pot had a handle", "pot "}}; -Tests test5("test 5", dict5, 1, strpair5, 1); - -/* test 6*/ -const char *dict6[] = {"ot h"}; -StrPair strpair6[] = {{"The pot had a handle", "ot h"}}; -Tests test6("test 6", dict6, 1, strpair6, 1); - -/* test 7*/ -const char *dict7[] = {"andle"}; -StrPair strpair7[] = {{"The pot had a handle", "andle"}}; -Tests test7("test 7", dict7, 1, strpair7, 1); - -const char *dict8[] = {"aaab"}; -StrPair strpair8[] = {{"aaaaaaab", "aaab"}}; -Tests test8("test 8", dict8, 1, strpair8, 1); - -const char *dict9[] = {"haha", "z"}; -StrPair strpair9[] = {{"aaaaz", "z"}, {"z", "z"}}; -Tests test9("test 9", dict9, 2, strpair9, 2); - -/* test the case when input string doesn't contain even a single char - * of the pattern in dictionary. - */ -const char *dict10[] = {"abc"}; -StrPair strpair10[] = {{"cde", 0}}; -Tests test10("test 10", dict10, 1, strpair10, 1); - - -////////////////////////////////////////////////////////////////////////////// -// -// Testing cases for first longest match variant (i.e. -// test ac_match_longest_l()) -// -////////////////////////////////////////////////////////////////////////////// -// - -// This was actually first motivation for left-longest-match -const char *dict100[] = {"Mozilla", "Mozilla Mobile"}; -StrPair strpair100[] = {{"User Agent containing string Mozilla Mobile", "Mozilla Mobile"}}; -LeftLongestTests test100("l_test 100", dict100, 2, strpair100, 1); - -// Dict with single char is tricky -const char *dict101[] = {"a", "abc"}; -StrPair strpair101[] = {{"abcdef", "abc"}}; -LeftLongestTests test101("l_test 101", dict101, 2, strpair101, 1); - -// Testing case with partially overlapping patterns. The purpose is to -// check if the fail-link leading from terminal state is correct. -// -// The fail-link leading from terminal-state does not matter in -// match-first-occurrence variant, as it stop when a terminal is hit. -// -const char *dict102[] = {"abc", "bcdef"}; -StrPair strpair102[] = {{"abcdef", "bcdef"}}; -LeftLongestTests test102("l_test 102", dict102, 2, strpair102, 1); diff --git a/src/vendor/lua-aho-corasick/tests/dict/README.txt b/src/vendor/lua-aho-corasick/tests/dict/README.txt deleted file mode 100644 index cd50b4147..000000000 --- a/src/vendor/lua-aho-corasick/tests/dict/README.txt +++ /dev/null @@ -1 +0,0 @@ -This directory contains pattern set of benchmark purpose. diff --git a/src/vendor/lua-aho-corasick/tests/dict/dict1.txt b/src/vendor/lua-aho-corasick/tests/dict/dict1.txt deleted file mode 100644 index 94085a9e8..000000000 --- a/src/vendor/lua-aho-corasick/tests/dict/dict1.txt +++ /dev/null @@ -1,11 +0,0 @@ -false_return@ -forloop#haha -wtfprogram -mmaporunmap -ThIs?Module!IsEssential -struct rtlwtf -gettIMEOfdayWrong -edistribution_and_use_in_@source -Copyright~#@ -while {! -!%SQLinje diff --git a/src/vendor/lua-aho-corasick/tests/load_ac_test.lua b/src/vendor/lua-aho-corasick/tests/load_ac_test.lua deleted file mode 100644 index 7fb7db9c7..000000000 --- a/src/vendor/lua-aho-corasick/tests/load_ac_test.lua +++ /dev/null @@ -1,82 +0,0 @@ --- This script is to test load_ac.lua --- --- Some notes: --- 1. The purpose of this script is not to check if the libac.so work --- properly, it is to check if there are something stupid in load_ac.lua --- --- 2. There are bunch of collectgarbage() calls, the purpose is to make --- sure the shared lib is not unloaded after GC. - --- load_ac.lua looks up libac.so via package.cpath rather than LD_LIBRARY_PATH, --- prepend (instead of appending) some insane paths here to see if it quit --- prematurely. --- -package.cpath = ".;./?.so;" .. package.cpath - -local ac = require "load_ac" - -local ac_create = ac.create_ac -local ac_match = ac.match -local string_fmt = string.format -local string_sub = string.sub - -local err_cnt = 0 -local function mytest(testname, dict, match, notmatch) - print(">Testing ", testname) - - io.write(string_fmt("Dictionary: ")); - for i=1, #dict do - io.write(string_fmt("%s, ", dict[i])) - end - print "" - - local ac_inst = ac_create(dict); - collectgarbage() - for i=1, #match do - local str = match[i] - io.write(string_fmt("Matching %s, ", str)) - local b = ac_match(ac_inst, str) - if b then - print "pass" - else - err_cnt = err_cnt + 1 - print "fail" - end - collectgarbage() - end - - if notmatch == nil then - return - end - - collectgarbage() - - for i = 1, #notmatch do - local str = notmatch[i] - io.write(string_fmt("*Matching %s, ", str)) - local r = ac_match(ac_inst, str) - if r then - err_cnt = err_cnt + 1 - print("fail") - else - print("succ") - end - collectgarbage() - end - ac_inst = nil - collectgarbage() -end - -print("") -print("====== Test to see if load_ac.lua works properly ========") - -mytest("test1", - {"he", "she", "his", "her", "str\0ing"}, - -- matching cases - { "he", "she", "his", "hers", "ahe", "shhe", "shis2", "ahhe", "str\0ing" }, - - -- not matching case - {"str\0", "str"} - ) - -os.exit((err_cnt == 0) and 0 or 1) diff --git a/src/vendor/lua-aho-corasick/tests/lua_test.lua b/src/vendor/lua-aho-corasick/tests/lua_test.lua deleted file mode 100644 index cfe178f3f..000000000 --- a/src/vendor/lua-aho-corasick/tests/lua_test.lua +++ /dev/null @@ -1,67 +0,0 @@ --- This script is to test ahocorasick.so not libac.so --- -local ac = require "ahocorasick" - -local ac_create = ac.create -local ac_match = ac.match -local string_fmt = string.format -local string_sub = string.sub - -local err_cnt = 0 -local function mytest(testname, dict, match, notmatch) - print(">Testing ", testname) - - io.write(string_fmt("Dictionary: ")); - for i=1, #dict do - io.write(string_fmt("%s, ", dict[i])) - end - print "" - - local ac_inst = ac_create(dict); - for i=1, #match do - local str = match[i][1] - local substr = match[i][2] - io.write(string_fmt("Matching %s, ", str)) - local b, e = ac_match(ac_inst, str) - if b and e and (string_sub(str, b+1, e+1) == substr) then - print "pass" - else - err_cnt = err_cnt + 1 - print "fail" - end - --print("gc is called") - collectgarbage() - end - - if notmatch == nil then - return - end - - for i = 1, #notmatch do - local str = notmatch[i] - io.write(string_fmt("*Matching %s, ", str)) - local r = ac_match(ac_inst, str) - if r then - err_cnt = err_cnt + 1 - print("fail") - else - print("succ") - end - collectgarbage() - end -end - -mytest("test1", - {"he", "she", "his", "her", "str\0ing"}, - -- matching cases - { {"he", "he"}, {"she", "she"}, {"his", "his"}, {"hers", "he"}, - {"ahe", "he"}, {"shhe", "he"}, {"shis2", "his"}, {"ahhe", "he"}, - {"str\0ing", "str\0ing"} - }, - - -- not matching case - {"str\0", "str"} - - ) - -os.exit((err_cnt == 0) and 0 or 1) diff --git a/src/vendor/lua-aho-corasick/tests/test_base.hpp b/src/vendor/lua-aho-corasick/tests/test_base.hpp deleted file mode 100644 index 7758371a4..000000000 --- a/src/vendor/lua-aho-corasick/tests/test_base.hpp +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef TEST_BASE_H -#define TEST_BASE_H - -#include -#include -#include - -using namespace std; -class ACTestBase { -public: - ACTestBase(const char* name) :_banner(name) {} - virtual void PrintBanner() { - fprintf(stdout, "\n===== %s ====\n", _banner.c_str()); - } - - virtual bool Run() = 0; -private: - string _banner; -}; - -typedef std::pair StrInfo; -class BigFileTester { -public: - BigFileTester(const char* filepath); - virtual ~BigFileTester() { Cleanup(); } - - bool Run(); - -protected: - virtual buf_header_t* PM_Create(const char** strv, uint32_t* strlenv, - uint32_t vect_len) = 0; - virtual void PM_Free(buf_header_t*) = 0; - virtual bool Run_Helper(buf_header_t* PM) = 0; - - // Return true if the '\0' is valid char of a string. - virtual bool Str_C_Style() { return true; } - - bool GenerateKeys(); - void Cleanup(); - void PrintStr(FILE*, const char* str, int len); - -protected: - const char* _filepath; - int _fd; - vector _keys; - char* _msg; - int _msg_len; - int _key_num; // number of strings in dictionary - int _chunk_sz; - int _chunk_num; - - int _max_key_num; - int _key_min_len; - int _key_max_len; -}; - -extern bool Run_AC_Simple_Test(); -extern bool Run_AC_Aggressive_Test(const vector& files); - -#endif diff --git a/src/vendor/lua-aho-corasick/tests/test_bigfile.cxx b/src/vendor/lua-aho-corasick/tests/test_bigfile.cxx deleted file mode 100644 index f189d8d8f..000000000 --- a/src/vendor/lua-aho-corasick/tests/test_bigfile.cxx +++ /dev/null @@ -1,167 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "ac.h" -#include "ac_util.hpp" -#include "test_base.hpp" - -/////////////////////////////////////////////////////////////////////////// -// -// Implementation of BigFileTester -// -/////////////////////////////////////////////////////////////////////////// -// -BigFileTester::BigFileTester(const char* filepath) { - _filepath = filepath; - _fd = -1; - _msg = (char*)MAP_FAILED; - _msg_len = 0; - _key_num = 0; - _chunk_sz = 0; - _chunk_num = 0; - - _max_key_num = 100; - _key_min_len = 20; - _key_max_len = 80; -} - -void -BigFileTester::Cleanup() { - if (_msg != MAP_FAILED) { - munmap((void*)_msg, _msg_len); - _msg = (char*)MAP_FAILED; - _msg_len = 0; - } - - if (_fd != -1) { - close(_fd); - _fd = -1; - } -} - -bool -BigFileTester::GenerateKeys() { - int chunk_sz = 4096; - int max_key_num = _max_key_num; - int key_min_len = _key_min_len; - int key_max_len = _key_max_len; - - int t = _msg_len / chunk_sz; - int keynum = t > max_key_num ? max_key_num : t; - - if (keynum <= 4) { - // file is too small - return false; - } - chunk_sz = _msg_len / keynum; - _chunk_sz = chunk_sz; - - // For each chunck, "randomly" grab a sub-string searving - // as key. - int random_ofst[] = { 12, 30, 23, 15 }; - int rofstsz = sizeof(random_ofst)/sizeof(random_ofst[0]); - int ofst = 0; - const char* msg = _msg; - _chunk_num = keynum - 1; - for (int idx = 0, e = _chunk_num; idx < e; idx++) { - const char* key = msg + ofst + idx % rofstsz; - int key_len = key_min_len + idx % (key_max_len - key_min_len); - _keys.push_back(StrInfo(key, key_len)); - ofst += chunk_sz; - } - return true; -} - -bool -BigFileTester::Run() { - // Step 1: Bring the file into memory - fprintf(stdout, "Testing using file '%s'...\n", _filepath); - - int fd = _fd = ::open(_filepath, O_RDONLY); - if (fd == -1) { - perror("open"); - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) { - perror("fstat"); - return false; - } - - if (!S_ISREG (sb.st_mode)) { - fprintf(stderr, "%s is not regular file\n", _filepath); - return false; - } - - int ten_M = 1024 * 1024 * 10; - int map_sz = _msg_len = sb.st_size > ten_M ? ten_M : sb.st_size; - char* p = _msg = - (char*)mmap (0, map_sz, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); - if (p == MAP_FAILED) { - perror("mmap"); - return false; - } - - // Get rid of '\0' if we are picky at it. - if (Str_C_Style()) { - for (int i = 0; i < map_sz; i++) { if (!p[i]) p[i] = 'a'; } - p[map_sz - 1] = 0; - } - - // Step 2: "Fabricate" some keys from the file. - if (!GenerateKeys()) { - close(fd); - return false; - } - - // Step 3: Create PM instance - const char** keys = new const char*[_keys.size()]; - unsigned int* keylens = new unsigned int[_keys.size()]; - - int i = 0; - for (vector::iterator si = _keys.begin(), se = _keys.end(); - si != se; si++, i++) { - const StrInfo& strinfo = *si; - keys[i] = strinfo.first; - keylens[i] = strinfo.second; - } - - buf_header_t* PM = PM_Create(keys, keylens, i); - delete[] keys; - delete[] keylens; - - // Step 4: Run testing - bool res = Run_Helper(PM); - PM_Free(PM); - - // Step 5: Clanup - munmap(p, map_sz); - _msg = (char*)MAP_FAILED; - close(fd); - _fd = -1; - - fprintf(stdout, "%s\n", res ? "succ" : "fail"); - return res; -} - -void -BigFileTester::PrintStr(FILE* f, const char* str, int len) { - fprintf(f, "{"); - for (int i = 0; i < len; i++) { - unsigned char c = str[i]; - if (isprint(c)) - fprintf(f, "'%c', ", c); - else - fprintf(f, "%#x, ", c); - } - fprintf(f, "}"); -}; diff --git a/src/vendor/lua-aho-corasick/tests/test_main.cxx b/src/vendor/lua-aho-corasick/tests/test_main.cxx deleted file mode 100644 index b4f522546..000000000 --- a/src/vendor/lua-aho-corasick/tests/test_main.cxx +++ /dev/null @@ -1,33 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "ac.h" -#include "ac_util.hpp" -#include "test_base.hpp" - -using namespace std; - - -///////////////////////////////////////////////////////////////////////// -// -// Simple (yet maybe tricky) testings -// -///////////////////////////////////////////////////////////////////////// -// -int -main (int argc, char** argv) { - bool succ = Run_AC_Simple_Test(); - - vector files; - for (int i = 1; i < argc; i++) { files.push_back(argv[i]); } - succ = Run_AC_Aggressive_Test(files) && succ; - - return succ ? 0 : -1; -}; diff --git a/src/vendor/re2/tostring.cc b/src/vendor/re2/tostring.cc index adf73e8cd..c95f957b4 100644 --- a/src/vendor/re2/tostring.cc +++ b/src/vendor/re2/tostring.cc @@ -38,10 +38,10 @@ class ToStringWalker : public Regexp::Walker { public: explicit ToStringWalker(std::string* t) : t_(t) {} - virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); - virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, - int* child_args, int nchild_args); - virtual int ShortVisit(Regexp* /*re*/, int /*parent_arg*/) override { + int PreVisit(Regexp* re, int parent_arg, bool* stop) override; + int PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args) override; + int ShortVisit(Regexp* /*re*/, int /*parent_arg*/) override { return 0; } diff --git a/src/version.hpp.in b/src/version.hpp.in index d88badfb3..2e0913583 100644 --- a/src/version.hpp.in +++ b/src/version.hpp.in @@ -6,4 +6,4 @@ #pragma once -constexpr const char *LIBDDWAF_VERSION = "${CMAKE_PROJECT_VERSION}"; +constexpr const char *LIBDDWAF_VERSION = "${PROJECT_VERSION}"; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d33add585..5c3a9364f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,13 +8,7 @@ set_target_properties(waf_test PROPERTIES CXX_STANDARD_REQUIRED YES CXX_EXTENSIONS NO) -target_include_directories(waf_test PRIVATE ${libddwaf_SOURCE_DIR}/include) -target_include_directories(waf_test PRIVATE ${libddwaf_SOURCE_DIR}/src) -target_include_directories(waf_test PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor) -target_include_directories(waf_test PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/libinjection/src/) -target_include_directories(waf_test PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/radixlib/) -target_include_directories(waf_test PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/) -target_include_directories(waf_test PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor/utf8proc/) +target_include_directories(waf_test PRIVATE ${LIBDDWAF_PUBLIC_INCLUDES} ${LIBDDWAF_PRIVATE_INCLUDES}) if(NOT STDLIB_MAP_RECURSIVE) target_compile_definitions(waf_test PRIVATE HAS_NONRECURSIVE_UNORDERED_MAP) @@ -35,9 +29,8 @@ else() set_target_properties(waf_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ".") endif() -target_link_libraries(waf_test - PRIVATE ${LIBDDWAF_PRIVATE_LIBRARIES} ${LIBDDWAF_INTERFACE_LIBRARIES} - ${LIBDDWAF_EXE_LINKER_FLAGS} ${LIBDDWAF_TEST_LIBRARIES}) +target_link_libraries(waf_test PRIVATE ${LIBDDWAF_PRIVATE_LIBRARIES} + ${LIBDDWAF_INTERFACE_LIBRARIES} ${LIBDDWAF_TEST_LIBRARIES}) add_custom_target(test COMMAND ${CMAKE_BINARY_DIR}/tests/waf_test diff --git a/tests/parser_transformers_test.cpp b/tests/parser_transformers_test.cpp index 1afd6f3ed..47c213932 100644 --- a/tests/parser_transformers_test.cpp +++ b/tests/parser_transformers_test.cpp @@ -13,8 +13,8 @@ using namespace ddwaf::parser; // NOLINTBEGIN(cppcoreguidelines-macro-usage,bugprone-unchecked-optional-access) #define EXPECT_OPTEQ(opt, expected) \ - ASSERT_TRUE((opt)); \ - EXPECT_EQ((opt), expected); + ASSERT_TRUE((opt)); \ + EXPECT_EQ((opt), expected); // NOLINTEND(cppcoreguidelines-macro-usage,bugprone-unchecked-optional-access) namespace { diff --git a/tests/test.hpp b/tests/test.hpp index 87ef5dc28..b72b8caba 100644 --- a/tests/test.hpp +++ b/tests/test.hpp @@ -14,29 +14,29 @@ #define SHORT_TIME 1 #define DDWAF_OBJECT_INVALID \ - { \ - NULL, 0, {NULL}, 0, DDWAF_OBJ_INVALID \ - } + { \ + NULL, 0, {NULL}, 0, DDWAF_OBJ_INVALID \ + } #define DDWAF_OBJECT_MAP \ - { \ - NULL, 0, {NULL}, 0, DDWAF_OBJ_MAP \ - } + { \ + NULL, 0, {NULL}, 0, DDWAF_OBJ_MAP \ + } #define DDWAF_OBJECT_ARRAY \ - { \ - NULL, 0, {NULL}, 0, DDWAF_OBJ_ARRAY \ - } + { \ + NULL, 0, {NULL}, 0, DDWAF_OBJ_ARRAY \ + } #define DDWAF_OBJECT_SIGNED_FORCE(value) \ - { \ - NULL, 0, {(const char *)value}, 0, DDWAF_OBJ_SIGNED \ - } + { \ + NULL, 0, {(const char *)value}, 0, DDWAF_OBJ_SIGNED \ + } #define DDWAF_OBJECT_UNSIGNED_FORCE(value) \ - { \ - NULL, 0, {(const char *)value}, 0, DDWAF_OBJ_UNSIGNED \ - } + { \ + NULL, 0, {(const char *)value}, 0, DDWAF_OBJ_UNSIGNED \ + } #define DDWAF_OBJECT_STRING_PTR(string, length) \ - { \ - NULL, 0, {string}, length, DDWAF_OBJ_STRING \ - } + { \ + NULL, 0, {string}, length, DDWAF_OBJ_STRING \ + } #define EXPECT_STR(a, b) EXPECT_STREQ(a.c_str(), b) #define EXPECT_STRV(a, b) EXPECT_STREQ(a.data(), b) diff --git a/tests/test_utils.cpp b/tests/test_utils.cpp index fae6d5bba..8cbd65472 100644 --- a/tests/test_utils.cpp +++ b/tests/test_utils.cpp @@ -660,7 +660,7 @@ void json_to_object_helper(ddwaf_object *object, T &doc) ddwaf_object element; json_to_object_helper(&element, kv.value); - std::string_view const key = kv.name.GetString(); + const std::string_view key = kv.name.GetString(); ddwaf_object_map_addl(object, key.data(), key.length(), &element); } break; @@ -676,7 +676,7 @@ void json_to_object_helper(ddwaf_object *object, T &doc) break; } case rapidjson::kStringType: { - std::string_view const str = doc.GetString(); + const std::string_view str = doc.GetString(); ddwaf_object_stringl(object, str.data(), str.size()); break; } @@ -702,7 +702,7 @@ void json_to_object_helper(ddwaf_object *object, T &doc) ddwaf_object json_to_object(const std::string &json) { rapidjson::Document doc; - rapidjson::ParseResult const result = doc.Parse(json.data()); + const rapidjson::ParseResult result = doc.Parse(json.data()); if (result.IsError()) { throw std::runtime_error( "invalid json object: "s + rapidjson::GetParseError_En(result.Code())); diff --git a/tests/test_utils.hpp b/tests/test_utils.hpp index acdae91dc..939712e73 100644 --- a/tests/test_utils.hpp +++ b/tests/test_utils.hpp @@ -183,25 +183,25 @@ std::list from_matches(const std::vector>(); \ - EXPECT_THAT(events, WithEvents({__VA_ARGS__})); \ - } + { \ + auto data = ddwaf::test::object_to_json(result.events); \ + EXPECT_TRUE(ValidateSchema(data)); \ + YAML::Node doc = YAML::Load(data.c_str()); \ + auto events = doc.as>(); \ + EXPECT_THAT(events, WithEvents({__VA_ARGS__})); \ + } #define EXPECT_MATCHES(matches, ...) EXPECT_THAT(from_matches(matches), WithMatches({__VA_ARGS__})); #define EXPECT_SCHEMA_EQ(obtained, expected) \ - { \ - auto obtained_doc = test::object_to_rapidjson(obtained); \ - EXPECT_TRUE(ValidateSchemaSchema(obtained_doc)); \ - rapidjson::Document expected_doc; \ - expected_doc.Parse(expected); \ - EXPECT_FALSE(expected_doc.HasParseError()); \ - EXPECT_TRUE(json_equals(obtained_doc, expected_doc)) << test::object_to_json(obtained); \ - } + { \ + auto obtained_doc = test::object_to_rapidjson(obtained); \ + EXPECT_TRUE(ValidateSchemaSchema(obtained_doc)); \ + rapidjson::Document expected_doc; \ + expected_doc.Parse(expected); \ + EXPECT_FALSE(expected_doc.HasParseError()); \ + EXPECT_TRUE(json_equals(obtained_doc, expected_doc)) << test::object_to_json(obtained); \ + } // NOLINTEND(cppcoreguidelines-macro-usage) ddwaf_object read_file(std::string_view filename, std::string_view base = "./"); @@ -232,7 +232,7 @@ bool json_equals(const T &lhs, const T &rhs) std::vector seen(lhs.MemberCount(), false); for (const auto &lkv : lhs.GetObject()) { bool found = false; - std::string_view const lkey = lkv.name.GetString(); + const std::string_view lkey = lkv.name.GetString(); for (auto it = rhs.MemberBegin(); it != rhs.MemberEnd(); ++it) { auto i = it - rhs.MemberBegin(); if (seen[i]) { @@ -240,7 +240,7 @@ bool json_equals(const T &lhs, const T &rhs) } const auto &rkv = *it; - std::string_view const rkey = rkv.name.GetString(); + const std::string_view rkey = rkv.name.GetString(); if (lkey != rkey) { continue; } diff --git a/tests/transformer/manager_test.cpp b/tests/transformer/manager_test.cpp index 006c469df..977b64f36 100644 --- a/tests/transformer/manager_test.cpp +++ b/tests/transformer/manager_test.cpp @@ -14,20 +14,20 @@ namespace { // NOLINTNEXTLINE #define EXPECT_TRANSFORM(src, dst, ...) \ - { \ - auto res = transform({src, sizeof(src) - 1}, {__VA_ARGS__}); \ - EXPECT_TRUE(res); \ - if (res) { \ - EXPECT_STR(res.value(), dst); \ - } \ - } + { \ + auto res = transform({src, sizeof(src) - 1}, {__VA_ARGS__}); \ + EXPECT_TRUE(res); \ + if (res) { \ + EXPECT_STR(res.value(), dst); \ + } \ + } // NOLINTNEXTLINE #define EXPECT_NO_TRANSFORM(src, ...) \ - { \ - auto res = transform({src, sizeof(src) - 1}, {__VA_ARGS__}); \ - EXPECT_FALSE(res); \ - } + { \ + auto res = transform({src, sizeof(src) - 1}, {__VA_ARGS__}); \ + EXPECT_FALSE(res); \ + } std::optional transform(std::string_view input, const std::vector &ids) { diff --git a/tests/transformer/transformer_utils.hpp b/tests/transformer/transformer_utils.hpp index c92012a46..4827cb1e8 100644 --- a/tests/transformer/transformer_utils.hpp +++ b/tests/transformer/transformer_utils.hpp @@ -27,41 +27,41 @@ template constexpr std::array literal_to_array(cons // NOLINTBEGIN(cppcoreguidelines-macro-usage) #define EXPECT_EVENTS(result, ...) \ - { \ - auto data = ddwaf::test::object_to_json(result.events); \ - EXPECT_TRUE(ValidateSchema(data)); \ - YAML::Node doc = YAML::Load(data.c_str()); \ - auto events = doc.as>(); \ - EXPECT_THAT(events, WithEvents({__VA_ARGS__})); \ - } + { \ + auto data = ddwaf::test::object_to_json(result.events); \ + EXPECT_TRUE(ValidateSchema(data)); \ + YAML::Node doc = YAML::Load(data.c_str()); \ + auto events = doc.as>(); \ + EXPECT_THAT(events, WithEvents({__VA_ARGS__})); \ + } #define EXPECT_TRANSFORM(name, source, expected) \ - { \ { \ - cow_string str({source, sizeof(source) - 1}); \ - EXPECT_TRUE(transformer::name::transform(str)); \ - EXPECT_STREQ(str.data(), expected); \ - } \ - if constexpr (sizeof(source) > 1) { \ - std::array copy{literal_to_array(source)}; \ - cow_string str({copy.data(), copy.size()}); \ - EXPECT_TRUE(transformer::name::transform(str)) << "Non nul-terminated string"; \ - EXPECT_STREQ(str.data(), expected) << "Non nul-terminated string"; \ - } \ - } + { \ + cow_string str({source, sizeof(source) - 1}); \ + EXPECT_TRUE(transformer::name::transform(str)); \ + EXPECT_STREQ(str.data(), expected); \ + } \ + if constexpr (sizeof(source) > 1) { \ + std::array copy{literal_to_array(source)}; \ + cow_string str({copy.data(), copy.size()}); \ + EXPECT_TRUE(transformer::name::transform(str)) << "Non nul-terminated string"; \ + EXPECT_STREQ(str.data(), expected) << "Non nul-terminated string"; \ + } \ + } #define EXPECT_NO_TRANSFORM(name, source) \ - { \ { \ - cow_string str({source, sizeof(source) - 1}); \ - EXPECT_FALSE(transformer::name::transform(str)); \ - EXPECT_FALSE(str.modified()); \ - } \ - if constexpr (sizeof(source) > 1) { \ - std::array copy{literal_to_array(source)}; \ - cow_string str({copy.data(), copy.size()}); \ - EXPECT_FALSE(transformer::name::transform(str)) << "Non nul-terminated string"; \ - EXPECT_FALSE(str.modified()); \ - } \ - } + { \ + cow_string str({source, sizeof(source) - 1}); \ + EXPECT_FALSE(transformer::name::transform(str)); \ + EXPECT_FALSE(str.modified()); \ + } \ + if constexpr (sizeof(source) > 1) { \ + std::array copy{literal_to_array(source)}; \ + cow_string str({copy.data(), copy.size()}); \ + EXPECT_FALSE(transformer::name::transform(str)) << "Non nul-terminated string"; \ + EXPECT_FALSE(str.modified()); \ + } \ + } // NOLINTEND(cppcoreguidelines-macro-usage) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 85379f200..3379ca540 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,15 +1,14 @@ -file(GLOB LIBDDWAF_EXAMPLE_SOURCE *.cpp) -file(GLOB LIBDDWAF_EXAMPLE_COMMON_SOURCE common/*.cpp) +file(GLOB LIBDDWAF_TOOL_SOURCE *.cpp) +file(GLOB LIBDDWAF_TOOL_COMMON_SOURCE common/*.cpp) -foreach(EXAMPLE ${LIBDDWAF_EXAMPLE_SOURCE}) - get_filename_component(EXAMPLE_NAME ${EXAMPLE} NAME_WLE) +foreach(TOOL ${LIBDDWAF_TOOL_SOURCE}) + get_filename_component(TOOL_NAME ${TOOL} NAME_WLE) - add_executable(${EXAMPLE_NAME} ${EXAMPLE} ${LIBDDWAF_EXAMPLE_COMMON_SOURCE}) - target_link_libraries(${EXAMPLE_NAME} PRIVATE libddwaf_objects lib_yamlcpp lib_rapidjson) - target_include_directories(${EXAMPLE_NAME} PRIVATE ${libddwaf_SOURCE_DIR}/src) - target_include_directories(${EXAMPLE_NAME} PRIVATE ${libddwaf_SOURCE_DIR}/src/vendor) + add_executable(${TOOL_NAME} ${TOOL} ${LIBDDWAF_TOOL_COMMON_SOURCE}) + target_link_libraries(${TOOL_NAME} PRIVATE libddwaf_objects lib_yamlcpp lib_rapidjson) + target_include_directories(${TOOL_NAME} PRIVATE ${LIBDDWAF_PRIVATE_INCLUDES}) - set_target_properties(${EXAMPLE_NAME} PROPERTIES + set_target_properties(${TOOL_NAME} PROPERTIES CXX_STANDARD 20 CXX_STANDARD_REQUIRED YES CXX_EXTENSIONS NO) diff --git a/validator/assert.hpp b/validator/assert.hpp index 5b3991eda..b46371ea1 100644 --- a/validator/assert.hpp +++ b/validator/assert.hpp @@ -15,13 +15,13 @@ #include "utils.hpp" #define expect(lhs, rhs) \ - try { \ - assert(lhs, rhs, __LINE__, __func__); \ - } catch (const assert_exception &e) { \ - throw; \ - } catch (const std::exception &e) { \ - throw assert_exception(e.what(), __LINE__, __func__); \ - } + try { \ + assert(lhs, rhs, __LINE__, __func__); \ + } catch (const assert_exception &e) { \ + throw; \ + } catch (const std::exception &e) { \ + throw assert_exception(e.what(), __LINE__, __func__); \ + } class assert_exception : public std::exception { public: