diff --git a/CMakeLists.txt b/CMakeLists.txt index cc53b58f40..fc5e877bd9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -578,9 +578,6 @@ endif(RS_JSON_API) ################################################################################ if(RS_FORUM_DEEP_INDEX) - find_package(Xapian REQUIRED) - target_link_libraries(${PROJECT_NAME} PRIVATE ${XAPIAN_LIBRARIES}) - target_compile_definitions(${PROJECT_NAME} PUBLIC RS_DEEP_FORUMS_INDEX) endif(RS_FORUM_DEEP_INDEX) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 19360affdc..7fac17ab56 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -687,12 +687,12 @@ if(RS_FORUM_DEEP_INDEX) list( APPEND RS_SOURCES deep_search/commonutils.cpp - deep_search/forumsindex.cpp ) + deep_search/forumsindex_fts5.cpp ) list( APPEND RS_IMPLEMENTATION_HEADERS deep_search/commonutils.hpp - deep_search/forumsindex.hpp ) + deep_search/forumsindex_fts5.hpp ) endif(RS_FORUM_DEEP_INDEX) diff --git a/src/deep_search/commonutils.cpp b/src/deep_search/commonutils.cpp index e8e1e4b590..baaa16def3 100644 --- a/src/deep_search/commonutils.cpp +++ b/src/deep_search/commonutils.cpp @@ -26,6 +26,63 @@ #include "util/rsthreads.h" #include "util/rsdebuglevel0.h" +namespace DeepSearch +{ + +std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc) +{ + if(rsHtmlDoc.empty()) return rsHtmlDoc; + + const bool isPlainMsg = + rsHtmlDoc[0] != '<' || rsHtmlDoc[rsHtmlDoc.size() - 1] != '>'; + if(isPlainMsg) return rsHtmlDoc; + + auto oSize = rsHtmlDoc.size(); + auto bodyTagBegin(rsHtmlDoc.find("
= oSize) return rsHtmlDoc; + + auto bodyTagEnd(rsHtmlDoc.find(">", bodyTagBegin)); + if(bodyTagEnd >= oSize) return rsHtmlDoc; + + std::string retVal(rsHtmlDoc.substr(bodyTagEnd+1)); + + // strip also CSS inside + oSize = retVal.size(); + auto styleTagBegin(retVal.find(" - oSize = retVal.size(); - auto styleTagBegin(retVal.find("