diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 6cea915b55..6c073577c8 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -14,6 +14,8 @@ #include "util/ConstexprUtils.h" #include "util/http/MediaTypes.h" +using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; + // Return true iff the `result` is nonempty. bool getResultForAsk(const std::shared_ptr& result) { if (result->isFullyMaterialized()) { @@ -347,11 +349,58 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) { } } +// _____________________________________________________________________________ +std::optional +ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( + Id id, bool onlyReturnLiteralsWithXsdString) { + if (onlyReturnLiteralsWithXsdString) { + return std::nullopt; + } + auto optionalStringAndType = idToStringAndTypeForEncodedValue(id); + if (!optionalStringAndType) { + return std::nullopt; + } + + return LiteralOrIri::literalWithoutQuotes(optionalStringAndType->first); +} + +// _____________________________________________________________________________ +bool ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString( + const LiteralOrIri& word) { + return !word.hasDatatype() || + asStringViewUnsafe(word.getDatatype()) == XSD_STRING; +} + +// _____________________________________________________________________________ +std::optional ExportQueryExecutionTrees::handleIriOrLiteral( + LiteralOrIri word, bool onlyReturnLiterals, + bool onlyReturnLiteralsWithXsdString) { + if (!word.isLiteral()) { + if (onlyReturnLiterals || onlyReturnLiteralsWithXsdString) { + AD_THROW("The input is an IRI, but only literals are allowed."); + return std::nullopt; + } + return word; + } + + if (onlyReturnLiteralsWithXsdString) { + if (isPlainLiteralOrLiteralWithXsdString(word)) { + return word; + } + AD_THROW("The literal must either have no datatype or datatype xsd:string."); + return std::nullopt; + } + + if (word.hasDatatype() && !isPlainLiteralOrLiteralWithXsdString(word)) { + word.getLiteral().removeDatatype(); + } + return word; +} + // _____________________________________________________________________________ ad_utility::triple_component::LiteralOrIri ExportQueryExecutionTrees::getLiteralOrIriFromVocabIndex( const Index& index, Id id, const LocalVocab& localVocab) { - using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; switch (id.getDatatype()) { case Datatype::LocalVocabIndex: return localVocab.getWord(id.getLocalVocabIndex()).asLiteralOrIri(); @@ -412,6 +461,39 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, return idToStringAndTypeForEncodedValue(id); } } + +// _____________________________________________________________________________ +template +std::optional ExportQueryExecutionTrees::idToLiteralOrIri( + const Index& index, Id id, const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString) { + using enum Datatype; + auto datatype = id.getDatatype(); + + if constexpr (onlyReturnLiterals) { + if (!(datatype == VocabIndex || datatype == LocalVocabIndex)) { + return std::nullopt; + } + } + + switch (datatype) { + case WordVocabIndex: + return LiteralOrIri::literalWithoutQuotes( + index.indexToString(id.getWordVocabIndex())); + case VocabIndex: + case LocalVocabIndex: + return handleIriOrLiteral( + getLiteralOrIriFromVocabIndex(index, id, localVocab), + onlyReturnLiterals, onlyReturnLiteralsWithXsdString); + case TextRecordIndex: + AD_THROW("TextRecordIndex case is not implemented."); + return std::nullopt; + default: + return idToLiteralOrIriForEncodedValue(id, + onlyReturnLiteralsWithXsdString); + } +} + // ___________________________________________________________________________ template std::optional> ExportQueryExecutionTrees::idToStringAndType( @@ -433,6 +515,18 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, const LocalVocab& localVocab, std::identity&& escapeFunction); +// ___________________________________________________________________________ +template std::optional +ExportQueryExecutionTrees::idToLiteralOrIri( + const Index& index, Id id, const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString); + +// ___________________________________________________________________________ +template std::optional +ExportQueryExecutionTrees::idToLiteralOrIri( + const Index& index, Id id, const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString); + // Convert a stringvalue and optional type to JSON binding. static nlohmann::json stringAndTypeToBinding(std::string_view entitystr, const char* xsdType) { diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index a1443e802d..eae20f697f 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -21,6 +21,7 @@ class ExportQueryExecutionTrees { public: using MediaType = ad_utility::MediaType; using CancellationHandle = ad_utility::SharedCancellationHandle; + using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; // Compute the result of the given `parsedQuery` (created by the // `SparqlParser`) for which the `QueryExecutionTree` has been previously @@ -69,6 +70,37 @@ class ExportQueryExecutionTrees { static std::optional> idToStringAndTypeForEncodedValue(Id id); + // Convert the `id` to a 'LiteralOrIri. Datatypes are always stripped unless + // they are 'xsd:string', so for literals with non-'xsd:string' datatypes + // (this includes IDs that directly store their value, like Doubles) the + // datatype is always empty. If 'onlyReturnLiteralsWithXsdString' is true, all + // IRIs and literals with non'-xsd:string' datatypes (including encoded IDs) + // return 'std::nullopt'. These semantics are useful for the string + // expressions in StringExpressions.cpp. + template + static std::optional idToLiteralOrIri( + const Index& index, Id id, const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString = false); + + // Same as the previous function, but only handles the datatypes for which the + // value is encoded directly in the ID. For other datatypes an exception is + // thrown. + // If `onlyReturnLiteralsWithXsdString` is `true`, returns `std::nullopt`. + // If `onlyReturnLiteralsWithXsdString` is `false`, removes datatypes from + // literals (e.g. the integer `42` is converted to the plain literal `"42"`). + static std::optional idToLiteralOrIriForEncodedValue( + Id id, bool onlyReturnLiteralsWithXsdString = false); + + // A helper function for the `idToLiteralOrIri` function. Checks and processes + // a LiteralOrIri based on the given parameters. + static std::optional handleIriOrLiteral( + LiteralOrIri word, bool onlyReturnLiterals, + bool onlyReturnLiteralsWithXsdString); + + // Checks if a LiteralOrIri is either a plain literal (without datatype) + // or a literal with the `xsd:string` datatype. + static bool isPlainLiteralOrLiteralWithXsdString(const LiteralOrIri& word); + // Acts as a helper to retrieve an LiteralOrIri object // from an Id, where the Id is of type `VocabIndex` or `LocalVocabIndex`. // This function should only be called with suitable `Datatype` Id's, diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 3ed18e7b99..89b487d3ad 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -90,6 +90,32 @@ std::optional StringValueGetter::operator()( } } +// ____________________________________________________________________________ +std::optional LiteralOrIriValueGetter::operator()( + Id id, const EvaluationContext* context) const { + return ExportQueryExecutionTrees::idToLiteralOrIri(context->_qec.getIndex(), + id, context->_localVocab); +} + +// ____________________________________________________________________________ +std::optional +LiteralOrIriValueGetterWithXsdStringFilter::operator()( + Id id, const EvaluationContext* context) const { + return ExportQueryExecutionTrees::idToLiteralOrIri( + context->_qec.getIndex(), id, context->_localVocab, true); +} + +// ____________________________________________________________________________ +std::optional +LiteralOrIriValueGetterWithXsdStringFilter::operator()( + const LiteralOrIri& s, const EvaluationContext*) const { + if (ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(s)) { + return s; + } + AD_THROW("Input is not a plain string or xsd:string."); + return std::nullopt; +} + // ____________________________________________________________________________ template Id IsSomethingValueGetter::operator()( diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index 88dbc2d825..159040eedd 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -141,6 +141,45 @@ struct StringValueGetter : Mixin { } }; +// This class can be used as the `ValueGetter` argument of Expression +// templates. It produces a LiteralOrIri. +struct LiteralOrIriValueGetter : Mixin { + using Mixin::operator(); + + std::optional operator()(ValueId, + const EvaluationContext*) const; + + std::optional operator()(const LiteralOrIri& s, + const EvaluationContext*) const { + return s; + } +}; + +// Same as above but only literals with 'xsd:string' datatype or no datatype are +// returned. +struct LiteralOrIriValueGetterWithXsdStringFilter + : Mixin { + using Mixin::operator(); + + std::optional operator()(ValueId, + const EvaluationContext*) const; + + std::optional operator()(const LiteralOrIri& s, + const EvaluationContext*) const; +}; + +// Value getter for `isBlank`. +struct IsBlankNodeValueGetter : Mixin { + using Mixin::operator(); + Id operator()(ValueId id, const EvaluationContext*) const { + return Id::makeFromBool(id.getDatatype() == Datatype::BlankNodeIndex); + } + + Id operator()(const LiteralOrIri&, const EvaluationContext*) const { + return Id::makeFromBool(false); + } +}; + // Boolean value getter that checks whether the given `Id` is a `ValueId` of the // given `datatype`. template diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 47ee97bd23..af042a9624 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -21,6 +21,29 @@ constexpr auto toLiteral = [](std::string_view normalizedContent) { asNormalizedStringViewUnsafe(normalizedContent))}; }; +// Count UTF-8 characters by skipping continuation bytes (those starting with +// "10"). +inline std::size_t utf8Length(std::string_view s) { + return std::ranges::count_if( + s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); +} + +// Convert UTF-8 position to byte offset +inline std::size_t utf8ToByteOffset(std::string_view str, int64_t utf8Pos) { + std::size_t byteOffset = 0; + int64_t charCount = 0; + + for (char c : str) { + if ((static_cast(c) & 0xC0) != 0x80) { + if (charCount++ == utf8Pos) { + break; + } + } + ++byteOffset; + } + return byteOffset; +} + // String functions. [[maybe_unused]] auto strImpl = [](std::optional s) -> IdOrLiteralOrIri { @@ -125,11 +148,7 @@ using IriOrUriExpression = NARY<1, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { - // Count UTF-8 characters by skipping continuation bytes (those starting with - // "10"). - auto utf8Len = std::ranges::count_if( - s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); - return Id::makeFromInt(utf8Len); + return Id::makeFromInt(utf8Length(s)); }; using StrlenExpression = StringExpressionImpl<1, LiftStringFunction>; @@ -181,10 +200,17 @@ class SubstrImpl { }; public: - IdOrLiteralOrIri operator()(std::optional s, NumericValue start, + IdOrLiteralOrIri operator()(std::optional s, NumericValue start, NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || std::holds_alternative(length)) { + if(!s.has_value()){ + AD_THROW("Substr called on an object without a value."); + } + else if(std::holds_alternative(start) || + std::holds_alternative(length)){ + AD_THROW("Invalid arguments: 'start' and 'length' must be numeric values."); + } return Id::makeUndefined(); } @@ -201,29 +227,82 @@ class SubstrImpl { if (startInt < 0) { lengthInt += startInt; } - - const auto& str = s.value(); + const auto& str = asStringViewUnsafe(s.value().getContent()); + std::size_t utf8len = utf8Length(str); // Clamp the number such that it is in `[0, str.size()]`. That way we end up - // with valid arguments for the `getUTF8Substring` method below for both + // with valid arguments for the `setSubstr` method below for both // starting position and length since all the other corner cases have been // dealt with above. - auto clamp = [sz = str.size()](int64_t n) -> std::size_t { + auto clamp = [utf8len](int64_t n) -> std::size_t { if (n < 0) { return 0; } - if (static_cast(n) > sz) { - return sz; + if (static_cast(n) > utf8len) { + return utf8len; } return static_cast(n); }; - return toLiteral( - ad_utility::getUTF8Substring(str, clamp(startInt), clamp(lengthInt))); + startInt = clamp(startInt); + lengthInt = clamp(lengthInt); + std::size_t startByteOffset = utf8ToByteOffset(str, startInt); + std::size_t endByteOffset = utf8ToByteOffset(str, startInt + lengthInt); + std::size_t byteLength = endByteOffset - startByteOffset; + + s.value().getLiteral().setSubstr(startByteOffset, byteLength); + return std::move(s.value()); + } +}; + +// Implementation of the `SUBSTR` SPARQL function. It dynamically +// selects the appropriate value getter for the first argument based on whether +// it is a `STR()` expression (using +// `LiteralOrIriValueGetterWithXsdStringFilter`) or another type (using +// `LiteralOrIriValueGetter`). +class SubstrExpressionImpl : public SparqlExpression { + private: + using ExpressionWithStr = + NARY<3, FV>; + using ExpressionWithoutStr = + NARY<3, FV>; + + SparqlExpression::Ptr impl_; + + public: + explicit SubstrExpressionImpl( + SparqlExpression::Ptr child, + std::same_as auto... children) + requires(sizeof...(children) + 1 == 3) { + AD_CORRECTNESS_CHECK(child != nullptr); + + if (child->isStrExpression()) { + auto childrenOfStr = std::move(*child).moveChildrenOut(); + AD_CORRECTNESS_CHECK(childrenOfStr.size() == 1); + impl_ = std::make_unique( + std::move(childrenOfStr.at(0)), std::move(children)...); + } else { + impl_ = std::make_unique(std::move(child), + std::move(children)...); + } + } + + ExpressionResult evaluate(EvaluationContext* context) const override { + return impl_->evaluate(context); + } + + std::string getCacheKey(const VariableToColumnMap& varColMap) const override { + return impl_->getCacheKey(varColMap); + } + + private: + std::span childrenImpl() override { + return impl_->children(); } }; -using SubstrExpression = - StringExpressionImpl<3, SubstrImpl, NumericValueGetter, NumericValueGetter>; +using SubstrExpression = SubstrExpressionImpl; // STRSTARTS [[maybe_unused]] auto strStartsImpl = [](std::string_view text, diff --git a/src/parser/Literal.cpp b/src/parser/Literal.cpp index a9fb4364e6..baeba75d71 100644 --- a/src/parser/Literal.cpp +++ b/src/parser/Literal.cpp @@ -135,4 +135,18 @@ Literal Literal::fromStringRepresentation(std::string internal) { return Literal{std::move(internal), endIdx + 1}; } +// __________________________________________ +void Literal::setSubstr(std::size_t start, std::size_t length) { + std::size_t contentLength = beginOfSuffix_ - 2; + content_.erase(1 + start + length, contentLength - start - length); + content_.erase(1, start); + beginOfSuffix_ = beginOfSuffix_ - (contentLength - length); +} + +// __________________________________________ +void Literal::removeDatatype() { + content_.erase(beginOfSuffix_); + beginOfSuffix_ = content_.size(); +} + } // namespace ad_utility::triple_component diff --git a/src/parser/Literal.h b/src/parser/Literal.h index 5367c261ae..6750e57aa5 100644 --- a/src/parser/Literal.h +++ b/src/parser/Literal.h @@ -90,5 +90,12 @@ class Literal { static Literal literalWithoutQuotes( std::string_view rdfContentWithoutQuotes, std::optional> descriptor = std::nullopt); + + // Set the substring of the literal by erasing the part between the + // end of the prefix and the trailing " from content_. + void setSubstr(std::size_t start, std::size_t length); + + // Remove the datatype suffix from the Literal. + void removeDatatype(); }; } // namespace ad_utility::triple_component diff --git a/src/parser/LiteralOrIri.cpp b/src/parser/LiteralOrIri.cpp index 077b189c26..3ecdeb82a3 100644 --- a/src/parser/LiteralOrIri.cpp +++ b/src/parser/LiteralOrIri.cpp @@ -41,11 +41,17 @@ bool LiteralOrIri::isLiteral() const { // __________________________________________ const Literal& LiteralOrIri::getLiteral() const { - if (!isLiteral()) { - AD_THROW( - "LiteralOrIri object does not contain an Literal object and " - "thus cannot return it"); - } + AD_CONTRACT_CHECK(isLiteral(), + "LiteralOrIri object does not contain a Literal object and " + "thus cannot return it"); + return std::get(data_); +} + +// __________________________________________ +Literal& LiteralOrIri::getLiteral() { + AD_CONTRACT_CHECK(isLiteral(), + "LiteralOrIri object does not contain a Literal object and " + "thus cannot return it"); return std::get(data_); } diff --git a/src/parser/LiteralOrIri.h b/src/parser/LiteralOrIri.h index 3f4bfa191e..ec6a850a7a 100644 --- a/src/parser/LiteralOrIri.h +++ b/src/parser/LiteralOrIri.h @@ -31,6 +31,11 @@ class alignas(16) LiteralOrIri { // otherwise const Literal& getLiteral() const; + // Return a modifiable reference to the contained Literal object if available, + // throw exception otherwise. Allows the caller to modify the Literal object + // e.g. for SubStr in StringExpressions.cpp + Literal& getLiteral(); + // Create a new LiteralOrIri based on a Literal object explicit LiteralOrIri(Literal literal); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e9b2cf8347..8463941067 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -33,7 +33,7 @@ endfunction() # required e.g. if several tests cases write to the same file. function(linkAndDiscoverTestSerial basename) linkTest(${basename} ${ARGN}) - gtest_discover_tests(${basename} ${basename} PROPERTIES RUN_SERIAL + gtest_discover_tests(${basename} ${basename} DISCOVERY_TIMEOUT 600 PROPERTIES RUN_SERIAL TRUE) endfunction() @@ -41,7 +41,7 @@ if (SINGLE_TEST_BINARY) message(STATUS "All tests are linked into a single executable `QLeverAllUnitTestsMain`") add_executable(QLeverAllUnitTestsMain) qlever_target_link_libraries(QLeverAllUnitTestsMain gtest gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) - gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain PROPERTIES RUN_SERIAL + gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain DISCOVERY_TIMEOUT 600 PROPERTIES RUN_SERIAL TRUE) else () message(STATUS "The tests are split over multiple binaries") diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 663bc46fd0..2813c07cab 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1637,3 +1637,93 @@ TEST(ExportQueryExecutionTrees, convertGeneratorForChunkedTransfer) { AllOf(HasSubstr("!!!!>># An error has occurred"), HasSubstr("A very strange"))); } + +TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { + std::string kg = + "

\"something\" .

1 .

" + "\"some\"^^ .

" + "\"dadudeldu\"^^ ."; + auto qec = ad_utility::testing::getQec(kg); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + using enum Datatype; + + auto callIdToLiteralOrIri = [&](Id id, bool onlyLiterals, + bool onlyLiteralsWithXsdString = false) { + if (onlyLiterals) { + return ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, onlyLiteralsWithXsdString); + } else { + return ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, onlyLiteralsWithXsdString); + } + }; + + auto checkIdToLiteralOrIri = + [&](Id id, + const std::vector>>& + cases) { + for (const auto& [onlyLiterals, onlyLiteralsWithXsdString, expected] : + cases) { + auto result = + callIdToLiteralOrIri(id, onlyLiterals, onlyLiteralsWithXsdString); + if (expected) { + EXPECT_THAT(result, + ::testing::Optional(::testing::ResultOf( + [](const auto& literalOrIri) { + return literalOrIri.toStringRepresentation(); + }, + ::testing::StrEq(*expected)))); + } else { + EXPECT_EQ(result, std::nullopt); + } + } + }; + + // Test cases: Each tuple describes one test case. + // The first element is the ID of the element to test. + // The second element is a list of 3 configurations: + // 1. no restrictions 2.only literals are considered + // 3.only literals with `xsd:string` or no datatype are considered + std::vector>>>> + testCases = { + // Case: Literal without datatype + {getId("\"something\""), + {{false, false, "\"something\""}, + {true, false, "\"something\""}, + {false, true, "\"something\""}}}, + + // Case: Literal with datatype `xsd:string` + {getId("\"some\"^^"), + {{false, false, + "\"some\"^^"}, + {true, false, + "\"some\"^^"}, + {false, true, + "\"some\"^^"}}}, + + // Case: Literal with unknown datatype + {getId("\"dadudeldu\"^^"), + {{false, false, "\"dadudeldu\""}, + {true, false, "\"dadudeldu\""}, + {false, true, std::nullopt}}}, + + // Case: IRI + {getId(""), + {{false, false, ""}, + {true, false, std::nullopt}, + {false, true, std::nullopt}}}, + + // Case: datatype `Int` + {ad_utility::testing::IntId(1), + {{false, false, "\"1\""}, + {true, false, std::nullopt}, + {false, true, std::nullopt}}}, + + // Case: Undefined ID + {ad_utility::testing::UndefId(), {{false, false, std::nullopt}}}}; + + for (const auto& [id, cases] : testCases) { + checkIdToLiteralOrIri(id, cases); + } +} diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index f55d8ed0d1..1eb4ca85f7 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -601,6 +601,12 @@ TEST(SparqlExpression, stringOperators) { IdOrLiteralOrIriVec{lit("true"), lit("false"), lit("true")}); checkStr(IdOrLiteralOrIriVec{lit("one"), lit("two"), lit("three")}, IdOrLiteralOrIriVec{lit("one"), lit("two"), lit("three")}); + checkStr(IdOrLiteralOrIriVec{iriref(""), + iriref(""), + iriref("")}, + IdOrLiteralOrIriVec{lit("http://example.org/str"), + lit("http://example.org/int"), + lit("http://example.org/bool")}); auto T = Id::makeFromBool(true); auto F = Id::makeFromBool(false); @@ -743,28 +749,23 @@ TEST(SparqlExpression, substr) { I(0), I(12)); checkSubstr(strs({"one", "two", "three"}), strs({"one", "two", "three"}), I(-2), I(12)); - checkSubstr(strs({"ne", "wo", "hree"}), strs({"one", "two", "three"}), I(2), I(12)); checkSubstr(strs({"ne", "wo", "hree"}), strs({"one", "two", "three"}), D(1.8), D(11.7)); checkSubstr(strs({"ne", "wo", "hree"}), strs({"one", "two", "three"}), D(2.449), D(12.449)); - // An actual substring from the middle checkSubstr(strs({"es", "os", "re"}), strs({"ones", "twos", "threes"}), I(3), I(2)); - // Subtle corner case if the starting position is negative // Only the letters at positions `p < -3 + 6 = 3` are exported (the first two // letters, remember that the positions are 1-based). checkSubstr(strs({"on", "tw", "th"}), strs({"ones", "twos", "threes"}), I(-3), I(6)); - // Correct handling of UTF-8 multibyte characters. checkSubstr(strs({"pfel", "pfel", "pfel"}), strs({"uApfel", "uÄpfel", "uöpfel"}), I(3), I(18)); - // corner cases: 0 or negative length, or invalid numeric parameter checkSubstr(strs({"", "", ""}), strs({"ones", "twos", "threes"}), D(naN), I(2)); @@ -775,9 +776,8 @@ TEST(SparqlExpression, substr) { D(-3.8)); // Invalid datatypes - // First must be string. + // First must be LiteralOrIri auto Ux = IdOrLiteralOrIri{U}; - checkSubstr(Ux, I(3), I(4), I(7)); checkSubstr(Ux, U, I(4), I(7)); checkSubstr(Ux, Ux, I(4), I(7)); // Second and third must be numeric; @@ -787,6 +787,17 @@ TEST(SparqlExpression, substr) { checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), U); checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), IdOrLiteralOrIri{lit("bye")}); + // WithDataType xsd:string + checkSubstr( + IdOrLiteralOrIriVec{ + lit("Hello", "^^")}, + IdOrLiteralOrIriVec{ + lit("Hello World", "^^")}, + I(1), I(5)); + + // WithLanguageTag + checkSubstr(IdOrLiteralOrIriVec{lit("cha", "@en")}, + IdOrLiteralOrIriVec{lit("chat", "@en")}, I(1), I(3)); } // _____________________________________________________________________________________ diff --git a/test/parser/LiteralOrIriTest.cpp b/test/parser/LiteralOrIriTest.cpp index 0c5486fc7b..27ddd573a1 100644 --- a/test/parser/LiteralOrIriTest.cpp +++ b/test/parser/LiteralOrIriTest.cpp @@ -209,3 +209,33 @@ TEST(LiteralOrIri, Hashing) { ad_utility::HashSet set{lit, iri}; EXPECT_THAT(set, ::testing::UnorderedElementsAre(lit, iri)); } + +// _______________________________________________________________________ +TEST(LiteralTest, SetSubstr) { + LiteralOrIri literal = LiteralOrIri::literalWithoutQuotes( + "Hello World!", + Iri::fromIriref("")); + literal.getLiteral().setSubstr(0, 5); + EXPECT_THAT("Hello", asStringViewUnsafe(literal.getContent())); + EXPECT_THAT("http://www.w3.org/2001/XMLSchema#string", + asStringViewUnsafe(literal.getDatatype())); + + literal = LiteralOrIri::literalWithoutQuotes( + "Hello World!", + Iri::fromIriref("")); + literal.getLiteral().setSubstr(6, 5); + EXPECT_THAT("World", asStringViewUnsafe(literal.getContent())); + EXPECT_THAT("http://www.w3.org/2001/XMLSchema#string", + asStringViewUnsafe(literal.getDatatype())); +} + +// _______________________________________________________________________ +TEST(LiteralTest, RemoveDatatype) { + LiteralOrIri literal = LiteralOrIri::literalWithoutQuotes( + "Hello World!", + Iri::fromIriref("")); + literal.getLiteral().removeDatatype(); + EXPECT_THAT("Hello World!", asStringViewUnsafe(literal.getContent())); + EXPECT_FALSE(literal.hasDatatype()); + EXPECT_THROW(literal.getDatatype(), ad_utility::Exception); +}