Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct datatypes for string expressions #1636

Draft
wants to merge 30 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 95 additions & 1 deletion src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include "util/ConstexprUtils.h"
#include "util/http/MediaTypes.h"

using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;

// Return true iff the `result` is nonempty.
bool getResultForAsk(const std::shared_ptr<const Result>& result) {
if (result->isFullyMaterialized()) {
Expand Down Expand Up @@ -347,11 +349,58 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) {
}
}

// _____________________________________________________________________________
std::optional<LiteralOrIri>
ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue(
    Id id, bool onlyReturnLiteralsWithXsdString) {
  // With the `xsd:string` filter enabled, nothing is returned for values that
  // are encoded directly in the ID; the `id` is not even inspected.
  if (!onlyReturnLiteralsWithXsdString) {
    // Only the string representation is kept. The type that is reported
    // alongside it is dropped when building the literal.
    if (auto stringAndType = idToStringAndTypeForEncodedValue(id)) {
      return LiteralOrIri::literalWithoutQuotes(stringAndType->first);
    }
  }
  return std::nullopt;
}

// _____________________________________________________________________________
bool ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(
    const LiteralOrIri& word) {
  // A literal without any datatype counts as plain.
  if (!word.hasDatatype()) {
    return true;
  }
  // Otherwise the datatype has to be exactly `xsd:string`.
  return asStringViewUnsafe(word.getDatatype()) == XSD_STRING;
}

// _____________________________________________________________________________
std::optional<LiteralOrIri> ExportQueryExecutionTrees::handleIriOrLiteral(
    LiteralOrIri word, bool onlyReturnLiterals,
    bool onlyReturnLiteralsWithXsdString) {
  // IRIs are passed through unchanged unless one of the two flags demands a
  // literal, in which case the error is reported via `AD_THROW`.
  if (!word.isLiteral()) {
    const bool literalRequired =
        onlyReturnLiterals || onlyReturnLiteralsWithXsdString;
    if (!literalRequired) {
      return word;
    }
    AD_THROW("The input is an IRI, but only literals are allowed.");
    return std::nullopt;
  }

  // From here on `word` is a literal. Plain literals and `xsd:string`
  // literals are returned as they are, with or without the filter.
  const bool isPlainOrXsdString = isPlainLiteralOrLiteralWithXsdString(word);
  if (isPlainOrXsdString) {
    return word;
  }

  // The literal has a datatype other than `xsd:string`: the filter rejects
  // it, without the filter the datatype is stripped.
  if (onlyReturnLiteralsWithXsdString) {
    AD_THROW(
        "The literal must either have no datatype or datatype xsd:string.");
    return std::nullopt;
  }
  word.getLiteral().removeDatatype();
  return word;
}

// _____________________________________________________________________________
ad_utility::triple_component::LiteralOrIri
ExportQueryExecutionTrees::getLiteralOrIriFromVocabIndex(
const Index& index, Id id, const LocalVocab& localVocab) {
using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;
switch (id.getDatatype()) {
case Datatype::LocalVocabIndex:
return localVocab.getWord(id.getLocalVocabIndex()).asLiteralOrIri();
Expand Down Expand Up @@ -412,6 +461,39 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
return idToStringAndTypeForEncodedValue(id);
}
}

// _____________________________________________________________________________
template <bool onlyReturnLiterals>
std::optional<LiteralOrIri> ExportQueryExecutionTrees::idToLiteralOrIri(
    const Index& index, Id id, const LocalVocab& localVocab,
    bool onlyReturnLiteralsWithXsdString) {
  const auto datatype = id.getDatatype();
  const bool isVocabEntry = datatype == Datatype::VocabIndex ||
                            datatype == Datatype::LocalVocabIndex;

  // In the literals-only instantiation, everything that does not come from
  // the vocabulary or the local vocabulary is rejected right away.
  if constexpr (onlyReturnLiterals) {
    if (!isVocabEntry) {
      return std::nullopt;
    }
  }

  // Vocabulary entries can be IRIs or literals; the flags are applied by
  // `handleIriOrLiteral`.
  if (isVocabEntry) {
    return handleIriOrLiteral(
        getLiteralOrIriFromVocabIndex(index, id, localVocab),
        onlyReturnLiterals, onlyReturnLiteralsWithXsdString);
  }

  // Words from the text index become literals built from the raw string.
  if (datatype == Datatype::WordVocabIndex) {
    return LiteralOrIri::literalWithoutQuotes(
        index.indexToString(id.getWordVocabIndex()));
  }

  if (datatype == Datatype::TextRecordIndex) {
    AD_THROW("TextRecordIndex case is not implemented.");
    return std::nullopt;
  }

  // All remaining datatypes are delegated to the conversion for values that
  // are encoded directly in the ID.
  return idToLiteralOrIriForEncodedValue(id, onlyReturnLiteralsWithXsdString);
}

// ___________________________________________________________________________
template std::optional<std::pair<std::string, const char*>>
ExportQueryExecutionTrees::idToStringAndType<true, false, std::identity>(
Expand All @@ -433,6 +515,18 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
const LocalVocab& localVocab,
std::identity&& escapeFunction);

// ___________________________________________________________________________
// Explicit instantiation of `idToLiteralOrIri` for the template argument
// `false` (the template is defined in this file, so the instantiations that
// are used elsewhere have to be requested here).
template std::optional<LiteralOrIri>
ExportQueryExecutionTrees::idToLiteralOrIri<false>(
const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString);

// ___________________________________________________________________________
// Explicit instantiation of `idToLiteralOrIri` for the template argument
// `true`, i.e. the variant that yields `std::nullopt` for all IDs that are
// neither a `VocabIndex` nor a `LocalVocabIndex`.
template std::optional<LiteralOrIri>
ExportQueryExecutionTrees::idToLiteralOrIri<true>(
const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString);

// Convert a stringvalue and optional type to JSON binding.
static nlohmann::json stringAndTypeToBinding(std::string_view entitystr,
const char* xsdType) {
Expand Down
32 changes: 32 additions & 0 deletions src/engine/ExportQueryExecutionTrees.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class ExportQueryExecutionTrees {
public:
using MediaType = ad_utility::MediaType;
using CancellationHandle = ad_utility::SharedCancellationHandle;
using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;

// Compute the result of the given `parsedQuery` (created by the
// `SparqlParser`) for which the `QueryExecutionTree` has been previously
Expand Down Expand Up @@ -69,6 +70,37 @@ class ExportQueryExecutionTrees {
static std::optional<std::pair<std::string, const char*>>
idToStringAndTypeForEncodedValue(Id id);

// Convert the `id` to a `LiteralOrIri`. Datatypes other than `xsd:string` are
// stripped, so literals with such datatypes (this includes IDs that directly
// store their value, like doubles) are returned without a datatype.
// If the template parameter `onlyReturnLiterals` is `true`, `std::nullopt` is
// returned for every `id` that is neither a `VocabIndex` nor a
// `LocalVocabIndex`.
// If `onlyReturnLiteralsWithXsdString` is `true`, IDs that directly store
// their value yield `std::nullopt`.
// NOTE(review): for IRIs (when either of the two flags is set) and for
// literals with a non-`xsd:string` datatype (when
// `onlyReturnLiteralsWithXsdString` is set) the current implementation calls
// `AD_THROW` instead of returning `std::nullopt`; confirm which of the two
// behaviors is intended.
// These semantics are useful for the string expressions in
// StringExpressions.cpp.
template <bool onlyReturnLiterals = false>
static std::optional<LiteralOrIri> idToLiteralOrIri(
    const Index& index, Id id, const LocalVocab& localVocab,
    bool onlyReturnLiteralsWithXsdString = false);

// Like `idToLiteralOrIri`, but only handles the datatypes for which the value
// is encoded directly in the ID. For other datatypes an exception is thrown.
// NOTE(review): the exception can only originate from the underlying
// conversion, which is skipped when the filter is enabled; confirm.
// If `onlyReturnLiteralsWithXsdString` is `true`, returns `std::nullopt`.
// If `onlyReturnLiteralsWithXsdString` is `false`, removes datatypes from
// literals (e.g. the integer `42` is converted to the plain literal `"42"`).
// In the latter case `std::nullopt` is also returned if
// `idToStringAndTypeForEncodedValue` yields no value for the `id`.
static std::optional<LiteralOrIri> idToLiteralOrIriForEncodedValue(
Id id, bool onlyReturnLiteralsWithXsdString = false);

// A helper function for the `idToLiteralOrIri` function. Checks and processes
// a LiteralOrIri based on the given parameters:
// - An IRI is returned unchanged if both flags are `false`; otherwise an
//   error is reported via `AD_THROW`.
// - A literal without datatype or with datatype `xsd:string` is returned
//   unchanged.
// - Any other literal has its datatype removed, unless
//   `onlyReturnLiteralsWithXsdString` is `true`, in which case an error is
//   reported via `AD_THROW`.
static std::optional<LiteralOrIri> handleIriOrLiteral(
LiteralOrIri word, bool onlyReturnLiterals,
bool onlyReturnLiteralsWithXsdString);

// Return true iff `word` either has no datatype (plain literal) or has the
// datatype `xsd:string`.
// NOTE(review): presumably `word` is expected to be a literal, not an IRI;
// verify against the callers.
static bool isPlainLiteralOrLiteralWithXsdString(const LiteralOrIri& word);

// Acts as a helper to retrieve a LiteralOrIri object
// from an Id, where the Id is of type `VocabIndex` or `LocalVocabIndex`.
// This function should only be called with suitable `Datatype` Id's,
Expand Down
26 changes: 26 additions & 0 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,32 @@ std::optional<std::string> StringValueGetter::operator()(
}
}

// ____________________________________________________________________________
std::optional<LiteralOrIri> LiteralOrIriValueGetter::operator()(
    Id id, const EvaluationContext* context) const {
  // No `xsd:string` filter is requested here; the defaults of
  // `idToLiteralOrIri` apply.
  const auto& index = context->_qec.getIndex();
  const auto& localVocab = context->_localVocab;
  return ExportQueryExecutionTrees::idToLiteralOrIri(index, id, localVocab);
}

// ____________________________________________________________________________
std::optional<LiteralOrIri>
LiteralOrIriValueGetterWithXsdStringFilter::operator()(
    Id id, const EvaluationContext* context) const {
  // Same conversion as in `LiteralOrIriValueGetter`, but with the
  // `xsd:string` filter switched on.
  constexpr bool onlyReturnLiteralsWithXsdString = true;
  return ExportQueryExecutionTrees::idToLiteralOrIri(
      context->_qec.getIndex(), id, context->_localVocab,
      onlyReturnLiteralsWithXsdString);
}

// ____________________________________________________________________________
std::optional<LiteralOrIri>
LiteralOrIriValueGetterWithXsdStringFilter::operator()(
    const LiteralOrIri& s, const EvaluationContext*) const {
  // Anything that is neither a plain literal nor an `xsd:string` literal is
  // rejected with an error.
  if (!ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(s)) {
    AD_THROW("Input is not a plain string or xsd:string.");
    return std::nullopt;
  }
  return s;
}

// ____________________________________________________________________________
template <auto isSomethingFunction, auto prefix>
Id IsSomethingValueGetter<isSomethingFunction, prefix>::operator()(
Expand Down
39 changes: 39 additions & 0 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,45 @@ struct StringValueGetter : Mixin<StringValueGetter> {
}
};

// A `ValueGetter` for the Expression templates that yields a `LiteralOrIri`
// (wrapped in a `std::optional`).
struct LiteralOrIriValueGetter : Mixin<LiteralOrIriValueGetter> {
  using Mixin<LiteralOrIriValueGetter>::operator();

  // An input that already is a `LiteralOrIri` is handed back unchanged.
  std::optional<LiteralOrIri> operator()(const LiteralOrIri& s,
                                         const EvaluationContext*) const {
    return s;
  }

  // Conversion of a `ValueId`; only declared here, defined out of line.
  std::optional<LiteralOrIri> operator()(ValueId,
                                         const EvaluationContext*) const;
};
joka921 marked this conversation as resolved.
Show resolved Hide resolved

// Like `LiteralOrIriValueGetter`, but only literals with the `xsd:string`
// datatype or without a datatype are returned.
struct LiteralOrIriValueGetterWithXsdStringFilter
: Mixin<LiteralOrIriValueGetterWithXsdStringFilter> {
using Mixin<LiteralOrIriValueGetterWithXsdStringFilter>::operator();

// Conversion of a `ValueId`; only declared here, defined out of line.
std::optional<LiteralOrIri> operator()(ValueId,
const EvaluationContext*) const;

// Check of an input that already is a `LiteralOrIri`; only declared here,
// defined out of line.
std::optional<LiteralOrIri> operator()(const LiteralOrIri& s,
const EvaluationContext*) const;
};

// Value getter for `isBlank`: yields a boolean `Id` that states whether the
// input is a blank node.
struct IsBlankNodeValueGetter : Mixin<IsBlankNodeValueGetter> {
  using Mixin<IsBlankNodeValueGetter>::operator();

  // For a `LiteralOrIri` input the result is always `false`.
  Id operator()(const LiteralOrIri&, const EvaluationContext*) const {
    return Id::makeFromBool(false);
  }

  // For a `ValueId` the datatype alone decides.
  Id operator()(ValueId id, const EvaluationContext*) const {
    const bool isBlankNode = id.getDatatype() == Datatype::BlankNodeIndex;
    return Id::makeFromBool(isBlankNode);
  }
};

// Boolean value getter that checks whether the given `Id` is a `ValueId` of the
// given `datatype`.
template <Datatype datatype>
Expand Down
Loading
Loading