Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion cpp/src/parquet/schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1580,7 +1580,8 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) {
LogicalType::EdgeInterpolationAlgorithm::KARNEY),
"Geography(crs=srid:1234, algorithm=karney)",
R"({"Type": "Geography", "crs": "srid:1234", "algorithm": "karney"})"},
{LogicalType::Variant(), "Variant", R"({"Type": "Variant"})"},
{LogicalType::Variant(), "Variant(1)", R"({"Type": "Variant", "SpecVersion": 1})"},
{LogicalType::Variant(2), "Variant(2)", R"({"Type": "Variant", "SpecVersion": 2})"},
{LogicalType::None(), "None", R"({"Type": "None"})"},
};

Expand Down Expand Up @@ -2353,6 +2354,37 @@ TEST(TestLogicalTypeSerialization, Roundtrips) {
// Group nodes ...
ConfirmGroupNodeRoundtrip("map", LogicalType::Map());
ConfirmGroupNodeRoundtrip("list", LogicalType::List());
ConfirmGroupNodeRoundtrip("variant", LogicalType::Variant());
}

// Checks that a Variant logical type built with an explicit specification
// version reports that version through VariantLogicalType::spec_version() and
// carries it in the Thrift SchemaElement produced by ToParquet().
TEST(TestLogicalTypeSerialization, VariantSpecificationVersion) {
  constexpr int8_t specVersion = 2;
  auto metadata = PrimitiveNode::Make("metadata", Repetition::REQUIRED, Type::BYTE_ARRAY);
  auto value = PrimitiveNode::Make("value", Repetition::REQUIRED, Type::BYTE_ARRAY);
  NodePtr variant_node =
      GroupNode::Make("variant", Repetition::REQUIRED, {metadata, value},
                      LogicalType::Variant(specVersion));

  // Verify variant logical type
  auto logical_type = variant_node->logical_type();
  ASSERT_TRUE(logical_type->is_variant());
  const auto& variant_type = checked_cast<const VariantLogicalType&>(*logical_type);
  ASSERT_EQ(variant_type.spec_version(), specVersion);

  // Verify thrift serialization.
  // The node was created by GroupNode::Make above, so a static_cast downcast is
  // sufficient; reinterpret_cast is not the right tool for a class-hierarchy cast.
  std::vector<format::SchemaElement> elements;
  ToParquet(static_cast<GroupNode*>(variant_node.get()), &elements);

  // Fail cleanly instead of indexing out of bounds if nothing was emitted
  ASSERT_FALSE(elements.empty());

  // Verify that logicalType is set and is VARIANT
  ASSERT_EQ(elements[0].name, "variant");
  ASSERT_TRUE(elements[0].__isset.logicalType);
  ASSERT_TRUE(elements[0].logicalType.__isset.VARIANT);

  // Verify that specification_version is set properly
  ASSERT_TRUE(elements[0].logicalType.VARIANT.__isset.specification_version);
  ASSERT_EQ(elements[0].logicalType.VARIANT.specification_version, specVersion);
}

} // namespace schema
Expand Down
58 changes: 50 additions & 8 deletions cpp/src/parquet/types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,12 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(

return GeographyLogicalType::Make(std::move(crs), algorithm);
} else if (type.__isset.VARIANT) {
return VariantLogicalType::Make();
int8_t specVersion = VARIANT_SPEC_VERSION;
if (type.VARIANT.__isset.specification_version) {
specVersion = type.VARIANT.specification_version;
}

return VariantLogicalType::Make(specVersion);
} else {
// Sentinel type for one we do not recognize
return UndefinedLogicalType::Make();
Expand Down Expand Up @@ -659,8 +664,8 @@ std::shared_ptr<const LogicalType> LogicalType::Geography(
return GeographyLogicalType::Make(std::move(crs), algorithm);
}

std::shared_ptr<const LogicalType> LogicalType::Variant() {
return VariantLogicalType::Make();
std::shared_ptr<const LogicalType> LogicalType::Variant(int8_t specVersion) {
return VariantLogicalType::Make(specVersion);
}

std::shared_ptr<const LogicalType> LogicalType::None() { return NoLogicalType::Make(); }
Expand Down Expand Up @@ -1958,16 +1963,53 @@ class LogicalType::Impl::Variant final : public LogicalType::Impl::Incompatible,
public:
friend class VariantLogicalType;

OVERRIDE_TOSTRING(Variant)
OVERRIDE_TOTHRIFT(VariantType, VARIANT)
std::string ToString() const override;
std::string ToJSON() const override;
format::LogicalType ToThrift() const override;

int8_t spec_version() const { return specVersion_; }

private:
Variant()
explicit Variant(const int8_t specVersion)
: LogicalType::Impl(LogicalType::Type::VARIANT, SortOrder::UNKNOWN),
LogicalType::Impl::Inapplicable() {}
LogicalType::Impl::Inapplicable() {
this->specVersion_ = specVersion;
}

int8_t specVersion_;
};

GENERATE_MAKE(Variant)
// Returns the Variant specification version stored in the implementation object.
int8_t VariantLogicalType::spec_version() const {
  // Reference dynamic_cast: throws std::bad_cast if impl_ is not a Variant impl.
  const auto& variant_impl = dynamic_cast<const LogicalType::Impl::Variant&>(*impl_);
  return variant_impl.spec_version();
}

// Renders the type as "Variant(<spec version>)".
std::string LogicalType::Impl::Variant::ToString() const {
  // Widen to int so the stream prints a number rather than a character.
  const int version = static_cast<int>(specVersion_);

  std::stringstream out;
  out << "Variant(" << version << ")";
  return out.str();
}

// Renders the type as a JSON object with "Type" and "SpecVersion" members.
std::string LogicalType::Impl::Variant::ToJSON() const {
  // Widen to int so the stream prints a number rather than a character.
  const int version = static_cast<int>(specVersion_);

  std::stringstream out;
  out << R"({"Type": "Variant", "SpecVersion": )" << version << "}";
  return out.str();
}

// Builds the Thrift LogicalType with its VARIANT member set and the
// specification_version field populated from this instance.
format::LogicalType LogicalType::Impl::Variant::ToThrift() const {
  format::VariantType variant;
  variant.__set_specification_version(specVersion_);

  format::LogicalType result;
  result.__set_VARIANT(variant);
  return result;
}

std::shared_ptr<const LogicalType> VariantLogicalType::Make(const int8_t specVersion) {
auto logical_type = std::shared_ptr<VariantLogicalType>(new VariantLogicalType());
logical_type->impl_.reset(new LogicalType::Impl::Variant(specVersion));
return logical_type;
}

class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible,
public LogicalType::Impl::UniversalApplicable {
Expand Down
11 changes: 9 additions & 2 deletions cpp/src/parquet/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ class PARQUET_EXPORT LogicalType {
KARNEY = 5
};

/// \brief The latest Variant specification version supported by this library
static constexpr int8_t VARIANT_SPEC_VERSION = 1;

/// \brief If possible, return a logical type equivalent to the given legacy
/// converted type (and decimal metadata if applicable).
static std::shared_ptr<const LogicalType> FromConvertedType(
Expand Down Expand Up @@ -224,7 +227,8 @@ class PARQUET_EXPORT LogicalType {
static std::shared_ptr<const LogicalType> BSON();
static std::shared_ptr<const LogicalType> UUID();
static std::shared_ptr<const LogicalType> Float16();
static std::shared_ptr<const LogicalType> Variant();
static std::shared_ptr<const LogicalType> Variant(
int8_t specVersion = VARIANT_SPEC_VERSION);

static std::shared_ptr<const LogicalType> Geometry(std::string crs = "");

Expand Down Expand Up @@ -495,7 +499,10 @@ class PARQUET_EXPORT GeographyLogicalType : public LogicalType {
/// \brief Allowed for group nodes only.
class PARQUET_EXPORT VariantLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
static std::shared_ptr<const LogicalType> Make(
int8_t specVersion = VARIANT_SPEC_VERSION);

int8_t spec_version() const;

private:
VariantLogicalType() = default;
Expand Down
Loading