Skip to content

feat: add visit type support #94

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,17 @@ https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp

MurmurHash3 was written by Austin Appleby, and is placed in the public
domain. The author disclaims copyright to this source code.

--------------------------------------------------------------------------------

The file src/iceberg/util/checked_cast.h contains code adapted from

https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/checked_cast.h

The file src/iceberg/util/visit_type.h contains code adapted from

https://github.com/apache/arrow/blob/main/cpp/src/arrow/visit_type_inline.h

Copyright: 2016-2025 The Apache Software Foundation.
Home page: https://arrow.apache.org/
License: https://www.apache.org/licenses/LICENSE-2.0
4 changes: 4 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ This product includes code from smhasher
* MurmurHash3 was written by Austin Appleby, and is placed in the public
* domain. The author hereby disclaims copyright to this source code.
* https://github.com/aappleby/smhasher

This product includes code from Apache Arrow
* Copyright 2016-2025 The Apache Software Foundation
* https://github.com/apache/arrow
3 changes: 2 additions & 1 deletion src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ set(ICEBERG_SOURCES
transform_function.cc
type.cc
util/murmurhash3_internal.cc
util/timepoint.cc)
util/timepoint.cc
util/unreachable.cc)

set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)
Expand Down
82 changes: 31 additions & 51 deletions src/iceberg/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ StructType::StructType(std::vector<SchemaField> fields) : fields_(std::move(fiel
}
}

TypeId StructType::type_id() const { return TypeId::kStruct; }
TypeId StructType::type_id() const { return kTypeId; }
std::string StructType::ToString() const {
std::string repr = "struct<\n";
for (const auto& field : fields_) {
Expand Down Expand Up @@ -93,7 +93,7 @@ ListType::ListType(SchemaField element) : element_(std::move(element)) {
ListType::ListType(int32_t field_id, std::shared_ptr<Type> type, bool optional)
: element_(field_id, std::string(kElementName), std::move(type), optional) {}

TypeId ListType::type_id() const { return TypeId::kList; }
TypeId ListType::type_id() const { return kTypeId; }
std::string ListType::ToString() const {
// XXX: work around Clang/libc++: "<{}>" in a format string appears to get
// parsed as {<>} or something; split up the format string to avoid that
Expand Down Expand Up @@ -146,7 +146,7 @@ MapType::MapType(SchemaField key, SchemaField value)

const SchemaField& MapType::key() const { return fields_[0]; }
const SchemaField& MapType::value() const { return fields_[1]; }
TypeId MapType::type_id() const { return TypeId::kMap; }
TypeId MapType::type_id() const { return kTypeId; }
std::string MapType::ToString() const {
// XXX: work around Clang/libc++: "<{}>" in a format string appears to get
// parsed as {<>} or something; split up the format string to avoid that
Expand Down Expand Up @@ -192,33 +192,25 @@ bool MapType::Equals(const Type& other) const {
return fields_ == map.fields_;
}

TypeId BooleanType::type_id() const { return TypeId::kBoolean; }
TypeId BooleanType::type_id() const { return kTypeId; }
std::string BooleanType::ToString() const { return "boolean"; }
bool BooleanType::Equals(const Type& other) const {
return other.type_id() == TypeId::kBoolean;
}
bool BooleanType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

TypeId IntType::type_id() const { return TypeId::kInt; }
TypeId IntType::type_id() const { return kTypeId; }
std::string IntType::ToString() const { return "int"; }
bool IntType::Equals(const Type& other) const { return other.type_id() == TypeId::kInt; }
bool IntType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

TypeId LongType::type_id() const { return TypeId::kLong; }
TypeId LongType::type_id() const { return kTypeId; }
std::string LongType::ToString() const { return "long"; }
bool LongType::Equals(const Type& other) const {
return other.type_id() == TypeId::kLong;
}
bool LongType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

TypeId FloatType::type_id() const { return TypeId::kFloat; }
TypeId FloatType::type_id() const { return kTypeId; }
std::string FloatType::ToString() const { return "float"; }
bool FloatType::Equals(const Type& other) const {
return other.type_id() == TypeId::kFloat;
}
bool FloatType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

TypeId DoubleType::type_id() const { return TypeId::kDouble; }
TypeId DoubleType::type_id() const { return kTypeId; }
std::string DoubleType::ToString() const { return "double"; }
bool DoubleType::Equals(const Type& other) const {
return other.type_id() == TypeId::kDouble;
}
bool DoubleType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

DecimalType::DecimalType(int32_t precision, int32_t scale)
: precision_(precision), scale_(scale) {
Expand All @@ -230,57 +222,47 @@ DecimalType::DecimalType(int32_t precision, int32_t scale)

int32_t DecimalType::precision() const { return precision_; }
int32_t DecimalType::scale() const { return scale_; }
TypeId DecimalType::type_id() const { return TypeId::kDecimal; }
TypeId DecimalType::type_id() const { return kTypeId; }
std::string DecimalType::ToString() const {
return std::format("decimal({}, {})", precision_, scale_);
}
bool DecimalType::Equals(const Type& other) const {
if (other.type_id() != TypeId::kDecimal) {
if (other.type_id() != kTypeId) {
return false;
}
const auto& decimal = static_cast<const DecimalType&>(other);
return precision_ == decimal.precision_ && scale_ == decimal.scale_;
}

TypeId DateType::type_id() const { return TypeId::kDate; }
TypeId DateType::type_id() const { return kTypeId; }
std::string DateType::ToString() const { return "date"; }
bool DateType::Equals(const Type& other) const {
return other.type_id() == TypeId::kDate;
}
bool DateType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

TypeId TimeType::type_id() const { return TypeId::kTime; }
TypeId TimeType::type_id() const { return kTypeId; }
std::string TimeType::ToString() const { return "time"; }
bool TimeType::Equals(const Type& other) const {
return other.type_id() == TypeId::kTime;
}
bool TimeType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

bool TimestampType::is_zoned() const { return false; }
TimeUnit TimestampType::time_unit() const { return TimeUnit::kMicrosecond; }
TypeId TimestampType::type_id() const { return TypeId::kTimestamp; }
TypeId TimestampType::type_id() const { return kTypeId; }
std::string TimestampType::ToString() const { return "timestamp"; }
bool TimestampType::Equals(const Type& other) const {
return other.type_id() == TypeId::kTimestamp;
}
bool TimestampType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

bool TimestampTzType::is_zoned() const { return true; }
TimeUnit TimestampTzType::time_unit() const { return TimeUnit::kMicrosecond; }
TypeId TimestampTzType::type_id() const { return TypeId::kTimestampTz; }
TypeId TimestampTzType::type_id() const { return kTypeId; }
std::string TimestampTzType::ToString() const { return "timestamptz"; }
bool TimestampTzType::Equals(const Type& other) const {
return other.type_id() == TypeId::kTimestampTz;
return other.type_id() == kTypeId;
}

TypeId StringType::type_id() const { return TypeId::kString; }
TypeId StringType::type_id() const { return kTypeId; }
std::string StringType::ToString() const { return "string"; }
bool StringType::Equals(const Type& other) const {
return other.type_id() == TypeId::kString;
}
bool StringType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

TypeId UuidType::type_id() const { return TypeId::kUuid; }
TypeId UuidType::type_id() const { return kTypeId; }
std::string UuidType::ToString() const { return "uuid"; }
bool UuidType::Equals(const Type& other) const {
return other.type_id() == TypeId::kUuid;
}
bool UuidType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

FixedType::FixedType(int32_t length) : length_(length) {
if (length < 0) {
Expand All @@ -289,20 +271,18 @@ FixedType::FixedType(int32_t length) : length_(length) {
}

int32_t FixedType::length() const { return length_; }
TypeId FixedType::type_id() const { return TypeId::kFixed; }
TypeId FixedType::type_id() const { return kTypeId; }
std::string FixedType::ToString() const { return std::format("fixed({})", length_); }
bool FixedType::Equals(const Type& other) const {
if (other.type_id() != TypeId::kFixed) {
if (other.type_id() != kTypeId) {
return false;
}
const auto& fixed = static_cast<const FixedType&>(other);
return length_ == fixed.length_;
}

TypeId BinaryType::type_id() const { return TypeId::kBinary; }
TypeId BinaryType::type_id() const { return kTypeId; }
std::string BinaryType::ToString() const { return "binary"; }
bool BinaryType::Equals(const Type& other) const {
return other.type_id() == TypeId::kBinary;
}
bool BinaryType::Equals(const Type& other) const { return other.type_id() == kTypeId; }

} // namespace iceberg
30 changes: 30 additions & 0 deletions src/iceberg/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class ICEBERG_EXPORT NestedType : public Type {
/// \brief A data type representing a struct with nested fields.
class ICEBERG_EXPORT StructType : public NestedType {
public:
/// \brief Compile-time TypeId tag identifying this class as the struct type.
constexpr static const TypeId kTypeId = TypeId::kStruct;
explicit StructType(std::vector<SchemaField> fields);
~StructType() override = default;

Expand All @@ -128,6 +129,7 @@ class ICEBERG_EXPORT StructType : public NestedType {
/// \brief A data type representing a list of values.
class ICEBERG_EXPORT ListType : public NestedType {
public:
constexpr static const TypeId kTypeId = TypeId::kList;
constexpr static const std::string_view kElementName = "element";

/// \brief Construct a list of the given element. The name of the child
Expand Down Expand Up @@ -157,6 +159,7 @@ class ICEBERG_EXPORT ListType : public NestedType {
/// \brief A data type representing a dictionary of values.
class ICEBERG_EXPORT MapType : public NestedType {
public:
constexpr static const TypeId kTypeId = TypeId::kMap;
constexpr static const std::string_view kKeyName = "key";
constexpr static const std::string_view kValueName = "value";

Expand Down Expand Up @@ -194,6 +197,8 @@ class ICEBERG_EXPORT MapType : public NestedType {
/// \brief A data type representing a boolean (true or false).
class ICEBERG_EXPORT BooleanType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kBoolean;

BooleanType() = default;
~BooleanType() override = default;

Expand All @@ -207,6 +212,8 @@ class ICEBERG_EXPORT BooleanType : public PrimitiveType {
/// \brief A data type representing a 32-bit signed integer.
class ICEBERG_EXPORT IntType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kInt;

IntType() = default;
~IntType() override = default;

Expand All @@ -220,6 +227,8 @@ class ICEBERG_EXPORT IntType : public PrimitiveType {
/// \brief A data type representing a 64-bit signed integer.
class ICEBERG_EXPORT LongType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kLong;

LongType() = default;
~LongType() override = default;

Expand All @@ -234,6 +243,8 @@ class ICEBERG_EXPORT LongType : public PrimitiveType {
/// float.
class ICEBERG_EXPORT FloatType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kFloat;

FloatType() = default;
~FloatType() override = default;

Expand All @@ -248,6 +259,8 @@ class ICEBERG_EXPORT FloatType : public PrimitiveType {
/// float.
class ICEBERG_EXPORT DoubleType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kDouble;

DoubleType() = default;
~DoubleType() override = default;

Expand All @@ -261,6 +274,7 @@ class ICEBERG_EXPORT DoubleType : public PrimitiveType {
/// \brief A data type representing a fixed-precision decimal.
class ICEBERG_EXPORT DecimalType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kDecimal;
constexpr static const int32_t kMaxPrecision = 38;

/// \brief Construct a decimal type with the given precision and scale.
Expand Down Expand Up @@ -288,6 +302,8 @@ class ICEBERG_EXPORT DecimalType : public PrimitiveType {
/// timezone or time.
class ICEBERG_EXPORT DateType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kDate;

DateType() = default;
~DateType() override = default;

Expand All @@ -302,6 +318,8 @@ class ICEBERG_EXPORT DateType : public PrimitiveType {
/// reference to a timezone or date.
class ICEBERG_EXPORT TimeType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kTime;

TimeType() = default;
~TimeType() override = default;

Expand All @@ -326,6 +344,8 @@ class ICEBERG_EXPORT TimestampBase : public PrimitiveType {
/// reference to a timezone.
class ICEBERG_EXPORT TimestampType : public TimestampBase {
public:
constexpr static const TypeId kTypeId = TypeId::kTimestamp;

TimestampType() = default;
~TimestampType() override = default;

Expand All @@ -343,6 +363,8 @@ class ICEBERG_EXPORT TimestampType : public TimestampBase {
/// epoch in UTC. A time zone or offset is not stored.
class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
public:
constexpr static const TypeId kTypeId = TypeId::kTimestampTz;

TimestampTzType() = default;
~TimestampTzType() override = default;

Expand All @@ -359,6 +381,8 @@ class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
/// \brief A data type representing an arbitrary-length byte sequence.
class ICEBERG_EXPORT BinaryType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kBinary;

BinaryType() = default;
~BinaryType() override = default;

Expand All @@ -373,6 +397,8 @@ class ICEBERG_EXPORT BinaryType : public PrimitiveType {
/// (encoded in UTF-8).
class ICEBERG_EXPORT StringType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kString;

StringType() = default;
~StringType() override = default;

Expand All @@ -386,6 +412,8 @@ class ICEBERG_EXPORT StringType : public PrimitiveType {
/// \brief A data type representing a fixed-length bytestring.
class ICEBERG_EXPORT FixedType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kFixed;

/// \brief Construct a fixed type with the given length.
explicit FixedType(int32_t length);
~FixedType() override = default;
Expand All @@ -407,6 +435,8 @@ class ICEBERG_EXPORT FixedType : public PrimitiveType {
/// it is effectively a fixed(16).
class ICEBERG_EXPORT UuidType : public PrimitiveType {
public:
constexpr static const TypeId kTypeId = TypeId::kUuid;

UuidType() = default;
~UuidType() override = default;

Expand Down
Loading
Loading