From ab148b2e5ed8eac00100f1f328f48bec7f703873 Mon Sep 17 00:00:00 2001 From: glywk Date: Tue, 24 Dec 2024 05:30:13 +0100 Subject: [PATCH] [AVRO-4081][C++] Add big decimal support and update documentation (#3148) --- .../docs/++version++/Specification/_index.md | 8 ++++--- lang/c++/impl/Compiler.cc | 6 ++++- lang/c++/impl/LogicalType.cc | 3 +++ lang/c++/impl/Node.cc | 7 ++++++ lang/c++/include/avro/LogicalType.hh | 1 + lang/c++/test/SchemaTests.cc | 22 ++++++++++++++++--- 6 files changed, 40 insertions(+), 7 deletions(-) diff --git a/doc/content/en/docs/++version++/Specification/_index.md b/doc/content/en/docs/++version++/Specification/_index.md index 53b815c105b..950bae11762 100755 --- a/doc/content/en/docs/++version++/Specification/_index.md +++ b/doc/content/en/docs/++version++/Specification/_index.md @@ -787,6 +787,8 @@ A logical type is always serialized using its underlying Avro type so that value Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. ### Decimal + +#### Fixed precision The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10-scale_. A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. @@ -810,11 +812,11 @@ Scale must be zero or a positive integer less than or equal to the precision. For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. 
-**alternative** +#### Scalable precision As it's not always possible to fix scale and precision in advance for a decimal field, `big-decimal` is another `decimal` logical type restrict to Avro _bytes_. -_Currently only available in Java and Rust_. +_Currently only available in C++, Java and Rust_. ```json { @@ -822,7 +824,7 @@ _Currently only available in Java and Rust_. "logicalType": "big-decimal" } ``` -Here, as scale property is stored in value itself it needs more bytes than preceding `decimal` type, but it allows more flexibility. +Here, the bytes array contains two serialized properties. The first part is an Avro byte array holding the two's-complement representation of the unscaled integer value in big-endian byte order. The second part is the scale property, stored as an Avro integer. Scale must be zero or a positive integer less than or equal to the precision. The value itself needs more bytes than the preceding `decimal` type, but it allows more flexibility. ### UUID diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc index f1e2dfd96a5..73aaa9bbb27 100644 --- a/lang/c++/impl/Compiler.cc +++ b/lang/c++/impl/Compiler.cc @@ -359,7 +359,11 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) { } LogicalType::Type t = LogicalType::NONE; - if (typeField == "date") + if (typeField == "big-decimal" + && !containsField(m, "precision") + && !containsField(m, "scale")) + t = LogicalType::BIG_DECIMAL; + else if (typeField == "date") t = LogicalType::DATE; else if (typeField == "time-millis") t = LogicalType::TIME_MILLIS; diff --git a/lang/c++/impl/LogicalType.cc b/lang/c++/impl/LogicalType.cc index ed6a12f0892..18da72a23a5 100644 --- a/lang/c++/impl/LogicalType.cc +++ b/lang/c++/impl/LogicalType.cc @@ -51,6 +51,9 @@ void LogicalType::setScale(int32_t scale) { void LogicalType::printJson(std::ostream &os) const { switch (type_) { case LogicalType::NONE: break; + case LogicalType::BIG_DECIMAL: + os << R"("logicalType": "big-decimal")"; + break; 
case LogicalType::DECIMAL: os << R"("logicalType": "decimal")"; os << ", \"precision\": " << precision_; diff --git a/lang/c++/impl/Node.cc b/lang/c++/impl/Node.cc index fd9ee9d6dcf..615727128e8 100644 --- a/lang/c++/impl/Node.cc +++ b/lang/c++/impl/Node.cc @@ -139,6 +139,13 @@ void Node::setLogicalType(LogicalType logicalType) { // Check that the logical type is applicable to the node type. switch (logicalType.type()) { case LogicalType::NONE: break; + case LogicalType::BIG_DECIMAL: { + if (type_ != AVRO_BYTES) { + throw Exception("BIG_DECIMAL logical type can annotate " + "only BYTES type"); + } + break; + } case LogicalType::DECIMAL: { if (type_ != AVRO_BYTES && type_ != AVRO_FIXED) { throw Exception("DECIMAL logical type can annotate " diff --git a/lang/c++/include/avro/LogicalType.hh b/lang/c++/include/avro/LogicalType.hh index b2a7d0294ff..5b274bcb741 100644 --- a/lang/c++/include/avro/LogicalType.hh +++ b/lang/c++/include/avro/LogicalType.hh @@ -29,6 +29,7 @@ class AVRO_DECL LogicalType { public: enum Type { NONE, + BIG_DECIMAL, DECIMAL, DATE, TIME_MILLIS, diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc index 477e36046eb..bda02ad4c88 100644 --- a/lang/c++/test/SchemaTests.cc +++ b/lang/c++/test/SchemaTests.cc @@ -314,6 +314,7 @@ const char *roundTripSchemas[] = { R"({"type":"fixed","name":"Test","size":1})", // Logical types + R"({"type":"bytes","logicalType":"big-decimal"})", R"({"type":"bytes","logicalType":"decimal","precision":12,"scale":6})", R"({"type":"fixed","name":"test","size":16,"logicalType":"decimal","precision":38,"scale":9})", R"({"type":"fixed","name":"test","size":129,"logicalType":"decimal","precision":310,"scale":155})", @@ -361,6 +362,7 @@ const char *roundTripSchemas[] = { const char *malformedLogicalTypes[] = { // Wrong base type. 
+ R"({"type":"long","logicalType": "big-decimal"})", R"({"type":"long","logicalType": "decimal","precision": 10})", R"({"type":"string","logicalType":"date"})", R"({"type":"string","logicalType":"time-millis"})", @@ -379,9 +381,12 @@ const char *malformedLogicalTypes[] = { R"({"type":"fixed","logicalType":"decimal","size":4,"name":"a","precision":20})", R"({"type":"fixed","logicalType":"decimal","size":129,"name":"a","precision":311})", // Scale is larger than precision. - R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})" -}; - + R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})", + // Precision is not supported by the big-decimal logical type + // and scale is integrated in bytes. + R"({"type":"bytes","logicalType": "big-decimal","precision": 9})", + R"({"type":"bytes","logicalType": "big-decimal","scale": 2})", + R"({"type":"bytes","logicalType": "big-decimal","precision": 9,"scale": 2})"}; const char *schemasToCompact[] = { // Schema without any whitespace R"({"type":"record","name":"Test","fields":[]})", @@ -469,6 +474,10 @@ static void testCompactSchemas() { } static void testLogicalTypes() { + const char *bytesBigDecimalType = R"({ + "type": "bytes", + "logicalType": "big-decimal" + })"; const char *bytesDecimalType = R"({ "type": "bytes", "logicalType": "decimal", @@ -496,6 +505,13 @@ static void testLogicalTypes() { const char *uuidType = R"({"type": "string","logicalType": "uuid"})"; // AVRO-2923 Union with LogicalType const char *unionType = R"([{"type":"string", "logicalType":"uuid"},"null"]})"; + { + BOOST_TEST_CHECKPOINT(bytesBigDecimalType); + ValidSchema schema = compileJsonSchemaFromString(bytesBigDecimalType); + BOOST_CHECK(schema.root()->type() == AVRO_BYTES); + LogicalType logicalType = schema.root()->logicalType(); + BOOST_CHECK(logicalType.type() == LogicalType::BIG_DECIMAL); + } { BOOST_TEST_CHECKPOINT(bytesDecimalType); ValidSchema schema1 = compileJsonSchemaFromString(bytesDecimalType);