From 1b78e69f6de50a511fb57fb275294168bbc49c24 Mon Sep 17 00:00:00 2001 From: Christophe Le Saec <51320496+clesaec@users.noreply.github.com> Date: Tue, 19 Sep 2023 16:00:46 +0200 Subject: [PATCH] AVRO-3649: Fix for union type to match default values for any innertype (#2503) * avro-3649: Union type to match default values for any innertype --- .../docs/++version++/Specification/_index.md | 4 +- .../src/main/java/org/apache/avro/Schema.java | 26 ++++++++-- .../org/apache/avro/reflect/ReflectData.java | 7 +-- .../test/java/org/apache/avro/TestSchema.java | 52 +++++++++++++++++++ .../org/apache/avro/reflect/TestReflect.java | 28 ++++++++++ .../test/java/org/apache/avro/TestSchema.java | 26 ++++------ 6 files changed, 117 insertions(+), 26 deletions(-) diff --git a/doc/content/en/docs/++version++/Specification/_index.md b/doc/content/en/docs/++version++/Specification/_index.md index 7cc5a17547e..4772b001086 100755 --- a/doc/content/en/docs/++version++/Specification/_index.md +++ b/doc/content/en/docs/++version++/Specification/_index.md @@ -77,7 +77,7 @@ Records use the type name "record" and support the following attributes: * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. * _aliases_: a JSON array of strings, providing alternate names for this field (optional). - * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema that matches in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. *field default values* @@ -160,7 +160,7 @@ For example, a map from string to long is declared with: ### Unions Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. -(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match the first element of the union. Thus, for unions containing "null", the "null" is usually listed first, since the default value of such unions is typically null.) +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match with one element of the union. Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index 8933f20f07f..38a6e4a9e42 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -1332,6 +1332,16 @@ public UnionSchema(LockableArrayList types) { } } + /** + * Checks if a JSON value matches the schema. + * + * @param jsonValue a value to check against the schema + * @return true if the value is valid according to this schema + */ + public boolean isValidDefault(JsonNode jsonValue) { + return this.types.stream().anyMatch((Schema s) -> s.isValidDefault(jsonValue)); + } + @Override public List getTypes() { return types; @@ -1768,13 +1778,23 @@ public static void setNameValidator(final Schema.NameValidator validator) { private static final ThreadLocal VALIDATE_DEFAULTS = ThreadLocalWithInitial.of(() -> true); private static JsonNode validateDefault(String fieldName, Schema schema, JsonNode defaultValue) { - if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && !isValidDefault(schema, defaultValue)) { // invalid default + if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && !schema.isValidDefault(defaultValue)) { // invalid default String message = "Invalid default for field " + fieldName + ": " + defaultValue + " not a " + schema; throw new AvroTypeException(message); // throw exception } return defaultValue; } + /** + * Checks if a JSON value matches the schema. + * + * @param jsonValue a value to check against the schema + * @return true if the value is valid according to this schema + */ + public boolean isValidDefault(JsonNode jsonValue) { + return isValidDefault(this, jsonValue); + } + private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { if (defaultValue == null) return false; @@ -1809,8 +1829,8 @@ private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { if (!isValidDefault(schema.getValueType(), value)) return false; return true; - case UNION: // union default: first branch - return isValidDefault(schema.getTypes().get(0), defaultValue); + case UNION: // union default: any branch + return schema.getTypes().stream().anyMatch((Schema s) -> isValidValue(s, defaultValue)); case RECORD: if (!defaultValue.isObject()) return false; diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java index 9271cfa98de..347490679ee 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java @@ -617,11 +617,8 @@ protected Object createSchemaDefaultValue(Type type, Field field, Schema fieldSc AvroDefault defaultAnnotation = field.getAnnotation(AvroDefault.class); defaultValue = (defaultAnnotation == null) ? null : Schema.parseJsonToObject(defaultAnnotation.value()); - if (defaultValue == null && fieldSchema.getType() == Schema.Type.UNION) { - Schema defaultType = fieldSchema.getTypes().get(0); - if (defaultType.getType() == Schema.Type.NULL) { - defaultValue = JsonProperties.NULL_VALUE; - } + if (defaultValue == null && fieldSchema.isNullable()) { + defaultValue = JsonProperties.NULL_VALUE; } return defaultValue; } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java index 9900fd635cf..64748da1364 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java @@ -39,6 +39,11 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.IntNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.NullNode; +import com.fasterxml.jackson.databind.node.TextNode; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; @@ -421,6 +426,53 @@ void qualifiedName() { assertEquals("Int", nameInt.getQualified("space")); } + @Test + void validValue() { + // Valid null value + final Schema nullSchema = Schema.create(Type.NULL); + assertTrue(nullSchema.isValidDefault(JsonNodeFactory.instance.nullNode())); + + // Valid int value + final Schema intSchema = Schema.create(Type.INT); + assertTrue(intSchema.isValidDefault(JsonNodeFactory.instance.numberNode(12))); + + // Valid Text value + final Schema strSchema = Schema.create(Type.STRING); + assertTrue(strSchema.isValidDefault(new TextNode("textNode"))); + + // Valid Array value + final Schema arraySchema = Schema.createArray(Schema.create(Type.STRING)); + final ArrayNode arrayValue = JsonNodeFactory.instance.arrayNode(); + assertTrue(arraySchema.isValidDefault(arrayValue)); // empty array + + arrayValue.add("Hello"); + arrayValue.add("World"); + assertTrue(arraySchema.isValidDefault(arrayValue)); + + arrayValue.add(5); + assertFalse(arraySchema.isValidDefault(arrayValue)); + + // Valid Union type + final Schema unionSchema = Schema.createUnion(strSchema, intSchema, nullSchema); + assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.textNode("Hello"))); + assertTrue(unionSchema.isValidDefault(new IntNode(23))); + assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.nullNode())); + + assertFalse(unionSchema.isValidDefault(arrayValue)); + + // Array of union + final Schema arrayUnion = Schema.createArray(unionSchema); + final ArrayNode arrayUnionValue = JsonNodeFactory.instance.arrayNode(); + arrayUnionValue.add("Hello"); + arrayUnionValue.add(NullNode.getInstance()); + assertTrue(arrayUnion.isValidDefault(arrayUnionValue)); + + // Union String, bytes + final Schema unionStrBytes = Schema.createUnion(strSchema, Schema.create(Type.BYTES)); + assertTrue(unionStrBytes.isValidDefault(JsonNodeFactory.instance.textNode("Hello"))); + assertFalse(unionStrBytes.isValidDefault(JsonNodeFactory.instance.numberNode(123))); + } + @Test void enumLateDefine() { String schemaString = "{\n" + " \"type\":\"record\",\n" + " \"name\": \"Main\",\n" + " \"fields\":[\n" diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java index e8fadeea71e..5f52a2cf789 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java @@ -536,6 +536,34 @@ public static interface P2 { void error() throws E1; } + private static class NullableDefaultTest { + @Nullable + @AvroDefault("1") + int foo; + } + + @Test + public void testAvroNullableDefault() { + check(NullableDefaultTest.class, + "{\"type\":\"record\",\"name\":\"NullableDefaultTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"default\":1}]}"); + } + + private static class UnionDefaultTest { + @Union({ Integer.class, String.class }) + @AvroDefault("1") + Object foo; + } + + @Test + public void testAvroUnionDefault() { + check(UnionDefaultTest.class, + "{\"type\":\"record\",\"name\":\"UnionDefaultTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"int\",\"string\"],\"default\":1}]}"); + } + @Test void p2() throws Exception { Schema e1 = ReflectData.get().getSchema(E1.class); diff --git a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java index eb2d5c91835..d85b28effa3 100644 --- a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java @@ -302,26 +302,20 @@ void lisp(TestInfo testInfo) throws Exception { void union(TestInfo testInfo) throws Exception { check(new File(DIR, testInfo.getTestMethod().get().getName()), "[\"string\", \"long\"]", false); checkDefault("[\"double\", \"long\"]", "1.1", 1.1); + checkDefault("[\"double\", \"string\"]", "\"TheString\"", new Utf8("TheString")); // test that erroneous default values cause errors for (String type : new String[] { "int", "long", "float", "double", "string", "bytes", "boolean" }) { - checkValidateDefaults("[\"" + type + "\", \"null\"]", "null"); // schema parse time - boolean error = false; - try { - checkDefault("[\"" + type + "\", \"null\"]", "null", 0); // read time - } catch (AvroTypeException e) { - error = true; - } - assertTrue(error); - checkValidateDefaults("[\"null\", \"" + type + "\"]", "0"); // schema parse time - error = false; - try { - checkDefault("[\"null\", \"" + type + "\"]", "0", null); // read time - } catch (AvroTypeException e) { - error = true; - } - assertTrue(error); +// checkValidateDefaults("[\"" + type + "\", \"null\"]", "null"); // schema parse time + checkDefault("[\"" + type + "\", \"null\"]", "null", null); // read time } + checkDefault("[\"null\", \"int\"]", "0", 0); + checkDefault("[\"null\", \"long\"]", "0", 0l); + checkDefault("[\"null\", \"float\"]", "0.0", 0.0f); + checkDefault("[\"null\", \"double\"]", "0.0", 0.0d); + checkDefault("[\"null\", \"string\"]", "\"Hi\"", new Utf8("Hi")); + checkDefault("[\"null\", \"bytes\"]", "\"01\"", ByteBuffer.wrap("01".getBytes(StandardCharsets.UTF_8))); + checkDefault("[\"null\", \"boolean\"]", "true", true); // check union json String record = "{\"type\":\"record\",\"name\":\"Foo\",\"fields\":[]}";