diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index cec128ba6be..8933f20f07f 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -990,8 +990,8 @@ public int hashCode() { } } - private static final ThreadLocal SEEN_EQUALS = ThreadLocalWithInitial.of(HashSet::new); - private static final ThreadLocal SEEN_HASHCODE = ThreadLocalWithInitial.of(IdentityHashMap::new); + private static final ThreadLocal> SEEN_EQUALS = ThreadLocalWithInitial.of(HashSet::new); + private static final ThreadLocal> SEEN_HASHCODE = ThreadLocalWithInitial.of(IdentityHashMap::new); @SuppressWarnings(value = "unchecked") private static class RecordSchema extends NamedSchema { @@ -1087,7 +1087,7 @@ public boolean equals(Object o) { @Override int computeHash() { - Map seen = SEEN_HASHCODE.get(); + Map seen = SEEN_HASHCODE.get(); if (seen.containsKey(this)) return 0; // prevent stack overflow boolean first = seen.isEmpty(); @@ -1109,8 +1109,8 @@ void toJson(Names names, JsonGenerator gen) throws IOException { gen.writeStringField("type", isError ? "error" : "record"); writeName(names, gen); names.space = name.space; // set default namespace - if (getDoc() != null) - gen.writeStringField("doc", getDoc()); + if (this.getDoc() != null) + gen.writeStringField("doc", this.getDoc()); if (fields != null) { gen.writeFieldName("fields"); @@ -1480,9 +1480,17 @@ public NullSchema() { */ public static class Parser { private Names names = new Names(); - private boolean validate = true; + private final Schema.NameValidator validate; private boolean validateDefaults = true; + public Parser() { + this(NameValidator.UTF_VALIDATOR); + } + + public Parser(final NameValidator validate) { + this.validate = validate; + } + /** * Adds the provided types to the set of defined, named types known to this * parser. deprecated: use addTypes(Iterable types) @@ -1510,17 +1518,6 @@ public Map getTypes() { return result; } - /** Enable or disable name validation. */ - public Parser setValidate(boolean validate) { - this.validate = validate; - return this; - } - - /** True iff names are validated. True by default. */ - public boolean getValidate() { - return this.validate; - } - /** Enable or disable default value validation. */ public Parser setValidateDefaults(boolean validateDefaults) { this.validateDefaults = validateDefaults; @@ -1587,7 +1584,7 @@ private static interface ParseFunction { } private Schema runParser(JsonParser parser, ParseFunction f) throws IOException { - boolean saved = validateNames.get(); + NameValidator saved = validateNames.get(); boolean savedValidateDefaults = VALIDATE_DEFAULTS.get(); try { validateNames.set(validate); @@ -1683,7 +1680,8 @@ public static Schema parse(String jsonSchema) { */ @Deprecated public static Schema parse(String jsonSchema, boolean validate) { - return new Parser().setValidate(validate).parse(jsonSchema); + final NameValidator validator = validate ? NameValidator.UTF_VALIDATOR : NameValidator.NO_VALIDATION; + return new Parser(validator).parse(jsonSchema); } static final Map PRIMITIVES = new HashMap<>(); @@ -1752,27 +1750,21 @@ public Schema put(Name name, Schema schema) { } } - private static ThreadLocal validateNames = ThreadLocalWithInitial.of(() -> true); + private static ThreadLocal validateNames = ThreadLocalWithInitial + .of(() -> NameValidator.UTF_VALIDATOR); private static String validateName(String name) { - if (!validateNames.get()) - return name; // not validating names - if (name == null) - throw new SchemaParseException("Null name"); - int length = name.length(); - if (length == 0) - throw new SchemaParseException("Empty name"); - char first = name.charAt(0); - if (!(Character.isLetter(first) || first == '_')) - throw new SchemaParseException("Illegal initial character: " + name); - for (int i = 1; i < length; i++) { - char c = name.charAt(i); - if (!(Character.isLetterOrDigit(c) || c == '_')) - throw new SchemaParseException("Illegal character in: " + name); + NameValidator.Result result = validateNames.get().validate(name); + if (!result.isOK()) { + throw new SchemaParseException(result.errors); } return name; } + public static void setNameValidator(final Schema.NameValidator validator) { + Schema.validateNames.set(validator); + } + private static final ThreadLocal VALIDATE_DEFAULTS = ThreadLocalWithInitial.of(() -> true); private static JsonNode validateDefault(String fieldName, Schema schema, JsonNode defaultValue) { @@ -2299,6 +2291,84 @@ private static String getFieldAlias(Name record, String field, Map= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } + + private boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + }; + + } + /** * No change is permitted on LockableArrayList once lock() has been called on * it. diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java index a2b5172251d..150d2ace9ba 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java @@ -139,7 +139,7 @@ void initialize(InputStream in, byte[] magic) throws IOException { // finalize the header header.metaKeyList = Collections.unmodifiableList(header.metaKeyList); - header.schema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + header.schema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse(getMetaString(DataFileConstants.SCHEMA)); this.codec = resolveCodec(); reader.setSchema(header.schema); diff --git a/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java new file mode 100644 index 00000000000..6846c4434cf --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.stream.Stream; + +class SchemaNameValidatorTest { + + @ParameterizedTest + @MethodSource("data") + void validator(Schema.NameValidator validator, String input, boolean expectedResult) { + Schema.NameValidator.Result result = validator.validate(input); + Assertions.assertEquals(expectedResult, result.isOK(), result.getErrors()); + } + + static Stream data() { + return Stream.of(Arguments.of(Schema.NameValidator.UTF_VALIDATOR, null, false), // null not accepted + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, null, false), // null not accepted + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "", false), // empty not accepted + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "", false), // empty not accepted + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello world", false), // space not accepted + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello world", false), // space not accepted + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H&", false), // non letter or digit not accepted + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H&", false), // non letter or digit not accepted + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H=", false), // non letter or digit not accepted + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H=", false), // non letter or digit not accepted + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H]", false), // non letter or digit not accepted + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H]", false), // non letter or digit not accepted + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello_world", true), + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello_world", true), + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "éàçô", true), // Accept accent + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "éàçô", false), // Not Accept accent + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "5éàçô", false), // can't start with number + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "5éàçô", false), // can't start with number + Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "_Hello_world", true), + Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "_Hello_world", true)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java index fb42d639ecb..a85b966409b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java @@ -87,7 +87,7 @@ void throttledInputStream() throws IOException { // magic header check. This happens with throttled input stream, // where we read into buffer less bytes than requested. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); File f = Files.createTempFile("testThrottledInputStream", ".avro").toFile(); @@ -146,7 +146,7 @@ void inputStreamEOF() throws IOException { // AVRO-2944 describes hanging/failure in reading Avro file with performing // magic header check. This potentially happens with a defective input stream // where a -1 value is unexpectedly returned from a read. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile(); @@ -195,7 +195,7 @@ void ignoreSchemaValidationOnRead() throws IOException { // This schema has an accent in the name and the default for the field doesn't // match the first type in the union. A Java SDK in the past could create a file // containing this schema. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"InvalidAccëntWithInvalidNull\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java index b4c259e92f0..9900fd635cf 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java @@ -500,17 +500,15 @@ public void parseAliases() throws JsonProcessingException { } @Test - void testContentAfterAvsc() throws Exception { - Schema.Parser parser = new Schema.Parser(); - parser.setValidate(true); + void testContentAfterAvsc() { + Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR); parser.setValidateDefaults(true); assertThrows(SchemaParseException.class, () -> parser.parse("{\"type\": \"string\"}; DROP TABLE STUDENTS")); } @Test void testContentAfterAvscInInputStream() throws Exception { - Schema.Parser parser = new Schema.Parser(); - parser.setValidate(true); + Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR); parser.setValidateDefaults(true); String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS"; ByteArrayInputStream is = new ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8)); @@ -526,8 +524,7 @@ void testContentAfterAvscInFile() throws Exception { writer.flush(); } - Schema.Parser parser = new Schema.Parser(); - parser.setValidate(true); + Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR); parser.setValidateDefaults(true); assertThrows(SchemaParseException.class, () -> parser.parse(avscFile)); } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java index 784bfba021d..fcbaae65570 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java @@ -38,6 +38,8 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecordBuilder; + +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -874,4 +876,25 @@ void namespaceDefaulting() { assertEquals(a2, a1); } + + @Test + void namesAcceptAll() throws InterruptedException { + // Ensure that Schema.setNameValidator won't interfere with others unit tests. + Runnable r = () -> { + Schema.setNameValidator(Schema.NameValidator.NO_VALIDATION); + final Schema schema = SchemaBuilder.record("7name").fields().name("123").type(Schema.create(Schema.Type.INT)) + .noDefault().endRecord(); + Assertions.assertNotNull(schema); + Assertions.assertEquals("7name", schema.getName()); + final Schema.Field field = schema.getField("123"); + Assertions.assertEquals("123", field.name()); + }; + + final Throwable[] exception = new Throwable[] { null }; + Thread t = new Thread(r); + t.setUncaughtExceptionHandler((Thread th, Throwable e) -> exception[0] = e); + t.start(); + t.join(); + Assertions.assertNull(exception[0], () -> exception[0].getMessage()); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java index 470010e6f73..e8fadeea71e 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java @@ -1232,7 +1232,7 @@ private static class Z { @Test void dollarTerminatedNamespaceCompatibility() { ReflectData data = ReflectData.get(); - Schema s = new Schema.Parser().setValidate(false).parse( + Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse( "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect$\",\"fields\":[]}"); assertEquals(data.getSchema(data.getClass(s)).toString(), "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}"); @@ -1242,7 +1242,7 @@ void dollarTerminatedNamespaceCompatibility() { void dollarTerminatedNestedStaticClassNamespaceCompatibility() { ReflectData data = ReflectData.get(); // Older versions of Avro generated this namespace on nested records. - Schema s = new Schema.Parser().setValidate(false).parse( + Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse( "{\"type\":\"record\",\"name\":\"AnotherSampleRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect$SampleRecord\",\"fields\":[]}"); assertThat(data.getSchema(data.getClass(s)).getFullName(), is("org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord")); diff --git a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java index 9bb3e281a7d..eb2d5c91835 100644 --- a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java @@ -205,9 +205,9 @@ void record(TestInfo testInfo) throws Exception { @Test void invalidNameTolerance() { - new Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}"); - new Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}"); - new Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}"); + new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}"); + new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}"); + new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}"); } @Test