Skip to content

Commit e92eac8

Browse files
committed
AVRO-3666: Refactor for recent changes
Includes the use of NameValidator and parsing multiple files with circular references between them.
1 parent d9e1754 commit e92eac8

32 files changed

+1183
-739
lines changed

.editorconfig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ root = true
1919
charset = utf-8
2020
end_of_line = lf
2121
insert_final_newline = true
22+
ij_any_block_comment_at_first_column = false
23+
ij_any_line_comment_at_first_column = false
2224

2325
[*.{java,xml,sh}]
2426
indent_style = space

lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
import java.io.IOException;
2121
import java.net.URI;
22-
import java.util.Collection;
2322

2423
/**
2524
* Schema parser for a specific schema format.
@@ -29,46 +28,50 @@
2928
* schema sources.
3029
* </p>
3130
*
32-
* <h2>Note to implementers:</h2>
33-
*
34-
* <p>
35-
* Implementations are located using a {@link java.util.ServiceLoader}. See that
36-
* class for details.
37-
* </p>
38-
*
39-
* <p>
40-
* You can expect that schemas being read are invalid, so you are encouraged to
41-
* return {@code null} upon parsing failure where the input clearly doesn't make
42-
* sense (e.g., reading "/**" when expecting JSON). If the input is likely in
43-
* the correct format, but invalid, throw a {@link SchemaParseException}
44-
* instead.
45-
* </p>
46-
*
4731
* <p>
48-
* Note that throwing anything other than a {@code SchemaParseException} will
49-
* abort the parsing process, so reserve that for rethrowing exceptions.
32+
* Implementations are located using a {@link java.util.ServiceLoader} and must
33+
* therefore be threadsafe. See the {@code ServiceLoader} class for details on
34+
* loading your implementation.
5035
* </p>
5136
*
5237
* @see java.util.ServiceLoader
5338
*/
5439
public interface FormattedSchemaParser {
5540
/**
56-
* Parse a schema from a text based source. Can use the base location of the
57-
* schema (e.g., the directory where the schema file lives) if available.
58-
*
5941
* <p>
60-
* Implementations should add all named schemas they parse to the collection.
42+
* Parse schema definitions from a text based source.
6143
* </p>
6244
*
63-
* @param types a mutable collection of known types; parsed named
64-
* schemata will be added
45+
* <h2>Notes for implementers:</h2>
46+
*
47+
* <ul>
48+
* <li>Schema definitions are expected not to be in the format the parser
49+
* expects. So when the input clearly doesn't make sense (e.g., reading "/**"
50+
* when expecting JSON), it is a good idea not to do anything (especially
51+
* calling methods on the {@code ParseContext}).</li>
52+
* <li>The parameter {@code parseContext} is not thread-safe.</li>
53+
* <li>When parsing, all parsed schema definitions should be added to the
54+
* provided {@link ParseContext}.</li>
55+
* <li>Optionally, you may return a "main" schema. Some schema definitions have
56+
* one, for example the schema defined by the root of the JSON document in a
57+
* <a href="https://avro.apache.org/docs/current/specification/">standard schema
58+
* definition</a>. If unsure, return {@code null}.</li>
59+
* <li>If parsing fails, throw a {@link SchemaParseException}. This will let the
60+
* parsing process recover and continue.</li>
61+
* <li>Throwing anything other than a {@code SchemaParseException} will abort
62+
* the parsing process, so reserve that for rethrowing exceptions.</li>
63+
* </ul>
64+
*
65+
* @param parseContext the current parse context: all parsed schemata should
66+
* be added here to resolve names with; contains all
67+
* previously known types
6568
* @param baseUri the base location of the schema, or {@code null} if
6669
* not known
67-
* @param formattedSchema the schema as text
68-
* @return the parsed schema, or {@code null} if the format is not supported
70+
* @param formattedSchema the text of the schema definition(s) to parse
71+
* @return the main schema, if any
6972
* @throws IOException when the schema cannot be read
7073
* @throws SchemaParseException when the schema cannot be parsed
7174
*/
72-
Schema parse(Collection<Schema> types, URI baseUri, CharSequence formattedSchema)
75+
Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema)
7376
throws IOException, SchemaParseException;
7477
}

lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
import java.io.IOException;
2121
import java.net.URI;
22-
import java.util.ArrayList;
23-
import java.util.Collection;
2422

2523
/**
2624
* Schema parser for JSON formatted schemata. This initial implementation simply
@@ -59,32 +57,26 @@ public static Schema parseInternal(String... fragments) {
5957
for (String fragment : fragments) {
6058
buffer.append(fragment);
6159
}
62-
return new JsonSchemaParser().parse(new ArrayList<>(), buffer, true);
60+
return new JsonSchemaParser().parse(new ParseContext(NameValidator.NO_VALIDATION), buffer, null);
6361
}
6462

6563
@Override
66-
public Schema parse(Collection<Schema> schemas, URI baseUri, CharSequence formattedSchema)
64+
public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema)
6765
throws IOException, SchemaParseException {
68-
return parse(schemas, formattedSchema, false);
66+
return parse(parseContext, formattedSchema, parseContext.nameValidator);
6967
}
7068

71-
private Schema parse(Collection<Schema> schemas, CharSequence formattedSchema, boolean skipValidation)
69+
private Schema parse(ParseContext parseContext, CharSequence formattedSchema, NameValidator nameValidator)
7270
throws SchemaParseException {
73-
// TODO: refactor JSON parsing out of the Schema class
74-
Schema.Parser parser;
75-
if (skipValidation) {
76-
parser = new Schema.Parser(Schema.NameValidator.NO_VALIDATION);
71+
Schema.Parser parser = new Schema.Parser(nameValidator);
72+
if (nameValidator == NameValidator.NO_VALIDATION) {
7773
parser.setValidateDefaults(false);
7874
} else {
79-
parser = new Schema.Parser();
80-
}
81-
if (schemas != null) {
82-
parser.addTypes(schemas);
75+
parser = new Schema.Parser(nameValidator);
8376
}
77+
parser.addTypes(parseContext.typesByName().values());
8478
Schema schema = parser.parse(formattedSchema.toString());
85-
if (schemas != null) {
86-
schemas.addAll(parser.getTypes().values());
87-
}
79+
parser.getTypes().values().forEach(parseContext::put);
8880
return schema;
8981
}
9082
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* https://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.avro;
19+
20+
/**
 * Strategy that decides whether a name is acceptable.
 *
 * <p>
 * The default {@link #validate(String)} accepts every name; the predefined
 * constants {@link #NO_VALIDATION}, {@link #UTF_VALIDATOR} and
 * {@link #STRICT_VALIDATOR} provide increasingly strict rules.
 * </p>
 */
public interface NameValidator {

  /**
   * Outcome of validating a single name: either success (no error text) or a
   * failure carrying a human-readable description.
   */
  class Result {
    /** Error description, or {@code null} when validation succeeded. */
    private final String errors;

    /**
     * @param errors description of the problem, or {@code null} if there is none
     */
    public Result(final String errors) {
      this.errors = errors;
    }

    /**
     * @return {@code true} when this result carries no error description
     */
    public boolean isOK() {
      // Test the content rather than identity with OK, so that an error-free
      // Result created by a custom validator is also reported as success.
      return errors == null;
    }

    /**
     * @return the error description, or {@code null} when the name is valid
     */
    public String getErrors() {
      return errors;
    }
  }

  /** Shared success result. */
  Result OK = new Result(null);

  /**
   * Validate a name.
   *
   * @param name the name to check; may be {@code null}
   * @return {@link #OK} in this default implementation, which accepts any name
   */
  default Result validate(String name) {
    return OK;
  }

  /** Validator that accepts every name (relies on the default method). */
  NameValidator NO_VALIDATION = new NameValidator() {
  };

  /**
   * Validator accepting names that start with a Unicode letter or {@code '_'},
   * followed by Unicode letters, digits or {@code '_'}.
   */
  NameValidator UTF_VALIDATOR = new NameValidator() {
    @Override
    public Result validate(final String name) {
      if (name == null) {
        return new Result("Null name");
      }
      final int length = name.length();
      if (length == 0) {
        return new Result("Empty name");
      }
      // Work on code points, not chars: letters outside the Basic Multilingual
      // Plane are encoded as surrogate pairs, and a lone surrogate char is never
      // a letter. An unpaired surrogate is returned as-is and rejected below.
      final int first = name.codePointAt(0);
      if (!(Character.isLetter(first) || first == '_')) {
        return new Result("Illegal initial character: " + name);
      }
      int i = Character.charCount(first);
      while (i < length) {
        final int c = name.codePointAt(i);
        if (!(Character.isLetterOrDigit(c) || c == '_')) {
          return new Result("Illegal character in: " + name);
        }
        i += Character.charCount(c);
      }
      return OK;
    }
  };

  /**
   * Validator accepting only names that match {@code [A-Za-z_][A-Za-z0-9_]*}.
   */
  NameValidator STRICT_VALIDATOR = new NameValidator() {
    @Override
    public Result validate(final String name) {
      if (name == null) {
        return new Result("Null name");
      }
      final int length = name.length();
      if (length == 0) {
        return new Result("Empty name");
      }
      final char first = name.charAt(0);
      if (!(isLetter(first) || first == '_')) {
        return new Result("Illegal initial character: " + name);
      }
      for (int i = 1; i < length; i++) {
        final char c = name.charAt(i);
        if (!(isLetter(c) || isDigit(c) || c == '_')) {
          return new Result("Illegal character in: " + name);
        }
      }
      return OK;
    }

    /** ASCII letters only; deliberately not {@link Character#isLetter(char)}. */
    private boolean isLetter(char c) {
      return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** ASCII digits only; deliberately not {@link Character#isDigit(char)}. */
    private boolean isDigit(char c) {
      return c >= '0' && c <= '9';
    }

  };

}

0 commit comments

Comments
 (0)