Skip to content

Commit 7d6ab9e

Browse files
author
yehe
committed
add path based json parser
1 parent d0c4330 commit 7d6ab9e

File tree

5 files changed

+375
-4
lines changed

5 files changed

+375
-4
lines changed

src/main/java/org/simdjson/OnDemandJsonIterator.java

+13-2
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ void init(byte[] buffer, int len) {
3939
this.len = len;
4040
this.depth = 1;
4141
}
42-
4342
void skipChild() {
4443
skipChild(depth - 1);
4544
}
@@ -442,6 +441,19 @@ Float getFloat() {
442441
}
443442
return numberParser.parseFloat(buffer, len, idx);
444443
}
444+
String getOrCompressAsString() {
445+
depth--;
446+
int idx = indexer.getAndAdvance();
447+
if (buffer[idx] == '"') {
448+
return new String(buffer, idx + 1, indexer.peek() - idx - 2);
449+
} else {
450+
return new String(buffer, idx, indexer.peek() - idx);
451+
}
452+
}
453+
String getObjectKey() {
454+
int idx = indexer.getAndAdvance();
455+
return new String(buffer, idx + 1, indexer.peek() - idx - 2);
456+
}
445457

446458
int getRootString(byte[] stringBuffer) {
447459
depth--;
@@ -621,7 +633,6 @@ IteratorResult startIteratingRootObject() {
621633
}
622634
return IteratorResult.NOT_EMPTY;
623635
}
624-
625636
boolean nextObjectField() {
626637
int idx = indexer.getAndAdvance();
627638
byte character = buffer[idx];
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package org.simdjson;
2+
3+
import java.lang.reflect.Type;
4+
import java.util.HashMap;
5+
import java.util.Map;
6+
7+
public class OnDemandJsonValue {
8+
private final Map<Object, OnDemandJsonValue> children;
9+
private OnDemandJsonValue parent;
10+
private ResolvedClass.ResolvedClassCategory type;
11+
private Object value;
12+
private long version;
13+
private boolean isLeaf;
14+
15+
public OnDemandJsonValue() {
16+
this.children = new HashMap<>();
17+
this.parent = null;
18+
this.value = null;
19+
this.version = 0L;
20+
this.isLeaf = false;
21+
}
22+
23+
public Map<Object, OnDemandJsonValue> getChildren() {
24+
return children;
25+
}
26+
27+
public OnDemandJsonValue getParent() {
28+
return parent;
29+
}
30+
31+
public void setParent(OnDemandJsonValue parent) {
32+
this.parent = parent;
33+
}
34+
35+
public Object getValue() {
36+
return value;
37+
}
38+
39+
public void setValue(Object value) {
40+
this.value = value;
41+
}
42+
43+
public long getVersion() {
44+
return version;
45+
}
46+
47+
public void setVersion(long version) {
48+
this.version = version;
49+
}
50+
public ResolvedClass.ResolvedClassCategory getType() {
51+
return type;
52+
}
53+
54+
public void setType(ResolvedClass.ResolvedClassCategory type) {
55+
this.type = type;
56+
}
57+
58+
public boolean isLeaf() {
59+
return isLeaf;
60+
}
61+
62+
public void setLeaf(boolean leaf) {
63+
isLeaf = leaf;
64+
}
65+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
package org.simdjson;
2+
3+
import java.util.Arrays;
4+
5+
public class PathsBasedJsonParser {
6+
private static final int PADDING = 64;
7+
private static final int DEFAULT_CAPACITY = 34 * 1024 * 1024; // we should be able to handle jsons <= 34MiB
8+
private static final String SINGLE_LEFT_BRACKET = "[";
9+
private static final String DOUBLE_LEFT_BRACKET = "[[";
10+
private static final String SINGLE_RIGHT_BRACKET = "]";
11+
private static final String DOUBLE_RIGHT_BRACKET = "]]";
12+
private String typeDelimiter = ":";
13+
private String pathDelimiter = "\\.";
14+
private final Object[] EMPTY_RESULT;
15+
private Object[] result;
16+
private OnDemandJsonValue[] row;
17+
private long currentVersion = 0;
18+
private OnDemandJsonValue ptr;
19+
private byte[] padded;
20+
private final StructuralIndexer indexer;
21+
private final BitIndexes bitIndexes;
22+
private final OnDemandJsonIterator jsonIterator;
23+
private final byte[] paddedBuffer;
24+
private final OnDemandJsonValue root = new OnDemandJsonValue();
25+
private static final ResolvedClass.ResolvedClassCategory DEFAULT_TYPE = ResolvedClass.ResolvedClassCategory.STRING;
26+
27+
public PathsBasedJsonParser(String... args) {
28+
this.bitIndexes = new BitIndexes(DEFAULT_CAPACITY);
29+
this.indexer = new StructuralIndexer(bitIndexes);
30+
this.jsonIterator = new OnDemandJsonIterator(bitIndexes, PADDING);
31+
this.EMPTY_RESULT = new Object[args.length];
32+
Arrays.fill(this.EMPTY_RESULT, null);
33+
this.result = new Object[args.length];
34+
this.paddedBuffer = new byte[DEFAULT_CAPACITY];
35+
this.row = new OnDemandJsonValue[args.length];
36+
constructPathTree(args);
37+
}
38+
private void constructPathTree(String... args) {
39+
for (int i = 0; i < args.length; i++) {
40+
String[] pathAndType = args[i].split(typeDelimiter);
41+
ResolvedClass.ResolvedClassCategory type = DEFAULT_TYPE;
42+
if (pathAndType.length >= 2) {
43+
type = ResolvedClass.ResolvedClassCategory.valueOf(pathAndType[1]);
44+
}
45+
String path = pathAndType[0];
46+
// construct path tree
47+
OnDemandJsonValue cur = root;
48+
for (String step : path.split(pathDelimiter)) {
49+
Object key;
50+
if (step.startsWith(SINGLE_LEFT_BRACKET) && !step.startsWith(DOUBLE_LEFT_BRACKET)) {
51+
key = Integer.parseInt(step.substring(1, step.length() - 1));
52+
} else {
53+
key = step.replace(DOUBLE_LEFT_BRACKET, SINGLE_LEFT_BRACKET)
54+
.replace(DOUBLE_RIGHT_BRACKET, SINGLE_RIGHT_BRACKET);
55+
}
56+
if (!cur.getChildren().containsKey(key)) {
57+
OnDemandJsonValue child = new OnDemandJsonValue();
58+
child.setParent(cur);
59+
cur.getChildren().put(key, child);
60+
}
61+
cur = cur.getChildren().get(key);
62+
}
63+
cur.setLeaf(true);
64+
cur.setType(type);
65+
row[i] = cur;
66+
}
67+
}
68+
69+
public Object[] parse(byte[] buffer, int len) {
70+
if (buffer == null || buffer.length == 0) {
71+
return EMPTY_RESULT;
72+
}
73+
padded = padIfNeeded(buffer, len);
74+
Utf8Validator.validate(padded, len);
75+
indexer.index(padded, len);
76+
jsonIterator.init(padded, len);
77+
this.currentVersion++;
78+
this.ptr = root;
79+
switch (padded[bitIndexes.peek()]) {
80+
case '{':
81+
parseRootObject();
82+
break;
83+
case '[':
84+
parseRootArray();
85+
break;
86+
default:
87+
throw new RuntimeException("invalid json format, must start with { or [");
88+
}
89+
return getResult();
90+
}
91+
private void parseRootObject() {
92+
OnDemandJsonIterator.IteratorResult iteratorResult = jsonIterator.startIteratingRootObject();
93+
iteratorObjectElements(iteratorResult);
94+
jsonIterator.assertNoMoreJsonValues();
95+
}
96+
private void parseObject() {
97+
OnDemandJsonIterator.IteratorResult iteratorResult = jsonIterator.startIteratingObject();
98+
iteratorObjectElements(iteratorResult);
99+
}
100+
private void parseRootArray() {
101+
OnDemandJsonIterator.IteratorResult iteratorResult = jsonIterator.startIteratingRootArray();
102+
iteratorArrayElements(iteratorResult);
103+
jsonIterator.assertNoMoreJsonValues();
104+
}
105+
private void parseArray() {
106+
OnDemandJsonIterator.IteratorResult iteratorResult = jsonIterator.startIteratingArray();
107+
iteratorArrayElements(iteratorResult);
108+
}
109+
private void parseValue() {
110+
Object value = switch (ptr.getType()) {
111+
case BOOLEAN_PRIMITIVE -> jsonIterator.getNonNullBoolean();
112+
case BOOLEAN -> jsonIterator.getBoolean();
113+
case BYTE_PRIMITIVE -> jsonIterator.getNonNullByte();
114+
case BYTE -> jsonIterator.getByte();
115+
case SHORT_PRIMITIVE -> jsonIterator.getNonNullShort();
116+
case SHORT -> jsonIterator.getShort();
117+
case INT_PRIMITIVE -> jsonIterator.getNonNullInt();
118+
case INT -> jsonIterator.getInt();
119+
case LONG_PRIMITIVE -> jsonIterator.getNonNullLong();
120+
case LONG -> jsonIterator.getLong();
121+
case FLOAT_PRIMITIVE -> jsonIterator.getNonNullFloat();
122+
case FLOAT -> jsonIterator.getFloat();
123+
case DOUBLE_PRIMITIVE -> jsonIterator.getNonNullDouble();
124+
case DOUBLE -> jsonIterator.getDouble();
125+
case CHAR_PRIMITIVE -> jsonIterator.getNonNullChar();
126+
case CHAR -> jsonIterator.getChar();
127+
case STRING -> jsonIterator.getOrCompressAsString();
128+
default -> throw new RuntimeException("only support basic type, not support " + ptr.getType().name());
129+
};
130+
ptr.setValue(value);
131+
}
132+
private void iteratorObjectElements(OnDemandJsonIterator.IteratorResult result) {
133+
if (result == OnDemandJsonIterator.IteratorResult.NOT_EMPTY) {
134+
int collected = 0;
135+
int fieldNum = ptr.getChildren().size();
136+
boolean hasFields = true;
137+
int parentDepth = jsonIterator.getDepth() - 1;
138+
while (collected < fieldNum && hasFields) {
139+
String key = jsonIterator.getObjectKey();
140+
jsonIterator.moveToFieldValue();
141+
if (ptr.getChildren().containsKey(key)) {
142+
ptr = ptr.getChildren().get(key);
143+
parseElement();
144+
collected++;
145+
ptr = ptr.getParent();
146+
} else {
147+
jsonIterator.skipChild();
148+
}
149+
hasFields = jsonIterator.nextObjectField();
150+
}
151+
jsonIterator.skipChild(parentDepth);
152+
}
153+
}
154+
private void iteratorArrayElements(OnDemandJsonIterator.IteratorResult result) {
155+
if (result == OnDemandJsonIterator.IteratorResult.NOT_EMPTY) {
156+
int collected = 0;
157+
int fieldNum = ptr.getChildren().size();
158+
boolean hasFields = true;
159+
int index = 0;
160+
int parentDepth = jsonIterator.getDepth() - 2;
161+
while (collected < fieldNum && hasFields) {
162+
if (ptr.getChildren().containsKey(index)) {
163+
ptr = ptr.getChildren().get(index);
164+
parseElement();
165+
collected++;
166+
ptr = ptr.getParent();
167+
} else {
168+
jsonIterator.skipChild();
169+
}
170+
index++;
171+
hasFields = jsonIterator.nextArrayElement();
172+
}
173+
jsonIterator.skipChild(parentDepth);
174+
}
175+
}
176+
private void parseElement() {
177+
char currentChar = (char) padded[bitIndexes.peek()];
178+
if (currentChar == '{' || currentChar == '[') {
179+
int startOffset = bitIndexes.peek();
180+
if (currentChar == '{') {
181+
parseObject();
182+
} else {
183+
parseArray();
184+
}
185+
if (ptr.isLeaf()) {
186+
int endOffset = bitIndexes.peek();
187+
ptr.setVersion(currentVersion);
188+
ptr.setValue(new String(padded, startOffset, endOffset - startOffset));
189+
}
190+
} else {
191+
if (ptr.isLeaf()) {
192+
ptr.setVersion(currentVersion);
193+
}
194+
parseValue();
195+
}
196+
}
197+
private Object[] getResult() {
198+
for (int i = 0; i < result.length; i++) {
199+
if (row[i].getVersion() < currentVersion) {
200+
result[i] = null;
201+
continue;
202+
}
203+
result[i] = row[i].getValue();
204+
}
205+
return result;
206+
}
207+
private byte[] padIfNeeded(byte[] buffer, int len) {
208+
if (buffer.length - len < PADDING) {
209+
System.arraycopy(buffer, 0, paddedBuffer, 0, len);
210+
return paddedBuffer;
211+
}
212+
return buffer;
213+
}
214+
215+
}

src/main/java/org/simdjson/ResolvedClass.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
import java.nio.charset.StandardCharsets;
1111
import java.util.List;
1212

13-
class ResolvedClass {
13+
public class ResolvedClass {
1414

15-
enum ResolvedClassCategory {
15+
public enum ResolvedClassCategory {
1616
BOOLEAN_PRIMITIVE(boolean.class, new boolean[0]),
1717
BOOLEAN(Boolean.class, new Boolean[0]),
1818
BYTE_PRIMITIVE(byte.class, new byte[0]),

0 commit comments

Comments
 (0)