17
17
import org .elasticsearch .index .fielddata .ScriptDocValues ;
18
18
import org .elasticsearch .index .fielddata .plain .LongArrayIndexFieldData ;
19
19
20
+ /**
21
+ * A buildable collector which iterates through the values of a long datetime field, applying time zone rounding to each of them.
22
+ *
23
+ * @param <V> the AtomicFieldData type produced by the value field's IndexFieldData
24
+ */
20
25
public abstract class TimestampFirstCollector <V extends AtomicFieldData <? extends ScriptDocValues >> extends BuildableCollector {
21
26
27
+ /**
28
+ * An empty iterator over long values.
29
+ */
22
30
protected static final Iter EMPTY = new Iter .Empty ();
23
31
24
32
private LongValues _keyFieldValues ;
@@ -36,20 +44,34 @@ public abstract class TimestampFirstCollector<V extends AtomicFieldData<? extend
36
44
private BytesValues _valueFieldValues ;
37
45
private BytesValues .Iter _valueFieldIter ;
38
46
47
/**
 * Builds a collector over a key (datetime) field and, optionally, a value field.
 *
 * @param keyFieldData field data of the key (datetime) field
 * @param valueFieldData field data of the value field; may be {@code null} when no value field is used
 * @param tzRounding time zone rounding to apply to the datetimes
 */
public TimestampFirstCollector(final LongArrayIndexFieldData keyFieldData,
        final IndexFieldData<V> valueFieldData, final TimeZoneRounding tzRounding) {
    // Plain field initialisation; the three assignments are independent of each other.
    this._tzRounding = tzRounding;
    this._valueFieldData = valueFieldData;
    this._keyFieldData = keyFieldData;
}
45
60
61
/**
 * Builds a collector that uses only the key (datetime) field, with no value field.
 *
 * @param keyFieldData field data of the key (datetime) field
 * @param tzRounding time zone rounding to apply to the datetimes
 */
public TimestampFirstCollector(final LongArrayIndexFieldData keyFieldData,
        final TimeZoneRounding tzRounding) {
    // Delegate to the full constructor, passing null as the value field data.
    this(keyFieldData, null, tzRounding);
}
50
71
51
72
@ Override
52
73
public void collect (final int doc ) throws IOException {
74
+ // If the datetime field has ordinals available, we can take a bunch of shortcuts later
53
75
if (_keyFieldValues instanceof WithOrdinals ) {
54
76
_docOrds = ((WithOrdinals ) _keyFieldValues ).ordinals ().getOrds (doc );
55
77
_docOrdPointer = _docOrds .offset ;
@@ -66,16 +88,35 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
66
88
if (hasValueField ())
67
89
_valueFieldValues = _valueFieldData .load (context ).getBytesValues ();
68
90
91
+ // If we have ordinals available, we can do most of the work up front.
92
+ // We build a mapping from ords to rounded timestamps, so we never
93
+ // have to retrieve the field values for a given document. We just
94
+ // see which ordinals it has and then get the rounded timestamps they
95
+ // correspond to.
96
+
97
+ // One drawback of this approach is that if we have a very aggressively
98
+ // filtered query, there might be many ordinals which are never used by
99
+ // any of the documents we will be looking at. So we'd be wasting effort
100
+ // by calculating timestamps for all of the ordinals up front.
101
+ // TODO come up with a heuristic to avoid falling into this trap.
102
+
69
103
if (_keyFieldValues instanceof WithOrdinals ) {
70
104
final int maxOrd = ((WithOrdinals ) _keyFieldValues ).ordinals ().getMaxOrd ();
71
105
int tsPointer = 0 ;
106
+
107
+ // _timestamps holds the rounded timestamps
72
108
_timestamps .resetQuick ();
73
109
_timestamps .add (0 );
110
+
111
+ // _ordToTimestampPointers has one entry for every ord
74
112
_ordToTimestampPointers .resetQuick ();
75
113
_ordToTimestampPointers .add (0 );
114
+
115
+ // We cache these for some small optimizations
76
116
long lastDateTime = 0 ;
77
117
long lastTimestamp = 0 ;
78
118
for (int i = 1 ; i < maxOrd ; i ++) {
119
+ // Get the next ordinal's value so we can calculate its timestamp
79
120
final long datetime = ((WithOrdinals ) _keyFieldValues ).getValueByOrd (i );
80
121
81
122
// If this datetime is less than a second after the previously-seen timestamp, it will have the same timestamp
@@ -95,6 +136,8 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
95
136
}
96
137
}
97
138
lastDateTime = datetime ;
139
+
140
+ // Add timestamp pointer for this ord -- could be the same as the previous ord, or a new one
98
141
_ordToTimestampPointers .add (tsPointer );
99
142
}
100
143
} else {
@@ -105,6 +148,11 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
105
148
@ Override
106
149
public void postCollection () {}
107
150
151
+ /**
152
+ * Are there any more timestamps available?
153
+ *
154
+ * @return true/false
155
+ */
108
156
protected boolean hasNextTimestamp () {
109
157
if (_keyFieldValues instanceof WithOrdinals ) {
110
158
return _docOrdPointer < _docOrds .length ;
@@ -113,12 +161,19 @@ protected boolean hasNextTimestamp() {
113
161
}
114
162
}
115
163
164
+ /**
165
+ * Get the next timestamp, i.e. the rounded value of the next available datetime.
166
+ *
167
+ * @return the timestamp
168
+ */
116
169
protected long nextTimestamp () {
117
170
if (_keyFieldValues instanceof WithOrdinals ) {
171
+ // We can bypass getting the raw datetime value, and go from ord to timestamp directly (well, directly-ish)
118
172
final long ts = _timestamps .get (_ordToTimestampPointers .get (_docOrds .ints [_docOrdPointer ]));
119
173
_docOrdPointer ++;
120
174
return ts ;
121
175
} else {
176
+ // Get the next raw datetime, and if necessary, round it
122
177
final long datetime = _docIter .next ();
123
178
// If this datetime is less than a second after the previously-seen timestamp, it will have the same timestamp
124
179
// (true because we don't support granularity less than 1 sec)
@@ -134,14 +189,31 @@ protected long nextTimestamp() {
134
189
}
135
190
}
136
191
192
+ /**
193
+ * Returns true if this iterator is getting each timestamp once per value of a value field.
194
+ * Otherwise, it's getting each timestamp once per document.
195
+ *
196
+ * @return true/false
197
+ */
137
198
protected boolean hasValueField () {
138
199
return _valueFieldData != null ;
139
200
}
140
201
202
+ /**
203
+ * Returns true if there is another value of a value field available, for the current doc.
204
+ * If there isn't, or we're not using a value field, returns false.
205
+ *
206
+ * @return true/false
207
+ */
141
208
protected boolean hasNextValue () {
142
209
return _valueFieldIter != null && _valueFieldIter .hasNext ();
143
210
}
144
211
212
+ /**
213
+ * Gets the next value of the value field, or null if we're not using a value field.
214
+ *
215
+ * @return the next value as a BytesRef, or null
216
+ */
145
217
protected BytesRef nextValue () {
146
218
return _valueFieldIter == null ? null : _valueFieldIter .next ();
147
219
}
0 commit comments