1717import org .elasticsearch .index .fielddata .ScriptDocValues ;
1818import org .elasticsearch .index .fielddata .plain .LongArrayIndexFieldData ;
1919
20+ /**
21+ * A buildable collector which iterates through the values of a long datetime field, applying timezone rounding to them.
22+ *
23+ * @param <V> the AtomicFieldData type of the value field
24+ */
2025public abstract class TimestampFirstCollector <V extends AtomicFieldData <? extends ScriptDocValues >> extends BuildableCollector {
2126
27+ /**
28+ * An empty iterator over long values.
29+ */
2230 protected static final Iter EMPTY = new Iter .Empty ();
2331
2432 private LongValues _keyFieldValues ;
@@ -36,20 +44,34 @@ public abstract class TimestampFirstCollector<V extends AtomicFieldData<? extend
3644 private BytesValues _valueFieldValues ;
3745 private BytesValues .Iter _valueFieldIter ;
3846
/**
 * Creates a collector over a datetime key field and a value field.
 *
 * @param keyFieldData   field data of the long datetime (key) field
 * @param valueFieldData field data of the value field; may be {@code null},
 *                       in which case {@link #hasValueField()} reports
 *                       {@code false}
 * @param tzRounding     time zone rounding kept by this collector
 */
public TimestampFirstCollector(final LongArrayIndexFieldData keyFieldData,
        final IndexFieldData<V> valueFieldData,
        final TimeZoneRounding tzRounding) {
    // The constructor only records its arguments; no field data is loaded here.
    this._tzRounding = tzRounding;
    this._valueFieldData = valueFieldData;
    this._keyFieldData = keyFieldData;
}
4560
/**
 * Creates a collector over a datetime key field only, with no value field.
 *
 * @param keyFieldData field data of the long datetime (key) field
 * @param tzRounding   time zone rounding kept by this collector
 */
public TimestampFirstCollector(
        final LongArrayIndexFieldData keyFieldData,
        final TimeZoneRounding tzRounding) {
    // Delegate to the full constructor, passing null as the value field data.
    this(keyFieldData, null, tzRounding);
}
5071
5172 @ Override
5273 public void collect (final int doc ) throws IOException {
74+ // If the datetime field has ordinals available, we can take a bunch of shortcuts later
5375 if (_keyFieldValues instanceof WithOrdinals ) {
5476 _docOrds = ((WithOrdinals ) _keyFieldValues ).ordinals ().getOrds (doc );
5577 _docOrdPointer = _docOrds .offset ;
@@ -66,16 +88,35 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
6688 if (hasValueField ())
6789 _valueFieldValues = _valueFieldData .load (context ).getBytesValues ();
6890
91+ // If we have ordinals available, we can do most of the work up front.
92+ // We build a mapping from ords to rounded timestamps, so we never
93+ // have to retrieve the field values for a given document. We just
94+ // see which ordinals it has and then get the rounded timestamps they
95+ // correspond to.
96+
97+ // One drawback of this approach is that if we have a very aggressively
98+ // filtered query, there might be many ordinals which are never used by
99+ // any of the documents we will be looking at. So we'd be wasting effort
100+ // by calculating timestamps for all of the ordinals up front.
101+ // TODO come up with a heuristic to avoid falling into this trap.
102+
69103 if (_keyFieldValues instanceof WithOrdinals ) {
70104 final int maxOrd = ((WithOrdinals ) _keyFieldValues ).ordinals ().getMaxOrd ();
71105 int tsPointer = 0 ;
106+
107+ // _timestamps holds the rounded timestamps
72108 _timestamps .resetQuick ();
73109 _timestamps .add (0 );
110+
111+ // _ordToTimestampPointers has one entry for every ord
74112 _ordToTimestampPointers .resetQuick ();
75113 _ordToTimestampPointers .add (0 );
114+
115+ // We cache these for some small optimizations
76116 long lastDateTime = 0 ;
77117 long lastTimestamp = 0 ;
78118 for (int i = 1 ; i < maxOrd ; i ++) {
119+ // Get the next ordinal's value so we can calculate its timestamp
79120 final long datetime = ((WithOrdinals ) _keyFieldValues ).getValueByOrd (i );
80121
81122 // If this datetime is less than a second after the previously-seen timestamp, it will have the same timestamp
@@ -95,6 +136,8 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
95136 }
96137 }
97138 lastDateTime = datetime ;
139+
140+ // Add timestamp pointer for this ord -- could be the same as the previous ord, or a new one
98141 _ordToTimestampPointers .add (tsPointer );
99142 }
100143 } else {
@@ -105,6 +148,11 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
105148 @ Override
106149 public void postCollection () {}
107150
151+ /**
152+ * Are there any more timestamps available?
153+ *
154+ * @return true/false
155+ */
108156 protected boolean hasNextTimestamp () {
109157 if (_keyFieldValues instanceof WithOrdinals ) {
110158 return _docOrdPointer < _docOrds .length ;
@@ -113,12 +161,19 @@ protected boolean hasNextTimestamp() {
113161 }
114162 }
115163
164+ /**
165+ * Get the next timestamp, i.e. the rounded value of the next available datetime.
166+ *
167+ * @return the timestamp
168+ */
116169 protected long nextTimestamp () {
117170 if (_keyFieldValues instanceof WithOrdinals ) {
171+ // We can bypass getting the raw datetime value, and go from ord to timestamp directly (well, directly-ish)
118172 final long ts = _timestamps .get (_ordToTimestampPointers .get (_docOrds .ints [_docOrdPointer ]));
119173 _docOrdPointer ++;
120174 return ts ;
121175 } else {
176+ // Get the next raw datetime, and if necessary, round it
122177 final long datetime = _docIter .next ();
123178 // If this datetime is less than a second after the previously-seen timestamp, it will have the same timestamp
124179 // (true because we don't support granularity less than 1 sec)
@@ -134,14 +189,31 @@ protected long nextTimestamp() {
134189 }
135190 }
136191
192+ /**
193+ * Returns true if this iterator is getting each timestamp once per value of a value field.
194+ * Otherwise, it's getting each timestamp once per document.
195+ *
196+ * @return true/false
197+ */
137198 protected boolean hasValueField () {
138199 return _valueFieldData != null ;
139200 }
140201
202+ /**
203+ * Returns true if there is another value of a value field available, for the current doc.
204+ * If there isn't, or we're not using a value field, returns false.
205+ *
206+ * @return true/false
207+ */
141208 protected boolean hasNextValue () {
142209 return _valueFieldIter != null && _valueFieldIter .hasNext ();
143210 }
144211
212+ /**
213+ * Gets the next value of the value field, or null if we're not using a value field.
214+ *
215+ * @return the next value as a BytesRef, or null
216+ */
145217 protected BytesRef nextValue () {
146218 return _valueFieldIter == null ? null : _valueFieldIter .next ();
147219 }
0 commit comments