Skip to content

Commit 2d077df

Browse files
author
Andrew Clegg
committed
Merge branch 'release/2.1.5'
2 parents e1fc5ec + 6b1ba4d commit 2d077df

File tree

8 files changed

+162
-44
lines changed

8 files changed

+162
-44
lines changed

CHANGELOG.md

+14
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
Release 2.1.5
2+
-------------
3+
4+
More documentation improvements, code cleanup.
5+
6+
Workaround for a compiler issue in some versions of Java 6:
7+
8+
https://github.com/pearson-enabling-technologies/elasticsearch-approx-plugin/issues/41
9+
10+
Release 2.1.4
11+
-------------
12+
13+
Documentation improvements.
14+
115
Release 2.1.3
216
-------------
317

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ Plugin < 1.3.0: ElasticSearch 0.19.X, tested on 0.19.11
1717

1818
Plugin 1.3.X: ElasticSearch 0.20.X, tested on 0.20.6
1919

20-
Plugin 2.1.4: ElasticSearch 0.90.2, plus significant feature and performance improvements, and breaking API changes
20+
Plugin 2.1.5: ElasticSearch 0.90.2, plus significant feature and performance improvements, and breaking API changes, compared to the 1.3.X branch
2121

2222
ElasticSearch 0.90.3 is not supported yet.
2323

24-
**N.B.** If you are upgrading from a previous version to 2.1.0, please read the
24+
**N.B.** If you are upgrading from a previous version to 2.1.X, please read the
2525
following carefully, as the syntax (and semantics) have changed in several places.
2626

2727

pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<modelVersion>4.0.0</modelVersion>
44
<groupId>com.pearson.entech</groupId>
55
<artifactId>elasticsearch-approx-plugin</artifactId>
6-
<version>2.1.4</version>
6+
<version>2.1.5</version>
77
<properties>
88
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
99
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>

src/main/java/com/pearson/entech/elasticsearch/search/facet/approx/date/collectors/TimestampFirstCollector.java

+72
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,16 @@
1717
import org.elasticsearch.index.fielddata.ScriptDocValues;
1818
import org.elasticsearch.index.fielddata.plain.LongArrayIndexFieldData;
1919

20+
/**
21+
* A buildable collector which iterates through the values of a long datetime field, applying timezone rounding to them.
22+
*
23+
* @param <V> the IndexFieldData type of the datetime field
24+
*/
2025
public abstract class TimestampFirstCollector<V extends AtomicFieldData<? extends ScriptDocValues>> extends BuildableCollector {
2126

27+
/**
28+
* An empty iterator over long values.
29+
*/
2230
protected static final Iter EMPTY = new Iter.Empty();
2331

2432
private LongValues _keyFieldValues;
@@ -36,20 +44,34 @@ public abstract class TimestampFirstCollector<V extends AtomicFieldData<? extend
3644
private BytesValues _valueFieldValues;
3745
private BytesValues.Iter _valueFieldIter;
3846

47+
/**
48+
* Create a new collector.
49+
*
50+
* @param keyFieldData key (datetime) field data
51+
* @param valueFieldData value field data
52+
* @param tzRounding time zone rounding
53+
*/
3954
public TimestampFirstCollector(final LongArrayIndexFieldData keyFieldData,
4055
final IndexFieldData<V> valueFieldData, final TimeZoneRounding tzRounding) {
4156
_keyFieldData = keyFieldData;
4257
_valueFieldData = valueFieldData;
4358
_tzRounding = tzRounding;
4459
}
4560

61+
/**
62+
* Create a new collector.
63+
*
64+
* @param keyFieldData key (datetime) field data
65+
* @param tzRounding time zone rounding
66+
*/
4667
public TimestampFirstCollector(final LongArrayIndexFieldData keyFieldData,
4768
final TimeZoneRounding tzRounding) {
4869
this(keyFieldData, null, tzRounding);
4970
}
5071

5172
@Override
5273
public void collect(final int doc) throws IOException {
74+
// If the datetime field has ordinals available, we can take a bunch of shortcuts later
5375
if(_keyFieldValues instanceof WithOrdinals) {
5476
_docOrds = ((WithOrdinals) _keyFieldValues).ordinals().getOrds(doc);
5577
_docOrdPointer = _docOrds.offset;
@@ -66,16 +88,35 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
6688
if(hasValueField())
6789
_valueFieldValues = _valueFieldData.load(context).getBytesValues();
6890

91+
// If we have ordinals available, we can do most of the work up front.
92+
// We build a mapping from ords to rounded timestamps, so we never
93+
// have to retrieve the field values for a given document. We just
94+
// see which ordinals it has and then get the rounded timestamps they
95+
// correspond to.
96+
97+
// One drawback of this approach is that if we have a very aggressively
98+
// filtered query, there might be many ordinals which are never used by
99+
// any of the documents we will be looking at. So we'd be wasting effort
100+
// by calculating timestamps for all of the ordinals up front.
101+
// TODO come up with a heuristic to avoid falling into this trap.
102+
69103
if(_keyFieldValues instanceof WithOrdinals) {
70104
final int maxOrd = ((WithOrdinals) _keyFieldValues).ordinals().getMaxOrd();
71105
int tsPointer = 0;
106+
107+
// _timestamps holds the rounded timestamps
72108
_timestamps.resetQuick();
73109
_timestamps.add(0);
110+
111+
// _ordToTimestampPointers has one entry for every ord
74112
_ordToTimestampPointers.resetQuick();
75113
_ordToTimestampPointers.add(0);
114+
115+
// We cache these for some small optimizations
76116
long lastDateTime = 0;
77117
long lastTimestamp = 0;
78118
for(int i = 1; i < maxOrd; i++) {
119+
// Get the next ordinal's value so we can calculate its timestamp
79120
final long datetime = ((WithOrdinals) _keyFieldValues).getValueByOrd(i);
80121

81122
// If this datetime is less than a second after the previously-seen timestamp, it will have the same timestamp
@@ -95,6 +136,8 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
95136
}
96137
}
97138
lastDateTime = datetime;
139+
140+
// Add timestamp pointer for this ord -- could be the same as the previous ord, or a new one
98141
_ordToTimestampPointers.add(tsPointer);
99142
}
100143
} else {
@@ -105,6 +148,11 @@ public void setNextReader(final AtomicReaderContext context) throws IOException
105148
@Override
106149
public void postCollection() {}
107150

151+
/**
152+
* Are there any more timestamps available?
153+
*
154+
* @return true/false
155+
*/
108156
protected boolean hasNextTimestamp() {
109157
if(_keyFieldValues instanceof WithOrdinals) {
110158
return _docOrdPointer < _docOrds.length;
@@ -113,12 +161,19 @@ protected boolean hasNextTimestamp() {
113161
}
114162
}
115163

164+
/**
165+
* Get the next timestamp, i.e. the rounded value of the next available datetime.
166+
*
167+
* @return the timestamp
168+
*/
116169
protected long nextTimestamp() {
117170
if(_keyFieldValues instanceof WithOrdinals) {
171+
// We can bypass getting the raw datetime value, and go from ord to timestamp directly (well, directly-ish)
118172
final long ts = _timestamps.get(_ordToTimestampPointers.get(_docOrds.ints[_docOrdPointer]));
119173
_docOrdPointer++;
120174
return ts;
121175
} else {
176+
// Get the next raw datetime, and if necessary, round it
122177
final long datetime = _docIter.next();
123178
// If this datetime is less than a second after the previously-seen timestamp, it will have the same timestamp
124179
// (true because we don't support granularity less than 1 sec)
@@ -134,14 +189,31 @@ protected long nextTimestamp() {
134189
}
135190
}
136191

192+
/**
193+
* Returns true if this iterator is getting each timestamp once per value of a value field.
194+
* Otherwise, it's getting each timestamp once per document.
195+
*
196+
* @return true/false
197+
*/
137198
protected boolean hasValueField() {
138199
return _valueFieldData != null;
139200
}
140201

202+
/**
203+
* Returns true if there is another value of a value field available, for the current doc.
204+
* If there isn't, or we're not using a value field, returns false.
205+
*
206+
* @return true/false
207+
*/
141208
protected boolean hasNextValue() {
142209
return _valueFieldIter != null && _valueFieldIter.hasNext();
143210
}
144211

212+
/**
213+
* Gets the next value of the value field, or null if we're not using a value field.
214+
*
215+
* @return the next value as a BytesRef, or null
216+
*/
145217
protected BytesRef nextValue() {
146218
return _valueFieldIter == null ? null : _valueFieldIter.next();
147219
}

src/main/java/com/pearson/entech/elasticsearch/search/facet/approx/date/external/XContentEnabledList.java

+35-1
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,65 @@
88
import org.elasticsearch.common.xcontent.XContentBuilder;
99
import org.elasticsearch.common.xcontent.XContentBuilderString;
1010

11+
/**
12+
* An ArrayList that implements ToXContent too. Has a name
13+
* attribute which is used as its fieldname in XContent output. Then
14+
* the list elements are rendered as an XContent list using their own
15+
* toXContent() methods.
16+
*
17+
* @param <E> list element type; must in turn implement ToXContent
18+
*/
1119
public class XContentEnabledList<E extends ToXContent>
1220
extends ArrayList<E> implements ToXContent {
1321

22+
private static final long serialVersionUID = 1L;
23+
1424
private final String _name;
1525

1626
private final XContentBuilderString _xName;
1727

28+
/**
29+
* Create a list by copying in the values of the other collection.
30+
*
31+
* @param data the collection to copy
32+
* @param name the name of the new list
33+
*/
1834
public XContentEnabledList(final Collection<? extends E> data, final String name) {
1935
super(data);
2036
_name = name;
2137
_xName = null;
2238
}
2339

40+
/**
41+
* Create a list with the initial capacity specified.
42+
*
43+
* @param initialCapacity the starting capacity
44+
* @param name the name of the new list
45+
*/
2446
public XContentEnabledList(final int initialCapacity, final String name) {
2547
super(initialCapacity);
2648
_name = name;
2749
_xName = null;
2850
}
2951

52+
/**
53+
* Create an empty list.
54+
*
55+
* @param name the name of the new list
56+
*/
3057
public XContentEnabledList(final String name) {
3158
_name = name;
3259
_xName = null;
3360
}
3461

35-
public XContentEnabledList(final int size, final XContentBuilderString name) {
62+
/**
63+
* Create a list with the initial capacity specified.
64+
*
65+
* @param initialCapacity the starting capacity
66+
* @param name the name of the new list, as XContent
67+
*/
68+
public XContentEnabledList(final int initialCapacity, final XContentBuilderString name) {
69+
super(initialCapacity);
3670
_name = null;
3771
_xName = name;
3872
}

src/main/java/com/pearson/entech/elasticsearch/search/facet/approx/date/internal/DateFacetExecutor.java

+26-31
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import org.elasticsearch.common.joda.TimeZoneRounding;
44
import org.elasticsearch.index.fielddata.IndexFieldData;
5-
import org.elasticsearch.index.fielddata.LongValues.Iter;
65
import org.elasticsearch.index.fielddata.plain.LongArrayIndexFieldData;
76
import org.elasticsearch.search.facet.FacetExecutor;
87
import org.elasticsearch.search.facet.FacetPhaseExecutionException;
@@ -15,49 +14,45 @@
1514
import com.pearson.entech.elasticsearch.search.facet.approx.date.collectors.SlicedDistinctCollector;
1615
import com.pearson.entech.elasticsearch.search.facet.approx.date.collectors.TimestampFirstCollector;
1716

17+
/**
18+
* Executor for all date facets.
19+
*/
1820
public class DateFacetExecutor extends FacetExecutor {
1921

20-
private static final Iter __emptyIter = new Iter.Empty();
22+
private final TimestampFirstCollector<?> _collector;
2123

22-
private final LongArrayIndexFieldData _keyFieldData;
23-
private final IndexFieldData _valueFieldData;
24-
private final IndexFieldData _distinctFieldData;
25-
private final IndexFieldData _sliceFieldData;
24+
// TODO proper use of generics
2625

27-
private final TimestampFirstCollector _collector;
26+
/**
27+
* Create a new executor.
28+
*
29+
* @param keyFieldData field data for the datetime field used for timestamps
30+
* @param valueFieldData field data for the optional value field, can be null
31+
* @param distinctFieldData field data for the optional distinct field, can be null
32+
* @param sliceFieldData field data for the optional slice field, can be null
33+
* @param tzRounding a timezone rounding object
34+
* @param exactThreshold exact count threshold when doing distincts
35+
*/
36+
public DateFacetExecutor(final LongArrayIndexFieldData keyFieldData, final IndexFieldData<?> valueFieldData,
37+
final IndexFieldData<?> distinctFieldData, final IndexFieldData<?> sliceFieldData,
38+
final TimeZoneRounding tzRounding, final int exactThreshold) {
2839

29-
private final TimeZoneRounding _tzRounding;
30-
31-
private final int _exactThreshold;
32-
33-
public DateFacetExecutor(final LongArrayIndexFieldData keyFieldData, final IndexFieldData valueFieldData,
34-
final IndexFieldData distinctFieldData, final IndexFieldData sliceFieldData,
35-
final TimeZoneRounding tzRounding, final int exactThreshold, final boolean debug) {
36-
_keyFieldData = keyFieldData;
37-
_valueFieldData = valueFieldData;
38-
_distinctFieldData = distinctFieldData;
39-
_sliceFieldData = sliceFieldData;
40-
_tzRounding = tzRounding;
41-
_exactThreshold = exactThreshold;
42-
43-
// TODO type safety for the following constructors
44-
45-
if(_distinctFieldData == null && _sliceFieldData == null)
46-
if(_valueFieldData == null)
40+
if(distinctFieldData == null && sliceFieldData == null)
41+
if(valueFieldData == null)
4742
_collector = new CountingCollector<NullFieldData>(keyFieldData, tzRounding);
4843
else
49-
_collector = new CountingCollector(keyFieldData, _valueFieldData, tzRounding);
50-
else if(_distinctFieldData == null)
51-
if(_valueFieldData == null)
44+
_collector = new CountingCollector(keyFieldData, valueFieldData, tzRounding);
45+
else if(distinctFieldData == null)
46+
if(valueFieldData == null)
5247
_collector = new SlicedCollector(keyFieldData, sliceFieldData, tzRounding);
5348
else
5449
_collector = new SlicedCollector(keyFieldData, valueFieldData, sliceFieldData, tzRounding);
55-
else if(_sliceFieldData == null)
56-
if(_valueFieldData == null)
50+
else if(sliceFieldData == null)
51+
if(valueFieldData == null)
5752
_collector = new DistinctCollector(keyFieldData, distinctFieldData, tzRounding, exactThreshold);
5853
else
5954
throw new FacetPhaseExecutionException("unknown date_facet", "Can't use distinct_field and value_field together");
60-
else if(_valueFieldData == null)
55+
else if(valueFieldData == null)
6156
_collector = new SlicedDistinctCollector(keyFieldData, sliceFieldData, distinctFieldData, tzRounding, exactThreshold);
6257
else
6358
throw new FacetPhaseExecutionException("unknown date_facet", "Can't use distinct_field and value_field together");

0 commit comments

Comments
 (0)