From 9ff8e4fbac882f73b0f3642ffafc1f9282a4d405 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 11 Aug 2020 09:28:21 +0000 Subject: [PATCH 01/73] Bump luceneVersion from 5.3.0 to 8.6.0 Bumps `luceneVersion` from 5.3.0 to 8.6.0. Updates `lucene-core` from 5.3.0 to 8.6.0 Updates `lucene-queryparser` from 5.3.0 to 8.6.0 Updates `lucene-analyzers-common` from 5.3.0 to 8.6.0 Updates `lucene-join` from 5.3.0 to 8.6.0 Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 pom.xml diff --git a/pom.xml b/pom.xml old mode 100755 new mode 100644 index 61a4eed..693a538 --- a/pom.xml +++ b/pom.xml @@ -14,7 +14,7 @@ https://repo.icatproject.org/repo github https://github.com/icatproject/icat.lucene - 5.3.0 + 8.6.0 From 48433e59bc16de0ae75630552f7075b2884d68ee Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 11 Jan 2022 04:53:04 +0000 Subject: [PATCH 02/73] Update imports and replace deprecated functionality --- .../org/icatproject/lucene/IcatAnalyzer.java | 21 ++++++++++---- .../java/org/icatproject/lucene/Lucene.java | 29 ++++++++++--------- 2 files changed, 32 insertions(+), 18 deletions(-) mode change 100644 => 100755 src/main/java/org/icatproject/lucene/IcatAnalyzer.java mode change 100644 => 100755 src/main/java/org/icatproject/lucene/Lucene.java diff --git a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java old mode 100644 new mode 100755 index cb6767e..fcae1c9 --- a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java +++ b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java @@ -4,22 +4,33 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; -import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.EnglishPossessiveFilter; import org.apache.lucene.analysis.en.PorterStemFilter; -import org.apache.lucene.analysis.standard.StandardFilter; +// import org.apache.lucene.analysis.standard.StandardAnalyzer ; import org.apache.lucene.analysis.standard.StandardTokenizer; +// public class IcatAnalyzer extends Analyzer { + +// @Override +// protected TokenStreamComponents createComponents(String fieldName) { +// StandardAnalyzer analyzer = new StandardAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); +// Analyzer.TokenStreamComponents stream = analyzer.createComponents(fieldName); +// sink = new EnglishPossessiveFilter(stream.getTokenStream()); +// sink = new PorterStemFilter(sink); +// return new TokenStreamComponents(source, sink); +// } +// } + public class IcatAnalyzer extends Analyzer { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new StandardTokenizer(); - TokenStream sink = new StandardFilter(source); - sink = new EnglishPossessiveFilter(sink); + TokenStream sink = new EnglishPossessiveFilter(source); sink = new LowerCaseFilter(sink); - sink = new StopFilter(sink, StopAnalyzer.ENGLISH_STOP_WORDS_SET); + sink = new StopFilter(sink, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); sink = new PorterStemFilter(sink); return new TokenStreamComponents(source, sink); } diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java old mode 100644 new mode 100755 index 
2015323..9c55eca --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -41,9 +41,10 @@ import javax.ws.rs.core.MediaType; import org.apache.lucene.document.Document; -import org.apache.lucene.document.DoubleField; +import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; @@ -57,7 +58,6 @@ import org.apache.lucene.search.BooleanQuery.Builder; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherManager; @@ -85,7 +85,7 @@ enum AttributeName { } enum FieldType { - TextField, StringField, SortedDocValuesField, DoubleField + TextField, StringField, SortedDocValuesField, DoublePoint } private class IndexBucket { @@ -238,7 +238,7 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP long num = parser.getLong(); if (fType == FieldType.SortedDocValuesField) { value = Long.toString(num); - } else if (fType == FieldType.DoubleField) { + } else if (fType == FieldType.DoublePoint) { dvalue = parser.getBigDecimal().doubleValue(); } else { throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, @@ -262,8 +262,11 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP doc.add(new StringField(name, value, store)); } else if (fType == FieldType.SortedDocValuesField) { doc.add(new SortedDocValuesField(name, new BytesRef(value))); - } else if (fType == FieldType.DoubleField) { - doc.add(new DoubleField(name, dvalue, store)); + } else if (fType == FieldType.DoublePoint) { + doc.add(new DoublePoint(name, dvalue)); + if (store == Store.YES) { + doc.add(new StoredField(name, dvalue)); + } } } else if (ev == Event.END_ARRAY) { if (id == null) { @@ -353,7 +356,7 @@ public void commit() throws LuceneException { bucket.indexWriter.commit(); if (cached != 0) { logger.debug("Synch has committed {} {} changes to Lucene - now have {} documents indexed", - cached, entry.getKey(), bucket.indexWriter.numDocs()); + cached, entry.getKey(), bucket.indexWriter.getDocStats().numDocs); } bucket.searcherManager.maybeRefreshBlocking(); } @@ -379,10 +382,10 @@ private IndexBucket createBucket(String name) { iwriter.commit(); iwriter.deleteDocuments(new Term("dummy", "dummy")); iwriter.commit(); - logger.debug("Now have " + iwriter.numDocs() + " documents indexed"); + logger.debug("Now have " + iwriter.getDocStats().numDocs + " documents indexed"); } bucket.indexWriter = iwriter; - bucket.searcherManager = new SearcherManager(iwriter, false, null); + bucket.searcherManager = new SearcherManager(iwriter, false, false, null); logger.debug("Bucket for {} is now ready", name); return bucket; } catch (Throwable e) { @@ -791,7 +794,7 @@ private String luceneSearchResult(String name, Search search, int maxResults, Lo TopDocs topDocs = search.lastDoc == null ? 
isearcher.search(search.query, maxResults) : isearcher.searchAfter(search.lastDoc, search.query, maxResults); ScoreDoc[] hits = topDocs.scoreDocs; - logger.debug("Hits " + topDocs.totalHits + " maxscore " + topDocs.getMaxScore()); + logger.debug("Hits " + topDocs.totalHits + " maxscore " + topDocs.scoreDocs[0].score); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject(); @@ -850,8 +853,8 @@ private Builder parseParameter(JsonValue p) { new BytesRef(pUpperDateValue), true, true), Occur.MUST); } else if (pLowerNumericValue != null && pUpperNumericValue != null) { - paramQuery.add(NumericRangeQuery.newDoubleRange("numericValue", pLowerNumericValue, pUpperNumericValue, - true, true), Occur.MUST); + paramQuery.add(DoublePoint.newRangeQuery("numericValue", pLowerNumericValue, pUpperNumericValue), + Occur.MUST); } return paramQuery; } @@ -870,7 +873,7 @@ public void unlock(@PathParam("entityName") String entityName) throws LuceneExce bucket.indexWriter.commit(); if (cached != 0) { logger.debug("Unlock has committed {} {} changes to Lucene - now have {} documents indexed", cached, - entityName, bucket.indexWriter.numDocs()); + entityName, bucket.indexWriter.getDocStats().numDocs); } bucket.searcherManager.maybeRefreshBlocking(); } catch (IOException e) { From 60b659b7874b444e40c748bf5d5c844ceb4e5045 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 12 Jan 2022 17:14:30 +0000 Subject: [PATCH 03/73] Enable basic sorted set facets #19 --- .../java/org/icatproject/lucene/Lucene.java | 491 ++++++++++++------ src/test/java/icat/lucene/TestLucene.java | 123 +++++ 2 files changed, 445 insertions(+), 169 deletions(-) mode change 100644 => 100755 src/test/java/icat/lucene/TestLucene.java diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 11b1d5b..17f48f4 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -8,6 +8,7 @@ import java.nio.file.Files; import java.util.Comparator; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Timer; @@ -47,9 +48,20 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; +import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.ReaderManager; import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; @@ -85,18 +97,20 @@ enum AttributeName { } enum FieldType { - TextField, StringField, SortedDocValuesField, DoublePoint + TextField, 
StringField, SortedDocValuesField, DoublePoint, SortedSetDocValuesFacetField, } private class IndexBucket { private FSDirectory directory; private IndexWriter indexWriter; + private ReaderManager readerManager; private SearcherManager searcherManager; private AtomicBoolean locked = new AtomicBoolean(); } public class Search { - public Map map; + public Map readerMap; + public Map searcherMap; public Query query; public ScoreDoc lastDoc; } @@ -109,6 +123,8 @@ enum When { private static final Marker fatal = MarkerFactory.getMarker("FATAL"); + private final FacetsConfig facetsConfig = new FacetsConfig(); + private java.nio.file.Path luceneDirectory; private int luceneCommitMillis; @@ -262,6 +278,8 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP doc.add(new StringField(name, value, store)); } else if (fType == FieldType.SortedDocValuesField) { doc.add(new SortedDocValuesField(name, new BytesRef(value))); + } else if (fType == FieldType.SortedSetDocValuesFacetField) { + doc.add(new SortedSetDocValuesFacetField(name, value)); } else if (fType == FieldType.DoublePoint) { doc.add(new DoublePoint(name, dvalue)); if (store == Store.YES) { @@ -274,13 +292,13 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } - bucket.indexWriter.addDocument(doc); + bucket.indexWriter.addDocument(facetsConfig.build(doc)); } else { if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } - bucket.indexWriter.updateDocument(new Term("id", id.toString()), doc); + bucket.indexWriter.updateDocument(new Term("id", id.toString()), facetsConfig.build(doc)); } return; } else { @@ -358,6 +376,7 @@ public void commit() throws LuceneException { logger.debug("Synch has committed {} {} changes to Lucene - now have {} documents indexed", cached, entry.getKey(), bucket.indexWriter.getDocStats().numDocs); } + bucket.readerManager.maybeRefreshBlocking(); bucket.searcherManager.maybeRefreshBlocking(); } } @@ -385,6 +404,7 @@ private IndexBucket createBucket(String name) { logger.debug("Now have " + iwriter.getDocStats().numDocs + " documents indexed"); } bucket.indexWriter = iwriter; + bucket.readerManager = new ReaderManager(iwriter, false, false); bucket.searcherManager = new SearcherManager(iwriter, false, false, null); logger.debug("Bucket for {} is now ready", name); return bucket; @@ -404,56 +424,7 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes Long uid = null; try { uid = bucketNum.getAndIncrement(); - Search search = new Search(); - searches.put(uid, search); - Map map = new HashMap<>(); - search.map = map; - - try (JsonReader r = Json.createReader(request.getInputStream())) { - JsonObject o = r.readObject(); - String userName = o.getString("user", null); - - BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - - if (userName != null) { - Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", - new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"), - ScoreMode.None); - - Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery, - getSearcher(map, "Investigation"), ScoreMode.None); - - Query dsQuery = JoinUtil.createJoinQuery("id", false, "dataset", invQuery, - getSearcher(map, "Dataset"), ScoreMode.None); - - theQuery.add(dsQuery, Occur.MUST); - } - - String text = 
o.getString("text", null); - if (text != null) { - theQuery.add(parser.parse(text, "text"), Occur.MUST); - } - - String lower = o.getString("lower", null); - String upper = o.getString("upper", null); - if (lower != null && upper != null) { - theQuery.add(new TermRangeQuery("date", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - } - - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); - IndexSearcher datafileParameterSearcher = getSearcher(map, "DatafileParameter"); - for (JsonValue p : params) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("datafile", false, "id", paramQuery.build(), - datafileParameterSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - } - search.query = maybeEmptyQuery(theQuery); - } - + Search search = datafilesQuery(request, uid); return luceneSearchResult("Datafile", search, maxResults, uid); } catch (Exception e) { logger.error("Error", e); @@ -483,61 +454,87 @@ public String datafilesAfter(@PathParam("uid") long uid, @QueryParam("maxResults @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) - @Path("datasets") - public String datasets(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults) - throws LuceneException { - + @Path("datafiles/facet") + public String datafilesFacet(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, + @QueryParam("maxLabels") int maxLabels) throws LuceneException { Long uid = null; try { uid = bucketNum.getAndIncrement(); - Search search = new Search(); - searches.put(uid, search); - Map map = new HashMap<>(); - search.map = map; - try (JsonReader r = Json.createReader(request.getInputStream())) { - JsonObject o = r.readObject(); - String userName = o.getString("user", null); + Search search = datafilesQuery(request, uid); + return luceneFacetResult("Datafile", search, maxResults, maxLabels, uid); + } catch (Exception e) { + logger.error("Error", e); + freeSearcher(uid); + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } - BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); + } - if (userName != null) { + private Search datafilesQuery(HttpServletRequest request, Long uid) throws IOException, QueryNodeException { + Search search = new Search(); + searches.put(uid, search); + Map searcherMap = new HashMap<>(); + Map readerMap = new HashMap<>(); + search.searcherMap = searcherMap; + search.readerMap = readerMap; + try (JsonReader r = Json.createReader(request.getInputStream())) { + JsonObject o = r.readObject(); + String userName = o.getString("user", null); - Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", - new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"), - ScoreMode.None); + BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery, - getSearcher(map, "Investigation"), ScoreMode.None); + if (userName != null) { + Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", + new TermQuery(new Term("name", userName)), getSearcher(searcherMap, "InvestigationUser"), + ScoreMode.None); - theQuery.add(invQuery, Occur.MUST); - } + Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery, + getSearcher(searcherMap, "Investigation"), ScoreMode.None); - String text = o.getString("text", null); - if (text != null) { - 
theQuery.add(parser.parse(text, "text"), Occur.MUST); - } + Query dsQuery = JoinUtil.createJoinQuery("id", false, "dataset", invQuery, + getSearcher(searcherMap, "Dataset"), ScoreMode.None); - String lower = o.getString("lower", null); - String upper = o.getString("upper", null); - if (lower != null && upper != null) { - theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - } + theQuery.add(dsQuery, Occur.MUST); + } - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); - IndexSearcher datasetParameterSearcher = getSearcher(map, "DatasetParameter"); - for (JsonValue p : params) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("dataset", false, "id", paramQuery.build(), - datasetParameterSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } + String text = o.getString("text", null); + if (text != null) { + theQuery.add(parser.parse(text, "text"), Occur.MUST); + } + + String lower = o.getString("lower", null); + String upper = o.getString("upper", null); + if (lower != null && upper != null) { + theQuery.add(new TermRangeQuery("date", new BytesRef(lower), new BytesRef(upper), true, true), + Occur.MUST); + } + + if (o.containsKey("params")) { + JsonArray params = o.getJsonArray("params"); + IndexSearcher datafileParameterSearcher = getSearcher(searcherMap, "DatafileParameter"); + for (JsonValue p : params) { + BooleanQuery.Builder paramQuery = parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("datafile", false, "id", paramQuery.build(), + datafileParameterSearcher, ScoreMode.None); + theQuery.add(toQuery, Occur.MUST); } - search.query = maybeEmptyQuery(theQuery); } + search.query = maybeEmptyQuery(theQuery); + } + return search; + } + + @POST + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + @Path("datasets") + public String datasets(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults) + throws LuceneException { + + Long uid = null; + try { + uid = bucketNum.getAndIncrement(); + Search search = datasetsQuery(request, uid); return luceneSearchResult("Dataset", search, maxResults, uid); } catch (Exception e) { logger.error("Error", e); @@ -565,6 +562,79 @@ public String datasetsAfter(@PathParam("uid") long uid, @QueryParam("maxResults" } } + @POST + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + @Path("datasets/facet") + public String datasetsFacet(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, + @QueryParam("maxLabels") int maxLabels) throws LuceneException { + Long uid = null; + try { + uid = bucketNum.getAndIncrement(); + Search search = datasetsQuery(request, uid); + return luceneFacetResult("Dataset", search, maxResults, maxLabels, uid); + } catch (Exception e) { + logger.error("Error", e); + freeSearcher(uid); + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } + + } + + private Search datasetsQuery(HttpServletRequest request, Long uid) throws IOException, QueryNodeException { + Search search = new Search(); + searches.put(uid, search); + Map searcherMap = new HashMap<>(); + Map readerMap = new HashMap<>(); + search.searcherMap = searcherMap; + search.readerMap = readerMap; + try (JsonReader r = Json.createReader(request.getInputStream())) { + JsonObject o = 
r.readObject(); + String userName = o.getString("user", null); + + BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); + + if (userName != null) { + + Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", + new TermQuery(new Term("name", userName)), getSearcher(searcherMap, "InvestigationUser"), + ScoreMode.None); + + Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery, + getSearcher(searcherMap, "Investigation"), ScoreMode.None); + + theQuery.add(invQuery, Occur.MUST); + } + + String text = o.getString("text", null); + if (text != null) { + theQuery.add(parser.parse(text, "text"), Occur.MUST); + } + + String lower = o.getString("lower", null); + String upper = o.getString("upper", null); + if (lower != null && upper != null) { + theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), + Occur.MUST); + theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true), + Occur.MUST); + } + + if (o.containsKey("params")) { + JsonArray params = o.getJsonArray("params"); + IndexSearcher datasetParameterSearcher = getSearcher(searcherMap, "DatasetParameter"); + for (JsonValue p : params) { + BooleanQuery.Builder paramQuery = parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("dataset", false, "id", paramQuery.build(), + datasetParameterSearcher, ScoreMode.None); + theQuery.add(toQuery, Occur.MUST); + } + } + search.query = maybeEmptyQuery(theQuery); + } + return search; + } + @PreDestroy private void exit() { logger.info("Closing down icat.lucene"); @@ -576,6 +646,7 @@ private void exit() { try { for (Entry entry : indexBuckets.entrySet()) { IndexBucket bucket = entry.getValue(); + bucket.readerManager.close(); bucket.searcherManager.close(); bucket.indexWriter.commit(); bucket.indexWriter.close(); @@ -592,7 +663,18 @@ private void exit() { public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException { if (uid != null) { // May not be set for internal calls logger.debug("Requesting freeSearcher {}", uid); - Map search = searches.get(uid).map; + Map search = searches.get(uid).searcherMap; + Map read = searches.get(uid).readerMap; + for (Entry entry : read.entrySet()) { + String name = entry.getKey(); + DirectoryReader directoryReader = entry.getValue(); + ReaderManager manager = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).readerManager; + try { + manager.release(directoryReader); + } catch (IOException e) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } + } for (Entry entry : search.entrySet()) { String name = entry.getKey(); IndexSearcher isearcher = entry.getValue(); @@ -607,6 +689,16 @@ public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException { } } + private DirectoryReader getReader(Map bucket, String name) throws IOException { + DirectoryReader directoryReader = bucket.get(name); + if (directoryReader == null) { + directoryReader = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).readerManager.acquire(); + bucket.put(name, directoryReader); + logger.debug("Remember searcher for {}", name); + } + return directoryReader; + } + /* * Need a new set of IndexSearchers for each search as identified by a uid */ @@ -672,83 +764,13 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m Long uid = null; try { uid = bucketNum.getAndIncrement(); - Search search = new Search(); - searches.put(uid, search); - Map map = new HashMap<>(); - 
search.map = map; - try (JsonReader r = Json.createReader(request.getInputStream())) { - JsonObject o = r.readObject(); - String userName = o.getString("user", null); - - BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - - if (userName != null) { - Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", - new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"), - ScoreMode.None); - theQuery.add(iuQuery, Occur.MUST); - } - - String text = o.getString("text", null); - if (text != null) { - theQuery.add(parser.parse(text, "text"), Occur.MUST); - } - - String lower = o.getString("lower", null); - String upper = o.getString("upper", null); - if (lower != null && upper != null) { - theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - } - - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); - IndexSearcher investigationParameterSearcher = getSearcher(map, "InvestigationParameter"); - - for (JsonValue p : params) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", paramQuery.build(), - investigationParameterSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - } - - if (o.containsKey("samples")) { - JsonArray samples = o.getJsonArray("samples"); - IndexSearcher sampleSearcher = getSearcher(map, "Sample"); - - for (JsonValue s : samples) { - JsonString sample = (JsonString) s; - BooleanQuery.Builder sampleQuery = new BooleanQuery.Builder(); - sampleQuery.add(parser.parse(sample.getString(), "text"), Occur.MUST); - Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", sampleQuery.build(), - sampleSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - } - - String userFullName = o.getString("userFullName", null); - if (userFullName != null) { - BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); - userFullNameQuery.add(parser.parse(userFullName, "text"), Occur.MUST); - IndexSearcher investigationUserSearcher = getSearcher(map, "InvestigationUser"); - Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", userFullNameQuery.build(), - investigationUserSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - - search.query = maybeEmptyQuery(theQuery); - } - logger.info("Query: {}", search.query); + Search search = investigationsQuery(request, uid); return luceneSearchResult("Investigation", search, maxResults, uid); } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } - } @GET @@ -769,6 +791,100 @@ public String investigationsAfter(@PathParam("uid") long uid, @QueryParam("maxRe } } + @POST + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + @Path("investigations/facet") + public String investigationsFacet(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, + @QueryParam("maxLabels") int maxLabels) throws LuceneException { + Long uid = null; + try { + uid = bucketNum.getAndIncrement(); + Search search = investigationsQuery(request, uid); + return luceneFacetResult("Investigation", search, maxResults, maxLabels, uid); + } catch (Exception e) { + logger.error("Error", e); + freeSearcher(uid); + throw new 
LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } + } + + private Search investigationsQuery(HttpServletRequest request, Long uid) throws IOException, QueryNodeException { + Search search = new Search(); + searches.put(uid, search); + Map searcherMap = new HashMap<>(); + Map readerMap = new HashMap<>(); + search.searcherMap = searcherMap; + search.readerMap = readerMap; + try (JsonReader r = Json.createReader(request.getInputStream())) { + JsonObject o = r.readObject(); + String userName = o.getString("user", null); + + BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); + + if (userName != null) { + Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", + new TermQuery(new Term("name", userName)), getSearcher(searcherMap, "InvestigationUser"), + ScoreMode.None); + theQuery.add(iuQuery, Occur.MUST); + } + + String text = o.getString("text", null); + if (text != null) { + theQuery.add(parser.parse(text, "text"), Occur.MUST); + } + + String lower = o.getString("lower", null); + String upper = o.getString("upper", null); + if (lower != null && upper != null) { + theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), + Occur.MUST); + theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true), + Occur.MUST); + } + + if (o.containsKey("params")) { + JsonArray params = o.getJsonArray("params"); + IndexSearcher investigationParameterSearcher = getSearcher(searcherMap, "InvestigationParameter"); + + for (JsonValue p : params) { + BooleanQuery.Builder paramQuery = parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", paramQuery.build(), + investigationParameterSearcher, ScoreMode.None); + theQuery.add(toQuery, Occur.MUST); + } + } + + if (o.containsKey("samples")) { + JsonArray samples = o.getJsonArray("samples"); + IndexSearcher sampleSearcher = getSearcher(searcherMap, "Sample"); + + for (JsonValue s : samples) { + JsonString sample = (JsonString) s; + BooleanQuery.Builder sampleQuery = new BooleanQuery.Builder(); + sampleQuery.add(parser.parse(sample.getString(), "text"), Occur.MUST); + Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", sampleQuery.build(), + sampleSearcher, ScoreMode.None); + theQuery.add(toQuery, Occur.MUST); + } + } + + String userFullName = o.getString("userFullName", null); + if (userFullName != null) { + BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); + userFullNameQuery.add(parser.parse(userFullName, "text"), Occur.MUST); + IndexSearcher investigationUserSearcher = getSearcher(searcherMap, "InvestigationUser"); + Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", userFullNameQuery.build(), + investigationUserSearcher, ScoreMode.None); + theQuery.add(toQuery, Occur.MUST); + } + + search.query = maybeEmptyQuery(theQuery); + } + logger.info("Query: {}", search.query); + return search; + } + @POST @Path("lock/{entityName}") public void lock(@PathParam("entityName") String entityName) throws LuceneException { @@ -785,8 +901,44 @@ public void lock(@PathParam("entityName") String entityName) throws LuceneExcept } } + private String luceneFacetResult(String name, Search search, int maxResults, int maxLabels, Long uid) + throws IOException { + IndexSearcher isearcher = getSearcher(search.searcherMap, name); + DirectoryReader directoryReader = getReader(search.readerMap, name); + logger.debug("To facet in {} for {} {} with {} from {} ", name, 
search.query, maxResults, isearcher, + search.lastDoc); + DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(directoryReader); + FacetsCollector facetsCollector = new FacetsCollector(); + FacetsCollector.search(isearcher, search.query, maxResults, facetsCollector); + Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); + List results = facets.getAllDims(maxLabels); + logger.debug("Facets found for " + results.size() + " dimensions"); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (JsonGenerator gen = Json.createGenerator(baos)) { + gen.writeStartObject(); + if (uid != null) { + gen.write("uid", uid); + } + gen.writeStartArray("facets"); // array of all facet dimensions + for (FacetResult result : results) { + gen.writeStartArray(result.dim); // array of labelValues for a given dimension + for (LabelAndValue labelValue : result.labelValues) { + gen.writeStartArray("labelValue"); // 2 element array of label, value + gen.write(labelValue.label); + gen.write(labelValue.value.longValue()); + gen.writeEnd(); // array of label, value + } + gen.writeEnd(); // array of labelValues for a given dimension + } + gen.writeEnd(); // array of facet dimensions + gen.writeEnd(); // object + } + logger.debug("Json returned {}", baos.toString()); + return baos.toString(); + } + private String luceneSearchResult(String name, Search search, int maxResults, Long uid) throws IOException { - IndexSearcher isearcher = getSearcher(search.map, name); + IndexSearcher isearcher = getSearcher(search.searcherMap, name); logger.debug("To search in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher, search.lastDoc); TopDocs topDocs = search.lastDoc == null ? isearcher.search(search.query, maxResults) @@ -874,6 +1026,7 @@ public void unlock(@PathParam("entityName") String entityName) throws LuceneExce entityName, bucket.indexWriter.getDocStats().numDocs); } bucket.searcherManager.maybeRefreshBlocking(); + bucket.readerManager.maybeRefreshBlocking(); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } diff --git a/src/test/java/icat/lucene/TestLucene.java b/src/test/java/icat/lucene/TestLucene.java old mode 100644 new mode 100755 index f5cd493..9624f52 --- a/src/test/java/icat/lucene/TestLucene.java +++ b/src/test/java/icat/lucene/TestLucene.java @@ -9,13 +9,23 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; @@ -45,6 +55,8 @@ public class TestLucene { static final int scale = (int) 1.0e5; + private final FacetsConfig 
facetsConfig = new FacetsConfig(); + @Test public void testIcatAnalyzer() throws Exception { final String text = "This is a demo of the 1st (or is it number 2) all singing and dancing TokenStream's API with added aardvarks"; @@ -171,6 +183,57 @@ public void testJoins() throws Exception { System.out.println("Join tests took " + (System.currentTimeMillis() - start) + "ms"); } + @Test + public void testFacets() throws Exception { + Analyzer analyzer = new IcatAnalyzer(); + IndexWriterConfig config; + + Path tmpLuceneDir = Files.createTempDirectory("lucene"); + FSDirectory investigationDirectory = FSDirectory.open(tmpLuceneDir.resolve("Investigation")); + config = new IndexWriterConfig(analyzer); + config.setOpenMode(OpenMode.CREATE); + IndexWriter investigationWriter = new IndexWriter(investigationDirectory, config); + + // Add investigations with parameter and sample Facets + addFacetedInvestigation(investigationWriter, "inv1", 101, "parameter1", "sample1"); + addFacetedInvestigation(investigationWriter, "inv2", 102, "parameter2", "sample2"); + + // Add investigations with only the parameter Facet + for (int i = 0; i < scale; i++) { + addFacetedInvestigation(investigationWriter, "extra" + i, 500 + i, "parameter0"); + } + + investigationWriter.close(); + + DirectoryReader directoryReader = DirectoryReader.open(investigationDirectory); + IndexSearcher investigationSearcher = new IndexSearcher(directoryReader); + StandardQueryParser parser = new StandardQueryParser(); + StandardQueryConfigHandler qpConf = (StandardQueryConfigHandler) parser.getQueryConfigHandler(); + qpConf.set(ConfigurationKeys.ANALYZER, analyzer); + qpConf.set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, true); + Map labelValuesParameter = new HashMap<>(); + Map labelValuesSample = new HashMap<>(); + + long start = System.currentTimeMillis(); + + // Get Facets that are relevant for "inv1" + labelValuesParameter.put("parameter1", 1); + labelValuesSample.put("sample1", 1); + checkFacets(labelValuesParameter, labelValuesSample, "inv1", investigationSearcher, directoryReader, parser); + + // Get Facets that are relevant for "inv*" + labelValuesParameter.put("parameter2", 1); + labelValuesSample.put("sample2", 1); + checkFacets(labelValuesParameter, labelValuesSample, "inv*", investigationSearcher, directoryReader, parser); + + // Get all Facets for "*" + labelValuesParameter.put("parameter0", scale); + checkFacets(labelValuesParameter, labelValuesSample, "*", investigationSearcher, directoryReader, parser); + + System.out.println("Facet tests took " + (System.currentTimeMillis() - start) + "ms"); + } + + private void checkDatafiles(List dnums, String fname, String uname, IndexSearcher investigationSearcher, IndexSearcher investigationUserSearcher, IndexSearcher datasetSearcher, IndexSearcher datafileSearcher, StandardQueryParser parser) throws IOException, QueryNodeException { @@ -253,6 +316,20 @@ private ScoreDoc[] get(String iname, String uname, IndexSearcher investigationSe } + /* Facets */ + private Facets get(String iname, IndexSearcher investigationSearcher, DirectoryReader directoryReader, + StandardQueryParser parser) throws QueryNodeException, IOException { + BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); + if (iname != null) { + theQuery.add(parser.parse(iname, "name"), Occur.MUST); + } + DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(directoryReader); + FacetsCollector facetsCollector = new FacetsCollector(); + FacetsCollector.search(investigationSearcher, 
theQuery.build(), 50, facetsCollector); + Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); + return facets; + } + private void checkDatasets(List dnums, String sname, String uname, IndexSearcher investigationSearcher, IndexSearcher investigationUserSearcher, IndexSearcher datasetSearcher, StandardQueryParser parser) throws IOException, QueryNodeException { @@ -265,6 +342,31 @@ private void checkDatasets(List dnums, String sname, String uname, Inde } + private void checkFacets(Map labelValuesParameter, Map labelValuesSample, + String iname, IndexSearcher investigationSearcher, DirectoryReader directoryReader, + StandardQueryParser parser) throws QueryNodeException, IOException { + Facets facets = get(iname, investigationSearcher, directoryReader, parser); + List results = facets.getAllDims(50); + if (labelValuesParameter.size() > 0) { + FacetResult parameterResult = results.remove(0); + assertEquals("Dimension", "parameter", parameterResult.dim); + assertEquals("Length", labelValuesParameter.size(), parameterResult.labelValues.length); + for (LabelAndValue labelValue : parameterResult.labelValues) { + assertTrue("Label", labelValuesParameter.containsKey(labelValue.label)); + assertEquals("Value", labelValuesParameter.get(labelValue.label), labelValue.value); + } + } + if (labelValuesSample.size() > 0) { + FacetResult sampleResult = results.remove(0); + assertEquals("Dimension", "sample", sampleResult.dim); + assertEquals("Length", labelValuesSample.size(), sampleResult.labelValues.length); + for (LabelAndValue labelValue : sampleResult.labelValues) { + assertTrue("Label", labelValuesSample.containsKey(labelValue.label)); + assertEquals("Value", labelValuesSample.get(labelValue.label), labelValue.value); + } + } + } + private void checkInvestigations(List dnums, String iname, String uname, IndexSearcher investigationSearcher, IndexSearcher investigationUserSearcher, StandardQueryParser parser) throws QueryNodeException, IOException { @@ -285,6 +387,27 @@ private void addInvestigation(IndexWriter iwriter, String name, long iNum) throw iwriter.addDocument(doc); } + private void addFacetedInvestigation(IndexWriter iwriter, String name, long iNum, String parameterValue, + String sampleValue) throws IOException { + Document doc = new Document(); + doc.add(new StringField("name", name, Store.NO)); + doc.add(new SortedDocValuesField("id", new BytesRef(Long.toString(iNum)))); + doc.add(new StringField("id", Long.toString(iNum), Store.YES)); + doc.add(new SortedSetDocValuesFacetField("parameter", parameterValue)); + doc.add(new SortedSetDocValuesFacetField("sample", sampleValue)); + iwriter.addDocument(facetsConfig.build(doc)); + } + + private void addFacetedInvestigation(IndexWriter iwriter, String name, long iNum, String parameterValue) + throws IOException { + Document doc = new Document(); + doc.add(new StringField("name", name, Store.NO)); + doc.add(new SortedDocValuesField("id", new BytesRef(Long.toString(iNum)))); + doc.add(new StringField("id", Long.toString(iNum), Store.YES)); + doc.add(new SortedSetDocValuesFacetField("parameter", parameterValue)); + iwriter.addDocument(facetsConfig.build(doc)); + } + private void addInvestigationUser(IndexWriter iwriter, String name, long iNum) throws IOException { Document doc = new Document(); doc.add(new StringField("name", name, Store.NO)); From f3b1dff919ec668bc6ac18801be4f731a0d2ce65 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 12 Jan 2022 17:28:54 +0000 Subject: [PATCH 04/73] Update pom.xml with Facets #19 --- 
pom.xml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) mode change 100644 => 100755 pom.xml diff --git a/pom.xml b/pom.xml old mode 100644 new mode 100755 index a0b5bad..d6ec96c --- a/pom.xml +++ b/pom.xml @@ -86,6 +86,12 @@ ${luceneVersion} + + org.apache.lucene + lucene-facet + ${luceneVersion} + + javax javaee-api @@ -327,6 +333,3 @@ Exposes lucene calls to an icat server - - - From 1229a9a214fc5824dec995f96c0611b5bb4aaf46 Mon Sep 17 00:00:00 2001 From: Stuart Pullinger Date: Fri, 5 Jun 2020 11:15:57 +0000 Subject: [PATCH 05/73] Query on datafile date property. Fixes #8 --- src/main/java/org/icatproject/lucene/Lucene.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 9c55eca..11b1d5b 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -437,9 +437,7 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes String lower = o.getString("lower", null); String upper = o.getString("upper", null); if (lower != null && upper != null) { - theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true), + theQuery.add(new TermRangeQuery("date", new BytesRef(lower), new BytesRef(upper), true, true), Occur.MUST); } From 290ad81bae3bd9b991791090cad5e13d98c46dfe Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Tue, 24 Aug 2021 09:13:13 +0000 Subject: [PATCH 06/73] Update release notes for 1.1.1 release --- src/site/xhtml/release-notes.xhtml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/site/xhtml/release-notes.xhtml b/src/site/xhtml/release-notes.xhtml index c9246d4..5ffafa2 100644 --- a/src/site/xhtml/release-notes.xhtml +++ b/src/site/xhtml/release-notes.xhtml @@ -6,6 +6,9 @@

 	<h1>ICAT Lucene Server Release Notes</h1>
 
+	<h2>1.1.1</h2>
+	<p>Fixes date queries on datafiles</p>
+
 	<h2>1.1.0</h2>
 	<p>Make it work with icat.server 4.9.1 and bug fixes</p>

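The sorted-set faceting introduced in [PATCH 03/73] above, together with the lucene-facet dependency added in [PATCH 04/73], follows the standard Lucene facet round trip: facet fields are passed through FacetsConfig.build() at index time, then counted per dimension at search time with a FacetsCollector and SortedSetDocValuesFacetCounts. A minimal self-contained sketch of that pattern against the Lucene 8 API (the "parameter" dimension and its label are illustrative placeholders, not the ICAT schema):

import java.nio.file.Files;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.FSDirectory;

public class FacetSketch {
    public static void main(String[] args) throws Exception {
        FacetsConfig facetsConfig = new FacetsConfig();
        FSDirectory directory = FSDirectory.open(Files.createTempDirectory("facets"));

        // Index time: FacetsConfig.build translates the facet field into doc values
        try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new SortedSetDocValuesFacetField("parameter", "parameter1")); // hypothetical dimension/label
            writer.addDocument(facetsConfig.build(doc));
        }

        // Search time: collect matching docs, then count labels per dimension
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(reader);
            FacetsCollector collector = new FacetsCollector();
            FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, collector);
            Facets facets = new SortedSetDocValuesFacetCounts(state, collector);
            for (FacetResult result : facets.getAllDims(10)) {
                System.out.println(result); // e.g. dim=parameter ... parameter1 (1)
            }
        }
    }
}

The build() call is the step that turns SortedSetDocValuesFacetField entries into the doc values that DefaultSortedSetDocValuesReaderState reads, which is why the patch wraps every addDocument and updateDocument in facetsConfig.build(doc).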
    From d80515e5bfaa022d429ac87ce2d293453a60245d Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Tue, 24 Aug 2021 09:36:55 +0000 Subject: [PATCH 07/73] [maven-release-plugin] prepare release v1.1.1 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 693a538..56c5141 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.icatproject icat.lucene - 1.1.1-SNAPSHOT + 1.1.1 war ICAT Lucene @@ -28,7 +28,7 @@ scm:git:${gitUrl}.git scm:git:${gitUrl}.git ${gitUrl} - HEAD + v1.1.1 From 7bf345959e83b9154d5479caedbb08445be21a26 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Tue, 24 Aug 2021 09:43:12 +0000 Subject: [PATCH 08/73] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 56c5141..1eab5ad 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.icatproject icat.lucene - 1.1.1 + 1.1.2-SNAPSHOT war ICAT Lucene @@ -28,7 +28,7 @@ scm:git:${gitUrl}.git scm:git:${gitUrl}.git ${gitUrl} - v1.1.1 + HEAD From a44db6587943c697148ca0fb785ff63638524c42 Mon Sep 17 00:00:00 2001 From: Stuart Pullinger Date: Fri, 27 Aug 2021 11:29:52 +0000 Subject: [PATCH 09/73] [maven-release-plugin] prepare release v1.1.1 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1eab5ad..56c5141 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.icatproject icat.lucene - 1.1.2-SNAPSHOT + 1.1.1 war ICAT Lucene @@ -28,7 +28,7 @@ scm:git:${gitUrl}.git scm:git:${gitUrl}.git ${gitUrl} - HEAD + v1.1.1 From 32f9fbeee24278a6300e18a2656c49f2dadb11c2 Mon Sep 17 00:00:00 2001 From: Stuart Pullinger Date: Tue, 14 Jan 2020 12:00:13 +0000 Subject: [PATCH 10/73] Converted setup to python 3 --- src/main/scripts/setup | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scripts/setup b/src/main/scripts/setup index 6371b9f..b0f7f0c 100755 --- a/src/main/scripts/setup +++ b/src/main/scripts/setup @@ -27,14 +27,14 @@ if arg == "INSTALL": ovfiles = [[prop_name, "WEB-INF/classes"]] if os.path.exists("logback.xml"): ovfiles.append(["logback.xml", "WEB-INF/classes"]) actions.deploy(deploymentorder=80, files=ovfiles) - except Exception, e: + except Exception as e: abort(str(e)) if arg == "UNINSTALL": try: uninstall() - except Exception, e: + except Exception as e: abort(str(e)) From b902385ceba087bb9108b17ad00b627c3a319b85 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Thu, 16 Sep 2021 09:17:00 +0000 Subject: [PATCH 11/73] Update icat.utils version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 56c5141..abfc732 100644 --- a/pom.xml +++ b/pom.xml @@ -95,7 +95,7 @@ org.icatproject icat.utils - 4.16.0 + 4.16.1 From 4416be4dfc6752ea877fdd1549631409348e37a3 Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Thu, 16 Sep 2021 09:25:18 +0000 Subject: [PATCH 12/73] Update version and release notes --- pom.xml | 2 +- src/site/xhtml/release-notes.xhtml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index abfc732..4d0f4cd 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.icatproject icat.lucene - 1.1.1 + 1.1.2 war ICAT Lucene diff --git a/src/site/xhtml/release-notes.xhtml b/src/site/xhtml/release-notes.xhtml index 5ffafa2..a8e1b76 100644 --- a/src/site/xhtml/release-notes.xhtml +++ b/src/site/xhtml/release-notes.xhtml @@ -5,6 +5,8 @@

 	<h1>ICAT Lucene Server Release Notes</h1>
 
+	<h2>1.1.2</h2>
+	<p>Changes to support Python 3. Now works on Python 2.7 and Python 3. Note: support for Python 2.6 is now dropped.</p>
 	<h2>1.1.1</h2>
 	<p>Fixes date queries on datafiles</p>

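The 1.1.1 fix noted above ([PATCH 05/73]) collapses the copied startDate/endDate clauses into a single inclusive TermRangeQuery on the Datafile "date" field; since the dates are indexed as sortable strings, the range comparison is lexicographic. A minimal sketch of the resulting query construction (the bound values are hypothetical and assume a yyyyMMddHHmm-style encoding):

import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRef;

public class DateRangeSketch {
    public static void main(String[] args) {
        String lower = "201401010000"; // hypothetical lower bound
        String upper = "201412312359"; // hypothetical upper bound
        BooleanQuery.Builder theQuery = new BooleanQuery.Builder();
        // Single inclusive range on "date", as in PATCH 05
        theQuery.add(new TermRangeQuery("date", new BytesRef(lower), new BytesRef(upper), true, true),
                Occur.MUST);
        System.out.println(theQuery.build()); // +date:[201401010000 TO 201412312359]
    }
}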
    From 8a36fb0a4d88b6b8efeea6b392b53d6244b6d5ea Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Thu, 16 Sep 2021 09:32:31 +0000 Subject: [PATCH 13/73] Add snapshot to version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 4d0f4cd..f907525 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.icatproject icat.lucene - 1.1.2 + 1.1.2-SNAPSHOT war ICAT Lucene From f0be663477de33791daa3238593aa583c3acb7de Mon Sep 17 00:00:00 2001 From: Matthew Richards Date: Thu, 16 Sep 2021 09:49:18 +0000 Subject: [PATCH 14/73] [maven-release-plugin] prepare release v1.1.2 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index f907525..a0b5bad 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.icatproject icat.lucene - 1.1.2-SNAPSHOT + 1.1.2 war ICAT Lucene @@ -28,7 +28,7 @@ scm:git:${gitUrl}.git scm:git:${gitUrl}.git ${gitUrl} - v1.1.1 + v1.1.2 From 0ea77096c011c5147018028a0235ab788bf97987 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 11 Jan 2022 07:11:22 +0000 Subject: [PATCH 15/73] Replace travis.yml with ci-build.yml #13 --- .github/workflows/ci-build.yml | 29 +++++++++++++++++++++++++++++ .travis.yml | 16 ---------------- 2 files changed, 29 insertions(+), 16 deletions(-) create mode 100755 .github/workflows/ci-build.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml new file mode 100755 index 0000000..c5121a5 --- /dev/null +++ b/.github/workflows/ci-build.yml @@ -0,0 +1,29 @@ +name: CI Build +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + +strategy: + fail-fast: false + matrix: + version: [8] + experimental: [false] + include: + - version: 11 + experimental: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up JDK + uses: actions/setup-java@v2 + with: + java-version: ${{ matrix.version }} + distribution: 'open' + - name: Build with Maven + run: mvn install diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3ba1242..0000000 --- a/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -language: java -jdk: - - openjdk8 - - openjdk11 - -jobs: - allow_failures: - - jdk: openjdk11 - -dist: xenial - -cache: - directories: - - $HOME/.m2 - -install: true From df3f18a9cbbb69394b238152c8c00f8c9b6fdc39 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 11 Jan 2022 07:16:43 +0000 Subject: [PATCH 16/73] Update CI status badge for GHA #13 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 README.md diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 38eba26..b3357b2 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # icat.lucene -[![Build Status](https://travis-ci.org/icatproject/icat.lucene.svg?branch=master)](https://travis-ci.org/icatproject/icat.lucene) +[![Build Status](https://github.com/icatproject/icat.lucene/workflows/CI%20Build/badge.svg?branch=master)](https://github.com/icatproject/icat.lucene/actions?query=workflow%3A%22CI+Build%22) General installation instructions are at http://www.icatproject.org/installation/component From 093a0ff69151d1c39aa2cd0fed12b8b4c4a98372 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 11 Jan 2022 07:33:26 +0000 Subject: [PATCH 17/73] Move strategy matrix inside build #13 --- .github/workflows/ci-build.yml | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci-build.yml 
b/.github/workflows/ci-build.yml index c5121a5..52dca67 100755 --- a/.github/workflows/ci-build.yml +++ b/.github/workflows/ci-build.yml @@ -6,18 +6,20 @@ on: branches: - master -strategy: - fail-fast: false - matrix: - version: [8] - experimental: [false] - include: - - version: 11 - experimental: true - jobs: build: runs-on: ubuntu-latest + continue-on-error: ${{ matrix.experimental }} + strategy: + fail-fast: false + matrix: + include: + - version: 8 + experimental: false + include: + - version: 11 + experimental: true + steps: - uses: actions/checkout@v2 - name: Set up JDK From e81defda208e0be1e731bb5accdd50b048b053fb Mon Sep 17 00:00:00 2001 From: patrick-austin <61705287+patrick-austin@users.noreply.github.com> Date: Fri, 21 Jan 2022 17:21:18 +0000 Subject: [PATCH 18/73] Remove redundant inclue #13 --- .github/workflows/ci-build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml index 52dca67..f7023e1 100755 --- a/.github/workflows/ci-build.yml +++ b/.github/workflows/ci-build.yml @@ -16,7 +16,6 @@ jobs: include: - version: 8 experimental: false - include: - version: 11 experimental: true From aec760f84cfb427e39f2cf08d9b43c7d77f86398 Mon Sep 17 00:00:00 2001 From: patrick-austin <61705287+patrick-austin@users.noreply.github.com> Date: Fri, 21 Jan 2022 17:36:36 +0000 Subject: [PATCH 19/73] Change OpenJDK distribution #13 --- .github/workflows/ci-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml index f7023e1..a30a9da 100755 --- a/.github/workflows/ci-build.yml +++ b/.github/workflows/ci-build.yml @@ -25,6 +25,6 @@ jobs: uses: actions/setup-java@v2 with: java-version: ${{ matrix.version }} - distribution: 'open' + distribution: 'temurin' - name: Build with Maven run: mvn install From 3a4c301dbdf15350a93c00c4143b6dc4cfd58b02 Mon Sep 17 00:00:00 2001 From: patrick-austin <61705287+patrick-austin@users.noreply.github.com> Date: Mon, 24 Jan 2022 10:22:35 +0000 Subject: [PATCH 20/73] Change Maven command to "mvn test -B" #13 --- .github/workflows/ci-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml index a30a9da..0f93ef8 100755 --- a/.github/workflows/ci-build.yml +++ b/.github/workflows/ci-build.yml @@ -26,5 +26,5 @@ jobs: with: java-version: ${{ matrix.version }} distribution: 'temurin' - - name: Build with Maven - run: mvn install + - name: Test with Maven + run: mvn test -B From b5b5d2d5ea29744441a15b3e05360bec4d8cac88 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 2 Feb 2022 15:30:22 +0000 Subject: [PATCH 21/73] Avoid index error for maxScore --- src/main/java/org/icatproject/lucene/Lucene.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 11b1d5b..737542b 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -792,7 +792,13 @@ private String luceneSearchResult(String name, Search search, int maxResults, Lo TopDocs topDocs = search.lastDoc == null ? 
isearcher.search(search.query, maxResults) : isearcher.searchAfter(search.lastDoc, search.query, maxResults); ScoreDoc[] hits = topDocs.scoreDocs; - logger.debug("Hits " + topDocs.totalHits + " maxscore " + topDocs.scoreDocs[0].score); + Float maxScore; + if (hits.length == 0) { + maxScore = Float.NaN; + } else { + maxScore = hits[0].score; + } + logger.debug("Hits " + topDocs.totalHits + " maxscore " + maxScore); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject(); From 3ecdaace1cfee990af3eed9fd6a8d762f2d49ff4 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 11 Jan 2022 16:15:37 +0000 Subject: [PATCH 22/73] Add synonym injection on search #16 --- .../org/icatproject/lucene/IcatAnalyzer.java | 34 +++-- .../lucene/IcatSynonymAnalyzer.java | 50 +++++++ .../java/org/icatproject/lucene/Lucene.java | 10 +- src/main/resources/synonym.txt | 124 ++++++++++++++++++ src/test/java/icat/lucene/TestLucene.java | 59 +++++++++ 5 files changed, 262 insertions(+), 15 deletions(-) create mode 100755 src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java create mode 100755 src/main/resources/synonym.txt mode change 100644 => 100755 src/test/java/icat/lucene/TestLucene.java diff --git a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java index fcae1c9..d02a542 100755 --- a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java +++ b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java @@ -1,36 +1,42 @@ package org.icatproject.lucene; +import java.util.Arrays; +import java.util.List; + import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.EnglishPossessiveFilter; import org.apache.lucene.analysis.en.PorterStemFilter; -// import org.apache.lucene.analysis.standard.StandardAnalyzer ; import org.apache.lucene.analysis.standard.StandardTokenizer; -// public class IcatAnalyzer extends Analyzer { - -// @Override -// protected TokenStreamComponents createComponents(String fieldName) { -// StandardAnalyzer analyzer = new StandardAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); -// Analyzer.TokenStreamComponents stream = analyzer.createComponents(fieldName); -// sink = new EnglishPossessiveFilter(stream.getTokenStream()); -// sink = new PorterStemFilter(sink); -// return new TokenStreamComponents(source, sink); -// } -// } public class IcatAnalyzer extends Analyzer { + public static final CharArraySet SCIENTIFIC_STOP_WORDS_SET; + + /** + * Do not include (As At Be In No) in the stop words as these are chemical + * symbols. 
Otherwise, the set should match Lucene's ENGLISH_STOP_WORDS_SET + */ + static { + final List stopWords = + Arrays.asList("a", "an", "and", "are", "but", "by", "for", "if", "into", "is", + "it", "not", "of", "on", "or", "such", "that", "the", "their", "then", + "there", "these", "they", "this", "to", "was", "will", "with"); + final CharArraySet stopSet = new CharArraySet(stopWords, false); + SCIENTIFIC_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); + } + @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new StandardTokenizer(); TokenStream sink = new EnglishPossessiveFilter(source); sink = new LowerCaseFilter(sink); - sink = new StopFilter(sink, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); + sink = new StopFilter(sink, SCIENTIFIC_STOP_WORDS_SET); sink = new PorterStemFilter(sink); return new TokenStreamComponents(source, sink); } diff --git a/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java new file mode 100755 index 0000000..82703be --- /dev/null +++ b/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java @@ -0,0 +1,50 @@ +package org.icatproject.lucene; + +import java.io.FileNotFoundException; +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.IOException; +import java.text.ParseException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.en.EnglishPossessiveFilter; +import org.apache.lucene.analysis.en.PorterStemFilter; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.synonym.SolrSynonymParser; +import org.apache.lucene.analysis.synonym.SynonymGraphFilter; +import org.apache.lucene.analysis.synonym.SynonymMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class IcatSynonymAnalyzer extends Analyzer { + + private SynonymMap synonyms; + + public IcatSynonymAnalyzer() + throws IOException, ParseException { + super(); + // Load synonyms from resource file + InputStream in = IcatSynonymAnalyzer.class.getClassLoader().getResourceAsStream("synonym.txt"); + BufferedReader reader = new BufferedReader(new InputStreamReader(in)); + SolrSynonymParser parser = new SolrSynonymParser(true, true, new StandardAnalyzer()); + parser.parse(reader); + synonyms = parser.build(); + } + + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer source = new StandardTokenizer(); + TokenStream sink = new EnglishPossessiveFilter(source); + sink = new LowerCaseFilter(sink); + sink = new StopFilter(sink, IcatAnalyzer.SCIENTIFIC_STOP_WORDS_SET); + sink = new PorterStemFilter(sink); + sink = new SynonymGraphFilter(sink, synonyms, false); + return new TokenStreamComponents(source, sink); + } +} diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 737542b..32f2d39 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -6,6 +6,7 @@ import java.net.HttpURLConnection; import java.nio.file.FileVisitOption; import java.nio.file.Files; +import java.text.ParseException; import java.util.Comparator; import 
java.util.HashMap; import java.util.Map; @@ -638,7 +639,14 @@ private void init() { parser = new StandardQueryParser(); StandardQueryConfigHandler qpConf = (StandardQueryConfigHandler) parser.getQueryConfigHandler(); - qpConf.set(ConfigurationKeys.ANALYZER, analyzer); + try { + // Attempt init an Analyzer which injects synonyms for searching + qpConf.set(ConfigurationKeys.ANALYZER, new IcatSynonymAnalyzer()); + } catch (IOException | ParseException e) { + // If synonym files cannot be parsed, default to using the same analyzer as for writing + logger.info("Synonym files not found, synonyms will not be injected"); + qpConf.set(ConfigurationKeys.ANALYZER, analyzer); + } qpConf.set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, true); timer = new Timer("LuceneCommitTimer"); diff --git a/src/main/resources/synonym.txt b/src/main/resources/synonym.txt new file mode 100755 index 0000000..92cc5e9 --- /dev/null +++ b/src/main/resources/synonym.txt @@ -0,0 +1,124 @@ +# Synonyms to be applied after stemming according to the Porter algorithm + +# Technical terms +ionis, ioniz + +# Elements +Hydrogen, H +Helium, He +Lithium, Li +Beryllium, Be +Boron, B +Carbon, C +Nitrogen, N +Oxygen, O +Fluorine, F +Neon, Ne +Sodium,Na +Magnesium,Mg +Aluminum,Al +Silicon,Si +Phosphorus,P +Sulfur, Sulphur,S +Chlorine,Cl +Argon,Ar +Potassium,K +Calcium,Ca +Scandium,Sc +Titanium,Ti +Vanadium,V +Chromium,Cr +Manganese,Mn +Iron,Fe +Cobalt,Co +Nickel,Ni +Copper,Cu +Zinc,Zn +Gallium,Ga +Germanium,Ge +Arsenic,As +Selenium,Se +Bromine,Br +Krypton,Kr +Rubidium,Rb +Strontium,Sr +Yttrium,Y +Zirconium,Zr +Niobium,Nb +Molybdenum,Mo +Technetium,Tc +Ruthenium,Ru +Rhodium,Rh +Palladium,Pd +Silver,Ag +Cadmium,Cd +Indium,In +Tin,Sn +Antimony,Sb +Tellurium,Te +Iodine,I +Xenon,Xe +Caesium, Cesium, Cs +Barium,Ba +Lanthanum,La +Cerium,Ce +Praseodymium,Pr +Neodymium,Nd +Promethium,Pm +Samarium,Sm +Europium,Eu +Gadolinium,Gd +Terbium,Tb +Dysprosium,Dy +Holmium,Ho +Erbium,Er +Thulium,Tm +Ytterbium,Yb +Lutetium,Lu +Hafnium,Hf +Tantalum,Ta +Tungsten, Wolfram,W +Rhenium,Re +Osmium,Os +Iridium,Ir +Platinum,Pt +Gold,Au +Mercury,Hg +Thallium,Tl +Lead,Pb +Bismuth,Bi +Polonium,Po +Astatine,At +Radon,Rn +Francium,Fr +Radium,Ra +Actinium,Ac +Thorium,Th +Protactinium,Pa +Uranium,U +Neptunium,Np +Plutonium,Pu +Americium,Am +Curium,Cm +Berkelium,Bk +Californium,Cf +Einsteinium,Es +Fermium,Fm +Mendelevium,Md +Nobelium,No +Lawrencium,Lr +Rutherfordium,Rf +Dubnium,Db +Seaborgium,Sg +Bohrium,Bh +Hassium,Hs +Meitnerium, Mt +Darmstadtium ,Ds +Roentgenium ,Rg +Copernicium ,Cn +Nihonium,Nh +Flerovium,Fl +Moscovium,Mc +Livermorium,Lv +Tennessine,Ts +Oganesson,Og \ No newline at end of file diff --git a/src/test/java/icat/lucene/TestLucene.java b/src/test/java/icat/lucene/TestLucene.java old mode 100644 new mode 100755 index f5cd493..a03caa4 --- a/src/test/java/icat/lucene/TestLucene.java +++ b/src/test/java/icat/lucene/TestLucene.java @@ -39,6 +39,7 @@ import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; import org.icatproject.lucene.IcatAnalyzer; +import org.icatproject.lucene.IcatSynonymAnalyzer; import org.junit.Test; public class TestLucene { @@ -70,6 +71,64 @@ public void testIcatAnalyzer() throws Exception { assertEquals(" demo 1st number 2 all sing danc tokenstream api ad aardvark", newString); } + /** + * Test that IcatSynonymAnalyzer injects stems for alternate spellings and + * chemical symbols for the elements + */ + @Test + public void testIcatSynonymAnalyzer() throws Exception { + final String text = "hydrogen Helium LITHIUM 
be B NE ionisation"; + int n = 0; + String newString = ""; + + try (Analyzer analyzer = new IcatSynonymAnalyzer()) { + TokenStream stream = analyzer.tokenStream("field", new StringReader(text)); + CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); + try { + stream.reset(); // Curiously this is required + while (stream.incrementToken()) { + n++; + newString = newString + " " + termAtt; + } + stream.end(); + } finally { + stream.close(); + } + } + + assertEquals(14, n); + assertEquals(" h hydrogen he helium li lithium beryllium be boron b neon ne ioniz ionis", newString); + } + + /** + * Test that we do not stop words that are chemical symbols (As At Be In No) + * but otherwise filter out stop words + */ + @Test + public void testIcatAnalyzerStopWords() throws Exception { + final String text = "as at be in no that the their then there"; + int n = 0; + String newString = ""; + + try (Analyzer analyzer = new IcatAnalyzer()) { + TokenStream stream = analyzer.tokenStream("field", new StringReader(text)); + CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); + try { + stream.reset(); // Curiously this is required + while (stream.incrementToken()) { + n++; + newString = newString + " " + termAtt; + } + stream.end(); + } finally { + stream.close(); + } + } + + assertEquals(5, n); + assertEquals(" as at be in no", newString); + } + @Test public void testJoins() throws Exception { Analyzer analyzer = new IcatAnalyzer(); From bcf46af9630f9fb126d584ac69936fb194dfbb4c Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 2 Feb 2022 15:30:22 +0000 Subject: [PATCH 23/73] Avoid index error for maxScore --- src/main/java/org/icatproject/lucene/Lucene.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 17f48f4..faba565 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -944,7 +944,13 @@ private String luceneSearchResult(String name, Search search, int maxResults, Lo TopDocs topDocs = search.lastDoc == null ? 
isearcher.search(search.query, maxResults) : isearcher.searchAfter(search.lastDoc, search.query, maxResults); ScoreDoc[] hits = topDocs.scoreDocs; - logger.debug("Hits " + topDocs.totalHits + " maxscore " + topDocs.scoreDocs[0].score); + Float maxScore; + if (hits.length == 0) { + maxScore = Float.NaN; + } else { + maxScore = hits[0].score; + } + logger.debug("Hits " + topDocs.totalHits + " maxscore " + maxScore); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject();
From 2046da5398d7d102495550480594746058fee277 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 10 Feb 2022 17:48:13 +0000 Subject: [PATCH 24/73] Handle facet exceptions from server tests #19 --- .../java/org/icatproject/lucene/Lucene.java | 62 +++++++++++++------ 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index faba565..a02078e 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -6,6 +6,7 @@ import java.net.HttpURLConnection; import java.nio.file.FileVisitOption; import java.nio.file.Files; +import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -733,6 +734,8 @@ private void init() { qpConf.set(ConfigurationKeys.ANALYZER, analyzer); qpConf.set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, true); + facetsConfig.setMultiValued("sample", true); + timer = new Timer("LuceneCommitTimer"); timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); @@ -902,36 +905,55 @@ public void lock(@PathParam("entityName") String entityName) throws LuceneExcept } private String luceneFacetResult(String name, Search search, int maxResults, int maxLabels, Long uid) - throws IOException { - IndexSearcher isearcher = getSearcher(search.searcherMap, name); - DirectoryReader directoryReader = getReader(search.readerMap, name); - logger.debug("To facet in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher, - search.lastDoc); - DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(directoryReader); - FacetsCollector facetsCollector = new FacetsCollector(); - FacetsCollector.search(isearcher, search.query, maxResults, facetsCollector); - Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); - List<FacetResult> results = facets.getAllDims(maxLabels); - logger.debug("Facets found for " + results.size() + " dimensions"); + throws IOException, IllegalStateException { + List<FacetResult> results; + if (maxResults <= 0 || maxLabels <= 0) { + // This will result in no Facets and a null pointer, so return early + logger.warn("No facets possible for maxResults={}, maxLabels={}, returning empty list", maxResults, maxLabels); + results = new ArrayList<>(); + } else { + // TODO Consider either making this approach uniform, or only doing it for entities where we facet + DirectoryReader directoryReader = getReader(search.readerMap, name); + IndexSearcher isearcher = new IndexSearcher(directoryReader); + logger.debug("To facet in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher, + search.lastDoc); + try { + DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(directoryReader); + FacetsCollector facetsCollector = new FacetsCollector(); + logger.debug("DR: {}, IS: {}", directoryReader,
isearcher.getTopReaderContext()); + FacetsCollector.search(isearcher, search.query, maxResults, facetsCollector); + Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); + logger.debug("facets: {}, maxLabels: {}, maxResults: {}", facets, maxLabels, maxResults); + results = facets.getAllDims(maxLabels); + } catch (IllegalArgumentException e) { + // This can occur if no fields in the index have been faceted + logger.error("No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage()); + results = new ArrayList<>(); + } catch (IllegalStateException e) { + // This can occur if we do not create the IndexSearcher from the same DirectoryReader as we used to + // create the state + logger.error("IndexSearcher used is not based on the DirectoryReader used for facet counting: " + + e.getClass() + " " + e.getMessage()); + throw e; + } + logger.debug("Facets found for " + results.size() + " dimensions"); + } ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject(); if (uid != null) { gen.write("uid", uid); } - gen.writeStartArray("facets"); // array of all facet dimensions + gen.writeStartObject("dimensions"); // object containing all facet dimensions for (FacetResult result : results) { - gen.writeStartArray(result.dim); // array of labelValues for a given dimension + gen.writeStartObject(result.dim); // object containing labelValues for a given dimension for (LabelAndValue labelValue : result.labelValues) { - gen.writeStartArray("labelValue"); // 2 element array of label, value - gen.write(labelValue.label); - gen.write(labelValue.value.longValue()); - gen.writeEnd(); // array of label, value + gen.write(labelValue.label, labelValue.value.longValue()); } - gen.writeEnd(); // array of labelValues for a given dimension + gen.writeEnd(); // object containing labelValues } - gen.writeEnd(); // array of facet dimensions - gen.writeEnd(); // object + gen.writeEnd(); // object containing dimensions + gen.writeEnd(); } logger.debug("Json returned {}", baos.toString()); return baos.toString(); From 7c127688a92399022f423d620f0596f3dcf99aec Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 11 Feb 2022 05:36:20 +0000 Subject: [PATCH 25/73] Add script to generate synonyms from csv #16 --- .../org/icatproject/lucene/IcatAnalyzer.java | 2 +- .../lucene/IcatSynonymAnalyzer.java | 6 +- src/main/resources/synonym.txt | 127 ++++++++++++- src/main/scripts/parse_synonyms.py | 176 ++++++++++++++++++ src/test/java/icat/lucene/TestLucene.java | 10 +- 5 files changed, 307 insertions(+), 14 deletions(-) create mode 100644 src/main/scripts/parse_synonyms.py diff --git a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java index d02a542..a70cbd2 100755 --- a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java +++ b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java @@ -25,7 +25,7 @@ public class IcatAnalyzer extends Analyzer { static { final List stopWords = Arrays.asList("a", "an", "and", "are", "but", "by", "for", "if", "into", "is", - "it", "not", "of", "on", "or", "such", "that", "the", "their", "then", + "it", "not", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"); final CharArraySet stopSet = new CharArraySet(stopWords, false); SCIENTIFIC_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); diff --git 
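(For illustration, the facet response assembled by luceneFacetResult above now takes a shape along the lines of {"dimensions": {"instrument": {"LARMOR": 10, "WISH": 3}}} — the dimension name and labels here are invented — with one inner object per faceted dimension mapping each label to its count, plus the optional top-level "uid".)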
a/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java index 82703be..26841f1 100755 --- a/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java +++ b/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java @@ -1,6 +1,5 @@ package org.icatproject.lucene; -import java.io.FileNotFoundException; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; @@ -14,13 +13,10 @@ import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.en.EnglishPossessiveFilter; import org.apache.lucene.analysis.en.PorterStemFilter; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.synonym.SolrSynonymParser; import org.apache.lucene.analysis.synonym.SynonymGraphFilter; import org.apache.lucene.analysis.synonym.SynonymMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class IcatSynonymAnalyzer extends Analyzer { @@ -32,7 +28,7 @@ public IcatSynonymAnalyzer() // Load synonyms from resource file InputStream in = IcatSynonymAnalyzer.class.getClassLoader().getResourceAsStream("synonym.txt"); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - SolrSynonymParser parser = new SolrSynonymParser(true, true, new StandardAnalyzer()); + SolrSynonymParser parser = new SolrSynonymParser(true, true, new IcatAnalyzer()); parser.parse(reader); synonyms = parser.build(); } diff --git a/src/main/resources/synonym.txt b/src/main/resources/synonym.txt index 92cc5e9..1d3bc5d 100755 --- a/src/main/resources/synonym.txt +++ b/src/main/resources/synonym.txt @@ -1,7 +1,7 @@ # Synonyms to be applied after stemming according to the Porter algorithm -# Technical terms -ionis, ioniz +# Alternate spellings +ionise, ionize # Elements Hydrogen, H @@ -121,4 +121,125 @@ Flerovium,Fl Moscovium,Mc Livermorium,Lv Tennessine,Ts -Oganesson,Og \ No newline at end of file +Oganesson,Og + +# Techniques +propagation technique, forward scattering technique => propagation technique, forward scattering technique +time of flight technique, TOF => time of flight technique, TOF +ultrafast probe, femtosecond probe => ultrafast probe, femtosecond probe +MuSR, muon spin resonance => MuSR, muon spin resonance +obtain crystal structure, crystallography => obtain crystal structure, crystallography +time dependent study, time resolved study => time dependent study, time resolved study +ARPES, angle resolved photoemission spectroscopy => ARPES, angle resolved photoemission spectroscopy +grazing incidence SAS, grazing incidence small angle scattering, GISAS => grazing incidence SAS, grazing incidence small angle scattering, GISAS +NPD, neutron powder diffraction => NPD, neutron powder diffraction +XPD, x-ray powder diffraction => XPD, x-ray powder diffraction +single crystal x-ray diffraction, x-ray single crystal diffraction, SXRD => single crystal x-ray diffraction, x-ray single crystal diffraction, SXRD +hard photoelectron spectroscopy, hard x-ray photoelectron spectroscopy, HAXPES => hard photoelectron spectroscopy, hard x-ray photoelectron spectroscopy, HAXPES +inelastic small angle scatteringng, inelastic SAS => inelastic small angle scatteringng, inelastic SAS +infrared spectroscopy, IR spectroscopy => infrared spectroscopy, IR spectroscopy +fluorescence microscopy, microfluorescence, Micro XRF => fluorescence microscopy, microfluorescence, Micro XRF +PCS, photon correlation 
spectroscopy => PCS, photon correlation spectroscopy +quasielastic spin echo, quasi elastic spin echo, quasielastic neutron spin echo scattering => quasielastic spin echo, quasi elastic spin echo, quasielastic neutron spin echo scattering +reflectivity, reflectometry => reflectivity, reflectometry +anomalous scattering, anomalous diffraction, resonant diffraction => anomalous scattering, anomalous diffraction, resonant diffraction +STM, scanning transmission microscopy => STM, scanning transmission microscopy +small angle diffraction, small angle scattering, SAS => small angle diffraction, small angle scattering, SAS +spin echo small angle scattering, spin echo SANS => spin echo small angle scattering, spin echo SANS +UV circular dichroism, UVCD => UV circular dichroism, UVCD +ultra small angle scattering, USAS => ultra small angle scattering, USAS +topography, diffraction imaging => topography, diffraction imaging +XMCD, x-ray magnetic circular dichroism => XMCD, x-ray magnetic circular dichroism +linear dichroism, LD => linear dichroism, LD +x-ray excited optical luminescence, XEOL => x-ray excited optical luminescence, XEOL +magnetic circular dichroism, MCD => magnetic circular dichroism, MCD +magnetochiral dichroism, MChD => magnetochiral dichroism, MChD +natural circular dichroism, NCD => natural circular dichroism, NCD +EM, electron microscopy => EM, electron microscopy +photoemission microscopy, PEEM, photoemission electron microscopy => photoemission microscopy, PEEM, photoemission electron microscopy +scanning microscopy, scanning probe microscopy => scanning microscopy, scanning probe microscopy +XRR, x-ray reflectometry, x-ray reflectivity => XRR, x-ray reflectometry, x-ray reflectivity +energy dispersive diffraction, EDD => energy dispersive diffraction, EDD +energy dispersive x-ray diffraction, EDXRD => energy dispersive x-ray diffraction, EDXRD +grazing incidence x-ray diffraction, GIXD => grazing incidence x-ray diffraction, GIXD +grazing incidence small angle x-ray scattering, GISAXS => grazing incidence small angle x-ray scattering, GISAXS +high pressure single crystal diffraction, Diffraction => high pressure single crystal diffraction, Diffraction +protein crystallography, macromolecular crystallography, MX => protein crystallography, macromolecular crystallography, MX +multi wavelength anomalous dispersion, multi wavelength anomalous diffraction, MAD => multi wavelength anomalous dispersion, multi wavelength anomalous diffraction, MAD +PhD, photoelectron diffraction => PhD, photoelectron diffraction +SFX, serial femtosecond crystallography => SFX, serial femtosecond crystallography +serial synchrotron crystallography, SSX => serial synchrotron crystallography, SSX +single wavelength anomalous diffraction, SAD, single wavelength anomalous dispersion => single wavelength anomalous diffraction, SAD, single wavelength anomalous dispersion +chemical crystallography, small molecule diffraction, small molecule crystallography => chemical crystallography, small molecule diffraction, small molecule crystallography +x-ray standing wave, XSW => x-ray standing wave, XSW +coherent diffraction imaging, coherent diffractive imaging, CDI => coherent diffraction imaging, coherent diffractive imaging, CDI +nano infrared spectroscopy, infrared nanospectroscopy imaging => nano infrared spectroscopy, infrared nanospectroscopy imaging +XRF, x-ray fluorescence => XRF, x-ray fluorescence +infrared microscopy, IR microscopy => infrared microscopy, IR microscopy +pair distribution function, PDF 
=> pair distribution function, PDF +inelastic x-ray scattering, IXS => inelastic x-ray scattering, IXS +resonant inelastic x-ray scattering, RIXS => resonant inelastic x-ray scattering, RIXS +resonant x-ray scattering, RXS => resonant x-ray scattering, RXS +resonant soft x-ray scattering, RSXS => resonant soft x-ray scattering, RSXS +small angle x-ray scattering, SAXS => small angle x-ray scattering, SAXS +SANS, small angle neutron scattering => SANS, small angle neutron scattering +wide angle x-ray scattering, WAXS => wide angle x-ray scattering, WAXS +circular dichroism, CD => circular dichroism, CD +EDX, energy dispersive x-ray spectroscopy => EDX, energy dispersive x-ray spectroscopy +XAS, x-ray absorption spectroscopy => XAS, x-ray absorption spectroscopy +XAFS, x-ray absorption fine structure => XAFS, x-ray absorption fine structure +extended x-ray absorption fine structure, EXAFS => extended x-ray absorption fine structure, EXAFS +XANES, x-ray absorption near edge structure, NEXAFS => XANES, x-ray absorption near edge structure, NEXAFS +x-ray emission spectroscopy, XES => x-ray emission spectroscopy, XES +PES, photoelectron spectroscopy => PES, photoelectron spectroscopy +x-ray photoelectron spectroscopy, XPS => x-ray photoelectron spectroscopy, XPS +x-ray photon correlation spectroscopy, XPCS => x-ray photon correlation spectroscopy, XPCS +x-ray tomography, CT scan => x-ray tomography, CT scan +Absorption-based tomographic microscopy, absorption microtomography => Absorption-based tomographic microscopy, absorption microtomography +Ultra-fast tomographic microscopy, ultrafast microtomography => Ultra-fast tomographic microscopy, ultrafast microtomography +XRD, x-ray diffraction => XRD, x-ray diffraction +STXM, scanning transmission x-ray microscopy => STXM, scanning transmission x-ray microscopy +TEY, total electron yield => TEY, total electron yield +XMCD total electron yield, XMCD TEY => XMCD total electron yield, XMCD TEY +neutron reflectivity, neutron reflectometry => neutron reflectivity, neutron reflectometry +ultra small angle x-ray scattering, USAXS => ultra small angle x-ray scattering, USAXS +polarized neutron reflectometry, polarized neutron reflectivity => polarized neutron reflectometry, polarized neutron reflectivity +TOF spectrometry, time-of-flight spectrometry, TOF spectroscopy => TOF spectrometry, time-of-flight spectrometry, TOF spectroscopy +inelastic neutron scattering spectroscopy, inelastic neutron spectroscopy, inelastic neutron scattering => inelastic neutron scattering spectroscopy, inelastic neutron spectroscopy, inelastic neutron scattering +x-ray magnetic linear dichroism, XMLD => x-ray magnetic linear dichroism, XMLD +resonant elastic x-ray scattering, REXS => resonant elastic x-ray scattering, REXS +x-ray refraction radiography, x-ray refraction imaging => x-ray refraction radiography, x-ray refraction imaging +time dependent scattering, time resolved scattering => time dependent scattering, time resolved scattering +time resolved diffraction, time dependent diffraction => time resolved diffraction, time dependent diffraction +time dependent absorption, time resolved absorption => time dependent absorption, time resolved absorption +anomalous small angle x-ray scattering, ASAXS => anomalous small angle x-ray scattering, ASAXS +ASAX, anomalous solution x-ray scattering => ASAX, anomalous solution x-ray scattering +grazing incidence small angle neutron scattering, GISANS => grazing incidence small angle neutron scattering, GISANS +VSANS, very small 
angle neutron scattering => VSANS, very small angle neutron scattering +micro SAXS tomography, micro small angle x-ray scattering tomography => micro SAXS tomography, micro small angle x-ray scattering tomography +micro grazing incidence small angle x-ray scattering tomography, micro GISAXS tomography => micro grazing incidence small angle x-ray scattering tomography, micro GISAXS tomography +nano angle resolved photoemission spectroscopy, nano ARPES => nano angle resolved photoemission spectroscopy, nano ARPES +scanning x-ray microscopy, x-ray scanning microscopy => scanning x-ray microscopy, x-ray scanning microscopy +high resolution x-ray photoelectron spectroscopy, HR-XPS => high resolution x-ray photoelectron spectroscopy, HR-XPS +resolution elastic neutron scattering, elastic neutron scattering spectroscopy, RENS => resolution elastic neutron scattering, elastic neutron scattering spectroscopy, RENS +x-ray magnetochiral dichroism, XMChiD => x-ray magnetochiral dichroism, XMChiD +x-ray natural circular dichroism, XNCD => x-ray natural circular dichroism, XNCD +XNLD, x-ray natural linear dichroism => XNLD, x-ray natural linear dichroism +fragment screening, crystallographic fragment screening => fragment screening, crystallographic fragment screening +microfocus macromolecular crystallography, microfocus MX => microfocus macromolecular crystallography, microfocus MX +nanofocus MX, nanofocus macromolecular crystallography => nanofocus MX, nanofocus macromolecular crystallography +MR, molecular replacement => MR, molecular replacement +time resolved serial femtosecond crystallography, TR-SFX => time resolved serial femtosecond crystallography, TR-SFX +fixed target serial synchrotron crystallography, FT-SSX => fixed target serial synchrotron crystallography, FT-SSX +LCP-SSX, lipidic cubic phase serial synchrotron crystallography => LCP-SSX, lipidic cubic phase serial synchrotron crystallography +TR-SSX, time resolved serial synchrotron crystallography => TR-SSX, time resolved serial synchrotron crystallography +CLXM, correlative light x-ray microscopy => CLXM, correlative light x-ray microscopy +grazing incidence wide angle scattering, GIWAXS => grazing incidence wide angle scattering, GIWAXS +high resolution angle resolved photoemission spectroscopy, HR-ARPES => high resolution angle resolved photoemission spectroscopy, HR-ARPES +atomic force microscopy, AFM => atomic force microscopy, AFM +AFM-IR, atomic force microscope infrared spectroscopy => AFM-IR, atomic force microscope infrared spectroscopy +fourier transform infrared spectroscopy, FTIR => fourier transform infrared spectroscopy, FTIR +EDE, energy dispersive extended x-ray absorption fine structure, ED-EXAFS => EDE, energy dispersive extended x-ray absorption fine structure, ED-EXAFS +radiation therapy, radiotherapy => radiation therapy, radiotherapy +surface crystallography, obtain surface atomic structure => surface crystallography, obtain surface atomic structure +x-ray birefringence imaging , XBI => x-ray birefringence imaging , XBI diff --git a/src/main/scripts/parse_synonyms.py b/src/main/scripts/parse_synonyms.py new file mode 100644 index 0000000..11e7621 --- /dev/null +++ b/src/main/scripts/parse_synonyms.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python + +import csv +import sys +from typing import Dict, List + + +def addToParents( + relationships: Dict[str, Dict[str, List[str]]], + label: str, + parents: List[str], + childDepth: int +): + """ + Adds the `label` to all the entries in `relationships` that have a key in + 
`parents`, then recursively calls itself to add `label` to any + grandparents. `childDepth` is decreased by 1 for each generation to prevent + exponentially large injections. + + Parameters + ---------- + relationships: Dict[str, Dict[str, List[str]]] + Maps terms to an inner dictionary containing arrays for "alternatives", + "parents", and "children". + label: str + The term to be added to its `parents`. + parents: List[str] + The direct parents of the current `label` + childDepth: int + The number of generations of children to inject for each term. + For example, a value of 2 would inject children and their children. + 0 will only add alternative terms. Negative integers will add all + children, grandchildren, etc. Note that this may result in an + exponentially large number of terms + """ + if childDepth != 0: + for parent in parents: + try: + relationships[parent]["children"].append(label) + addToParents( + relationships, + label, + relationships[parent]["parents"], + childDepth - 1, + ) + except KeyError: + pass + + +def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): + """ + Reads a CSV file of terminology and writes it into Solr synonym format + for use in synonym injection. Alternative terms are always written, and the + number of child terms is configurable by `maxChildDepth`. + + Parameters + ---------- + inputFile: str + CSV file to read ontology from. + outputFile: str + Solr synonym output file. + mode: str + Python file mode (w, a, ...) to use when writing the output file. + maxChildDepth: int + The maximum number of generations of children to inject for each term. + For example, a value of 2 would inject children and their children. + 0 will only add alternative terms. Negative integers will add all + children, grandchildren, etc. Note that this may result in an
Note that this may result in an + exponentially large number of terms + """ + altIndices = [] + parentIndices = [] + # equivalentIndices = [] + relationships = {} + with open(inputFile) as f: + reader = csv.reader(f) + + # Dynamically determine header positions + headers = next(reader) + for i, header in enumerate(headers): + if "Label" == header.strip(): + labelIndex = i + # elif "Class Type" == header: + # classIndex = i + elif "Alt Label" in header.strip(): + altIndices.append(i) + elif "Parent IRI" == header.strip(): + parentIndices.append(i) + # elif "Equivalent" == header.strip(): + # equivalentIndices.append(i) + + for entries in reader: + try: + int(entries[0]) + except (ValueError, IndexError): + # If we do not have an ID, continue to the next line + continue + + label = entries[labelIndex] + if label in relationships.keys(): + raise ValueError(f"Duplicate entry for label {label}") + + # relationships[label] = { + # "alternatives": [], + # "parents": [], + # "equivalent": [], + # "children": [], + # } + relationships[label] = { + "alternatives": [], "parents": [], "children": [] + } + # classType = entries[classIndex] + for altIndex in altIndices: + alternativeLabel = entries[altIndex] + if alternativeLabel != "": + relationships[label]["alternatives"].append( + alternativeLabel + ) + for parentIndex in parentIndices: + parent = entries[parentIndex] + if parent != "": + relationships[label]["parents"].append(parent) + # for equivalentIndex in equivalentIndices: + # equivalentLabel = entries[equivalentIndex] + # if equivalentLabel != "": + # relationships[label]["equivalent"].append(equivalentLabel) + + print(f"{len(relationships)} relationships found") + for label, relationship in relationships.items(): + addToParents( + relationships, label, relationship["parents"], maxChildDepth + ) + + output = "" + for label, relationship in relationships.items(): + # Only write to file if we have alternative or child terms + if (len(relationship["alternatives"]) > 0 + or len(relationship["children"]) > 0): + leftHandSide = ", ".join( + set([label] + relationship["alternatives"]) + ) + rightHandSide = ", ".join( + set( + [label] + + relationship["alternatives"] + + relationship["children"] + ) + ) + output += leftHandSide + " => " + rightHandSide + "\n" + + with open(outputFile, mode) as f: + f.write(output) + + +if __name__ == "__main__": + args = sys.argv + try: + inputFile = args[1] + except IndexError as e: + raise IndexError("inputFile to parse not provided") from e + try: + outputFile = args[2] + except IndexError as e: + raise IndexError("outputFile to write to not provided") from e + try: + mode = args[3] + except IndexError: + # Default to appending to the outputFile (no overwrite) + mode = "a" + try: + maxChildDepth = int(args[4]) + except (IndexError, ValueError): + # Default to 0 depth (only alternative terms) + maxChildDepth = 0 + + main(inputFile, outputFile, mode, maxChildDepth) diff --git a/src/test/java/icat/lucene/TestLucene.java b/src/test/java/icat/lucene/TestLucene.java index a03caa4..82b8cb3 100755 --- a/src/test/java/icat/lucene/TestLucene.java +++ b/src/test/java/icat/lucene/TestLucene.java @@ -67,8 +67,8 @@ public void testIcatAnalyzer() throws Exception { } } - assertEquals(11, n); - assertEquals(" demo 1st number 2 all sing danc tokenstream api ad aardvark", newString); + assertEquals(12, n); + assertEquals(" demo of 1st number 2 all sing danc tokenstream api ad aardvark", newString); } /** @@ -77,7 +77,7 @@ public void testIcatAnalyzer() throws Exception { */ @Test 
public void testIcatSynonymAnalyzer() throws Exception { - final String text = "hydrogen Helium LITHIUM be B NE ionisation"; + final String text = "hydrogen Helium LITHIUM be B NE ionisation TIME of FLIGHT technique ArPeS"; int n = 0; String newString = ""; @@ -96,8 +96,8 @@ public void testIcatSynonymAnalyzer() throws Exception { } } - assertEquals(14, n); - assertEquals(" h hydrogen he helium li lithium beryllium be boron b neon ne ioniz ionis", newString); + assertEquals(24, n); + assertEquals(" h hydrogen he helium li lithium beryllium be boron b neon ne ioniz ionis time tof of flight techniqu arp angl resolv photoemiss spectroscopi", newString); } /** From b32f3aa0460bbe544c49e1972d1a1d72c0dd59f7 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Sat, 12 Feb 2022 12:48:28 +0000 Subject: [PATCH 26/73] Take equivalent labels into account #16 --- src/main/resources/synonym.txt | 159 +++++++++++++++-------------- src/main/scripts/parse_synonyms.py | 45 +++++--- 2 files changed, 108 insertions(+), 96 deletions(-) diff --git a/src/main/resources/synonym.txt b/src/main/resources/synonym.txt index 1d3bc5d..5e633da 100755 --- a/src/main/resources/synonym.txt +++ b/src/main/resources/synonym.txt @@ -124,122 +124,123 @@ Tennessine,Ts Oganesson,Og # Techniques -propagation technique, forward scattering technique => propagation technique, forward scattering technique -time of flight technique, TOF => time of flight technique, TOF -ultrafast probe, femtosecond probe => ultrafast probe, femtosecond probe +forward scattering technique, propagation technique => forward scattering technique, propagation technique +TOF, time of flight technique => TOF, time of flight technique +femtosecond probe, ultrafast probe => femtosecond probe, ultrafast probe MuSR, muon spin resonance => MuSR, muon spin resonance -obtain crystal structure, crystallography => obtain crystal structure, crystallography +crystallography, obtain crystal structure => crystallography, obtain crystal structure time dependent study, time resolved study => time dependent study, time resolved study ARPES, angle resolved photoemission spectroscopy => ARPES, angle resolved photoemission spectroscopy -grazing incidence SAS, grazing incidence small angle scattering, GISAS => grazing incidence SAS, grazing incidence small angle scattering, GISAS +GISAS, grazing incidence SAS, grazing incidence small angle scattering => GISAS, grazing incidence SAS, grazing incidence small angle scattering NPD, neutron powder diffraction => NPD, neutron powder diffraction XPD, x-ray powder diffraction => XPD, x-ray powder diffraction -single crystal x-ray diffraction, x-ray single crystal diffraction, SXRD => single crystal x-ray diffraction, x-ray single crystal diffraction, SXRD -hard photoelectron spectroscopy, hard x-ray photoelectron spectroscopy, HAXPES => hard photoelectron spectroscopy, hard x-ray photoelectron spectroscopy, HAXPES -inelastic small angle scatteringng, inelastic SAS => inelastic small angle scatteringng, inelastic SAS -infrared spectroscopy, IR spectroscopy => infrared spectroscopy, IR spectroscopy -fluorescence microscopy, microfluorescence, Micro XRF => fluorescence microscopy, microfluorescence, Micro XRF +SXRD, single crystal x-ray diffraction, x-ray single crystal diffraction => SXRD, single crystal x-ray diffraction, x-ray single crystal diffraction +HAXPES, hard photoelectron spectroscopy, hard x-ray photoelectron spectroscopy => HAXPES, hard photoelectron spectroscopy, hard x-ray photoelectron spectroscopy +inelastic SAS, 
inelastic small angle scatteringng => inelastic SAS, inelastic small angle scatteringng +IR spectroscopy, infrared spectroscopy => IR spectroscopy, infrared spectroscopy +Micro XRF, fluorescence microscopy, microfluorescence => Micro XRF, fluorescence microscopy, microfluorescence PCS, photon correlation spectroscopy => PCS, photon correlation spectroscopy -quasielastic spin echo, quasi elastic spin echo, quasielastic neutron spin echo scattering => quasielastic spin echo, quasi elastic spin echo, quasielastic neutron spin echo scattering +quasi elastic spin echo, quasielastic neutron spin echo scattering, quasielastic spin echo => quasi elastic spin echo, quasielastic neutron spin echo scattering, quasielastic spin echo reflectivity, reflectometry => reflectivity, reflectometry -anomalous scattering, anomalous diffraction, resonant diffraction => anomalous scattering, anomalous diffraction, resonant diffraction +anomalous diffraction, anomalous scattering, resonant diffraction => anomalous diffraction, anomalous scattering, resonant diffraction STM, scanning transmission microscopy => STM, scanning transmission microscopy -small angle diffraction, small angle scattering, SAS => small angle diffraction, small angle scattering, SAS -spin echo small angle scattering, spin echo SANS => spin echo small angle scattering, spin echo SANS +SAS, small angle diffraction, small angle scattering => SAS, small angle diffraction, small angle scattering +spin echo SANS, spin echo small angle scattering => spin echo SANS, spin echo small angle scattering UV circular dichroism, UVCD => UV circular dichroism, UVCD -ultra small angle scattering, USAS => ultra small angle scattering, USAS -topography, diffraction imaging => topography, diffraction imaging +USAS, ultra small angle scattering => USAS, ultra small angle scattering +diffraction imaging, topography => diffraction imaging, topography XMCD, x-ray magnetic circular dichroism => XMCD, x-ray magnetic circular dichroism -linear dichroism, LD => linear dichroism, LD -x-ray excited optical luminescence, XEOL => x-ray excited optical luminescence, XEOL -magnetic circular dichroism, MCD => magnetic circular dichroism, MCD -magnetochiral dichroism, MChD => magnetochiral dichroism, MChD -natural circular dichroism, NCD => natural circular dichroism, NCD +LD, linear dichroism => LD, linear dichroism +XEOL, x-ray excited optical luminescence => XEOL, x-ray excited optical luminescence +MCD, magnetic circular dichroism => MCD, magnetic circular dichroism +MChD, magnetochiral dichroism => MChD, magnetochiral dichroism +NCD, natural circular dichroism => NCD, natural circular dichroism EM, electron microscopy => EM, electron microscopy -photoemission microscopy, PEEM, photoemission electron microscopy => photoemission microscopy, PEEM, photoemission electron microscopy +PEEM, photoemission electron microscopy, photoemission microscopy => PEEM, photoemission electron microscopy, photoemission microscopy scanning microscopy, scanning probe microscopy => scanning microscopy, scanning probe microscopy -XRR, x-ray reflectometry, x-ray reflectivity => XRR, x-ray reflectometry, x-ray reflectivity -energy dispersive diffraction, EDD => energy dispersive diffraction, EDD -energy dispersive x-ray diffraction, EDXRD => energy dispersive x-ray diffraction, EDXRD -grazing incidence x-ray diffraction, GIXD => grazing incidence x-ray diffraction, GIXD -grazing incidence small angle x-ray scattering, GISAXS => grazing incidence small angle x-ray scattering, GISAXS -high pressure 
single crystal diffraction, Diffraction => high pressure single crystal diffraction, Diffraction -protein crystallography, macromolecular crystallography, MX => protein crystallography, macromolecular crystallography, MX -multi wavelength anomalous dispersion, multi wavelength anomalous diffraction, MAD => multi wavelength anomalous dispersion, multi wavelength anomalous diffraction, MAD +XRR, x-ray reflectivity, x-ray reflectometry => XRR, x-ray reflectivity, x-ray reflectometry +EDD, energy dispersive diffraction => EDD, energy dispersive diffraction +EDXRD, energy dispersive x-ray diffraction => EDXRD, energy dispersive x-ray diffraction +GIXD, grazing incidence x-ray diffraction => GIXD, grazing incidence x-ray diffraction +GISAXS, grazing incidence small angle x-ray scattering => GISAXS, grazing incidence small angle x-ray scattering +Diffraction, high pressure single crystal diffraction => Diffraction, high pressure single crystal diffraction +MX, macromolecular crystallography, protein crystallography => MX, macromolecular crystallography, protein crystallography +MAD, multi wavelength anomalous diffraction, multi wavelength anomalous dispersion => MAD, multi wavelength anomalous diffraction, multi wavelength anomalous dispersion PhD, photoelectron diffraction => PhD, photoelectron diffraction SFX, serial femtosecond crystallography => SFX, serial femtosecond crystallography -serial synchrotron crystallography, SSX => serial synchrotron crystallography, SSX -single wavelength anomalous diffraction, SAD, single wavelength anomalous dispersion => single wavelength anomalous diffraction, SAD, single wavelength anomalous dispersion -chemical crystallography, small molecule diffraction, small molecule crystallography => chemical crystallography, small molecule diffraction, small molecule crystallography -x-ray standing wave, XSW => x-ray standing wave, XSW -coherent diffraction imaging, coherent diffractive imaging, CDI => coherent diffraction imaging, coherent diffractive imaging, CDI -nano infrared spectroscopy, infrared nanospectroscopy imaging => nano infrared spectroscopy, infrared nanospectroscopy imaging +SSX, serial synchrotron crystallography => SSX, serial synchrotron crystallography +SAD, single wavelength anomalous diffraction, single wavelength anomalous dispersion => SAD, single wavelength anomalous diffraction, single wavelength anomalous dispersion +chemical crystallography, small molecule crystallography, small molecule diffraction => chemical crystallography, small molecule crystallography, small molecule diffraction +XSW, x-ray standing wave => XSW, x-ray standing wave +CDI, coherent diffraction imaging, coherent diffractive imaging => CDI, coherent diffraction imaging, coherent diffractive imaging +infrared nanospectroscopy imaging, nano infrared spectroscopy => infrared nanospectroscopy imaging, nano infrared spectroscopy XRF, x-ray fluorescence => XRF, x-ray fluorescence -infrared microscopy, IR microscopy => infrared microscopy, IR microscopy -pair distribution function, PDF => pair distribution function, PDF -inelastic x-ray scattering, IXS => inelastic x-ray scattering, IXS -resonant inelastic x-ray scattering, RIXS => resonant inelastic x-ray scattering, RIXS -resonant x-ray scattering, RXS => resonant x-ray scattering, RXS -resonant soft x-ray scattering, RSXS => resonant soft x-ray scattering, RSXS -small angle x-ray scattering, SAXS => small angle x-ray scattering, SAXS +IR microscopy, infrared microscopy => IR microscopy, infrared microscopy +PDF, pair 
distribution function => PDF, pair distribution function +IXS, inelastic x-ray scattering => IXS, inelastic x-ray scattering +RIXS, resonant inelastic x-ray scattering => RIXS, resonant inelastic x-ray scattering +RXS, resonant x-ray scattering => RXS, resonant x-ray scattering +RSXS, resonant soft x-ray scattering => RSXS, resonant soft x-ray scattering +SAXS, small angle x-ray scattering => SAXS, small angle x-ray scattering SANS, small angle neutron scattering => SANS, small angle neutron scattering -wide angle x-ray scattering, WAXS => wide angle x-ray scattering, WAXS -circular dichroism, CD => circular dichroism, CD +WAXS, wide angle x-ray scattering => WAXS, wide angle x-ray scattering +CD, circular dichroism => CD, circular dichroism EDX, energy dispersive x-ray spectroscopy => EDX, energy dispersive x-ray spectroscopy XAS, x-ray absorption spectroscopy => XAS, x-ray absorption spectroscopy XAFS, x-ray absorption fine structure => XAFS, x-ray absorption fine structure -extended x-ray absorption fine structure, EXAFS => extended x-ray absorption fine structure, EXAFS -XANES, x-ray absorption near edge structure, NEXAFS => XANES, x-ray absorption near edge structure, NEXAFS -x-ray emission spectroscopy, XES => x-ray emission spectroscopy, XES +EXAFS, extended x-ray absorption fine structure => EXAFS, extended x-ray absorption fine structure +NEXAFS, XANES, x-ray absorption near edge structure => NEXAFS, XANES, x-ray absorption near edge structure +XES, x-ray emission spectroscopy => XES, x-ray emission spectroscopy PES, photoelectron spectroscopy => PES, photoelectron spectroscopy -x-ray photoelectron spectroscopy, XPS => x-ray photoelectron spectroscopy, XPS -x-ray photon correlation spectroscopy, XPCS => x-ray photon correlation spectroscopy, XPCS -x-ray tomography, CT scan => x-ray tomography, CT scan +XPS, x-ray photoelectron spectroscopy => XPS, x-ray photoelectron spectroscopy +XPCS, x-ray photon correlation spectroscopy => XPCS, x-ray photon correlation spectroscopy +CT scan, x-ray tomography => CT scan, x-ray tomography Absorption-based tomographic microscopy, absorption microtomography => Absorption-based tomographic microscopy, absorption microtomography Ultra-fast tomographic microscopy, ultrafast microtomography => Ultra-fast tomographic microscopy, ultrafast microtomography XRD, x-ray diffraction => XRD, x-ray diffraction STXM, scanning transmission x-ray microscopy => STXM, scanning transmission x-ray microscopy TEY, total electron yield => TEY, total electron yield -XMCD total electron yield, XMCD TEY => XMCD total electron yield, XMCD TEY +XMCD TEY, XMCD total electron yield => XMCD TEY, XMCD total electron yield neutron reflectivity, neutron reflectometry => neutron reflectivity, neutron reflectometry -ultra small angle x-ray scattering, USAXS => ultra small angle x-ray scattering, USAXS -polarized neutron reflectometry, polarized neutron reflectivity => polarized neutron reflectometry, polarized neutron reflectivity -TOF spectrometry, time-of-flight spectrometry, TOF spectroscopy => TOF spectrometry, time-of-flight spectrometry, TOF spectroscopy -inelastic neutron scattering spectroscopy, inelastic neutron spectroscopy, inelastic neutron scattering => inelastic neutron scattering spectroscopy, inelastic neutron spectroscopy, inelastic neutron scattering -x-ray magnetic linear dichroism, XMLD => x-ray magnetic linear dichroism, XMLD -resonant elastic x-ray scattering, REXS => resonant elastic x-ray scattering, REXS -x-ray refraction radiography, x-ray refraction 
imaging => x-ray refraction radiography, x-ray refraction imaging +USAXS, ultra small angle x-ray scattering => USAXS, ultra small angle x-ray scattering +polarized neutron reflectivity, polarized neutron reflectometry => polarized neutron reflectivity, polarized neutron reflectometry +TOF spectrometry, TOF spectroscopy, time-of-flight spectrometry => TOF spectrometry, TOF spectroscopy, time-of-flight spectrometry +inelastic neutron scattering, inelastic neutron scattering spectroscopy, inelastic neutron spectroscopy => inelastic neutron scattering, inelastic neutron scattering spectroscopy, inelastic neutron spectroscopy +XMLD, x-ray magnetic linear dichroism => XMLD, x-ray magnetic linear dichroism +REXS, resonant elastic x-ray scattering => REXS, resonant elastic x-ray scattering +x-ray refraction imaging, x-ray refraction radiography => x-ray refraction imaging, x-ray refraction radiography time dependent scattering, time resolved scattering => time dependent scattering, time resolved scattering -time resolved diffraction, time dependent diffraction => time resolved diffraction, time dependent diffraction +time dependent diffraction, time resolved diffraction => time dependent diffraction, time resolved diffraction time dependent absorption, time resolved absorption => time dependent absorption, time resolved absorption -anomalous small angle x-ray scattering, ASAXS => anomalous small angle x-ray scattering, ASAXS +ASAXS, anomalous small angle x-ray scattering => ASAXS, anomalous small angle x-ray scattering ASAX, anomalous solution x-ray scattering => ASAX, anomalous solution x-ray scattering -grazing incidence small angle neutron scattering, GISANS => grazing incidence small angle neutron scattering, GISANS +GISANS, grazing incidence small angle neutron scattering => GISANS, grazing incidence small angle neutron scattering VSANS, very small angle neutron scattering => VSANS, very small angle neutron scattering micro SAXS tomography, micro small angle x-ray scattering tomography => micro SAXS tomography, micro small angle x-ray scattering tomography -micro grazing incidence small angle x-ray scattering tomography, micro GISAXS tomography => micro grazing incidence small angle x-ray scattering tomography, micro GISAXS tomography -nano angle resolved photoemission spectroscopy, nano ARPES => nano angle resolved photoemission spectroscopy, nano ARPES +micro GISAXS tomography, micro grazing incidence small angle x-ray scattering tomography => micro GISAXS tomography, micro grazing incidence small angle x-ray scattering tomography +nano ARPES, nano angle resolved photoemission spectroscopy => nano ARPES, nano angle resolved photoemission spectroscopy scanning x-ray microscopy, x-ray scanning microscopy => scanning x-ray microscopy, x-ray scanning microscopy -high resolution x-ray photoelectron spectroscopy, HR-XPS => high resolution x-ray photoelectron spectroscopy, HR-XPS -resolution elastic neutron scattering, elastic neutron scattering spectroscopy, RENS => resolution elastic neutron scattering, elastic neutron scattering spectroscopy, RENS -x-ray magnetochiral dichroism, XMChiD => x-ray magnetochiral dichroism, XMChiD -x-ray natural circular dichroism, XNCD => x-ray natural circular dichroism, XNCD +HR-XPS, high resolution x-ray photoelectron spectroscopy => HR-XPS, high resolution x-ray photoelectron spectroscopy +RENS, elastic neutron scattering spectroscopy, resolution elastic neutron scattering => RENS, elastic neutron scattering spectroscopy, resolution elastic neutron scattering 
+XMChiD, x-ray magnetochiral dichroism => XMChiD, x-ray magnetochiral dichroism +XNCD, x-ray natural circular dichroism => XNCD, x-ray natural circular dichroism XNLD, x-ray natural linear dichroism => XNLD, x-ray natural linear dichroism -fragment screening, crystallographic fragment screening => fragment screening, crystallographic fragment screening -microfocus macromolecular crystallography, microfocus MX => microfocus macromolecular crystallography, microfocus MX +crystallographic fragment screening, fragment screening => crystallographic fragment screening, fragment screening +microfocus MX, microfocus macromolecular crystallography => microfocus MX, microfocus macromolecular crystallography nanofocus MX, nanofocus macromolecular crystallography => nanofocus MX, nanofocus macromolecular crystallography MR, molecular replacement => MR, molecular replacement -time resolved serial femtosecond crystallography, TR-SFX => time resolved serial femtosecond crystallography, TR-SFX -fixed target serial synchrotron crystallography, FT-SSX => fixed target serial synchrotron crystallography, FT-SSX +TR-SFX, time resolved serial femtosecond crystallography => TR-SFX, time resolved serial femtosecond crystallography +FT-SSX, fixed target serial synchrotron crystallography => FT-SSX, fixed target serial synchrotron crystallography LCP-SSX, lipidic cubic phase serial synchrotron crystallography => LCP-SSX, lipidic cubic phase serial synchrotron crystallography TR-SSX, time resolved serial synchrotron crystallography => TR-SSX, time resolved serial synchrotron crystallography CLXM, correlative light x-ray microscopy => CLXM, correlative light x-ray microscopy -grazing incidence wide angle scattering, GIWAXS => grazing incidence wide angle scattering, GIWAXS -high resolution angle resolved photoemission spectroscopy, HR-ARPES => high resolution angle resolved photoemission spectroscopy, HR-ARPES -atomic force microscopy, AFM => atomic force microscopy, AFM +GIWAXS, grazing incidence wide angle scattering => GIWAXS, grazing incidence wide angle scattering +HR-ARPES, high resolution angle resolved photoemission spectroscopy => HR-ARPES, high resolution angle resolved photoemission spectroscopy +AFM, atomic force microscopy => AFM, atomic force microscopy AFM-IR, atomic force microscope infrared spectroscopy => AFM-IR, atomic force microscope infrared spectroscopy -fourier transform infrared spectroscopy, FTIR => fourier transform infrared spectroscopy, FTIR -EDE, energy dispersive extended x-ray absorption fine structure, ED-EXAFS => EDE, energy dispersive extended x-ray absorption fine structure, ED-EXAFS +FTIR, fourier transform infrared spectroscopy => FTIR, fourier transform infrared spectroscopy +ED-EXAFS, EDE, energy dispersive extended x-ray absorption fine structure => ED-EXAFS, EDE, energy dispersive extended x-ray absorption fine structure radiation therapy, radiotherapy => radiation therapy, radiotherapy -surface crystallography, obtain surface atomic structure => surface crystallography, obtain surface atomic structure -x-ray birefringence imaging , XBI => x-ray birefringence imaging , XBI +obtain surface atomic structure, surface crystallography => obtain surface atomic structure, surface crystallography +XBI, x-ray birefringence imaging => XBI, x-ray birefringence imaging + diff --git a/src/main/scripts/parse_synonyms.py b/src/main/scripts/parse_synonyms.py index 11e7621..3ae3d55 100644 --- a/src/main/scripts/parse_synonyms.py +++ b/src/main/scripts/parse_synonyms.py @@ -37,6 +37,10 @@ def 
addToParents( for parent in parents: try: relationships[parent]["children"].append(label) + # If the parent is equivalent to anything, also add label as a + # child of the equivalentParent + for equivalentParent in relationships[parent]["equivalent"]: + relationships[equivalentParent]["children"].append(label) addToParents( relationships, label, @@ -70,7 +74,8 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): """ altIndices = [] parentIndices = [] - # equivalentIndices = [] + equivalentIndices = [] + equivalentPairs = {} relationships = {} with open(inputFile) as f: reader = csv.reader(f) @@ -86,8 +91,8 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): altIndices.append(i) elif "Parent IRI" == header.strip(): parentIndices.append(i) - # elif "Equivalent" == header.strip(): - # equivalentIndices.append(i) + elif "Equivalent" == header.strip(): + equivalentIndices.append(i) for entries in reader: try: @@ -100,14 +105,11 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): if label in relationships.keys(): raise ValueError(f"Duplicate entry for label {label}") - # relationships[label] = { - # "alternatives": [], - # "parents": [], - # "equivalent": [], - # "children": [], - # } relationships[label] = { - "alternatives": [], "parents": [], "children": [] + "alternatives": [], + "parents": [], + "equivalent": [], + "children": [], } # classType = entries[classIndex] for altIndex in altIndices: @@ -120,10 +122,19 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): parent = entries[parentIndex] if parent != "": relationships[label]["parents"].append(parent) - # for equivalentIndex in equivalentIndices: - # equivalentLabel = entries[equivalentIndex] - # if equivalentLabel != "": - # relationships[label]["equivalent"].append(equivalentLabel) + for equivalentIndex in equivalentIndices: + equivalentLabel = entries[equivalentIndex] + if equivalentLabel != "": + relationships[label]["equivalent"].append(equivalentLabel) + equivalentPairs[equivalentLabel] = label + + # If A is equivalent to B, then also set B equivalent to A + # This ensures they share all children + for key, value in equivalentPairs.items(): + try: + relationships[key]["equivalent"].append(value) + except KeyError: + pass print(f"{len(relationships)} relationships found") for label, relationship in relationships.items(): @@ -137,14 +148,14 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): if (len(relationship["alternatives"]) > 0 or len(relationship["children"]) > 0): leftHandSide = ", ".join( - set([label] + relationship["alternatives"]) + sorted(set([label] + relationship["alternatives"])) ) rightHandSide = ", ".join( - set( + sorted(set( [label] + relationship["alternatives"] + relationship["children"] - ) + )) ) output += leftHandSide + " => " + rightHandSide + "\n" From 3b5fd8cabf88bd6b6a2a238b9b5ff8dfe288daa1 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Sat, 12 Feb 2022 12:57:05 +0000 Subject: [PATCH 27/73] Change order of terms in tests #16 --- src/test/java/icat/lucene/TestLucene.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/icat/lucene/TestLucene.java b/src/test/java/icat/lucene/TestLucene.java index 82b8cb3..babebb5 100755 --- a/src/test/java/icat/lucene/TestLucene.java +++ b/src/test/java/icat/lucene/TestLucene.java @@ -97,7 +97,7 @@ public void testIcatSynonymAnalyzer() throws Exception { } assertEquals(24, n); - assertEquals(" h hydrogen he helium li 
lithium beryllium be boron b neon ne ioniz ionis time tof of flight techniqu arp angl resolv photoemiss spectroscopi", newString); + assertEquals(" h hydrogen he helium li lithium beryllium be boron b neon ne ioniz ionis tof time of flight techniqu arp angl resolv photoemiss spectroscopi", newString); } /** From fea2d47ab78fa96c3a8ab2a32b0b84e2c45be38e Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 9 Mar 2022 20:55:38 +0000 Subject: [PATCH 28/73] Replace searcherManager with readerManager #19 --- .../java/org/icatproject/lucene/Lucene.java | 86 +++++++------------ 1 file changed, 29 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index a02078e..6734362 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -73,7 +73,6 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; @@ -105,13 +104,11 @@ private class IndexBucket { private FSDirectory directory; private IndexWriter indexWriter; private ReaderManager readerManager; - private SearcherManager searcherManager; private AtomicBoolean locked = new AtomicBoolean(); } public class Search { public Map readerMap; - public Map searcherMap; public Query query; public ScoreDoc lastDoc; } @@ -378,7 +375,6 @@ public void commit() throws LuceneException { cached, entry.getKey(), bucket.indexWriter.getDocStats().numDocs); } bucket.readerManager.maybeRefreshBlocking(); - bucket.searcherManager.maybeRefreshBlocking(); } } } catch (IOException e) { @@ -406,7 +402,6 @@ private IndexBucket createBucket(String name) { } bucket.indexWriter = iwriter; bucket.readerManager = new ReaderManager(iwriter, false, false); - bucket.searcherManager = new SearcherManager(iwriter, false, false, null); logger.debug("Bucket for {} is now ready", name); return bucket; } catch (Throwable e) { @@ -474,9 +469,9 @@ public String datafilesFacet(@Context HttpServletRequest request, @QueryParam("m private Search datafilesQuery(HttpServletRequest request, Long uid) throws IOException, QueryNodeException { Search search = new Search(); searches.put(uid, search); - Map searcherMap = new HashMap<>(); + // Map searcherMap = new HashMap<>(); Map readerMap = new HashMap<>(); - search.searcherMap = searcherMap; + // search.searcherMap = searcherMap; search.readerMap = readerMap; try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); @@ -486,14 +481,14 @@ private Search datafilesQuery(HttpServletRequest request, Long uid) throws IOExc if (userName != null) { Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", - new TermQuery(new Term("name", userName)), getSearcher(searcherMap, "InvestigationUser"), + new TermQuery(new Term("name", userName)), getSearcher(readerMap, "InvestigationUser"), ScoreMode.None); Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery, - getSearcher(searcherMap, "Investigation"), ScoreMode.None); + getSearcher(readerMap, "Investigation"), ScoreMode.None); Query dsQuery = JoinUtil.createJoinQuery("id", false, "dataset", invQuery, - getSearcher(searcherMap, "Dataset"), ScoreMode.None); + getSearcher(readerMap, "Dataset"), ScoreMode.None); 
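// The chain above is Lucene's index-time join pattern: each call to
// JoinUtil.createJoinQuery(fromField, multipleValuesPerDocument, toField, fromQuery,
// fromSearcher, scoreMode) collects the fromField values of the documents matching
// fromQuery and rewrites them into a query on toField against the next index. A minimal
// sketch of a single hop, reusing the names already in this file:
//
//   Query users = new TermQuery(new Term("name", userName));
//   Query invQuery = JoinUtil.createJoinQuery("investigation", false, "id", users,
//           getSearcher(readerMap, "InvestigationUser"), ScoreMode.None);
//
// Chaining three such hops walks InvestigationUser -> Investigation -> Dataset, so that
// dsQuery finally restricts Datafile documents through their "dataset" field.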
theQuery.add(dsQuery, Occur.MUST); } @@ -510,10 +505,10 @@ private Search datafilesQuery(HttpServletRequest request, Long uid) throws IOExc Occur.MUST); } - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); - IndexSearcher datafileParameterSearcher = getSearcher(searcherMap, "DatafileParameter"); - for (JsonValue p : params) { + if (o.containsKey("parameters")) { + JsonArray parameters = o.getJsonArray("parameters"); + IndexSearcher datafileParameterSearcher = getSearcher(readerMap, "DatafileParameter"); + for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); Query toQuery = JoinUtil.createJoinQuery("datafile", false, "id", paramQuery.build(), datafileParameterSearcher, ScoreMode.None); @@ -585,9 +580,7 @@ public String datasetsFacet(@Context HttpServletRequest request, @QueryParam("ma private Search datasetsQuery(HttpServletRequest request, Long uid) throws IOException, QueryNodeException { Search search = new Search(); searches.put(uid, search); - Map searcherMap = new HashMap<>(); Map readerMap = new HashMap<>(); - search.searcherMap = searcherMap; search.readerMap = readerMap; try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); @@ -598,11 +591,11 @@ private Search datasetsQuery(HttpServletRequest request, Long uid) throws IOExce if (userName != null) { Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", - new TermQuery(new Term("name", userName)), getSearcher(searcherMap, "InvestigationUser"), + new TermQuery(new Term("name", userName)), getSearcher(readerMap, "InvestigationUser"), ScoreMode.None); Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery, - getSearcher(searcherMap, "Investigation"), ScoreMode.None); + getSearcher(readerMap, "Investigation"), ScoreMode.None); theQuery.add(invQuery, Occur.MUST); } @@ -621,10 +614,10 @@ private Search datasetsQuery(HttpServletRequest request, Long uid) throws IOExce Occur.MUST); } - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); - IndexSearcher datasetParameterSearcher = getSearcher(searcherMap, "DatasetParameter"); - for (JsonValue p : params) { + if (o.containsKey("parameters")) { + JsonArray parameters = o.getJsonArray("parameters"); + IndexSearcher datasetParameterSearcher = getSearcher(readerMap, "DatasetParameter"); + for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); Query toQuery = JoinUtil.createJoinQuery("dataset", false, "id", paramQuery.build(), datasetParameterSearcher, ScoreMode.None); @@ -648,7 +641,6 @@ private void exit() { for (Entry entry : indexBuckets.entrySet()) { IndexBucket bucket = entry.getValue(); bucket.readerManager.close(); - bucket.searcherManager.close(); bucket.indexWriter.commit(); bucket.indexWriter.close(); bucket.directory.close(); @@ -664,9 +656,8 @@ private void exit() { public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException { if (uid != null) { // May not be set for internal calls logger.debug("Requesting freeSearcher {}", uid); - Map search = searches.get(uid).searcherMap; - Map read = searches.get(uid).readerMap; - for (Entry entry : read.entrySet()) { + Map readerMap = searches.get(uid).readerMap; + for (Entry entry : readerMap.entrySet()) { String name = entry.getKey(); DirectoryReader directoryReader = entry.getValue(); ReaderManager manager = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).readerManager; @@ -676,16 +667,6 @@ public void 
freeSearcher(@PathParam("uid") Long uid) throws LuceneException { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } - for (Entry entry : search.entrySet()) { - String name = entry.getKey(); - IndexSearcher isearcher = entry.getValue(); - SearcherManager manager = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).searcherManager; - try { - manager.release(isearcher); - } catch (IOException e) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - } searches.remove(uid); } } @@ -703,14 +684,8 @@ private DirectoryReader getReader(Map bucket, String na /* * Need a new set of IndexSearchers for each search as identified by a uid */ - private IndexSearcher getSearcher(Map bucket, String name) throws IOException { - IndexSearcher isearcher = bucket.get(name); - if (isearcher == null) { - isearcher = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).searcherManager.acquire(); - bucket.put(name, isearcher); - logger.debug("Remember searcher for {}", name); - } - return isearcher; + private IndexSearcher getSearcher(Map bucket, String name) throws IOException { + return new IndexSearcher(getReader(bucket, name)); } @PostConstruct @@ -734,7 +709,8 @@ private void init() { qpConf.set(ConfigurationKeys.ANALYZER, analyzer); qpConf.set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, true); - facetsConfig.setMultiValued("sample", true); + facetsConfig.setMultiValued("sampleName", true); + facetsConfig.setMultiValued("parameterName", true); timer = new Timer("LuceneCommitTimer"); timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); @@ -815,9 +791,7 @@ public String investigationsFacet(@Context HttpServletRequest request, @QueryPar private Search investigationsQuery(HttpServletRequest request, Long uid) throws IOException, QueryNodeException { Search search = new Search(); searches.put(uid, search); - Map searcherMap = new HashMap<>(); Map readerMap = new HashMap<>(); - search.searcherMap = searcherMap; search.readerMap = readerMap; try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); @@ -827,7 +801,7 @@ private Search investigationsQuery(HttpServletRequest request, Long uid) throws if (userName != null) { Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", - new TermQuery(new Term("name", userName)), getSearcher(searcherMap, "InvestigationUser"), + new TermQuery(new Term("name", userName)), getSearcher(readerMap, "InvestigationUser"), ScoreMode.None); theQuery.add(iuQuery, Occur.MUST); } @@ -846,11 +820,11 @@ private Search investigationsQuery(HttpServletRequest request, Long uid) throws Occur.MUST); } - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); - IndexSearcher investigationParameterSearcher = getSearcher(searcherMap, "InvestigationParameter"); + if (o.containsKey("parameters")) { + JsonArray parameters = o.getJsonArray("parameters"); + IndexSearcher investigationParameterSearcher = getSearcher(readerMap, "InvestigationParameter"); - for (JsonValue p : params) { + for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", paramQuery.build(), investigationParameterSearcher, ScoreMode.None); @@ -860,7 +834,7 @@ private Search investigationsQuery(HttpServletRequest request, Long uid) throws if (o.containsKey("samples")) { JsonArray samples = o.getJsonArray("samples"); - IndexSearcher sampleSearcher = 
getSearcher(searcherMap, "Sample"); + IndexSearcher sampleSearcher = getSearcher(readerMap, "Sample"); for (JsonValue s : samples) { JsonString sample = (JsonString) s; @@ -876,7 +850,7 @@ private Search investigationsQuery(HttpServletRequest request, Long uid) throws if (userFullName != null) { BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); userFullNameQuery.add(parser.parse(userFullName, "text"), Occur.MUST); - IndexSearcher investigationUserSearcher = getSearcher(searcherMap, "InvestigationUser"); + IndexSearcher investigationUserSearcher = getSearcher(readerMap, "InvestigationUser"); Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", userFullNameQuery.build(), investigationUserSearcher, ScoreMode.None); theQuery.add(toQuery, Occur.MUST); @@ -912,7 +886,6 @@ private String luceneFacetResult(String name, Search search, int maxResults, int logger.warn("No facets possible for maxResults={}, maxLabels={}, returning empty list", maxResults, maxLabels); results = new ArrayList<>(); } else { - // TODO Consider either making this approach uniform, or whether to only do it for entities where we facet DirectoryReader directoryReader = getReader(search.readerMap, name); IndexSearcher isearcher = new IndexSearcher(directoryReader); logger.debug("To facet in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher, @@ -960,7 +933,7 @@ private String luceneFacetResult(String name, Search search, int maxResults, int } private String luceneSearchResult(String name, Search search, int maxResults, Long uid) throws IOException { - IndexSearcher isearcher = getSearcher(search.searcherMap, name); + IndexSearcher isearcher = getSearcher(search.readerMap, name); logger.debug("To search in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher, search.lastDoc); TopDocs topDocs = search.lastDoc == null ? 
isearcher.search(search.query, maxResults) @@ -1053,7 +1026,6 @@ public void unlock(@PathParam("entityName") String entityName) throws LuceneExce logger.debug("Unlock has committed {} {} changes to Lucene - now have {} documents indexed", cached, entityName, bucket.indexWriter.getDocStats().numDocs); } - bucket.searcherManager.maybeRefreshBlocking(); bucket.readerManager.maybeRefreshBlocking(); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); From a4a822b92407df277ff568d204797a9060fb4d75 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 24 Mar 2022 00:15:59 +0000 Subject: [PATCH 29/73] Enable sorting of string fields #25 --- .../java/org/icatproject/lucene/Lucene.java | 96 +++++++++++++++---- 1 file changed, 79 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 737542b..bf92cde 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -1,13 +1,16 @@ package org.icatproject.lucene; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.net.HttpURLConnection; import java.nio.file.FileVisitOption; import java.nio.file.Files; +import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Timer; @@ -56,11 +59,14 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery.Builder; +import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; @@ -99,6 +105,7 @@ public class Search { public Map map; public Query query; public ScoreDoc lastDoc; + public Sort sort; } enum When { @@ -398,7 +405,8 @@ private IndexBucket createBucket(String name) { @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @Path("datafiles") - public String datafiles(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults) + public String datafiles(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, + @QueryParam("sort") String sort) throws LuceneException { Long uid = null; @@ -408,6 +416,7 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes searches.put(uid, search); Map map = new HashMap<>(); search.map = map; + search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); @@ -441,10 +450,10 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes Occur.MUST); } - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); + if (o.containsKey("parameters")) { + JsonArray parameters = o.getJsonArray("parameters"); IndexSearcher datafileParameterSearcher = getSearcher(map, "DatafileParameter"); - for (JsonValue p : params) { + for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = 
parseParameter(p); Query toQuery = JoinUtil.createJoinQuery("datafile", false, "id", paramQuery.build(), datafileParameterSearcher, ScoreMode.None); @@ -484,7 +493,8 @@ public String datafilesAfter(@PathParam("uid") long uid, @QueryParam("maxResults @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @Path("datasets") - public String datasets(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults) + public String datasets(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, + @QueryParam("sort") String sort) throws LuceneException { Long uid = null; @@ -494,6 +504,7 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu searches.put(uid, search); Map map = new HashMap<>(); search.map = map; + search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); String userName = o.getString("user", null); @@ -526,10 +537,10 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu Occur.MUST); } - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); + if (o.containsKey("parameters")) { + JsonArray parameters = o.getJsonArray("parameters"); IndexSearcher datasetParameterSearcher = getSearcher(map, "DatasetParameter"); - for (JsonValue p : params) { + for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); Query toQuery = JoinUtil.createJoinQuery("dataset", false, "id", paramQuery.build(), datasetParameterSearcher, ScoreMode.None); @@ -667,7 +678,8 @@ public void run() { @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @Path("investigations") - public String investigations(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults) + public String investigations(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, + @QueryParam("sort") String sort) throws LuceneException { Long uid = null; try { @@ -676,6 +688,7 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m searches.put(uid, search); Map map = new HashMap<>(); search.map = map; + search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); String userName = o.getString("user", null); @@ -703,11 +716,11 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m Occur.MUST); } - if (o.containsKey("params")) { - JsonArray params = o.getJsonArray("params"); + if (o.containsKey("parameters")) { + JsonArray parameters = o.getJsonArray("parameters"); IndexSearcher investigationParameterSearcher = getSearcher(map, "InvestigationParameter"); - for (JsonValue p : params) { + for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", paramQuery.build(), investigationParameterSearcher, ScoreMode.None); @@ -789,8 +802,15 @@ private String luceneSearchResult(String name, Search search, int maxResults, Lo IndexSearcher isearcher = getSearcher(search.map, name); logger.debug("To search in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher, search.lastDoc); - TopDocs topDocs = search.lastDoc == null ? 
isearcher.search(search.query, maxResults) - : isearcher.searchAfter(search.lastDoc, search.query, maxResults); + TopDocs topDocs; + if (search.sort == null) { + // Use default score sorting + topDocs = search.lastDoc == null ? isearcher.search(search.query, maxResults) + : isearcher.searchAfter(search.lastDoc, search.query, maxResults); + } else { + topDocs = search.lastDoc == null ? isearcher.search(search.query, maxResults, search.sort) + : isearcher.searchAfter(search.lastDoc, search.query, maxResults, search.sort); + } ScoreDoc[] hits = topDocs.scoreDocs; Float maxScore; if (hits.length == 0) { @@ -810,7 +830,13 @@ private String luceneSearchResult(String name, Search search, int maxResults, Lo Document doc = isearcher.doc(hit.doc); gen.writeStartArray(); gen.write(Long.parseLong(doc.get("id"))); - gen.write(hit.score); + Float score = hit.score; + if (score.equals(Float.NaN)) { + // If we didn't sort by score, then this will be NaN + gen.write(-1.); + } else { + gen.write(hit.score); + } gen.writeEnd(); // array } gen.writeEnd(); // array results @@ -847,9 +873,11 @@ private Builder parseParameter(JsonValue p) { String pLowerDateValue = parameter.getString("lowerDateValue", null); String pUpperDateValue = parameter.getString("upperDateValue", null); Double pLowerNumericValue = parameter.containsKey("lowerNumericValue") - ? parameter.getJsonNumber("lowerNumericValue").doubleValue() : null; + ? parameter.getJsonNumber("lowerNumericValue").doubleValue() + : null; Double pUpperNumericValue = parameter.containsKey("upperNumericValue") - ? parameter.getJsonNumber("upperNumericValue").doubleValue() : null; + ? parameter.getJsonNumber("upperNumericValue").doubleValue() + : null; if (pStringValue != null) { paramQuery.add(new WildcardQuery(new Term("stringValue", pStringValue)), Occur.MUST); } else if (pLowerDateValue != null && pUpperDateValue != null) { @@ -863,6 +891,40 @@ private Builder parseParameter(JsonValue p) { return paramQuery; } + /** + * Parses the String from the request into a Lucene Sort object. Multiple sort + * criteria are supported, and will be applied in order. + * + * @param sort String representation of a JSON object with the field(s) to sort + * as keys, and the direction ("asc" or "desc") as value(s). 
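+ * A minimal illustration, assuming "date" and "name" are indexed as
+ * sortable string fields: sort={"date":"desc","name":"asc"} orders
+ * results by date, newest first, breaking ties by ascending name.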
+ * @return Lucene Sort object + * @throws LuceneException If the value for any key isn't "asc" or "desc" + */ + private Sort parseSort(String sort) throws LuceneException { + if (sort == null || sort.equals("")) { + return null; + } + try (JsonReader reader = Json.createReader(new ByteArrayInputStream(sort.getBytes()))) { + JsonObject object = reader.readObject(); + List fields = new ArrayList<>(); + for (String key : object.keySet()) { + String order = object.getString(key); + Boolean reverse; + if (order.equals("asc")) { + reverse = false; + } else if (order.equals("desc")) { + reverse = true; + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Sort order must be 'asc' or 'desc' but it was '" + order + "'"); + } + + fields.add(new SortField(key, Type.STRING, reverse)); + } + return new Sort(fields.toArray(new SortField[0])); + } + } + @POST @Path("unlock/{entityName}") public void unlock(@PathParam("entityName") String entityName) throws LuceneException { From 8eda4caf949ae3a4cd68727192deb861e36bf266 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Sat, 26 Mar 2022 00:13:27 +0000 Subject: [PATCH 30/73] Add support for fields and searchAfter #25 --- .../java/org/icatproject/lucene/Lucene.java | 268 ++++++++++-------- 1 file changed, 156 insertions(+), 112 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index bf92cde..5193897 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -4,14 +4,17 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; +import java.io.StringReader; import java.net.HttpURLConnection; import java.nio.file.FileVisitOption; import java.nio.file.Files; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Map.Entry; import java.util.Timer; import java.util.TimerTask; @@ -58,6 +61,7 @@ import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.BooleanQuery.Builder; import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.IndexSearcher; @@ -70,6 +74,8 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldDocs; +import org.apache.lucene.search.TotalHits; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.join.JoinUtil; import org.apache.lucene.search.join.ScoreMode; @@ -104,8 +110,8 @@ private class IndexBucket { public class Search { public Map map; public Query query; - public ScoreDoc lastDoc; public Sort sort; + public Set fields = new HashSet(); } enum When { @@ -213,7 +219,7 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP String name = null; String value = null; Double dvalue = null; - Store store = Store.NO; + Store store = Store.YES; Document doc = new Document(); parser.next(); // Skip the [ @@ -257,18 +263,26 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP } else { throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Bad VALUE_TRUE " + attName); } + 
} else if (ev == Event.VALUE_FALSE) { + if (attName == AttributeName.store) { + store = Store.NO; + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Bad VALUE_FALSE " + attName); + } } else if (ev == Event.START_OBJECT) { fType = null; name = null; value = null; - store = Store.NO; + store = Store.YES; } else if (ev == Event.END_OBJECT) { if (fType == FieldType.TextField) { doc.add(new TextField(name, value, store)); } else if (fType == FieldType.StringField) { doc.add(new StringField(name, value, store)); } else if (fType == FieldType.SortedDocValuesField) { + // Any field we sort on must be stored to enable searching after doc.add(new SortedDocValuesField(name, new BytesRef(value))); + doc.add(new StoredField(name, value)); } else if (fType == FieldType.DoublePoint) { doc.add(new DoublePoint(name, dvalue)); if (store == Store.YES) { @@ -405,9 +419,8 @@ private IndexBucket createBucket(String name) { @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @Path("datafiles") - public String datafiles(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, - @QueryParam("sort") String sort) - throws LuceneException { + public String datafiles(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, + @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { Long uid = null; try { @@ -420,7 +433,8 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); - String userName = o.getString("user", null); + JsonObject query = o.getJsonObject("query"); + String userName = query.getString("user", null); BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); @@ -438,20 +452,20 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes theQuery.add(dsQuery, Occur.MUST); } - String text = o.getString("text", null); + String text = query.getString("text", null); if (text != null) { theQuery.add(parser.parse(text, "text"), Occur.MUST); } - String lower = o.getString("lower", null); - String upper = o.getString("upper", null); + String lower = query.getString("lower", null); + String upper = query.getString("upper", null); if (lower != null && upper != null) { theQuery.add(new TermRangeQuery("date", new BytesRef(lower), new BytesRef(upper), true, true), Occur.MUST); } - if (o.containsKey("parameters")) { - JsonArray parameters = o.getJsonArray("parameters"); + if (query.containsKey("parameters")) { + JsonArray parameters = query.getJsonArray("parameters"); IndexSearcher datafileParameterSearcher = getSearcher(map, "DatafileParameter"); for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); @@ -461,9 +475,13 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes } } search.query = maybeEmptyQuery(theQuery); + if (o.containsKey("fields")) { + List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); + jsonStrings.forEach((jsonString) -> search.fields.add(jsonString.getString())); + } } - return luceneSearchResult("Datafile", search, maxResults, uid); + return luceneSearchResult("Datafile", search, searchAfter, maxResults, uid); } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); @@ -471,31 +489,12 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes } } - @GET - 
@Produces(MediaType.APPLICATION_JSON) - @Path("datafiles/{uid}") - public String datafilesAfter(@PathParam("uid") long uid, @QueryParam("maxResults") int maxResults) - throws LuceneException { - try { - Search search = searches.get(uid); - try { - return luceneSearchResult("Datafile", search, maxResults, null); - } catch (Exception e) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - } catch (Exception e) { - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - } - @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @Path("datasets") - public String datasets(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, - @QueryParam("sort") String sort) - throws LuceneException { + public String datasets(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, + @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { Long uid = null; try { @@ -507,7 +506,8 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); - String userName = o.getString("user", null); + JsonObject query = o.getJsonObject("query"); + String userName = query.getString("user", null); BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); @@ -523,13 +523,13 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu theQuery.add(invQuery, Occur.MUST); } - String text = o.getString("text", null); + String text = query.getString("text", null); if (text != null) { theQuery.add(parser.parse(text, "text"), Occur.MUST); } - String lower = o.getString("lower", null); - String upper = o.getString("upper", null); + String lower = query.getString("lower", null); + String upper = query.getString("upper", null); if (lower != null && upper != null) { theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), Occur.MUST); @@ -537,8 +537,8 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu Occur.MUST); } - if (o.containsKey("parameters")) { - JsonArray parameters = o.getJsonArray("parameters"); + if (query.containsKey("parameters")) { + JsonArray parameters = query.getJsonArray("parameters"); IndexSearcher datasetParameterSearcher = getSearcher(map, "DatasetParameter"); for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); @@ -548,8 +548,12 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu } } search.query = maybeEmptyQuery(theQuery); + if (o.containsKey("fields")) { + List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); + jsonStrings.forEach((jsonString) -> search.fields.add(jsonString.getString())); + } } - return luceneSearchResult("Dataset", search, maxResults, uid); + return luceneSearchResult("Dataset", search, searchAfter, maxResults, uid); } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); @@ -558,24 +562,6 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu } - @GET - @Produces(MediaType.APPLICATION_JSON) - @Path("datasets/{uid}") - public String datasetsAfter(@PathParam("uid") long uid, @QueryParam("maxResults") int maxResults) - throws LuceneException { - try { - Search search = searches.get(uid); - try { - return 
luceneSearchResult("Dataset", search, maxResults, null); - } catch (Exception e) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - } catch (Exception e) { - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - } - @PreDestroy private void exit() { logger.info("Closing down icat.lucene"); @@ -678,9 +664,8 @@ public void run() { @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @Path("investigations") - public String investigations(@Context HttpServletRequest request, @QueryParam("maxResults") int maxResults, - @QueryParam("sort") String sort) - throws LuceneException { + public String investigations(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, + @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { Long uid = null; try { uid = bucketNum.getAndIncrement(); @@ -691,7 +676,8 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); - String userName = o.getString("user", null); + JsonObject query = o.getJsonObject("query"); + String userName = query.getString("user", null); BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); @@ -702,13 +688,13 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m theQuery.add(iuQuery, Occur.MUST); } - String text = o.getString("text", null); + String text = query.getString("text", null); if (text != null) { theQuery.add(parser.parse(text, "text"), Occur.MUST); } - String lower = o.getString("lower", null); - String upper = o.getString("upper", null); + String lower = query.getString("lower", null); + String upper = query.getString("upper", null); if (lower != null && upper != null) { theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), Occur.MUST); @@ -716,8 +702,8 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m Occur.MUST); } - if (o.containsKey("parameters")) { - JsonArray parameters = o.getJsonArray("parameters"); + if (query.containsKey("parameters")) { + JsonArray parameters = query.getJsonArray("parameters"); IndexSearcher investigationParameterSearcher = getSearcher(map, "InvestigationParameter"); for (JsonValue p : parameters) { @@ -728,8 +714,8 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m } } - if (o.containsKey("samples")) { - JsonArray samples = o.getJsonArray("samples"); + if (query.containsKey("samples")) { + JsonArray samples = query.getJsonArray("samples"); IndexSearcher sampleSearcher = getSearcher(map, "Sample"); for (JsonValue s : samples) { @@ -742,7 +728,7 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m } } - String userFullName = o.getString("userFullName", null); + String userFullName = query.getString("userFullName", null); if (userFullName != null) { BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); userFullNameQuery.add(parser.parse(userFullName, "text"), Occur.MUST); @@ -753,9 +739,13 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m } search.query = maybeEmptyQuery(theQuery); + if (o.containsKey("fields")) { + List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); + jsonStrings.forEach((jsonString) -> 
search.fields.add(jsonString.getString())); + } } logger.info("Query: {}", search.query); - return luceneSearchResult("Investigation", search, maxResults, uid); + return luceneSearchResult("Investigation", search, searchAfter, maxResults, uid); } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); @@ -764,24 +754,6 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m } - @GET - @Produces(MediaType.APPLICATION_JSON) - @Path("investigations/{uid}") - public String investigationsAfter(@PathParam("uid") long uid, @QueryParam("maxResults") int maxResults) - throws LuceneException { - try { - Search search = searches.get(uid); - try { - return luceneSearchResult("Investigation", search, maxResults, null); - } catch (Exception e) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - } catch (Exception e) { - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - } - @POST @Path("lock/{entityName}") public void lock(@PathParam("entityName") String entityName) throws LuceneException { @@ -798,52 +770,91 @@ public void lock(@PathParam("entityName") String entityName) throws LuceneExcept } } - private String luceneSearchResult(String name, Search search, int maxResults, Long uid) throws IOException { + private String luceneSearchResult(String name, Search search, String searchAfter, int maxResults, Long uid) + throws IOException, LuceneException { IndexSearcher isearcher = getSearcher(search.map, name); logger.debug("To search in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher, - search.lastDoc); - TopDocs topDocs; + searchAfter); + FieldDoc searchAfterDoc = parseSearchAfter(searchAfter); + ScoreDoc[] hits; + TotalHits totalHits; + SortField[] fields = null; if (search.sort == null) { // Use default score sorting - topDocs = search.lastDoc == null ? isearcher.search(search.query, maxResults) - : isearcher.searchAfter(search.lastDoc, search.query, maxResults); + TopDocs topDocs; + topDocs = searchAfterDoc == null ? isearcher.search(search.query, maxResults) + : isearcher.searchAfter(searchAfterDoc, search.query, maxResults); + hits = topDocs.scoreDocs; + totalHits = topDocs.totalHits; } else { - topDocs = search.lastDoc == null ? isearcher.search(search.query, maxResults, search.sort) - : isearcher.searchAfter(search.lastDoc, search.query, maxResults, search.sort); + // Use specified sorting + TopFieldDocs topFieldDocs; + topFieldDocs = searchAfterDoc == null ? 
isearcher.search(search.query, maxResults, search.sort) + : isearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, false); + hits = topFieldDocs.scoreDocs; + totalHits = topFieldDocs.totalHits; + fields = topFieldDocs.fields; } - ScoreDoc[] hits = topDocs.scoreDocs; Float maxScore; if (hits.length == 0) { maxScore = Float.NaN; } else { maxScore = hits[0].score; } - logger.debug("Hits " + topDocs.totalHits + " maxscore " + maxScore); + logger.debug("Hits " + totalHits + " maxscore " + maxScore); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject(); - if (uid != null) { - gen.write("uid", uid); - } gen.writeStartArray("results"); for (ScoreDoc hit : hits) { Document doc = isearcher.doc(hit.doc); - gen.writeStartArray(); - gen.write(Long.parseLong(doc.get("id"))); + gen.writeStartObject().write("id", Long.parseLong(doc.get("id"))); Float score = hit.score; - if (score.equals(Float.NaN)) { - // If we didn't sort by score, then this will be NaN - gen.write(-1.); - } else { - gen.write(hit.score); + if (!score.equals(Float.NaN)) { + gen.write("score", hit.score); } - gen.writeEnd(); // array + gen.writeStartObject("source"); + doc.forEach((field) -> { + if (search.fields.contains(field.name())) { + if (field.stringValue() != null) { + gen.write(field.name(), field.stringValue()); + } else if (field.numericValue() != null) { + gen.write(field.name(), field.numericValue().doubleValue()); + } + } + }); + gen.writeEnd(); + gen.writeEnd(); // result object } gen.writeEnd(); // array results + if (hits.length == maxResults) { + ScoreDoc lastDoc = hits[hits.length - 1]; + gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", lastDoc.shardIndex); + float lastScore = lastDoc.score; + if (!Float.isNaN(lastScore)) { + gen.write("score", lastScore); + } + if (fields != null) { + Document lastDocument = isearcher.doc(lastDoc.doc); + gen.writeStartArray("fields"); + for (SortField sortField : fields) { + Type type = sortField.getType(); + if (type.equals(Type.STRING)) { + String lastValue = lastDocument.get(sortField.getField()); + if (lastValue == null) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + + sortField.getField() + + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); + } + gen.write(lastValue); + } + } + gen.writeEnd(); + } + gen.writeEnd(); + } gen.writeEnd(); // object } - - search.lastDoc = hits.length == 0 ? null : hits[hits.length - 1]; logger.debug("Json returned {}", baos.toString()); return baos.toString(); } @@ -925,6 +936,39 @@ private Sort parseSort(String sort) throws LuceneException { } } + /** + * Parses a Lucene ScoreDoc to be "searched after" from a String representation + * of a JSON object. + * + * @param searchAfter String representation of a JSON object containing the + * document number ("doc", int), the "shardIndex" (int) and, + * optionally, the "score" (float) and the string values of + * any sort "fields" (array). + * @return FieldDoc object built from the provided String, or null if + * searchAfter was itself null or an empty String. 
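+ * A purely illustrative example of the expected shape:
+ * {"doc":5,"shardIndex":0,"score":1.23,"fields":["someName"]}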
+ */ + private FieldDoc parseSearchAfter(String searchAfter) { + if (searchAfter != null && !searchAfter.equals("")) { + logger.debug("Attempting to parseSearchAfter from {}", searchAfter); + JsonReader reader = Json.createReader(new StringReader(searchAfter)); + JsonObject object = reader.readObject(); + int doc = object.getInt("doc"); + int shardIndex = object.getInt("shardIndex"); + float score = Float.NaN; + List fields = new ArrayList<>(); + if (object.containsKey("score")) { + score = object.getJsonNumber("score").bigDecimalValue().floatValue(); + } + if (object.containsKey("fields")) { + List jsonStrings = object.getJsonArray("fields").getValuesAs(JsonString.class); + for (JsonString jsonString : jsonStrings) { + fields.add(new BytesRef(jsonString.getString())); + } + } + return new FieldDoc(doc, score, fields.toArray(), shardIndex); + } + return null; + } + @POST @Path("unlock/{entityName}") public void unlock(@PathParam("entityName") String entityName) throws LuceneException { From 851cedb7b8dd6e1915bbe74cab67752d72e046c6 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Mon, 4 Apr 2022 11:17:46 +0000 Subject: [PATCH 31/73] Implement incremental sharding #26 --- src/main/config/run.properties.example | 1 + .../java/org/icatproject/lucene/Lucene.java | 321 +++++++++++++----- src/main/resources/run.properties | 1 + 3 files changed, 237 insertions(+), 86 deletions(-) diff --git a/src/main/config/run.properties.example b/src/main/config/run.properties.example index b010790..4aeab39 100644 --- a/src/main/config/run.properties.example +++ b/src/main/config/run.properties.example @@ -3,4 +3,5 @@ directory = ${HOME}/data/lucene commitSeconds = 5 +maxShardSize = 2147483648 ip = 127.0.0.1/32 diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 11b1d5b..8b6b0b4 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -6,8 +6,10 @@ import java.net.HttpURLConnection; import java.nio.file.FileVisitOption; import java.nio.file.Files; +import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Timer; @@ -47,8 +49,11 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.ReaderManager; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; @@ -60,7 +65,6 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; @@ -88,15 +92,185 @@ enum FieldType { TextField, StringField, SortedDocValuesField, DoublePoint } - private class IndexBucket { + private class ShardBucket { private FSDirectory directory; private IndexWriter indexWriter; - private SearcherManager searcherManager; + private ReaderManager readerManager; + + /** + * Creates a bucket for accessing the 
read and write functionality for a single + * "shard" Lucene index, which can then be grouped with others to represent a single document + * type. + * + * @param shardPath Path to the directory used as storage for this shard. + * @throws IOException + */ + public ShardBucket(java.nio.file.Path shardPath) throws IOException { + directory = FSDirectory.open(shardPath); + IndexWriterConfig config = new IndexWriterConfig(analyzer); + indexWriter = new IndexWriter(directory, config); + String[] files = directory.listAll(); + if (files.length == 1 && files[0].equals("write.lock")) { + logger.debug("Directory only has the write.lock file so store and delete a dummy document"); + Document doc = new Document(); + doc.add(new StringField("dummy", "dummy", Store.NO)); + indexWriter.addDocument(doc); + indexWriter.commit(); + indexWriter.deleteDocuments(new Term("dummy", "dummy")); + indexWriter.commit(); + logger.debug("Now have " + indexWriter.getDocStats().numDocs + " documents indexed"); + } + readerManager = new ReaderManager(indexWriter); + } + } + + private class IndexBucket { + private String entityName; + private Map<Long, ShardBucket> shardMap = new HashMap<>(); + private AtomicBoolean locked = new AtomicBoolean(); + + /** + * Creates a bucket for accessing the high level functionality, such as + * searching, for a single document type. Incoming documents will be routed to + * one of the individual "shard" indices that are grouped by this Object. + * + * @param entityName The name of the entity that this index contains documents + * for. + */ + public IndexBucket(String entityName) { + try { + this.entityName = entityName; + Long shardIndex = 0L; + java.nio.file.Path shardPath = luceneDirectory.resolve(entityName); + do { + ShardBucket shardBucket = new ShardBucket(shardPath); + shardMap.put(shardIndex, shardBucket); + shardIndex++; + shardPath = luceneDirectory.resolve(entityName + "_" + shardIndex); + } while (Files.isDirectory(shardPath)); + logger.debug("Bucket for {} is now ready with {} shards", entityName, shardIndex); + } catch (Throwable e) { + logger.error("Can't continue " + e.getClass() + " " + e.getMessage()); + } + } + + /** + * Acquires DirectoryReaders from the ReaderManagers of the individual shards in + * this bucket. + * + * @return Array of DirectoryReaders for all shards in this bucket. + * @throws IOException + */ + public DirectoryReader[] acquireReaders() throws IOException { + List<DirectoryReader> subReaders = new ArrayList<>(); + for (ShardBucket shardBucket : shardMap.values()) { + subReaders.add(shardBucket.readerManager.acquire()); + } + return subReaders.toArray(new DirectoryReader[0]); + } + + /** + * Creates a new ShardBucket and stores it in the shardMap. + * + * @param shardKey The identifier for the new shard to be created. For + * simplicity, this should be a Long starting at 0 and incrementing + * by 1 for each new shard. + * @return A new ShardBucket with the provided shardKey. + * @throws IOException + */ + public ShardBucket buildShardBucket(Long shardKey) throws IOException { + ShardBucket shardBucket = new ShardBucket(luceneDirectory.resolve(entityName + "_" + shardKey)); + shardMap.put(shardKey, shardBucket); + return shardBucket; + } + + /** + * Commits Documents for writing on all "shard" indices for this bucket. + * + * @param command The high level command which called this function. Only + * used for debug logging. + * @param entityName The name of the entities being committed. Only used for + * debug logging. 
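+ * (For example, command is "Synch" when this is triggered by the
+ * periodic LuceneCommitTimer task, as in commit() below.)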
+ * @throws IOException + */ + public void commit(String command, String entityName) throws IOException { + for (Entry entry : shardMap.entrySet()) { + ShardBucket shardBucket = entry.getValue(); + int cached = shardBucket.indexWriter.numRamDocs(); + shardBucket.indexWriter.commit(); + if (cached != 0) { + logger.debug("{} has committed {} {} changes to Lucene - now have {} documents indexed in shard {}", + command, cached, entityName, shardBucket.indexWriter.getDocStats().numDocs, entry.getKey()); + } + shardBucket.readerManager.maybeRefreshBlocking(); + } + } + + /** + * Commits and closes all "shard" indices for this bucket. + * + * @throws IOException + */ + public void close() throws IOException { + for (ShardBucket shardBucket : shardMap.values()) { + shardBucket.readerManager.close(); + shardBucket.indexWriter.commit(); + shardBucket.indexWriter.close(); + shardBucket.directory.close(); + } + } + + /** + * Provides the ShardBucket that should be used for reading/writing the Document + * with the provided id. All ids up to luceneMaxShardSize are indexed in the + * first shard, after that a new shard is created for the next + * luceneMaxShardSize Documents and so on. + * + * @param id The id of a Document to be routed. + * @return The ShardBucket that the relevant Document is/should be indexed in. + * @throws IOException + */ + public ShardBucket routeShard(Long id) throws IOException { + if (id == null) { + // If we don't have id, provide the first bucket + return shardMap.get(0L); + } + Long shard = id / luceneMaxShardSize; + ShardBucket shardBucket = shardMap.get(shard); + if (shardBucket == null) { + shardBucket = buildShardBucket(shard); + } + return shardBucket; + } + + /** + * Provides the IndexWriter that should be used for writing the Document with + * the provided id. + * + * @param id The id of a Document to be routed. + * @return The relevant IndexWriter. 
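+ * As a worked example using the example configuration's value of
+ * maxShardSize = 2147483648: a Datafile with id 5000000000 routes to
+ * shard 5000000000 / 2147483648 = 2, i.e. the directory "Datafile_2".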
+ * @throws IOException + */ + public IndexWriter getWriter(Long id) throws IOException { + return routeShard(id).indexWriter; + } + + public void releaseReaders(DirectoryReader[] subReaders) throws IOException, LuceneException { + if (subReaders.length != shardMap.size()) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, + "Was expecting the same number of DirectoryReaders as ShardBuckets, but had " + + subReaders.length + ", " + shardMap.size() + " respectively."); + } + int i = 0; + for (ShardBucket shardBucket : shardMap.values()) { + shardBucket.readerManager.release(subReaders[i]); + i++; + } + } } public class Search { - public Map map; + public Map map; public Query query; public ScoreDoc lastDoc; } @@ -112,6 +286,7 @@ enum When { private java.nio.file.Path luceneDirectory; private int luceneCommitMillis; + private Long luceneMaxShardSize; private AtomicLong bucketNum = new AtomicLong(); private Map indexBuckets = new ConcurrentHashMap<>(); @@ -170,12 +345,13 @@ public void modify(@Context HttpServletRequest request) throws LuceneException { ev = parser.next(); if (ev == Event.VALUE_NULL) { try { - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> createBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } - bucket.indexWriter.deleteDocuments(new Term("id", Long.toString(id))); + ShardBucket shardBucket = bucket.routeShard(id); + shardBucket.indexWriter.deleteDocuments(new Term("id", Long.toString(id))); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -199,7 +375,7 @@ public void modify(@Context HttpServletRequest request) throws LuceneException { private void add(HttpServletRequest request, String entityName, When when, JsonParser parser, Long id) throws LuceneException, IOException { - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> createBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); AttributeName attName = null; FieldType fType = null; @@ -274,13 +450,20 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } - bucket.indexWriter.addDocument(doc); + String documentId = doc.get("id"); + if (documentId == null) { + logger.warn( + "Adding Document without an id field is not recommended, routing, updates and deletions will not be available for this Document."); + bucket.getWriter(null).addDocument(doc); + } else { + bucket.getWriter(Long.valueOf(documentId)).addDocument(doc); + } } else { if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } - bucket.indexWriter.updateDocument(new Term("id", id.toString()), doc); + bucket.getWriter(id).updateDocument(new Term("id", id.toString()), doc); } return; } else { @@ -352,13 +535,7 @@ public void commit() throws LuceneException { for (Entry entry : indexBuckets.entrySet()) { IndexBucket bucket = entry.getValue(); if (!bucket.locked.get()) { - int cached = bucket.indexWriter.numRamDocs(); - bucket.indexWriter.commit(); - if (cached != 0) { - logger.debug("Synch has committed {} {} changes to Lucene - now have {} documents indexed", - cached, entry.getKey(), 
@@ -366,34 +543,6 @@
         }
     }

-    private IndexBucket createBucket(String name) {
-        try {
-            IndexBucket bucket = new IndexBucket();
-            FSDirectory directory = FSDirectory.open(luceneDirectory.resolve(name));
-            bucket.directory = directory;
-            IndexWriterConfig config = new IndexWriterConfig(analyzer);
-            IndexWriter iwriter = new IndexWriter(directory, config);
-            String[] files = directory.listAll();
-            if (files.length == 1 && files[0].equals("write.lock")) {
-                logger.debug("Directory only has the write.lock file so store and delete a dummy document");
-                Document doc = new Document();
-                doc.add(new StringField("dummy", "dummy", Store.NO));
-                iwriter.addDocument(doc);
-                iwriter.commit();
-                iwriter.deleteDocuments(new Term("dummy", "dummy"));
-                iwriter.commit();
-                logger.debug("Now have " + iwriter.getDocStats().numDocs + " documents indexed");
-            }
-            bucket.indexWriter = iwriter;
-            bucket.searcherManager = new SearcherManager(iwriter, false, false, null);
-            logger.debug("Bucket for {} is now ready", name);
-            return bucket;
-        } catch (Throwable e) {
-            logger.error("Can't continue " + e.getClass() + " " + e.getMessage());
-            return null;
-        }
-    }
-
     @POST
     @Consumes(MediaType.APPLICATION_JSON)
     @Produces(MediaType.APPLICATION_JSON)
@@ -406,7 +555,7 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes
             uid = bucketNum.getAndIncrement();
             Search search = new Search();
             searches.put(uid, search);
-            Map<String, IndexSearcher> map = new HashMap<>();
+            Map<String, DirectoryReader[]> map = new HashMap<>();
             search.map = map;
             try (JsonReader r = Json.createReader(request.getInputStream())) {
@@ -441,10 +590,10 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("maxRes
                             Occur.MUST);
                 }

-                if (o.containsKey("params")) {
-                    JsonArray params = o.getJsonArray("params");
+                if (o.containsKey("parameters")) {
+                    JsonArray parameters = o.getJsonArray("parameters");
                     IndexSearcher datafileParameterSearcher = getSearcher(map, "DatafileParameter");
-                    for (JsonValue p : params) {
+                    for (JsonValue p : parameters) {
                         BooleanQuery.Builder paramQuery = parseParameter(p);
                         Query toQuery = JoinUtil.createJoinQuery("datafile", false, "id", paramQuery.build(),
                                 datafileParameterSearcher, ScoreMode.None);
                         theQuery.add(toQuery, Occur.MUST);
                     }
@@ -492,7 +641,7 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu
             uid = bucketNum.getAndIncrement();
             Search search = new Search();
             searches.put(uid, search);
-            Map<String, IndexSearcher> map = new HashMap<>();
+            Map<String, DirectoryReader[]> map = new HashMap<>();
             search.map = map;
             try (JsonReader r = Json.createReader(request.getInputStream())) {
                 JsonObject o = r.readObject();
@@ -526,10 +675,10 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("maxResu
                             Occur.MUST);
                 }

-                if (o.containsKey("params")) {
-                    JsonArray params = o.getJsonArray("params");
+                if (o.containsKey("parameters")) {
+                    JsonArray parameters = o.getJsonArray("parameters");
                     IndexSearcher datasetParameterSearcher = getSearcher(map, "DatasetParameter");
-                    for (JsonValue p : params) {
+                    for (JsonValue p : parameters) {
                         BooleanQuery.Builder paramQuery = parseParameter(p);
                         Query toQuery = JoinUtil.createJoinQuery("dataset", false, "id", paramQuery.build(),
                                 datasetParameterSearcher, ScoreMode.None);
                         theQuery.add(toQuery, Occur.MUST);
                     }
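The datafiles and datasets endpoints share the same two-step join: match parameter Documents first, then map the ids of their owning entities onto the main index. The datafile case in isolation (the parameter name is invented; the searcher is the one obtained through getSearcher above):

    // Join DatafileParameter matches onto Datafile documents (Lucene join module)
    BooleanQuery.Builder paramQuery = new BooleanQuery.Builder();
    paramQuery.add(new WildcardQuery(new Term("name", "temperature*")), Occur.MUST);
    Query joined = JoinUtil.createJoinQuery(
            "datafile",                // fromField: id of the owning Datafile
            false,                     // a parameter holds a single datafile id
            "id",                      // toField on the Datafile index
            paramQuery.build(),
            datafileParameterSearcher, // searcher over the DatafileParameter index
            ScoreMode.None);           // parameter hits do not affect ranking
    theQuery.add(joined, Occur.MUST);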
@@ -574,12 +723,8 @@ private void exit() {
             timer = null; // This seems to be necessary to make it really stop
         }
         try {
-            for (Entry<String, IndexBucket> entry : indexBuckets.entrySet()) {
-                IndexBucket bucket = entry.getValue();
-                bucket.searcherManager.close();
-                bucket.indexWriter.commit();
-                bucket.indexWriter.close();
-                bucket.directory.close();
+            for (IndexBucket bucket : indexBuckets.values()) {
+                bucket.close();
             }
             logger.info("Closed down icat.lucene");
         } catch (Exception e) {
@@ -592,13 +737,12 @@ public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException {
         if (uid != null) { // May not be set for internal calls
             logger.debug("Requesting freeSearcher {}", uid);
-            Map<String, IndexSearcher> search = searches.get(uid).map;
-            for (Entry<String, IndexSearcher> entry : search.entrySet()) {
+            Map<String, DirectoryReader[]> search = searches.get(uid).map;
+            for (Entry<String, DirectoryReader[]> entry : search.entrySet()) {
                 String name = entry.getKey();
-                IndexSearcher isearcher = entry.getValue();
-                SearcherManager manager = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).searcherManager;
+                DirectoryReader[] subReaders = entry.getValue();
                 try {
-                    manager.release(isearcher);
+                    indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).releaseReaders(subReaders);
                 } catch (IOException e) {
                     throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage());
                 }
@@ -610,14 +754,14 @@ public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException {
     /*
     * Need a new set of IndexSearchers for each search as identified by a uid
     */
-    private IndexSearcher getSearcher(Map<String, IndexSearcher> bucket, String name) throws IOException {
-        IndexSearcher isearcher = bucket.get(name);
-        if (isearcher == null) {
-            isearcher = indexBuckets.computeIfAbsent(name, k -> createBucket(k)).searcherManager.acquire();
-            bucket.put(name, isearcher);
+    private IndexSearcher getSearcher(Map<String, DirectoryReader[]> bucket, String name) throws IOException {
+        DirectoryReader[] subReaders = bucket.get(name);
+        if (subReaders == null) {
+            subReaders = indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).acquireReaders();
+            bucket.put(name, subReaders);
             logger.debug("Remember searcher for {}", name);
         }
-        return isearcher;
+        return new IndexSearcher(new MultiReader(subReaders, false));
     }

     @PostConstruct
@@ -633,6 +777,7 @@ private void init() {
         }

         luceneCommitMillis = props.getPositiveInt("commitSeconds") * 1000;
+        luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), Integer.MAX_VALUE + 1L);

         analyzer = new IcatAnalyzer();
@@ -674,7 +819,7 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m
             uid = bucketNum.getAndIncrement();
             Search search = new Search();
             searches.put(uid, search);
-            Map<String, IndexSearcher> map = new HashMap<>();
+            Map<String, DirectoryReader[]> map = new HashMap<>();
             search.map = map;
             try (JsonReader r = Json.createReader(request.getInputStream())) {
                 JsonObject o = r.readObject();
@@ -703,11 +848,11 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("m
                             Occur.MUST);
                 }

-                if (o.containsKey("params")) {
-                    JsonArray params = o.getJsonArray("params");
+                if (o.containsKey("parameters")) {
+                    JsonArray parameters = o.getJsonArray("parameters");
                     IndexSearcher investigationParameterSearcher = getSearcher(map, "InvestigationParameter");

-                    for (JsonValue p : params) {
+                    for (JsonValue p : parameters) {
                         BooleanQuery.Builder paramQuery = parseParameter(p);
                         Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", paramQuery.build(),
                                 investigationParameterSearcher, ScoreMode.None);
                         theQuery.add(toQuery, Occur.MUST);
                     }
@@ -773,13 +918,15 @@ public String investigationsAfter(@PathParam("uid") long uid, @QueryParam("maxRe
     @Path("lock/{entityName}")
     public void lock(@PathParam("entityName") String entityName) throws LuceneException {
         logger.info("Requesting lock of
{} index", entityName); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> createBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); if (!bucket.locked.compareAndSet(false, true)) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene already locked for " + entityName); } try { - bucket.indexWriter.deleteAll(); + for (ShardBucket shardBucket : bucket.shardMap.values()) { + shardBucket.indexWriter.deleteAll(); + } } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -792,7 +939,13 @@ private String luceneSearchResult(String name, Search search, int maxResults, Lo TopDocs topDocs = search.lastDoc == null ? isearcher.search(search.query, maxResults) : isearcher.searchAfter(search.lastDoc, search.query, maxResults); ScoreDoc[] hits = topDocs.scoreDocs; - logger.debug("Hits " + topDocs.totalHits + " maxscore " + topDocs.scoreDocs[0].score); + Float maxScore; + if (hits.length == 0) { + maxScore = Float.NaN; + } else { + maxScore = hits[0].score; + } + logger.debug("Hits " + topDocs.totalHits + " maxscore " + maxScore); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject(); @@ -841,9 +994,11 @@ private Builder parseParameter(JsonValue p) { String pLowerDateValue = parameter.getString("lowerDateValue", null); String pUpperDateValue = parameter.getString("upperDateValue", null); Double pLowerNumericValue = parameter.containsKey("lowerNumericValue") - ? parameter.getJsonNumber("lowerNumericValue").doubleValue() : null; + ? parameter.getJsonNumber("lowerNumericValue").doubleValue() + : null; Double pUpperNumericValue = parameter.containsKey("upperNumericValue") - ? parameter.getJsonNumber("upperNumericValue").doubleValue() : null; + ? 
parameter.getJsonNumber("upperNumericValue").doubleValue() + : null; if (pStringValue != null) { paramQuery.add(new WildcardQuery(new Term("stringValue", pStringValue)), Occur.MUST); } else if (pLowerDateValue != null && pUpperDateValue != null) { @@ -861,19 +1016,13 @@ private Builder parseParameter(JsonValue p) { @Path("unlock/{entityName}") public void unlock(@PathParam("entityName") String entityName) throws LuceneException { logger.debug("Requesting unlock of {} index", entityName); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> createBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); if (!bucket.locked.compareAndSet(true, false)) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene is not currently locked for " + entityName); } try { - int cached = bucket.indexWriter.numRamDocs(); - bucket.indexWriter.commit(); - if (cached != 0) { - logger.debug("Unlock has committed {} {} changes to Lucene - now have {} documents indexed", cached, - entityName, bucket.indexWriter.getDocStats().numDocs); - } - bucket.searcherManager.maybeRefreshBlocking(); + bucket.commit("Unlock", entityName); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index b010790..4aeab39 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -3,4 +3,5 @@ directory = ${HOME}/data/lucene commitSeconds = 5 +maxShardSize = 2147483648 ip = 127.0.0.1/32 From 9477ea84b4ef5e89cbc044ec33627b11de3fb757 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 6 Apr 2022 03:30:42 +0000 Subject: [PATCH 32/73] Rename JSON keys for clarity over id #18 --- .../java/org/icatproject/lucene/Lucene.java | 60 ++++++++++++------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index ba9c859..400caf1 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -71,7 +71,6 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; @@ -457,7 +456,7 @@ private void add(HttpServletRequest request, String entityName, When when, JsonP } else if (fType == FieldType.SortedDocValuesField) { // Any field we sort on must be stored to enable searching after doc.add(new SortedDocValuesField(name, new BytesRef(value))); - doc.add(new StoredField(name, value)); + doc.add(new StoredField(name, value)); // TODO potentially remove this, or the version in LuceneApi } else if (fType == FieldType.DoublePoint) { doc.add(new DoublePoint(name, dvalue)); if (store == Store.YES) { @@ -710,6 +709,41 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("search_ } + /** + * Encodes core Lucene information (keys preceded by underscores) and a + * selection of the Document's source fields to JSON to be returned to + * icat.server. Note that "_id" is the Lucene Document id, and should not be + * confused with the ICAT entity id, which should be denoted by the key "id" + * within the "_source" object. + * + * @param gen JsonGenerator to encode the information to. 
+ * @param hit ScoreDoc representing a single search result. + * @param searcher IndexSearcher used to get the Document for the hit. + * @param search Search object containing the fields to return. + * @throws IOException + */ + private void encodeResult(JsonGenerator gen, ScoreDoc hit, IndexSearcher searcher, Search search) + throws IOException { + int luceneDocId = hit.doc; + Document document = searcher.doc(luceneDocId); + gen.writeStartObject().write("_id", luceneDocId); + Float score = hit.score; + if (!score.equals(Float.NaN)) { + gen.write("_score", hit.score); + } + gen.writeStartObject("_source"); + document.forEach((field) -> { + if (search.fields.contains(field.name())) { + if (field.stringValue() != null) { + gen.write(field.name(), field.stringValue()); + } else if (field.numericValue() != null) { + gen.write(field.name(), field.numericValue().doubleValue()); + } + } + }); + gen.writeEnd().writeEnd(); // source object, result object + } + @PreDestroy private void exit() { logger.info("Closing down icat.lucene"); @@ -950,27 +984,9 @@ private String luceneSearchResult(String name, Search search, String searchAfter logger.debug("Hits " + totalHits + " maxscore " + maxScore); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartObject(); - gen.writeStartArray("results"); + gen.writeStartObject().writeStartArray("results"); for (ScoreDoc hit : hits) { - Document doc = isearcher.doc(hit.doc); - gen.writeStartObject().write("id", Long.parseLong(doc.get("id"))); - Float score = hit.score; - if (!score.equals(Float.NaN)) { - gen.write("score", hit.score); - } - gen.writeStartObject("source"); - doc.forEach((field) -> { - if (search.fields.contains(field.name())) { - if (field.stringValue() != null) { - gen.write(field.name(), field.stringValue()); - } else if (field.numericValue() != null) { - gen.write(field.name(), field.numericValue().doubleValue()); - } - } - }); - gen.writeEnd(); - gen.writeEnd(); // result object + encodeResult(gen, hit, isearcher, search); } gen.writeEnd(); // array results if (hits.length == maxResults) { From 434b66b8e5d33d93ee508da680abb106bfd1235f Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 8 Apr 2022 09:28:53 +0100 Subject: [PATCH 33/73] Text fields and related entities #30 --- .../java/org/icatproject/lucene/Lucene.java | 777 +++++++++++------- 1 file changed, 498 insertions(+), 279 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 400caf1..4a69314 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -8,13 +8,16 @@ import java.net.HttpURLConnection; import java.nio.file.FileVisitOption; import java.nio.file.Files; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TimeZone; import java.util.Map.Entry; import java.util.Timer; import java.util.TimerTask; @@ -27,13 +30,13 @@ import javax.ejb.Singleton; import javax.json.Json; import javax.json.JsonArray; +import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonReader; import javax.json.JsonString; import javax.json.JsonValue; +import javax.json.JsonValue.ValueType; import javax.json.stream.JsonGenerator; -import javax.json.stream.JsonParser; -import 
javax.json.stream.JsonParser.Event; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; @@ -48,20 +51,21 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.ReaderManager; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; -import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; -import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FieldDoc; @@ -73,8 +77,8 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TotalHits; @@ -83,6 +87,7 @@ import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import org.icatproject.lucene.exceptions.LuceneException; import org.icatproject.utils.CheckedProperties; import org.slf4j.Logger; @@ -94,14 +99,6 @@ @Singleton public class Lucene { - enum AttributeName { - type, name, value, date, store - } - - enum FieldType { - TextField, StringField, SortedDocValuesField, DoublePoint - } - private class ShardBucket { private FSDirectory directory; private IndexWriter indexWriter; @@ -286,13 +283,88 @@ public class Search { public Set fields = new HashSet(); } - enum When { - Now, Sometime + private static class ParentRelationship { + public String parentName; + public String fieldPrefix; + + public ParentRelationship(String parentName, String fieldPrefix) { + this.parentName = parentName; + this.fieldPrefix = fieldPrefix; + } + } private static final Logger logger = LoggerFactory.getLogger(Lucene.class); - private static final Marker fatal = MarkerFactory.getMarker("FATAL"); + private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); + + private static final Set doubleFields = new HashSet<>(); + private static final Set longFields = new HashSet<>(); + private static final Set sortFields = new HashSet<>(); + private static final Set textFields = new HashSet<>(); + private static final Set indexedEntities = new HashSet<>(); + private static final Map relationships = new HashMap<>(); + + private static final IcatAnalyzer analyzer = new IcatAnalyzer(); + private static final StandardQueryParser genericParser = new StandardQueryParser(); + private static final StandardQueryParser datafileParser = new 
StandardQueryParser(); + private static final StandardQueryParser datasetParser = new StandardQueryParser(); + private static final StandardQueryParser investigationParser = new StandardQueryParser(); + private static final StandardQueryParser sampleParser = new StandardQueryParser(); + + static { + TimeZone tz = TimeZone.getTimeZone("GMT"); + df.setTimeZone(tz); + + doubleFields.add("numericValue"); + longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue")); + sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "id", "date", "startDate", + "endDate", "name")); + textFields.addAll(Arrays.asList("name", "visitId", "description", "datafileFormat.name", "sample.name", + "sample.type.name", "title", "summary", "facility.name", "user.fullName")); + + indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter", + "DatasetParameter", "InvestigationParameter", "InvestigationUser", "Sample")); + + relationships.put("User", new ParentRelationship[] { new ParentRelationship("InvestigationUser", "user") }); + relationships.put("Sample", new ParentRelationship[] { new ParentRelationship("Dataset", "sample") }); + relationships.put("SampleType", new ParentRelationship[] { new ParentRelationship("Sample", "type"), + new ParentRelationship("Dataset", "sample.type") }); + relationships.put("InvestigationType", + new ParentRelationship[] { new ParentRelationship("Investigation", "type") }); + relationships.put("DatasetType", new ParentRelationship[] { new ParentRelationship("Dataset", "type") }); + relationships.put("DatafileFormat", + new ParentRelationship[] { new ParentRelationship("Datafile", "datafileFormat") }); + relationships.put("Facility", new ParentRelationship[] { new ParentRelationship("Investigation", "facility") }); + relationships.put("ParameterType", + new ParentRelationship[] { new ParentRelationship("DatafileParameter", "type"), + new ParentRelationship("DatasetParameter", "type"), + new ParentRelationship("InvestigationParameter", "type") }); + + genericParser.setAllowLeadingWildcard(true); + genericParser.setAnalyzer(analyzer); + + CharSequence[] datafileFields = { "name", "description", "doi", "datafileFormat.name" }; + datafileParser.setAllowLeadingWildcard(true); + datafileParser.setAnalyzer(analyzer); + datafileParser.setMultiFields(datafileFields); + + CharSequence[] datasetFields = { "name", "description", "doi", "sample.name", "sample.type.name", "type.name" }; + datasetParser.setAllowLeadingWildcard(true); + datasetParser.setAnalyzer(analyzer); + datasetParser.setMultiFields(datasetFields); + + CharSequence[] investigationFields = { "name", "visitId", "title", "summary", "doi", "facility.name", + "type.name" }; + investigationParser.setAllowLeadingWildcard(true); + investigationParser.setAnalyzer(analyzer); + investigationParser.setMultiFields(investigationFields); + + CharSequence[] sampleFields = { "sample.name", "sample.type.name" }; + sampleParser.setAllowLeadingWildcard(true); + sampleParser.setAnalyzer(analyzer); + sampleParser.setMultiFields(sampleFields); + } private java.nio.file.Path luceneDirectory; @@ -301,12 +373,9 @@ enum When { private AtomicLong bucketNum = new AtomicLong(); private Map indexBuckets = new ConcurrentHashMap<>(); - private StandardQueryParser parser; private Timer timer; - private IcatAnalyzer analyzer; - private Map searches = new ConcurrentHashMap<>(); /** @@ -333,48 +402,26 @@ public void modify(@Context HttpServletRequest request) throws LuceneException { 
logger.debug("Requesting modify"); int count = 0; - - try (JsonParser parser = Json.createParser(request.getInputStream())) { - - Event ev = parser.next(); - if (ev != Event.START_ARRAY) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Unexpected " + ev.name()); - } - ev = parser.next(); - - while (true) { - if (ev == Event.END_ARRAY) { - break; - } - if (ev != Event.START_ARRAY) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Unexpected " + ev.name()); - } - ev = parser.next(); - String entityName = parser.getString(); - ev = parser.next(); - Long id = (ev == Event.VALUE_NULL) ? null : parser.getLong(); - ev = parser.next(); - if (ev == Event.VALUE_NULL) { - try { - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); - if (bucket.locked.get()) { - throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, - "Lucene locked for " + entityName); - } - ShardBucket shardBucket = bucket.routeShard(id); - shardBucket.indexWriter.deleteDocuments(new Term("id", Long.toString(id))); - } catch (IOException e) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } + try (JsonReader reader = Json.createReader(request.getInputStream())) { + List operations = reader.readArray().getValuesAs(JsonObject.class); + for (JsonObject operation : operations) { + if (operation.size() != 1) { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Operation object should only have one key/value pair, but request had " + + operation.size()); + } else if (operation.containsKey("create")) { + create(operation.getJsonObject("create")); + } else if (operation.containsKey("update")) { + update(operation.getJsonObject("update")); + } else if (operation.containsKey("delete")) { + delete(operation.getJsonObject("delete")); } else { - add(request, entityName, When.Sometime, parser, id); + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Operation key should be one of 'create', 'update', 'delete', but it was " + + operation.keySet()); } - ev = parser.next(); // end of triple - count++; - ev = parser.next(); // either end of input or start of new - // triple } - + count = operations.size(); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -382,115 +429,6 @@ public void modify(@Context HttpServletRequest request) throws LuceneException { } - /* if id is not null this is actually an update */ - private void add(HttpServletRequest request, String entityName, When when, JsonParser parser, Long id) - throws LuceneException, IOException { - - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); - - AttributeName attName = null; - FieldType fType = null; - String name = null; - String value = null; - Double dvalue = null; - Store store = Store.YES; - Document doc = new Document(); - - parser.next(); // Skip the [ - while (parser.hasNext()) { - Event ev = parser.next(); - if (ev == Event.KEY_NAME) { - try { - attName = AttributeName.valueOf(parser.getString()); - } catch (Exception e) { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "Found unknown field type " + e.getMessage()); - } - } else if (ev == Event.VALUE_STRING) { - if (attName == AttributeName.type) { - try { - fType = FieldType.valueOf(parser.getString()); - } catch (Exception e) { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "Found unknown field type " + e.getMessage()); - } - } else if 
@@ -382,115 +429,6 @@
     }

-    /* if id is not null this is actually an update */
-    private void add(HttpServletRequest request, String entityName, When when, JsonParser parser, Long id)
-            throws LuceneException, IOException {
-
-        IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k));
-
-        AttributeName attName = null;
-        FieldType fType = null;
-        String name = null;
-        String value = null;
-        Double dvalue = null;
-        Store store = Store.YES;
-        Document doc = new Document();
-
-        parser.next(); // Skip the [
-        while (parser.hasNext()) {
-            Event ev = parser.next();
-            if (ev == Event.KEY_NAME) {
-                try {
-                    attName = AttributeName.valueOf(parser.getString());
-                } catch (Exception e) {
-                    throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
-                            "Found unknown field type " + e.getMessage());
-                }
-            } else if (ev == Event.VALUE_STRING) {
-                if (attName == AttributeName.type) {
-                    try {
-                        fType = FieldType.valueOf(parser.getString());
-                    } catch (Exception e) {
-                        throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
-                                "Found unknown field type " + e.getMessage());
-                    }
-                } else if (attName == AttributeName.name) {
-                    name = parser.getString();
-                } else if (attName == AttributeName.value) {
-                    value = parser.getString();
-                } else {
-                    throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Bad VALUE_STRING " + attName);
-                }
-            } else if (ev == Event.VALUE_NUMBER) {
-                long num = parser.getLong();
-                if (fType == FieldType.SortedDocValuesField) {
-                    value = Long.toString(num);
-                } else if (fType == FieldType.DoublePoint) {
-                    dvalue = parser.getBigDecimal().doubleValue();
-                } else {
-                    throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
-                            "Bad VALUE_NUMBER " + attName + " " + fType);
-                }
-            } else if (ev == Event.VALUE_TRUE) {
-                if (attName == AttributeName.store) {
-                    store = Store.YES;
-                } else {
-                    throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Bad VALUE_TRUE " + attName);
-                }
-            } else if (ev == Event.VALUE_FALSE) {
-                if (attName == AttributeName.store) {
-                    store = Store.NO;
-                } else {
-                    throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Bad VALUE_FALSE " + attName);
-                }
-            } else if (ev == Event.START_OBJECT) {
-                fType = null;
-                name = null;
-                value = null;
-                store = Store.YES;
-            } else if (ev == Event.END_OBJECT) {
-                if (fType == FieldType.TextField) {
-                    doc.add(new TextField(name, value, store));
-                } else if (fType == FieldType.StringField) {
-                    doc.add(new StringField(name, value, store));
-                } else if (fType == FieldType.SortedDocValuesField) {
-                    // Any field we sort on must be stored to enable searching after
-                    doc.add(new SortedDocValuesField(name, new BytesRef(value)));
-                    doc.add(new StoredField(name, value)); // TODO potentially remove this, or the version in LuceneApi
-                } else if (fType == FieldType.DoublePoint) {
-                    doc.add(new DoublePoint(name, dvalue));
-                    if (store == Store.YES) {
-                        doc.add(new StoredField(name, dvalue));
-                    }
-                }
-            } else if (ev == Event.END_ARRAY) {
-                if (id == null) {
-                    if (bucket.locked.get() && when == When.Sometime) {
-                        throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
-                                "Lucene locked for " + entityName);
-                    }
-                    String documentId = doc.get("id");
-                    if (documentId == null) {
-                        logger.warn(
-                                "Adding Document without an id field is not recommended; routing, updates and deletions will not be available for this Document.");
-                        bucket.getWriter(null).addDocument(doc);
-                    } else {
-                        bucket.getWriter(Long.valueOf(documentId)).addDocument(doc);
-                    }
-                } else {
-                    if (bucket.locked.get()) {
-                        throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
-                                "Lucene locked for " + entityName);
-                    }
-                    bucket.getWriter(id).updateDocument(new Term("id", id.toString()), doc);
-                }
-                return;
-            } else {
-                throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "Unexpected token in Json: " + ev);
-            }
-        }
-    }
-
     /**
      * Expect an array of documents each encoded as an array of things to add to
      * the document
      */
     @POST
     @Path("addNow/{entityName}")
     public void addNow(@Context HttpServletRequest request, @PathParam("entityName") String entityName)
             throws LuceneException {
+        List<JsonObject> documents;
         logger.debug("Requesting addNow of {}", entityName);
-        int count = 0;
-        try (JsonParser parser = Json.createParser(request.getInputStream())) {
-            Event ev = parser.next(); // Opening [
-            while (true) {
-                ev = parser.next(); // Final ] or another document
-                if (ev == Event.END_ARRAY) {
-                    break;
-                }
-                add(request, entityName, When.Now, parser, null);
-                count++;
+        try (JsonReader reader = Json.createReader(request.getInputStream())) {
documents = reader.readArray().getValuesAs(JsonObject.class); + for (JsonObject document : documents) { + createNow(entityName, document); } } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } - logger.debug("Added {} {} documents", count, entityName); + logger.debug("Added {} {} documents", documents.size(), entityName); + } + + /** + * Extracts values from queryJson in order to add one or more range query terms + * using queryBuilder. + * + * Note that values in queryJson are expected to be precise only to the minute, + * and so to ensure that our range is inclusive, we add 59.999 seconds onto the + * upper value only. + * + * If either upper or lower keys do not yield values then a half open range is + * created. If both are absent, then nothing is added to the query. + * + * @param queryBuilder Builder for the Lucene query. + * @param queryJson JsonObject representing the query parameters. + * @param lowerKey Key in queryJson of the lower date value + * @param upperKey Key in queryJson of the upper date value + * @param fields Name of one or more fields to apply the range query to. + * @throws LuceneException + */ + private static void buildDateRanges(Builder queryBuilder, JsonObject queryJson, String lowerKey, String upperKey, + String... fields) throws LuceneException { + Long lower = parseDate(queryJson, lowerKey, 0); + Long upper = parseDate(queryJson, upperKey, 59999); + if (lower != null || upper != null) { + lower = (lower == null) ? Long.MIN_VALUE : lower; + upper = (upper == null) ? Long.MAX_VALUE : upper; + for (String field : fields) { + queryBuilder.add(LongPoint.newRangeQuery(field, lower, upper), Occur.MUST); + } + } } /* @@ -562,6 +526,37 @@ public void commit() throws LuceneException { } } + private void create(JsonObject operationBody) throws NumberFormatException, IOException, LuceneException { + String entityName = operationBody.getString("_index"); + if (relationships.containsKey(entityName)) { + updateByRelation(operationBody, false); + } + if (indexedEntities.contains(entityName)) { + String icatId = operationBody.getString("_id"); + Document document = parseDocument(operationBody.getJsonObject("doc")); + logger.trace("create {} {}", entityName, document.toString()); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + if (bucket.locked.get()) { + throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, + "Lucene locked for " + entityName); + } + bucket.getWriter(new Long(icatId)).addDocument(document); + } + } + + private void createNow(String entityName, JsonObject documentJson) + throws NumberFormatException, IOException, LuceneException { + if (!documentJson.containsKey("id")) { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "id was not in the document keys " + documentJson.keySet()); + } + String icatId = documentJson.getString("id"); + Document document = parseDocument(documentJson); + logger.trace("create {} {}", entityName, document.toString()); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + bucket.getWriter(new Long(icatId)).addDocument(document); + } + @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @@ -586,37 +581,25 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("search BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); if (userName != null) { - Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", 
- new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"), + Query iuQuery = JoinUtil.createJoinQuery("investigation.id", false, "investigation.id", + new TermQuery(new Term("user.name", userName)), getSearcher(map, "InvestigationUser"), ScoreMode.None); - - Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery, - getSearcher(map, "Investigation"), ScoreMode.None); - - Query dsQuery = JoinUtil.createJoinQuery("id", false, "dataset", invQuery, - getSearcher(map, "Dataset"), ScoreMode.None); - - theQuery.add(dsQuery, Occur.MUST); + theQuery.add(iuQuery, Occur.MUST); } String text = query.getString("text", null); if (text != null) { - theQuery.add(parser.parse(text, "text"), Occur.MUST); + theQuery.add(datafileParser.parse(text, null), Occur.MUST); } - String lower = query.getString("lower", null); - String upper = query.getString("upper", null); - if (lower != null && upper != null) { - theQuery.add(new TermRangeQuery("date", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - } + buildDateRanges(theQuery, query, "lower", "upper", "date"); if (query.containsKey("parameters")) { JsonArray parameters = query.getJsonArray("parameters"); IndexSearcher datafileParameterSearcher = getSearcher(map, "DatafileParameter"); for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("datafile", false, "id", paramQuery.build(), + Query toQuery = JoinUtil.createJoinQuery("datafile.id", false, "id", paramQuery.build(), datafileParameterSearcher, ScoreMode.None); theQuery.add(toQuery, Occur.MUST); } @@ -660,36 +643,26 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("search_ if (userName != null) { - Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", - new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"), + Query iuQuery = JoinUtil.createJoinQuery("investigation.id", false, "investigation.id", + new TermQuery(new Term("user.name", userName)), getSearcher(map, "InvestigationUser"), ScoreMode.None); - Query invQuery = JoinUtil.createJoinQuery("id", false, "investigation", iuQuery, - getSearcher(map, "Investigation"), ScoreMode.None); - - theQuery.add(invQuery, Occur.MUST); + theQuery.add(iuQuery, Occur.MUST); } String text = query.getString("text", null); if (text != null) { - theQuery.add(parser.parse(text, "text"), Occur.MUST); + theQuery.add(datasetParser.parse(text, null), Occur.MUST); } - String lower = query.getString("lower", null); - String upper = query.getString("upper", null); - if (lower != null && upper != null) { - theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - } + buildDateRanges(theQuery, query, "lower", "upper", "startDate", "endDate"); if (query.containsKey("parameters")) { JsonArray parameters = query.getJsonArray("parameters"); IndexSearcher datasetParameterSearcher = getSearcher(map, "DatasetParameter"); for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("dataset", false, "id", paramQuery.build(), + Query toQuery = JoinUtil.createJoinQuery("dataset.id", false, "id", paramQuery.build(), datasetParameterSearcher, ScoreMode.None); theQuery.add(toQuery, Occur.MUST); } @@ -709,6 +682,45 @@ public String datasets(@Context 
HttpServletRequest request, @QueryParam("search_ } + private void delete(JsonObject operationBody) throws LuceneException, IOException { + String entityName = operationBody.getString("_index"); + if (relationships.containsKey(entityName)) { + updateByRelation(operationBody, true); + } + if (indexedEntities.contains(entityName)) { + String icatId = operationBody.getString("_id"); + try { + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + if (bucket.locked.get()) { + throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, + "Lucene locked for " + entityName); + } + logger.trace("delete {} {}", entityName, icatId); + ShardBucket shardBucket = bucket.routeShard(new Long(icatId)); + shardBucket.indexWriter.deleteDocuments(new Term("id", icatId)); + } catch (IOException e) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } + } + } + + /** + * Converts String into number of ms since epoch. + * + * @param value String representing a Date in the format "yyyyMMddHHmm". + * @return Number of ms since epoch, or null if value was null + * @throws java.text.ParseException + */ + protected static Long decodeTime(String value) throws java.text.ParseException { + if (value == null) { + return null; + } else { + synchronized (df) { + return df.parse(value).getTime(); + } + } + } + /** * Encodes core Lucene information (keys preceded by underscores) and a * selection of the Document's source fields to JSON to be returned to @@ -733,11 +745,14 @@ private void encodeResult(JsonGenerator gen, ScoreDoc hit, IndexSearcher searche } gen.writeStartObject("_source"); document.forEach((field) -> { - if (search.fields.contains(field.name())) { - if (field.stringValue() != null) { - gen.write(field.name(), field.stringValue()); - } else if (field.numericValue() != null) { - gen.write(field.name(), field.numericValue().doubleValue()); + String fieldName = field.name(); + if (search.fields.contains(fieldName)) { + if (longFields.contains(fieldName)) { + gen.write(fieldName, field.numericValue().longValue()); + } else if (doubleFields.contains(fieldName)) { + gen.write(fieldName, field.numericValue().doubleValue()); + } else { + gen.write(fieldName, field.stringValue()); } } }); @@ -784,11 +799,11 @@ public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException { /* * Need a new set of IndexSearchers for each search as identified by a uid */ - private IndexSearcher getSearcher(Map bucket, String name) throws IOException { - DirectoryReader[] subReaders = bucket.get(name); + private IndexSearcher getSearcher(Map map, String name) throws IOException { + DirectoryReader[] subReaders = map.get(name); if (subReaders == null) { subReaders = indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).acquireReaders(); - bucket.put(name, subReaders); + map.put(name, subReaders); logger.debug("Remember searcher for {}", name); } return new IndexSearcher(new MultiReader(subReaders, false)); @@ -809,13 +824,6 @@ private void init() { luceneCommitMillis = props.getPositiveInt("commitSeconds") * 1000; luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), new Long(Integer.MAX_VALUE + 1)); - analyzer = new IcatAnalyzer(); - - parser = new StandardQueryParser(); - StandardQueryConfigHandler qpConf = (StandardQueryConfigHandler) parser.getQueryConfigHandler(); - qpConf.set(ConfigurationKeys.ANALYZER, analyzer); - qpConf.set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, true); - timer = new Timer("LuceneCommitTimer"); 
timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); @@ -860,25 +868,18 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); if (userName != null) { - Query iuQuery = JoinUtil.createJoinQuery("investigation", false, "id", - new TermQuery(new Term("name", userName)), getSearcher(map, "InvestigationUser"), + Query iuQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", + new TermQuery(new Term("user.name", userName)), getSearcher(map, "InvestigationUser"), ScoreMode.None); theQuery.add(iuQuery, Occur.MUST); } String text = query.getString("text", null); if (text != null) { - theQuery.add(parser.parse(text, "text"), Occur.MUST); + theQuery.add(investigationParser.parse(text, null), Occur.MUST); } - String lower = query.getString("lower", null); - String upper = query.getString("upper", null); - if (lower != null && upper != null) { - theQuery.add(new TermRangeQuery("startDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - theQuery.add(new TermRangeQuery("endDate", new BytesRef(lower), new BytesRef(upper), true, true), - Occur.MUST); - } + buildDateRanges(theQuery, query, "lower", "upper", "startDate", "endDate"); if (query.containsKey("parameters")) { JsonArray parameters = query.getJsonArray("parameters"); @@ -886,7 +887,7 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s for (JsonValue p : parameters) { BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", paramQuery.build(), + Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", paramQuery.build(), investigationParameterSearcher, ScoreMode.None); theQuery.add(toQuery, Occur.MUST); } @@ -899,8 +900,8 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s for (JsonValue s : samples) { JsonString sample = (JsonString) s; BooleanQuery.Builder sampleQuery = new BooleanQuery.Builder(); - sampleQuery.add(parser.parse(sample.getString(), "text"), Occur.MUST); - Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", sampleQuery.build(), + sampleQuery.add(sampleParser.parse(sample.getString(), null), Occur.MUST); + Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", sampleQuery.build(), sampleSearcher, ScoreMode.None); theQuery.add(toQuery, Occur.MUST); } @@ -909,9 +910,9 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s String userFullName = query.getString("userFullName", null); if (userFullName != null) { BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); - userFullNameQuery.add(parser.parse(userFullName, "text"), Occur.MUST); + userFullNameQuery.add(genericParser.parse(userFullName, "user.fullName"), Occur.MUST); IndexSearcher investigationUserSearcher = getSearcher(map, "InvestigationUser"); - Query toQuery = JoinUtil.createJoinQuery("investigation", false, "id", userFullNameQuery.build(), + Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", userFullNameQuery.build(), investigationUserSearcher, ScoreMode.None); theQuery.add(toQuery, Occur.MUST); } @@ -1000,22 +1001,35 @@ private String luceneSearchResult(String name, Search search, String searchAfter Document lastDocument = isearcher.doc(lastDoc.doc); gen.writeStartArray("fields"); for (SortField sortField : fields) { - Type type = sortField.getType(); - if 
(type.equals(Type.STRING)) { - String lastValue = lastDocument.get(sortField.getField()); - if (lastValue == null) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " - + sortField.getField() - + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); - } - gen.write(lastValue); + IndexableField indexableField = lastDocument.getField(sortField.getField()); + if (indexableField == null) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + + sortField.getField() + + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); + } + Type type = (sortField instanceof SortedNumericSortField) + ? ((SortedNumericSortField) sortField).getNumericType() + : sortField.getType(); + switch (type) { + case LONG: + gen.write(indexableField.numericValue().longValue()); + break; + case DOUBLE: + gen.write(indexableField.numericValue().doubleValue()); + break; + case STRING: + gen.write(indexableField.stringValue()); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, + "SortField.Type must be one of LONG, DOUBLE, STRING, but it was " + type); } } - gen.writeEnd(); + gen.writeEnd(); // end "fields" array } - gen.writeEnd(); + gen.writeEnd(); // end "search_after" object } - gen.writeEnd(); // object + gen.writeEnd(); // end enclosing object } logger.debug("Json returned {}", baos.toString()); return baos.toString(); @@ -1030,34 +1044,167 @@ private Query maybeEmptyQuery(Builder theQuery) { return query; } - private Builder parseParameter(JsonValue p) { + /** + * Parses a date/time value from jsonObject. Can account for either a Long + * value, or a String value encoded in the format yyyyMMddHHmm. + * + * @param jsonObject JsonObject containing the date to be parsed. + * @param key Key of the date/time value in jsonObject. + * @param offset In the case of STRING ValueType, add offset ms before + * returning. This accounts for the fact the String format + * used is only precise to minutes and not seconds. + * @return null if jsonObject does not contain the key, number of ms since epoch + * otherwise. + * @throws LuceneException If the ValueType is not NUMBER or STRING, or if a + * STRING value cannot be parsed. + */ + private static Long parseDate(JsonObject jsonObject, String key, int offset) throws LuceneException { + if (jsonObject.containsKey(key)) { + ValueType valueType = jsonObject.get(key).getValueType(); + switch (valueType) { + case STRING: + String dateString = jsonObject.getString(key); + try { + return decodeTime(dateString) + offset; + } catch (Exception e) { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Could not parse date " + dateString + " using expected format yyyyMMddHHmm"); + } + case NUMBER: + return jsonObject.getJsonNumber(key).longValueExact(); + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Dates should be represented by a NUMBER or STRING JsonValue, but got " + valueType); + } + } + return null; + } + + /** + * Builds a Lucene Document from the parsed json. + * + * @param json Key value pairs of fields. + * @return Lucene Document. 
+ */ + private Document parseDocument(JsonObject json) { + Document document = new Document(); + for (String key : json.keySet()) { + addField(json, document, key); + } + return document; + } + + private void addField(JsonObject json, Document document, String key) { + // SortedDocValuesField need to be indexed in addition to indexing a Field for + // searching/storing, so deal with that first + addSortField(json, document, key); + + if (doubleFields.contains(key)) { + Double value = json.getJsonNumber(key).doubleValue(); + document.add(new DoublePoint(key, value)); + document.add(new StoredField(key, value)); + } else if (longFields.contains(key)) { + Long value = json.getJsonNumber(key).longValueExact(); + document.add(new LongPoint(key, value)); + document.add(new StoredField(key, value)); + } else if (textFields.contains(key)) { + document.add(new TextField(key, json.getString(key), Store.YES)); + } else { + document.add(new StringField(key, json.getString(key), Store.YES)); + } + } + + private void addSortField(JsonObject json, Document document, String key) { + if (sortFields.contains(key)) { + if (longFields.contains(key)) { + document.add(new SortedNumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); + } else if (doubleFields.contains(key)) { + long sortableLong = NumericUtils.doubleToSortableLong(json.getJsonNumber(key).doubleValue()); + document.add(new SortedNumericDocValuesField(key, sortableLong)); + } else { + document.add(new SortedDocValuesField(key, new BytesRef(json.getString(key)))); + } + } + } + + private void addSortField(IndexableField field, Document document) { + String key = field.name(); + if (sortFields.contains(key)) { + if (longFields.contains(key)) { + document.add(new SortedNumericDocValuesField(key, field.numericValue().longValue())); + } else if (doubleFields.contains(key)) { + long sortableLong = NumericUtils.doubleToSortableLong(field.numericValue().doubleValue()); + document.add(new SortedNumericDocValuesField(key, sortableLong)); + } else { + document.add(new SortedDocValuesField(key, new BytesRef(field.stringValue()))); + } + } + } + + /** + * Returns a new Lucene Document that has the same fields as were present in + * oldDocument, except in cases where json has an entry for that field. In this + * case, the json value is used instead. + * + * @param json Key value pairs of fields to overwrite fields already + * present in oldDocument. + * @param oldDocument Lucene Document to be updated. + * @return Lucene Document with updated fields. + */ + private Document updateDocument(JsonObject json, Document oldDocument) { + Document newDocument = new Document(); + for (IndexableField field : oldDocument.getFields()) { + String fieldName = field.name(); + if (json.keySet().contains(fieldName)) { + addField(json, newDocument, fieldName); + } else { + addSortField(field, newDocument); + newDocument.add(field); + } + } + return newDocument; + } + + /** + * Returns a new Lucene Document that has the same fields as were present in + * oldDocument, except in cases where the field name starts with fieldPrefix. + * + * @param fieldPrefix Any fields with a name starting with this String will not + * be present in the returned Document. + * @param oldDocument Lucene Document to be pruned. + * @return Lucene Document with pruned fields. 
+     */
+    private Document pruneDocument(String fieldPrefix, Document oldDocument) {
+        Document newDocument = new Document();
+        for (IndexableField field : oldDocument.getFields()) {
+            if (!field.name().startsWith(fieldPrefix)) {
+                addSortField(field, newDocument);
+                newDocument.add(field);
+            }
+        }
+        return newDocument;
+    }
+
+    private Builder parseParameter(JsonValue p) throws LuceneException {
         JsonObject parameter = (JsonObject) p;
         BooleanQuery.Builder paramQuery = new BooleanQuery.Builder();
         String pName = parameter.getString("name", null);
         if (pName != null) {
-            paramQuery.add(new WildcardQuery(new Term("name", pName)), Occur.MUST);
+            paramQuery.add(new WildcardQuery(new Term("type.name", pName)), Occur.MUST);
         }

         String pUnits = parameter.getString("units", null);
         if (pUnits != null) {
-            paramQuery.add(new WildcardQuery(new Term("units", pUnits)), Occur.MUST);
+            paramQuery.add(new WildcardQuery(new Term("type.units", pUnits)), Occur.MUST);
         }
-        String pStringValue = parameter.getString("stringValue", null);
-        String pLowerDateValue = parameter.getString("lowerDateValue", null);
-        String pUpperDateValue = parameter.getString("upperDateValue", null);
-        Double pLowerNumericValue = parameter.containsKey("lowerNumericValue")
-                ? parameter.getJsonNumber("lowerNumericValue").doubleValue()
-                : null;
-        Double pUpperNumericValue = parameter.containsKey("upperNumericValue")
-                ? parameter.getJsonNumber("upperNumericValue").doubleValue()
-                : null;
-        if (pStringValue != null) {
+        if (parameter.containsKey("stringValue")) {
+            String pStringValue = parameter.getString("stringValue", null);
             paramQuery.add(new WildcardQuery(new Term("stringValue", pStringValue)), Occur.MUST);
-        } else if (pLowerDateValue != null && pUpperDateValue != null) {
-            paramQuery.add(new TermRangeQuery("dateTimeValue", new BytesRef(pLowerDateValue),
-                    new BytesRef(pUpperDateValue), true, true), Occur.MUST);
-
-        } else if (pLowerNumericValue != null && pUpperNumericValue != null) {
+        } else if (parameter.containsKey("lowerDateValue") && parameter.containsKey("upperDateValue")) {
+            buildDateRanges(paramQuery, parameter, "lowerDateValue", "upperDateValue", "dateTimeValue");
+        } else if (parameter.containsKey("lowerNumericValue") && parameter.containsKey("upperNumericValue")) {
+            Double pLowerNumericValue = parameter.getJsonNumber("lowerNumericValue").doubleValue();
+            Double pUpperNumericValue = parameter.getJsonNumber("upperNumericValue").doubleValue();
             paramQuery.add(DoublePoint.newRangeQuery("numericValue", pLowerNumericValue, pUpperNumericValue),
                     Occur.MUST);
         }
@@ -1092,7 +1239,13 @@ private Sort parseSort(String sort) throws LuceneException {
                         "Sort order must be 'asc' or 'desc' but it was '" + order + "'");
             }

-            fields.add(new SortField(key, Type.STRING, reverse));
+            if (longFields.contains(key)) {
+                fields.add(new SortedNumericSortField(key, Type.LONG, reverse));
+            } else if (doubleFields.contains(key)) {
+                fields.add(new SortedNumericSortField(key, Type.DOUBLE, reverse));
+            } else {
+                fields.add(new SortField(key, Type.STRING, reverse));
+            }
         }
         return new Sort(fields.toArray(new SortField[0]));
     }
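parseSort above and parseSearchAfter in the next hunk have to agree: the entries of the search_after "fields" array line up one-to-one with the sort fields, and may now be longs or doubles as well as strings. A hypothetical search_after value for a sort on a long date field followed by a string name field, built with javax.json (the values are invented; a "score" entry may be added when sorting by relevance):

    // What parseSearchAfter expects to decode
    JsonObject searchAfter = Json.createObjectBuilder()
            .add("doc", 73)       // Lucene doc id of the last hit
            .add("shardIndex", 0) // shard that produced it
            .add("fields", Json.createArrayBuilder()
                    .add(1649375940000L) // last date sort key, ms since epoch
                    .add("df_000073"))   // last name sort key
            .build();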
@@ -1107,8 +1260,10 @@ private Sort parseSort(String sort) throws LuceneException {
      *                    order.
      * @return FieldDoc object built from the provided String, or null if
      *         searchAfter was itself null or an empty String.
+     * @throws LuceneException If an entry in the fields array is not a STRING or
+     *                         NUMBER
      */
-    private FieldDoc parseSearchAfter(String searchAfter) {
+    private FieldDoc parseSearchAfter(String searchAfter) throws LuceneException {
         if (searchAfter != null && !searchAfter.equals("")) {
             logger.debug("Attempting to parseSearchAfter from {}", searchAfter);
             JsonReader reader = Json.createReader(new StringReader(searchAfter));
@@ -1116,14 +1271,30 @@ private FieldDoc parseSearchAfter(String searchAfter) {
             int doc = object.getInt("doc");
             int shardIndex = object.getInt("shardIndex");
             float score = Float.NaN;
-            List<BytesRef> fields = new ArrayList<>();
+            List<Object> fields = new ArrayList<>();
             if (object.containsKey("score")) {
                 score = object.getJsonNumber("score").bigDecimalValue().floatValue();
             }
             if (object.containsKey("fields")) {
-                List<JsonString> jsonStrings = object.getJsonArray("fields").getValuesAs(JsonString.class);
-                for (JsonString jsonString : jsonStrings) {
-                    fields.add(new BytesRef(jsonString.getString()));
+                JsonArray jsonArray = object.getJsonArray("fields");
+                for (JsonValue value : jsonArray) {
+                    switch (value.getValueType()) {
+                        case NUMBER:
+                            JsonNumber number = ((JsonNumber) value);
+                            if (number.toString().contains(".")) {
+                                fields.add(number.doubleValue());
+                            } else {
+                                fields.add(number.longValueExact());
+                            }
+                            break;
+                        case STRING:
+                            fields.add(new BytesRef(((JsonString) value).getString()));
+                            break;
+                        default:
+                            throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
+                                    "fields should be an array of STRING and NUMBER, but had entry of type "
+                                            + value.getValueType());
+                    }
                 }
             }
             return new FieldDoc(doc, score, fields.toArray(), shardIndex);
@@ -1147,4 +1318,52 @@ public void unlock(@PathParam("entityName") String entityName) throws LuceneExce
         }
     }

+    private void update(JsonObject operationBody) throws LuceneException, NumberFormatException, IOException {
+        String entityName = operationBody.getString("_index");
+        if (relationships.containsKey(entityName)) {
+            updateByRelation(operationBody, false);
+        }
+        if (indexedEntities.contains(entityName)) {
+            String icatId = operationBody.getString("_id");
+            Document document = parseDocument(operationBody.getJsonObject("doc"));
+            IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k));
+            if (bucket.locked.get()) {
+                throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
+                        "Lucene locked for " + entityName);
+            }
+            logger.trace("update: {}", document);
+            bucket.getWriter(new Long(icatId)).updateDocument(new Term("id", icatId), document);
+        }
+    }
+
+    private void updateByRelation(JsonObject operationBody, Boolean delete)
+            throws LuceneException, NumberFormatException, IOException {
+        for (ParentRelationship parentRelationship : relationships.get(operationBody.getString("_index"))) {
+            String childId = operationBody.getString("_id");
+            IndexBucket bucket = indexBuckets.computeIfAbsent(parentRelationship.parentName, k -> new IndexBucket(k));
+            if (bucket.locked.get()) {
+                throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE,
+                        "Lucene locked for " + parentRelationship.parentName);
+            }
+            IndexSearcher searcher = getSearcher(new HashMap<>(), parentRelationship.parentName);
+
+            int blockSize = 10000;
+            TermQuery query = new TermQuery(new Term(parentRelationship.fieldPrefix + ".id", childId));
+            Sort sort = new Sort(new SortField("id", Type.STRING));
+            ScoreDoc[] scoreDocs = searcher.search(query, blockSize, sort).scoreDocs;
+            while (scoreDocs.length != 0) {
+                for (ScoreDoc scoreDoc : scoreDocs) {
+                    Document oldDocument = searcher.doc(scoreDoc.doc);
+                    String parentId = oldDocument.get("id");
+                    Document newDocument = delete ? pruneDocument(parentRelationship.fieldPrefix, oldDocument)
+                            : updateDocument(operationBody.getJsonObject("doc"), oldDocument);
+                    logger.trace("updateByRelation: {}", newDocument);
+                    bucket.getWriter(new Long(parentId)).updateDocument(new Term("id", parentId), newDocument);
+                }
+                scoreDocs = searcher.searchAfter(scoreDocs[scoreDocs.length - 1], query, blockSize, sort).scoreDocs;
+            }
+        }
+    }
+
 }

From fbc99e667474342b1ba482a233c522ada98685c3 Mon Sep 17 00:00:00 2001
From: Patrick Austin
Date: Thu, 14 Apr 2022 00:21:02 +0100
Subject: [PATCH 34/73] Enable generic String and range facets #19

---
 .../lucene/FacetDimensionRequest.java         |  26 ++
 .../java/org/icatproject/lucene/Lucene.java   | 344 +++++++++++-------
 2 files changed, 239 insertions(+), 131 deletions(-)
 create mode 100644 src/main/java/org/icatproject/lucene/FacetDimensionRequest.java

diff --git a/src/main/java/org/icatproject/lucene/FacetDimensionRequest.java b/src/main/java/org/icatproject/lucene/FacetDimensionRequest.java
new file mode 100644
index 0000000..736f2d3
--- /dev/null
+++ b/src/main/java/org/icatproject/lucene/FacetDimensionRequest.java
@@ -0,0 +1,26 @@
+package org.icatproject.lucene;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.facet.range.Range;
+
+public class FacetDimensionRequest {
+
+    private String dimension;
+    private List<Range> ranges;
+
+    public FacetDimensionRequest(String dimension) {
+        this.dimension = dimension;
+        this.ranges = new ArrayList<>();
+    }
+
+    public List<Range> getRanges() {
+        return ranges;
+    }
+
+    public String getDimension() {
+        return dimension;
+    }
+
+}
diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java
index c73d56e..6e118f6 100755
--- a/src/main/java/org/icatproject/lucene/Lucene.java
+++ b/src/main/java/org/icatproject/lucene/Lucene.java
@@ -52,9 +52,9 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.facet.LabelAndValue;
+import org.apache.lucene.facet.range.DoubleRange;
+import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
+import org.apache.lucene.facet.range.LongRange;
+import org.apache.lucene.facet.range.LongRangeFacetCounts;
+import org.apache.lucene.facet.range.Range;
 import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -86,6 +92,7 @@
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.TermInSetQuery;
 import
org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldDocs; @@ -129,7 +136,7 @@ public ShardBucket(java.nio.file.Path shardPath) throws IOException { logger.debug("Directory only has the write.lock file so store and delete a dummy document"); Document doc = new Document(); doc.add(new StringField("dummy", "dummy", Store.NO)); - indexWriter.addDocument(doc); + indexWriter.addDocument(facetsConfig.build(doc)); indexWriter.commit(); indexWriter.deleteDocuments(new Term("dummy", "dummy")); indexWriter.commit(); @@ -289,6 +296,7 @@ public class Search { public Query query; public Sort sort; public Set fields = new HashSet(); + public Set dimensions = new HashSet(); } private static class ParentRelationship { @@ -307,6 +315,7 @@ public ParentRelationship(String parentName, String fieldPrefix) { private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); private static final Set doubleFields = new HashSet<>(); + private static final Set facetFields = new HashSet<>(); private static final Set longFields = new HashSet<>(); private static final Set sortFields = new HashSet<>(); private static final Set textFields = new HashSet<>(); @@ -325,6 +334,7 @@ public ParentRelationship(String parentName, String fieldPrefix) { df.setTimeZone(tz); doubleFields.add("numericValue"); + facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name")); longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue")); sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "id", "date", "startDate", "endDate", "name")); @@ -550,7 +560,7 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } - bucket.getWriter(new Long(icatId)).addDocument(document); + bucket.getWriter(new Long(icatId)).addDocument(facetsConfig.build(document)); } } @@ -564,7 +574,7 @@ private void createNow(String entityName, JsonObject documentJson) Document document = parseDocument(documentJson); logger.trace("create {} {}", entityName, document.toString()); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); - bucket.getWriter(new Long(icatId)).addDocument(document); + bucket.getWriter(new Long(icatId)).addDocument(facetsConfig.build(document)); } @POST @@ -585,26 +595,8 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("search } } - @POST - @Consumes(MediaType.APPLICATION_JSON) - @Produces(MediaType.APPLICATION_JSON) - @Path("datafiles/facet") - public String datafilesFacet(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, - @QueryParam("maxLabels") int maxLabels, @QueryParam("sort") String sort) throws LuceneException { - Long uid = null; - try { - uid = bucketNum.getAndIncrement(); - Search search = datafilesQuery(request, sort, uid); - return luceneFacetResult("Datafile", search, searchAfter, maxResults, maxLabels, uid); - } catch (Exception e) { - logger.error("Error", e); - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - - } - - private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) throws IOException, QueryNodeException, LuceneException { + private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) + throws IOException, 
QueryNodeException, LuceneException { Search search = new Search(); searches.put(uid, search); Map readerMap = new HashMap<>(); @@ -659,72 +651,10 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("search_ @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { Long uid = null; - try { - uid = bucketNum.getAndIncrement(); - Search search = new Search(); - searches.put(uid, search); - Map readerMap = new HashMap<>(); - search.readerMap = readerMap; - search.sort = parseSort(sort); - try (JsonReader r = Json.createReader(request.getInputStream())) { - JsonObject o = r.readObject(); - JsonObject query = o.getJsonObject("query"); - String userName = query.getString("user", null); - - BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - - if (userName != null) { - - Query iuQuery = JoinUtil.createJoinQuery("investigation.id", false, "investigation.id", - new TermQuery(new Term("user.name", userName)), getSearcher(readerMap, "InvestigationUser"), - ScoreMode.None); - - theQuery.add(iuQuery, Occur.MUST); - } - - String text = query.getString("text", null); - if (text != null) { - theQuery.add(datasetParser.parse(text, null), Occur.MUST); - } - - buildDateRanges(theQuery, query, "lower", "upper", "startDate", "endDate"); - - if (query.containsKey("parameters")) { - JsonArray parameters = query.getJsonArray("parameters"); - IndexSearcher datasetParameterSearcher = getSearcher(readerMap, "DatasetParameter"); - for (JsonValue p : parameters) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("dataset.id", false, "id", paramQuery.build(), - datasetParameterSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - } - search.query = maybeEmptyQuery(theQuery); - if (o.containsKey("fields")) { - List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); - jsonStrings.forEach((jsonString) -> search.fields.add(jsonString.getString())); - } - } - return luceneSearchResult("Dataset", search, searchAfter, maxResults, uid); - } catch (Exception e) { - logger.error("Error", e); - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - - } - - @POST - @Consumes(MediaType.APPLICATION_JSON) - @Produces(MediaType.APPLICATION_JSON) - @Path("datasets/facet") - public String datasetsFacet(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, - @QueryParam("maxLabels") int maxLabels, @QueryParam("sort") String sort) throws LuceneException { - Long uid = null; try { uid = bucketNum.getAndIncrement(); Search search = datasetsQuery(request, sort, uid); - return luceneFacetResult("Dataset", search, searchAfter, maxResults, maxLabels, uid); + return luceneSearchResult("Dataset", search, searchAfter, maxResults, uid); } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); @@ -733,7 +663,8 @@ public String datasetsFacet(@Context HttpServletRequest request, @QueryParam("se } - private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) throws IOException, QueryNodeException, LuceneException { + private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) + throws IOException, QueryNodeException, LuceneException { Search search = new Search(); searches.put(uid, search); Map readerMap = new HashMap<>(); @@ -876,6 +807,25 @@ private void exit() { } } + @POST + @Consumes(MediaType.APPLICATION_JSON) + 
@Produces(MediaType.APPLICATION_JSON) + @Path("{entityName}/facet") + public String facet(@PathParam("entityName") String entityName, @Context HttpServletRequest request, + @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, + @QueryParam("maxLabels") int maxLabels, @QueryParam("sort") String sort) throws LuceneException { + Long uid = null; + try { + uid = bucketNum.getAndIncrement(); + Search search = genericQuery(request, sort, uid); + return luceneFacetResult(entityName, search, searchAfter, maxResults, maxLabels, uid); + } catch (Exception e) { + logger.error("Error", e); + freeSearcher(uid); + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } + } + @DELETE @Path("freeSearcher/{uid}") public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException { @@ -895,6 +845,125 @@ public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException { } } + /** + * Parses a query and associated information from an incoming request without + * any logic specific to a single index or entity. As such it may not be as + * powerful, but it is sufficient for simple queries (like those for faceting). + * + * @param request Request containing the query and other Json encoded + * information such as fields and dimensions. + * @param sort String representing the sorting criteria for the search. + * @param uid Identifier for the search. + * @return Search object with the query, sort, and optionally the fields and + * dimensions to search set. + * @throws IOException If Json cannot be parsed from the request + * @throws LuceneException If the types of the JsonValues in the query do not + * match those supported by icat.lucene + */ + private Search genericQuery(HttpServletRequest request, String sort, Long uid) throws IOException, LuceneException { + Search search = new Search(); + searches.put(uid, search); + Map readerMap = new HashMap<>(); + search.readerMap = readerMap; + search.sort = parseSort(sort); + try (JsonReader r = Json.createReader(request.getInputStream())) { + JsonObject o = r.readObject(); + JsonObject jsonQuery = o.getJsonObject("query"); + BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); + for (Entry entry : jsonQuery.entrySet()) { + String field = entry.getKey(); + ValueType valueType = entry.getValue().getValueType(); + switch (valueType) { + case STRING: + JsonString stringValue = (JsonString) entry.getValue(); + luceneQuery.add(new TermQuery(new Term(field, stringValue.getString())), Occur.MUST); + break; + case NUMBER: + JsonNumber numberValue = (JsonNumber) entry.getValue(); + if (longFields.contains(field)) { + luceneQuery.add(LongPoint.newExactQuery(field, numberValue.longValueExact()), Occur.FILTER); + } else if (doubleFields.contains(field)) { + luceneQuery.add(DoublePoint.newExactQuery(field, numberValue.doubleValue()), Occur.FILTER); + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Value had type NUMBER, but field " + field + + " is not a known longField or doubleField"); + } + break; + case ARRAY: + // Only support an array of String values, as a list of ICAT ids is currently the only use case + JsonArray arrayValue = (JsonArray) entry.getValue(); + ArrayList bytesArray = new ArrayList<>(); + for (JsonString value : arrayValue.getValuesAs(JsonString.class)) { + bytesArray.add(new BytesRef(value.getChars())); + } + luceneQuery.add(new TermInSetQuery(field, bytesArray), Occur.MUST); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, 
+ "Query values should be ARRAY, STRING or NUMBER, but had value of type " + valueType); + } + } + search.query = maybeEmptyQuery(luceneQuery); + logger.info("Query: {}", search.query); + if (o.containsKey("fields")) { + List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); + jsonStrings.forEach((jsonString) -> search.fields.add(jsonString.getString())); + logger.info("Fields: {}", search.fields); + } + if (o.containsKey("dimensions")) { + List dimensionObjects = o.getJsonArray("dimensions").getValuesAs(JsonObject.class); + for (JsonObject dimensionObject : dimensionObjects) { + if (!dimensionObject.containsKey("dimension")) { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "'dimension' not specified for facet request " + dimensionObject.toString()); + } + String dimension = dimensionObject.getString("dimension"); + FacetDimensionRequest facetDimensionRequest = new FacetDimensionRequest(dimension); + if (dimensionObject.containsKey("ranges")) { + List ranges = facetDimensionRequest.getRanges(); + if (longFields.contains(dimension)) { + for (JsonObject range : dimensionObject.getJsonArray("ranges") + .getValuesAs(JsonObject.class)) { + Long lower = Long.MIN_VALUE; + Long upper = Long.MAX_VALUE; + if (range.containsKey("lower")) { + lower = range.getJsonNumber("lower").longValueExact(); + } + if (range.containsKey("upper")) { + upper = range.getJsonNumber("upper").longValueExact(); + } + String label = lower.toString() + "_" + upper.toString(); + ranges.add(new LongRange(label, lower, true, upper, true)); + } + } else if (doubleFields.contains(dimension)) { + for (JsonObject range : dimensionObject.getJsonArray("ranges") + .getValuesAs(JsonObject.class)) { + Double lower = Double.MIN_VALUE; + Double upper = Double.MAX_VALUE; + if (range.containsKey("lower")) { + lower = range.getJsonNumber("lower").doubleValue(); + } + if (range.containsKey("upper")) { + upper = range.getJsonNumber("upper").doubleValue(); + } + String label = lower.toString() + "_" + upper.toString(); + ranges.add(new DoubleRange(label, lower, true, upper, true)); + } + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "'ranges' specified for dimension " + dimension + + " but this is not a supported numeric field"); + } + } + search.dimensions.add(facetDimensionRequest); + } + logger.info("Dimensions: {}", search.dimensions.size()); + } + } + return search; + } + private MultiReader getMultiReader(Map readerMap, String name) throws IOException { DirectoryReader[] subReaders = readerMap.get(name); if (subReaders == null) { @@ -964,25 +1033,8 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s } } - @POST - @Consumes(MediaType.APPLICATION_JSON) - @Produces(MediaType.APPLICATION_JSON) - @Path("investigations/facet") - public String investigationsFacet(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, - @QueryParam("maxLabels") int maxLabels, @QueryParam("sort") String sort) throws LuceneException { - Long uid = null; - try { - uid = bucketNum.getAndIncrement(); - Search search = investigationsQuery(request, sort, uid); - return luceneFacetResult("Investigation", search, searchAfter, maxResults, maxLabels, uid); - } catch (Exception e) { - logger.error("Error", e); - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - } - - private Search investigationsQuery(HttpServletRequest request, String sort, Long 
uid) throws IOException, QueryNodeException, LuceneException { + private Search investigationsQuery(HttpServletRequest request, String sort, Long uid) + throws IOException, QueryNodeException, LuceneException { Search search = new Search(); searches.put(uid, search); Map readerMap = new HashMap<>(); @@ -1073,54 +1125,78 @@ public void lock(@PathParam("entityName") String entityName) throws LuceneExcept } } - private String luceneFacetResult(String name, Search search, String searchAfter, int maxResults, int maxLabels, Long uid) - throws IOException, IllegalStateException { - List results; + private String luceneFacetResult(String name, Search search, String searchAfter, int maxResults, int maxLabels, + Long uid) throws IOException, IllegalStateException, LuceneException { + List results = new ArrayList<>(); + List rangeResults = new ArrayList<>(); if (maxResults <= 0 || maxLabels <= 0) { // This will result in no Facets and a null pointer, so return early - logger.warn("No facets possible for maxResults={}, maxLabels={}, returning empty list", maxResults, maxLabels); - results = new ArrayList<>(); + logger.warn("Cannot facet when maxResults={}, maxLabels={}, returning empty list", maxResults, maxLabels); } else { MultiReader directoryReader = getMultiReader(search.readerMap, name); IndexSearcher indexSearcher = new IndexSearcher(directoryReader); + FacetsCollector facetsCollector = new FacetsCollector(); + FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); logger.debug("To facet in {} for {} {} with {} from {} ", name, search.query, maxResults, indexSearcher, searchAfter); + for (FacetDimensionRequest facetDimensionRequest : search.dimensions) { + if (facetDimensionRequest.getRanges().size() > 0) { + String dimension = facetDimensionRequest.getDimension(); + if (longFields.contains(dimension)) { + LongRange[] ranges = facetDimensionRequest.getRanges().toArray(new LongRange[0]); + Facets facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges); + rangeResults.addAll(facets.getAllDims(maxLabels)); + } else if (doubleFields.contains(dimension)) { + DoubleRange[] ranges = facetDimensionRequest.getRanges().toArray(new DoubleRange[0]); + Facets facets = new DoubleRangeFacetCounts(dimension, facetsCollector, ranges); + rangeResults.addAll(facets.getAllDims(maxLabels)); + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "'ranges' specified for dimension " + dimension + + " but this is not a supported numeric field"); + } + } + } try { DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(directoryReader); - FacetsCollector facetsCollector = new FacetsCollector(); - FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); logger.debug("facets: {}, maxLabels: {}, maxResults: {}", facets, maxLabels, maxResults); results = facets.getAllDims(maxLabels); } catch (IllegalArgumentException e) { // This can occur if no fields in the index have been faceted logger.error("No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage()); - results = new ArrayList<>(); } catch (IllegalStateException e) { - // This can occur if we do not create the IndexSearcher from the same DirectoryReader as we used to - // create the state + // This can occur if we do not create the IndexSearcher from the same + // DirectoryReader as we used to create the state logger.error("IndexSearcher used is not 
based on the DirectoryReader used for facet counting: " - + e.getClass() + " " + e.getMessage()); + + e.getClass() + " " + e.getMessage()); throw e; } logger.debug("Facets found for " + results.size() + " dimensions"); } ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartObject(); - gen.writeStartObject("dimensions"); // object containing all facet dimensions - for (FacetResult result : results) { + gen.writeStartObject().writeStartObject("dimensions"); // object containing all facet dimensions + Set dimensionSet = new HashSet<>(); + search.dimensions.forEach(d -> dimensionSet.add(d.getDimension())); + writeFacetResults(dimensionSet, results, gen); + writeFacetResults(new HashSet<>(), rangeResults, gen); + gen.writeEnd().writeEnd(); // object containing dimensions + } + logger.debug("Json returned {}", baos.toString()); + return baos.toString(); + } + + private void writeFacetResults(Set dimensionSet, List results, JsonGenerator gen) { + for (FacetResult result : results) { + if (dimensionSet.size() == 0 || dimensionSet.contains(result.dim)) { gen.writeStartObject(result.dim); // object containing labelValues for a given dimension for (LabelAndValue labelValue : result.labelValues) { gen.write(labelValue.label, labelValue.value.longValue()); } gen.writeEnd(); // object containing labelValues } - gen.writeEnd(); // object containing dimensions - gen.writeEnd(); } - logger.debug("Json returned {}", baos.toString()); - return baos.toString(); } private String luceneSearchResult(String name, Search search, String searchAfter, int maxResults, Long uid) @@ -1271,6 +1347,11 @@ private void addField(JsonObject json, Document document, String key) { // searching/storing, so deal with that first addSortField(json, document, key); + // Likewise, faceted fields should be considered separately + if (facetFields.contains(key)) { + document.add(new SortedSetDocValuesFacetField(key, json.getString(key))); + } + if (doubleFields.contains(key)) { Double value = json.getJsonNumber(key).doubleValue(); document.add(new DoublePoint(key, value)); @@ -1289,10 +1370,10 @@ private void addField(JsonObject json, Document document, String key) { private void addSortField(JsonObject json, Document document, String key) { if (sortFields.contains(key)) { if (longFields.contains(key)) { - document.add(new SortedNumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); + document.add(new NumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); } else if (doubleFields.contains(key)) { long sortableLong = NumericUtils.doubleToSortableLong(json.getJsonNumber(key).doubleValue()); - document.add(new SortedNumericDocValuesField(key, sortableLong)); + document.add(new NumericDocValuesField(key, sortableLong)); } else { document.add(new SortedDocValuesField(key, new BytesRef(json.getString(key)))); } @@ -1303,10 +1384,10 @@ private void addSortField(IndexableField field, Document document) { String key = field.name(); if (sortFields.contains(key)) { if (longFields.contains(key)) { - document.add(new SortedNumericDocValuesField(key, field.numericValue().longValue())); + document.add(new NumericDocValuesField(key, field.numericValue().longValue())); } else if (doubleFields.contains(key)) { long sortableLong = NumericUtils.doubleToSortableLong(field.numericValue().doubleValue()); - document.add(new SortedNumericDocValuesField(key, sortableLong)); + document.add(new NumericDocValuesField(key, sortableLong)); } else { 
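// Non-numeric sort fields fall back to a string-based SortedDocValuesField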
document.add(new SortedDocValuesField(key, new BytesRef(field.stringValue()))); } @@ -1504,7 +1585,7 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm "Lucene locked for " + entityName); } logger.trace("update: {}", document); - bucket.getWriter(new Long(icatId)).updateDocument(new Term("id", icatId), document); + bucket.getWriter(new Long(icatId)).updateDocument(new Term("id", icatId), facetsConfig.build(document)); } } @@ -1531,7 +1612,8 @@ private void updateByRelation(JsonObject operationBody, Boolean delete) Document newDocument = delete ? pruneDocument(parentRelationship.fieldPrefix, oldDocument) : updateDocument(operationBody.getJsonObject("doc"), oldDocument); logger.trace("updateByRelation: {}", newDocument); - bucket.getWriter(new Long(parentId)).updateDocument(new Term("id", parentId), newDocument); + bucket.getWriter(new Long(parentId)).updateDocument(new Term("id", parentId), + facetsConfig.build(newDocument)); } scoreDocs = searcher.searchAfter(scoreDocs[scoreDocs.length - 1], query, blockSize, sort).scoreDocs; } From 8907a7c351d3825a90d40ac20ed555ab98cdfaa0 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 14 Apr 2022 05:02:32 +0100 Subject: [PATCH 35/73] Basic unit conversion #19 --- .../java/org/icatproject/lucene/Lucene.java | 44 ++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 6e118f6..ebf9dba 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -37,6 +37,10 @@ import javax.json.JsonValue; import javax.json.JsonValue.ValueType; import javax.json.stream.JsonGenerator; +import javax.measure.IncommensurableException; +import javax.measure.Unit; +import javax.measure.UnitConverter; +import javax.measure.format.MeasurementParseException; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; @@ -110,6 +114,9 @@ import org.slf4j.Marker; import org.slf4j.MarkerFactory; +import tech.units.indriya.format.SimpleUnitFormat; +import tech.units.indriya.unit.Units; + @Path("/") @Singleton public class Lucene { @@ -313,6 +320,7 @@ public ParentRelationship(String parentName, String fieldPrefix) { private static final Logger logger = LoggerFactory.getLogger(Lucene.class); private static final Marker fatal = MarkerFactory.getMarker("FATAL"); private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); + private static final SimpleUnitFormat unitFormat = SimpleUnitFormat.getInstance(); private static final Set doubleFields = new HashSet<>(); private static final Set facetFields = new HashSet<>(); @@ -333,11 +341,13 @@ public ParentRelationship(String parentName, String fieldPrefix) { TimeZone tz = TimeZone.getTimeZone("GMT"); df.setTimeZone(tz); - doubleFields.add("numericValue"); + unitFormat.alias(Units.CELSIUS, "celsius"); // TODO this should be generalised with the units we need + + doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI")); facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name")); longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue")); sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "id", "date", "startDate", - "endDate", "name")); + "endDate", "name", "stringValue", "dateTimeValue", "numericValue", "numericValueSI")); textFields.addAll(Arrays.asList("name", "visitId", 
"description", "datafileFormat.name", "sample.name", "sample.type.name", "title", "summary", "facility.name", "user.fullName")); @@ -1365,6 +1375,36 @@ private void addField(JsonObject json, Document document, String key) { } else { document.add(new StringField(key, json.getString(key), Store.YES)); } + + // Whenever the units are set or changed, convert to SI + if (key.equals("type.units")) { + String unitString = json.getString("type.units"); + IndexableField field = document.getField("numericValue"); + double value; + if (field != null) { + value = NumericUtils.sortableLongToDouble(field.numericValue().longValue()); + } else if (json.containsKey("numericValue")) { + value = json.getJsonNumber(key).doubleValue(); + } else { + // Strings and date/time values also have units, so if we aren't dealing with a + // number don't convert + return; + } + try { + logger.trace("Attempting to convert {} {}", value, unitString); + Unit unit = unitFormat.parse(unitString); + Unit systemUnit = unit.getSystemUnit(); + UnitConverter converter = unit.getConverterToAny(systemUnit); + Double systemValue = converter.convert(value); + document.add(new DoublePoint("numericValueSI", systemValue)); + document.add(new StoredField("numericValueSI", systemValue)); + long sortableLong = NumericUtils.doubleToSortableLong(systemValue); + document.add(new NumericDocValuesField("numericValueSI", sortableLong)); + document.add(new StringField("type.unitsSI", systemUnit.getName(), Store.YES)); + } catch (IncommensurableException | MeasurementParseException e) { + logger.error("Unable to convert 'type.units' of {} due to {}", unitString, e.getMessage()); + } + } } private void addSortField(JsonObject json, Document document, String key) { From 8438e1fbc72f21d7a8048ccd4554b04e58826df1 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 14 Apr 2022 05:19:10 +0100 Subject: [PATCH 36/73] Add unit conversion dependencies #19 --- pom.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pom.xml b/pom.xml index d6ec96c..ab0d094 100755 --- a/pom.xml +++ b/pom.xml @@ -98,6 +98,18 @@ 7.0 + + javax.measure + unit-api + 2.1.3 + + + + tech.units + indriya + 2.1.3 + + org.icatproject icat.utils From 45a39485abf063bed2d423f67db3d11f0ca630d6 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Sat, 30 Apr 2022 02:03:33 +0100 Subject: [PATCH 37/73] Refactor unit conversion to utils #19 --- pom.xml | 14 +- .../java/org/icatproject/lucene/Lucene.java | 251 +++++++++--------- src/main/resources/run.properties | 1 + 3 files changed, 129 insertions(+), 137 deletions(-) diff --git a/pom.xml b/pom.xml index ab0d094..ae6d0c3 100755 --- a/pom.xml +++ b/pom.xml @@ -98,22 +98,10 @@ 7.0 - - javax.measure - unit-api - 2.1.3 - - - - tech.units - indriya - 2.1.3 - - org.icatproject icat.utils - 4.16.1 + 4.16.2-SNAPSHOT diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index ebf9dba..1edc024 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -32,18 +32,14 @@ import javax.json.JsonArray; import javax.json.JsonNumber; import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; import javax.json.JsonReader; import javax.json.JsonString; import javax.json.JsonValue; import javax.json.JsonValue.ValueType; import javax.json.stream.JsonGenerator; -import javax.measure.IncommensurableException; -import javax.measure.Unit; -import javax.measure.UnitConverter; -import 
javax.measure.format.MeasurementParseException; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.Consumes; -import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.Path; @@ -109,14 +105,13 @@ import org.apache.lucene.util.NumericUtils; import org.icatproject.lucene.exceptions.LuceneException; import org.icatproject.utils.CheckedProperties; +import org.icatproject.utils.IcatUnits; +import org.icatproject.utils.IcatUnits.SystemValue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.Marker; import org.slf4j.MarkerFactory; -import tech.units.indriya.format.SimpleUnitFormat; -import tech.units.indriya.unit.Units; - @Path("/") @Singleton public class Lucene { @@ -302,6 +297,7 @@ public class Search { public Map readerMap; public Query query; public Sort sort; + public boolean scored; public Set fields = new HashSet(); public Set dimensions = new HashSet(); } @@ -320,7 +316,6 @@ public ParentRelationship(String parentName, String fieldPrefix) { private static final Logger logger = LoggerFactory.getLogger(Lucene.class); private static final Marker fatal = MarkerFactory.getMarker("FATAL"); private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); - private static final SimpleUnitFormat unitFormat = SimpleUnitFormat.getInstance(); private static final Set doubleFields = new HashSet<>(); private static final Set facetFields = new HashSet<>(); @@ -341,8 +336,6 @@ public ParentRelationship(String parentName, String fieldPrefix) { TimeZone tz = TimeZone.getTimeZone("GMT"); df.setTimeZone(tz); - unitFormat.alias(Units.CELSIUS, "celsius"); // TODO this should be generalised with the units we need - doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI")); facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name")); longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue")); @@ -388,7 +381,7 @@ public ParentRelationship(String parentName, String fieldPrefix) { investigationParser.setAnalyzer(analyzer); investigationParser.setMultiFields(investigationFields); - CharSequence[] sampleFields = { "sample.name", "sample.type.name" }; + CharSequence[] sampleFields = { "name", "type.name" }; sampleParser.setAllowLeadingWildcard(true); sampleParser.setAnalyzer(analyzer); sampleParser.setMultiFields(sampleFields); @@ -407,6 +400,7 @@ public ParentRelationship(String parentName, String fieldPrefix) { private Timer timer; private Map searches = new ConcurrentHashMap<>(); + private IcatUnits icatUnits; /** * return the version of the lucene server @@ -590,7 +584,7 @@ private void createNow(String entityName, JsonObject documentJson) @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) - @Path("datafiles") + @Path("datafile") public String datafiles(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { Long uid = null; @@ -611,6 +605,7 @@ private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) searches.put(uid, search); Map readerMap = new HashMap<>(); search.readerMap = readerMap; + search.scored = (sort == null || sort.equals("")); search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { @@ -656,7 +651,7 @@ private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) @POST @Consumes(MediaType.APPLICATION_JSON) 
@Produces(MediaType.APPLICATION_JSON) - @Path("datasets") + @Path("dataset") public String datasets(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { @@ -679,6 +674,7 @@ private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) searches.put(uid, search); Map readerMap = new HashMap<>(); search.readerMap = readerMap; + search.scored = (sort == null || sort.equals("")); search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); @@ -836,11 +832,8 @@ public String facet(@PathParam("entityName") String entityName, @Context HttpSer } } - @DELETE - @Path("freeSearcher/{uid}") - public void freeSearcher(@PathParam("uid") Long uid) throws LuceneException { + public void freeSearcher(Long uid) throws LuceneException { if (uid != null) { // May not be set for internal calls - logger.debug("Requesting freeSearcher {}", uid); Map search = searches.get(uid).readerMap; for (Entry entry : search.entrySet()) { String name = entry.getKey(); @@ -875,6 +868,7 @@ private Search genericQuery(HttpServletRequest request, String sort, Long uid) t searches.put(uid, search); Map readerMap = new HashMap<>(); search.readerMap = readerMap; + search.scored = (sort == null || sort.equals("")); search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); @@ -937,28 +931,34 @@ private Search genericQuery(HttpServletRequest request, String sort, Long uid) t .getValuesAs(JsonObject.class)) { Long lower = Long.MIN_VALUE; Long upper = Long.MAX_VALUE; - if (range.containsKey("lower")) { - lower = range.getJsonNumber("lower").longValueExact(); + if (range.containsKey("from")) { + lower = range.getJsonNumber("from").longValueExact(); + } + if (range.containsKey("to")) { + upper = range.getJsonNumber("to").longValueExact(); } - if (range.containsKey("upper")) { - upper = range.getJsonNumber("upper").longValueExact(); + String label = lower.toString() + "-" + upper.toString(); + if (range.containsKey("key")) { + label = range.getString("key"); } - String label = lower.toString() + "_" + upper.toString(); - ranges.add(new LongRange(label, lower, true, upper, true)); + ranges.add(new LongRange(label, lower, true, upper, false)); } } else if (doubleFields.contains(dimension)) { for (JsonObject range : dimensionObject.getJsonArray("ranges") .getValuesAs(JsonObject.class)) { Double lower = Double.MIN_VALUE; Double upper = Double.MAX_VALUE; - if (range.containsKey("lower")) { - lower = range.getJsonNumber("lower").doubleValue(); + if (range.containsKey("from")) { + lower = range.getJsonNumber("from").doubleValue(); } - if (range.containsKey("upper")) { - upper = range.getJsonNumber("upper").doubleValue(); + if (range.containsKey("to")) { + upper = range.getJsonNumber("to").doubleValue(); } - String label = lower.toString() + "_" + upper.toString(); - ranges.add(new DoubleRange(label, lower, true, upper, true)); + String label = lower.toString() + "-" + upper.toString(); + if (range.containsKey("key")) { + label = range.getString("key"); + } + ranges.add(new DoubleRange(label, lower, true, upper, false)); } } else { throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, @@ -1006,6 +1006,8 @@ private void init() { timer = new Timer("LuceneCommitTimer"); timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); + icatUnits 
= new IcatUnits(props.getString("units", "")); + } catch (Exception e) { logger.error(fatal, e.getMessage()); throw new IllegalStateException(e.getMessage()); @@ -1028,7 +1030,7 @@ public void run() { @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) - @Path("investigations") + @Path("investigation") public String investigations(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { Long uid = null; @@ -1049,6 +1051,7 @@ private Search investigationsQuery(HttpServletRequest request, String sort, Long searches.put(uid, search); Map readerMap = new HashMap<>(); search.readerMap = readerMap; + search.scored = (sort == null || sort.equals("")); search.sort = parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); @@ -1184,60 +1187,43 @@ private String luceneFacetResult(String name, Search search, String searchAfter, } logger.debug("Facets found for " + results.size() + " dimensions"); } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartObject().writeStartObject("dimensions"); // object containing all facet dimensions - Set dimensionSet = new HashSet<>(); - search.dimensions.forEach(d -> dimensionSet.add(d.getDimension())); - writeFacetResults(dimensionSet, results, gen); - writeFacetResults(new HashSet<>(), rangeResults, gen); - gen.writeEnd().writeEnd(); // object containing dimensions - } - logger.debug("Json returned {}", baos.toString()); - return baos.toString(); - } - - private void writeFacetResults(Set dimensionSet, List results, JsonGenerator gen) { + Set dimensionSet = new HashSet<>(); + search.dimensions.forEach(d -> dimensionSet.add(d.getDimension())); + JsonObjectBuilder aggregationsBuilder = Json.createObjectBuilder(); for (FacetResult result : results) { if (dimensionSet.size() == 0 || dimensionSet.contains(result.dim)) { - gen.writeStartObject(result.dim); // object containing labelValues for a given dimension - for (LabelAndValue labelValue : result.labelValues) { - gen.write(labelValue.label, labelValue.value.longValue()); - } - gen.writeEnd(); // object containing labelValues + buildBuckets(aggregationsBuilder, result); } } + for (FacetResult result : rangeResults) { + buildBuckets(aggregationsBuilder, result); + } + return Json.createObjectBuilder().add("aggregations", aggregationsBuilder).build().toString(); + } + + private void buildBuckets(JsonObjectBuilder aggregationsBuilder, FacetResult result) { + JsonObjectBuilder bucketsBuilder = Json.createObjectBuilder(); + for (LabelAndValue labelValue : result.labelValues) { + JsonObjectBuilder bucketBuilder = Json.createObjectBuilder(); + bucketsBuilder.add(labelValue.label, bucketBuilder.add("doc_count", labelValue.value.longValue())); + } + aggregationsBuilder.add(result.dim, Json.createObjectBuilder().add("buckets", bucketsBuilder)); } private String luceneSearchResult(String name, Search search, String searchAfter, int maxResults, Long uid) throws IOException, LuceneException { IndexSearcher isearcher = getSearcher(search.readerMap, name); - logger.debug("To search in {} for {} {} with {} from {} ", name, search.query, maxResults, isearcher, - searchAfter); + String format = "Search {} with: query {}, maxResults, searchAfter {}, scored {} "; + logger.debug(format, name, search.query, maxResults, searchAfter, search.scored); 
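+		// When searchAfter is provided it marks the last hit of the previous page, letting Lucene resume the search rather than re-collecting earlier results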
FieldDoc searchAfterDoc = parseSearchAfter(searchAfter); - ScoreDoc[] hits; - TotalHits totalHits; - SortField[] fields = null; - if (search.sort == null) { - // Use default score sorting - TopDocs topDocs; - topDocs = searchAfterDoc == null ? isearcher.search(search.query, maxResults) - : isearcher.searchAfter(searchAfterDoc, search.query, maxResults); - hits = topDocs.scoreDocs; - totalHits = topDocs.totalHits; - } else { - // Use specified sorting - TopFieldDocs topFieldDocs; - topFieldDocs = searchAfterDoc == null ? isearcher.search(search.query, maxResults, search.sort) - : isearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, false); - hits = topFieldDocs.scoreDocs; - totalHits = topFieldDocs.totalHits; - fields = topFieldDocs.fields; - } - Float maxScore; - if (hits.length == 0) { - maxScore = Float.NaN; - } else { + TopFieldDocs topFieldDocs = searchAfterDoc == null + ? isearcher.search(search.query, maxResults, search.sort, search.scored) + : isearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); + ScoreDoc[] hits = topFieldDocs.scoreDocs; + TotalHits totalHits = topFieldDocs.totalHits; + SortField[] fields = topFieldDocs.fields; + Float maxScore = Float.NaN; + if (hits.length > 0) { maxScore = hits[0].score; } logger.debug("Hits " + totalHits + " maxscore " + maxScore); @@ -1259,10 +1245,15 @@ private String luceneSearchResult(String name, Search search, String searchAfter Document lastDocument = isearcher.doc(lastDoc.doc); gen.writeStartArray("fields"); for (SortField sortField : fields) { - IndexableField indexableField = lastDocument.getField(sortField.getField()); + String fieldName = sortField.getField(); + if (fieldName == null) { + // SCORE sorting will have a null fieldName + gen.write(lastDoc.score); + continue; + } + IndexableField indexableField = lastDocument.getField(fieldName); if (indexableField == null) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " - + sortField.getField() + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + fieldName + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); } Type type = (sortField instanceof SortedNumericSortField) @@ -1390,25 +1381,29 @@ private void addField(JsonObject json, Document document, String key) { // number don't convert return; } - try { - logger.trace("Attempting to convert {} {}", value, unitString); - Unit unit = unitFormat.parse(unitString); - Unit systemUnit = unit.getSystemUnit(); - UnitConverter converter = unit.getConverterToAny(systemUnit); - Double systemValue = converter.convert(value); - document.add(new DoublePoint("numericValueSI", systemValue)); - document.add(new StoredField("numericValueSI", systemValue)); - long sortableLong = NumericUtils.doubleToSortableLong(systemValue); + logger.trace("Attempting to convert {} {}", value, unitString); + SystemValue systemValue = icatUnits.new SystemValue(value, unitString); + if (systemValue.units != null) { + document.add(new StringField("type.unitsSI", systemValue.units, Store.YES)); + } + if (systemValue.value != null) { + document.add(new DoublePoint("numericValueSI", systemValue.value)); + document.add(new StoredField("numericValueSI", systemValue.value)); + long sortableLong = NumericUtils.doubleToSortableLong(systemValue.value); document.add(new NumericDocValuesField("numericValueSI", sortableLong)); - document.add(new StringField("type.unitsSI", systemUnit.getName(), Store.YES)); - } catch 
(IncommensurableException | MeasurementParseException e) { - logger.error("Unable to convert 'type.units' of {} due to {}", unitString, e.getMessage()); - } } } private void addSortField(JsonObject json, Document document, String key) { if (sortFields.contains(key)) { + if (key.equals("id")) { + // Id is a special case, as we need it to be SORTED as a byte ref to allow joins + // but also SORTED_NUMERIC to ensure a deterministic order to results + Long value = new Long(json.getString(key)); + document.add(new NumericDocValuesField("id.long", value)); + document.add(new StoredField("id.long", value)); + } if (longFields.contains(key)) { document.add(new NumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); } else if (doubleFields.contains(key)) { @@ -1423,6 +1418,13 @@ private void addSortField(JsonObject json, Document document, String key) { private void addSortField(IndexableField field, Document document) { String key = field.name(); if (sortFields.contains(key)) { + if (key.equals("id")) { + // Id is a special case, as we need it to be SORTED as a byte ref to allow joins + // but also SORTED_NUMERIC to ensure a deterministic order to results + Long value = new Long(field.stringValue()); + document.add(new NumericDocValuesField("id.long", value)); + document.add(new StoredField("id.long", value)); + } if (longFields.contains(key)) { document.add(new NumericDocValuesField(key, field.numericValue().longValue())); } else if (doubleFields.contains(key)) { @@ -1515,7 +1517,7 @@ private Builder parseParameter(JsonValue p) throws LuceneException { */ private Sort parseSort(String sort) throws LuceneException { if (sort == null || sort.equals("")) { - return null; + return new Sort(SortField.FIELD_SCORE, new SortedNumericSortField("id.long", Type.LONG)); } try (JsonReader reader = Json.createReader(new ByteArrayInputStream(sort.getBytes()))) { JsonObject object = reader.readObject(); @@ -1540,6 +1542,7 @@ private Sort parseSort(String sort) throws LuceneException { fields.add(new SortField(key, Type.STRING, reverse)); } } + fields.add(new SortedNumericSortField("id.long", Type.LONG)); return new Sort(fields.toArray(new SortField[0])); } } @@ -1557,42 +1560,42 @@ private Sort parseSort(String sort) throws LuceneException { * NUMBER */ private FieldDoc parseSearchAfter(String searchAfter) throws LuceneException { - if (searchAfter != null && !searchAfter.equals("")) { - logger.debug("Attempting to parseSearchAfter from {}", searchAfter); - JsonReader reader = Json.createReader(new StringReader(searchAfter)); - JsonObject object = reader.readObject(); - int doc = object.getInt("doc"); - int shardIndex = object.getInt("shardIndex"); - float score = Float.NaN; - List fields = new ArrayList<>(); - if (object.containsKey("score")) { - score = object.getJsonNumber("score").bigDecimalValue().floatValue(); - } - if (object.containsKey("fields")) { - JsonArray jsonArray = object.getJsonArray("fields"); - for (JsonValue value : jsonArray) { - switch (value.getValueType()) { - case NUMBER: - JsonNumber number = ((JsonNumber) value); - if (number.toString().contains(".")) { - fields.add(number.doubleValue()); - } else { - fields.add(number.longValueExact()); - } - break; - case STRING: - fields.add(new BytesRef(((JsonString) value).getString())); - break; - default: - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "fields should be an array of STRING and NUMBER, but had entry of type " - + value.getValueType()); - } + if (searchAfter == null || searchAfter.equals("")) { + 
return null; + } + logger.debug("Attempting to parseSearchAfter from {}", searchAfter); + JsonReader reader = Json.createReader(new StringReader(searchAfter)); + JsonObject object = reader.readObject(); + int doc = object.getInt("doc"); + int shardIndex = object.getInt("shardIndex"); + float score = Float.NaN; + List fields = new ArrayList<>(); + if (object.containsKey("score")) { + score = object.getJsonNumber("score").bigDecimalValue().floatValue(); + } + if (object.containsKey("fields")) { + JsonArray jsonArray = object.getJsonArray("fields"); + for (JsonValue value : jsonArray) { + switch (value.getValueType()) { + case NUMBER: + JsonNumber number = ((JsonNumber) value); + if (number.toString().contains(".")) { + fields.add(number.bigDecimalValue().floatValue()); + } else { + fields.add(number.longValueExact()); + } + break; + case STRING: + fields.add(new BytesRef(((JsonString) value).getString())); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "fields should be an array of STRING and NUMBER, but had entry of type " + + value.getValueType()); } } - return new FieldDoc(doc, score, fields.toArray(), shardIndex); } - return null; + return new FieldDoc(doc, score, fields.toArray(), shardIndex); } @POST diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index 4aeab39..25babbd 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -5,3 +5,4 @@ directory = ${HOME}/data/lucene commitSeconds = 5 maxShardSize = 2147483648 ip = 127.0.0.1/32 +units = \u2103: celsius degC, K: kelvin From 885673898951cfdb1d054829f6a498ec18b830f6 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 25 May 2022 17:51:30 +0100 Subject: [PATCH 38/73] Use mapping for parseSearchAfter types #19 --- .../java/org/icatproject/lucene/Lucene.java | 38 +++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 1edc024..a28b3ea 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -1213,12 +1213,10 @@ private void buildBuckets(JsonObjectBuilder aggregationsBuilder, FacetResult res private String luceneSearchResult(String name, Search search, String searchAfter, int maxResults, Long uid) throws IOException, LuceneException { IndexSearcher isearcher = getSearcher(search.readerMap, name); - String format = "Search {} with: query {}, maxResults, searchAfter {}, scored {} "; + String format = "Search {} with: query {}, maxResults {}, searchAfter {}, scored {}"; logger.debug(format, name, search.query, maxResults, searchAfter, search.scored); - FieldDoc searchAfterDoc = parseSearchAfter(searchAfter); - TopFieldDocs topFieldDocs = searchAfterDoc == null - ? 
isearcher.search(search.query, maxResults, search.sort, search.scored) - : isearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); + FieldDoc searchAfterDoc = parseSearchAfter(searchAfter, search.sort.getSort()); + TopFieldDocs topFieldDocs = isearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); ScoreDoc[] hits = topFieldDocs.scoreDocs; TotalHits totalHits = topFieldDocs.totalHits; SortField[] fields = topFieldDocs.fields; @@ -1559,7 +1557,7 @@ private Sort parseSort(String sort) throws LuceneException { * @throws LuceneException If an entry in the fields array is not a STRING or * NUMBER */ - private FieldDoc parseSearchAfter(String searchAfter) throws LuceneException { + private FieldDoc parseSearchAfter(String searchAfter, SortField[] sortFields) throws LuceneException { if (searchAfter == null || searchAfter.equals("")) { return null; } @@ -1575,14 +1573,32 @@ private FieldDoc parseSearchAfter(String searchAfter) throws LuceneException { } if (object.containsKey("fields")) { JsonArray jsonArray = object.getJsonArray("fields"); - for (JsonValue value : jsonArray) { + if (jsonArray.size() != sortFields.length) { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "fields should have the same length as sort, but they were " + + jsonArray.size() + " and " + sortFields.length); + } + for (int i = 0; i < sortFields.length; i++) { + JsonValue value = jsonArray.get(i); switch (value.getValueType()) { case NUMBER: JsonNumber number = ((JsonNumber) value); - if (number.toString().contains(".")) { - fields.add(number.bigDecimalValue().floatValue()); - } else { - fields.add(number.longValueExact()); + switch (sortFields[i].getType()) { + case FLOAT: + case DOUBLE: + case SCORE: + fields.add(number.bigDecimalValue().floatValue()); + break; + case INT: + case LONG: + case DOC: + case CUSTOM: + fields.add(number.longValueExact()); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "fields contained a NUMBER but the corresponding field was " + + sortFields[i]); } break; case STRING: From 008c68a568d6077810ebbad07125e2923495a5ac Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 1 Jun 2022 11:17:55 +0000 Subject: [PATCH 39/73] WIP sharding changes from stash #19 --- .../java/org/icatproject/lucene/Lucene.java | 471 +++++++++++------- 1 file changed, 294 insertions(+), 177 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index a28b3ea..eead564 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -30,11 +30,13 @@ import javax.ejb.Singleton; import javax.json.Json; import javax.json.JsonArray; +import javax.json.JsonException; import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.json.JsonReader; import javax.json.JsonString; +import javax.json.JsonStructure; import javax.json.JsonValue; import javax.json.JsonValue.ValueType; import javax.json.stream.JsonGenerator; @@ -71,12 +73,9 @@ import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; -import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import 
org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.MultiReader; -import org.apache.lucene.index.ReaderManager; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; @@ -89,6 +88,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSortField; @@ -119,7 +119,8 @@ public class Lucene { private class ShardBucket { private FSDirectory directory; private IndexWriter indexWriter; - private ReaderManager readerManager; + private SearcherManager searcherManager; + private AtomicLong documentCount; /** * Creates a bucket for accessing the read and write functionality for a single @@ -144,13 +145,29 @@ public ShardBucket(java.nio.file.Path shardPath) throws IOException { indexWriter.commit(); logger.debug("Now have " + indexWriter.getDocStats().numDocs + " documents indexed"); } - readerManager = new ReaderManager(indexWriter); + searcherManager = new SearcherManager(indexWriter, null); + IndexSearcher indexSearcher = null; + try { + indexSearcher = searcherManager.acquire(); + int numDocs = indexSearcher.getIndexReader().numDocs(); + documentCount = new AtomicLong(numDocs); + } finally { + searcherManager.release(indexSearcher); + } + } + + public int commit() throws IOException { + int cached = indexWriter.numRamDocs(); + indexWriter.commit(); + searcherManager.maybeRefreshBlocking(); + return cached; } } private class IndexBucket { private String entityName; - private Map shardMap = new HashMap<>(); + // private Map shardMap = new HashMap<>(); + private List shardList = new ArrayList<>(); private AtomicBoolean locked = new AtomicBoolean(); /** @@ -168,7 +185,8 @@ public IndexBucket(String entityName) { java.nio.file.Path shardPath = luceneDirectory.resolve(entityName); do { ShardBucket shardBucket = new ShardBucket(shardPath); - shardMap.put(shardIndex, shardBucket); + // shardMap.put(shardIndex, shardBucket); + shardList.add(shardBucket); shardIndex++; shardPath = luceneDirectory.resolve(entityName + "_" + shardIndex); } while (Files.isDirectory(shardPath)); @@ -185,12 +203,32 @@ public IndexBucket(String entityName) { * @return Array of DirectoryReaders for all shards in this bucket. 
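	 * Note: in this WIP revision the shards are acquired as IndexSearchers (see acquireSearchers below) rather than as raw DirectoryReaders.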
* @throws IOException */ - public DirectoryReader[] acquireReaders() throws IOException { - List subReaders = new ArrayList<>(); - for (ShardBucket shardBucket : shardMap.values()) { - subReaders.add(shardBucket.readerManager.acquire()); + // public DirectoryReader[] acquireReaders() throws IOException { + // List subReaders = new ArrayList<>(); + // for (ShardBucket shardBucket : shardMap.values()) { + // subReaders.add(shardBucket.searcherManager.acquire()); + // } + // return subReaders.toArray(new DirectoryReader[0]); + // } + + public List acquireSearchers() throws IOException { + List subSearchers = new ArrayList<>(); + // for (ShardBucket shardBucket : shardMap.values()) { + for (ShardBucket shardBucket : shardList) { + subSearchers.add(shardBucket.searcherManager.acquire()); } - return subReaders.toArray(new DirectoryReader[0]); + return subSearchers; + } + + public void addDocument(Document document) throws IOException { + ShardBucket shardBucket = routeShard(); + shardBucket.indexWriter.addDocument(document); + shardBucket.documentCount.incrementAndGet(); + } + + public void updateDocument(Term term, Document document) throws IOException { + ShardBucket shardBucket = routeShard(); + shardBucket.indexWriter.updateDocument(term, document); } /** @@ -202,9 +240,9 @@ public DirectoryReader[] acquireReaders() throws IOException { * @return A new ShardBucket with the provided shardKey. * @throws IOException */ - public ShardBucket buildShardBucket(Long shardKey) throws IOException { + public ShardBucket buildShardBucket(int shardKey) throws IOException { ShardBucket shardBucket = new ShardBucket(luceneDirectory.resolve(entityName + "_" + shardKey)); - shardMap.put(shardKey, shardBucket); + shardList.add(shardBucket); return shardBucket; } @@ -218,15 +256,13 @@ public ShardBucket buildShardBucket(Long shardKey) throws IOException { * @throws IOException */ public void commit(String command, String entityName) throws IOException { - for (Entry entry : shardMap.entrySet()) { - ShardBucket shardBucket = entry.getValue(); - int cached = shardBucket.indexWriter.numRamDocs(); - shardBucket.indexWriter.commit(); + // for (Entry entry : shardMap.entrySet()) { + for (ShardBucket shardBucket : shardList) { + int cached = shardBucket.commit(); if (cached != 0) { - logger.debug("{} has committed {} {} changes to Lucene - now have {} documents indexed in shard {}", - command, cached, entityName, shardBucket.indexWriter.getDocStats().numDocs, entry.getKey()); + logger.debug("{} has committed {} {} changes to Lucene - now have {} documents indexed in {}", + command, cached, entityName, shardBucket.indexWriter.getDocStats().numDocs, shardBucket.directory.getDirectory().toString()); } - shardBucket.readerManager.maybeRefreshBlocking(); } } @@ -236,8 +272,9 @@ public void commit(String command, String entityName) throws IOException { * @throws IOException */ public void close() throws IOException { - for (ShardBucket shardBucket : shardMap.values()) { - shardBucket.readerManager.close(); + // for (ShardBucket shardBucket : shardMap.values()) { + for (ShardBucket shardBucket : shardList) { + shardBucket.searcherManager.close(); shardBucket.indexWriter.commit(); shardBucket.indexWriter.close(); shardBucket.directory.close(); @@ -254,15 +291,22 @@ public void close() throws IOException { * @return The ShardBucket that the relevant Document is/should be indexed in. 
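The shard routing documented here becomes append-only in this patch: writes go to the newest shard until it holds luceneMaxShardSize documents, then the next shard is opened. A self-contained sketch of just that rule (CountedShard is a hypothetical stand-in for ShardBucket; the real code also commits the full shard and opens a directory named entityName_<index>):

    import java.util.ArrayList;
    import java.util.List;

    public class AppendOnlyRouterSketch {
        // Hypothetical stand-in for ShardBucket: only the document count matters here.
        static class CountedShard {
            long documentCount;
        }

        private final long maxShardSize;
        private final List<CountedShard> shards = new ArrayList<>();

        AppendOnlyRouterSketch(long maxShardSize) {
            this.maxShardSize = maxShardSize;
            shards.add(new CountedShard()); // shard 0 always exists
        }

        // Mirrors the routing rule: the last shard receives writes until full,
        // at which point a new shard is appended.
        CountedShard route() {
            CountedShard last = shards.get(shards.size() - 1);
            if (last.documentCount >= maxShardSize) {
                last = new CountedShard();
                shards.add(last);
            }
            last.documentCount++;
            return last;
        }

        public static void main(String[] args) {
            AppendOnlyRouterSketch router = new AppendOnlyRouterSketch(2);
            for (int i = 0; i < 5; i++) {
                router.route();
            }
            System.out.println(router.shards.size() + " shards"); // prints "3 shards"
        }
    }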
* @throws IOException */ - public ShardBucket routeShard(Long id) throws IOException { - if (id == null) { - // If we don't have id, provide the first bucket - return shardMap.get(0L); - } - Long shard = id / luceneMaxShardSize; - ShardBucket shardBucket = shardMap.get(shard); - if (shardBucket == null) { - shardBucket = buildShardBucket(shard); + public ShardBucket routeShard() throws IOException { + // if (id == null || !shardedIndices.contains(entityName.toLowerCase())) { + // // If we don't have id, provide the first bucket + // return shardMap.get(0L); + // } + // Long shard = id / luceneMaxShardSize; + // ShardBucket shardBucket = shardMap.get(shard); + // if (shardBucket == null) { + // shardBucket = buildShardBucket(shard); + // } + // return shardBucket; + int size = shardList.size(); + ShardBucket shardBucket = shardList.get(size - 1); + if (shardBucket.documentCount.get() >= luceneMaxShardSize) { + shardBucket.indexWriter.commit(); + shardBucket = buildShardBucket(size); } return shardBucket; } @@ -275,31 +319,75 @@ public ShardBucket routeShard(Long id) throws IOException { * @return The relevant IndexWriter. * @throws IOException */ - public IndexWriter getWriter(Long id) throws IOException { - return routeShard(id).indexWriter; - } + // public IndexWriter getWriter(String entityName, Long id) throws IOException { + // return routeShard(entityName, id).indexWriter; + // } - public void releaseReaders(DirectoryReader[] subReaders) throws IOException, LuceneException { - if (subReaders.length != shardMap.size()) { + public void releaseReaders(List subSearchers) throws IOException, LuceneException { + if (subSearchers.size() != shardList.size()) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Was expecting the same number of DirectoryReaders as ShardBuckets, but had " - + subReaders.length + ", " + shardMap.size() + " respectively."); + + subSearchers.size() + ", " + shardList.size() + " respectively."); } int i = 0; - for (ShardBucket shardBucket : shardMap.values()) { - shardBucket.readerManager.release(subReaders[i]); + for (ShardBucket shardBucket : shardList) { + shardBucket.searcherManager.release(subSearchers.get(i)); i++; } } } public class Search { - public Map readerMap; + public Map> searcherMap; public Query query; public Sort sort; public boolean scored; public Set fields = new HashSet(); public Set dimensions = new HashSet(); + + /** + * Parses the String from the request into a Lucene Sort object. Multiple sort + * criteria are supported, and will be applied in order. + * + * @param sortString String representation of a JSON object with the field(s) to sort + * as keys, and the direction ("asc" or "desc") as value(s). 
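As a worked example of the sort format described above (values are my illustration, not from the patch): a sort parameter of {"date": "desc", "name": "asc"}, where "date" is in longFields and "name" is not numeric, builds the equivalent of:

    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.SortField.Type;
    import org.apache.lucene.search.SortedNumericSortField;

    public class SortExample {
        public static void main(String[] args) {
            Sort sort = new Sort(
                    new SortedNumericSortField("date", Type.LONG, true), // "desc" -> reverse = true
                    new SortField("name", Type.STRING, false),           // "asc"  -> reverse = false
                    new SortedNumericSortField("id.long", Type.LONG));   // tie-breaker always appended
            System.out.println(sort);
        }
    }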
+ * @throws LuceneException If the value for any key isn't "asc" or "desc" + */ + public void parseSort(String sortString) throws LuceneException { + if (sortString == null || sortString.equals("")|| sortString.equals("{}")) { + scored = true; + sort = new Sort(SortField.FIELD_SCORE, new SortedNumericSortField("id.long", Type.LONG)); + return; + } + try (JsonReader reader = Json.createReader(new ByteArrayInputStream(sortString.getBytes()))) { + JsonObject object = reader.readObject(); + List fields = new ArrayList<>(); + for (String key : object.keySet()) { + String order = object.getString(key); + Boolean reverse; + if (order.equals("asc")) { + reverse = false; + } else if (order.equals("desc")) { + reverse = true; + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Sort order must be 'asc' or 'desc' but it was '" + order + "'"); + } + + if (longFields.contains(key)) { + fields.add(new SortedNumericSortField(key, Type.LONG, reverse)); + } else if (doubleFields.contains(key)) { + fields.add(new SortedNumericSortField(key, Type.DOUBLE, reverse)); + } else { + fields.add(new SortField(key, Type.STRING, reverse)); + } + } + fields.add(new SortedNumericSortField("id.long", Type.LONG)); + scored = false; + sort = new Sort(fields.toArray(new SortField[0])); + } + } } private static class ParentRelationship { @@ -390,9 +478,10 @@ public ParentRelationship(String parentName, String fieldPrefix) { private final FacetsConfig facetsConfig = new FacetsConfig(); private java.nio.file.Path luceneDirectory; - + private Set shardedIndices; private int luceneCommitMillis; private Long luceneMaxShardSize; + private long maxSearchTimeSeconds; private AtomicLong bucketNum = new AtomicLong(); private Map indexBuckets = new ConcurrentHashMap<>(); @@ -463,13 +552,17 @@ public void modify(@Context HttpServletRequest request) throws LuceneException { public void addNow(@Context HttpServletRequest request, @PathParam("entityName") String entityName) throws LuceneException { List documents; + JsonStructure value = null; logger.debug("Requesting addNow of {}", entityName); try (JsonReader reader = Json.createReader(request.getInputStream())) { - documents = reader.readArray().getValuesAs(JsonObject.class); + value = reader.read(); + documents = ((JsonArray) value).getValuesAs(JsonObject.class); for (JsonObject document : documents) { createNow(entityName, document); } - } catch (IOException e) { + } catch (IOException | JsonException e) { + + logger.error("Could not parse JSON from {}", value.toString()); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } logger.debug("Added {} {} documents", documents.size(), entityName); @@ -564,7 +657,8 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } - bucket.getWriter(new Long(icatId)).addDocument(facetsConfig.build(document)); + // bucket.getWriter(entityName, new Long(icatId)).addDocument(facetsConfig.build(document)); + bucket.addDocument(facetsConfig.build(document)); } } @@ -574,11 +668,12 @@ private void createNow(String entityName, JsonObject documentJson) throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "id was not in the document keys " + documentJson.keySet()); } - String icatId = documentJson.getString("id"); + // String icatId = documentJson.getString("id"); Document document = parseDocument(documentJson); logger.trace("create {} 
{}", entityName, document.toString()); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); - bucket.getWriter(new Long(icatId)).addDocument(facetsConfig.build(document)); + // bucket.getWriter(entityName, new Long(icatId)).addDocument(facetsConfig.build(document)); + bucket.addDocument(facetsConfig.build(document)); } @POST @@ -603,10 +698,9 @@ private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) throws IOException, QueryNodeException, LuceneException { Search search = new Search(); searches.put(uid, search); - Map readerMap = new HashMap<>(); - search.readerMap = readerMap; - search.scored = (sort == null || sort.equals("")); - search.sort = parseSort(sort); + Map> readerMap = new HashMap<>(); + search.searcherMap = readerMap; + search.parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); @@ -672,10 +766,9 @@ private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) throws IOException, QueryNodeException, LuceneException { Search search = new Search(); searches.put(uid, search); - Map readerMap = new HashMap<>(); - search.readerMap = readerMap; - search.scored = (sort == null || sort.equals("")); - search.sort = parseSort(sort); + Map> readerMap = new HashMap<>(); + search.searcherMap = readerMap; + search.parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); JsonObject query = o.getJsonObject("query"); @@ -749,8 +842,10 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio "Lucene locked for " + entityName); } logger.trace("delete {} {}", entityName, icatId); - ShardBucket shardBucket = bucket.routeShard(new Long(icatId)); - shardBucket.indexWriter.deleteDocuments(new Term("id", icatId)); + for (ShardBucket shardBucket: bucket.shardList) { + shardBucket.indexWriter.deleteDocuments(new Term("id", icatId)); + } + // ShardBucket shardBucket = bucket.routeShard(entityName, new Long(icatId)); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -826,7 +921,6 @@ public String facet(@PathParam("entityName") String entityName, @Context HttpSer Search search = genericQuery(request, sort, uid); return luceneFacetResult(entityName, search, searchAfter, maxResults, maxLabels, uid); } catch (Exception e) { - logger.error("Error", e); freeSearcher(uid); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -834,10 +928,10 @@ public String facet(@PathParam("entityName") String entityName, @Context HttpSer public void freeSearcher(Long uid) throws LuceneException { if (uid != null) { // May not be set for internal calls - Map search = searches.get(uid).readerMap; - for (Entry entry : search.entrySet()) { + Map> search = searches.get(uid).searcherMap; + for (Entry> entry : search.entrySet()) { String name = entry.getKey(); - DirectoryReader[] subReaders = entry.getValue(); + List subReaders = entry.getValue(); try { indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).releaseReaders(subReaders); } catch (IOException e) { @@ -866,10 +960,9 @@ public void freeSearcher(Long uid) throws LuceneException { private Search genericQuery(HttpServletRequest request, String sort, Long uid) throws IOException, LuceneException { Search search = new Search(); searches.put(uid, search); - Map readerMap = new HashMap<>(); - search.readerMap = readerMap; - search.scored = (sort == null || 
sort.equals("")); - search.sort = parseSort(sort); + Map> readerMap = new HashMap<>(); + search.searcherMap = readerMap; + search.parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); JsonObject jsonQuery = o.getJsonObject("query"); @@ -974,18 +1067,23 @@ private Search genericQuery(HttpServletRequest request, String sort, Long uid) t return search; } - private MultiReader getMultiReader(Map readerMap, String name) throws IOException { - DirectoryReader[] subReaders = readerMap.get(name); - if (subReaders == null) { - subReaders = indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).acquireReaders(); - readerMap.put(name, subReaders); + private List getSearchers(Map> readerMap, String name) throws IOException { + List subSearchers = readerMap.get(name); + if (subSearchers == null) { + subSearchers = indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).acquireSearchers(); + readerMap.put(name, subSearchers); logger.debug("Remember searcher for {}", name); } - return new MultiReader(subReaders, false); + return subSearchers; } - private IndexSearcher getSearcher(Map readerMap, String name) throws IOException { - return new IndexSearcher(getMultiReader(readerMap, name)); + private IndexSearcher getSearcher(Map> readerMap, String name) throws IOException, LuceneException { + List subSearchers = readerMap.get(name); + subSearchers = getSearchers(readerMap, name); + if (subSearchers.size() > 1) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Cannot get single IndexSearcher for " + name + " as it has " + subSearchers.size() + " shards"); + } + return subSearchers.get(0); } @PostConstruct @@ -1002,18 +1100,23 @@ private void init() { luceneCommitMillis = props.getPositiveInt("commitSeconds") * 1000; luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), new Long(Integer.MAX_VALUE + 1)); + maxSearchTimeSeconds = props.has("maxSearchTimeSeconds") ? 
props.getPositiveLong("maxSearchTimeSeconds") : 5; timer = new Timer("LuceneCommitTimer"); timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); icatUnits = new IcatUnits(props.getString("units", "")); + String shardedIndicesString = props.getString("shardedIndices", "").toLowerCase(); + shardedIndices = new HashSet<>(Arrays.asList(shardedIndicesString.split("\\s+"))); + } catch (Exception e) { logger.error(fatal, e.getMessage()); throw new IllegalStateException(e.getMessage()); } - logger.info("Initialised icat.lucene"); + logger.info("Initialised icat.lucene with directory {}, commitSeconds {}, maxShardSize {}, shardedIndices {}, maxSearchTimeSeconds {}", + luceneDirectory, luceneCommitMillis, luceneMaxShardSize, shardedIndices, maxSearchTimeSeconds); } class CommitTimerTask extends TimerTask { @@ -1049,10 +1152,9 @@ private Search investigationsQuery(HttpServletRequest request, String sort, Long throws IOException, QueryNodeException, LuceneException { Search search = new Search(); searches.put(uid, search); - Map readerMap = new HashMap<>(); - search.readerMap = readerMap; - search.scored = (sort == null || sort.equals("")); - search.sort = parseSort(sort); + Map> readerMap = new HashMap<>(); + search.searcherMap = readerMap; + search.parseSort(sort); try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); JsonObject query = o.getJsonObject("query"); @@ -1130,7 +1232,7 @@ public void lock(@PathParam("entityName") String entityName) throws LuceneExcept throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene already locked for " + entityName); } try { - for (ShardBucket shardBucket : bucket.shardMap.values()) { + for (ShardBucket shardBucket : bucket.shardList) { shardBucket.indexWriter.deleteAll(); } } catch (IOException e) { @@ -1140,83 +1242,130 @@ public void lock(@PathParam("entityName") String entityName) throws LuceneExcept private String luceneFacetResult(String name, Search search, String searchAfter, int maxResults, int maxLabels, Long uid) throws IOException, IllegalStateException, LuceneException { - List results = new ArrayList<>(); - List rangeResults = new ArrayList<>(); + Map> results = new HashMap<>(); + Map> rangeResults = new HashMap<>(); if (maxResults <= 0 || maxLabels <= 0) { // This will result in no Facets and a null pointer, so return early logger.warn("Cannot facet when maxResults={}, maxLabels={}, returning empty list", maxResults, maxLabels); } else { - MultiReader directoryReader = getMultiReader(search.readerMap, name); - IndexSearcher indexSearcher = new IndexSearcher(directoryReader); - FacetsCollector facetsCollector = new FacetsCollector(); - FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); - logger.debug("To facet in {} for {} {} with {} from {} ", name, search.query, maxResults, indexSearcher, - searchAfter); - for (FacetDimensionRequest facetDimensionRequest : search.dimensions) { - if (facetDimensionRequest.getRanges().size() > 0) { - String dimension = facetDimensionRequest.getDimension(); - if (longFields.contains(dimension)) { - LongRange[] ranges = facetDimensionRequest.getRanges().toArray(new LongRange[0]); - Facets facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges); - rangeResults.addAll(facets.getAllDims(maxLabels)); - } else if (doubleFields.contains(dimension)) { - DoubleRange[] ranges = facetDimensionRequest.getRanges().toArray(new DoubleRange[0]); - Facets facets = new DoubleRangeFacetCounts(dimension, 
facetsCollector, ranges); - rangeResults.addAll(facets.getAllDims(maxLabels)); - } else { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "'ranges' specified for dimension " + dimension - + " but this is not a supported numeric field"); + List searchers = getSearchers(search.searcherMap, name); + for (IndexSearcher indexSearcher : searchers) { + FacetsCollector facetsCollector = new FacetsCollector(); + FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); + logger.debug("To facet in {} for {} {} with {} from {} ", name, search.query, maxResults, indexSearcher, + searchAfter); + for (FacetDimensionRequest facetDimensionRequest : search.dimensions) { + if (facetDimensionRequest.getRanges().size() > 0) { + String dimension = facetDimensionRequest.getDimension(); + if (longFields.contains(dimension)) { + LongRange[] ranges = facetDimensionRequest.getRanges().toArray(new LongRange[0]); + Facets facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges); + putFacets(maxLabels, rangeResults, facets); + } else if (doubleFields.contains(dimension)) { + DoubleRange[] ranges = facetDimensionRequest.getRanges().toArray(new DoubleRange[0]); + Facets facets = new DoubleRangeFacetCounts(dimension, facetsCollector, ranges); + putFacets(maxLabels, rangeResults, facets); + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "'ranges' specified for dimension " + dimension + + " but this is not a supported numeric field"); + } } } - } - try { - DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(directoryReader); - Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); - logger.debug("facets: {}, maxLabels: {}, maxResults: {}", facets, maxLabels, maxResults); - results = facets.getAllDims(maxLabels); - } catch (IllegalArgumentException e) { - // This can occur if no fields in the index have been faceted - logger.error("No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage()); - } catch (IllegalStateException e) { - // This can occur if we do not create the IndexSearcher from the same - // DirectoryReader as we used to create the state - logger.error("IndexSearcher used is not based on the DirectoryReader used for facet counting: " - + e.getClass() + " " + e.getMessage()); - throw e; + try { + DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(indexSearcher.getIndexReader()); + Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); + logger.debug("facets: {}, maxLabels: {}, maxResults: {}", facets, maxLabels, maxResults); + putFacets(maxLabels, results, facets); + } catch (IllegalArgumentException e) { + // This can occur if no fields in the index have been faceted + logger.error("No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage()); + } catch (IllegalStateException e) { + // This can occur if we do not create the IndexSearcher from the same + // DirectoryReader as we used to create the state + logger.error("IndexSearcher used is not based on the DirectoryReader used for facet counting: " + + e.getClass() + " " + e.getMessage()); + throw e; + } } logger.debug("Facets found for " + results.size() + " dimensions"); } Set dimensionSet = new HashSet<>(); search.dimensions.forEach(d -> dimensionSet.add(d.getDimension())); JsonObjectBuilder aggregationsBuilder = Json.createObjectBuilder(); - for (FacetResult result : results) { - if (dimensionSet.size() == 0 || 
dimensionSet.contains(result.dim)) { - buildBuckets(aggregationsBuilder, result); + for (Entry> dimensionEntry : results.entrySet()) { + if (dimensionSet.size() == 0 || dimensionSet.contains(dimensionEntry.getKey())) { + buildBuckets(aggregationsBuilder, dimensionEntry); } } - for (FacetResult result : rangeResults) { - buildBuckets(aggregationsBuilder, result); + for (Entry> dimensionEntry : rangeResults.entrySet()) { + buildBuckets(aggregationsBuilder, dimensionEntry); } return Json.createObjectBuilder().add("aggregations", aggregationsBuilder).build().toString(); } - private void buildBuckets(JsonObjectBuilder aggregationsBuilder, FacetResult result) { + private void putFacets(int maxLabels, Map> rangeResults, Facets facets) + throws IOException { + for (FacetResult result : facets.getAllDims(maxLabels)) { + String dim = result.dim; + if (rangeResults.containsKey(dim)) { + Map labelMap = rangeResults.get(dim); + for (LabelAndValue labelAndValue : result.labelValues) { + String label = labelAndValue.label; + if (labelMap.containsKey(label)) { + labelMap.put(label, labelMap.get(label) + labelAndValue.value.longValue()); + } else { + labelMap.put(label, labelAndValue.value.longValue()); + } + } + } else { + Map labelMap = new HashMap<>(); + for (LabelAndValue labelAndValue : result.labelValues) { + labelMap.put(labelAndValue.label, labelAndValue.value.longValue()); + } + rangeResults.put(dim, labelMap); + } + } + } + + private void buildBuckets(JsonObjectBuilder aggregationsBuilder, Entry> result) { JsonObjectBuilder bucketsBuilder = Json.createObjectBuilder(); - for (LabelAndValue labelValue : result.labelValues) { + for (Entry labelValue : result.getValue().entrySet()) { JsonObjectBuilder bucketBuilder = Json.createObjectBuilder(); - bucketsBuilder.add(labelValue.label, bucketBuilder.add("doc_count", labelValue.value.longValue())); + bucketsBuilder.add(labelValue.getKey(), bucketBuilder.add("doc_count", labelValue.getValue())); } - aggregationsBuilder.add(result.dim, Json.createObjectBuilder().add("buckets", bucketsBuilder)); + aggregationsBuilder.add(result.getKey(), Json.createObjectBuilder().add("buckets", bucketsBuilder)); } private String luceneSearchResult(String name, Search search, String searchAfter, int maxResults, Long uid) throws IOException, LuceneException { - IndexSearcher isearcher = getSearcher(search.readerMap, name); + List searchers = getSearchers(search.searcherMap, name); String format = "Search {} with: query {}, maxResults {}, searchAfter {}, scored {}"; logger.debug(format, name, search.query, maxResults, searchAfter, search.scored); FieldDoc searchAfterDoc = parseSearchAfter(searchAfter, search.sort.getSort()); - TopFieldDocs topFieldDocs = isearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); + TopFieldDocs topFieldDocs; + if (searchers.size() > 1) { + List shardHits = new ArrayList<>(); + int i = 0; + long startTime = System.currentTimeMillis(); + for (IndexSearcher indexSearcher : searchers) { + // checkMaxMatches(name, search, indexSearcher); + TopFieldDocs shardDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); + shardHits.add(shardDocs); + logger.debug("{} hits on shard {} out of {} total docs", shardDocs.totalHits, i, indexSearcher.getIndexReader().numDocs()); + i++; + long duration = (System.currentTimeMillis() - startTime); + if (duration > maxSearchTimeSeconds * 1000) { + logger.info("Stopping search after {} shards due to {} ms having elapsed", i, duration); 
break; + } + } + topFieldDocs = TopFieldDocs.merge(search.sort, 0, maxResults, shardHits.toArray(new TopFieldDocs[i]), true); + } else { + IndexSearcher indexSearcher = searchers.get(0); + // checkMaxMatches(name, search, indexSearcher); + topFieldDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); + } ScoreDoc[] hits = topFieldDocs.scoreDocs; TotalHits totalHits = topFieldDocs.totalHits; SortField[] fields = topFieldDocs.fields; @@ -1229,7 +1378,7 @@ private String luceneSearchResult(String name, Search search, String searchAfter try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject().writeStartArray("results"); for (ScoreDoc hit : hits) { - encodeResult(gen, hit, isearcher, search); + encodeResult(gen, hit, searchers.get(hit.shardIndex), search); } gen.writeEnd(); // array results if (hits.length == maxResults) { @@ -1240,7 +1389,7 @@ private String luceneSearchResult(String name, Search search, String searchAfter gen.write("score", lastScore); } if (fields != null) { - Document lastDocument = isearcher.doc(lastDoc.doc); + Document lastDocument = searchers.get(lastDoc.shardIndex).doc(lastDoc.doc); gen.writeStartArray("fields"); for (SortField sortField : fields) { String fieldName = sortField.getField(); @@ -1278,10 +1427,17 @@ private String luceneSearchResult(String name, Search search, String searchAfter } gen.writeEnd(); // end enclosing object } - logger.debug("Json returned {}", baos.toString()); + logger.trace("Json returned {}", baos.toString()); return baos.toString(); } + // private void checkMaxMatches(String name, Search search, IndexSearcher indexSearcher) + // throws IOException, LuceneException { + // if (shardedIndices.contains(name.toLowerCase()) && indexSearcher.count(search.query) > luceneMaxMatchingDocuments) { + // throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Query exceeded the maximum number of matching documents " + luceneMaxMatchingDocuments); + // } + // } + private Query maybeEmptyQuery(Builder theQuery) { Query query = theQuery.build(); if (query.toString().isEmpty()) { @@ -1402,6 +1558,7 @@ private void addSortField(JsonObject json, Document document, String key) { document.add(new NumericDocValuesField("id.long", value)); document.add(new StoredField("id.long", value)); } + // TODO add special case for startDate -> date to make sorting easier if (longFields.contains(key)) { document.add(new NumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); } else if (doubleFields.contains(key)) { @@ -1504,47 +1661,6 @@ private Builder parseParameter(JsonValue p) throws LuceneException { return paramQuery; } - /** - * Parses the String from the request into a Lucene Sort object. Multiple sort - * criteria are supported, and will be applied in order. - * - * @param sort String representation of a JSON object with the field(s) to sort - * as keys, and the direction ("asc" or "desc") as value(s). 
- * @return Lucene Sort object - * @throws LuceneException If the value for any key isn't "asc" or "desc" - */ - private Sort parseSort(String sort) throws LuceneException { - if (sort == null || sort.equals("")) { - return new Sort(SortField.FIELD_SCORE, new SortedNumericSortField("id.long", Type.LONG)); - } - try (JsonReader reader = Json.createReader(new ByteArrayInputStream(sort.getBytes()))) { - JsonObject object = reader.readObject(); - List fields = new ArrayList<>(); - for (String key : object.keySet()) { - String order = object.getString(key); - Boolean reverse; - if (order.equals("asc")) { - reverse = false; - } else if (order.equals("desc")) { - reverse = true; - } else { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "Sort order must be 'asc' or 'desc' but it was '" + order + "'"); - } - - if (longFields.contains(key)) { - fields.add(new SortedNumericSortField(key, Type.LONG, reverse)); - } else if (doubleFields.contains(key)) { - fields.add(new SortedNumericSortField(key, Type.DOUBLE, reverse)); - } else { - fields.add(new SortField(key, Type.STRING, reverse)); - } - } - fields.add(new SortedNumericSortField("id.long", Type.LONG)); - return new Sort(fields.toArray(new SortField[0])); - } - } - /** * Parses a Lucene ScoreDoc to be "searched after" from a String representation * of a JSON array. @@ -1564,7 +1680,6 @@ private FieldDoc parseSearchAfter(String searchAfter, SortField[] sortFields) th logger.debug("Attempting to parseSearchAfter from {}", searchAfter); JsonReader reader = Json.createReader(new StringReader(searchAfter)); JsonObject object = reader.readObject(); - int doc = object.getInt("doc"); int shardIndex = object.getInt("shardIndex"); float score = Float.NaN; List fields = new ArrayList<>(); @@ -1611,7 +1726,7 @@ private FieldDoc parseSearchAfter(String searchAfter, SortField[] sortFields) th } } } - return new FieldDoc(doc, score, fields.toArray(), shardIndex); + return new FieldDoc(0, score, fields.toArray(), shardIndex); // TODO } @POST @@ -1644,7 +1759,8 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm "Lucene locked for " + entityName); } logger.trace("update: {}", document); - bucket.getWriter(new Long(icatId)).updateDocument(new Term("id", icatId), facetsConfig.build(document)); + // bucket.getWriter(entityName, new Long(icatId)).updateDocument(new Term("id", icatId), facetsConfig.build(document)); + bucket.updateDocument(new Term("id", icatId), facetsConfig.build(document)); } } @@ -1671,8 +1787,9 @@ private void updateByRelation(JsonObject operationBody, Boolean delete) Document newDocument = delete ? 
pruneDocument(parentRelationship.fieldPrefix, oldDocument) : updateDocument(operationBody.getJsonObject("doc"), oldDocument); logger.trace("updateByRelation: {}", newDocument); - bucket.getWriter(new Long(parentId)).updateDocument(new Term("id", parentId), - facetsConfig.build(newDocument)); + // bucket.getWriter(parentRelationship.parentName, new Long(parentId)).updateDocument(new Term("id", parentId), + // facetsConfig.build(newDocument)); + bucket.updateDocument(new Term("id", parentId), facetsConfig.build(newDocument)); } scoreDocs = searcher.searchAfter(scoreDocs[scoreDocs.length - 1], query, blockSize, sort).scoreDocs; } From 49373f507ae139b330cc361956836409dd2dfd6e Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 8 Jun 2022 16:00:33 +0000 Subject: [PATCH 40/73] Add fields needed for DGS component #19 --- .../java/org/icatproject/lucene/Lucene.java | 301 ++++++++++-------- 1 file changed, 164 insertions(+), 137 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index eead564..88629b6 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -24,6 +24,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Consumer; import javax.annotation.PostConstruct; import javax.annotation.PreDestroy; @@ -197,23 +198,14 @@ public IndexBucket(String entityName) { } /** - * Acquires DirectoryReaders from the ReaderManagers of the individual shards in + * Acquires IndexSearchers from the SearcherManagers of the individual shards in * this bucket. * * @return Array of DirectoryReaders for all shards in this bucket. * @throws IOException */ - // public DirectoryReader[] acquireReaders() throws IOException { - // List subReaders = new ArrayList<>(); - // for (ShardBucket shardBucket : shardMap.values()) { - // subReaders.add(shardBucket.searcherManager.acquire()); - // } - // return subReaders.toArray(new DirectoryReader[0]); - // } - public List acquireSearchers() throws IOException { List subSearchers = new ArrayList<>(); - // for (ShardBucket shardBucket : shardMap.values()) { for (ShardBucket shardBucket : shardList) { subSearchers.add(shardBucket.searcherManager.acquire()); } @@ -256,12 +248,13 @@ public ShardBucket buildShardBucket(int shardKey) throws IOException { * @throws IOException */ public void commit(String command, String entityName) throws IOException { - // for (Entry entry : shardMap.entrySet()) { for (ShardBucket shardBucket : shardList) { int cached = shardBucket.commit(); if (cached != 0) { + int numDocs = shardBucket.indexWriter.getDocStats().numDocs; + String directoryName = shardBucket.directory.getDirectory().toString(); logger.debug("{} has committed {} {} changes to Lucene - now have {} documents indexed in {}", - command, cached, entityName, shardBucket.indexWriter.getDocStats().numDocs, shardBucket.directory.getDirectory().toString()); + command, cached, entityName, numDocs, directoryName); } } } @@ -282,26 +275,15 @@ public void close() throws IOException { } /** - * Provides the ShardBucket that should be used for reading/writing the Document - * with the provided id. All ids up to luceneMaxShardSize are indexed in the - * first shard, after that a new shard is created for the next - * luceneMaxShardSize Documents and so on. + * Provides the ShardBucket that should be used for writing the next Document. 
+ * All Documents up to luceneMaxShardSize are indexed in the first shard, after + * that a new shard is created for the next luceneMaxShardSize Documents and so + * on. * - * @param id The id of a Document to be routed. * @return The ShardBucket that the relevant Document is/should be indexed in. * @throws IOException */ public ShardBucket routeShard() throws IOException { - // if (id == null || !shardedIndices.contains(entityName.toLowerCase())) { - // // If we don't have id, provide the first bucket - // return shardMap.get(0L); - // } - // Long shard = id / luceneMaxShardSize; - // ShardBucket shardBucket = shardMap.get(shard); - // if (shardBucket == null) { - // shardBucket = buildShardBucket(shard); - // } - // return shardBucket; int size = shardList.size(); ShardBucket shardBucket = shardList.get(size - 1); if (shardBucket.documentCount.get() >= luceneMaxShardSize) { @@ -311,18 +293,6 @@ public ShardBucket routeShard() throws IOException { return shardBucket; } - /** - * Provides the IndexWriter that should be used for writing the Document with - * the provided id. - * - * @param id The id of a Document to be routed. - * @return The relevant IndexWriter. - * @throws IOException - */ - // public IndexWriter getWriter(String entityName, Long id) throws IOException { - // return routeShard(entityName, id).indexWriter; - // } - public void releaseReaders(List subSearchers) throws IOException, LuceneException { if (subSearchers.size() != shardList.size()) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, @@ -343,19 +313,45 @@ public class Search { public Sort sort; public boolean scored; public Set fields = new HashSet(); + public Map> joinedFields = new HashMap<>(); public Set dimensions = new HashSet(); - + + public void parseFields(JsonObject jsonObject) throws LuceneException { + if (jsonObject.containsKey("fields")) { + List fieldStrings = jsonObject.getJsonArray("fields").getValuesAs(JsonString.class); + logger.trace("Parsing fields from {}", fieldStrings); + for (JsonString jsonString : fieldStrings) { + String[] splitString = jsonString.getString().split(" "); + if (splitString.length == 1) { + fields.add(splitString[0]); + } else if (splitString.length == 2) { + if (joinedFields.containsKey(splitString[0])) { + joinedFields.get(splitString[0]).add(splitString[1]); + } else { + joinedFields.putIfAbsent(splitString[0], + new HashSet(Arrays.asList(splitString[1]))); + } + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Could not parse field: " + jsonString.getString()); + } + } + } + + } + /** * Parses the String from the request into a Lucene Sort object. Multiple sort * criteria are supported, and will be applied in order. * - * @param sortString String representation of a JSON object with the field(s) to sort - * as keys, and the direction ("asc" or "desc") as value(s). + * @param sortString String representation of a JSON object with the field(s) to + * sort + * as keys, and the direction ("asc" or "desc") as value(s). 
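For illustration of the parseFields() behaviour introduced above (the entity and field names here are examples, not taken from the patch), a "fields" array entry is either a bare field name or an entity-qualified pair separated by a space:

    public class FieldsRequestSketch {
        public static void main(String[] args) {
            // "name" and "date" are returned from the matched document itself;
            // "investigationuser user.fullName" asks for user.fullName from the
            // joined InvestigationUser documents, as tracked by joinedFields.
            String[] requested = { "name", "date", "investigationuser user.fullName" };
            for (String entry : requested) {
                String[] split = entry.split(" ");
                if (split.length == 1) {
                    System.out.println("own field: " + split[0]);
                } else {
                    System.out.println("joined entity " + split[0] + ", field " + split[1]);
                }
            }
        }
    }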
* @throws LuceneException If the value for any key isn't "asc" or "desc" */ public void parseSort(String sortString) throws LuceneException { - if (sortString == null || sortString.equals("")|| sortString.equals("{}")) { + if (sortString == null || sortString.equals("") || sortString.equals("{}")) { scored = true; sort = new Sort(SortField.FIELD_SCORE, new SortedNumericSortField("id.long", Type.LONG)); return; @@ -426,16 +422,21 @@ public ParentRelationship(String parentName, String fieldPrefix) { doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI")); facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name")); - longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue")); - sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "id", "date", "startDate", - "endDate", "name", "stringValue", "dateTimeValue", "numericValue", "numericValueSI")); - textFields.addAll(Arrays.asList("name", "visitId", "description", "datafileFormat.name", "sample.name", - "sample.type.name", "title", "summary", "facility.name", "user.fullName")); + longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate")); + sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", "date", + "startDate", "endDate", "name", "stringValue", "dateTimeValue", "numericValue", "numericValueSI")); + textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name", + "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name", + "sample.type.name", "title", "summary", "facility.name", "user.fullName", "type.name")); indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter", - "DatasetParameter", "InvestigationParameter", "InvestigationUser", "Sample")); + "DatasetParameter", "InstrumentScientist", "InvestigationInstrument", "InvestigationParameter", + "InvestigationUser", "Sample")); - relationships.put("User", new ParentRelationship[] { new ParentRelationship("InvestigationUser", "user") }); + relationships.put("Instrument", + new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument") }); + relationships.put("User", new ParentRelationship[] { new ParentRelationship("InvestigationUser", "user"), + new ParentRelationship("InstrumentScientist", "user") }); relationships.put("Sample", new ParentRelationship[] { new ParentRelationship("Dataset", "sample") }); relationships.put("SampleType", new ParentRelationship[] { new ParentRelationship("Sample", "type"), new ParentRelationship("Dataset", "sample.type") }); @@ -449,11 +450,14 @@ public ParentRelationship(String parentName, String fieldPrefix) { new ParentRelationship[] { new ParentRelationship("DatafileParameter", "type"), new ParentRelationship("DatasetParameter", "type"), new ParentRelationship("InvestigationParameter", "type") }); + relationships.put("Investigation", + new ParentRelationship[] { new ParentRelationship("Dataset", "investigation"), + new ParentRelationship("Datafile", "investigation") }); genericParser.setAllowLeadingWildcard(true); genericParser.setAnalyzer(analyzer); - CharSequence[] datafileFields = { "name", "description", "doi", "datafileFormat.name" }; + CharSequence[] datafileFields = { "name", "description", "doi", "location", "datafileFormat.name" }; datafileParser.setAllowLeadingWildcard(true); 
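The parser wiring here (completed just below with setMultiFields) means a bare search term is expanded across all of the listed fields. A minimal sketch of that behaviour in isolation (query string and printout are illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
    import org.apache.lucene.search.Query;

    public class MultiFieldParserSketch {
        public static void main(String[] args) throws Exception {
            StandardQueryParser parser = new StandardQueryParser();
            parser.setAllowLeadingWildcard(true);
            parser.setAnalyzer(new StandardAnalyzer());
            CharSequence[] fields = { "name", "description", "doi", "location", "datafileFormat.name" };
            parser.setMultiFields(fields);
            // With multi-fields set, a null default field makes a bare term expand
            // to a disjunction over every field in the array.
            Query query = parser.parse("neutron", null);
            System.out.println(query);
        }
    }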
datafileParser.setAnalyzer(analyzer); datafileParser.setMultiFields(datafileFields); @@ -561,7 +565,7 @@ public void addNow(@Context HttpServletRequest request, @PathParam("entityName") createNow(entityName, document); } } catch (IOException | JsonException e) { - + logger.error("Could not parse JSON from {}", value.toString()); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -599,6 +603,21 @@ private static void buildDateRanges(Builder queryBuilder, JsonObject queryJson, } } + private void buildUserNameQuery(Map> readerMap, String userName, + BooleanQuery.Builder theQuery, String toField) + throws IOException, LuceneException { + TermQuery fromQuery = new TermQuery(new Term("user.name", userName)); + Query investigationUserQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, fromQuery, + getSearcher(readerMap, "InvestigationUser"), ScoreMode.None); + Query instrumentScientistQuery = JoinUtil.createJoinQuery("instrument.id", false, "instrument.id", fromQuery, + getSearcher(readerMap, "InstrumentScientist"), ScoreMode.None); + Query investigationInstrumentQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, + instrumentScientistQuery, getSearcher(readerMap, "InvestigationInstrument"), ScoreMode.None); + Builder userNameQueryBuilder = new BooleanQuery.Builder(); + userNameQueryBuilder.add(investigationUserQuery, Occur.SHOULD).add(investigationInstrumentQuery, Occur.SHOULD); + theQuery.add(userNameQueryBuilder.build(), Occur.MUST); + } + /* * This is only for testing purposes. Other calls to the service will not * work properly while this operation is in progress. @@ -649,7 +668,6 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx updateByRelation(operationBody, false); } if (indexedEntities.contains(entityName)) { - String icatId = operationBody.getString("_id"); Document document = parseDocument(operationBody.getJsonObject("doc")); logger.trace("create {} {}", entityName, document.toString()); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); @@ -657,7 +675,6 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } - // bucket.getWriter(entityName, new Long(icatId)).addDocument(facetsConfig.build(document)); bucket.addDocument(facetsConfig.build(document)); } } @@ -668,11 +685,9 @@ private void createNow(String entityName, JsonObject documentJson) throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, "id was not in the document keys " + documentJson.keySet()); } - // String icatId = documentJson.getString("id"); Document document = parseDocument(documentJson); logger.trace("create {} {}", entityName, document.toString()); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); - // bucket.getWriter(entityName, new Long(icatId)).addDocument(facetsConfig.build(document)); bucket.addDocument(facetsConfig.build(document)); } @@ -710,10 +725,7 @@ private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); if (userName != null) { - Query iuQuery = JoinUtil.createJoinQuery("investigation.id", false, "investigation.id", - new TermQuery(new Term("user.name", userName)), getSearcher(readerMap, "InvestigationUser"), - ScoreMode.None); - theQuery.add(iuQuery, Occur.MUST); + buildUserNameQuery(readerMap, userName, 
theQuery, "investigation.id"); } String text = query.getString("text", null); @@ -734,10 +746,7 @@ private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) } } search.query = maybeEmptyQuery(theQuery); - if (o.containsKey("fields")) { - List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); - jsonStrings.forEach((jsonString) -> search.fields.add(jsonString.getString())); - } + search.parseFields(o); } return search; } @@ -777,12 +786,7 @@ private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); if (userName != null) { - - Query iuQuery = JoinUtil.createJoinQuery("investigation.id", false, "investigation.id", - new TermQuery(new Term("user.name", userName)), getSearcher(readerMap, "InvestigationUser"), - ScoreMode.None); - - theQuery.add(iuQuery, Occur.MUST); + buildUserNameQuery(readerMap, userName, theQuery, "investigation.id"); } String text = query.getString("text", null); @@ -803,10 +807,7 @@ private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) } } search.query = maybeEmptyQuery(theQuery); - if (o.containsKey("fields")) { - List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); - jsonStrings.forEach((jsonString) -> search.fields.add(jsonString.getString())); - } + search.parseFields(o); } return search; } @@ -842,10 +843,9 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio "Lucene locked for " + entityName); } logger.trace("delete {} {}", entityName, icatId); - for (ShardBucket shardBucket: bucket.shardList) { + for (ShardBucket shardBucket : bucket.shardList) { shardBucket.indexWriter.deleteDocuments(new Term("id", icatId)); } - // ShardBucket shardBucket = bucket.routeShard(entityName, new Long(icatId)); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -864,9 +864,10 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio * @param searcher IndexSearcher used to get the Document for the hit. * @param search Search object containing the fields to return. 
* @throws IOException + * @throws LuceneException */ - private void encodeResult(JsonGenerator gen, ScoreDoc hit, IndexSearcher searcher, Search search) - throws IOException { + private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, IndexSearcher searcher, Search search) + throws IOException, LuceneException { int luceneDocId = hit.doc; Document document = searcher.doc(luceneDocId); gen.writeStartObject().write("_id", luceneDocId); @@ -875,9 +876,42 @@ private void encodeResult(JsonGenerator gen, ScoreDoc hit, IndexSearcher searche gen.write("_score", hit.score); } gen.writeStartObject("_source"); - document.forEach((field) -> { + document.forEach(encodeField(gen, search.fields)); + for (String joinedEntityName : search.joinedFields.keySet()) { + List searchers = getSearchers(search.searcherMap, joinedEntityName); + Search joinedSearch = new Search(); + String fld; + String parentId; + if (joinedEntityName.toLowerCase().contains("investigation")) { + fld = "investigation.id"; + if (entityName.toLowerCase().equals("investigation")) { + parentId = document.get("id"); + } else { + parentId = document.get("investigation.id"); + } + } else { + fld = entityName.toLowerCase() + ".id"; + parentId = document.get("id"); + } + joinedSearch.query = new TermQuery(new Term(fld, parentId)); + joinedSearch.sort = new Sort(new SortedNumericSortField("id.long", Type.LONG)); + TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, searchers, null); + gen.writeStartArray(joinedEntityName.toLowerCase()); + for (ScoreDoc joinedHit : topFieldDocs.scoreDocs) { + gen.writeStartObject(); + Document joinedDocument = searchers.get(joinedHit.shardIndex).doc(joinedHit.doc); + joinedDocument.forEach(encodeField(gen, search.joinedFields.get(joinedEntityName))); + gen.writeEnd(); + } + gen.writeEnd(); + } + gen.writeEnd().writeEnd(); // source object, result object + } + + private Consumer encodeField(JsonGenerator gen, Set fields) { + return (field) -> { String fieldName = field.name(); - if (search.fields.contains(fieldName)) { + if (fields.contains(fieldName)) { if (longFields.contains(fieldName)) { gen.write(fieldName, field.numericValue().longValue()); } else if (doubleFields.contains(fieldName)) { @@ -886,8 +920,7 @@ private void encodeResult(JsonGenerator gen, ScoreDoc hit, IndexSearcher searche gen.write(fieldName, field.stringValue()); } } - }); - gen.writeEnd().writeEnd(); // source object, result object + }; } @PreDestroy @@ -1003,11 +1036,7 @@ private Search genericQuery(HttpServletRequest request, String sort, Long uid) t } search.query = maybeEmptyQuery(luceneQuery); logger.info("Query: {}", search.query); - if (o.containsKey("fields")) { - List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); - jsonStrings.forEach((jsonString) -> search.fields.add(jsonString.getString())); - logger.info("Fields: {}", search.fields); - } + search.parseFields(o); if (o.containsKey("dimensions")) { List dimensionObjects = o.getJsonArray("dimensions").getValuesAs(JsonObject.class); for (JsonObject dimensionObject : dimensionObjects) { @@ -1067,7 +1096,8 @@ private Search genericQuery(HttpServletRequest request, String sort, Long uid) t return search; } - private List getSearchers(Map> readerMap, String name) throws IOException { + private List getSearchers(Map> readerMap, String name) + throws IOException { List subSearchers = readerMap.get(name); if (subSearchers == null) { subSearchers = indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).acquireSearchers(); @@ -1077,11 
+1107,13 @@ private List getSearchers(Map> reader return subSearchers; } - private IndexSearcher getSearcher(Map> readerMap, String name) throws IOException, LuceneException { + private IndexSearcher getSearcher(Map> readerMap, String name) + throws IOException, LuceneException { List subSearchers = readerMap.get(name); subSearchers = getSearchers(readerMap, name); if (subSearchers.size() > 1) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Cannot get single IndexSearcher for " + name + " as it has " + subSearchers.size() + " shards"); + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, + "Cannot get single IndexSearcher for " + name + " as it has " + subSearchers.size() + " shards"); } return subSearchers.get(0); } @@ -1100,7 +1132,8 @@ private void init() { luceneCommitMillis = props.getPositiveInt("commitSeconds") * 1000; luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), new Long(Integer.MAX_VALUE + 1)); - maxSearchTimeSeconds = props.has("maxSearchTimeSeconds") ? props.getPositiveLong("maxSearchTimeSeconds") : 5; + maxSearchTimeSeconds = props.has("maxSearchTimeSeconds") ? props.getPositiveLong("maxSearchTimeSeconds") + : 5; timer = new Timer("LuceneCommitTimer"); timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); @@ -1115,7 +1148,8 @@ private void init() { throw new IllegalStateException(e.getMessage()); } - logger.info("Initialised icat.lucene with directory {}, commitSeconds {}, maxShardSize {}, shardedIndices {}, maxSearchTimeSeconds {}", + logger.info( + "Initialised icat.lucene with directory {}, commitSeconds {}, maxShardSize {}, shardedIndices {}, maxSearchTimeSeconds {}", luceneDirectory, luceneCommitMillis, luceneMaxShardSize, shardedIndices, maxSearchTimeSeconds); } @@ -1163,10 +1197,7 @@ private Search investigationsQuery(HttpServletRequest request, String sort, Long BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); if (userName != null) { - Query iuQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", - new TermQuery(new Term("user.name", userName)), getSearcher(readerMap, "InvestigationUser"), - ScoreMode.None); - theQuery.add(iuQuery, Occur.MUST); + buildUserNameQuery(readerMap, userName, theQuery, "id"); } String text = query.getString("text", null); @@ -1213,10 +1244,7 @@ private Search investigationsQuery(HttpServletRequest request, String sort, Long } search.query = maybeEmptyQuery(theQuery); - if (o.containsKey("fields")) { - List jsonStrings = o.getJsonArray("fields").getValuesAs(JsonString.class); - jsonStrings.forEach((jsonString) -> search.fields.add(jsonString.getString())); - } + search.parseFields(o); } logger.info("Query: {}", search.query); return search; @@ -1273,13 +1301,15 @@ private String luceneFacetResult(String name, Search search, String searchAfter, } } try { - DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(indexSearcher.getIndexReader()); + DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState( + indexSearcher.getIndexReader()); Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); logger.debug("facets: {}, maxLabels: {}, maxResults: {}", facets, maxLabels, maxResults); putFacets(maxLabels, results, facets); } catch (IllegalArgumentException e) { // This can occur if no fields in the index have been faceted - logger.error("No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage()); + logger.error( + "No facets found in index, 
resulting in error: " + e.getClass() + " " + e.getMessage()); } catch (IllegalStateException e) { // This can occur if we do not create the IndexSearcher from the same // DirectoryReader as we used to create the state @@ -1343,29 +1373,7 @@ private String luceneSearchResult(String name, Search search, String searchAfter String format = "Search {} with: query {}, maxResults {}, searchAfter {}, scored {}"; logger.debug(format, name, search.query, maxResults, searchAfter, search.scored); FieldDoc searchAfterDoc = parseSearchAfter(searchAfter, search.sort.getSort()); - TopFieldDocs topFieldDocs; - if (searchers.size() > 0) { - List shardHits = new ArrayList<>(); - int i = 0; - long startTime = System.currentTimeMillis(); - for (IndexSearcher indexSearcher : searchers) { - // checkMaxMatches(name, search, indexSearcher); - TopFieldDocs shardDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); - shardHits.add(shardDocs); - logger.debug("{} hits on shard {} out of {} total docs", shardDocs.totalHits, i, indexSearcher.getIndexReader().numDocs()); - i++; - long duration = (System.currentTimeMillis() - startTime); - if (duration > maxSearchTimeSeconds * 1000) { - logger.info("Stopping search after {} shards due to {} ms having elapsed", i, duration); - break; - } - } - topFieldDocs = TopFieldDocs.merge(search.sort, 0, maxResults, shardHits.toArray(new TopFieldDocs[i]), true); - } else { - IndexSearcher indexSearcher = searchers.get(0); - // checkMaxMatches(name, search, indexSearcher); - topFieldDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); - } + TopFieldDocs topFieldDocs = searchShards(search, maxResults, searchers, searchAfterDoc); ScoreDoc[] hits = topFieldDocs.scoreDocs; TotalHits totalHits = topFieldDocs.totalHits; SortField[] fields = topFieldDocs.fields; @@ -1378,7 +1386,7 @@ private String luceneSearchResult(String name, Search search, String searchAfter try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject().writeStartArray("results"); for (ScoreDoc hit : hits) { - encodeResult(gen, hit, searchers.get(hit.shardIndex), search); + encodeResult(name, gen, hit, searchers.get(hit.shardIndex), search); } gen.writeEnd(); // array results if (hits.length == maxResults) { @@ -1431,12 +1439,34 @@ private String luceneSearchResult(String name, Search search, String searchAfter return baos.toString(); } - // private void checkMaxMatches(String name, Search search, IndexSearcher indexSearcher) - // throws IOException, LuceneException { - // if (shardedIndices.contains(name.toLowerCase()) && indexSearcher.count(search.query) > luceneMaxMatchingDocuments) { - // throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Query exceeded the maximum number of matching documents " + luceneMaxMatchingDocuments); - // } - // } + private TopFieldDocs searchShards(Search search, int maxResults, List searchers, + FieldDoc searchAfterDoc) throws IOException { + TopFieldDocs topFieldDocs; + if (searchers.size() > 0) { + List shardHits = new ArrayList<>(); + int i = 0; + long startTime = System.currentTimeMillis(); + for (IndexSearcher indexSearcher : searchers) { + TopFieldDocs shardDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, + search.sort, search.scored); + shardHits.add(shardDocs); + logger.debug("{} hits on shard {} out of {} total docs", shardDocs.totalHits, i, + indexSearcher.getIndexReader().numDocs()); + i++; + long duration = 
(System.currentTimeMillis() - startTime); + if (duration > maxSearchTimeSeconds * 1000) { + logger.info("Stopping search after {} shards due to {} ms having elapsed", i, duration); + break; + } + } + topFieldDocs = TopFieldDocs.merge(search.sort, 0, maxResults, shardHits.toArray(new TopFieldDocs[i]), true); + } else { + IndexSearcher indexSearcher = searchers.get(0); + topFieldDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, + search.scored); + } + return topFieldDocs; + } private Query maybeEmptyQuery(Builder theQuery) { Query query = theQuery.build(); @@ -1504,7 +1534,7 @@ private void addField(JsonObject json, Document document, String key) { // Likewise, faceted fields should be considered separately if (facetFields.contains(key)) { - document.add(new SortedSetDocValuesFacetField(key, json.getString(key))); + document.add(new SortedSetDocValuesFacetField(key + ".keyword", json.getString(key))); } if (doubleFields.contains(key)) { @@ -1558,7 +1588,7 @@ private void addSortField(JsonObject json, Document document, String key) { document.add(new NumericDocValuesField("id.long", value)); document.add(new StoredField("id.long", value)); } - // TODO add special case for startDate -> date to make sorting easier + // TODO add special case for startDate -> date to make sorting easier? if (longFields.contains(key)) { document.add(new NumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); } else if (doubleFields.contains(key)) { @@ -1759,7 +1789,6 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm "Lucene locked for " + entityName); } logger.trace("update: {}", document); - // bucket.getWriter(entityName, new Long(icatId)).updateDocument(new Term("id", icatId), facetsConfig.build(document)); bucket.updateDocument(new Term("id", icatId), facetsConfig.build(document)); } } @@ -1787,8 +1816,6 @@ private void updateByRelation(JsonObject operationBody, Boolean delete) Document newDocument = delete ? 
pruneDocument(parentRelationship.fieldPrefix, oldDocument) : updateDocument(operationBody.getJsonObject("doc"), oldDocument); logger.trace("updateByRelation: {}", newDocument); - // bucket.getWriter(parentRelationship.parentName, new Long(parentId)).updateDocument(new Term("id", parentId), - // facetsConfig.build(newDocument)); bucket.updateDocument(new Term("id", parentId), facetsConfig.build(newDocument)); } scoreDocs = searcher.searchAfter(scoreDocs[scoreDocs.length - 1], query, blockSize, sort).scoreDocs; From 2fc0f8ef6a06e95d4ef0681dada6dd7f0881b150 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 10 Jun 2022 20:18:58 +0100 Subject: [PATCH 41/73] Use .keyword for string facets #19 --- .../lucene/FacetDimensionRequest.java | 26 ---- .../icatproject/lucene/FacetedDimension.java | 88 +++++++++++ .../java/org/icatproject/lucene/Lucene.java | 142 ++++++++++-------- 3 files changed, 165 insertions(+), 91 deletions(-) delete mode 100644 src/main/java/org/icatproject/lucene/FacetDimensionRequest.java create mode 100644 src/main/java/org/icatproject/lucene/FacetedDimension.java diff --git a/src/main/java/org/icatproject/lucene/FacetDimensionRequest.java b/src/main/java/org/icatproject/lucene/FacetDimensionRequest.java deleted file mode 100644 index 736f2d3..0000000 --- a/src/main/java/org/icatproject/lucene/FacetDimensionRequest.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.icatproject.lucene; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.lucene.facet.range.Range; - -public class FacetDimensionRequest { - - private String dimension; - private List ranges; - - public FacetDimensionRequest(String dimension) { - this.dimension = dimension; - this.ranges = new ArrayList<>(); - } - - public List getRanges() { - return ranges; - } - - public String getDimension() { - return dimension; - } - -} diff --git a/src/main/java/org/icatproject/lucene/FacetedDimension.java b/src/main/java/org/icatproject/lucene/FacetedDimension.java new file mode 100644 index 0000000..3173d27 --- /dev/null +++ b/src/main/java/org/icatproject/lucene/FacetedDimension.java @@ -0,0 +1,88 @@ +package org.icatproject.lucene; + +import java.util.ArrayList; +import java.util.List; + +import javax.json.Json; +import javax.json.JsonObjectBuilder; + +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.facet.range.Range; + +public class FacetedDimension { + + private String dimension; + private List ranges; + private List labels; + private List counts; + + /** + * For a single dimension (field), stores labels (the unique values or ranges of + * values for that field in the index) and their respective counts (the number + * of times that label appears in different documents). + * + * For example, a dimension might be "colour", the label "red", and the count 5. + * + * @param dimension The dimension, or field, to be faceted + */ + public FacetedDimension(String dimension) { + this.dimension = dimension; + this.ranges = new ArrayList<>(); + this.labels = new ArrayList<>(); + this.counts = new ArrayList<>(); + } + + /** + * Extracts the count for each label in the FacetResult. If the label has + * already been encountered, the count is incremented rather than being + * overridden. Essentially, this allows faceting to be performed across multiple + * shards. 
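+ * <p>
+ * A minimal sketch of the intended aggregation (the dimension and values are
+ * illustrative, not taken from a real index):
+ * <pre>{@code
+ * FacetedDimension colour = new FacetedDimension("colour");
+ * colour.addResult(shardZeroResult); // suppose "red" was counted 5 times
+ * colour.addResult(shardOneResult);  // suppose "red" 3 times, "blue" 2 times
+ * // the labels and counts now hold "red" -> 8 and "blue" -> 2
+ * }</pre>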
+ *
+ * @param facetResult A Lucene FacetResult object corresponding to the relevant
+ * dimension
+ */
+ public void addResult(FacetResult facetResult) {
+ for (LabelAndValue labelAndValue : facetResult.labelValues) {
+ String label = labelAndValue.label;
+ int labelIndex = labels.indexOf(label);
+ if (labelIndex == -1) {
+ labels.add(label);
+ counts.add(labelAndValue.value.longValue());
+ } else {
+ counts.set(labelIndex, counts.get(labelIndex) + labelAndValue.value.longValue());
+ }
+ }
+ }
+
+ /**
+ * Formats the labels and counts into Json.
+ *
+ * @param aggregationsBuilder The JsonObjectBuilder to add the facets for this
+ * dimension to.
+ */
+ public void buildResponse(JsonObjectBuilder aggregationsBuilder) {
+ JsonObjectBuilder bucketsBuilder = Json.createObjectBuilder();
+ for (int i = 0; i < labels.size(); i++) {
+ JsonObjectBuilder bucketBuilder = Json.createObjectBuilder();
+ bucketsBuilder.add(labels.get(i), bucketBuilder.add("doc_count", counts.get(i)));
+ }
+ aggregationsBuilder.add(dimension, Json.createObjectBuilder().add("buckets", bucketsBuilder));
+ }
+
+ /**
+ * @return The list of Lucene Range Objects for use with numerical facets.
+ * For String faceting, this will be empty.
+ */
+ public List getRanges() {
+ return ranges;
+ }
+
+ /**
+ * @return The dimension that these labels and counts correspond to.
+ */
+ public String getDimension() {
+ return dimension;
+ }
+
+}
diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java
index 88629b6..b83452b 100755
--- a/src/main/java/org/icatproject/lucene/Lucene.java
+++ b/src/main/java/org/icatproject/lucene/Lucene.java
@@ -65,7 +65,6 @@ import org.apache.lucene.facet.Facets;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
-import org.apache.lucene.facet.LabelAndValue;
 import org.apache.lucene.facet.range.DoubleRange;
 import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
 import org.apache.lucene.facet.range.LongRange;
 import org.apache.lucene.facet.range.LongRangeFacetCounts;
@@ -314,7 +313,7 @@ public class Search {
 public boolean scored;
 public Set fields = new HashSet();
 public Map> joinedFields = new HashMap<>();
- public Set dimensions = new HashSet();
+ public Map dimensions = new HashMap();
 public void parseFields(JsonObject jsonObject) throws LuceneException {
 if (jsonObject.containsKey("fields")) {
@@ -426,7 +425,7 @@ public ParentRelationship(String parentName, String fieldPrefix) {
 sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", "date",
 "startDate", "endDate", "name", "stringValue", "dateTimeValue", "numericValue", "numericValueSI"));
 textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name",
- "investigation.name", "instrument.name", "isntrument.fullName", "datafileFormat.name", "sample.name",
+ "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name",
 "sample.type.name", "title", "summary", "facility.name", "user.fullName", "type.name"));
 indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter",
 "DatasetParameter", "InstrumentScientist", "InvestigationInstrument", "InvestigationParameter",
 "InvestigationUser", "Sample"));
@@ -869,8 +868,9 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio
 private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, IndexSearcher searcher, Search search)
 throws IOException, LuceneException {
 int luceneDocId = hit.doc;
+ int shardIndex = hit.shardIndex;
 Document document = searcher.doc(luceneDocId);
- gen.writeStartObject().write("_id",
luceneDocId); + gen.writeStartObject().write("_id", luceneDocId).write("_shardIndex", shardIndex); Float score = hit.score; if (!score.equals(Float.NaN)) { gen.write("_score", hit.score); @@ -1045,7 +1045,7 @@ private Search genericQuery(HttpServletRequest request, String sort, Long uid) t "'dimension' not specified for facet request " + dimensionObject.toString()); } String dimension = dimensionObject.getString("dimension"); - FacetDimensionRequest facetDimensionRequest = new FacetDimensionRequest(dimension); + FacetedDimension facetDimensionRequest = new FacetedDimension(dimension); if (dimensionObject.containsKey("ranges")) { List ranges = facetDimensionRequest.getRanges(); if (longFields.contains(dimension)) { @@ -1088,7 +1088,7 @@ private Search genericQuery(HttpServletRequest request, String sort, Long uid) t + " but this is not a supported numeric field"); } } - search.dimensions.add(facetDimensionRequest); + search.dimensions.put(dimension, facetDimensionRequest); } logger.info("Dimensions: {}", search.dimensions.size()); } @@ -1270,42 +1270,67 @@ public void lock(@PathParam("entityName") String entityName) throws LuceneExcept private String luceneFacetResult(String name, Search search, String searchAfter, int maxResults, int maxLabels, Long uid) throws IOException, IllegalStateException, LuceneException { - Map> results = new HashMap<>(); - Map> rangeResults = new HashMap<>(); + // If no dimensions were specified, perform "sparse" faceting on all applicable + // string values + boolean sparse = search.dimensions.size() == 0; + // By default, assume we do not need to perform string based faceting for + // specific dimensions + boolean facetStrings = false; if (maxResults <= 0 || maxLabels <= 0) { // This will result in no Facets and a null pointer, so return early logger.warn("Cannot facet when maxResults={}, maxLabels={}, returning empty list", maxResults, maxLabels); } else { + // Iterate over shards and aggregate the facets from each List searchers = getSearchers(search.searcherMap, name); + logger.debug("Faceting {} with {} after {} ", name, search.query, searchAfter); for (IndexSearcher indexSearcher : searchers) { FacetsCollector facetsCollector = new FacetsCollector(); FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); - logger.debug("To facet in {} for {} {} with {} from {} ", name, search.query, maxResults, indexSearcher, - searchAfter); - for (FacetDimensionRequest facetDimensionRequest : search.dimensions) { - if (facetDimensionRequest.getRanges().size() > 0) { - String dimension = facetDimensionRequest.getDimension(); + for (FacetedDimension facetedDimension : search.dimensions.values()) { + if (facetedDimension.getRanges().size() > 0) { + // Perform range based facets for a numeric field + String dimension = facetedDimension.getDimension(); + Facets facets; if (longFields.contains(dimension)) { - LongRange[] ranges = facetDimensionRequest.getRanges().toArray(new LongRange[0]); - Facets facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges); - putFacets(maxLabels, rangeResults, facets); + LongRange[] ranges = facetedDimension.getRanges().toArray(new LongRange[0]); + facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges); } else if (doubleFields.contains(dimension)) { - DoubleRange[] ranges = facetDimensionRequest.getRanges().toArray(new DoubleRange[0]); - Facets facets = new DoubleRangeFacetCounts(dimension, facetsCollector, ranges); - putFacets(maxLabels, rangeResults, facets); + DoubleRange[] ranges = 
facetedDimension.getRanges().toArray(new DoubleRange[0]);
+ facets = new DoubleRangeFacetCounts(dimension, facetsCollector, ranges);
 } else {
 throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST,
 "'ranges' specified for dimension " + dimension
 + " but this is not a supported numeric field");
 }
+ FacetResult facetResult = facets.getTopChildren(maxLabels, dimension);
+ facetedDimension.addResult(facetResult);
+ } else {
+ // Have a specific string dimension to facet, but these should all be done at
+ // once for efficiency
+ facetStrings = true;
 }
 }
 try {
- DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(
- indexSearcher.getIndexReader());
- Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector);
- logger.debug("facets: {}, maxLabels: {}, maxResults: {}", facets, maxLabels, maxResults);
- putFacets(maxLabels, results, facets);
+ if (sparse) {
+ // Facet all applicable string fields
+ DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(
+ indexSearcher.getIndexReader());
+ Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector);
+ addFacetResults(maxLabels, search.dimensions, facets);
+ } else if (facetStrings) {
+ // Only add facets to the results if they match one of the requested dimensions
+ DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(
+ indexSearcher.getIndexReader());
+ Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector);
+ List facetResults = facets.getAllDims(maxLabels);
+ for (FacetResult facetResult : facetResults) {
+ String dimension = facetResult.dim;
+ FacetedDimension facetedDimension = search.dimensions.get(dimension);
+ if (facetedDimension != null) {
+ facetedDimension.addResult(facetResult);
+ }
+ }
+ }
 } catch (IllegalArgumentException e) {
 // This can occur if no fields in the index have been faceted
 logger.error(
 "No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage());
 } catch (IllegalStateException e) {
 // This can occur if we do not create the IndexSearcher from the same
 // DirectoryReader as we used to create the state
 throw e;
 }
 }
- logger.debug("Facets found for " + results.size() + " dimensions");
 }
- Set dimensionSet = new HashSet<>();
- search.dimensions.forEach(d -> dimensionSet.add(d.getDimension()));
+ // Build results
 JsonObjectBuilder aggregationsBuilder = Json.createObjectBuilder();
- for (Entry> dimensionEntry : results.entrySet()) {
- if (dimensionSet.size() == 0 || dimensionSet.contains(dimensionEntry.getKey())) {
- buildBuckets(aggregationsBuilder, dimensionEntry);
- }
- }
- for (Entry> dimensionEntry : results.entrySet()) {
- buildBuckets(aggregationsBuilder, dimensionEntry);
- }
+ search.dimensions.values().forEach(facetedDimension -> facetedDimension.buildResponse(aggregationsBuilder));
 return Json.createObjectBuilder().add("aggregations", aggregationsBuilder).build().toString();
 }
- private void putFacets(int maxLabels, Map> rangeResults, Facets facets)
+ /**
+ * Add Facets for all dimensions. This will create FacetedDimension objects if
+ * they do not already exist in the facetedDimensionMap, otherwise the counts
+ * for each label will be aggregated.
+ *
+ * @param maxLabels The maximum number of labels for a given
+ * dimension. The labels with the highest counts are
+ * returned first.
+ * @param facetedDimensionMap Map containing the dimensions that have been or
+ * should be faceted.
+ * @param facets Lucene facets object containing all dimensions.
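+ *                            For example (an illustrative case), a single
+ *                            Facets object from one shard may hold counts for
+ *                            both "type.name" and "datafileFormat.name"; each
+ *                            FacetResult is routed to, or creates, the
+ *                            FacetedDimension of the same name.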
+ * @throws IOException + */ + private void addFacetResults(int maxLabels, Map facetedDimensionMap, Facets facets) throws IOException { - for (FacetResult result : facets.getAllDims(maxLabels)) { - String dim = result.dim; - if (rangeResults.containsKey(dim)) { - Map labelMap = rangeResults.get(dim); - for (LabelAndValue labelAndValue : result.labelValues) { - String label = labelAndValue.label; - if (labelMap.containsKey(label)) { - labelMap.put(label, labelMap.get(label) + labelAndValue.value.longValue()); - } else { - labelMap.put(label, labelAndValue.value.longValue()); - } - } - } else { - Map labelMap = new HashMap<>(); - for (LabelAndValue labelAndValue : result.labelValues) { - labelMap.put(labelAndValue.label, labelAndValue.value.longValue()); - } - rangeResults.put(dim, labelMap); + for (FacetResult facetResult : facets.getAllDims(maxLabels)) { + String dim = facetResult.dim; + FacetedDimension facetedDimension = facetedDimensionMap.get(dim); + if (facetedDimension == null) { + facetedDimension = new FacetedDimension(facetResult.dim); } + facetedDimension.addResult(facetResult); } } - private void buildBuckets(JsonObjectBuilder aggregationsBuilder, Entry> result) { - JsonObjectBuilder bucketsBuilder = Json.createObjectBuilder(); - for (Entry labelValue : result.getValue().entrySet()) { - JsonObjectBuilder bucketBuilder = Json.createObjectBuilder(); - bucketsBuilder.add(labelValue.getKey(), bucketBuilder.add("doc_count", labelValue.getValue())); - } - aggregationsBuilder.add(result.getKey(), Json.createObjectBuilder().add("buckets", bucketsBuilder)); - } - private String luceneSearchResult(String name, Search search, String searchAfter, int maxResults, Long uid) throws IOException, LuceneException { List searchers = getSearchers(search.searcherMap, name); @@ -1535,6 +1543,7 @@ private void addField(JsonObject json, Document document, String key) { // Likewise, faceted fields should be considered separately if (facetFields.contains(key)) { document.add(new SortedSetDocValuesFacetField(key + ".keyword", json.getString(key))); + document.add(new StringField(key + ".keyword", json.getString(key), Store.NO)); } if (doubleFields.contains(key)) { @@ -1670,7 +1679,7 @@ private Builder parseParameter(JsonValue p) throws LuceneException { BooleanQuery.Builder paramQuery = new BooleanQuery.Builder(); String pName = parameter.getString("name", null); if (pName != null) { - paramQuery.add(new WildcardQuery(new Term("type.name", pName)), Occur.MUST); + paramQuery.add(new WildcardQuery(new Term("type.name.keyword", pName)), Occur.MUST); } String pUnits = parameter.getString("units", null); @@ -1710,7 +1719,10 @@ private FieldDoc parseSearchAfter(String searchAfter, SortField[] sortFields) th logger.debug("Attempting to parseSearchAfter from {}", searchAfter); JsonReader reader = Json.createReader(new StringReader(searchAfter)); JsonObject object = reader.readObject(); + // shardIndex and Lucene doc Id are always needed to determine tie breaks, even + // if the field sort resulted in no ties in the first place int shardIndex = object.getInt("shardIndex"); + int doc = object.getInt("doc"); float score = Float.NaN; List fields = new ArrayList<>(); if (object.containsKey("score")) { @@ -1756,7 +1768,7 @@ private FieldDoc parseSearchAfter(String searchAfter, SortField[] sortFields) th } } } - return new FieldDoc(0, score, fields.toArray(), shardIndex); // TODO + return new FieldDoc(doc, score, fields.toArray(), shardIndex); } @POST From 973d31cf6b267a26ccf1d7fd900e29546846b20b Mon Sep 17 00:00:00 
2001 From: Patrick Austin Date: Thu, 16 Jun 2022 14:08:12 +0000 Subject: [PATCH 42/73] Allow searchAfter for uneven shards #19 --- .../java/org/icatproject/lucene/Lucene.java | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index b83452b..237c581 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -879,6 +879,7 @@ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, In document.forEach(encodeField(gen, search.fields)); for (String joinedEntityName : search.joinedFields.keySet()) { List searchers = getSearchers(search.searcherMap, joinedEntityName); + List shards = getShards(search.searcherMap, joinedEntityName); Search joinedSearch = new Search(); String fld; String parentId; @@ -895,7 +896,7 @@ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, In } joinedSearch.query = new TermQuery(new Term(fld, parentId)); joinedSearch.sort = new Sort(new SortedNumericSortField("id.long", Type.LONG)); - TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, searchers, null); + TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, shards, null); gen.writeStartArray(joinedEntityName.toLowerCase()); for (ScoreDoc joinedHit : topFieldDocs.scoreDocs) { gen.writeStartObject(); @@ -1118,6 +1119,10 @@ private IndexSearcher getSearcher(Map> readerMap, St return subSearchers.get(0); } + private List getShards(Map> readerMap, String name) { + return indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).shardList; + } + @PostConstruct private void init() { logger.info("Initialising icat.lucene"); @@ -1317,6 +1322,7 @@ private String luceneFacetResult(String name, Search search, String searchAfter, indexSearcher.getIndexReader()); Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); addFacetResults(maxLabels, search.dimensions, facets); + logger.trace("Sparse faceting found results for {} dimensions", search.dimensions.size()); } else if (facetStrings) { // Only add facets to the results if they match one of the requested dimensions DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState( @@ -1367,9 +1373,11 @@ private void addFacetResults(int maxLabels, Map facete throws IOException { for (FacetResult facetResult : facets.getAllDims(maxLabels)) { String dim = facetResult.dim; + logger.trace("Sparse faceting: FacetResult for {}", dim); FacetedDimension facetedDimension = facetedDimensionMap.get(dim); if (facetedDimension == null) { facetedDimension = new FacetedDimension(facetResult.dim); + facetedDimensionMap.put(dim, facetedDimension); } facetedDimension.addResult(facetResult); } @@ -1378,10 +1386,11 @@ private void addFacetResults(int maxLabels, Map facete private String luceneSearchResult(String name, Search search, String searchAfter, int maxResults, Long uid) throws IOException, LuceneException { List searchers = getSearchers(search.searcherMap, name); + List shards = getShards(search.searcherMap, name); String format = "Search {} with: query {}, maxResults {}, searchAfter {}, scored {}"; logger.debug(format, name, search.query, maxResults, searchAfter, search.scored); FieldDoc searchAfterDoc = parseSearchAfter(searchAfter, search.sort.getSort()); - TopFieldDocs topFieldDocs = searchShards(search, maxResults, searchers, searchAfterDoc); + TopFieldDocs topFieldDocs = searchShards(search, 
maxResults, shards, searchAfterDoc); ScoreDoc[] hits = topFieldDocs.scoreDocs; TotalHits totalHits = topFieldDocs.totalHits; SortField[] fields = topFieldDocs.fields; @@ -1447,19 +1456,28 @@ private String luceneSearchResult(String name, Search search, String searchAfter return baos.toString(); } - private TopFieldDocs searchShards(Search search, int maxResults, List searchers, + private TopFieldDocs searchShards(Search search, int maxResults, List shards, FieldDoc searchAfterDoc) throws IOException { TopFieldDocs topFieldDocs; - if (searchers.size() > 0) { + if (shards.size() > 0) { List shardHits = new ArrayList<>(); int i = 0; + int doc = searchAfterDoc != null ? searchAfterDoc.doc : -1; long startTime = System.currentTimeMillis(); - for (IndexSearcher indexSearcher : searchers) { + for (ShardBucket shard : shards) { + int docCount = shard.documentCount.intValue(); + if (searchAfterDoc != null) { + if (doc > docCount) { + searchAfterDoc.doc = docCount - 1; + } else { + searchAfterDoc.doc = doc; + } + } + IndexSearcher indexSearcher = shard.searcherManager.acquire(); TopFieldDocs shardDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, search.scored); shardHits.add(shardDocs); - logger.debug("{} hits on shard {} out of {} total docs", shardDocs.totalHits, i, - indexSearcher.getIndexReader().numDocs()); + logger.debug("{} hits on shard {} out of {} total docs", shardDocs.totalHits, i, docCount); i++; long duration = (System.currentTimeMillis() - startTime); if (duration > maxSearchTimeSeconds * 1000) { @@ -1469,7 +1487,7 @@ private TopFieldDocs searchShards(Search search, int maxResults, List Date: Wed, 15 Jun 2022 23:49:57 +0100 Subject: [PATCH 43/73] Sparse string faceting fix #19 --- src/main/java/org/icatproject/lucene/Lucene.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 237c581..224d77c 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -1398,7 +1398,7 @@ private String luceneSearchResult(String name, Search search, String searchAfter if (hits.length > 0) { maxScore = hits[0].score; } - logger.debug("Hits " + totalHits + " maxscore " + maxScore); + logger.debug("{} maxscore {}", totalHits, maxScore); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject().writeStartArray("results"); @@ -1477,7 +1477,7 @@ private TopFieldDocs searchShards(Search search, int maxResults, List maxSearchTimeSeconds * 1000) { From 757da57f16464d98b8ac4412a79d745207a5ef66 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 16 Jun 2022 02:27:00 +0100 Subject: [PATCH 44/73] Filters and aborted search support #19 --- .../java/org/icatproject/lucene/Lucene.java | 115 +++++++++++------- 1 file changed, 71 insertions(+), 44 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 224d77c..8047289 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -314,6 +314,7 @@ public class Search { public Set fields = new HashSet(); public Map> joinedFields = new HashMap<>(); public Map dimensions = new HashMap(); + public boolean aborted = false; public void parseFields(JsonObject jsonObject) throws LuceneException { if (jsonObject.containsKey("fields")) { @@ -723,6 
+724,13 @@ private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); + if (query.containsKey("filter")) { + JsonObject filterObject = query.getJsonObject("filter"); + for (String fld : filterObject.keySet()) { + theQuery.add(new TermQuery(new Term(fld, filterObject.getString(fld))), Occur.FILTER); + } + } + if (userName != null) { buildUserNameQuery(readerMap, userName, theQuery, "investigation.id"); } @@ -784,6 +792,13 @@ private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); + if (query.containsKey("filter")) { + JsonObject filterObject = query.getJsonObject("filter"); + for (String fld : filterObject.keySet()) { + theQuery.add(new TermQuery(new Term(fld, filterObject.getString(fld))), Occur.FILTER); + } + } + if (userName != null) { buildUserNameQuery(readerMap, userName, theQuery, "investigation.id"); } @@ -1201,6 +1216,13 @@ private Search investigationsQuery(HttpServletRequest request, String sort, Long BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); + if (query.containsKey("filter")) { + JsonObject filterObject = query.getJsonObject("filter"); + for (String fld : filterObject.keySet()) { + theQuery.add(new TermQuery(new Term(fld, filterObject.getString(fld))), Occur.FILTER); + } + } + if (userName != null) { buildUserNameQuery(readerMap, userName, theQuery, "id"); } @@ -1401,54 +1423,58 @@ private String luceneSearchResult(String name, Search search, String searchAfter logger.debug("{} maxscore {}", totalHits, maxScore); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartObject().writeStartArray("results"); - for (ScoreDoc hit : hits) { - encodeResult(name, gen, hit, searchers.get(hit.shardIndex), search); - } - gen.writeEnd(); // array results - if (hits.length == maxResults) { - ScoreDoc lastDoc = hits[hits.length - 1]; - gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", lastDoc.shardIndex); - float lastScore = lastDoc.score; - if (!Float.isNaN(lastScore)) { - gen.write("score", lastScore); + gen.writeStartObject(); + gen.write("aborted", search.aborted); + if (!search.aborted) { + gen.writeStartArray("results"); + for (ScoreDoc hit : hits) { + encodeResult(name, gen, hit, searchers.get(hit.shardIndex), search); } - if (fields != null) { - Document lastDocument = searchers.get(lastDoc.shardIndex).doc(lastDoc.doc); - gen.writeStartArray("fields"); - for (SortField sortField : fields) { - String fieldName = sortField.getField(); - if (fieldName == null) { - // SCORE sorting will have a null fieldName - gen.write(lastDoc.score); - continue; - } - IndexableField indexableField = lastDocument.getField(fieldName); - if (indexableField == null) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + fieldName - + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); - } - Type type = (sortField instanceof SortedNumericSortField) - ? 
((SortedNumericSortField) sortField).getNumericType() - : sortField.getType(); - switch (type) { - case LONG: - gen.write(indexableField.numericValue().longValue()); - break; - case DOUBLE: - gen.write(indexableField.numericValue().doubleValue()); - break; - case STRING: - gen.write(indexableField.stringValue()); - break; - default: - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, - "SortField.Type must be one of LONG, DOUBLE, STRING, but it was " + type); + gen.writeEnd(); // array results + if (hits.length == maxResults) { + ScoreDoc lastDoc = hits[hits.length - 1]; + gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", lastDoc.shardIndex); + float lastScore = lastDoc.score; + if (!Float.isNaN(lastScore)) { + gen.write("score", lastScore); + } + if (fields != null) { + Document lastDocument = searchers.get(lastDoc.shardIndex).doc(lastDoc.doc); + gen.writeStartArray("fields"); + for (SortField sortField : fields) { + String fieldName = sortField.getField(); + if (fieldName == null) { + // SCORE sorting will have a null fieldName + gen.write(lastDoc.score); + continue; + } + IndexableField indexableField = lastDocument.getField(fieldName); + if (indexableField == null) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + fieldName + + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); + } + Type type = (sortField instanceof SortedNumericSortField) + ? ((SortedNumericSortField) sortField).getNumericType() + : sortField.getType(); + switch (type) { + case LONG: + gen.write(indexableField.numericValue().longValue()); + break; + case DOUBLE: + gen.write(indexableField.numericValue().doubleValue()); + break; + case STRING: + gen.write(indexableField.stringValue()); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, + "SortField.Type must be one of LONG, DOUBLE, STRING, but it was " + type); + } } + gen.writeEnd(); // end "fields" array } - gen.writeEnd(); // end "fields" array + gen.writeEnd(); // end "search_after" object } - gen.writeEnd(); // end "search_after" object } gen.writeEnd(); // end enclosing object } @@ -1482,6 +1508,7 @@ private TopFieldDocs searchShards(Search search, int maxResults, List maxSearchTimeSeconds * 1000) { logger.info("Stopping search after {} shards due to {} ms having elapsed", i, duration); + search.aborted = true; break; } } From 663ea420e617308a2ef7eb074f0d41d3b368fa41 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 17 Jun 2022 09:30:32 +0000 Subject: [PATCH 45/73] Enable parsing of multivalued filters #19 --- .../java/org/icatproject/lucene/Lucene.java | 67 +++++++++++++------ 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 8047289..c4cee0c 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -603,6 +603,43 @@ private static void buildDateRanges(Builder queryBuilder, JsonObject queryJson, } } + /** + * Builds Term queries (exact string matches without tokenizing) from the filter + * object in the query request. This is intended to be used with the faceting, + * with the fields having the ".keyword" suffix. + * + * @param requestedQuery Json object containing details of the query. + * @param queryBuilder Builder for the overall boolean query to be build. 
+ * @throws LuceneException If the values in the filter object are neither STRING + * nor ARRAY of STRING. + */ + private void buildFilterQueries(JsonObject requestedQuery, BooleanQuery.Builder queryBuilder) + throws LuceneException { + if (requestedQuery.containsKey("filter")) { + JsonObject filterObject = requestedQuery.getJsonObject("filter"); + for (String fld : filterObject.keySet()) { + ValueType valueType = filterObject.get(fld).getValueType(); + switch (valueType) { + case ARRAY: + BooleanQuery.Builder dimensionQuery = new BooleanQuery.Builder(); + for (JsonString value : filterObject.getJsonArray(fld).getValuesAs(JsonString.class)) { + dimensionQuery.add(new TermQuery(new Term(fld, value.getString())), Occur.SHOULD); + } + queryBuilder.add(dimensionQuery.build(), Occur.FILTER); + break; + + case STRING: + queryBuilder.add(new TermQuery(new Term(fld, filterObject.getString(fld))), Occur.FILTER); + break; + + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "filter object values should be STRING or ARRAY, but were " + valueType); + } + } + } + } + private void buildUserNameQuery(Map> readerMap, String userName, BooleanQuery.Builder theQuery, String toField) throws IOException, LuceneException { @@ -724,12 +761,7 @@ private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - if (query.containsKey("filter")) { - JsonObject filterObject = query.getJsonObject("filter"); - for (String fld : filterObject.keySet()) { - theQuery.add(new TermQuery(new Term(fld, filterObject.getString(fld))), Occur.FILTER); - } - } + buildFilterQueries(query, theQuery); if (userName != null) { buildUserNameQuery(readerMap, userName, theQuery, "investigation.id"); @@ -792,12 +824,7 @@ private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - if (query.containsKey("filter")) { - JsonObject filterObject = query.getJsonObject("filter"); - for (String fld : filterObject.keySet()) { - theQuery.add(new TermQuery(new Term(fld, filterObject.getString(fld))), Occur.FILTER); - } - } + buildFilterQueries(query, theQuery); if (userName != null) { buildUserNameQuery(readerMap, userName, theQuery, "investigation.id"); @@ -1216,12 +1243,7 @@ private Search investigationsQuery(HttpServletRequest request, String sort, Long BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - if (query.containsKey("filter")) { - JsonObject filterObject = query.getJsonObject("filter"); - for (String fld : filterObject.keySet()) { - theQuery.add(new TermQuery(new Term(fld, filterObject.getString(fld))), Occur.FILTER); - } - } + buildFilterQueries(query, theQuery); if (userName != null) { buildUserNameQuery(readerMap, userName, theQuery, "id"); @@ -1409,8 +1431,8 @@ private String luceneSearchResult(String name, Search search, String searchAfter throws IOException, LuceneException { List searchers = getSearchers(search.searcherMap, name); List shards = getShards(search.searcherMap, name); - String format = "Search {} with: query {}, maxResults {}, searchAfter {}, scored {}"; - logger.debug(format, name, search.query, maxResults, searchAfter, search.scored); + String format = "Search {} with: query {}, maxResults {}, searchAfter {}, scored {}, fields {}"; + logger.debug(format, name, search.query, maxResults, searchAfter, search.scored, search.fields); FieldDoc searchAfterDoc = parseSearchAfter(searchAfter, search.sort.getSort()); TopFieldDocs 
topFieldDocs = searchShards(search, maxResults, shards, searchAfterDoc); ScoreDoc[] hits = topFieldDocs.scoreDocs; @@ -1433,7 +1455,8 @@ private String luceneSearchResult(String name, Search search, String searchAfter gen.writeEnd(); // array results if (hits.length == maxResults) { ScoreDoc lastDoc = hits[hits.length - 1]; - gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", lastDoc.shardIndex); + gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", + lastDoc.shardIndex); float lastScore = lastDoc.score; if (!Float.isNaN(lastScore)) { gen.write("score", lastScore); @@ -1478,7 +1501,7 @@ private String luceneSearchResult(String name, Search search, String searchAfter } gen.writeEnd(); // end enclosing object } - logger.trace("Json returned {}", baos.toString()); + logger.debug("Json returned {}", baos.toString()); return baos.toString(); } From eaafc89fda2a25b206963bca9f496d1262301179 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Mon, 20 Jun 2022 17:18:31 +0100 Subject: [PATCH 46/73] Refactors and Javadoc comments #19 --- .../icatproject/lucene/DocumentMapping.java | 105 ++ .../icatproject/lucene/FacetedDimension.java | 29 +- .../java/org/icatproject/lucene/Lucene.java | 1119 +++++------------ .../org/icatproject/lucene/SearchBucket.java | 813 ++++++++++++ src/main/resources/run.properties | 2 +- 5 files changed, 1282 insertions(+), 786 deletions(-) create mode 100644 src/main/java/org/icatproject/lucene/DocumentMapping.java create mode 100644 src/main/java/org/icatproject/lucene/SearchBucket.java diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java new file mode 100644 index 0000000..27aa532 --- /dev/null +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -0,0 +1,105 @@ +package org.icatproject.lucene; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; + +public class DocumentMapping { + + /** + * Represents the parent child relationship between two ICAT entities. + */ + public static class ParentRelationship { + public String parentName; + public String fieldPrefix; + + /** + * @param parentName Name of the parent entity. + * @param fieldPrefix How nested fields should be prefixed. 
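+ *                    For example, new ParentRelationship("InvestigationUser", "user")
+ *                    (as registered below) means that User fields are nested
+ *                    on InvestigationUser documents under names such as
+ *                    "user.fullName".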
+ */ + public ParentRelationship(String parentName, String fieldPrefix) { + this.parentName = parentName; + this.fieldPrefix = fieldPrefix; + } + + } + + public static final Set doubleFields = new HashSet<>(); + public static final Set facetFields = new HashSet<>(); + public static final Set longFields = new HashSet<>(); + public static final Set sortFields = new HashSet<>(); + public static final Set textFields = new HashSet<>(); + public static final Set indexedEntities = new HashSet<>(); + public static final Map relationships = new HashMap<>(); + + public static final IcatAnalyzer analyzer = new IcatAnalyzer(); + public static final StandardQueryParser genericParser = new StandardQueryParser(); + public static final StandardQueryParser datafileParser = new StandardQueryParser(); + public static final StandardQueryParser datasetParser = new StandardQueryParser(); + public static final StandardQueryParser investigationParser = new StandardQueryParser(); + public static final StandardQueryParser sampleParser = new StandardQueryParser(); + + static { + doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI")); + facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue")); + longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate")); + sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", "date", + "startDate", "endDate", "name", "stringValue", "dateTimeValue", "numericValue", "numericValueSI")); + textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name", + "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name", + "sample.type.name", "title", "summary", "facility.name", "user.fullName", "type.name")); + + indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter", + "DatasetParameter", "InstrumentScientist", "InvestigationInstrument", "InvestigationParameter", + "InvestigationUser", "Sample")); + + relationships.put("Instrument", + new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument") }); + relationships.put("User", new ParentRelationship[] { new ParentRelationship("InvestigationUser", "user"), + new ParentRelationship("InstrumentScientist", "user") }); + relationships.put("Sample", new ParentRelationship[] { new ParentRelationship("Dataset", "sample") }); + relationships.put("SampleType", new ParentRelationship[] { new ParentRelationship("Sample", "type"), + new ParentRelationship("Dataset", "sample.type") }); + relationships.put("InvestigationType", + new ParentRelationship[] { new ParentRelationship("Investigation", "type") }); + relationships.put("DatasetType", new ParentRelationship[] { new ParentRelationship("Dataset", "type") }); + relationships.put("DatafileFormat", + new ParentRelationship[] { new ParentRelationship("Datafile", "datafileFormat") }); + relationships.put("Facility", new ParentRelationship[] { new ParentRelationship("Investigation", "facility") }); + relationships.put("ParameterType", + new ParentRelationship[] { new ParentRelationship("DatafileParameter", "type"), + new ParentRelationship("DatasetParameter", "type"), + new ParentRelationship("InvestigationParameter", "type") }); + relationships.put("Investigation", + new ParentRelationship[] { new ParentRelationship("Dataset", "investigation"), + new ParentRelationship("datafile", "investigation") }); + + 
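// A sketch of the effect of the parser configuration below (query strings
+ // are illustrative): with these multi-fields, a free-text Datafile query
+ // such as "neutron*" is expanded across name, description, location and
+ // datafileFormat.name, and leading wildcards like "*scan" are also accepted.
+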
genericParser.setAllowLeadingWildcard(true); + genericParser.setAnalyzer(analyzer); + + CharSequence[] datafileFields = { "name", "description", "location", "datafileFormat.name" }; + datafileParser.setAllowLeadingWildcard(true); + datafileParser.setAnalyzer(analyzer); + datafileParser.setMultiFields(datafileFields); + + CharSequence[] datasetFields = { "name", "description", "sample.name", "sample.type.name", "type.name" }; + datasetParser.setAllowLeadingWildcard(true); + datasetParser.setAnalyzer(analyzer); + datasetParser.setMultiFields(datasetFields); + + CharSequence[] investigationFields = { "name", "visitId", "title", "summary", "facility.name", + "type.name" }; + investigationParser.setAllowLeadingWildcard(true); + investigationParser.setAnalyzer(analyzer); + investigationParser.setMultiFields(investigationFields); + + CharSequence[] sampleFields = { "name", "type.name" }; + sampleParser.setAllowLeadingWildcard(true); + sampleParser.setAnalyzer(analyzer); + sampleParser.setMultiFields(sampleFields); + } +} diff --git a/src/main/java/org/icatproject/lucene/FacetedDimension.java b/src/main/java/org/icatproject/lucene/FacetedDimension.java index 3173d27..98c51c5 100644 --- a/src/main/java/org/icatproject/lucene/FacetedDimension.java +++ b/src/main/java/org/icatproject/lucene/FacetedDimension.java @@ -8,8 +8,17 @@ import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.facet.range.DoubleRange; +import org.apache.lucene.facet.range.LongRange; import org.apache.lucene.facet.range.Range; +/** + * For a single dimension (field), stores labels (the unique values or ranges of + * values for that field in the index) and their respective counts (the number + * of times that label appears in different documents). + * + * For example, a dimension might be "colour", the label "red", and the count 5. + */ public class FacetedDimension { private String dimension; @@ -18,11 +27,8 @@ public class FacetedDimension { private List counts; /** - * For a single dimension (field), stores labels (the unique values or ranges of - * values for that field in the index) and their respective counts (the number - * of times that label appears in different documents). - * - * For example, a dimension might be "colour", the label "red", and the count 5. + * Creates an "empty" FacetedDimension. The dimension (field) is set but ranges, + * labels and counts are not. 
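+ * <p>
+ * A typical lifecycle, as a sketch: construct with the field name, feed in
+ * one FacetResult per shard, then serialise the totals:
+ * <pre>{@code
+ * FacetedDimension dim = new FacetedDimension("type.name");
+ * dim.addResult(facetResultFromShard); // repeated for each shard searched
+ * dim.buildResponse(aggregationsBuilder);
+ * }</pre>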
* * @param dimension The dimension, or field, to be faceted */ @@ -65,7 +71,18 @@ public void buildResponse(JsonObjectBuilder aggregationsBuilder) { JsonObjectBuilder bucketsBuilder = Json.createObjectBuilder(); for (int i = 0; i < labels.size(); i++) { JsonObjectBuilder bucketBuilder = Json.createObjectBuilder(); - bucketsBuilder.add(labels.get(i), bucketBuilder.add("doc_count", counts.get(i))); + bucketBuilder.add("doc_count", counts.get(i)); + if (ranges.size() > i) { + Range range = ranges.get(i); + if (range.getClass().getSimpleName().equals("LongRange")) { + bucketBuilder.add("from", ((LongRange) range).min); + bucketBuilder.add("to", ((LongRange) range).max); + } else if (range.getClass().getSimpleName().equals("DoubleRange")) { + bucketBuilder.add("from", ((DoubleRange) range).min); + bucketBuilder.add("to", ((DoubleRange) range).max); + } + } + bucketsBuilder.add(labels.get(i), bucketBuilder); } aggregationsBuilder.add(dimension, Json.createObjectBuilder().add("buckets", bucketsBuilder)); } diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index c4cee0c..282d413 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -1,14 +1,11 @@ package org.icatproject.lucene; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; -import java.io.StringReader; import java.net.HttpURLConnection; import java.nio.file.FileVisitOption; import java.nio.file.Files; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -16,9 +13,8 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.TimeZone; import java.util.Map.Entry; +import java.util.Set; import java.util.Timer; import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; @@ -32,14 +28,10 @@ import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonException; -import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.json.JsonReader; -import javax.json.JsonString; import javax.json.JsonStructure; -import javax.json.JsonValue; -import javax.json.JsonValue.ValueType; import javax.json.stream.JsonGenerator; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.Consumes; @@ -54,9 +46,9 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; @@ -69,7 +61,6 @@ import org.apache.lucene.facet.range.DoubleRangeFacetCounts; import org.apache.lucene.facet.range.LongRange; import org.apache.lucene.facet.range.LongRangeFacetCounts; -import org.apache.lucene.facet.range.Range; import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; @@ -77,32 +68,22 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; -import 
org.apache.lucene.queryparser.flexible.core.QueryNodeException; -import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FieldDoc; -import org.apache.lucene.search.BooleanQuery.Builder; -import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.SortedNumericSortField; -import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TotalHits; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.search.join.JoinUtil; -import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; +import org.icatproject.lucene.SearchBucket.SearchType; import org.icatproject.lucene.exceptions.LuceneException; import org.icatproject.utils.CheckedProperties; import org.icatproject.utils.IcatUnits; @@ -116,6 +97,10 @@ @Singleton public class Lucene { + /** + * A bucket for accessing the read and write functionality for a single "shard" + * Lucene index which can then be grouped to represent a single document type. + */ private class ShardBucket { private FSDirectory directory; private IndexWriter indexWriter; @@ -132,7 +117,7 @@ private class ShardBucket { */ public ShardBucket(java.nio.file.Path shardPath) throws IOException { directory = FSDirectory.open(shardPath); - IndexWriterConfig config = new IndexWriterConfig(analyzer); + IndexWriterConfig config = new IndexWriterConfig(DocumentMapping.analyzer); indexWriter = new IndexWriter(directory, config); String[] files = directory.listAll(); if (files.length == 1 && files[0].equals("write.lock")) { @@ -156,6 +141,12 @@ public ShardBucket(java.nio.file.Path shardPath) throws IOException { } } + /** + * Commits all pending cached documents to this shard. + * + * @return The number of documents committed to this shard. + * @throws IOException + */ public int commit() throws IOException { int cached = indexWriter.numRamDocs(); indexWriter.commit(); @@ -164,9 +155,13 @@ public int commit() throws IOException { } } + /** + * A bucket for accessing the high level functionality, such as + * searching, for a single document type. Incoming documents will be routed to + * one of the individual "shard" indices that are grouped by this Object. 
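+ * <p>
+ * For example (numbers illustrative), with a maxShardSize of 2^31 the first
+ * 2^31 documents for an entity are written to its base index, and later
+ * documents are routed to sibling indices suffixed "_1", "_2" and so on.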
+ */ private class IndexBucket { private String entityName; - // private Map shardMap = new HashMap<>(); private List shardList = new ArrayList<>(); private AtomicBoolean locked = new AtomicBoolean(); @@ -180,12 +175,11 @@ private class IndexBucket { */ public IndexBucket(String entityName) { try { - this.entityName = entityName; + this.entityName = entityName.toLowerCase(); Long shardIndex = 0L; java.nio.file.Path shardPath = luceneDirectory.resolve(entityName); do { ShardBucket shardBucket = new ShardBucket(shardPath); - // shardMap.put(shardIndex, shardBucket); shardList.add(shardBucket); shardIndex++; shardPath = luceneDirectory.resolve(entityName + "_" + shardIndex); @@ -200,7 +194,7 @@ public IndexBucket(String entityName) { * Acquires IndexSearchers from the SearcherManagers of the individual shards in * this bucket. * - * @return Array of DirectoryReaders for all shards in this bucket. + * @return List of IndexSearchers for all shards in this bucket. * @throws IOException */ public List acquireSearchers() throws IOException { @@ -211,15 +205,29 @@ public List acquireSearchers() throws IOException { return subSearchers; } + /** + * Adds a document to the appropriate shard for this index. + * + * @param document The document to be added. + * @throws IOException + */ public void addDocument(Document document) throws IOException { ShardBucket shardBucket = routeShard(); shardBucket.indexWriter.addDocument(document); shardBucket.documentCount.incrementAndGet(); } + /** + * Updates documents matching the term with the provided document. + * + * @param term Term identifying the old document(s) to be updated. + * @param document The document that will replace the old document(s). + * @throws IOException + */ public void updateDocument(Term term, Document document) throws IOException { - ShardBucket shardBucket = routeShard(); - shardBucket.indexWriter.updateDocument(term, document); + for (ShardBucket shardBucket : shardList) { + shardBucket.indexWriter.updateDocument(term, document); + } } /** @@ -264,7 +272,6 @@ public void commit(String command, String entityName) throws IOException { * @throws IOException */ public void close() throws IOException { - // for (ShardBucket shardBucket : shardMap.values()) { for (ShardBucket shardBucket : shardList) { shardBucket.searcherManager.close(); shardBucket.indexWriter.commit(); @@ -292,7 +299,14 @@ public ShardBucket routeShard() throws IOException { return shardBucket; } - public void releaseReaders(List subSearchers) throws IOException, LuceneException { + /** + * Releases all provided searchers for the shards in this bucket. + * + * @param subSearchers List of IndexSearcher, in shard order. + * @throws IOException + * @throws LuceneException If the number of searchers and shards isn't the same. 
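+ * <p>
+ * Intended pairing, as a sketch: searchers obtained from acquireSearchers()
+ * are handed back here once the search completes, e.g.
+ * <pre>{@code
+ * List<IndexSearcher> subSearchers = bucket.acquireSearchers();
+ * try {
+ *     // perform the search against each shard
+ * } finally {
+ *     bucket.releaseSearchers(subSearchers);
+ * }
+ * }</pre>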
+ */ + public void releaseSearchers(List subSearchers) throws IOException, LuceneException { if (subSearchers.size() != shardList.size()) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Was expecting the same number of DirectoryReaders as ShardBuckets, but had " @@ -306,178 +320,8 @@ public void releaseReaders(List subSearchers) throws IOException, } } - public class Search { - public Map> searcherMap; - public Query query; - public Sort sort; - public boolean scored; - public Set fields = new HashSet(); - public Map> joinedFields = new HashMap<>(); - public Map dimensions = new HashMap(); - public boolean aborted = false; - - public void parseFields(JsonObject jsonObject) throws LuceneException { - if (jsonObject.containsKey("fields")) { - List fieldStrings = jsonObject.getJsonArray("fields").getValuesAs(JsonString.class); - logger.trace("Parsing fields from {}", fieldStrings); - for (JsonString jsonString : fieldStrings) { - String[] splitString = jsonString.getString().split(" "); - if (splitString.length == 1) { - fields.add(splitString[0]); - } else if (splitString.length == 2) { - if (joinedFields.containsKey(splitString[0])) { - joinedFields.get(splitString[0]).add(splitString[1]); - } else { - joinedFields.putIfAbsent(splitString[0], - new HashSet(Arrays.asList(splitString[1]))); - } - } else { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "Could not parse field: " + jsonString.getString()); - } - } - } - - } - - /** - * Parses the String from the request into a Lucene Sort object. Multiple sort - * criteria are supported, and will be applied in order. - * - * @param sortString String representation of a JSON object with the field(s) to - * sort - * as keys, and the direction ("asc" or "desc") as value(s). 
- * @return Lucene Sort object - * @throws LuceneException If the value for any key isn't "asc" or "desc" - */ - public void parseSort(String sortString) throws LuceneException { - if (sortString == null || sortString.equals("") || sortString.equals("{}")) { - scored = true; - sort = new Sort(SortField.FIELD_SCORE, new SortedNumericSortField("id.long", Type.LONG)); - return; - } - try (JsonReader reader = Json.createReader(new ByteArrayInputStream(sortString.getBytes()))) { - JsonObject object = reader.readObject(); - List fields = new ArrayList<>(); - for (String key : object.keySet()) { - String order = object.getString(key); - Boolean reverse; - if (order.equals("asc")) { - reverse = false; - } else if (order.equals("desc")) { - reverse = true; - } else { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "Sort order must be 'asc' or 'desc' but it was '" + order + "'"); - } - - if (longFields.contains(key)) { - fields.add(new SortedNumericSortField(key, Type.LONG, reverse)); - } else if (doubleFields.contains(key)) { - fields.add(new SortedNumericSortField(key, Type.DOUBLE, reverse)); - } else { - fields.add(new SortField(key, Type.STRING, reverse)); - } - } - fields.add(new SortedNumericSortField("id.long", Type.LONG)); - scored = false; - sort = new Sort(fields.toArray(new SortField[0])); - } - } - } - - private static class ParentRelationship { - public String parentName; - public String fieldPrefix; - - public ParentRelationship(String parentName, String fieldPrefix) { - this.parentName = parentName; - this.fieldPrefix = fieldPrefix; - } - - } - - private static final Logger logger = LoggerFactory.getLogger(Lucene.class); + static final Logger logger = LoggerFactory.getLogger(Lucene.class); private static final Marker fatal = MarkerFactory.getMarker("FATAL"); - private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); - - private static final Set doubleFields = new HashSet<>(); - private static final Set facetFields = new HashSet<>(); - private static final Set longFields = new HashSet<>(); - private static final Set sortFields = new HashSet<>(); - private static final Set textFields = new HashSet<>(); - private static final Set indexedEntities = new HashSet<>(); - private static final Map relationships = new HashMap<>(); - - private static final IcatAnalyzer analyzer = new IcatAnalyzer(); - private static final StandardQueryParser genericParser = new StandardQueryParser(); - private static final StandardQueryParser datafileParser = new StandardQueryParser(); - private static final StandardQueryParser datasetParser = new StandardQueryParser(); - private static final StandardQueryParser investigationParser = new StandardQueryParser(); - private static final StandardQueryParser sampleParser = new StandardQueryParser(); - - static { - TimeZone tz = TimeZone.getTimeZone("GMT"); - df.setTimeZone(tz); - - doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI")); - facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name")); - longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate")); - sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", "date", - "startDate", "endDate", "name", "stringValue", "dateTimeValue", "numericValue", "numericValueSI")); - textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name", - "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", 
"sample.name", - "sample.type.name", "title", "summary", "facility.name", "user.fullName", "type.name")); - - indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter", - "DatasetParameter", "InstrumentScientist", "InvestigationInstrument", "InvestigationParameter", - "InvestigationUser", "Sample")); - - relationships.put("Instrument", - new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument") }); - relationships.put("User", new ParentRelationship[] { new ParentRelationship("InvestigationUser", "user"), - new ParentRelationship("InstrumentScientist", "user") }); - relationships.put("Sample", new ParentRelationship[] { new ParentRelationship("Dataset", "sample") }); - relationships.put("SampleType", new ParentRelationship[] { new ParentRelationship("Sample", "type"), - new ParentRelationship("Dataset", "sample.type") }); - relationships.put("InvestigationType", - new ParentRelationship[] { new ParentRelationship("Investigation", "type") }); - relationships.put("DatasetType", new ParentRelationship[] { new ParentRelationship("Dataset", "type") }); - relationships.put("DatafileFormat", - new ParentRelationship[] { new ParentRelationship("Datafile", "datafileFormat") }); - relationships.put("Facility", new ParentRelationship[] { new ParentRelationship("Investigation", "facility") }); - relationships.put("ParameterType", - new ParentRelationship[] { new ParentRelationship("DatafileParameter", "type"), - new ParentRelationship("DatasetParameter", "type"), - new ParentRelationship("InvestigationParameter", "type") }); - relationships.put("Investigation", - new ParentRelationship[] { new ParentRelationship("Dataset", "investigation"), - new ParentRelationship("datafile", "investigation") }); - - genericParser.setAllowLeadingWildcard(true); - genericParser.setAnalyzer(analyzer); - - CharSequence[] datafileFields = { "name", "description", "doi", "location", "datafileFormat.name" }; - datafileParser.setAllowLeadingWildcard(true); - datafileParser.setAnalyzer(analyzer); - datafileParser.setMultiFields(datafileFields); - - CharSequence[] datasetFields = { "name", "description", "doi", "sample.name", "sample.type.name", "type.name" }; - datasetParser.setAllowLeadingWildcard(true); - datasetParser.setAnalyzer(analyzer); - datasetParser.setMultiFields(datasetFields); - - CharSequence[] investigationFields = { "name", "visitId", "title", "summary", "doi", "facility.name", - "type.name" }; - investigationParser.setAllowLeadingWildcard(true); - investigationParser.setAnalyzer(analyzer); - investigationParser.setMultiFields(investigationFields); - - CharSequence[] sampleFields = { "name", "type.name" }; - sampleParser.setAllowLeadingWildcard(true); - sampleParser.setAnalyzer(analyzer); - sampleParser.setMultiFields(sampleFields); - } private final FacetsConfig facetsConfig = new FacetsConfig(); @@ -492,8 +336,8 @@ public ParentRelationship(String parentName, String fieldPrefix) { private Timer timer; - private Map searches = new ConcurrentHashMap<>(); - private IcatUnits icatUnits; + private Map searches = new ConcurrentHashMap<>(); + public IcatUnits icatUnits; /** * return the version of the lucene server @@ -516,7 +360,6 @@ public String getVersion() { @Consumes(MediaType.APPLICATION_JSON) @Path("modify") public void modify(@Context HttpServletRequest request) throws LuceneException { - logger.debug("Requesting modify"); int count = 0; try (JsonReader reader = Json.createReader(request.getInputStream())) { @@ -572,89 +415,6 @@ public 
void addNow(@Context HttpServletRequest request, @PathParam("entityName") logger.debug("Added {} {} documents", documents.size(), entityName); } - /** - * Extracts values from queryJson in order to add one or more range query terms - * using queryBuilder. - * - * Note that values in queryJson are expected to be precise only to the minute, - * and so to ensure that our range is inclusive, we add 59.999 seconds onto the - * upper value only. - * - * If either upper or lower keys do not yield values then a half open range is - * created. If both are absent, then nothing is added to the query. - * - * @param queryBuilder Builder for the Lucene query. - * @param queryJson JsonObject representing the query parameters. - * @param lowerKey Key in queryJson of the lower date value - * @param upperKey Key in queryJson of the upper date value - * @param fields Name of one or more fields to apply the range query to. - * @throws LuceneException - */ - private static void buildDateRanges(Builder queryBuilder, JsonObject queryJson, String lowerKey, String upperKey, - String... fields) throws LuceneException { - Long lower = parseDate(queryJson, lowerKey, 0); - Long upper = parseDate(queryJson, upperKey, 59999); - if (lower != null || upper != null) { - lower = (lower == null) ? Long.MIN_VALUE : lower; - upper = (upper == null) ? Long.MAX_VALUE : upper; - for (String field : fields) { - queryBuilder.add(LongPoint.newRangeQuery(field, lower, upper), Occur.MUST); - } - } - } - - /** - * Builds Term queries (exact string matches without tokenizing) from the filter - * object in the query request. This is intended to be used with the faceting, - * with the fields having the ".keyword" suffix. - * - * @param requestedQuery Json object containing details of the query. - * @param queryBuilder Builder for the overall boolean query to be build. - * @throws LuceneException If the values in the filter object are neither STRING - * nor ARRAY of STRING. 
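For illustration, the half-open behaviour described above expressed in Lucene terms, assuming only the lower key was supplied (2022-01-01T00:00Z in ms since epoch):

Builder queryBuilder = new BooleanQuery.Builder();
// a missing upper value falls back to Long.MAX_VALUE, giving a half-open range
queryBuilder.add(LongPoint.newRangeQuery("date", 1640995200000L, Long.MAX_VALUE), Occur.MUST);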
- */ - private void buildFilterQueries(JsonObject requestedQuery, BooleanQuery.Builder queryBuilder) - throws LuceneException { - if (requestedQuery.containsKey("filter")) { - JsonObject filterObject = requestedQuery.getJsonObject("filter"); - for (String fld : filterObject.keySet()) { - ValueType valueType = filterObject.get(fld).getValueType(); - switch (valueType) { - case ARRAY: - BooleanQuery.Builder dimensionQuery = new BooleanQuery.Builder(); - for (JsonString value : filterObject.getJsonArray(fld).getValuesAs(JsonString.class)) { - dimensionQuery.add(new TermQuery(new Term(fld, value.getString())), Occur.SHOULD); - } - queryBuilder.add(dimensionQuery.build(), Occur.FILTER); - break; - - case STRING: - queryBuilder.add(new TermQuery(new Term(fld, filterObject.getString(fld))), Occur.FILTER); - break; - - default: - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "filter object values should be STRING or ARRAY, but were " + valueType); - } - } - } - } - - private void buildUserNameQuery(Map> readerMap, String userName, - BooleanQuery.Builder theQuery, String toField) - throws IOException, LuceneException { - TermQuery fromQuery = new TermQuery(new Term("user.name", userName)); - Query investigationUserQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, fromQuery, - getSearcher(readerMap, "InvestigationUser"), ScoreMode.None); - Query instrumentScientistQuery = JoinUtil.createJoinQuery("instrument.id", false, "instrument.id", fromQuery, - getSearcher(readerMap, "InstrumentScientist"), ScoreMode.None); - Query investigationInstrumentQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, - instrumentScientistQuery, getSearcher(readerMap, "InvestigationInstrument"), ScoreMode.None); - Builder userNameQueryBuilder = new BooleanQuery.Builder(); - userNameQueryBuilder.add(investigationUserQuery, Occur.SHOULD).add(investigationInstrumentQuery, Occur.SHOULD); - theQuery.add(userNameQueryBuilder.build(), Occur.MUST); - } - /* * This is only for testing purposes. Other calls to the service will not * work properly while this operation is in progress. @@ -683,6 +443,9 @@ public void clear() throws LuceneException { } + /** + * Commits any pending documents to their respective index. + */ @POST @Path("commit") public void commit() throws LuceneException { @@ -699,15 +462,25 @@ public void commit() throws LuceneException { } } + /** + * Creates a new Lucene document, provided that the target index is not locked + * for another operation. + * + * @param operationBody JsonObject containing the "_index" that the new "doc" + * should be created in. 
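As a sketch, the operation body read by create (and by the update/delete operations later in this patch) has the following shape; the values here are invented for illustration:

String json = "{\"_index\": \"Datafile\", \"_id\": \"42\", \"doc\": {\"id\": \"42\", \"name\": \"df.nxs\"}}";
try (JsonReader reader = Json.createReader(new StringReader(json))) {
    JsonObject operationBody = reader.readObject();
    String entityName = operationBody.getString("_index"); // "Datafile"
    JsonObject doc = operationBody.getJsonObject("doc"); // handed on to parseDocument
}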
+ * @throws NumberFormatException + * @throws IOException + * @throws LuceneException + */ private void create(JsonObject operationBody) throws NumberFormatException, IOException, LuceneException { String entityName = operationBody.getString("_index"); - if (relationships.containsKey(entityName)) { + if (DocumentMapping.relationships.containsKey(entityName)) { updateByRelation(operationBody, false); } - if (indexedEntities.contains(entityName)) { + if (DocumentMapping.indexedEntities.contains(entityName)) { Document document = parseDocument(operationBody.getJsonObject("doc")); logger.trace("create {} {}", entityName, document.toString()); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); @@ -716,6 +489,15 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx } } + /** + * Creates a new Lucene document. + * + * @param entityName Name of the entity/index to create the document in. + * @param documentJson JsonObject representation of the document to be created. + * @throws NumberFormatException + * @throws IOException + * @throws LuceneException + */ private void createNow(String entityName, JsonObject documentJson) throws NumberFormatException, IOException, LuceneException { if (!documentJson.containsKey("id")) { @@ -724,10 +506,22 @@ private void createNow(String entityName, JsonObject documentJson) } Document document = parseDocument(documentJson); logger.trace("create {} {}", entityName, document.toString()); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); bucket.addDocument(facetsConfig.build(document)); } + /** + * Perform search on the Datafile entity/index. + * + * @param request Incoming Http request containing the query as Json. + * @param searchAfter String of Json representing the last Lucene Document from + * a previous search. + * @param maxResults The maximum number of results to include in the returned + * Json. + * @param sort String of Json representing the sort criteria. + * @return String of Json representing the results of the search. 
+ * @throws LuceneException + */ @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @@ -737,8 +531,9 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("search Long uid = null; try { uid = bucketNum.getAndIncrement(); - Search search = datafilesQuery(request, sort, uid); - return luceneSearchResult("Datafile", search, searchAfter, maxResults, uid); + SearchBucket search = new SearchBucket(this, SearchType.DATAFILE, request, sort, searchAfter); + searches.put(uid, search); + return luceneSearchResult("Datafile", search, searchAfter, maxResults); } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); @@ -746,50 +541,18 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("search } } - private Search datafilesQuery(HttpServletRequest request, String sort, Long uid) - throws IOException, QueryNodeException, LuceneException { - Search search = new Search(); - searches.put(uid, search); - Map> readerMap = new HashMap<>(); - search.searcherMap = readerMap; - search.parseSort(sort); - - try (JsonReader r = Json.createReader(request.getInputStream())) { - JsonObject o = r.readObject(); - JsonObject query = o.getJsonObject("query"); - String userName = query.getString("user", null); - - BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - - buildFilterQueries(query, theQuery); - - if (userName != null) { - buildUserNameQuery(readerMap, userName, theQuery, "investigation.id"); - } - - String text = query.getString("text", null); - if (text != null) { - theQuery.add(datafileParser.parse(text, null), Occur.MUST); - } - - buildDateRanges(theQuery, query, "lower", "upper", "date"); - - if (query.containsKey("parameters")) { - JsonArray parameters = query.getJsonArray("parameters"); - IndexSearcher datafileParameterSearcher = getSearcher(readerMap, "DatafileParameter"); - for (JsonValue p : parameters) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("datafile.id", false, "id", paramQuery.build(), - datafileParameterSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - } - search.query = maybeEmptyQuery(theQuery); - search.parseFields(o); - } - return search; - } - + /** + * Perform search on the Dataset entity/index. + * + * @param request Incoming Http request containing the query as Json. + * @param searchAfter String of Json representing the last Lucene Document from + * a previous search. + * @param maxResults The maximum number of results to include in the returned + * Json. + * @param sort String of Json representing the sort criteria. + * @return String of Json representing the results of the search. 
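For reference, a sketch of the query body these search endpoints read; the keys ("user", "text", "lower", "upper", "parameters") are those parsed above, and the values are illustrative:

String body = "{ \"query\": { \"user\": \"fred\", \"text\": \"neutron scan\","
        + " \"lower\": \"202201010000\", \"upper\": \"202212312359\","
        + " \"parameters\": [ { \"name\": \"temperature\", \"units\": \"K\","
        + " \"lowerNumericValue\": 4, \"upperNumericValue\": 300 } ] } }";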
+ * @throws LuceneException + */ @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @@ -800,8 +563,9 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("search_ Long uid = null; try { uid = bucketNum.getAndIncrement(); - Search search = datasetsQuery(request, sort, uid); - return luceneSearchResult("Dataset", search, searchAfter, maxResults, uid); + SearchBucket search = new SearchBucket(this, SearchType.DATASET, request, sort, searchAfter); + searches.put(uid, search); + return luceneSearchResult("Dataset", search, searchAfter, maxResults); } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); @@ -810,75 +574,24 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("search_ } - private Search datasetsQuery(HttpServletRequest request, String sort, Long uid) - throws IOException, QueryNodeException, LuceneException { - Search search = new Search(); - searches.put(uid, search); - Map> readerMap = new HashMap<>(); - search.searcherMap = readerMap; - search.parseSort(sort); - try (JsonReader r = Json.createReader(request.getInputStream())) { - JsonObject o = r.readObject(); - JsonObject query = o.getJsonObject("query"); - String userName = query.getString("user", null); - - BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - - buildFilterQueries(query, theQuery); - - if (userName != null) { - buildUserNameQuery(readerMap, userName, theQuery, "investigation.id"); - } - - String text = query.getString("text", null); - if (text != null) { - theQuery.add(datasetParser.parse(text, null), Occur.MUST); - } - - buildDateRanges(theQuery, query, "lower", "upper", "startDate", "endDate"); - - if (query.containsKey("parameters")) { - JsonArray parameters = query.getJsonArray("parameters"); - IndexSearcher datasetParameterSearcher = getSearcher(readerMap, "DatasetParameter"); - for (JsonValue p : parameters) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("dataset.id", false, "id", paramQuery.build(), - datasetParameterSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - } - search.query = maybeEmptyQuery(theQuery); - search.parseFields(o); - } - return search; - } - /** - * Converts String into number of ms since epoch. + * Deletes a Lucene document, provided that the target index is not locked for + * another operation. * - * @param value String representing a Date in the format "yyyyMMddHHmm". - * @return Number of ms since epoch, or null if value was null - * @throws java.text.ParseException + * @param operationBody JsonObject containing the "_index" and the "_id" of the + * Document to be deleted. 
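The date strings above are precise to the minute in the form yyyyMMddHHmm; a minimal sketch of the conversion to ms since epoch, using GMT as configured in the static initialiser:

SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm");
df.setTimeZone(TimeZone.getTimeZone("GMT"));
// throws java.text.ParseException for malformed input
long ms = df.parse("202201110453").getTime(); // 1641876780000L, i.e. 2022-01-11T04:53Z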
+ * @throws LuceneException + * @throws IOException */ - protected static Long decodeTime(String value) throws java.text.ParseException { - if (value == null) { - return null; - } else { - synchronized (df) { - return df.parse(value).getTime(); - } - } - } - private void delete(JsonObject operationBody) throws LuceneException, IOException { String entityName = operationBody.getString("_index"); - if (relationships.containsKey(entityName)) { + if (DocumentMapping.relationships.containsKey(entityName)) { updateByRelation(operationBody, true); } - if (indexedEntities.contains(entityName)) { + if (DocumentMapping.indexedEntities.contains(entityName)) { String icatId = operationBody.getString("_id"); try { - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); @@ -907,7 +620,8 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio * @throws IOException * @throws LuceneException */ - private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, IndexSearcher searcher, Search search) + private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, IndexSearcher searcher, + SearchBucket search) throws IOException, LuceneException { int luceneDocId = hit.doc; int shardIndex = hit.shardIndex; @@ -921,8 +635,8 @@ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, In document.forEach(encodeField(gen, search.fields)); for (String joinedEntityName : search.joinedFields.keySet()) { List searchers = getSearchers(search.searcherMap, joinedEntityName); - List shards = getShards(search.searcherMap, joinedEntityName); - Search joinedSearch = new Search(); + List shards = getShards(joinedEntityName); + SearchBucket joinedSearch = new SearchBucket(this); String fld; String parentId; if (joinedEntityName.toLowerCase().contains("investigation")) { @@ -931,11 +645,13 @@ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, In parentId = document.get("id"); } else { parentId = document.get("investigation.id"); + logger.debug("investigation.id {}", parentId); } } else { fld = entityName.toLowerCase() + ".id"; parentId = document.get("id"); } + logger.debug("fld {}, parentId {}", fld, parentId); joinedSearch.query = new TermQuery(new Term(fld, parentId)); joinedSearch.sort = new Sort(new SortedNumericSortField("id.long", Type.LONG)); TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, shards, null); @@ -955,9 +671,9 @@ private Consumer encodeField(JsonGenerator gen, Set { String fieldName = field.name(); if (fields.contains(fieldName)) { - if (longFields.contains(fieldName)) { + if (DocumentMapping.longFields.contains(fieldName)) { gen.write(fieldName, field.numericValue().longValue()); - } else if (doubleFields.contains(fieldName)) { + } else if (DocumentMapping.doubleFields.contains(fieldName)) { gen.write(fieldName, field.numericValue().doubleValue()); } else { gen.write(fieldName, field.stringValue()); @@ -984,6 +700,24 @@ private void exit() { } } + /** + * Perform faceting on an entity/index. The query associated with the request + * should determine which Documents to consider, and optionally the dimensions + * to facet. 
If no dimensions are provided, "sparse" faceting is performed + * across relevant string fields (but no Range faceting occurs). + * + * @param entityName Name of the entity/index to facet on. + * @param request Incoming Http request containing the query as Json. + * @param searchAfter String of Json representing the last Lucene Document from + * a previous search. + * @param maxResults The maximum number of results to include in the returned + * Json. + * @param maxLabels The maximum number of labels to return for each dimension + * of the facets. + * @param sort String of Json representing the sort criteria. + * @return String of Json representing the results of the faceting. + * @throws LuceneException + */ @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @@ -994,14 +728,21 @@ public String facet(@PathParam("entityName") String entityName, @Context HttpSer Long uid = null; try { uid = bucketNum.getAndIncrement(); - Search search = genericQuery(request, sort, uid); - return luceneFacetResult(entityName, search, searchAfter, maxResults, maxLabels, uid); + SearchBucket search = new SearchBucket(this, SearchType.GENERIC, request, sort, null); + searches.put(uid, search); + return luceneFacetResult(entityName, search, searchAfter, maxResults, maxLabels); } catch (Exception e) { freeSearcher(uid); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } + /** + * Releases all IndexSearchers associated with uid. + * + * @param uid Unique Identifier for a set of IndexSearcher to be released. + * @throws LuceneException + */ public void freeSearcher(Long uid) throws LuceneException { if (uid != null) { // May not be set for internal calls Map> search = searches.get(uid).searcherMap; @@ -1009,7 +750,8 @@ public void freeSearcher(Long uid) throws LuceneException { String name = entry.getKey(); List subReaders = entry.getValue(); try { - indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).releaseReaders(subReaders); + indexBuckets.computeIfAbsent(name.toLowerCase(), k -> new IndexBucket(k)) + .releaseSearchers(subReaders); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -1019,141 +761,39 @@ public void freeSearcher(Long uid) throws LuceneException { } /** - * Parses a query and associated information from an incoming request without - * any logic specific to a single index or entity. As such it may not be as - * powerful, but is sufficient for simple queries (like those for faceting). + * Gets all IndexSearchers needed for the shards of a given entity/index. * - * @param request Request containing the query and other Json encoded - * information such as fields and dimensions. - * @param sort String representing the sorting criteria for the search. - * @param uid Identifier for the search. - * @return Search object with the query, sort, and optionally the fields and - * dimensions to search set. - * @throws IOException If Json cannot be parsed from the request - * @throws LuceneException If the types of the JsonValues in the query do not - * match those supported by icat.lucene + * @param searcherMap Map of entity names to their IndexSearchers. + * @param name Name of the entity to get the IndexSearchers for. + * @return List of IndexSearchers for name. 
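A sketch of a facet request body using the keys the query and dimension parsing below reads ("dimension", "ranges", "from", "to", "key"); field names and epoch-ms bounds are illustrative. The STRING value in the query object becomes an exact TermQuery:

String facetRequest = "{ \"query\": { \"type.name\": \"calibration\" },"
        + " \"dimensions\": [ { \"dimension\": \"date\","
        + " \"ranges\": [ { \"from\": 1609459200000, \"to\": 1640995200000, \"key\": \"2021\" } ] } ] }";
// the query object above is equivalent to:
Query typeQuery = new TermQuery(new Term("type.name", "calibration"));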
+ * @throws IOException */ - private Search genericQuery(HttpServletRequest request, String sort, Long uid) throws IOException, LuceneException { - Search search = new Search(); - searches.put(uid, search); - Map> readerMap = new HashMap<>(); - search.searcherMap = readerMap; - search.parseSort(sort); - try (JsonReader r = Json.createReader(request.getInputStream())) { - JsonObject o = r.readObject(); - JsonObject jsonQuery = o.getJsonObject("query"); - BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); - for (Entry entry : jsonQuery.entrySet()) { - String field = entry.getKey(); - ValueType valueType = entry.getValue().getValueType(); - switch (valueType) { - case STRING: - JsonString stringValue = (JsonString) entry.getValue(); - luceneQuery.add(new TermQuery(new Term(field, stringValue.getString())), Occur.MUST); - break; - case NUMBER: - JsonNumber numberValue = (JsonNumber) entry.getValue(); - if (longFields.contains(field)) { - luceneQuery.add(LongPoint.newExactQuery(field, numberValue.longValueExact()), Occur.FILTER); - } else if (doubleFields.contains(field)) { - luceneQuery.add(DoublePoint.newExactQuery(field, numberValue.doubleValue()), Occur.FILTER); - } else { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "Value had type NUMBER, but field " + field - + " is not a known longField or doubleField"); - } - break; - case ARRAY: - // Only support array of String as list of ICAT ids is currently only use case - JsonArray arrayValue = (JsonArray) entry.getValue(); - ArrayList bytesArray = new ArrayList<>(); - for (JsonString value : arrayValue.getValuesAs(JsonString.class)) { - bytesArray.add(new BytesRef(value.getChars())); - } - luceneQuery.add(new TermInSetQuery(field, bytesArray), Occur.MUST); - break; - default: - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "Query values should be ARRAY, STRING or NUMBER, but had value of type " + valueType); - } - } - search.query = maybeEmptyQuery(luceneQuery); - logger.info("Query: {}", search.query); - search.parseFields(o); - if (o.containsKey("dimensions")) { - List dimensionObjects = o.getJsonArray("dimensions").getValuesAs(JsonObject.class); - for (JsonObject dimensionObject : dimensionObjects) { - if (!dimensionObject.containsKey("dimension")) { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "'dimension' not specified for facet request " + dimensionObject.toString()); - } - String dimension = dimensionObject.getString("dimension"); - FacetedDimension facetDimensionRequest = new FacetedDimension(dimension); - if (dimensionObject.containsKey("ranges")) { - List ranges = facetDimensionRequest.getRanges(); - if (longFields.contains(dimension)) { - for (JsonObject range : dimensionObject.getJsonArray("ranges") - .getValuesAs(JsonObject.class)) { - Long lower = Long.MIN_VALUE; - Long upper = Long.MAX_VALUE; - if (range.containsKey("from")) { - lower = range.getJsonNumber("from").longValueExact(); - } - if (range.containsKey("to")) { - upper = range.getJsonNumber("to").longValueExact(); - } - String label = lower.toString() + "-" + upper.toString(); - if (range.containsKey("key")) { - label = range.getString("key"); - } - ranges.add(new LongRange(label, lower, true, upper, false)); - } - } else if (doubleFields.contains(dimension)) { - for (JsonObject range : dimensionObject.getJsonArray("ranges") - .getValuesAs(JsonObject.class)) { - Double lower = Double.MIN_VALUE; - Double upper = Double.MAX_VALUE; - String label = lower.toString() + "-" + upper.toString(); - if 
(range.containsKey("from")) { - lower = range.getJsonNumber("from").doubleValue(); - } - if (range.containsKey("to")) { - upper = range.getJsonNumber("to").doubleValue(); - } - if (range.containsKey("key")) { - label = range.getString("key"); - } - ranges.add(new DoubleRange(label, lower, true, upper, false)); - } - } else { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "'ranges' specified for dimension " + dimension - + " but this is not a supported numeric field"); - } - } - search.dimensions.put(dimension, facetDimensionRequest); - } - logger.info("Dimensions: {}", search.dimensions.size()); - } - } - return search; - } - - private List getSearchers(Map> readerMap, String name) + private List getSearchers(Map> searcherMap, String name) throws IOException { - List subSearchers = readerMap.get(name); + String nameLowercase = name.toLowerCase(); + List subSearchers = searcherMap.get(nameLowercase); if (subSearchers == null) { - subSearchers = indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).acquireSearchers(); - readerMap.put(name, subSearchers); - logger.debug("Remember searcher for {}", name); + subSearchers = indexBuckets.computeIfAbsent(nameLowercase, k -> new IndexBucket(k)).acquireSearchers(); + searcherMap.put(nameLowercase, subSearchers); + logger.debug("Remember searcher for {}", nameLowercase); } return subSearchers; } - private IndexSearcher getSearcher(Map> readerMap, String name) + /** + * Gets a single IndexSearcher for name. When multiple shards are possible, + * getSearchers should be used instead. + * + * @param searcherMap Map of entity names to their IndexSearchers. + * @param name Name of the entity to get the IndexSearcher for. + * @return The IndexSearcher for name. + * @throws IOException + * @throws LuceneException If there are more than one shard for name. + */ + public IndexSearcher getSearcher(Map> searcherMap, String name) throws IOException, LuceneException { - List subSearchers = readerMap.get(name); - subSearchers = getSearchers(readerMap, name); + List subSearchers = searcherMap.get(name); + subSearchers = getSearchers(searcherMap, name); if (subSearchers.size() > 1) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Cannot get single IndexSearcher for " + name + " as it has " + subSearchers.size() + " shards"); @@ -1161,8 +801,14 @@ private IndexSearcher getSearcher(Map> readerMap, St return subSearchers.get(0); } - private List getShards(Map> readerMap, String name) { - return indexBuckets.computeIfAbsent(name, k -> new IndexBucket(k)).shardList; + /** + * Gets all ShardBuckets of a given entity/index. + * + * @param name Name of the entity to get the ShardBuckets for. + * @return List of ShardBuckets for name. + */ + private List getShards(String name) { + return indexBuckets.computeIfAbsent(name.toLowerCase(), k -> new IndexBucket(k)).shardList; } @PostConstruct @@ -1211,6 +857,18 @@ public void run() { } } + /** + * Perform search on the Investigation entity/index. + * + * @param request Incoming Http request containing the query as Json. + * @param searchAfter String of Json representing the last Lucene Document from + * a previous search. + * @param maxResults The maximum number of results to include in the returned + * Json. + * @param sort String of Json representing the sort criteria. + * @return String of Json representing the results of the search. 
+ * @throws LuceneException + */ @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) @@ -1220,8 +878,9 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s Long uid = null; try { uid = bucketNum.getAndIncrement(); - Search search = investigationsQuery(request, sort, uid); - return luceneSearchResult("Investigation", search, searchAfter, maxResults, uid); + SearchBucket search = new SearchBucket(this, SearchType.INVESTIGATION, request, sort, searchAfter); + searches.put(uid, search); + return luceneSearchResult("Investigation", search, searchAfter, maxResults); } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); @@ -1229,81 +888,19 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s } } - private Search investigationsQuery(HttpServletRequest request, String sort, Long uid) - throws IOException, QueryNodeException, LuceneException { - Search search = new Search(); - searches.put(uid, search); - Map> readerMap = new HashMap<>(); - search.searcherMap = readerMap; - search.parseSort(sort); - try (JsonReader r = Json.createReader(request.getInputStream())) { - JsonObject o = r.readObject(); - JsonObject query = o.getJsonObject("query"); - String userName = query.getString("user", null); - - BooleanQuery.Builder theQuery = new BooleanQuery.Builder(); - - buildFilterQueries(query, theQuery); - - if (userName != null) { - buildUserNameQuery(readerMap, userName, theQuery, "id"); - } - - String text = query.getString("text", null); - if (text != null) { - theQuery.add(investigationParser.parse(text, null), Occur.MUST); - } - - buildDateRanges(theQuery, query, "lower", "upper", "startDate", "endDate"); - - if (query.containsKey("parameters")) { - JsonArray parameters = query.getJsonArray("parameters"); - IndexSearcher investigationParameterSearcher = getSearcher(readerMap, "InvestigationParameter"); - - for (JsonValue p : parameters) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", paramQuery.build(), - investigationParameterSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - } - - if (query.containsKey("samples")) { - JsonArray samples = query.getJsonArray("samples"); - IndexSearcher sampleSearcher = getSearcher(readerMap, "Sample"); - - for (JsonValue s : samples) { - JsonString sample = (JsonString) s; - BooleanQuery.Builder sampleQuery = new BooleanQuery.Builder(); - sampleQuery.add(sampleParser.parse(sample.getString(), null), Occur.MUST); - Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", sampleQuery.build(), - sampleSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - } - - String userFullName = query.getString("userFullName", null); - if (userFullName != null) { - BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); - userFullNameQuery.add(genericParser.parse(userFullName, "user.fullName"), Occur.MUST); - IndexSearcher investigationUserSearcher = getSearcher(readerMap, "InvestigationUser"); - Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", userFullNameQuery.build(), - investigationUserSearcher, ScoreMode.None); - theQuery.add(toQuery, Occur.MUST); - } - - search.query = maybeEmptyQuery(theQuery); - search.parseFields(o); - } - logger.info("Query: {}", search.query); - return search; - } - + /** + * Locks the specified index for population, removing all existing documents and + * preventing normal modify 
operations until the index is unlocked. + * + * @param entityName Name of the entity/index to lock. + * @throws LuceneException If already locked, or if there's an IOException when + * deleting documents. + */ @POST @Path("lock/{entityName}") public void lock(@PathParam("entityName") String entityName) throws LuceneException { logger.info("Requesting lock of {} index", entityName); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (!bucket.locked.compareAndSet(false, true)) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene already locked for " + entityName); @@ -1317,8 +914,26 @@ public void lock(@PathParam("entityName") String entityName) throws LuceneExcept } } - private String luceneFacetResult(String name, Search search, String searchAfter, int maxResults, int maxLabels, - Long uid) throws IOException, IllegalStateException, LuceneException { + /** + * Perform faceting on an entity/index. + * + * @param name Entity/index to facet. + * @param search SearchBucket containing the search query, dimensions to + * facet etc. + * @param searchAfter String of Json representing the last Lucene Document from + * a previous search. + * @param maxResults The maximum number of results from the search. + * @param maxLabels The maximum number of labels to return for each dimension + * of the facets. + * @return String of Json representing the facets of the search results. + * @throws IOException + * @throws IllegalStateException If the IndexSearcher and its DirectoryReader + * are not in sync. + * @throws LuceneException If ranges are provided for a non-numeric field, + * or something else goes wrong. 
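A minimal sketch of the numeric branch of luceneFacetResult described above: matches are collected once, then counted per range for a long field (the searcher, query and limits are assumed given):

FacetsCollector facetsCollector = new FacetsCollector();
FacetsCollector.search(indexSearcher, query, maxResults, facetsCollector);
LongRange[] ranges = {
        new LongRange("2021", 1609459200000L, true, 1640995200000L, false),
        new LongRange("2022", 1640995200000L, true, 1672531200000L, false) };
Facets facets = new LongRangeFacetCounts("date", facetsCollector, ranges);
FacetResult result = facets.getTopChildren(maxLabels, "date");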
+ */ + private String luceneFacetResult(String name, SearchBucket search, String searchAfter, int maxResults, + int maxLabels) throws IOException, IllegalStateException, LuceneException { // If no dimensions were specified, perform "sparse" faceting on all applicable // string values boolean sparse = search.dimensions.size() == 0; @@ -1334,16 +949,18 @@ private String luceneFacetResult(String name, Search search, String searchAfter, logger.debug("Faceting {} with {} after {} ", name, search.query, searchAfter); for (IndexSearcher indexSearcher : searchers) { FacetsCollector facetsCollector = new FacetsCollector(); - FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); + TopDocs results = FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); + logger.debug("{}", results.totalHits); for (FacetedDimension facetedDimension : search.dimensions.values()) { if (facetedDimension.getRanges().size() > 0) { + logger.debug("Ranges: {}", facetedDimension.getRanges().get(0).getClass().getSimpleName()); // Perform range based facets for a numeric field String dimension = facetedDimension.getDimension(); Facets facets; - if (longFields.contains(dimension)) { + if (DocumentMapping.longFields.contains(dimension)) { LongRange[] ranges = facetedDimension.getRanges().toArray(new LongRange[0]); facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges); - } else if (doubleFields.contains(dimension)) { + } else if (DocumentMapping.doubleFields.contains(dimension)) { DoubleRange[] ranges = facetedDimension.getRanges().toArray(new DoubleRange[0]); facets = new DoubleRangeFacetCounts(dimension, facetsCollector, ranges); } else { @@ -1374,8 +991,10 @@ private String luceneFacetResult(String name, Search search, String searchAfter, Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); List facetResults = facets.getAllDims(maxLabels); for (FacetResult facetResult : facetResults) { - String dimension = facetResult.dim; + String dimension = facetResult.dim.replace(".keyword", ""); FacetedDimension facetedDimension = search.dimensions.get(dimension); + logger.debug("String facets found for {}, requested dimensions were {}", dimension, + search.dimensions.keySet()); if (facetedDimension != null) { facetedDimension.addResult(facetResult); } @@ -1397,7 +1016,9 @@ private String luceneFacetResult(String name, Search search, String searchAfter, // Build results JsonObjectBuilder aggregationsBuilder = Json.createObjectBuilder(); search.dimensions.values().forEach(facetedDimension -> facetedDimension.buildResponse(aggregationsBuilder)); - return Json.createObjectBuilder().add("aggregations", aggregationsBuilder).build().toString(); + String aggregations = Json.createObjectBuilder().add("aggregations", aggregationsBuilder).build().toString(); + logger.debug("aggregations: {}", aggregations); + return aggregations; } /** @@ -1416,25 +1037,37 @@ private String luceneFacetResult(String name, Search search, String searchAfter, private void addFacetResults(int maxLabels, Map facetedDimensionMap, Facets facets) throws IOException { for (FacetResult facetResult : facets.getAllDims(maxLabels)) { - String dim = facetResult.dim; + String dim = facetResult.dim.replace(".keyword", ""); logger.trace("Sparse faceting: FacetResult for {}", dim); FacetedDimension facetedDimension = facetedDimensionMap.get(dim); if (facetedDimension == null) { - facetedDimension = new FacetedDimension(facetResult.dim); + facetedDimension = new FacetedDimension(dim); 
facetedDimensionMap.put(dim, facetedDimension); } facetedDimension.addResult(facetResult); } } - private String luceneSearchResult(String name, Search search, String searchAfter, int maxResults, Long uid) + /** + * Perform search on name. + * + * @param name Entity/index to search. + * @param search SearchBucket containing the search query, dimensions to + * facet etc. + * @param searchAfter String of Json representing the last Lucene Document from + * a previous search. + * @param maxResults The maximum number of results from the search. + * @return String of Json representing the results of the search. + * @throws IOException + * @throws LuceneException + */ + private String luceneSearchResult(String name, SearchBucket search, String searchAfter, int maxResults) throws IOException, LuceneException { List searchers = getSearchers(search.searcherMap, name); - List shards = getShards(search.searcherMap, name); + List shards = getShards(name); String format = "Search {} with: query {}, maxResults {}, searchAfter {}, scored {}, fields {}"; logger.debug(format, name, search.query, maxResults, searchAfter, search.scored, search.fields); - FieldDoc searchAfterDoc = parseSearchAfter(searchAfter, search.sort.getSort()); - TopFieldDocs topFieldDocs = searchShards(search, maxResults, shards, searchAfterDoc); + TopFieldDocs topFieldDocs = searchShards(search, maxResults, shards, search.searchAfter); ScoreDoc[] hits = topFieldDocs.scoreDocs; TotalHits totalHits = topFieldDocs.totalHits; SortField[] fields = topFieldDocs.fields; @@ -1505,7 +1138,18 @@ private String luceneSearchResult(String name, Search search, String searchAfter return baos.toString(); } - private TopFieldDocs searchShards(Search search, int maxResults, List shards, + /** + * Performs a search by iterating over all relevant shards. + * + * @param search SearchBucket containing the search query, dimensions to + * facet etc. + * @param maxResults The maximum number of results from the search. + * @param shards List of all ShardBuckets for the entity to be searched. + * @param searchAfterDoc The last Lucene FieldDoc from a previous search. + * @return Lucene TopFieldDocs resulting from the search. + * @throws IOException + */ + private TopFieldDocs searchShards(SearchBucket search, int maxResults, List shards, FieldDoc searchAfterDoc) throws IOException { TopFieldDocs topFieldDocs; if (shards.size() > 0) { @@ -1544,51 +1188,6 @@ private TopFieldDocs searchShards(Search search, int maxResults, List date to make sorting easier? - if (longFields.contains(key)) { + if (DocumentMapping.longFields.contains(key)) { document.add(new NumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); - } else if (doubleFields.contains(key)) { + } else if (DocumentMapping.doubleFields.contains(key)) { long sortableLong = NumericUtils.doubleToSortableLong(json.getJsonNumber(key).doubleValue()); document.add(new NumericDocValuesField(key, sortableLong)); } else { @@ -1677,9 +1291,16 @@ private void addSortField(JsonObject json, Document document, String key) { } } + /** + * Re-adds the content of a Lucene IndexableField to a Lucene Document. This is + * needed when updating Documents to ensure sorting is not lost. + * + * @param field Lucene IndexableField to be re-added to the document. + * @param document Lucene Document being built. 
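The searchShards body is elided by the hunk above; a minimal sketch of the pattern it implies, assuming one IndexSearcher per shard and Lucene's standard merge of per-shard results:

TopFieldDocs[] shardHits = new TopFieldDocs[searchers.size()];
for (int i = 0; i < searchers.size(); i++) {
    // searchAfterDoc may be null on the first page
    shardHits[i] = searchers.get(i).searchAfter(searchAfterDoc, search.query, maxResults, search.sort);
}
// combine the per-shard hits into a single result set ordered by search.sort
TopFieldDocs merged = TopDocs.merge(search.sort, maxResults, shardHits);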
+ */ private void addSortField(IndexableField field, Document document) { String key = field.name(); - if (sortFields.contains(key)) { + if (DocumentMapping.sortFields.contains(key)) { if (key.equals("id")) { // Id is a special case, as we need it to be SORTED as a byte ref to allow joins // but also SORTED_NUMERIC to ensure a deterministic order to results @@ -1687,9 +1308,9 @@ private void addSortField(IndexableField field, Document document) { document.add(new NumericDocValuesField("id.long", value)); document.add(new StoredField("id.long", value)); } - if (longFields.contains(key)) { + if (DocumentMapping.longFields.contains(key)) { document.add(new NumericDocValuesField(key, field.numericValue().longValue())); - } else if (doubleFields.contains(key)) { + } else if (DocumentMapping.doubleFields.contains(key)) { long sortableLong = NumericUtils.doubleToSortableLong(field.numericValue().doubleValue()); document.add(new NumericDocValuesField(key, sortableLong)); } else { @@ -1742,108 +1363,20 @@ private Document pruneDocument(String fieldPrefix, Document oldDocument) { return newDocument; } - private Builder parseParameter(JsonValue p) throws LuceneException { - JsonObject parameter = (JsonObject) p; - BooleanQuery.Builder paramQuery = new BooleanQuery.Builder(); - String pName = parameter.getString("name", null); - if (pName != null) { - paramQuery.add(new WildcardQuery(new Term("type.name.keyword", pName)), Occur.MUST); - } - - String pUnits = parameter.getString("units", null); - if (pUnits != null) { - paramQuery.add(new WildcardQuery(new Term("type.units", pUnits)), Occur.MUST); - } - if (parameter.containsKey("stringValue")) { - String pStringValue = parameter.getString("stringValue", null); - paramQuery.add(new WildcardQuery(new Term("stringValue", pStringValue)), Occur.MUST); - } else if (parameter.containsKey("lowerDateValue") && parameter.containsKey("upperDateValue")) { - buildDateRanges(paramQuery, parameter, "lowerDateValue", "upperDateValue", "dateTimeValue"); - } else if (parameter.containsKey("lowerNumericValue") && parameter.containsKey("upperNumericValue")) { - Double pLowerNumericValue = parameter.getJsonNumber("lowerNumericValue").doubleValue(); - Double pUpperNumericValue = parameter.getJsonNumber("upperNumericValue").doubleValue(); - paramQuery.add(DoublePoint.newRangeQuery("numericValue", pLowerNumericValue, pUpperNumericValue), - Occur.MUST); - } - return paramQuery; - } - /** - * Parses a Lucene ScoreDoc to be "searched after" from a String representation - * of a JSON array. + * Unlocks the specified index after population, committing all pending documents + * and + * allowing normal modify operations again. * - * @param searchAfter String representation of a JSON object containing the - * document id or "doc" (String), score ("float") in that - * order. - * @return FieldDoc object built from the provided String, or null if - * searchAfter was itself null or an empty String. - * @throws LuceneException If an entry in the fields array is not a STRING or - * NUMBER + * @param entityName Name of the entity/index to unlock. + * @throws LuceneException If not locked, or if there's an IOException when + * committing documents.
*/ - private FieldDoc parseSearchAfter(String searchAfter, SortField[] sortFields) throws LuceneException { - if (searchAfter == null || searchAfter.equals("")) { - return null; - } - logger.debug("Attempting to parseSearchAfter from {}", searchAfter); - JsonReader reader = Json.createReader(new StringReader(searchAfter)); - JsonObject object = reader.readObject(); - // shardIndex and Lucene doc Id are always needed to determine tie breaks, even - // if the field sort resulted in no ties in the first place - int shardIndex = object.getInt("shardIndex"); - int doc = object.getInt("doc"); - float score = Float.NaN; - List fields = new ArrayList<>(); - if (object.containsKey("score")) { - score = object.getJsonNumber("score").bigDecimalValue().floatValue(); - } - if (object.containsKey("fields")) { - JsonArray jsonArray = object.getJsonArray("fields"); - if (jsonArray.size() != sortFields.length) { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "fields should have the same length as sort, but they were " - + jsonArray.size() + " and " + sortFields.length); - } - for (int i = 0; i < sortFields.length; i++) { - JsonValue value = jsonArray.get(i); - switch (value.getValueType()) { - case NUMBER: - JsonNumber number = ((JsonNumber) value); - switch (sortFields[i].getType()) { - case FLOAT: - case DOUBLE: - case SCORE: - fields.add(number.bigDecimalValue().floatValue()); - break; - case INT: - case LONG: - case DOC: - case CUSTOM: - fields.add(number.longValueExact()); - break; - default: - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "fields contained a NUMBER but the corresponding field was " - + sortFields[i]); - } - break; - case STRING: - fields.add(new BytesRef(((JsonString) value).getString())); - break; - default: - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "fields should be an array of STRING and NUMBER, but had entry of type " - + value.getValueType()); - } - } - } - return new FieldDoc(doc, score, fields.toArray(), shardIndex); - } - @POST @Path("unlock/{entityName}") public void unlock(@PathParam("entityName") String entityName) throws LuceneException { logger.debug("Requesting unlock of {} index", entityName); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (!bucket.locked.compareAndSet(true, false)) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene is not currently locked for " + entityName); @@ -1855,15 +1388,26 @@ public void unlock(@PathParam("entityName") String entityName) throws LuceneExce } } + /** + * Updates an existing Lucene document, provided that the target index is not + * locked + * for another operation. + * + * @param operationBody JsonObject containing the "_index" that the new "doc" + * should be created in. 
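For reference, a search_after token matching the keys parseSearchAfter reads: "shardIndex" and "doc" are always required for tie-breaking, "score" is optional, and "fields" must have one entry per sort field (here a LONG field plus the id.long tie-breaker):

String searchAfter = "{ \"doc\": 17, \"shardIndex\": 0, \"score\": 1.0,"
        + " \"fields\": [1641876780000, 42] }";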
+ * @throws LuceneException + * @throws NumberFormatException + * @throws IOException + */ private void update(JsonObject operationBody) throws LuceneException, NumberFormatException, IOException { String entityName = operationBody.getString("_index"); - if (relationships.containsKey(entityName)) { + if (DocumentMapping.relationships.containsKey(entityName)) { updateByRelation(operationBody, false); } - if (indexedEntities.contains(entityName)) { + if (DocumentMapping.indexedEntities.contains(entityName)) { String icatId = operationBody.getString("_id"); Document document = parseDocument(operationBody.getJsonObject("doc")); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); @@ -1873,11 +1417,28 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm } } + /** + * Updates an existing Lucene document, provided that the target index is not + * locked + * for another operation. In this case, the entity being updated does not have + * its own index, but exists as fields on a parent. For example, + * InvestigationType on an Investigation. + * + * @param operationBody JsonObject containing the "_index" that the new "doc" + * should be created in. + * @param delete Whether to delete the related entity (or just update its + * values). + * @throws LuceneException + * @throws NumberFormatException + * @throws IOException + */ private void updateByRelation(JsonObject operationBody, Boolean delete) throws LuceneException, NumberFormatException, IOException { - for (ParentRelationship parentRelationship : relationships.get(operationBody.getString("_index"))) { + for (DocumentMapping.ParentRelationship parentRelationship : DocumentMapping.relationships + .get(operationBody.getString("_index"))) { String childId = operationBody.getString("_id"); - IndexBucket bucket = indexBuckets.computeIfAbsent(parentRelationship.parentName, k -> new IndexBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(parentRelationship.parentName.toLowerCase(), + k -> new IndexBucket(k)); if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + parentRelationship.parentName); diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java new file mode 100644 index 0000000..21dd667 --- /dev/null +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -0,0 +1,813 @@ +package org.icatproject.lucene; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.StringReader; +import java.net.HttpURLConnection; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; +import java.util.Map.Entry; + +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonNumber; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonValue; +import javax.json.JsonValue.ValueType; +import javax.servlet.http.HttpServletRequest; + +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.LongPoint; +import 
org.apache.lucene.facet.range.DoubleRange; +import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.facet.range.Range; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery.Builder; +import org.apache.lucene.search.SortField.Type; +import org.apache.lucene.search.join.JoinUtil; +import org.apache.lucene.search.join.ScoreMode; +import org.apache.lucene.util.BytesRef; +import org.icatproject.lucene.exceptions.LuceneException; +import org.icatproject.utils.IcatUnits.SystemValue; + +/** + * Bucket for information relating to a single search. + */ +public class SearchBucket { + + public enum SearchType { + DATAFILE, DATASET, INVESTIGATION, GENERIC + } + + private Lucene lucene; + public Map<String, List<IndexSearcher>> searcherMap; + public Query query; + public Sort sort; + public FieldDoc searchAfter; + public boolean scored; + public Set<String> fields = new HashSet<>(); + public Map<String, Set<String>> joinedFields = new HashMap<>(); + public Map<String, FacetedDimension> dimensions = new HashMap<>(); + public boolean aborted = false; + private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); + + static { + TimeZone tz = TimeZone.getTimeZone("GMT"); + df.setTimeZone(tz); + } + + /** + * Creates an empty search bucket. + * + * @param lucene Lucene instance. + */ + public SearchBucket(Lucene lucene) { + this.lucene = lucene; + } + + /** + * Creates a new search from the provided request and URL parameters. + * + * @param lucene Lucene instance. + * @param searchType The SearchType determines how the query is built for + * specific entities. + * @param request Incoming Http request containing the query as Json. + * @param sort Sort criteria as a Json encoded string. + * @param searchAfter The last FieldDoc of a previous search, encoded as Json.
+ * @throws LuceneException + * @throws IOException + * @throws QueryNodeException + */ + public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest request, String sort, String searchAfter) + throws LuceneException, IOException, QueryNodeException { + this.lucene = lucene; + searcherMap = new HashMap<>(); + parseSort(sort); + try (JsonReader r = Json.createReader(request.getInputStream())) { + JsonObject o = r.readObject(); + parseFields(o); + parseDimensions(o); // Don't need for DF + JsonObject jsonQuery = o.getJsonObject("query"); + BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); + String userName; + String text; + switch (searchType) { + case GENERIC: + parseGenericQuery(jsonQuery, luceneQuery); + return; + case DATAFILE: + parseSearchAfter(searchAfter); + buildFilterQueries("datafile", jsonQuery, luceneQuery); + + userName = jsonQuery.getString("user", null); + if (userName != null) { + buildUserNameQuery(userName, luceneQuery, "investigation.id"); + } + + text = jsonQuery.getString("text", null); + if (text != null) { + luceneQuery.add(DocumentMapping.datafileParser.parse(text, null), Occur.MUST); + } + + buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "date"); + + if (jsonQuery.containsKey("parameters")) { + JsonArray parameters = jsonQuery.getJsonArray("parameters"); + IndexSearcher datafileParameterSearcher = lucene.getSearcher(searcherMap, "DatafileParameter"); + for (JsonValue p : parameters) { + BooleanQuery.Builder paramQuery = parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("datafile.id", false, "id", paramQuery.build(), + datafileParameterSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); + } + } + query = maybeEmptyQuery(luceneQuery); + return; + case DATASET: + parseSearchAfter(searchAfter); + buildFilterQueries("dataset", jsonQuery, luceneQuery); + + userName = jsonQuery.getString("user", null); + if (userName != null) { + buildUserNameQuery(userName, luceneQuery, "investigation.id"); + } + + text = jsonQuery.getString("text", null); + if (text != null) { + luceneQuery.add(DocumentMapping.datasetParser.parse(text, null), Occur.MUST); + } + + buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate"); + + if (jsonQuery.containsKey("parameters")) { + JsonArray parameters = jsonQuery.getJsonArray("parameters"); + IndexSearcher parameterSearcher = lucene.getSearcher(searcherMap, "DatasetParameter"); + for (JsonValue p : parameters) { + BooleanQuery.Builder paramQuery = parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("dataset.id", false, "id", paramQuery.build(), + parameterSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); + } + } + query = maybeEmptyQuery(luceneQuery); + return; + case INVESTIGATION: + parseSearchAfter(searchAfter); + buildFilterQueries("investigation", jsonQuery, luceneQuery); + + userName = jsonQuery.getString("user", null); + if (userName != null) { + buildUserNameQuery(userName, luceneQuery, "id"); + } + + text = jsonQuery.getString("text", null); + if (text != null) { + luceneQuery.add(DocumentMapping.investigationParser.parse(text, null), Occur.MUST); + } + + buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate"); + + if (jsonQuery.containsKey("parameters")) { + JsonArray parameters = jsonQuery.getJsonArray("parameters"); + IndexSearcher parameterSearcher = lucene.getSearcher(searcherMap, "InvestigationParameter"); + for (JsonValue p : parameters) { + BooleanQuery.Builder paramQuery = 
parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", + paramQuery.build(), + parameterSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); + } + } + + if (jsonQuery.containsKey("samples")) { + JsonArray samples = jsonQuery.getJsonArray("samples"); + IndexSearcher sampleSearcher = lucene.getSearcher(searcherMap, "Sample"); + + for (JsonValue s : samples) { + JsonString sample = (JsonString) s; + BooleanQuery.Builder sampleQuery = new BooleanQuery.Builder(); + sampleQuery.add(DocumentMapping.sampleParser.parse(sample.getString(), null), Occur.MUST); + Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", + sampleQuery.build(), + sampleSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); + } + } + + String userFullName = jsonQuery.getString("userFullName", null); + if (userFullName != null) { + BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); + userFullNameQuery.add(DocumentMapping.genericParser.parse(userFullName, "user.fullName"), + Occur.MUST); + IndexSearcher investigationUserSearcher = lucene.getSearcher(searcherMap, "InvestigationUser"); + Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", + userFullNameQuery.build(), + investigationUserSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); + } + query = maybeEmptyQuery(luceneQuery); + return; + } + } + } + + /** + * Extracts values from queryJson in order to add one or more range query terms + * using queryBuilder. + * + * Note that values in queryJson are expected to be precise only to the minute, + * and so to ensure that our range is inclusive, we add 59.999 seconds onto the + * upper value only. + * + * If either the upper or lower key does not yield a value then a half open + * range is created. If both are absent, then nothing is added to the query. + * + * @param queryBuilder Builder for the Lucene query. + * @param queryJson JsonObject representing the query parameters. + * @param lowerKey Key in queryJson of the lower date value. + * @param upperKey Key in queryJson of the upper date value. + * @param fields Name of one or more fields to apply the range query to. + * @throws LuceneException + */ + private void buildDateRanges(Builder queryBuilder, JsonObject queryJson, String lowerKey, String upperKey, + String... fields) throws LuceneException { + Long lower = parseDate(queryJson, lowerKey, 0); + Long upper = parseDate(queryJson, upperKey, 59999); + if (lower != null || upper != null) { + lower = (lower == null) ? Long.MIN_VALUE : lower; + upper = (upper == null) ? Long.MAX_VALUE : upper; + for (String field : fields) { + queryBuilder.add(LongPoint.newRangeQuery(field, lower, upper), Occur.MUST); + } + } + } + + /** + * Builds Term queries (exact string matches without tokenizing), Range queries, + * or Nested/Joined queries from the filter + * object in the query request. + + * @param target The entity this search targets, e.g. "datafile". + * @param requestedQuery Json object containing details of the query. + * @param queryBuilder Builder for the overall boolean query to be built. + * @throws LuceneException If a value in the filter object is not a STRING, + * OBJECT, or an ARRAY of these.
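To make the inclusivity rule in buildDateRanges concrete, a minimal sketch follows; it assumes the same package as SearchBucket (so the protected decodeTime, defined further down, is visible) and uses the "date" field from the datafile mapping:

    import java.text.ParseException;
    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.search.Query;

    class DateRangeSketch {
        // Upper values are only precise to the minute, so 59999 ms are added
        // (as parseDate does) to cover the whole of the final minute.
        static Query dateRange() throws ParseException {
            long lower = SearchBucket.decodeTime("202001010000"); // 2020-01-01T00:00 GMT
            long upper = SearchBucket.decodeTime("202001312359") + 59999;
            return LongPoint.newRangeQuery("date", lower, upper);
        }
    }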
+ * @throws IOException + */ + private void buildFilterQueries(String target, JsonObject requestedQuery, Builder queryBuilder) + throws LuceneException, IOException { + if (requestedQuery.containsKey("filter")) { + JsonObject filterObject = requestedQuery.getJsonObject("filter"); + for (String key : filterObject.keySet()) { + JsonValue value = filterObject.get(key); + ValueType valueType = value.getValueType(); + int i = key.indexOf("."); + String filterTarget = i == -1 ? key : key.substring(0, i); + String fld = key.substring(i + 1); + Query dimensionQuery; + switch (valueType) { + case ARRAY: + Builder builder = new BooleanQuery.Builder(); + // If the key was just a nested entity (no ".") then we should FILTER all of our + // queries on that entity. + Occur occur = i == -1 ? Occur.FILTER : Occur.SHOULD; + for (JsonValue arrayValue : filterObject.getJsonArray(key)) { + Query arrayQuery = parseFilter(target, fld, arrayValue); + builder.add(arrayQuery, occur); + } + dimensionQuery = builder.build(); + break; + + default: + dimensionQuery = parseFilter(target, fld, value); + } + // Nest the dimension query if needed + if (i != -1 && !target.equals(filterTarget)) { + // If we are targeting a different entity, nest the entire array (whose + // elements were combined as SHOULD) inside a join query + // BUT only if we haven't already nested the queries (as we do when the key was + // just a nested entity) + IndexSearcher nestedSearcher = lucene.getSearcher(searcherMap, filterTarget); + Query nestedQuery = JoinUtil.createJoinQuery(target + ".id", false, "id", dimensionQuery, + nestedSearcher, ScoreMode.None); + queryBuilder.add(nestedQuery, Occur.FILTER); + } else { + // Otherwise, just add as FILTER to the main query directly + queryBuilder.add(dimensionQuery, Occur.FILTER); + } + } + } + } + + /** + * Parses a single filter field value pair into Lucene objects. Can handle + * simple strings, range objects or nested filters.
+ * + * @param target The target entity of the search, which is not necessarily + * the entity this filter applies to + * @param fld The field to apply the query to + * @param value JsonValue (JsonString or JsonObject) to parse a Lucene Query + * from + * @return A Lucene Query object parsed from the provided value + * @throws IOException + * @throws LuceneException + */ + private Query parseFilter(String target, String fld, JsonValue value) throws IOException, LuceneException { + ValueType valueType = value.getValueType(); + switch (valueType) { + case STRING: + // Simplest case involving a single field/value pair + return new TermQuery(new Term(fld + ".keyword", ((JsonString) value).getString())); + + case OBJECT: + JsonObject valueObject = (JsonObject) value; + if (valueObject.containsKey("filter")) { + // Parse a nested query + IndexSearcher nestedSearcher = lucene.getSearcher(searcherMap, fld); + List<JsonObject> nestedFilters = valueObject.getJsonArray("filter").getValuesAs(JsonObject.class); + Builder nestedBoolBuilder = new BooleanQuery.Builder(); + nestedFilters.forEach(nestedFilter -> { + String nestedField = nestedFilter.getString("field"); + if (nestedFilter.containsKey("value")) { + TermQuery query = new TermQuery(new Term(nestedField + ".keyword", nestedFilter.getString("value"))); + nestedBoolBuilder.add(query, Occur.FILTER); + } else { + buildNestedRangeQuery(nestedField, nestedFilter, nestedBoolBuilder); + } + }); + return JoinUtil.createJoinQuery(target + ".id", false, "id", nestedBoolBuilder.build(), + nestedSearcher, ScoreMode.None); + } else { + // Single range of values for a field + JsonNumber from = valueObject.getJsonNumber("from"); + JsonNumber to = valueObject.getJsonNumber("to"); + if (DocumentMapping.longFields.contains(fld)) { + return LongPoint.newRangeQuery(fld, from.longValueExact(), to.longValueExact()); + } else { + return DoublePoint.newRangeQuery(fld, from.doubleValue(), to.doubleValue()); + } + } + + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "filter object values should be STRING or OBJECT, but were " + valueType); + } + } + + /** + * Builds a range query, intended for use with numeric or date/time parameters. + * + * @param fld Name of the field to apply the range to. + * @param valueObject JsonObject containing "from", "to" and optionally "units" + * as keys for a range of values.
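For the single-range branch of parseFilter above, a hedged sketch of the JSON-to-query translation ("numericValue" is one of the configured doubleFields; the bounds are invented):

    import javax.json.Json;
    import javax.json.JsonObject;
    import org.apache.lucene.document.DoublePoint;
    import org.apache.lucene.search.Query;

    class FilterRangeSketch {
        // A filter value of {"from": 1.0, "to": 5.0} on a double field becomes
        // a DoublePoint range query, exactly as in the branch above.
        static Query numericValueFilter() {
            JsonObject valueObject = Json.createObjectBuilder()
                    .add("from", 1.0).add("to", 5.0).build();
            return DoublePoint.newRangeQuery("numericValue",
                    valueObject.getJsonNumber("from").doubleValue(),
                    valueObject.getJsonNumber("to").doubleValue());
        }
    }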
+ * @param builder BooleanQuery.Builder for the nested query + */ + private void buildNestedRangeQuery(String fld, JsonObject valueObject, BooleanQuery.Builder builder) { + if (DocumentMapping.longFields.contains(fld)) { + long from = Long.MIN_VALUE; + long to = Long.MAX_VALUE; + try { + from = valueObject.getJsonNumber("from").longValueExact(); + } catch (ArithmeticException e) { + // pass + } + try { + to = valueObject.getJsonNumber("to").longValueExact(); + } catch (ArithmeticException e) { + // pass + } + builder.add(LongPoint.newRangeQuery(fld, from, to), Occur.FILTER); + } else { + double from = valueObject.getJsonNumber("from").doubleValue(); + double to = valueObject.getJsonNumber("to").doubleValue(); + String units = valueObject.getString("units", null); + if (units != null) { + SystemValue fromValue = lucene.icatUnits.new SystemValue(from, units); + SystemValue toValue = lucene.icatUnits.new SystemValue(to, units); + if (fromValue.value != null && toValue.value != null) { + // If we were able to parse the units, apply query to the SI value + builder.add(DoublePoint.newRangeQuery(fld + "SI", fromValue.value, toValue.value), Occur.FILTER); + } else { + // If units could not be parsed, make them part of the query on the raw data + builder.add(DoublePoint.newRangeQuery(fld, from, to), Occur.FILTER); + builder.add(new TermQuery(new Term("type.units.keyword", units)), Occur.FILTER); + } + } else { + // If units were not provided, just apply to the raw data + builder.add(DoublePoint.newRangeQuery(fld, from, to), Occur.FILTER); + } + } + } + + /** + * Builds a query against InvestigationUser and InstrumentScientist entities + * using the provided userName. + * + * @param userName The value of the user.name field to query for. + * @param luceneQuery BooleanQuery.Builder in use for main entity query. + * @param toField The field on the main entity to join to, practically + * either "id" or "investigation.id". + * @throws IOException + * @throws LuceneException + */ + private void buildUserNameQuery(String userName, BooleanQuery.Builder luceneQuery, String toField) + throws IOException, LuceneException { + TermQuery fromQuery = new TermQuery(new Term("user.name", userName)); + Query investigationUserQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, fromQuery, + lucene.getSearcher(searcherMap, "InvestigationUser"), ScoreMode.None); + Query instrumentScientistQuery = JoinUtil.createJoinQuery("instrument.id", false, "instrument.id", fromQuery, + lucene.getSearcher(searcherMap, "InstrumentScientist"), ScoreMode.None); + Query investigationInstrumentQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, + instrumentScientistQuery, lucene.getSearcher(searcherMap, "InvestigationInstrument"), ScoreMode.None); + Builder userNameQueryBuilder = new BooleanQuery.Builder(); + userNameQueryBuilder.add(investigationUserQuery, Occur.SHOULD).add(investigationInstrumentQuery, Occur.SHOULD); + luceneQuery.add(userNameQueryBuilder.build(), Occur.MUST); + } + + /** + * Converts String into number of ms since epoch. + * + * @param value String representing a Date in the format "yyyyMMddHHmm". 
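The conversion decodeTime performs can be checked in isolation with plain JDK classes (the value is arbitrary):

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.TimeZone;

    class DecodeTimeSketch {
        public static void main(String[] args) throws ParseException {
            SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm");
            df.setTimeZone(TimeZone.getTimeZone("GMT"));
            // 2020-01-01T00:00 GMT is 1577836800 seconds after the epoch
            System.out.println(df.parse("202001010000").getTime()); // 1577836800000
        }
    }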
+ * @return Number of ms since epoch, or null if value was null + * @throws java.text.ParseException + */ + protected static Long decodeTime(String value) throws java.text.ParseException { + if (value == null) { + return null; + } else { + synchronized (df) { + return df.parse(value).getTime(); + } + } + } + + /** + * Either builds the query from the provided builder, or creates a + * MatchAllDocsQuery to use if the Builder was empty. + * + * @param luceneQuery BooleanQuery.Builder + * @return Lucene Query + */ + private Query maybeEmptyQuery(Builder luceneQuery) { + Query query = luceneQuery.build(); + if (query.toString().isEmpty()) { + query = new MatchAllDocsQuery(); + } + return query; + } + + /** + * Parses a date/time value from jsonObject. Can account for either a Long + * value, or a String value encoded in the format yyyyMMddHHmm. + * + * @param jsonObject JsonObject containing the date to be parsed. + * @param key Key of the date/time value in jsonObject. + * @param offset In the case of STRING ValueType, add offset ms before + * returning. This accounts for the fact the String format + * used is only precise to minutes and not seconds. + * @return null if jsonObject does not contain the key, number of ms since epoch + * otherwise. + * @throws LuceneException If the ValueType is not NUMBER or STRING, or if a + * STRING value cannot be parsed. + */ + private Long parseDate(JsonObject jsonObject, String key, int offset) throws LuceneException { + if (jsonObject.containsKey(key)) { + ValueType valueType = jsonObject.get(key).getValueType(); + switch (valueType) { + case STRING: + String dateString = jsonObject.getString(key); + try { + return decodeTime(dateString) + offset; + } catch (Exception e) { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Could not parse date " + dateString + " using expected format yyyyMMddHHmm"); + } + case NUMBER: + return jsonObject.getJsonNumber(key).longValueExact(); + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Dates should be represented by a NUMBER or STRING JsonValue, but got " + valueType); + } + } + return null; + } + + /** + * Parses dimensions to apply faceting to from the incoming Json. If ranges are + * specified, these are also parsed. + * + * @param jsonObject Json from incoming search request. 
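A hypothetical "dimensions" entry of the shape parseDimensions expects, with one labelled range on the long field "date" (the epoch values cover the year 2020):

    import javax.json.Json;
    import javax.json.JsonObject;

    class DimensionsSketch {
        static JsonObject dimensions() {
            return Json.createObjectBuilder()
                    .add("dimensions", Json.createArrayBuilder()
                            .add(Json.createObjectBuilder()
                                    .add("dimension", "date")
                                    .add("ranges", Json.createArrayBuilder()
                                            .add(Json.createObjectBuilder()
                                                    .add("from", 1577836800000L)
                                                    .add("to", 1609459200000L)
                                                    .add("key", "2020")))))
                    .build();
        }
    }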
+ * @throws LuceneException + */ + private void parseDimensions(JsonObject jsonObject) throws LuceneException { + if (jsonObject.containsKey("dimensions")) { + List<JsonObject> dimensionObjects = jsonObject.getJsonArray("dimensions").getValuesAs(JsonObject.class); + for (JsonObject dimensionObject : dimensionObjects) { + if (!dimensionObject.containsKey("dimension")) { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "'dimension' not specified for facet request " + dimensionObject.toString()); + } + String dimension = dimensionObject.getString("dimension"); + FacetedDimension facetDimensionRequest = new FacetedDimension(dimension); + if (dimensionObject.containsKey("ranges")) { + List<Range> ranges = facetDimensionRequest.getRanges(); + List<JsonObject> jsonRanges = dimensionObject.getJsonArray("ranges").getValuesAs(JsonObject.class); + if (DocumentMapping.longFields.contains(dimension)) { + for (JsonObject range : jsonRanges) { + Long lower = Long.MIN_VALUE; + Long upper = Long.MAX_VALUE; + if (range.containsKey("from")) { + lower = range.getJsonNumber("from").longValueExact(); + } + if (range.containsKey("to")) { + upper = range.getJsonNumber("to").longValueExact(); + } + String label = lower.toString() + "-" + upper.toString(); + if (range.containsKey("key")) { + label = range.getString("key"); + } + ranges.add(new LongRange(label, lower, true, upper, false)); + } + } else if (DocumentMapping.doubleFields.contains(dimension)) { + for (JsonObject range : jsonRanges) { + Double lower = Double.NEGATIVE_INFINITY; + Double upper = Double.POSITIVE_INFINITY; + if (range.containsKey("from")) { + lower = range.getJsonNumber("from").doubleValue(); + } + if (range.containsKey("to")) { + upper = range.getJsonNumber("to").doubleValue(); + } + String label = lower.toString() + "-" + upper.toString(); + if (range.containsKey("key")) { + label = range.getString("key"); + } + ranges.add(new DoubleRange(label, lower, true, upper, false)); + } + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "'ranges' specified for dimension " + dimension + + " but this is not a supported numeric field"); + } + } + dimensions.put(dimension, facetDimensionRequest); + } + } + } + + /** + * Parses the fields to return with the search results from Json. + * + * @param jsonObject The Json from the search request. + * @throws LuceneException If the parsing fails. + */ + public void parseFields(JsonObject jsonObject) throws LuceneException { + if (jsonObject.containsKey("fields")) { + List<JsonString> fieldStrings = jsonObject.getJsonArray("fields").getValuesAs(JsonString.class); + // logger.trace("Parsing fields from {}", fieldStrings); + for (JsonString jsonString : fieldStrings) { + String[] splitString = jsonString.getString().split(" "); + if (splitString.length == 1) { + // Fields without a space apply directly to the target entity + fields.add(splitString[0]); + } else if (splitString.length == 2) { + // Otherwise, the first element is the target of a join, with the second being a + // field on that joined entity. + if (joinedFields.containsKey(splitString[0])) { + joinedFields.get(splitString[0]).add(splitString[1]); + } else { + joinedFields.putIfAbsent(splitString[0], + new HashSet<>(Arrays.asList(splitString[1]))); + } + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Could not parse field: " + jsonString.getString()); + } + } + } + } + + /** + * Parses a query and associated information from an incoming request without + * any logic specific to a single index or entity.
As such it may not be as + powerful, but is sufficient for simple queries (like those for faceting). + * + * @param jsonQuery Incoming query request encoded as Json. + * @param luceneQuery Lucene BooleanQuery.Builder + * @throws LuceneException If the types of the JsonValues in the query do not + * match those supported by icat.lucene + */ + private void parseGenericQuery(JsonObject jsonQuery, BooleanQuery.Builder luceneQuery) throws LuceneException { + for (Entry<String, JsonValue> entry : jsonQuery.entrySet()) { + String field = entry.getKey(); + ValueType valueType = entry.getValue().getValueType(); + switch (valueType) { + case STRING: + JsonString stringValue = (JsonString) entry.getValue(); + String fld = DocumentMapping.facetFields.contains(field) ? field + ".keyword" : field; + luceneQuery.add(new TermQuery(new Term(fld, stringValue.getString())), Occur.MUST); + break; + case NUMBER: + JsonNumber numberValue = (JsonNumber) entry.getValue(); + if (DocumentMapping.longFields.contains(field)) { + luceneQuery.add(LongPoint.newExactQuery(field, numberValue.longValueExact()), Occur.FILTER); + } else if (DocumentMapping.doubleFields.contains(field)) { + luceneQuery.add(DoublePoint.newExactQuery(field, numberValue.doubleValue()), Occur.FILTER); + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Value had type NUMBER, but field " + field + + " is not a known longField or doubleField"); + } + break; + case ARRAY: + // Only support an ARRAY of STRING, as a list of ICAT ids is currently the only + // use case + JsonArray arrayValue = (JsonArray) entry.getValue(); + ArrayList<BytesRef> bytesArray = new ArrayList<>(); + String valueAsString; + for (JsonValue value : arrayValue) { + if (value.getValueType().equals(ValueType.STRING)) { + valueAsString = ((JsonString) value).getString(); + } else { + valueAsString = value.toString(); + } + bytesArray.add(new BytesRef(valueAsString)); + } + luceneQuery.add(new TermInSetQuery(field, bytesArray), Occur.MUST); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Query values should be ARRAY, STRING or NUMBER, but had value of type " + valueType); + } + } + query = maybeEmptyQuery(luceneQuery); + } + + /** + * Parses a query applying to a single parameter from incoming Json. + * + * @param p JsonValue (JsonObject) representing a query against a single + * parameter. + * @return BooleanQuery.Builder for a single parameter.
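The parameter clauses handled by parseParameter have this shape; a hypothetical numeric example (the name, units and bounds are invented):

    import javax.json.Json;
    import javax.json.JsonObject;

    class ParameterSketch {
        // "name" and "units" become wildcard terms, and exactly one value form
        // is expected: stringValue, lower/upperDateValue or lower/upperNumericValue.
        static JsonObject numericParameter() {
            return Json.createObjectBuilder()
                    .add("name", "temperature")
                    .add("units", "kelvin")
                    .add("lowerNumericValue", 4.2)
                    .add("upperNumericValue", 300.0)
                    .build();
        }
    }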
+ * @throws LuceneException + */ + private Builder parseParameter(JsonValue p) throws LuceneException { + JsonObject parameter = (JsonObject) p; + BooleanQuery.Builder paramQuery = new BooleanQuery.Builder(); + String pName = parameter.getString("name", null); + if (pName != null) { + paramQuery.add(new WildcardQuery(new Term("type.name.keyword", pName)), Occur.MUST); + } + + String pUnits = parameter.getString("units", null); + if (pUnits != null) { + paramQuery.add(new WildcardQuery(new Term("type.units", pUnits)), Occur.MUST); + } + if (parameter.containsKey("stringValue")) { + String pStringValue = parameter.getString("stringValue", null); + paramQuery.add(new WildcardQuery(new Term("stringValue", pStringValue)), Occur.MUST); + } else if (parameter.containsKey("lowerDateValue") && parameter.containsKey("upperDateValue")) { + buildDateRanges(paramQuery, parameter, "lowerDateValue", "upperDateValue", "dateTimeValue"); + } else if (parameter.containsKey("lowerNumericValue") && parameter.containsKey("upperNumericValue")) { + Double pLowerNumericValue = parameter.getJsonNumber("lowerNumericValue").doubleValue(); + Double pUpperNumericValue = parameter.getJsonNumber("upperNumericValue").doubleValue(); + paramQuery.add(DoublePoint.newRangeQuery("numericValue", pLowerNumericValue, pUpperNumericValue), + Occur.MUST); + } + return paramQuery; + } + + /** + * Parses a Lucene FieldDoc to be "searched after" from a String representation + * of a JSON object, and stores it on this SearchBucket. Does nothing if + * searchAfter is null or an empty String. + * + * @param searchAfter String representation of a JSON object containing the + * "shardIndex" and Lucene "doc" id, and optionally the + * "score" and sort "fields" of the last document from a + * previous search. + * @throws LuceneException If an entry in the fields array is not a STRING or + * NUMBER + */ + private void parseSearchAfter(String searchAfter) throws LuceneException { + if (searchAfter == null || searchAfter.equals("")) { + return; + } + SortField[] sortFields = sort.getSort(); + JsonReader reader = Json.createReader(new StringReader(searchAfter)); + JsonObject object = reader.readObject(); + // shardIndex and Lucene doc Id are always needed to determine tie breaks, even + // if the field sort resulted in no ties in the first place + int shardIndex = object.getInt("shardIndex"); + int doc = object.getInt("doc"); + float score = Float.NaN; + List<Object> fields = new ArrayList<>(); + if (object.containsKey("score")) { + score = object.getJsonNumber("score").bigDecimalValue().floatValue(); + } + if (object.containsKey("fields")) { + JsonArray jsonArray = object.getJsonArray("fields"); + if (jsonArray.size() != sortFields.length) { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "fields should have the same length as sort, but they were " + + jsonArray.size() + " and " + sortFields.length); + } + for (int i = 0; i < sortFields.length; i++) { + JsonValue value = jsonArray.get(i); + switch (value.getValueType()) { + case NUMBER: + JsonNumber number = ((JsonNumber) value); + switch (sortFields[i].getType()) { + case FLOAT: + case DOUBLE: + case SCORE: + fields.add(number.bigDecimalValue().floatValue()); + break; + case INT: + case LONG: + case DOC: + case CUSTOM: + fields.add(number.longValueExact()); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "fields contained a NUMBER but the corresponding field was " + + sortFields[i]); + } + break; + case STRING: + fields.add(new BytesRef(((JsonString) value).getString())); + break; +
default: + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "fields should be an array of STRING and NUMBER, but had entry of type " + + value.getValueType()); + } + } + } + this.searchAfter = new FieldDoc(doc, score, fields.toArray(), shardIndex); + } + + /** + * Parses the String from the request into a Lucene Sort object, which is + * stored on this SearchBucket. Multiple sort criteria are supported, and will + * be applied in order. + * + * @param sortString String representation of a JSON object with the field(s) + * to sort as keys, and the direction ("asc" or "desc") as + * value(s). + * @throws LuceneException If the value for any key isn't "asc" or "desc" + */ + public void parseSort(String sortString) throws LuceneException { + if (sortString == null || sortString.equals("") || sortString.equals("{}")) { + scored = true; + sort = new Sort(SortField.FIELD_SCORE, new SortedNumericSortField("id.long", Type.LONG)); + return; + } + try (JsonReader reader = Json.createReader(new ByteArrayInputStream(sortString.getBytes()))) { + JsonObject object = reader.readObject(); + List<SortField> fields = new ArrayList<>(); + for (String key : object.keySet()) { + String order = object.getString(key); + Boolean reverse; + if (order.equals("asc")) { + reverse = false; + } else if (order.equals("desc")) { + reverse = true; + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "Sort order must be 'asc' or 'desc' but it was '" + order + "'"); + } + + if (DocumentMapping.longFields.contains(key)) { + fields.add(new SortedNumericSortField(key, Type.LONG, reverse)); + } else if (DocumentMapping.doubleFields.contains(key)) { + fields.add(new SortedNumericSortField(key, Type.DOUBLE, reverse)); + } else { + fields.add(new SortField(key, Type.STRING, reverse)); + } + } + fields.add(new SortedNumericSortField("id.long", Type.LONG)); + scored = false; + sort = new Sort(fields.toArray(new SortField[0])); + } + } +} diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index 25babbd..99fcae0 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -5,4 +5,4 @@ directory = ${HOME}/data/lucene commitSeconds = 5 maxShardSize = 2147483648 ip = 127.0.0.1/32 -units = \u2103: celsius degC, K: kelvin +units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin From 49132308f685ed8880fe896c55ffbc8fc0387a0b Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 22 Jun 2022 02:45:00 +0100 Subject: [PATCH 47/73] Support for searching on sample name #19 --- .../icatproject/lucene/DocumentMapping.java | 77 ++++++++++++------- .../java/org/icatproject/lucene/Lucene.java | 13 ++-- .../org/icatproject/lucene/SearchBucket.java | 24 ++---- 3 files changed, 63 insertions(+), 51 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java index 27aa532..469a7f8 100644 --- a/src/main/java/org/icatproject/lucene/DocumentMapping.java +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -15,15 +15,18 @@ public class DocumentMapping { */ public static class ParentRelationship { public String parentName; - public String fieldPrefix; + public String joiningField; + public Set<String> fields; + /** - * @param parentName Name of the parent entity. - * @param fieldPrefix How nested fields should be prefixed. + * @param parentName Name of the parent entity. + * @param joiningField Field that joins the child to its parent.
+ * @param fields Fields that should be updated by this relationship. */ - public ParentRelationship(String parentName, String fieldPrefix) { + public ParentRelationship(String parentName, String joiningField, String... fields) { this.parentName = parentName; - this.fieldPrefix = fieldPrefix; + this.joiningField = joiningField; + this.fields = new HashSet<>(Arrays.asList(fields)); } } @@ -43,12 +46,14 @@ public ParentRelationship(String parentName, String fieldPrefix) { public static final StandardQueryParser investigationParser = new StandardQueryParser(); public static final StandardQueryParser sampleParser = new StandardQueryParser(); - static { + static { doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI")); facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue")); - longFields.addAll(Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate")); - sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", "date", - "startDate", "endDate", "name", "stringValue", "dateTimeValue", "numericValue", "numericValueSI")); + longFields.addAll( + Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate", "fileSize")); + sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", + "sample.investigation.id", "date", "name", "stringValue", "dateTimeValue", "numericValue", + "numericValueSI", "fileSize")); textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name", "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name", "sample.type.name", "title", "summary", "facility.name", "user.fullName", "type.name")); @@ -58,35 +63,51 @@ public ParentRelationship(String parentName, String fieldPrefix) { "InvestigationUser", "Sample")); relationships.put("Instrument", - new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument") }); - relationships.put("User", new ParentRelationship[] { new ParentRelationship("InvestigationUser", "user"), - new ParentRelationship("InstrumentScientist", "user") }); - relationships.put("Sample", new ParentRelationship[] { new ParentRelationship("Dataset", "sample") }); - relationships.put("SampleType", new ParentRelationship[] { new ParentRelationship("Sample", "type"), - new ParentRelationship("Dataset", "sample.type") }); + new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument.id", + "instrument.name", "instrument.fullName") }); + relationships.put("User", + new ParentRelationship[] { + new ParentRelationship("InvestigationUser", "user.id", "user.name", "user.fullName"), + new ParentRelationship("InstrumentScientist", "user.id", "user.name", "user.fullName") }); + relationships.put("Sample", new ParentRelationship[] { + new ParentRelationship("Dataset", "sample.id", "sample.name", "sample.investigation.id"), + new ParentRelationship("Datafile", "sample.id", "sample.name", "sample.investigation.id") }); + relationships.put("SampleType", + new ParentRelationship[] { new ParentRelationship("Sample", "type.id", "type.name"), + new ParentRelationship("Dataset", "sample.type.id", "sample.type.name"), + new ParentRelationship("Datafile", "sample.type.id", "sample.type.name") }); relationships.put("InvestigationType", - new ParentRelationship[] { new ParentRelationship("Investigation", "type") }); - relationships.put("DatasetType", new 
ParentRelationship[] { new ParentRelationship("Dataset", "type") }); + new ParentRelationship[] { new ParentRelationship("Investigation", "type.id", "type.name") }); + relationships.put("DatasetType", + new ParentRelationship[] { new ParentRelationship("Dataset", "type.id", "type.name") }); relationships.put("DatafileFormat", - new ParentRelationship[] { new ParentRelationship("Datafile", "datafileFormat") }); - relationships.put("Facility", new ParentRelationship[] { new ParentRelationship("Investigation", "facility") }); + new ParentRelationship[] { + new ParentRelationship("Datafile", "datafileFormat.id", "datafileFormat.name") }); + relationships.put("Facility", + new ParentRelationship[] { new ParentRelationship("Investigation", "facility.id", "facility.name") }); relationships.put("ParameterType", - new ParentRelationship[] { new ParentRelationship("DatafileParameter", "type"), - new ParentRelationship("DatasetParameter", "type"), - new ParentRelationship("InvestigationParameter", "type") }); + new ParentRelationship[] { new ParentRelationship("DatafileParameter", "type.id", "type.name"), + new ParentRelationship("DatasetParameter", "type.id", "type.name"), + new ParentRelationship("InvestigationParameter", "type.id", "type.name") }); relationships.put("Investigation", - new ParentRelationship[] { new ParentRelationship("Dataset", "investigation"), - new ParentRelationship("datafile", "investigation") }); + new ParentRelationship[] { + new ParentRelationship("Dataset", "investigation.id", "investigation.name", + "investigation.title", "investigation.startDate", "visitId"), + new ParentRelationship("datafile", "investigation.id", "investigation.name", "visitId") }); + relationships.put("Dataset", + new ParentRelationship[] { new ParentRelationship("Datafile", "dataset.id", "dataset.name") }); genericParser.setAllowLeadingWildcard(true); genericParser.setAnalyzer(analyzer); - CharSequence[] datafileFields = { "name", "description", "location", "datafileFormat.name" }; + CharSequence[] datafileFields = { "name", "description", "location", "datafileFormat.name", "visitId", + "sample.name", "sample.type.name" }; datafileParser.setAllowLeadingWildcard(true); datafileParser.setAnalyzer(analyzer); datafileParser.setMultiFields(datafileFields); - CharSequence[] datasetFields = { "name", "description", "sample.name", "sample.type.name", "type.name" }; + CharSequence[] datasetFields = { "name", "description", "sample.name", "sample.type.name", "type.name", + "visitId" }; datasetParser.setAllowLeadingWildcard(true); datasetParser.setAnalyzer(analyzer); datasetParser.setMultiFields(datasetFields); @@ -97,9 +118,9 @@ public ParentRelationship(String parentName, String fieldPrefix) { investigationParser.setAnalyzer(analyzer); investigationParser.setMultiFields(investigationFields); - CharSequence[] sampleFields = { "name", "type.name" }; + CharSequence[] sampleFields = { "sample.name", "sample.type.name" }; sampleParser.setAllowLeadingWildcard(true); sampleParser.setAnalyzer(analyzer); sampleParser.setMultiFields(sampleFields); - } + } } diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 282d413..bab4747 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -1279,7 +1279,6 @@ private void addSortField(JsonObject json, Document document, String key) { document.add(new NumericDocValuesField("id.long", value)); document.add(new StoredField("id.long", value)); } - // 
TODO add special case for startDate -> date to make sorting easier? if (DocumentMapping.longFields.contains(key)) { document.add(new NumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); } else if (DocumentMapping.doubleFields.contains(key)) { @@ -1345,17 +1344,17 @@ private Document updateDocument(JsonObject json, Document oldDocument) { /** * Returns a new Lucene Document that has the same fields as were present in - * oldDocument, except in cases where the field name starts with fieldPrefix. + * oldDocument, except those provided as an argument to prune. * - * @param fieldPrefix Any fields with a name starting with this String will not + * @param fields These fields will not * be present in the returned Document. * @param oldDocument Lucene Document to be pruned. * @return Lucene Document with pruned fields. */ - private Document pruneDocument(String fieldPrefix, Document oldDocument) { + private Document pruneDocument(Set<String> fields, Document oldDocument) { Document newDocument = new Document(); for (IndexableField field : oldDocument.getFields()) { - if (!field.name().startsWith(fieldPrefix)) { + if (!fields.contains(field.name())) { addSortField(field, newDocument); newDocument.add(field); } @@ -1446,7 +1445,7 @@ private void updateByRelation(JsonObject operationBody, Boolean delete) IndexSearcher searcher = getSearcher(new HashMap<>(), parentRelationship.parentName); int blockSize = 10000; - TermQuery query = new TermQuery(new Term(parentRelationship.fieldPrefix + ".id", childId)); + TermQuery query = new TermQuery(new Term(parentRelationship.joiningField, childId)); Sort sort = new Sort(new SortField("id", Type.STRING)); ScoreDoc[] scoreDocs = searcher.search(query, blockSize, sort).scoreDocs; while (scoreDocs.length != 0) { @@ -1454,7 +1453,7 @@ for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document oldDocument = searcher.doc(scoreDoc.doc); String parentId = oldDocument.get("id"); - Document newDocument = delete ?
pruneDocument(parentRelationship.fields, oldDocument) : updateDocument(operationBody.getJsonObject("doc"), oldDocument); logger.trace("updateByRelation: {}", newDocument); bucket.updateDocument(new Term("id", parentId), facetsConfig.build(newDocument)); diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index 21dd667..35afa91 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -184,7 +184,14 @@ public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest req text = jsonQuery.getString("text", null); if (text != null) { - luceneQuery.add(DocumentMapping.investigationParser.parse(text, null), Occur.MUST); + Builder textBuilder = new BooleanQuery.Builder(); + textBuilder.add(DocumentMapping.investigationParser.parse(text, null), Occur.SHOULD); + + IndexSearcher sampleSearcher = lucene.getSearcher(searcherMap, "Sample"); + Query joinedSampleQuery = JoinUtil.createJoinQuery("sample.investigation.id", false, "id", + DocumentMapping.sampleParser.parse(text, null), sampleSearcher, ScoreMode.Avg); + textBuilder.add(joinedSampleQuery, Occur.SHOULD); + luceneQuery.add(textBuilder.build(), Occur.MUST); } buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate"); @@ -201,21 +208,6 @@ public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest req } } - if (jsonQuery.containsKey("samples")) { - JsonArray samples = jsonQuery.getJsonArray("samples"); - IndexSearcher sampleSearcher = lucene.getSearcher(searcherMap, "Sample"); - - for (JsonValue s : samples) { - JsonString sample = (JsonString) s; - BooleanQuery.Builder sampleQuery = new BooleanQuery.Builder(); - sampleQuery.add(DocumentMapping.sampleParser.parse(sample.getString(), null), Occur.MUST); - Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", - sampleQuery.build(), - sampleSearcher, ScoreMode.None); - luceneQuery.add(toQuery, Occur.MUST); - } - } - String userFullName = jsonQuery.getString("userFullName", null); if (userFullName != null) { BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); From 338dda3a41f8630fdb7804646f4a709de84c55a6 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 22 Jul 2022 13:17:57 +0100 Subject: [PATCH 48/73] SampleParameter, fileCount, value in range #19 --- .../icatproject/lucene/DocumentMapping.java | 31 +-- .../java/org/icatproject/lucene/Lucene.java | 234 +++++++++++++++--- .../org/icatproject/lucene/SearchBucket.java | 78 +++++- 3 files changed, 293 insertions(+), 50 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java index 469a7f8..75500d4 100644 --- a/src/main/java/org/icatproject/lucene/DocumentMapping.java +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -19,16 +19,15 @@ public static class ParentRelationship { public Set fields; /** - * @param parentName Name of the parent entity. + * @param parentName Name of the parent entity. * @param joiningField Field that joins the child to its parent. - * @param fields Fields that should be updated by this relationship. + * @param fields Fields that should be updated by this relationship. */ public ParentRelationship(String parentName, String joiningField, String... 
fields) { this.parentName = parentName; this.joiningField = joiningField; this.fields = new HashSet<>(Arrays.asList(fields)); } - } public static final Set doubleFields = new HashSet<>(); @@ -47,20 +46,23 @@ public ParentRelationship(String parentName, String joiningField, String... fiel public static final StandardQueryParser sampleParser = new StandardQueryParser(); static { - doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI")); + doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI", "rangeTop", "rangeTopSI", "rangeBottom", + "rangeBottomSI")); facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue")); longFields.addAll( - Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate", "fileSize")); - sortFields.addAll(Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", - "sample.investigation.id", "date", "name", "stringValue", "dateTimeValue", "numericValue", - "numericValueSI", "fileSize")); + Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate", "fileSize", + "fileCount")); + sortFields.addAll( + Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", "sample.id", + "sample.investigation.id", "date", "name", "stringValue", "dateTimeValue", "numericValue", + "numericValueSI", "fileSize", "fileCount")); textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name", "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name", - "sample.type.name", "title", "summary", "facility.name", "user.fullName", "type.name")); + "sample.type.name", "title", "summary", "facility.name", "user.fullName", "type.name", "doi")); indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter", "DatasetParameter", "InstrumentScientist", "InvestigationInstrument", "InvestigationParameter", - "InvestigationUser", "Sample")); + "InvestigationUser", "Sample", "SampleParameter")); relationships.put("Instrument", new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument.id", @@ -88,7 +90,8 @@ public ParentRelationship(String parentName, String joiningField, String... fiel relationships.put("ParameterType", new ParentRelationship[] { new ParentRelationship("DatafileParameter", "type.id", "type.name"), new ParentRelationship("DatasetParameter", "type.id", "type.name"), - new ParentRelationship("InvestigationParameter", "type.id", "type.name") }); + new ParentRelationship("InvestigationParameter", "type.id", "type.name"), + new ParentRelationship("SampleParameter", "type.id", "type.name") }); relationships.put("Investigation", new ParentRelationship[] { new ParentRelationship("Dataset", "investigation.id", "investigation.name", @@ -101,19 +104,19 @@ public ParentRelationship(String parentName, String joiningField, String... 
fiel genericParser.setAnalyzer(analyzer); CharSequence[] datafileFields = { "name", "description", "location", "datafileFormat.name", "visitId", - "sample.name", "sample.type.name" }; + "sample.name", "sample.type.name", "doi" }; datafileParser.setAllowLeadingWildcard(true); datafileParser.setAnalyzer(analyzer); datafileParser.setMultiFields(datafileFields); CharSequence[] datasetFields = { "name", "description", "sample.name", "sample.type.name", "type.name", - "visitId" }; + "visitId", "doi" }; datasetParser.setAllowLeadingWildcard(true); datasetParser.setAnalyzer(analyzer); datasetParser.setMultiFields(datasetFields); CharSequence[] investigationFields = { "name", "visitId", "title", "summary", "facility.name", - "type.name" }; + "type.name", "doi" }; investigationParser.setAllowLeadingWildcard(true); investigationParser.setAnalyzer(analyzer); investigationParser.setMultiFields(investigationFields); diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index bab4747..b88d775 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -28,6 +28,7 @@ import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonException; +import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.json.JsonReader; @@ -478,7 +479,8 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx updateByRelation(operationBody, false); } if (DocumentMapping.indexedEntities.contains(entityName)) { - Document document = parseDocument(operationBody.getJsonObject("doc")); + JsonObject documentObject = operationBody.getJsonObject("doc"); + Document document = parseDocument(documentObject); logger.trace("create {} {}", entityName, document.toString()); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (bucket.locked.get()) { @@ -486,6 +488,77 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx "Lucene locked for " + entityName); } bucket.addDocument(facetsConfig.build(document)); + // Special case for filesizes + if (entityName.equals("Datafile")) { + JsonNumber jsonFileSize = documentObject.getJsonNumber("fileSize"); + if (jsonFileSize != null) { + String datasetId = documentObject.getString("dataset.id", null); + String investigationId = documentObject.getString("investigation.id", null); + logger.trace("Aggregating {} to {}, {}", jsonFileSize.longValue(), datasetId, investigationId); + aggregateFileSize(jsonFileSize.longValueExact(), 0, 1, datasetId, "dataset"); + aggregateFileSize(jsonFileSize.longValueExact(), 0, 1, investigationId, "investigation"); + } + } + } + } + + /** + * Changes the fileSize on an entity by the specified amount. This is used to + * aggregate the individual fileSize of Datafiles up to Dataset and + * Investigation sizes. + * + * @param sizeToAdd Increases the fileSize of the entity by this much. + * Should be 0 for deletes. + * @param sizeToSubtract Decreases the fileSize of the entity by this much. + * Should be 0 for creates. + * @param deltaFileCount Changes the file count by this much. + * @param entityId Icat id of entity to update. + * @param index Index (entity) to update. 
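A hedged illustration of the delta convention described above (the entity id "123" is invented); these mirror the calls made from create, delete and update elsewhere in this patch:

    // create: a new 100 byte Datafile adds its size, and one to the count
    aggregateFileSize(100, 0, 1, "123", "dataset");
    // delete: removing the same Datafile reverses both
    aggregateFileSize(0, 100, -1, "123", "dataset");
    // update: resizing it from 100 to 150 bytes changes the size only
    aggregateFileSize(150, 100, 0, "123", "dataset");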
+ * @throws IOException + */ + private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFileCount, String entityId, String index) + throws IOException { + long deltaFileSize = sizeToAdd - sizeToSubtract; + if (entityId != null && (deltaFileSize != 0 || deltaFileCount != 0)) { + IndexBucket indexBucket = indexBuckets.computeIfAbsent(index, k -> new IndexBucket(k)); + for (ShardBucket shardBucket : indexBucket.shardList) { + shardBucket.commit(); + IndexSearcher searcher = shardBucket.searcherManager.acquire(); + Term idTerm = new Term("id", entityId); + TopDocs topDocs = searcher.search(new TermQuery(idTerm), 1); + if (topDocs.totalHits.value == 1) { + int docId = topDocs.scoreDocs[0].doc; + Document document = searcher.doc(docId); + shardBucket.searcherManager.release(searcher); + Set<String> prunedFields = new HashSet<>(); + List<IndexableField> fieldsToAdd = new ArrayList<>(); + + if (deltaFileSize != 0) { + prunedFields.add("fileSize"); + long oldSize = document.getField("fileSize").numericValue().longValue(); + long newSize = oldSize == -1 ? deltaFileSize : oldSize + deltaFileSize; + fieldsToAdd.add(new LongPoint("fileSize", newSize)); + fieldsToAdd.add(new StoredField("fileSize", newSize)); + fieldsToAdd.add(new NumericDocValuesField("fileSize", newSize)); + } + + if (deltaFileCount != 0) { + prunedFields.add("fileCount"); + long oldCount = document.getField("fileCount").numericValue().longValue(); + long newCount = oldCount + deltaFileCount; + fieldsToAdd.add(new LongPoint("fileCount", newCount)); + fieldsToAdd.add(new StoredField("fileCount", newCount)); + fieldsToAdd.add(new NumericDocValuesField("fileCount", newCount)); + } + + Document newDocument = pruneDocument(prunedFields, document); + fieldsToAdd.forEach(field -> newDocument.add(field)); + shardBucket.indexWriter.updateDocument(idTerm, facetsConfig.build(newDocument)); + shardBucket.commit(); + break; + } + shardBucket.searcherManager.release(searcher); + } } } @@ -597,8 +670,31 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio "Lucene locked for " + entityName); } logger.trace("delete {} {}", entityName, icatId); + // Special case for filesizes + Term term = new Term("id", icatId); + if (entityName.equals("Datafile")) { + long sizeToSubtract = 0; + for (ShardBucket shardBucket : bucket.shardList) { + IndexSearcher datafileSearcher = shardBucket.searcherManager.acquire(); + TopDocs topDocs = datafileSearcher.search(new TermQuery(term), 1); + if (topDocs.totalHits.value == 1) { + int docId = topDocs.scoreDocs[0].doc; + Document datasetDocument = datafileSearcher.doc(docId); + sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue(); + if (sizeToSubtract > 0) { + String datasetId = datasetDocument.getField("dataset.id").stringValue(); + String investigationId = datasetDocument.getField("investigation.id").stringValue(); + aggregateFileSize(0, sizeToSubtract, -1, datasetId, "dataset"); + aggregateFileSize(0, sizeToSubtract, -1, investigationId, "investigation"); + } + shardBucket.searcherManager.release(datafileSearcher); + break; + } + shardBucket.searcherManager.release(datafileSearcher); + } + } for (ShardBucket shardBucket : bucket.shardList) { - shardBucket.indexWriter.deleteDocuments(new Term("id", icatId)); + shardBucket.indexWriter.deleteDocuments(term); } } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -1114,10 +1210,18 @@ private String luceneSearchResult(String name, SearchBucket search, String searc :
sortField.getType(); switch (type) { case LONG: - gen.write(indexableField.numericValue().longValue()); + if (indexableField.numericValue() != null) { + gen.write(indexableField.numericValue().longValue()); + } else if (indexableField.stringValue() != null) { + gen.write(Long.parseLong(indexableField.stringValue())); + } break; case DOUBLE: - gen.write(indexableField.numericValue().doubleValue()); + if (indexableField.numericValue() != null) { + gen.write(indexableField.numericValue().doubleValue()); + } else if (indexableField.stringValue() != null) { + gen.write(Double.parseDouble(indexableField.stringValue())); + } break; case STRING: gen.write(indexableField.stringValue()); @@ -1237,28 +1341,75 @@ private void addField(JsonObject json, Document document, String key) { // Whenever the units are set or changed, convert to SI if (key.equals("type.units")) { String unitString = json.getString("type.units"); - IndexableField field = document.getField("numericValue"); - double value; - if (field != null) { - value = NumericUtils.sortableLongToDouble(field.numericValue().longValue()); - } else if (json.containsKey("numericValue")) { - value = json.getJsonNumber(key).doubleValue(); - } else { - // Strings and date/time values also have units, so if we aren't dealing with a - // number don't convert - return; - } - logger.trace("Attempting to convert {} {}", value, unitString); - SystemValue systemValue = icatUnits.new SystemValue(value, unitString); - if (systemValue.units != null) { - document.add(new StringField("type.unitsSI", systemValue.units, Store.YES)); - } - if (systemValue.value != null) { - document.add(new DoublePoint("numericValueSI", systemValue.value)); - document.add(new StoredField("numericValueSI", systemValue.value)); - long sortableLong = NumericUtils.doubleToSortableLong(systemValue.value); - document.add(new NumericDocValuesField("numericValueSI", sortableLong)); - } + convertValue(document, json, unitString, "numericValue"); + convertValue(document, json, unitString, "rangeTop"); + convertValue(document, json, unitString, "rangeBottom"); + } + } + + /** + * Attempts to convert numericFieldName from json into SI units using its + * recorded unitString, and then adds the converted value to the Lucene + * document. + * + * @param document Lucene Document to add the field to. + * @param json JsonObject containing the field/value pairs to be added. + * @param unitString Units of the value to be converted. + * @param numericFieldName Name (key) of the field to convert and add.
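As a worked example of the conversion, using the eV definition added to run.properties earlier in this series ("J: eV 1.602176634e-19") and assuming IcatUnits resolves it as configured: a numericValue of 1000 with units "eV" gains numericValueSI = 1000 * 1.602176634e-19 = 1.602176634e-16 and type.unitsSI = "J". Sketched with the SystemValue API used below:

    // Inner-class instantiation as used elsewhere in this file; value and units
    // are null if the unit string could not be parsed.
    SystemValue si = icatUnits.new SystemValue(1000.0, "eV");
    // si.units == "J", si.value == 1.602176634e-16 (if "eV" was recognised)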
+ */ + private void convertValue(Document document, JsonObject json, String unitString, String numericFieldName) { + IndexableField field = document.getField(numericFieldName); + double value; + if (field != null) { + value = NumericUtils.sortableLongToDouble(field.numericValue().longValue()); + } else if (json.containsKey(numericFieldName)) { + value = json.getJsonNumber(numericFieldName).doubleValue(); + } else { + // If we aren't dealing with the desired numeric field don't convert + return; + } + logger.trace("Attempting to convert {} {}", value, unitString); + SystemValue systemValue = icatUnits.new SystemValue(value, unitString); + if (systemValue.units != null) { + document.add(new StringField("type.unitsSI", systemValue.units, Store.YES)); + } + if (systemValue.value != null) { + document.add(new DoublePoint(numericFieldName + "SI", systemValue.value)); + document.add(new StoredField(numericFieldName + "SI", systemValue.value)); + long sortableLong = NumericUtils.doubleToSortableLong(systemValue.value); + document.add(new NumericDocValuesField(numericFieldName + "SI", sortableLong)); + } + } + + /** + * Adds field to document taking its typing, sorting and faceting into account. + * + * @param field Lucene IndexableField to add to the document. + * @param document Lucene Document to add the field to. + */ + private void addField(IndexableField field, Document document) { + // SortedDocValuesField need to be indexed in addition to indexing a Field for + // searching/storing, so deal with that first + addSortField(field, document); + String key = field.name(); + + // Likewise, faceted fields should be considered separately + if (DocumentMapping.facetFields.contains(key)) { + String value = field.stringValue(); + document.add(new SortedSetDocValuesFacetField(key + ".keyword", value)); + document.add(new StringField(key + ".keyword", value, Store.NO)); + } + + if (DocumentMapping.doubleFields.contains(key)) { + Double value = field.numericValue().doubleValue(); + document.add(new DoublePoint(key, value)); + document.add(new StoredField(key, value)); + } else if (DocumentMapping.longFields.contains(key)) { + Long value = field.numericValue().longValue(); + document.add(new LongPoint(key, value)); + document.add(new StoredField(key, value)); + } else if (DocumentMapping.textFields.contains(key)) { + document.add(new TextField(key, field.stringValue(), Store.YES)); + } else { + document.add(new StringField(key, field.stringValue(), Store.YES)); } } @@ -1346,7 +1497,7 @@ private Document updateDocument(JsonObject json, Document oldDocument) { * Returns a new Lucene Document that has the same fields as were present in * oldDocument, except those provided as an argument to prune. * - * @param fields These fields will not + * @param fields These fields will not * be present in the returned Document. * @param oldDocument Lucene Document to be pruned. * @return Lucene Document with pruned fields. 
@@ -1355,8 +1506,7 @@ private Document pruneDocument(Set fields, Document oldDocument) { Document newDocument = new Document(); for (IndexableField field : oldDocument.getFields()) { if (!fields.contains(field.name())) { - addSortField(field, newDocument); - newDocument.add(field); + addField(field, newDocument); } } return newDocument; @@ -1405,12 +1555,36 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm } if (DocumentMapping.indexedEntities.contains(entityName)) { String icatId = operationBody.getString("_id"); - Document document = parseDocument(operationBody.getJsonObject("doc")); + JsonObject documentObject = operationBody.getJsonObject("doc"); + Document document = parseDocument(documentObject); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene locked for " + entityName); } + // Special case for filesizes + if (entityName.equals("Datafile")) { + JsonNumber jsonFileSize = documentObject.getJsonNumber("fileSize"); + if (jsonFileSize != null) { + long sizeToSubtract = 0; + List datafileSearchers = bucket.acquireSearchers(); + for (IndexSearcher datafileSearcher : datafileSearchers) { + TopDocs topDocs = datafileSearcher.search(new TermQuery(new Term("id", icatId)), 1); + if (topDocs.totalHits.value == 1) { + int docId = topDocs.scoreDocs[0].doc; + Document datasetDocument = datafileSearcher.doc(docId); + sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue(); + if (jsonFileSize.longValueExact() != sizeToSubtract) { + String datasetId = documentObject.getString("dataset.id", null); + String investigationId = documentObject.getString("investigation.id", null); + aggregateFileSize(jsonFileSize.longValueExact(), sizeToSubtract, 0, datasetId, "dataset"); + aggregateFileSize(jsonFileSize.longValueExact(), sizeToSubtract, 0, investigationId, "investigation"); + } + break; + } + } + } + } logger.trace("update: {}", document); bucket.updateDocument(new Term("id", icatId), facetsConfig.build(document)); } diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index 35afa91..57c6fa4 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -108,7 +108,7 @@ public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest req try (JsonReader r = Json.createReader(request.getInputStream())) { JsonObject o = r.readObject(); parseFields(o); - parseDimensions(o); // Don't need for DF + parseDimensions(o); JsonObject jsonQuery = o.getJsonObject("query"); BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); String userName; @@ -300,8 +300,14 @@ private void buildFilterQueries(String target, JsonObject requestedQuery, Builde // BUT only if we haven't already nested the queries (as we do when the key was // just a nested entity) IndexSearcher nestedSearcher = lucene.getSearcher(searcherMap, filterTarget); - Query nestedQuery = JoinUtil.createJoinQuery(target + ".id", false, "id", dimensionQuery, - nestedSearcher, ScoreMode.None); + Query nestedQuery; + if (filterTarget.equals("sample") && !target.equals("investigation")) { + nestedQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", dimensionQuery, + nestedSearcher, ScoreMode.None); + } else { + nestedQuery = JoinUtil.createJoinQuery(target + ".id", false, "id", 
dimensionQuery, + nestedSearcher, ScoreMode.None); + } queryBuilder.add(nestedQuery, Occur.FILTER); } else { // Otherwise, just add as SHOULD to the main query directly @@ -343,12 +349,26 @@ private Query parseFilter(String target, String fld, JsonValue value) throws IOE if (nestedFilter.containsKey("value")) { TermQuery query = new TermQuery(new Term(nestedField + ".keyword", nestedFilter.getString("value"))); nestedBoolBuilder.add(query, Occur.FILTER); + } else if (nestedFilter.containsKey("exact")) { + buildNestedExactQuery(nestedField, nestedFilter, nestedBoolBuilder); } else { buildNestedRangeQuery(nestedField, nestedFilter, nestedBoolBuilder); } }); - return JoinUtil.createJoinQuery(target + ".id", false, "id", nestedBoolBuilder.build(), - nestedSearcher, ScoreMode.None); + if (fld.contains("sample") && !target.equals("investigation")) { + // Datasets and Datafiles join by sample.id on both fields + return JoinUtil.createJoinQuery("sample.id", false, "sample.id", nestedBoolBuilder.build(), + nestedSearcher, ScoreMode.None); + } else if (fld.equals("sampleparameter") && target.equals("investigation")) { + Query sampleQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", nestedBoolBuilder.build(), + nestedSearcher, ScoreMode.None); + Query investigationQuery = JoinUtil.createJoinQuery("sample.investigation.id", false, "id", sampleQuery, + lucene.getSearcher(searcherMap, "sample"), ScoreMode.None); + return investigationQuery; + } else { + return JoinUtil.createJoinQuery(target + ".id", false, "id", nestedBoolBuilder.build(), + nestedSearcher, ScoreMode.None); + } } else { // Single range of values for a field JsonNumber from = valueObject.getJsonNumber("from"); @@ -366,6 +386,52 @@ private Query parseFilter(String target, String fld, JsonValue value) throws IOE } } + /** + * Builds an exact numeric query, intended for use with numeric or date/time parameters. + * + * @param fld Name of the field to apply the range to. + * @param valueObject JsonObject containing "exact", and optionally "units" + * as keys for an exact value. 
+ * @param builder BooleanQuery.Builder for the nested query + */ + private void buildNestedExactQuery(String fld, JsonObject valueObject, BooleanQuery.Builder builder) { + if (DocumentMapping.longFields.contains(fld)) { + long exact = valueObject.getJsonNumber("exact").longValueExact(); + builder.add(LongPoint.newExactQuery(fld, exact), Occur.FILTER); + } else { + Builder rangeBuilder = new BooleanQuery.Builder(); + Builder exactOrRangeBuilder = new BooleanQuery.Builder(); + double exact = valueObject.getJsonNumber("exact").doubleValue(); + String units = valueObject.getString("units", null); + if (units != null) { + SystemValue exactValue = lucene.icatUnits.new SystemValue(exact, units); + if (exactValue.value != null ) { + // If we were able to parse the units, apply query to the SI value + rangeBuilder.add(DoublePoint.newRangeQuery("rangeTopSI", exactValue.value, Double.POSITIVE_INFINITY), Occur.FILTER); + rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottomSI", Double.NEGATIVE_INFINITY, exactValue.value), Occur.FILTER); + exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD); + exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld + "SI", exactValue.value), Occur.SHOULD); + builder.add(exactOrRangeBuilder.build(), Occur.FILTER); + } else { + // If units could not be parsed, make them part of the query on the raw data + rangeBuilder.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY), Occur.FILTER); + rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact), Occur.FILTER); + exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD); + exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld, exact), Occur.SHOULD); + builder.add(exactOrRangeBuilder.build(), Occur.FILTER); + builder.add(new TermQuery(new Term("type.units", units)), Occur.FILTER); + } + } else { + // If units were not provided, just apply to the raw data + rangeBuilder.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY), Occur.FILTER); + rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact), Occur.FILTER); + exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD); + exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld, exact), Occur.SHOULD); + builder.add(exactOrRangeBuilder.build(), Occur.FILTER); + } + } + } + /** * Builds a range query, intended for use with numeric or date/time parameters. 
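The shape of the query built by buildNestedExactQuery is worth spelling out: a parameter matches if its [rangeBottom, rangeTop] interval contains the requested value, or if its single stored value equals it exactly. A rough standalone sketch of that boolean structure, using the range field names from the mapping above:

    import org.apache.lucene.document.DoublePoint;
    import org.apache.lucene.search.BooleanClause.Occur;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.Query;

    public class ExactOrRangeSketch {
        static Query exactOrRange(String fld, double exact) {
            // rangeTop >= exact AND rangeBottom <= exact, i.e. the stored
            // interval contains the requested value...
            BooleanQuery.Builder range = new BooleanQuery.Builder();
            range.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY), Occur.FILTER);
            range.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact), Occur.FILTER);
            // ...OR the single stored value equals the requested value
            BooleanQuery.Builder either = new BooleanQuery.Builder();
            either.add(range.build(), Occur.SHOULD);
            either.add(DoublePoint.newExactQuery(fld, exact), Occur.SHOULD);
            return either.build();
        }
    }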
* @@ -402,7 +468,7 @@ private void buildNestedRangeQuery(String fld, JsonObject valueObject, BooleanQu } else { // If units could not be parsed, make them part of the query on the raw data builder.add(DoublePoint.newRangeQuery(fld, from, to), Occur.FILTER); - builder.add(new TermQuery(new Term("type.units.keyword", units)), Occur.FILTER); + builder.add(new TermQuery(new Term("type.units", units)), Occur.FILTER); } } else { // If units were not provided, just apply to the raw data From ce51e33f0763fda21a830efb093e7bb04899740b Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 2 Aug 2022 14:44:42 +0000 Subject: [PATCH 49/73] Add utility to lock #19 --- .../java/org/icatproject/lucene/Lucene.java | 67 +++++++++++++------ src/main/resources/run.properties | 11 +-- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index b88d775..6d7f902 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -71,6 +71,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; @@ -331,6 +332,7 @@ public void releaseSearchers(List subSearchers) throws IOExceptio private int luceneCommitMillis; private Long luceneMaxShardSize; private long maxSearchTimeSeconds; + private boolean aggregateFiles; private AtomicLong bucketNum = new AtomicLong(); private Map indexBuckets = new ConcurrentHashMap<>(); @@ -408,11 +410,12 @@ public void addNow(@Context HttpServletRequest request, @PathParam("entityName") for (JsonObject document : documents) { createNow(entityName, document); } - } catch (IOException | JsonException e) { - + } catch (JsonException e) { logger.error("Could not parse JSON from {}", value.toString()); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } + } catch (IOException e) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } logger.debug("Added {} {} documents", documents.size(), entityName); } @@ -489,7 +492,7 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx } bucket.addDocument(facetsConfig.build(document)); // Special case for filesizes - if (entityName.equals("Datafile")) { + if (aggregateFiles && entityName.equals("Datafile")) { JsonNumber jsonFileSize = documentObject.getJsonNumber("fileSize"); if (jsonFileSize != null) { String datasetId = documentObject.getString("dataset.id", null); @@ -573,10 +576,6 @@ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFi */ private void createNow(String entityName, JsonObject documentJson) throws NumberFormatException, IOException, LuceneException { - if (!documentJson.containsKey("id")) { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "id was not in the document keys " + documentJson.keySet()); - } Document document = parseDocument(documentJson); logger.trace("create {} {}", entityName, document.toString()); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); @@ -672,7 +671,7 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio logger.trace("delete {} {}", entityName, icatId); // Special case for 
filesizes Term term = new Term("id", icatId); - if (entityName.equals("Datafile")) { + if (aggregateFiles && entityName.equals("Datafile")) { long sizeToSubtract = 0; for (ShardBucket shardBucket : bucket.shardList) { IndexSearcher datafileSearcher = shardBucket.searcherManager.acquire(); @@ -741,13 +740,11 @@ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, In parentId = document.get("id"); } else { parentId = document.get("investigation.id"); - logger.debug("investigation.id {}", parentId); } } else { fld = entityName.toLowerCase() + ".id"; parentId = document.get("id"); } - logger.debug("fld {}, parentId {}", fld, parentId); joinedSearch.query = new TermQuery(new Term(fld, parentId)); joinedSearch.sort = new Sort(new SortedNumericSortField("id.long", Type.LONG)); TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, shards, null); @@ -923,6 +920,7 @@ private void init() { luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), new Long(Integer.MAX_VALUE + 1)); maxSearchTimeSeconds = props.has("maxSearchTimeSeconds") ? props.getPositiveLong("maxSearchTimeSeconds") : 5; + aggregateFiles = props.getBoolean("aggregateFiles", false); timer = new Timer("LuceneCommitTimer"); timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); @@ -985,26 +983,51 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s } /** - * Locks the specified index for population, removing all existing documents and + * Locks the specified index for population, optionally removing all existing documents and * preventing normal modify operations until the index is unlocked. * * @param entityName Name of the entity/index to lock. + * @param request Incoming request. In order to delete all existing documents, the accompanying Json should specify {"delete": true}. * @throws LuceneException If already locked, or if there's an IOException when * deleting documents. 
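The fileSize handling in the delete path above depends on reading back whatever value is currently indexed before the document disappears. In isolation that lookup is just a term query on the ICAT id; a sketch, assuming a searcher already acquired from the relevant shard:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;

    public class FileSizeLookupSketch {
        // Returns the fileSize currently indexed for an ICAT id, so it can be
        // subtracted from the dataset/investigation aggregates; 0 if not indexed.
        static long indexedFileSize(IndexSearcher searcher, String icatId) throws java.io.IOException {
            TopDocs topDocs = searcher.search(new TermQuery(new Term("id", icatId)), 1);
            if (topDocs.totalHits.value == 0) {
                return 0;
            }
            Document document = searcher.doc(topDocs.scoreDocs[0].doc);
            return document.getField("fileSize").numericValue().longValue();
        }
    }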
*/ @POST @Path("lock/{entityName}") - public void lock(@PathParam("entityName") String entityName) throws LuceneException { - logger.info("Requesting lock of {} index", entityName); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public String lock(@PathParam("entityName") String entityName, @Context HttpServletRequest request) throws LuceneException { + try (JsonReader reader = Json.createReader(request.getInputStream())) { + boolean delete = reader.readObject().getBoolean("delete", false); + logger.info("Requesting lock of {} index, delete={}", entityName, delete); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); - if (!bucket.locked.compareAndSet(false, true)) { - throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene already locked for " + entityName); - } - try { - for (ShardBucket shardBucket : bucket.shardList) { - shardBucket.indexWriter.deleteAll(); + if (!bucket.locked.compareAndSet(false, true)) { + throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene already locked for " + entityName); + } + JsonObjectBuilder builder = Json.createObjectBuilder(); + if (delete) { + for (ShardBucket shardBucket : bucket.shardList) { + shardBucket.indexWriter.deleteAll(); + } + // Reset the shardList so we reset the routing + bucket.shardList = Arrays.asList(bucket.shardList.get(0)); + return builder.add("currentId", 0).build().toString(); + } + SearchBucket searchBucket = new SearchBucket(this); + searchBucket.query = new MatchAllDocsQuery(); + searchBucket.fields.add("id"); + searchBucket.scored = false; + searchBucket.sort = new Sort(new SortedNumericSortField("id.long", Type.LONG, true)); + TopFieldDocs topFieldDocs = searchShards(searchBucket, 1, bucket.shardList, null); + if (topFieldDocs.totalHits.value == 0) { + return builder.add("currentId", 0).build().toString(); } + int shardIndex = topFieldDocs.scoreDocs[0].shardIndex; + int doc = topFieldDocs.scoreDocs[0].doc; + IndexSearcher searcher = bucket.shardList.get(shardIndex).searcherManager.acquire(); + String id = searcher.doc(doc).get("id"); + bucket.shardList.get(shardIndex).searcherManager.release(searcher); + return builder.add("currentId", new Long(id)).build().toString(); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -1563,7 +1586,7 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm "Lucene locked for " + entityName); } // Special case for filesizes - if (entityName.equals("Datafile")) { + if (aggregateFiles && entityName.equals("Datafile")) { JsonNumber jsonFileSize = documentObject.getJsonNumber("fileSize"); if (jsonFileSize != null) { long sizeToSubtract = 0; diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index 99fcae0..7189854 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -1,8 +1,9 @@ # Real comments in this file are marked with '#' whereas commented out lines # are marked with '!' 
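The units property below is the conversion table behind the IcatUnits helper used by convertValue and buildNestedExactQuery. Based only on the call sites visible in these patches, consumption looks roughly like this; the constructor argument mirrors the property value and the printed figure is simple arithmetic on the eV factor it defines:

    import org.icatproject.lucene.IcatUnits;

    public class UnitsSketch {
        public static void main(String[] args) {
            IcatUnits icatUnits = new IcatUnits("J: eV 1.602176634e-19; K: kelvin");
            // SystemValue is the inner class used in convertValue; its units and
            // value fields come back null when the unit string cannot be parsed
            IcatUnits.SystemValue converted = icatUnits.new SystemValue(5.0, "eV");
            if (converted.value != null) {
                System.out.println(converted.value + " " + converted.units); // 8.01...e-19 J
            }
        }
    }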
-directory = ${HOME}/data/lucene -commitSeconds = 5 -maxShardSize = 2147483648 -ip = 127.0.0.1/32 -units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin +directory = ${HOME}/data/lucene +commitSeconds = 5 +maxShardSize = 2147483648 +ip = 127.0.0.1/32 +units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin +aggregateFiles = false From 5f59e1dfc936da1aaefdcab4ea6d0d51958da723 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Sun, 24 Jul 2022 08:03:56 +0100 Subject: [PATCH 50/73] Formatting changes #19 --- .../java/org/icatproject/lucene/Lucene.java | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 6d7f902..d0dc91e 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -415,7 +415,7 @@ public void addNow(@Context HttpServletRequest request, @PathParam("entityName") throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } + } logger.debug("Added {} {} documents", documents.size(), entityName); } @@ -519,7 +519,8 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx * @param index Index (entity) to update. * @throws IOException */ - private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFileCount, String entityId, String index) + private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFileCount, String entityId, + String index) throws IOException { long deltaFileSize = sizeToAdd - sizeToSubtract; if (entityId != null && (deltaFileSize != 0 || deltaFileCount != 0)) { @@ -539,7 +540,7 @@ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFi if (deltaFileSize != 0) { prunedFields.add("fileSize"); long oldSize = document.getField("fileSize").numericValue().longValue(); - long newSize = oldSize == -1 ? deltaFileSize: oldSize + deltaFileSize; + long newSize = oldSize + deltaFileSize; fieldsToAdd.add(new LongPoint("fileSize", newSize)); fieldsToAdd.add(new StoredField("fileSize", newSize)); fieldsToAdd.add(new NumericDocValuesField("fileSize", newSize)); @@ -935,9 +936,10 @@ private void init() { throw new IllegalStateException(e.getMessage()); } - logger.info( - "Initialised icat.lucene with directory {}, commitSeconds {}, maxShardSize {}, shardedIndices {}, maxSearchTimeSeconds {}", - luceneDirectory, luceneCommitMillis, luceneMaxShardSize, shardedIndices, maxSearchTimeSeconds); + String format = "Initialised icat.lucene with directory {}, commitSeconds {}, maxShardSize {}, " + + "shardedIndices {}, maxSearchTimeSeconds {}, aggregateFiles {}"; + logger.info(format, luceneDirectory, luceneCommitMillis, luceneMaxShardSize, shardedIndices, + maxSearchTimeSeconds, aggregateFiles); } class CommitTimerTask extends TimerTask { @@ -983,11 +985,14 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s } /** - * Locks the specified index for population, optionally removing all existing documents and - * preventing normal modify operations until the index is unlocked. + * Locks the specified index for population, optionally removing all existing + * documents and preventing normal modify operations until the index is + * unlocked. * * @param entityName Name of the entity/index to lock. - * @param request Incoming request. 
In order to delete all existing documents, the accompanying Json should specify {"delete": true}. + * @param request Incoming request. In order to delete all existing + * documents, the accompanying Json should specify + * {"delete": true}. * @throws LuceneException If already locked, or if there's an IOException when * deleting documents. */ @@ -995,14 +1000,16 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s @Path("lock/{entityName}") @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) - public String lock(@PathParam("entityName") String entityName, @Context HttpServletRequest request) throws LuceneException { + public String lock(@PathParam("entityName") String entityName, @Context HttpServletRequest request) + throws LuceneException { try (JsonReader reader = Json.createReader(request.getInputStream())) { boolean delete = reader.readObject().getBoolean("delete", false); logger.info("Requesting lock of {} index, delete={}", entityName, delete); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (!bucket.locked.compareAndSet(false, true)) { - throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, "Lucene already locked for " + entityName); + String message = "Lucene already locked for " + entityName; + throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, message); } JsonObjectBuilder builder = Json.createObjectBuilder(); if (delete) { @@ -1371,11 +1378,13 @@ private void addField(JsonObject json, Document document, String key) { } /** - * Attempts to convert numericFieldName from json into SI units from its recorded unitString, and then add it to the Lucene document. + * Attempts to convert numericFieldName from json into SI units from its + * recorded unitString, and then add it to the Lucene document. * - * @param document Lucene Document to add the field to. - * @param json JsonObject containing the field/value pairs to be added. - * @param unitString Units of the value to be converted. + * @param document Lucene Document to add the field to. + * @param json JsonObject containing the field/value pairs to be + * added. + * @param unitString Units of the value to be converted. * @param numericFieldName Name (key) of the field to convert and add. */ private void convertValue(Document document, JsonObject json, String unitString, String numericFieldName) { @@ -1405,7 +1414,7 @@ private void convertValue(Document document, JsonObject json, String unitString, /** * Adds field to document taking its typing, sorting and faceting into account. * - * @param field Lucene IndexableField to add to the document. + * @param field Lucene IndexableField to add to the document. * @param document Lucene Document to add the field to. 
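As the javadoc above notes, addField handles faceting as well as typing and sorting: every facetable value is indexed once as a SortedSetDocValuesFacetField and once as an unstored keyword for exact filtering, and the document is passed through FacetsConfig.build before it reaches the writer. A minimal sketch of that pairing:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.facet.FacetsConfig;
    import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;

    public class FacetFieldSketch {
        static Document facetable(FacetsConfig facetsConfig, String key, String value)
                throws java.io.IOException {
            Document document = new Document();
            // one copy for facet counting, one unstored keyword for filtering
            document.add(new SortedSetDocValuesFacetField(key + ".keyword", value));
            document.add(new StringField(key + ".keyword", value, Store.NO));
            // build() rewrites the facet field into its indexable doc-values form
            return facetsConfig.build(document);
        }
    }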
*/ private void addField(IndexableField field, Document document) { @@ -1597,11 +1606,12 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm int docId = topDocs.scoreDocs[0].doc; Document datasetDocument = datafileSearcher.doc(docId); sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue(); - if (jsonFileSize.longValueExact() != sizeToSubtract) { + long sizeToAdd = jsonFileSize.longValueExact(); + if (sizeToAdd != sizeToSubtract) { String datasetId = documentObject.getString("dataset.id", null); String investigationId = documentObject.getString("investigation.id", null); - aggregateFileSize(jsonFileSize.longValueExact(), sizeToSubtract, 0, datasetId, "dataset"); - aggregateFileSize(jsonFileSize.longValueExact(), sizeToSubtract, 0, investigationId, "investigation"); + aggregateFileSize(sizeToAdd, sizeToSubtract, 0, datasetId, "dataset"); + aggregateFileSize(sizeToAdd, sizeToSubtract, 0, investigationId, "investigation"); } break; } From 902654bce4bf493998db0112d8c490b9616546e9 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 5 Aug 2022 08:23:33 +0000 Subject: [PATCH 51/73] Improved timeout and search syntax errors #19 --- .../java/org/icatproject/lucene/Lucene.java | 248 ++++++++++-------- .../org/icatproject/lucene/SearchBucket.java | 5 +- 2 files changed, 141 insertions(+), 112 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index d0dc91e..0fbaa5f 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -69,21 +69,23 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; -import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField.Type; +import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException; import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TimeLimitingCollector; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TotalHits; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Counter; import org.apache.lucene.util.NumericUtils; import org.icatproject.lucene.SearchBucket.SearchType; import org.icatproject.lucene.exceptions.LuceneException; @@ -610,6 +612,9 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("search } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); + if (e instanceof LuceneException) { + throw (LuceneException) e; + } throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } @@ -642,6 +647,9 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("search_ } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); + if (e instanceof LuceneException) { + throw (LuceneException) e; + } throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -748,11 +756,12 @@ private void encodeResult(String 
entityName, JsonGenerator gen, ScoreDoc hit, In } joinedSearch.query = new TermQuery(new Term(fld, parentId)); joinedSearch.sort = new Sort(new SortedNumericSortField("id.long", Type.LONG)); - TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, shards, null); + TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, shards); gen.writeStartArray(joinedEntityName.toLowerCase()); for (ScoreDoc joinedHit : topFieldDocs.scoreDocs) { gen.writeStartObject(); - Document joinedDocument = searchers.get(joinedHit.shardIndex).doc(joinedHit.doc); + int joinedShardIndex = joinedHit.shardIndex > 0 ? joinedHit.shardIndex : 0; + Document joinedDocument = searchers.get(joinedShardIndex).doc(joinedHit.doc); joinedDocument.forEach(encodeField(gen, search.joinedFields.get(joinedEntityName))); gen.writeEnd(); } @@ -827,6 +836,9 @@ public String facet(@PathParam("entityName") String entityName, @Context HttpSer return luceneFacetResult(entityName, search, searchAfter, maxResults, maxLabels); } catch (Exception e) { freeSearcher(uid); + if (e instanceof LuceneException) { + throw (LuceneException) e; + } throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } @@ -838,7 +850,7 @@ public String facet(@PathParam("entityName") String entityName, @Context HttpSer * @throws LuceneException */ public void freeSearcher(Long uid) throws LuceneException { - if (uid != null) { // May not be set for internal calls + if (uid != null && searches.containsKey(uid)) { // May not be set for internal calls Map> search = searches.get(uid).searcherMap; for (Entry> entry : search.entrySet()) { String name = entry.getKey(); @@ -980,6 +992,9 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s } catch (Exception e) { logger.error("Error", e); freeSearcher(uid); + if (e instanceof LuceneException) { + throw (LuceneException) e; + } throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } @@ -1020,20 +1035,11 @@ public String lock(@PathParam("entityName") String entityName, @Context HttpServ bucket.shardList = Arrays.asList(bucket.shardList.get(0)); return builder.add("currentId", 0).build().toString(); } - SearchBucket searchBucket = new SearchBucket(this); - searchBucket.query = new MatchAllDocsQuery(); - searchBucket.fields.add("id"); - searchBucket.scored = false; - searchBucket.sort = new Sort(new SortedNumericSortField("id.long", Type.LONG, true)); - TopFieldDocs topFieldDocs = searchShards(searchBucket, 1, bucket.shardList, null); - if (topFieldDocs.totalHits.value == 0) { - return builder.add("currentId", 0).build().toString(); - } - int shardIndex = topFieldDocs.scoreDocs[0].shardIndex; - int doc = topFieldDocs.scoreDocs[0].doc; - IndexSearcher searcher = bucket.shardList.get(shardIndex).searcherManager.acquire(); - String id = searcher.doc(doc).get("id"); - bucket.shardList.get(shardIndex).searcherManager.release(searcher); + ShardBucket shardBucket = bucket.routeShard(); + int docCount = shardBucket.documentCount.intValue(); + IndexSearcher searcher = shardBucket.searcherManager.acquire(); + String id = searcher.doc(docCount - 1).get("id"); + shardBucket.searcherManager.release(searcher); return builder.add("currentId", new Long(id)).build().toString(); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); @@ -1193,7 +1199,7 @@ private String luceneSearchResult(String name, SearchBucket search, String searc List shards = getShards(name); String format = "Search {} with: 
query {}, maxResults {}, searchAfter {}, scored {}, fields {}"; logger.debug(format, name, search.query, maxResults, searchAfter, search.scored, search.fields); - TopFieldDocs topFieldDocs = searchShards(search, maxResults, shards, search.searchAfter); + TopFieldDocs topFieldDocs = searchShards(search, maxResults, shards); ScoreDoc[] hits = topFieldDocs.scoreDocs; TotalHits totalHits = topFieldDocs.totalHits; SortField[] fields = topFieldDocs.fields; @@ -1205,121 +1211,141 @@ private String luceneSearchResult(String name, SearchBucket search, String searc ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject(); - gen.write("aborted", search.aborted); - if (!search.aborted) { - gen.writeStartArray("results"); - for (ScoreDoc hit : hits) { - encodeResult(name, gen, hit, searchers.get(hit.shardIndex), search); + gen.writeStartArray("results"); + for (ScoreDoc hit : hits) { + encodeResult(name, gen, hit, searchers.get(hit.shardIndex), search); + } + gen.writeEnd(); // array results + if (hits.length == maxResults) { + ScoreDoc lastDoc = hits[hits.length - 1]; + gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", + lastDoc.shardIndex); + float lastScore = lastDoc.score; + if (!Float.isNaN(lastScore)) { + gen.write("score", lastScore); } - gen.writeEnd(); // array results - if (hits.length == maxResults) { - ScoreDoc lastDoc = hits[hits.length - 1]; - gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", - lastDoc.shardIndex); - float lastScore = lastDoc.score; - if (!Float.isNaN(lastScore)) { - gen.write("score", lastScore); - } - if (fields != null) { - Document lastDocument = searchers.get(lastDoc.shardIndex).doc(lastDoc.doc); - gen.writeStartArray("fields"); - for (SortField sortField : fields) { - String fieldName = sortField.getField(); - if (fieldName == null) { - // SCORE sorting will have a null fieldName + if (fields != null) { + Document lastDocument = searchers.get(lastDoc.shardIndex).doc(lastDoc.doc); + gen.writeStartArray("fields"); + for (SortField sortField : fields) { + String fieldName = sortField.getField(); + if (fieldName == null) { + // SCORE sorting will have a null fieldName + if (Float.isFinite(lastDoc.score)) { gen.write(lastDoc.score); - continue; - } - IndexableField indexableField = lastDocument.getField(fieldName); - if (indexableField == null) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + fieldName - + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); - } - Type type = (sortField instanceof SortedNumericSortField) - ? 
((SortedNumericSortField) sortField).getNumericType() - : sortField.getType(); - switch (type) { - case LONG: - if (indexableField.numericValue() != null) { - gen.write(indexableField.numericValue().longValue()); - } else if (indexableField.stringValue() != null) { - gen.write(new Long(indexableField.stringValue())); - } - break; - case DOUBLE: - if (indexableField.numericValue() != null) { - gen.write(indexableField.numericValue().doubleValue()); - } else if (indexableField.stringValue() != null) { - gen.write(new Double(indexableField.stringValue())); - } - break; - case STRING: - gen.write(indexableField.stringValue()); - break; - default: - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, - "SortField.Type must be one of LONG, DOUBLE, STRING, but it was " + type); } + continue; + } + IndexableField indexableField = lastDocument.getField(fieldName); + if (indexableField == null) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + fieldName + + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); + } + Type type = (sortField instanceof SortedNumericSortField) + ? ((SortedNumericSortField) sortField).getNumericType() + : sortField.getType(); + switch (type) { + case LONG: + if (indexableField.numericValue() != null) { + gen.write(indexableField.numericValue().longValue()); + } else if (indexableField.stringValue() != null) { + gen.write(new Long(indexableField.stringValue())); + } + break; + case DOUBLE: + if (indexableField.numericValue() != null) { + gen.write(indexableField.numericValue().doubleValue()); + } else if (indexableField.stringValue() != null) { + gen.write(new Double(indexableField.stringValue())); + } + break; + case STRING: + gen.write(indexableField.stringValue()); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, + "SortField.Type must be one of LONG, DOUBLE, STRING, but it was " + type); } - gen.writeEnd(); // end "fields" array } - gen.writeEnd(); // end "search_after" object + gen.writeEnd(); // end "fields" array } + gen.writeEnd(); // end "search_after" object } gen.writeEnd(); // end enclosing object } - logger.debug("Json returned {}", baos.toString()); + logger.trace("Json returned {}", baos.toString()); return baos.toString(); } /** * Performs a search by iterating over all relevant shards. * - * @param search SearchBucket containing the search query, dimensions to - * facet etc. - * @param maxResults The maximum number of results from the search. - * @param shards List of all ShardBuckets for the entity to be searched. - * @param searchAfterDoc The last Lucene FieldDoc from a previous search. + * @param search SearchBucket containing the search query, dimensions to + * facet etc. + * @param maxResults The maximum number of results from the search. + * @param shards List of all ShardBuckets for the entity to be searched. * @return Lucene TopFieldDocs resulting from the search. * @throws IOException + * @throws LuceneException If the search runs for longer than the allowed time */ - private TopFieldDocs searchShards(SearchBucket search, int maxResults, List shards, - FieldDoc searchAfterDoc) throws IOException { + private TopFieldDocs searchShards(SearchBucket search, int maxResults, List shards) + throws IOException, LuceneException { + TopFieldDocs topFieldDocs; - if (shards.size() > 0) { - List shardHits = new ArrayList<>(); - int i = 0; - int doc = searchAfterDoc != null ? 
searchAfterDoc.doc : -1; - long startTime = System.currentTimeMillis(); - for (ShardBucket shard : shards) { - int docCount = shard.documentCount.intValue(); - if (searchAfterDoc != null) { - if (doc > docCount) { - searchAfterDoc.doc = docCount - 1; - } else { - searchAfterDoc.doc = doc; + Counter clock = TimeLimitingCollector.getGlobalCounter(); + TimeLimitingCollector collector = new TimeLimitingCollector(null, clock, maxSearchTimeSeconds * 1000); + int shardsSize = shards.size(); + + try { + if (shardsSize > 1) { + List shardHits = new ArrayList<>(); + int doc = search.searchAfter != null ? search.searchAfter.doc : -1; + for (ShardBucket shard : shards) { + // Handle the possibility of some shards having a higher docCount than the doc + // id on searchAfter + int docCount = shard.documentCount.intValue(); + if (search.searchAfter != null) { + if (doc > docCount) { + search.searchAfter.doc = docCount - 1; + } else { + search.searchAfter.doc = doc; + } + } + + // Wrap Collector with TimeLimitingCollector + TopFieldCollector topFieldCollector = TopFieldCollector.create(search.sort, maxResults, + search.searchAfter, maxResults); + collector.setCollector(topFieldCollector); + + IndexSearcher indexSearcher = shard.searcherManager.acquire(); + indexSearcher.search(search.query, collector); + TopFieldDocs topDocs = topFieldCollector.topDocs(); + if (search.scored) { + TopFieldCollector.populateScores(topDocs.scoreDocs, indexSearcher, search.query); } + shardHits.add(topDocs); } - IndexSearcher indexSearcher = shard.searcherManager.acquire(); - TopFieldDocs shardDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, - search.sort, search.scored); - shardHits.add(shardDocs); - logger.debug("{} on shard {} out of {} total docs", shardDocs.totalHits, i, docCount); - i++; - long duration = (System.currentTimeMillis() - startTime); - if (duration > maxSearchTimeSeconds * 1000) { - logger.info("Stopping search after {} shards due to {} ms having elapsed", i, duration); - search.aborted = true; - break; + topFieldDocs = TopFieldDocs.merge(search.sort, 0, maxResults, shardHits.toArray(new TopFieldDocs[0]), + true); + } else { + // Don't need to merge results across shards + TopFieldCollector topFieldCollector = TopFieldCollector.create(search.sort, maxResults, + search.searchAfter, maxResults); + collector.setCollector(topFieldCollector); + IndexSearcher indexSearcher = shards.get(0).searcherManager.acquire(); + indexSearcher.search(search.query, collector); + topFieldDocs = topFieldCollector.topDocs(); + if (search.scored) { + TopFieldCollector.populateScores(topFieldDocs.scoreDocs, indexSearcher, search.query); } } - topFieldDocs = TopFieldDocs.merge(search.sort, 0, maxResults, shardHits.toArray(new TopFieldDocs[i]), true); - } else { - IndexSearcher indexSearcher = shards.get(0).searcherManager.acquire(); - topFieldDocs = indexSearcher.searchAfter(searchAfterDoc, search.query, maxResults, search.sort, - search.scored); + + return topFieldDocs; + + } catch (TimeExceededException e) { + String message = "Search cancelled for exceeding " + maxSearchTimeSeconds + " seconds"; + throw new LuceneException(HttpURLConnection.HTTP_GATEWAY_TIMEOUT, message); } - return topFieldDocs; } /** diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index 57c6fa4..91858a5 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -32,6 +32,7 @@ import 
org.apache.lucene.facet.range.Range; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; @@ -70,7 +71,6 @@ public enum SearchType { public Set fields = new HashSet(); public Map> joinedFields = new HashMap<>(); public Map dimensions = new HashMap(); - public boolean aborted = false; private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); static { @@ -222,6 +222,9 @@ public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest req query = maybeEmptyQuery(luceneQuery); return; } + } catch (QueryNodeParseException e) { + String message = "Search term could not be parsed due to syntax errors"; + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message); } } From 1eac7e06073553ca965f064ba820caa0ebe9a16e Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 9 Aug 2022 14:43:30 +0000 Subject: [PATCH 52/73] Error handling fix and range check for lock #19 --- .../java/org/icatproject/lucene/Lucene.java | 200 ++++++++++-------- 1 file changed, 110 insertions(+), 90 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 0fbaa5f..d28720a 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -69,7 +69,10 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; @@ -136,13 +139,15 @@ public ShardBucket(java.nio.file.Path shardPath) throws IOException { } searcherManager = new SearcherManager(indexWriter, null); IndexSearcher indexSearcher = null; + int numDocs; try { indexSearcher = searcherManager.acquire(); - int numDocs = indexSearcher.getIndexReader().numDocs(); + numDocs = indexSearcher.getIndexReader().numDocs(); documentCount = new AtomicLong(numDocs); } finally { searcherManager.release(indexSearcher); } + logger.info("Created ShardBucket for {} with {} Documents", directory, numDocs); } /** @@ -182,12 +187,15 @@ public IndexBucket(String entityName) { this.entityName = entityName.toLowerCase(); Long shardIndex = 0L; java.nio.file.Path shardPath = luceneDirectory.resolve(entityName); + ShardBucket shardBucket; + // Create at least one shard, then keep creating them so long as directories + // exist and already contain Documents do { - ShardBucket shardBucket = new ShardBucket(shardPath); + shardBucket = new ShardBucket(shardPath); shardList.add(shardBucket); shardIndex++; shardPath = luceneDirectory.resolve(entityName + "_" + shardIndex); - } while (Files.isDirectory(shardPath)); + } while (shardBucket.documentCount.get() > 0 && Files.isDirectory(shardPath)); logger.debug("Bucket for {} is now ready with {} shards", entityName, shardIndex); } catch (Throwable e) { logger.error("Can't continue " + e.getClass() + " " + e.getMessage()); @@ -284,6 +292,14 @@ public void close() throws IOException { } } + /** + * @return The 
ShardBucket currently in use for indexing new Documents. + */ + public ShardBucket getCurrentShardBucket() { + int size = shardList.size(); + return shardList.get(size - 1); + } + /** * Provides the ShardBucket that should be used for writing the next Document. * All Documents up to luceneMaxShardSize are indexed in the first shard, after @@ -294,11 +310,10 @@ public void close() throws IOException { * @throws IOException */ public ShardBucket routeShard() throws IOException { - int size = shardList.size(); - ShardBucket shardBucket = shardList.get(size - 1); + ShardBucket shardBucket = getCurrentShardBucket(); if (shardBucket.documentCount.get() >= luceneMaxShardSize) { shardBucket.indexWriter.commit(); - shardBucket = buildShardBucket(size); + shardBucket = buildShardBucket(shardList.size()); } return shardBucket; } @@ -330,7 +345,6 @@ public void releaseSearchers(List subSearchers) throws IOExceptio private final FacetsConfig facetsConfig = new FacetsConfig(); private java.nio.file.Path luceneDirectory; - private Set shardedIndices; private int luceneCommitMillis; private Long luceneMaxShardSize; private long maxSearchTimeSeconds; @@ -609,12 +623,9 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("search SearchBucket search = new SearchBucket(this, SearchType.DATAFILE, request, sort, searchAfter); searches.put(uid, search); return luceneSearchResult("Datafile", search, searchAfter, maxResults); - } catch (Exception e) { + } catch (IOException | QueryNodeException e) { logger.error("Error", e); freeSearcher(uid); - if (e instanceof LuceneException) { - throw (LuceneException) e; - } throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } @@ -644,12 +655,9 @@ public String datasets(@Context HttpServletRequest request, @QueryParam("search_ SearchBucket search = new SearchBucket(this, SearchType.DATASET, request, sort, searchAfter); searches.put(uid, search); return luceneSearchResult("Dataset", search, searchAfter, maxResults); - } catch (Exception e) { + } catch (IOException | QueryNodeException e) { logger.error("Error", e); freeSearcher(uid); - if (e instanceof LuceneException) { - throw (LuceneException) e; - } throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -760,8 +768,7 @@ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, In gen.writeStartArray(joinedEntityName.toLowerCase()); for (ScoreDoc joinedHit : topFieldDocs.scoreDocs) { gen.writeStartObject(); - int joinedShardIndex = joinedHit.shardIndex > 0 ? 
joinedHit.shardIndex : 0; - Document joinedDocument = searchers.get(joinedShardIndex).doc(joinedHit.doc); + Document joinedDocument = searchers.get(joinedHit.shardIndex).doc(joinedHit.doc); joinedDocument.forEach(encodeField(gen, search.joinedFields.get(joinedEntityName))); gen.writeEnd(); } @@ -834,11 +841,9 @@ public String facet(@PathParam("entityName") String entityName, @Context HttpSer SearchBucket search = new SearchBucket(this, SearchType.GENERIC, request, sort, null); searches.put(uid, search); return luceneFacetResult(entityName, search, searchAfter, maxResults, maxLabels); - } catch (Exception e) { + } catch (IOException | QueryNodeException e) { + logger.error("Error", e); freeSearcher(uid); - if (e instanceof LuceneException) { - throw (LuceneException) e; - } throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } @@ -940,18 +945,15 @@ private void init() { icatUnits = new IcatUnits(props.getString("units", "")); - String shardedIndicesString = props.getString("shardedIndices", "").toLowerCase(); - shardedIndices = new HashSet<>(Arrays.asList(shardedIndicesString.split("\\s+"))); - } catch (Exception e) { logger.error(fatal, e.getMessage()); throw new IllegalStateException(e.getMessage()); } String format = "Initialised icat.lucene with directory {}, commitSeconds {}, maxShardSize {}, " - + "shardedIndices {}, maxSearchTimeSeconds {}, aggregateFiles {}"; - logger.info(format, luceneDirectory, luceneCommitMillis, luceneMaxShardSize, shardedIndices, - maxSearchTimeSeconds, aggregateFiles); + + "maxSearchTimeSeconds {}, aggregateFiles {}"; + logger.info(format, luceneDirectory, luceneCommitMillis, luceneMaxShardSize, maxSearchTimeSeconds, + aggregateFiles); } class CommitTimerTask extends TimerTask { @@ -989,12 +991,9 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s SearchBucket search = new SearchBucket(this, SearchType.INVESTIGATION, request, sort, searchAfter); searches.put(uid, search); return luceneSearchResult("Investigation", search, searchAfter, maxResults); - } catch (Exception e) { + } catch (IOException | QueryNodeException e) { logger.error("Error", e); freeSearcher(uid); - if (e instanceof LuceneException) { - throw (LuceneException) e; - } throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } @@ -1004,43 +1003,70 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s * documents and preventing normal modify operations until the index is * unlocked. * + * A check is also performed against the minId and maxId used for population. + * This ensures that no data is duplicated in the index. + * * @param entityName Name of the entity/index to lock. - * @param request Incoming request. In order to delete all existing - * documents, the accompanying Json should specify - * {"delete": true}. - * @throws LuceneException If already locked, or if there's an IOException when - * deleting documents. + * @param minId The exclusive minimum ICAT id being populated for. If + * Documents already exist with an id greater than this, the + * lock will fail. If null, treated as if it were + * Long.MIN_VALUE + * @param maxId The inclusive maximum ICAT id being populated for. If + * Documents already exist with an id less than or equal to + * this, the lock will fail. If null, treated as if it were + * Long.MAX_VALUE + * @param delete Whether to delete all existing Documents on the index. 
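The minId/maxId guard documented here reduces to a single point-range query over the id.long field, exclusive at the bottom and inclusive at the top. A sketch of the check in isolation:

    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TopDocs;

    public class RangeCheckSketch {
        // True if any indexed document has an id in (minId, maxId]; locking for
        // population must fail in that case to avoid duplicating documents.
        static boolean anyIdInRange(IndexSearcher searcher, long minId, long maxId)
                throws java.io.IOException {
            Query query = LongPoint.newRangeQuery("id.long", minId + 1, maxId);
            TopDocs topDocs = searcher.search(query, 1);
            return topDocs.scoreDocs.length != 0;
        }
    }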
+ * @throws LuceneException If already locked, if there's an IOException when + * deleting documents, or if the min/max id values are + * provided and Documents already exist in that range. */ @POST @Path("lock/{entityName}") - @Consumes(MediaType.APPLICATION_JSON) - @Produces(MediaType.APPLICATION_JSON) - public String lock(@PathParam("entityName") String entityName, @Context HttpServletRequest request) - throws LuceneException { - try (JsonReader reader = Json.createReader(request.getInputStream())) { - boolean delete = reader.readObject().getBoolean("delete", false); - logger.info("Requesting lock of {} index, delete={}", entityName, delete); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); + public void lock(@PathParam("entityName") String entityName, @QueryParam("minId") Long minId, + @QueryParam("maxId") Long maxId, @QueryParam("delete") Boolean delete) throws LuceneException { + try { + entityName = entityName.toLowerCase(); + logger.info("Requesting lock of {} index, minId={}, maxId={}, delete={}", entityName, minId, maxId, delete); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); if (!bucket.locked.compareAndSet(false, true)) { String message = "Lucene already locked for " + entityName; throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, message); } - JsonObjectBuilder builder = Json.createObjectBuilder(); if (delete) { for (ShardBucket shardBucket : bucket.shardList) { shardBucket.indexWriter.deleteAll(); } // Reset the shardList so we reset the routing bucket.shardList = Arrays.asList(bucket.shardList.get(0)); - return builder.add("currentId", 0).build().toString(); + return; + } + + for (ShardBucket shardBucket : bucket.shardList) { + IndexSearcher searcher = shardBucket.searcherManager.acquire(); + Query query; + if (minId == null && maxId == null) { + query = new MatchAllDocsQuery(); + } else { + if (minId == null) { + minId = Long.MIN_VALUE; + } + if (maxId == null) { + maxId = Long.MAX_VALUE; + } + query = LongPoint.newRangeQuery("id.long", minId + 1, maxId); + } + TopDocs topDoc = searcher.search(query, 1); + if (topDoc.scoreDocs.length != 0) { + // If we have any results in the populating range, unlock and throw + bucket.locked.compareAndSet(true, false); + Document doc = searcher.doc(topDoc.scoreDocs[0].doc); + String id = doc.get("id"); + String message = "While locking index, found id " + id + " in specified range"; + logger.error(message); + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message); + } } - ShardBucket shardBucket = bucket.routeShard(); - int docCount = shardBucket.documentCount.intValue(); - IndexSearcher searcher = shardBucket.searcherManager.acquire(); - String id = searcher.doc(docCount - 1).get("id"); - shardBucket.searcherManager.release(searcher); - return builder.add("currentId", new Long(id)).build().toString(); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } @@ -1209,23 +1235,25 @@ private String luceneSearchResult(String name, SearchBucket search, String searc } logger.debug("{} maxscore {}", totalHits, maxScore); ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int shardIndex = -1; try (JsonGenerator gen = Json.createGenerator(baos)) { gen.writeStartObject(); gen.writeStartArray("results"); for (ScoreDoc hit : hits) { - encodeResult(name, gen, hit, searchers.get(hit.shardIndex), search); + shardIndex = hit.shardIndex; + encodeResult(name, gen, hit, 
searchers.get(shardIndex), search); } gen.writeEnd(); // array results if (hits.length == maxResults) { ScoreDoc lastDoc = hits[hits.length - 1]; - gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", - lastDoc.shardIndex); + shardIndex = lastDoc.shardIndex; + gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", shardIndex); float lastScore = lastDoc.score; if (!Float.isNaN(lastScore)) { gen.write("score", lastScore); } if (fields != null) { - Document lastDocument = searchers.get(lastDoc.shardIndex).doc(lastDoc.doc); + Document lastDocument = searchers.get(shardIndex).doc(lastDoc.doc); gen.writeStartArray("fields"); for (SortField sortField : fields) { String fieldName = sortField.getField(); @@ -1272,6 +1300,10 @@ private String luceneSearchResult(String name, SearchBucket search, String searc gen.writeEnd(); // end "search_after" object } gen.writeEnd(); // end enclosing object + } catch (ArrayIndexOutOfBoundsException e) { + String message = "Attempting to access searcher with shardIndex " + shardIndex + ", but only have " + + searchers.size() + " searchers in total"; + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, message); } logger.trace("Json returned {}", baos.toString()); return baos.toString(); @@ -1294,51 +1326,37 @@ private TopFieldDocs searchShards(SearchBucket search, int maxResults, List 1) { - List shardHits = new ArrayList<>(); - int doc = search.searchAfter != null ? search.searchAfter.doc : -1; - for (ShardBucket shard : shards) { - // Handle the possibility of some shards having a higher docCount than the doc - // id on searchAfter - int docCount = shard.documentCount.intValue(); - if (search.searchAfter != null) { - if (doc > docCount) { - search.searchAfter.doc = docCount - 1; - } else { - search.searchAfter.doc = doc; - } - } - - // Wrap Collector with TimeLimitingCollector - TopFieldCollector topFieldCollector = TopFieldCollector.create(search.sort, maxResults, - search.searchAfter, maxResults); - collector.setCollector(topFieldCollector); - - IndexSearcher indexSearcher = shard.searcherManager.acquire(); - indexSearcher.search(search.query, collector); - TopFieldDocs topDocs = topFieldCollector.topDocs(); - if (search.scored) { - TopFieldCollector.populateScores(topDocs.scoreDocs, indexSearcher, search.query); + List shardHits = new ArrayList<>(); + int doc = search.searchAfter != null ? 
search.searchAfter.doc : -1; + for (ShardBucket shard : shards) { + // Handle the possibility of some shards having a higher docCount than the doc + // id on searchAfter + int docCount = shard.documentCount.intValue(); + if (search.searchAfter != null) { + if (doc > docCount) { + search.searchAfter.doc = docCount - 1; + } else { + search.searchAfter.doc = doc; } } - + + // Wrap Collector with TimeLimitingCollector TopFieldCollector topFieldCollector = TopFieldCollector.create(search.sort, maxResults, search.searchAfter, maxResults); collector.setCollector(topFieldCollector); + + IndexSearcher indexSearcher = shard.searcherManager.acquire(); indexSearcher.search(search.query, collector); TopFieldDocs topDocs = topFieldCollector.topDocs(); if (search.scored) { TopFieldCollector.populateScores(topDocs.scoreDocs, indexSearcher, search.query); } shardHits.add(topDocs); } topFieldDocs = TopFieldDocs.merge(search.sort, 0, maxResults, shardHits.toArray(new TopFieldDocs[0]), true); return topFieldDocs; @@ -1487,6 +1505,7 @@ private void addSortField(JsonObject json, Document document, String key) { Long value = new Long(json.getString(key)); document.add(new NumericDocValuesField("id.long", value)); document.add(new StoredField("id.long", value)); + document.add(new LongPoint("id.long", value)); } if (DocumentMapping.longFields.contains(key)) { document.add(new NumericDocValuesField(key, json.getJsonNumber(key).longValueExact())); @@ -1515,6 +1534,7 @@ private void addSortField(IndexableField field, Document document) { Long value = new Long(field.stringValue()); document.add(new NumericDocValuesField("id.long", value)); document.add(new StoredField("id.long", value)); + document.add(new LongPoint("id.long", value)); } if (DocumentMapping.longFields.contains(key)) { document.add(new NumericDocValuesField(key, field.numericValue().longValue())); From 182b5e5caeb5b49fa1f094924d0b9eee3caa0fde Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 17 Aug 2022 13:51:37 +0000 Subject: [PATCH 53/73] Fix shardList not accepting new shards #19 --- src/main/java/org/icatproject/lucene/Lucene.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index d28720a..fd4f84e 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -1038,7 +1038,9 @@ public void lock(@PathParam("entityName") String entityName, @QueryParam("minId" shardBucket.indexWriter.deleteAll(); } // Reset the shardList so we reset the routing - bucket.shardList = Arrays.asList(bucket.shardList.get(0)); + ShardBucket shardBucket = bucket.shardList.get(0); + bucket.shardList = new ArrayList<>(); + bucket.shardList.add(shardBucket); return; } From d8d1e762251936bcf05d897a65e299e2d86967a6 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 17 Aug 2022 15:41:32 +0100 Subject: [PATCH 54/73] Move synonym analyzer to DocumentMapping #16 --- .../icatproject/lucene/DocumentMapping.java | 63 ++++++++++--------- .../java/org/icatproject/lucene/Lucene.java | 4 +- 2
files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java index 75500d4..2cf4848 100644 --- a/src/main/java/org/icatproject/lucene/DocumentMapping.java +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -1,12 +1,17 @@ package org.icatproject.lucene; +import java.io.IOException; +import java.text.ParseException; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; +import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; +import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; public class DocumentMapping { @@ -30,6 +35,8 @@ public ParentRelationship(String parentName, String joiningField, String... fiel } } + private static Analyzer analyzer; + public static final Set doubleFields = new HashSet<>(); public static final Set facetFields = new HashSet<>(); public static final Set longFields = new HashSet<>(); @@ -38,14 +45,24 @@ public ParentRelationship(String parentName, String joiningField, String... fiel public static final Set indexedEntities = new HashSet<>(); public static final Map relationships = new HashMap<>(); - public static final IcatAnalyzer analyzer = new IcatAnalyzer(); - public static final StandardQueryParser genericParser = new StandardQueryParser(); - public static final StandardQueryParser datafileParser = new StandardQueryParser(); - public static final StandardQueryParser datasetParser = new StandardQueryParser(); - public static final StandardQueryParser investigationParser = new StandardQueryParser(); - public static final StandardQueryParser sampleParser = new StandardQueryParser(); + public static final StandardQueryParser genericParser = buildParser(); + public static final StandardQueryParser datafileParser = buildParser("name", "description", "location", "datafileFormat.name", "visitId", + "sample.name", "sample.type.name", "doi"); + public static final StandardQueryParser datasetParser = buildParser("name", "description", "sample.name", "sample.type.name", "type.name", + "visitId", "doi"); + public static final StandardQueryParser investigationParser = buildParser("name", "visitId", "title", "summary", "facility.name", + "type.name", "doi"); + public static final StandardQueryParser sampleParser = buildParser("sample.name", "sample.type.name"); static { + try { + // Attempt init an Analyzer which injects synonyms for searching + analyzer = new IcatSynonymAnalyzer(); + } catch (IOException | ParseException e) { + // If synonym files cannot be parsed, default to using the same analyzer as for writing + analyzer = new IcatAnalyzer(); + } + doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI", "rangeTop", "rangeTopSI", "rangeBottom", "rangeBottomSI")); facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue")); @@ -99,31 +116,17 @@ public ParentRelationship(String parentName, String joiningField, String... 
fiel new ParentRelationship("datafile", "investigation.id", "investigation.name", "visitId") }); relationships.put("Dataset", new ParentRelationship[] { new ParentRelationship("Datafile", "dataset.id", "dataset.name") }); + } - genericParser.setAllowLeadingWildcard(true); - genericParser.setAnalyzer(analyzer); - - CharSequence[] datafileFields = { "name", "description", "location", "datafileFormat.name", "visitId", - "sample.name", "sample.type.name", "doi" }; - datafileParser.setAllowLeadingWildcard(true); - datafileParser.setAnalyzer(analyzer); - datafileParser.setMultiFields(datafileFields); - - CharSequence[] datasetFields = { "name", "description", "sample.name", "sample.type.name", "type.name", - "visitId", "doi" }; - datasetParser.setAllowLeadingWildcard(true); - datasetParser.setAnalyzer(analyzer); - datasetParser.setMultiFields(datasetFields); - - CharSequence[] investigationFields = { "name", "visitId", "title", "summary", "facility.name", - "type.name", "doi" }; - investigationParser.setAllowLeadingWildcard(true); - investigationParser.setAnalyzer(analyzer); - investigationParser.setMultiFields(investigationFields); + private static StandardQueryParser buildParser(String... defaultFields) { + StandardQueryParser parser = new StandardQueryParser(); + StandardQueryConfigHandler qpConf = (StandardQueryConfigHandler) parser.getQueryConfigHandler(); + qpConf.set(ConfigurationKeys.ANALYZER, analyzer); + qpConf.set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, true); + if (defaultFields.length > 0) { + qpConf.set(ConfigurationKeys.MULTI_FIELDS, defaultFields); + } - CharSequence[] sampleFields = { "sample.name", "sample.type.name" }; - sampleParser.setAllowLeadingWildcard(true); - sampleParser.setAnalyzer(analyzer); - sampleParser.setMultiFields(sampleFields); + return parser; } } diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index fd4f84e..442b866 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -7,7 +7,6 @@ import java.nio.file.FileVisitOption; import java.nio.file.Files; import java.util.ArrayList; -import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -124,7 +123,7 @@ private class ShardBucket { */ public ShardBucket(java.nio.file.Path shardPath) throws IOException { directory = FSDirectory.open(shardPath); - IndexWriterConfig config = new IndexWriterConfig(DocumentMapping.analyzer); + IndexWriterConfig config = new IndexWriterConfig(analyzer); indexWriter = new IndexWriter(directory, config); String[] files = directory.listAll(); if (files.length == 1 && files[0].equals("write.lock")) { @@ -341,6 +340,7 @@ public void releaseSearchers(List subSearchers) throws IOExceptio static final Logger logger = LoggerFactory.getLogger(Lucene.class); private static final Marker fatal = MarkerFactory.getMarker("FATAL"); + private static final IcatAnalyzer analyzer = new IcatAnalyzer(); private final FacetsConfig facetsConfig = new FacetsConfig(); From 32c2f335fe3c0cc150f6fd8e16039b3733794f35 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 7 Sep 2022 22:50:50 +0100 Subject: [PATCH 55/73] Add support for faceting DatasetTechnique #18 --- .../java/org/icatproject/lucene/DocumentMapping.java | 12 ++++++++---- src/main/java/org/icatproject/lucene/Lucene.java | 9 ++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java 
b/src/main/java/org/icatproject/lucene/DocumentMapping.java index 75500d4..95c2692 100644 --- a/src/main/java/org/icatproject/lucene/DocumentMapping.java +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -48,7 +48,7 @@ public ParentRelationship(String parentName, String joiningField, String... fiel static { doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI", "rangeTop", "rangeTopSI", "rangeBottom", "rangeBottomSI")); - facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue")); + facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue", "technique.name")); longFields.addAll( Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate", "fileSize", "fileCount")); @@ -58,11 +58,12 @@ public ParentRelationship(String parentName, String joiningField, String... fiel "numericValueSI", "fileSize", "fileCount")); textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name", "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name", - "sample.type.name", "title", "summary", "facility.name", "user.fullName", "type.name", "doi")); + "sample.type.name", "technique.name", "technique.description", "technique.pid", "title", "summary", + "facility.name", "user.fullName", "type.name", "doi")); indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter", - "DatasetParameter", "InstrumentScientist", "InvestigationInstrument", "InvestigationParameter", - "InvestigationUser", "Sample", "SampleParameter")); + "DatasetParameter", "DatasetTechnique", "InstrumentScientist", "InvestigationInstrument", + "InvestigationParameter", "InvestigationUser", "Sample", "SampleParameter")); relationships.put("Instrument", new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument.id", @@ -92,6 +93,9 @@ public ParentRelationship(String parentName, String joiningField, String... 
fiel new ParentRelationship("DatasetParameter", "type.id", "type.name"), new ParentRelationship("InvestigationParameter", "type.id", "type.name"), new ParentRelationship("SampleParameter", "type.id", "type.name") }); + relationships.put("Technique", + new ParentRelationship[] { new ParentRelationship("DatasetTechnique", "technique.id", "technique.name", + "technique.description", "technique.pid") }); relationships.put("Investigation", new ParentRelationship[] { new ParentRelationship("Dataset", "investigation.id", "investigation.name", diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index fd4f84e..172fc0a 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -147,7 +147,7 @@ public ShardBucket(java.nio.file.Path shardPath) throws IOException { } finally { searcherManager.release(indexSearcher); } - logger.info("Created ShardBucket for {} with {} Documents", directory, numDocs); + logger.info("Created ShardBucket for directory {} with {} Documents", directory.getDirectory(), numDocs); } /** @@ -905,7 +905,7 @@ public IndexSearcher getSearcher(Map> searcherMap, S throws IOException, LuceneException { List subSearchers = searcherMap.get(name); subSearchers = getSearchers(searcherMap, name); - if (subSearchers.size() > 1) { + if (subSearchers.size() != 1) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Cannot get single IndexSearcher for " + name + " as it has " + subSearchers.size() + " shards"); } @@ -1025,9 +1025,8 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s public void lock(@PathParam("entityName") String entityName, @QueryParam("minId") Long minId, @QueryParam("maxId") Long maxId, @QueryParam("delete") Boolean delete) throws LuceneException { try { - entityName = entityName.toLowerCase(); logger.info("Requesting lock of {} index, minId={}, maxId={}, delete={}", entityName, minId, maxId, delete); - IndexBucket bucket = indexBuckets.computeIfAbsent(entityName, k -> new IndexBucket(k)); + IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (!bucket.locked.compareAndSet(false, true)) { String message = "Lucene already locked for " + entityName; @@ -1038,8 +1037,8 @@ public void lock(@PathParam("entityName") String entityName, @QueryParam("minId" shardBucket.indexWriter.deleteAll(); } // Reset the shardList so we reset the routing - bucket.shardList = new ArrayList<>(); ShardBucket shardBucket = bucket.shardList.get(0); + bucket.shardList = new ArrayList<>(); bucket.shardList.add(shardBucket); return; } From d051925cda5a4d6897fd25376c259b3d2ab6d554 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 9 Sep 2022 05:01:10 +0100 Subject: [PATCH 56/73] Update version #18 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index ae6d0c3..26467e5 100755 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.icatproject icat.lucene - 1.1.2 + 2.0.0-SNAPSHOT war ICAT Lucene @@ -101,7 +101,7 @@ org.icatproject icat.utils - 4.16.2-SNAPSHOT + 4.17.0-SNAPSHOT From 2e359eee51a11c92070367a472f8c3fc8e4d6f1c Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 29 Sep 2022 20:42:35 +0100 Subject: [PATCH 57/73] Refactor Field and large Lucene functions #18 --- .../java/org/icatproject/lucene/Field.java | 193 ++++++ .../org/icatproject/lucene/IcatAnalyzer.java | 13 - .../java/org/icatproject/lucene/Lucene.java | 626 
+++++++++--------- .../org/icatproject/lucene/SearchBucket.java | 118 ++-- 4 files changed, 552 insertions(+), 398 deletions(-) create mode 100644 src/main/java/org/icatproject/lucene/Field.java diff --git a/src/main/java/org/icatproject/lucene/Field.java b/src/main/java/org/icatproject/lucene/Field.java new file mode 100644 index 0000000..966332e --- /dev/null +++ b/src/main/java/org/icatproject/lucene/Field.java @@ -0,0 +1,193 @@ +package org.icatproject.lucene; + +import javax.json.JsonObject; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; + +/** + * Wrapper for the name, value and type (String/Text, long, double) of a field + * to be added to a Lucene Document. + */ +class Field { + + private abstract class InnerField { + + public abstract Document addSortable(Document document) throws NumberFormatException; + + public abstract Document addToDocument(Document document) throws NumberFormatException; + + } + + private class InnerStringField extends InnerField { + + private String value; + + public InnerStringField(String value) { + this.value = value; + } + + @Override + public Document addSortable(Document document) throws NumberFormatException { + if (DocumentMapping.sortFields.contains(name)) { + if (name.equals("id")) { + // Id is a special case, as we need it to be SORTED as a byte ref to allow joins + // but also SORTED_NUMERIC to ensure a deterministic order to results + Long longValue = new Long(value); + document.add(new NumericDocValuesField("id.long", longValue)); + document.add(new StoredField("id.long", longValue)); + document.add(new LongPoint("id.long", longValue)); + } + document.add(new SortedDocValuesField(name, new BytesRef(value))); + } + return document; + } + + @Override + public Document addToDocument(Document document) throws NumberFormatException { + addSortable(document); + + if (DocumentMapping.facetFields.contains(name)) { + document.add(new SortedSetDocValuesFacetField(name + ".keyword", value)); + document.add(new StringField(name + ".keyword", value, Store.NO)); + } + + if (DocumentMapping.textFields.contains(name)) { + document.add(new TextField(name, value, Store.YES)); + } else { + document.add(new StringField(name, value, Store.YES)); + } + + return document; + } + + } + + private class InnerLongField extends InnerField { + + private long value; + + public InnerLongField(long value) { + this.value = value; + } + + @Override + public Document addSortable(Document document) throws NumberFormatException { + if (DocumentMapping.sortFields.contains(name)) { + document.add(new NumericDocValuesField(name, value)); + } + return document; + } + + @Override + public Document addToDocument(Document document) throws NumberFormatException { + addSortable(document); + document.add(new LongPoint(name, value)); + document.add(new StoredField(name, value)); + return document; + } + + } + + private class InnerDoubleField extends InnerField { + + private double value; + + public 
InnerDoubleField(double value) { + this.value = value; + } + + @Override + public Document addSortable(Document document) throws NumberFormatException { + if (DocumentMapping.sortFields.contains(name)) { + long sortableLong = NumericUtils.doubleToSortableLong(value); + document.add(new NumericDocValuesField(name, sortableLong)); + } + return document; + } + + @Override + public Document addToDocument(Document document) throws NumberFormatException { + addSortable(document); + document.add(new DoublePoint(name, value)); + document.add(new StoredField(name, value)); + return document; + } + + } + + private String name; + private InnerField innerField; + + /** + * Creates a wrapper for a Field. + * + * @param object JsonObject containing representations of multiple fields + * @param key Key of a specific field in object + */ + public Field(JsonObject object, String key) { + name = key; + if (DocumentMapping.doubleFields.contains(name)) { + innerField = new InnerDoubleField(object.getJsonNumber(name).doubleValue()); + } else if (DocumentMapping.longFields.contains(name)) { + innerField = new InnerLongField(object.getJsonNumber(name).longValueExact()); + } else { + innerField = new InnerStringField(object.getString(name)); + } + } + + /** + * Creates a wrapper for a Field. + * + * @param indexableField A Lucene IndexableField + */ + public Field(IndexableField indexableField) { + name = indexableField.name(); + if (DocumentMapping.doubleFields.contains(name)) { + innerField = new InnerDoubleField(indexableField.numericValue().doubleValue()); + } else if (DocumentMapping.longFields.contains(name)) { + innerField = new InnerLongField(indexableField.numericValue().longValue()); + } else { + innerField = new InnerStringField(indexableField.stringValue()); + } + } + + /** + * Adds a sortable field to the passed document. This only accounts for sorting, + * if storage and searchability are also needed, see {@link #addToDocument}. The + * exact implementation depends on whether this is a String, long or double + * field. + * + * @param document The document to add to + * @return The original document with this field added to it + * @throws NumberFormatException + */ + public Document addSortable(Document document) throws NumberFormatException { + return innerField.addSortable(document); + } + + /** + * Adds this field to the passed document. This accounts for sortable and + * facetable fields. The exact implementation depends on whether this is a + * String, long or double field. 
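For reference, a minimal standalone sketch of the indexing pattern that InnerDoubleField encapsulates (the class and method names below are illustrative, not part of this patch): a double value needs three complementary Lucene fields to be range-searchable, retrievable and sortable.

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.DoublePoint;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.document.StoredField;
    import org.apache.lucene.util.NumericUtils;

    class DoubleFieldSketch {
        static Document withDouble(String name, double value) {
            Document document = new Document();
            document.add(new DoublePoint(name, value)); // indexed for exact/range queries
            document.add(new StoredField(name, value)); // stored for retrieval with results
            // doubles cannot back a numeric sort directly; encode as a sortable long
            document.add(new NumericDocValuesField(name, NumericUtils.doubleToSortableLong(value)));
            return document;
        }
    }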
+ * + * @param document The document to add to + * @return The original document with this field added to it + * @throws NumberFormatException + */ + public Document addToDocument(Document document) throws NumberFormatException { + return innerField.addToDocument(document); + } + +} diff --git a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java index fcae1c9..5a7da51 100755 --- a/src/main/java/org/icatproject/lucene/IcatAnalyzer.java +++ b/src/main/java/org/icatproject/lucene/IcatAnalyzer.java @@ -8,21 +8,8 @@ import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.EnglishPossessiveFilter; import org.apache.lucene.analysis.en.PorterStemFilter; -// import org.apache.lucene.analysis.standard.StandardAnalyzer ; import org.apache.lucene.analysis.standard.StandardTokenizer; -// public class IcatAnalyzer extends Analyzer { - -// @Override -// protected TokenStreamComponents createComponents(String fieldName) { -// StandardAnalyzer analyzer = new StandardAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET); -// Analyzer.TokenStreamComponents stream = analyzer.createComponents(fieldName); -// sink = new EnglishPossessiveFilter(stream.getTokenStream()); -// sink = new PorterStemFilter(sink); -// return new TokenStreamComponents(source, sink); -// } -// } - public class IcatAnalyzer extends Analyzer { @Override diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 172fc0a..41e3a9d 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -7,7 +7,6 @@ import java.nio.file.FileVisitOption; import java.nio.file.Files; import java.util.ArrayList; -import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -50,10 +49,8 @@ import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.FacetsCollector; @@ -64,7 +61,6 @@ import org.apache.lucene.facet.range.LongRangeFacetCounts; import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; @@ -87,7 +83,6 @@ import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TotalHits; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; import org.apache.lucene.util.NumericUtils; import org.icatproject.lucene.SearchBucket.SearchType; @@ -246,7 +241,7 @@ public void updateDocument(Term term, Document document) throws IOException { * Creates a new ShardBucket and stores it in the shardMap. * * @param shardKey The identifier for the new shard to be created. For - * simplicity, should a Long starting at 0 and incrementing by 1 + * simplicity, should an int starting at 0 and incrementing by 1 * for each new shard. 
* @return A new ShardBucket with the provided shardKey. * @throws IOException @@ -346,7 +341,7 @@ public void releaseSearchers(List subSearchers) throws IOExceptio private java.nio.file.Path luceneDirectory; private int luceneCommitMillis; - private Long luceneMaxShardSize; + private long luceneMaxShardSize; private long maxSearchTimeSeconds; private boolean aggregateFiles; @@ -427,7 +422,7 @@ public void addNow(@Context HttpServletRequest request, @PathParam("entityName") createNow(entityName, document); } } catch (JsonException e) { - logger.error("Could not parse JSON from {}", value.toString()); + logger.error("Could not parse JSON from {}", value); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); @@ -500,7 +495,7 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx if (DocumentMapping.indexedEntities.contains(entityName)) { JsonObject documentObject = operationBody.getJsonObject("doc"); Document document = parseDocument(documentObject); - logger.trace("create {} {}", entityName, document.toString()); + logger.trace("create {} {}", entityName, document); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (bucket.locked.get()) { throw new LuceneException(HttpURLConnection.HTTP_NOT_ACCEPTABLE, @@ -553,23 +548,8 @@ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFi Set prunedFields = new HashSet<>(); List fieldsToAdd = new ArrayList<>(); - if (deltaFileSize != 0) { - prunedFields.add("fileSize"); - long oldSize = document.getField("fileSize").numericValue().longValue(); - long newSize = oldSize + deltaFileSize; - fieldsToAdd.add(new LongPoint("fileSize", newSize)); - fieldsToAdd.add(new StoredField("fileSize", newSize)); - fieldsToAdd.add(new NumericDocValuesField("fileSize", newSize)); - } - - if (deltaFileCount != 0) { - prunedFields.add("fileCount"); - long oldCount = document.getField("fileCount").numericValue().longValue(); - long newCount = oldCount + deltaFileCount; - fieldsToAdd.add(new LongPoint("fileCount", newCount)); - fieldsToAdd.add(new StoredField("fileCount", newCount)); - fieldsToAdd.add(new NumericDocValuesField("fileCount", newCount)); - } + incrementFileStatistic("fileSize", deltaFileSize, document, prunedFields, fieldsToAdd); + incrementFileStatistic("fileCount", deltaFileCount, document, prunedFields, fieldsToAdd); Document newDocument = pruneDocument(prunedFields, document); fieldsToAdd.forEach(field -> newDocument.add(field)); @@ -582,6 +562,33 @@ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFi } } + /** + * Increments a field relating to file statistics (count, size) as part of the + * update on a Document. + * + * @param statisticName Name of the field to increment, i.e. fileCount or + * fileSize. + * @param statisticDelta Change in the value of the named statistic. + * @param document Lucene Document containing the old statistic value to + * be incremented. + * @param prunedFields Set of fields which need to be removed from the old + * Document. If the statistic is incremented, this will + * have statisticName added to it. + * @param fieldsToAdd List of Lucene IndexableFields to add to the new + * Document. 
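Since Lucene documents are immutable, incrementFileStatistic has to rebuild the statistic rather than mutate it in place. A rough sketch of the remove-then-re-add pattern, assuming a stored long field named "fileSize" (copying the remaining fields over verbatim is schematic here; the production code prunes and re-adds them explicitly):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.document.StoredField;
    import org.apache.lucene.index.IndexableField;

    class FileStatisticSketch {
        static Document incremented(Document oldDocument, long delta) {
            long newValue = oldDocument.getField("fileSize").numericValue().longValue() + delta;
            Document newDocument = new Document();
            for (IndexableField field : oldDocument.getFields()) {
                if (!field.name().equals("fileSize")) {
                    newDocument.add(field); // carry over the fields that are not changing
                }
            }
            newDocument.add(new LongPoint("fileSize", newValue)); // searchable by range
            newDocument.add(new StoredField("fileSize", newValue)); // retrievable
            newDocument.add(new NumericDocValuesField("fileSize", newValue)); // sortable
            return newDocument;
        }
    }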
+ */ + private void incrementFileStatistic(String statisticName, long statisticDelta, Document document, + Set prunedFields, List fieldsToAdd) { + if (statisticDelta != 0) { + prunedFields.add(statisticName); + long oldValue = document.getField(statisticName).numericValue().longValue(); + long newValue = oldValue + statisticDelta; + fieldsToAdd.add(new LongPoint(statisticName, newValue)); + fieldsToAdd.add(new StoredField(statisticName, newValue)); + fieldsToAdd.add(new NumericDocValuesField(statisticName, newValue)); + } + } + /** * Creates a new Lucene document. * @@ -594,7 +601,7 @@ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFi private void createNow(String entityName, JsonObject documentJson) throws NumberFormatException, IOException, LuceneException { Document document = parseDocument(documentJson); - logger.trace("create {} {}", entityName, document.toString()); + logger.trace("create {} {}", entityName, document); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); bucket.addDocument(facetsConfig.build(document)); } @@ -617,17 +624,7 @@ private void createNow(String entityName, JsonObject documentJson) @Path("datafile") public String datafiles(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { - Long uid = null; - try { - uid = bucketNum.getAndIncrement(); - SearchBucket search = new SearchBucket(this, SearchType.DATAFILE, request, sort, searchAfter); - searches.put(uid, search); - return luceneSearchResult("Datafile", search, searchAfter, maxResults); - } catch (IOException | QueryNodeException e) { - logger.error("Error", e); - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } + return searchEntity(request, searchAfter, maxResults, sort, SearchType.DATAFILE); } /** @@ -648,19 +645,7 @@ public String datafiles(@Context HttpServletRequest request, @QueryParam("search @Path("dataset") public String datasets(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { - - Long uid = null; - try { - uid = bucketNum.getAndIncrement(); - SearchBucket search = new SearchBucket(this, SearchType.DATASET, request, sort, searchAfter); - searches.put(uid, search); - return luceneSearchResult("Dataset", search, searchAfter, maxResults); - } catch (IOException | QueryNodeException e) { - logger.error("Error", e); - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } - + return searchEntity(request, searchAfter, maxResults, sort, SearchType.DATASET); } /** @@ -689,14 +674,13 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio // Special case for filesizes Term term = new Term("id", icatId); if (aggregateFiles && entityName.equals("Datafile")) { - long sizeToSubtract = 0; for (ShardBucket shardBucket : bucket.shardList) { IndexSearcher datafileSearcher = shardBucket.searcherManager.acquire(); TopDocs topDocs = datafileSearcher.search(new TermQuery(term), 1); if (topDocs.totalHits.value == 1) { int docId = topDocs.scoreDocs[0].doc; Document datasetDocument = datafileSearcher.doc(docId); - sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue(); + long sizeToSubtract = 
datasetDocument.getField("fileSize").numericValue().longValue(); if (sizeToSubtract > 0) { String datasetId = datasetDocument.getField("dataset.id").stringValue(); String investigationId = datasetDocument.getField("investigation.id").stringValue(); @@ -753,7 +737,7 @@ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, In String parentId; if (joinedEntityName.toLowerCase().contains("investigation")) { fld = "investigation.id"; - if (entityName.toLowerCase().equals("investigation")) { + if (entityName.equalsIgnoreCase("investigation")) { parentId = document.get("id"); } else { parentId = document.get("investigation.id"); @@ -985,17 +969,7 @@ public void run() { @Path("investigation") public String investigations(@Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, @QueryParam("sort") String sort) throws LuceneException { - Long uid = null; - try { - uid = bucketNum.getAndIncrement(); - SearchBucket search = new SearchBucket(this, SearchType.INVESTIGATION, request, sort, searchAfter); - searches.put(uid, search); - return luceneSearchResult("Investigation", search, searchAfter, maxResults); - } catch (IOException | QueryNodeException e) { - logger.error("Error", e); - freeSearcher(uid); - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); - } + return searchEntity(request, searchAfter, maxResults, sort, SearchType.INVESTIGATION); } /** @@ -1023,7 +997,7 @@ public String investigations(@Context HttpServletRequest request, @QueryParam("s @POST @Path("lock/{entityName}") public void lock(@PathParam("entityName") String entityName, @QueryParam("minId") Long minId, - @QueryParam("maxId") Long maxId, @QueryParam("delete") Boolean delete) throws LuceneException { + @QueryParam("maxId") Long maxId, @QueryParam("delete") boolean delete) throws LuceneException { try { logger.info("Requesting lock of {} index, minId={}, maxId={}, delete={}", entityName, minId, maxId, delete); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); @@ -1111,65 +1085,9 @@ private String luceneFacetResult(String name, SearchBucket search, String search TopDocs results = FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); logger.debug("{}", results.totalHits); for (FacetedDimension facetedDimension : search.dimensions.values()) { - if (facetedDimension.getRanges().size() > 0) { - logger.debug("Ranges: {}", facetedDimension.getRanges().get(0).getClass().getSimpleName()); - // Perform range based facets for a numeric field - String dimension = facetedDimension.getDimension(); - Facets facets; - if (DocumentMapping.longFields.contains(dimension)) { - LongRange[] ranges = facetedDimension.getRanges().toArray(new LongRange[0]); - facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges); - } else if (DocumentMapping.doubleFields.contains(dimension)) { - DoubleRange[] ranges = facetedDimension.getRanges().toArray(new DoubleRange[0]); - facets = new DoubleRangeFacetCounts(dimension, facetsCollector, ranges); - } else { - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "'ranges' specified for dimension " + dimension - + " but this is not a supported numeric field"); - } - FacetResult facetResult = facets.getTopChildren(maxLabels, dimension); - facetedDimension.addResult(facetResult); - } else { - // Have a specific string dimension to facet, but these should all be done at - // once for efficiency - 
facetStrings = true; - } - } - try { - if (sparse) { - // Facet all applicable string fields - DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState( - indexSearcher.getIndexReader()); - Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); - addFacetResults(maxLabels, search.dimensions, facets); - logger.trace("Sparse faceting found results for {} dimensions", search.dimensions.size()); - } else if (facetStrings) { - // Only add facets to the results if they match one of the requested dimensions - DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState( - indexSearcher.getIndexReader()); - Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); - List facetResults = facets.getAllDims(maxLabels); - for (FacetResult facetResult : facetResults) { - String dimension = facetResult.dim.replace(".keyword", ""); - FacetedDimension facetedDimension = search.dimensions.get(dimension); - logger.debug("String facets found for {}, requested dimensions were {}", dimension, - search.dimensions.keySet()); - if (facetedDimension != null) { - facetedDimension.addResult(facetResult); - } - } - } - } catch (IllegalArgumentException e) { - // This can occur if no fields in the index have been faceted - logger.error( - "No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage()); - } catch (IllegalStateException e) { - // This can occur if we do not create the IndexSearcher from the same - // DirectoryReader as we used to create the state - logger.error("IndexSearcher used is not based on the DirectoryReader used for facet counting: " - + e.getClass() + " " + e.getMessage()); - throw e; + facetStrings = facetRanges(maxLabels, facetStrings, facetsCollector, facetedDimension); } + facetStrings(search, maxLabels, sparse, facetStrings, indexSearcher, facetsCollector); } } // Build results @@ -1180,6 +1098,101 @@ private String luceneFacetResult(String name, SearchBucket search, String search return aggregations; } + /** + * Performs range based faceting on the provided facetedDimension, if possible. + * + * @param maxLabels The maximum number of labels to collect for each + * facet + * @param facetStrings Whether there are String dimensions that will need + * faceting later + * @param facetsCollector Lucene FacetsCollector used to count results + * @param facetedDimension Representation of the dimension to facet, and used to + * store the results of the faceting + * @return If a string dimension was encountered, returns true. Otherwise, + * returns the value of facetStrings originally passed. 
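For context, a condensed sketch of the Lucene range-faceting API that facetRanges delegates to, using hypothetical labels and bounds on a long field such as fileSize:

    import org.apache.lucene.facet.FacetResult;
    import org.apache.lucene.facet.Facets;
    import org.apache.lucene.facet.FacetsCollector;
    import org.apache.lucene.facet.range.LongRange;
    import org.apache.lucene.facet.range.LongRangeFacetCounts;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;

    class RangeFacetSketch {
        static FacetResult fileSizeFacets(IndexSearcher searcher, int maxLabels) throws Exception {
            FacetsCollector facetsCollector = new FacetsCollector();
            FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, facetsCollector);
            LongRange[] ranges = {
                    new LongRange("under 1 MB", 0, true, 1_000_000, false),
                    new LongRange("1 MB and over", 1_000_000, true, Long.MAX_VALUE, true) };
            Facets facets = new LongRangeFacetCounts("fileSize", facetsCollector, ranges);
            return facets.getTopChildren(maxLabels, "fileSize");
        }
    }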
+ * @throws IOException + * @throws LuceneException + */ + private boolean facetRanges(int maxLabels, boolean facetStrings, FacetsCollector facetsCollector, + FacetedDimension facetedDimension) throws IOException, LuceneException { + if (facetedDimension.getRanges().size() > 0) { + logger.debug("Ranges: {}", facetedDimension.getRanges().get(0).getClass().getSimpleName()); + // Perform range based facets for a numeric field + String dimension = facetedDimension.getDimension(); + Facets facets; + if (DocumentMapping.longFields.contains(dimension)) { + LongRange[] ranges = facetedDimension.getRanges().toArray(new LongRange[0]); + facets = new LongRangeFacetCounts(dimension, facetsCollector, ranges); + } else if (DocumentMapping.doubleFields.contains(dimension)) { + DoubleRange[] ranges = facetedDimension.getRanges().toArray(new DoubleRange[0]); + facets = new DoubleRangeFacetCounts(dimension, facetsCollector, ranges); + } else { + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, + "'ranges' specified for dimension " + dimension + + " but this is not a supported numeric field"); + } + FacetResult facetResult = facets.getTopChildren(maxLabels, dimension); + facetedDimension.addResult(facetResult); + } else { + // Have a specific string dimension to facet, but these should all be done at + // once for efficiency + facetStrings = true; + } + return facetStrings; + } + + /** + * Performs String based faceting. Either this will be sparse (all fields + * targeted) or it will occur for specific fields only. + * + * @param search Bucket being used for this search + * @param maxLabels The maximum number of labels to collect for each facet + * @param sparse Whether to perform sparse faceting (faceting across + * all String fields) + * @param facetStrings Whether specific String dimensions should be faceted + * @param indexSearcher Lucene IndexSearcher used to generate the ReaderState + * @param facetsCollector Lucene FacetsCollector used to count results + * @throws IOException + */ + private void facetStrings(SearchBucket search, int maxLabels, boolean sparse, boolean facetStrings, + IndexSearcher indexSearcher, FacetsCollector facetsCollector) throws IOException { + try { + if (sparse) { + // Facet all applicable string fields + DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState( + indexSearcher.getIndexReader()); + Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); + addFacetResults(maxLabels, search.dimensions, facets); + logger.trace("Sparse faceting found results for {} dimensions", search.dimensions.size()); + } else if (facetStrings) { + // Only add facets to the results if they match one of the requested dimensions + DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState( + indexSearcher.getIndexReader()); + Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); + List facetResults = facets.getAllDims(maxLabels); + for (FacetResult facetResult : facetResults) { + String dimension = facetResult.dim.replace(".keyword", ""); + FacetedDimension facetedDimension = search.dimensions.get(dimension); + logger.debug("String facets found for {}, requested dimensions were {}", dimension, + search.dimensions.keySet()); + if (facetedDimension != null) { + facetedDimension.addResult(facetResult); + } + } + } + } catch (IllegalArgumentException e) { + // This can occur if no fields in the index have been faceted + logger.error( + "No facets found in index, resulting in error: " + 
e.getClass() + " " + e.getMessage()); + } catch (IllegalStateException e) { + // This can occur if we do not create the IndexSearcher from the same + // DirectoryReader as we used to create the state + logger.error("IndexSearcher used is not based on the DirectoryReader used for facet counting: " + + e.getClass() + " " + e.getMessage()); + throw e; + } + } + /** * Add Facets for all dimensions. This will create FacetDimension Objects if they * do not already exist in the facetedDimensionMap, otherwise the counts for @@ -1207,6 +1220,36 @@ private void addFacetResults(int maxLabels, Map facete } } + /** + * Perform search on the specified entity/index. + * + * @param request Incoming Http request containing the query as Json. + * @param searchAfter String of Json representing the last Lucene Document from + * a previous search. + * @param maxResults The maximum number of results to include in the returned + * Json. + * @param sort String of Json representing the sort criteria. + * @param searchType The type of search query to build, corresponding to one of + * the main entities. + * @return String of Json representing the results of the search. + * @throws LuceneException + */ + private String searchEntity(HttpServletRequest request, String searchAfter, int maxResults, String sort, + SearchType searchType) + throws LuceneException { + Long uid = null; + try { + uid = bucketNum.getAndIncrement(); + SearchBucket search = new SearchBucket(this, searchType, request, sort, searchAfter); + searches.put(uid, search); + return luceneSearchResult(searchType.toString(), search, searchAfter, maxResults); + } catch (IOException | QueryNodeException e) { + logger.error("Error", e); + freeSearcher(uid); + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } + } + /** * Perform search on name. * @@ -1235,79 +1278,7 @@ private String luceneSearchResult(String name, SearchBucket search, String searc maxScore = hits[0].score; } logger.debug("{} maxscore {}", totalHits, maxScore); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int shardIndex = -1; - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartObject(); - gen.writeStartArray("results"); - for (ScoreDoc hit : hits) { - shardIndex = hit.shardIndex; - encodeResult(name, gen, hit, searchers.get(shardIndex), search); - } - gen.writeEnd(); // array results - if (hits.length == maxResults) { - ScoreDoc lastDoc = hits[hits.length - 1]; - shardIndex = lastDoc.shardIndex; - gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", shardIndex); - float lastScore = lastDoc.score; - if (!Float.isNaN(lastScore)) { - gen.write("score", lastScore); - } - if (fields != null) { - Document lastDocument = searchers.get(shardIndex).doc(lastDoc.doc); - gen.writeStartArray("fields"); - for (SortField sortField : fields) { - String fieldName = sortField.getField(); - if (fieldName == null) { - // SCORE sorting will have a null fieldName - if (Float.isFinite(lastDoc.score)) { - gen.write(lastDoc.score); - } - continue; - } - IndexableField indexableField = lastDocument.getField(fieldName); - if (indexableField == null) { - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + fieldName - + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); - } - Type type = (sortField instanceof SortedNumericSortField) - ? 
((SortedNumericSortField) sortField).getNumericType() - : sortField.getType(); - switch (type) { - case LONG: - if (indexableField.numericValue() != null) { - gen.write(indexableField.numericValue().longValue()); - } else if (indexableField.stringValue() != null) { - gen.write(new Long(indexableField.stringValue())); - } - break; - case DOUBLE: - if (indexableField.numericValue() != null) { - gen.write(indexableField.numericValue().doubleValue()); - } else if (indexableField.stringValue() != null) { - gen.write(new Double(indexableField.stringValue())); - } - break; - case STRING: - gen.write(indexableField.stringValue()); - break; - default: - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, - "SortField.Type must be one of LONG, DOUBLE, STRING, but it was " + type); - } - } - gen.writeEnd(); // end "fields" array - } - gen.writeEnd(); // end "search_after" object - } - gen.writeEnd(); // end enclosing object - } catch (ArrayIndexOutOfBoundsException e) { - String message = "Attempting to access searcher with shardIndex " + shardIndex + ", but only have " - + searchers.size() + " searchers in total"; - throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, message); - } - logger.trace("Json returned {}", baos.toString()); - return baos.toString(); + return encodeResults(name, search, maxResults, searchers, hits, fields); } /** @@ -1367,6 +1338,113 @@ private TopFieldDocs searchShards(SearchBucket search, int maxResults, List searchers, + ScoreDoc[] hits, SortField[] fields) throws IOException, LuceneException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int shardIndex = -1; + try (JsonGenerator gen = Json.createGenerator(baos)) { + gen.writeStartObject(); + gen.writeStartArray("results"); + for (ScoreDoc hit : hits) { + shardIndex = hit.shardIndex; + encodeResult(name, gen, hit, searchers.get(shardIndex), search); + } + gen.writeEnd(); // array results + if (hits.length == maxResults) { + ScoreDoc lastDoc = hits[hits.length - 1]; + shardIndex = lastDoc.shardIndex; + gen.writeStartObject("search_after").write("doc", lastDoc.doc).write("shardIndex", shardIndex); + float lastScore = lastDoc.score; + if (!Float.isNaN(lastScore)) { + gen.write("score", lastScore); + } + if (fields != null) { + Document lastDocument = searchers.get(shardIndex).doc(lastDoc.doc); + gen.writeStartArray("fields"); + for (SortField sortField : fields) { + encodeSearchAfterField(gen, sortField, lastDoc, lastDocument); + } + gen.writeEnd(); // end "fields" array + } + gen.writeEnd(); // end "search_after" object + } + gen.writeEnd(); // end enclosing object + } catch (ArrayIndexOutOfBoundsException e) { + String message = "Attempting to access searcher with shardIndex " + shardIndex + ", but only have " + + searchers.size() + " searchers in total"; + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, message); + } + logger.trace("Json returned {}", baos); + return baos.toString(); + } + + /** + * Encodes a single SortField used in the search into the Json as to enable the + * ability to "search after" the last result of a previous search. 
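The search_after object encoded here is opaque to clients: they echo it back on the next request, where it is decoded into a Lucene FieldDoc. A rough sketch of that decoding, assuming a scored search with a single LONG sort key (the payload shown in the comment is illustrative):

    import java.io.StringReader;
    import javax.json.Json;
    import javax.json.JsonObject;
    import org.apache.lucene.search.FieldDoc;

    class SearchAfterSketch {
        // e.g. {"doc": 42, "shardIndex": 0, "score": 1.5, "fields": [1655000000000]}
        static FieldDoc decode(String searchAfterJson) {
            JsonObject object = Json.createReader(new StringReader(searchAfterJson)).readObject();
            Object[] sortValues = { object.getJsonArray("fields").getJsonNumber(0).longValue() };
            float score = (float) object.getJsonNumber("score").doubleValue();
            return new FieldDoc(object.getInt("doc"), score, sortValues, object.getInt("shardIndex"));
        }
    }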
+ * + * @param gen JsonGenerator used to encode the results + * @param sortField SortField used to sort the hits + * @param lastDoc The final scored hit of the search + * @param lastDocument The full Document corresponding to the last hit of the + * search + * @throws LuceneException + */ + private void encodeSearchAfterField(JsonGenerator gen, SortField sortField, ScoreDoc lastDoc, Document lastDocument) + throws LuceneException { + String fieldName = sortField.getField(); + if (fieldName == null) { + // SCORE sorting will have a null fieldName + if (Float.isFinite(lastDoc.score)) { + gen.write(lastDoc.score); + } + return; + } + IndexableField indexableField = lastDocument.getField(fieldName); + if (indexableField == null) { + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, "Field " + fieldName + + " used for sorting was not present on the Lucene Document; all sortable fields must also be stored."); + } + Type type = (sortField instanceof SortedNumericSortField) + ? ((SortedNumericSortField) sortField).getNumericType() + : sortField.getType(); + switch (type) { + case LONG: + if (indexableField.numericValue() != null) { + gen.write(indexableField.numericValue().longValue()); + } else if (indexableField.stringValue() != null) { + gen.write(new Long(indexableField.stringValue())); + } + break; + case DOUBLE: + if (indexableField.numericValue() != null) { + gen.write(indexableField.numericValue().doubleValue()); + } else if (indexableField.stringValue() != null) { + gen.write(new Double(indexableField.stringValue())); + } + break; + case STRING: + gen.write(indexableField.stringValue()); + break; + default: + throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, + "SortField.Type must be one of LONG, DOUBLE, STRING, but it was " + type); + } + } + /** * Builds a Lucene Document from the parsed json. * @@ -1376,43 +1454,22 @@ private TopFieldDocs searchShards(SearchBucket search, int maxResults, List fields, Document oldDocument) { Document newDocument = new Document(); for (IndexableField field : oldDocument.getFields()) { if (!fields.contains(field.name())) { - addField(field, newDocument); + Field fieldToAdd = new Field(field); + fieldToAdd.addToDocument(newDocument); } } return newDocument; } /** - * Unlocks the specified index after population, commiting all pending documents - * and - * allowing normal modify operations again. + * Unlocks the specified index after population, committing all pending + * documents + * and allowing normal modify operations again. * * @param entityName Name of the entity/index to unlock. * @throws LuceneException If not locked, or if there's an IOException when @@ -1618,8 +1587,7 @@ public void unlock(@PathParam("entityName") String entityName) throws LuceneExce /** * Updates an existing Lucene document, provided that the target index is not - * locked - * for another operation. + * locked for another operation. * * @param operationBody JsonObject containing the "_index" that the new "doc" * should be created in. 
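The update path above ultimately rests on Lucene's atomic update-by-term primitive; a minimal sketch, assuming each document carries a unique "id" StringField as in this codebase:

    import java.io.IOException;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;

    class UpdateSketch {
        static void upsert(IndexWriter indexWriter, String icatId, Document document) throws IOException {
            document.add(new StringField("id", icatId, Store.YES));
            // deletes any document matching the term and adds the new one atomically
            indexWriter.updateDocument(new Term("id", icatId), document);
        }
    }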
@@ -1685,7 +1653,7 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm * @throws NumberFormatException * @throws IOException */ - private void updateByRelation(JsonObject operationBody, Boolean delete) + private void updateByRelation(JsonObject operationBody, boolean delete) throws LuceneException, NumberFormatException, IOException { for (DocumentMapping.ParentRelationship parentRelationship : DocumentMapping.relationships .get(operationBody.getString("_index"))) { diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index 91858a5..3a28375 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -68,9 +68,9 @@ public enum SearchType { public Sort sort; public FieldDoc searchAfter; public boolean scored; - public Set fields = new HashSet(); + public Set fields = new HashSet<>(); public Map> joinedFields = new HashMap<>(); - public Map dimensions = new HashMap(); + public Map dimensions = new HashMap<>(); private static final SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); static { @@ -100,8 +100,8 @@ public SearchBucket(Lucene lucene) { * @throws IOException * @throws QueryNodeException */ - public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest request, String sort, String searchAfter) - throws LuceneException, IOException, QueryNodeException { + public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest request, String sort, + String searchAfter) throws LuceneException, IOException, QueryNodeException { this.lucene = lucene; searcherMap = new HashMap<>(); parseSort(sort); @@ -248,11 +248,10 @@ public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest req */ private void buildDateRanges(Builder queryBuilder, JsonObject queryJson, String lowerKey, String upperKey, String... fields) throws LuceneException { - Long lower = parseDate(queryJson, lowerKey, 0); - Long upper = parseDate(queryJson, upperKey, 59999); - if (lower != null || upper != null) { - lower = (lower == null) ? Long.MIN_VALUE : lower; - upper = (upper == null) ? Long.MAX_VALUE : upper; + long lower = parseDate(queryJson, lowerKey, 0); + long upper = parseDate(queryJson, upperKey, 59999); + // Only build the query if at least one of the dates is defined + if (lower != Long.MIN_VALUE || upper != Long.MAX_VALUE) { for (String field : fields) { queryBuilder.add(LongPoint.newRangeQuery(field, lower, upper), Occur.MUST); } @@ -281,21 +280,18 @@ private void buildFilterQueries(String target, JsonObject requestedQuery, Builde String filterTarget = i == -1 ? key : key.substring(0, i); String fld = key.substring(i + 1); Query dimensionQuery; - switch (valueType) { - case ARRAY: - Builder builder = new BooleanQuery.Builder(); - // If the key was just a nested entity (no ".") then we should FILTER all of our - // queries on that entity. - Occur occur = i == -1 ? Occur.FILTER : Occur.SHOULD; - for (JsonValue arrayValue : filterObject.getJsonArray(key)) { - Query arrayQuery = parseFilter(target, fld, arrayValue); - builder.add(arrayQuery, occur); - } - dimensionQuery = builder.build(); - break; - - default: - dimensionQuery = parseFilter(target, fld, value); + if (valueType.equals(ValueType.ARRAY)) { + Builder builder = new BooleanQuery.Builder(); + // If the key was just a nested entity (no ".") then we should FILTER all of our + // queries on that entity. + Occur occur = i == -1 ? 
Occur.FILTER : Occur.SHOULD; + for (JsonValue arrayValue : filterObject.getJsonArray(key)) { + Query arrayQuery = parseFilter(target, fld, arrayValue); + builder.add(arrayQuery, occur); + } + dimensionQuery = builder.build(); + } else { + dimensionQuery = parseFilter(target, fld, value); } // Nest the dimension query if needed if (i != -1 && !target.equals(filterTarget)) { @@ -350,7 +346,8 @@ private Query parseFilter(String target, String fld, JsonValue value) throws IOE nestedFilters.forEach(nestedFilter -> { String nestedField = nestedFilter.getString("field"); if (nestedFilter.containsKey("value")) { - TermQuery query = new TermQuery(new Term(nestedField + ".keyword", nestedFilter.getString("value"))); + Term term = new Term(nestedField + ".keyword", nestedFilter.getString("value")); + TermQuery query = new TermQuery(term); nestedBoolBuilder.add(query, Occur.FILTER); } else if (nestedFilter.containsKey("exact")) { buildNestedExactQuery(nestedField, nestedFilter, nestedBoolBuilder); @@ -363,11 +360,10 @@ private Query parseFilter(String target, String fld, JsonValue value) throws IOE return JoinUtil.createJoinQuery("sample.id", false, "sample.id", nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None); } else if (fld.equals("sampleparameter") && target.equals("investigation")) { - Query sampleQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", nestedBoolBuilder.build(), - nestedSearcher, ScoreMode.None); - Query investigationQuery = JoinUtil.createJoinQuery("sample.investigation.id", false, "id", sampleQuery, + Query sampleQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", + nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None); + return JoinUtil.createJoinQuery("sample.investigation.id", false, "id", sampleQuery, lucene.getSearcher(searcherMap, "sample"), ScoreMode.None); - return investigationQuery; } else { return JoinUtil.createJoinQuery(target + ".id", false, "id", nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None); @@ -390,7 +386,8 @@ private Query parseFilter(String target, String fld, JsonValue value) throws IOE } /** - * Builds an exact numeric query, intended for use with numeric or date/time parameters. + * Builds an exact numeric query, intended for use with numeric or date/time + * parameters. * * @param fld Name of the field to apply the range to. 
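A distilled sketch of the exact-or-range shape this method builds for a double-valued parameter: the filter matches when the requested value either falls between the stored rangeBottom/rangeTop bounds or equals the field directly.

    import org.apache.lucene.document.DoublePoint;
    import org.apache.lucene.search.BooleanClause.Occur;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.Query;

    class ExactOrRangeSketch {
        static Query exactOrRange(String fld, double exact) {
            BooleanQuery.Builder rangeBuilder = new BooleanQuery.Builder();
            // matches when rangeBottom <= exact <= rangeTop
            rangeBuilder.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY), Occur.FILTER);
            rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact), Occur.FILTER);
            BooleanQuery.Builder exactOrRangeBuilder = new BooleanQuery.Builder();
            exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD);
            exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld, exact), Occur.SHOULD);
            return exactOrRangeBuilder.build();
        }
    }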
* @param valueObject JsonObject containing "exact", and optionally "units" @@ -408,17 +405,23 @@ private void buildNestedExactQuery(String fld, JsonObject valueObject, BooleanQu String units = valueObject.getString("units", null); if (units != null) { SystemValue exactValue = lucene.icatUnits.new SystemValue(exact, units); - if (exactValue.value != null ) { + if (exactValue.value != null) { // If we were able to parse the units, apply query to the SI value - rangeBuilder.add(DoublePoint.newRangeQuery("rangeTopSI", exactValue.value, Double.POSITIVE_INFINITY), Occur.FILTER); - rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottomSI", Double.NEGATIVE_INFINITY, exactValue.value), Occur.FILTER); + rangeBuilder.add( + DoublePoint.newRangeQuery("rangeTopSI", exactValue.value, Double.POSITIVE_INFINITY), + Occur.FILTER); + rangeBuilder.add( + DoublePoint.newRangeQuery("rangeBottomSI", Double.NEGATIVE_INFINITY, exactValue.value), + Occur.FILTER); exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD); exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld + "SI", exactValue.value), Occur.SHOULD); builder.add(exactOrRangeBuilder.build(), Occur.FILTER); } else { // If units could not be parsed, make them part of the query on the raw data - rangeBuilder.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY), Occur.FILTER); - rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact), Occur.FILTER); + rangeBuilder.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY), + Occur.FILTER); + rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact), + Occur.FILTER); exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD); exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld, exact), Occur.SHOULD); builder.add(exactOrRangeBuilder.build(), Occur.FILTER); @@ -427,7 +430,8 @@ private void buildNestedExactQuery(String fld, JsonObject valueObject, BooleanQu } else { // If units were not provided, just apply to the raw data rangeBuilder.add(DoublePoint.newRangeQuery("rangeTop", exact, Double.POSITIVE_INFINITY), Occur.FILTER); - rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact), Occur.FILTER); + rangeBuilder.add(DoublePoint.newRangeQuery("rangeBottom", Double.NEGATIVE_INFINITY, exact), + Occur.FILTER); exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD); exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld, exact), Occur.SHOULD); builder.add(exactOrRangeBuilder.build(), Occur.FILTER); @@ -509,16 +513,12 @@ private void buildUserNameQuery(String userName, BooleanQuery.Builder luceneQuer * Converts String into number of ms since epoch. * * @param value String representing a Date in the format "yyyyMMddHHmm". - * @return Number of ms since epoch, or null if value was null + * @return Number of ms since epoch. * @throws java.text.ParseException */ - protected static Long decodeTime(String value) throws java.text.ParseException { - if (value == null) { - return null; - } else { - synchronized (df) { - return df.parse(value).getTime(); - } + protected static long decodeTime(String value) throws java.text.ParseException { + synchronized (df) { + return df.parse(value).getTime(); } } @@ -551,7 +551,7 @@ private Query maybeEmptyQuery(Builder luceneQuery) { * @throws LuceneException If the ValueType is not NUMBER or STRING, or if a * STRING value cannot be parsed. 
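A minimal sketch of the half-open date semantics parseDate enables, assuming the "yyyyMMddHHmm" format used above: when one bound is absent, the corresponding Long sentinel simply leaves that end of the range open. (SimpleDateFormat is not thread-safe; the real code synchronises on the shared instance.)

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.search.Query;

    class DateRangeSketch {
        private static final SimpleDateFormat DF = new SimpleDateFormat("yyyyMMddHHmm");

        // lower bound given, upper bound left open via the Long.MAX_VALUE sentinel
        static Query onOrAfter(String field, String lower) throws ParseException {
            return LongPoint.newRangeQuery(field, DF.parse(lower).getTime(), Long.MAX_VALUE);
        }
    }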
*/ - private Long parseDate(JsonObject jsonObject, String key, int offset) throws LuceneException { + private long parseDate(JsonObject jsonObject, String key, int offset) throws LuceneException { if (jsonObject.containsKey(key)) { ValueType valueType = jsonObject.get(key).getValueType(); switch (valueType) { @@ -570,7 +570,13 @@ private Long parseDate(JsonObject jsonObject, String key, int offset) throws Luc "Dates should be represented by a NUMBER or STRING JsonValue, but got " + valueType); } } - return null; + // If the key wasn't present, use either MIN_VALUE or MAX_VALUE based on whether + // we need to offset the date. This is useful for half open ranges. + if (offset == 0) { + return Long.MIN_VALUE; + } else { + return Long.MAX_VALUE; + } } /** @@ -586,7 +592,7 @@ private void parseDimensions(JsonObject jsonObject) throws LuceneException { for (JsonObject dimensionObject : dimensionObjects) { if (!dimensionObject.containsKey("dimension")) { throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, - "'dimension' not specified for facet request " + dimensionObject.toString()); + "'dimension' not specified for facet request " + dimensionObject); } String dimension = dimensionObject.getString("dimension"); FacetedDimension facetDimensionRequest = new FacetedDimension(dimension); @@ -595,15 +601,15 @@ private void parseDimensions(JsonObject jsonObject) throws LuceneException { List jsonRanges = dimensionObject.getJsonArray("ranges").getValuesAs(JsonObject.class); if (DocumentMapping.longFields.contains(dimension)) { for (JsonObject range : jsonRanges) { - Long lower = Long.MIN_VALUE; - Long upper = Long.MAX_VALUE; + long lower = Long.MIN_VALUE; + long upper = Long.MAX_VALUE; if (range.containsKey("from")) { lower = range.getJsonNumber("from").longValueExact(); } if (range.containsKey("to")) { upper = range.getJsonNumber("to").longValueExact(); } - String label = lower.toString() + "-" + upper.toString(); + String label = lower + "-" + upper; if (range.containsKey("key")) { label = range.getString("key"); } @@ -611,15 +617,15 @@ private void parseDimensions(JsonObject jsonObject) throws LuceneException { } } else if (DocumentMapping.doubleFields.contains(dimension)) { for (JsonObject range : jsonRanges) { - Double lower = Double.MIN_VALUE; - Double upper = Double.MAX_VALUE; + double lower = Double.MIN_VALUE; + double upper = Double.MAX_VALUE; if (range.containsKey("from")) { lower = range.getJsonNumber("from").doubleValue(); } if (range.containsKey("to")) { upper = range.getJsonNumber("to").doubleValue(); } - String label = lower.toString() + "-" + upper.toString(); + String label = lower + "-" + upper; if (range.containsKey("key")) { label = range.getString("key"); } @@ -749,8 +755,8 @@ private Builder parseParameter(JsonValue p) throws LuceneException { } else if (parameter.containsKey("lowerDateValue") && parameter.containsKey("upperDateValue")) { buildDateRanges(paramQuery, parameter, "lowerDateValue", "upperDateValue", "dateTimeValue"); } else if (parameter.containsKey("lowerNumericValue") && parameter.containsKey("upperNumericValue")) { - Double pLowerNumericValue = parameter.getJsonNumber("lowerNumericValue").doubleValue(); - Double pUpperNumericValue = parameter.getJsonNumber("upperNumericValue").doubleValue(); + double pLowerNumericValue = parameter.getJsonNumber("lowerNumericValue").doubleValue(); + double pUpperNumericValue = parameter.getJsonNumber("upperNumericValue").doubleValue(); paramQuery.add(DoublePoint.newRangeQuery("numericValue", pLowerNumericValue, 
pUpperNumericValue), Occur.MUST); } @@ -848,7 +854,7 @@ public void parseSort(String sortString) throws LuceneException { List fields = new ArrayList<>(); for (String key : object.keySet()) { String order = object.getString(key); - Boolean reverse; + boolean reverse; if (order.equals("asc")) { reverse = false; } else if (order.equals("desc")) { From 4a7e9db7755edd7a5e8ad1a1e547a807c46457fd Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 12 Oct 2022 19:16:37 +0100 Subject: [PATCH 58/73] run.properties settings updates #18 --- src/main/config/run.properties.example | 9 +++++---- src/main/resources/run.properties | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/config/run.properties.example b/src/main/config/run.properties.example index 4aeab39..dbe555b 100644 --- a/src/main/config/run.properties.example +++ b/src/main/config/run.properties.example @@ -1,7 +1,8 @@ # Real comments in this file are marked with '#' whereas commented out lines # are marked with '!' -directory = ${HOME}/data/lucene -commitSeconds = 5 -maxShardSize = 2147483648 -ip = 127.0.0.1/32 +directory = ${HOME}/data/search +commitSeconds = 5 +maxShardSize = 2147483648 +ip = 127.0.0.1/32 +aggregateFiles = false diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index 7189854..c86b66d 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -1,7 +1,7 @@ # Real comments in this file are marked with '#' whereas commented out lines # are marked with '!' -directory = ${HOME}/data/lucene +directory = ${HOME}/data/search commitSeconds = 5 maxShardSize = 2147483648 ip = 127.0.0.1/32 From 7e53648b90b385177a4f91c7c6cafc5c9c1122ae Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Mon, 17 Oct 2022 13:35:26 +0100 Subject: [PATCH 59/73] parse_synonyms clean up and check for null synonyms #16 --- .../icatproject/lucene/DocumentMapping.java | 12 +- .../lucene/IcatSynonymAnalyzer.java | 22 ++-- src/main/scripts/parse_synonyms.py | 111 +++++++++--------- 3 files changed, 70 insertions(+), 75 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java index 95c023b..4eaf7f0 100644 --- a/src/main/java/org/icatproject/lucene/DocumentMapping.java +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -1,7 +1,5 @@ package org.icatproject.lucene; -import java.io.IOException; -import java.text.ParseException; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; @@ -35,7 +33,7 @@ public ParentRelationship(String parentName, String joiningField, String... fiel } } - private static Analyzer analyzer; + private static Analyzer analyzer = new IcatSynonymAnalyzer();; public static final Set doubleFields = new HashSet<>(); public static final Set facetFields = new HashSet<>(); @@ -55,14 +53,6 @@ public ParentRelationship(String parentName, String joiningField, String... 
fiel public static final StandardQueryParser sampleParser = buildParser("sample.name", "sample.type.name"); static { - try { - // Attempt init an Analyzer which injects synonyms for searching - analyzer = new IcatSynonymAnalyzer(); - } catch (IOException | ParseException e) { - // If synonym files cannot be parsed, default to using the same analyzer as for writing - analyzer = new IcatAnalyzer(); - } - doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI", "rangeTop", "rangeTopSI", "rangeBottom", "rangeBottomSI")); facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue", "technique.name")); diff --git a/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java b/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java index 26841f1..029f8fc 100755 --- a/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java +++ b/src/main/java/org/icatproject/lucene/IcatSynonymAnalyzer.java @@ -22,15 +22,21 @@ public class IcatSynonymAnalyzer extends Analyzer { private SynonymMap synonyms; - public IcatSynonymAnalyzer() - throws IOException, ParseException { + public IcatSynonymAnalyzer() { super(); // Load synonyms from resource file InputStream in = IcatSynonymAnalyzer.class.getClassLoader().getResourceAsStream("synonym.txt"); - BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - SolrSynonymParser parser = new SolrSynonymParser(true, true, new IcatAnalyzer()); - parser.parse(reader); - synonyms = parser.build(); + if (in != null) { + BufferedReader reader = new BufferedReader(new InputStreamReader(in)); + SolrSynonymParser parser = new SolrSynonymParser(true, true, new IcatAnalyzer()); + try { + parser.parse(reader); + synonyms = parser.build(); + } catch (IOException | ParseException e) { + // If we cannot parse the synonyms, do nothing + // For all intents and purposes this will now act as a plain IcatAnalyzer + } + } } @Override @@ -40,7 +46,9 @@ protected TokenStreamComponents createComponents(String fieldName) { sink = new LowerCaseFilter(sink); sink = new StopFilter(sink, IcatAnalyzer.SCIENTIFIC_STOP_WORDS_SET); sink = new PorterStemFilter(sink); - sink = new SynonymGraphFilter(sink, synonyms, false); + if (synonyms != null) { + sink = new SynonymGraphFilter(sink, synonyms, false); + } return new TokenStreamComponents(source, sink); } } diff --git a/src/main/scripts/parse_synonyms.py b/src/main/scripts/parse_synonyms.py index 3ae3d55..d23d2a4 100644 --- a/src/main/scripts/parse_synonyms.py +++ b/src/main/scripts/parse_synonyms.py @@ -1,21 +1,21 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import csv import sys from typing import Dict, List -def addToParents( +def add_to_parents( relationships: Dict[str, Dict[str, List[str]]], label: str, parents: List[str], - childDepth: int + child_depth: int ): """ Adds the `label` to all the entries in `relationships` that have a key in `parents`, then recursively calls itself to add `label` to any - grandparents. `childDepth` is decreased by 1 for each generation to prevent - exponentially large injections. + grandparents. `child_depth` is decreased by 1 for each generation to + prevent exponentially large injections. Parameters ---------- @@ -26,73 +26,71 @@ def addToParents( The term to be added to its `parents`. parents: List[str] The direct parents of the current `label`. - childDepth: int + child_depth: int The number of generations of children to inject for each term. For example, a value of 2 would inject children and their children. 0 will only add alternative terms. 
Negative integers will add all children, grandchildren, etc. Note that this may result in an exponentially large number of terms """ - if childDepth != 0: + if child_depth != 0: for parent in parents: try: relationships[parent]["children"].append(label) # If the parent is equivalent to anything, also add label as a - # child of the equivalentParent - for equivalentParent in relationships[parent]["equivalent"]: - relationships[equivalentParent]["children"].append(label) - addToParents( + # child of the equivalent_parent + for equivalent_parent in relationships[parent]["equivalent"]: + relationships[equivalent_parent]["children"].append(label) + add_to_parents( relationships, label, relationships[parent]["parents"], - childDepth - 1, + child_depth - 1, ) except KeyError: pass -def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): +def main(input_file: str, output_file: str, mode: str, max_child_depth: int): """ Reads a CSV file of terminology and writes it into Solr synonym format for use in synonym injection. Alternative terms are always written, and the - number of child terms is configurable by `maxChildDepth`. + number of child terms is configurable by `max_child_depth`. Parameters ---------- - inputFile: str + input_file: str CSV file to read ontology from. - outputFile: str + output_file: str Solr synonym output file. mode: str Python file mode (w, a, ...) to use when writing the output file. - maxChildDepth: int + max_child_depth: int The maximum number of generations of children to inject for each term. For example, a value of 2 would inject children and their children. 0 will only add alternative terms. Negative integers will add all children, grandchildren, etc. Note that this may result in an exponentially large number of terms """ - altIndices = [] - parentIndices = [] - equivalentIndices = [] - equivalentPairs = {} + alt_indices = [] + parent_indices = [] + equivalent_indices = [] + equivalent_pairs = {} relationships = {} - with open(inputFile) as f: + with open(input_file) as f: reader = csv.reader(f) # Dynamically determine header positions headers = next(reader) for i, header in enumerate(headers): if "Label" == header.strip(): - labelIndex = i - # elif "Class Type" == header: - # classIndex = i + label_index = i elif "Alt Label" in header.strip(): - altIndices.append(i) + alt_indices.append(i) elif "Parent IRI" == header.strip(): - parentIndices.append(i) + parent_indices.append(i) elif "Equivalent" == header.strip(): - equivalentIndices.append(i) + equivalent_indices.append(i) for entries in reader: try: @@ -101,7 +99,7 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): # If we do not have an ID, continue to the next line continue - label = entries[labelIndex] + label = entries[label_index] if label in relationships.keys(): raise ValueError(f"Duplicate entry for label {label}") @@ -111,26 +109,25 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): "equivalent": [], "children": [], } - # classType = entries[classIndex] - for altIndex in altIndices: - alternativeLabel = entries[altIndex] - if alternativeLabel != "": + for alt_index in alt_indices: + alternative_label = entries[alt_index] + if alternative_label: relationships[label]["alternatives"].append( - alternativeLabel + alternative_label ) - for parentIndex in parentIndices: - parent = entries[parentIndex] - if parent != "": + for parent_index in parent_indices: + parent = entries[parent_index] + if parent: 
relationships[label]["parents"].append(parent) - for equivalentIndex in equivalentIndices: - equivalentLabel = entries[equivalentIndex] - if equivalentLabel != "": - relationships[label]["equivalent"].append(equivalentLabel) - equivalentPairs[equivalentLabel] = label + for equivalent_index in equivalent_indices: + equivalent_label = entries[equivalent_index] + if equivalent_label: + relationships[label]["equivalent"].append(equivalent_label) + equivalent_pairs[equivalent_label] = label # If A is equivalent to B, then also set B equivalent to A # This ensures they share all children - for key, value in equivalentPairs.items(): + for key, value in equivalent_pairs.items(): try: relationships[key]["equivalent"].append(value) except KeyError: @@ -138,8 +135,8 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): print(f"{len(relationships)} relationships found") for label, relationship in relationships.items(): - addToParents( - relationships, label, relationship["parents"], maxChildDepth + add_to_parents( + relationships, label, relationship["parents"], max_child_depth ) output = "" @@ -147,41 +144,41 @@ def main(inputFile: str, outputFile: str, mode: str, maxChildDepth: int): # Only write to file if we have alternative or child terms if (len(relationship["alternatives"]) > 0 or len(relationship["children"]) > 0): - leftHandSide = ", ".join( + left_hand_side = ", ".join( sorted(set([label] + relationship["alternatives"])) ) - rightHandSide = ", ".join( + right_hand_side = ", ".join( sorted(set( [label] + relationship["alternatives"] + relationship["children"] )) ) - output += leftHandSide + " => " + rightHandSide + "\n" + output += left_hand_side + " => " + right_hand_side + "\n" - with open(outputFile, mode) as f: + with open(output_file, mode) as f: f.write(output) if __name__ == "__main__": args = sys.argv try: - inputFile = args[1] + input_file = args[1] except IndexError as e: - raise IndexError("inputFile to parse not provided") from e + raise IndexError("input_file to parse not provided") from e try: - outputFile = args[2] + output_file = args[2] except IndexError as e: - raise IndexError("outputFile to write to not provided") from e + raise IndexError("output_file to write to not provided") from e try: mode = args[3] except IndexError: - # Default to appending to the outputFile (no overwrite) + # Default to appending to the output_file (no overwrite) mode = "a" try: - maxChildDepth = int(args[4]) + max_child_depth = int(args[4]) except (IndexError, ValueError): # Default to 0 depth (only alternative terms) - maxChildDepth = 0 + max_child_depth = 0 - main(inputFile, outputFile, mode, maxChildDepth) + main(input_file, output_file, mode, max_child_depth) From c790b5d712e25b31a838130a94e8969eff92f18d Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 21 Oct 2022 15:50:51 +0100 Subject: [PATCH 60/73] Remove returns from Field.java #18 --- .../java/org/icatproject/lucene/Field.java | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Field.java b/src/main/java/org/icatproject/lucene/Field.java index 966332e..c043a8b 100644 --- a/src/main/java/org/icatproject/lucene/Field.java +++ b/src/main/java/org/icatproject/lucene/Field.java @@ -24,9 +24,9 @@ class Field { private abstract class InnerField { - public abstract Document addSortable(Document document) throws NumberFormatException; + public abstract void addSortable(Document document) throws NumberFormatException; - public abstract Document 
addToDocument(Document document) throws NumberFormatException; } @@ -39,7 +39,7 @@ public InnerStringField(String value) { } @Override - public Document addSortable(Document document) throws NumberFormatException { + public void addSortable(Document document) throws NumberFormatException { if (DocumentMapping.sortFields.contains(name)) { if (name.equals("id")) { // Id is a special case, as we need it to be SORTED as a byte ref to allow joins @@ -51,11 +51,10 @@ public Document addSortable(Document document) throws NumberFormatException { } document.add(new SortedDocValuesField(name, new BytesRef(value))); } - return document; } @Override - public Document addToDocument(Document document) throws NumberFormatException { + public void addToDocument(Document document) throws NumberFormatException { addSortable(document); if (DocumentMapping.facetFields.contains(name)) { @@ -69,7 +68,6 @@ public Document addToDocument(Document document) throws NumberFormatException { document.add(new StringField(name, value, Store.YES)); } - return document; } } @@ -83,19 +81,17 @@ public InnerLongField(long value) { } @Override - public Document addSortable(Document document) throws NumberFormatException { + public void addSortable(Document document) throws NumberFormatException { if (DocumentMapping.sortFields.contains(name)) { document.add(new NumericDocValuesField(name, value)); } - return document; } @Override - public Document addToDocument(Document document) throws NumberFormatException { + public void addToDocument(Document document) throws NumberFormatException { addSortable(document); document.add(new LongPoint(name, value)); document.add(new StoredField(name, value)); - return document; } } @@ -109,20 +105,18 @@ public InnerDoubleField(double value) { } @Override - public Document addSortable(Document document) throws NumberFormatException { + public void addSortable(Document document) throws NumberFormatException { if (DocumentMapping.sortFields.contains(name)) { long sortableLong = NumericUtils.doubleToSortableLong(value); document.add(new NumericDocValuesField(name, sortableLong)); } - return document; } @Override - public Document addToDocument(Document document) throws NumberFormatException { + public void addToDocument(Document document) throws NumberFormatException { addSortable(document); document.add(new DoublePoint(name, value)); document.add(new StoredField(name, value)); - return document; } } @@ -170,11 +164,10 @@ public Field(IndexableField indexableField) { * field. * * @param document The document to add to - * @return The original document with this field added to it * @throws NumberFormatException */ - public Document addSortable(Document document) throws NumberFormatException { - return innerField.addSortable(document); + public void addSortable(Document document) throws NumberFormatException { + innerField.addSortable(document); } /** @@ -183,11 +176,10 @@ public Document addSortable(Document document) throws NumberFormatException { * String, long or double field. 
* * @param document The document to add to - * @return The original document with this field added to it * @throws NumberFormatException */ - public Document addToDocument(Document document) throws NumberFormatException { - return innerField.addToDocument(document); + public void addToDocument(Document document) throws NumberFormatException { + innerField.addToDocument(document); } } From 8662e05166c531782a92363e4a146e0003814c4a Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 24 Nov 2022 14:07:01 +0000 Subject: [PATCH 61/73] Update Lucene to 8.11.2 and remove search caching #18 --- pom.xml | 8 ++- .../java/org/icatproject/lucene/Lucene.java | 61 +++++++++---------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/pom.xml b/pom.xml index 26467e5..44b96bb 100755 --- a/pom.xml +++ b/pom.xml @@ -14,7 +14,7 @@ https://repo.icatproject.org/repo github https://github.com/icatproject/icat.lucene - 8.6.0 + 8.11.2 @@ -92,6 +92,12 @@ ${luceneVersion} + + org.apache.lucene + lucene-backward-codecs + ${luceneVersion} + + javax javaee-api diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 41e3a9d..72e6cec 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -153,8 +153,10 @@ public ShardBucket(java.nio.file.Path shardPath) throws IOException { */ public int commit() throws IOException { int cached = indexWriter.numRamDocs(); - indexWriter.commit(); - searcherManager.maybeRefreshBlocking(); + if (cached > 0) { + indexWriter.commit(); + searcherManager.maybeRefreshBlocking(); + } return cached; } } @@ -344,13 +346,9 @@ public void releaseSearchers(List subSearchers) throws IOExceptio private long luceneMaxShardSize; private long maxSearchTimeSeconds; private boolean aggregateFiles; - - private AtomicLong bucketNum = new AtomicLong(); private Map indexBuckets = new ConcurrentHashMap<>(); - private Timer timer; - private Map searches = new ConcurrentHashMap<>(); public IcatUnits icatUnits; /** @@ -440,11 +438,7 @@ public void clear() throws LuceneException { logger.info("Requesting clear"); exit(); - timer = new Timer("LuceneCommitTimer"); - - bucketNum.set(0); indexBuckets.clear(); - searches.clear(); try { Files.walk(luceneDirectory, FileVisitOption.FOLLOW_LINKS).sorted(Comparator.reverseOrder()) @@ -453,7 +447,7 @@ public void clear() throws LuceneException { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } - timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); + initTimer(); logger.info("clear complete - ready to go again"); } @@ -464,11 +458,12 @@ public void clear() throws LuceneException { @POST @Path("commit") public void commit() throws LuceneException { - logger.debug("Requesting commit"); + logger.debug("Requesting commit for {} IndexBuckets", indexBuckets.size()); try { for (Entry entry : indexBuckets.entrySet()) { IndexBucket bucket = entry.getValue(); if (!bucket.locked.get()) { + logger.info("{} is unlocked", entry.getKey()); bucket.commit("Synch", entry.getKey()); } } @@ -819,29 +814,27 @@ private void exit() { public String facet(@PathParam("entityName") String entityName, @Context HttpServletRequest request, @QueryParam("search_after") String searchAfter, @QueryParam("maxResults") int maxResults, @QueryParam("maxLabels") int maxLabels, @QueryParam("sort") String sort) throws LuceneException { - Long uid = null; + SearchBucket search = null; try { - uid = 
bucketNum.getAndIncrement(); - SearchBucket search = new SearchBucket(this, SearchType.GENERIC, request, sort, null); - searches.put(uid, search); + search = new SearchBucket(this, SearchType.GENERIC, request, sort, null); return luceneFacetResult(entityName, search, searchAfter, maxResults, maxLabels); } catch (IOException | QueryNodeException e) { logger.error("Error", e); - freeSearcher(uid); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } finally { + freeSearcher(search); } } /** * Releases all IndexSearchers associated with uid. * - * @param uid Unique Identifier for a set of IndexSearcher to be released. + * @param search SearchBucket to be freed. * @throws LuceneException */ - public void freeSearcher(Long uid) throws LuceneException { - if (uid != null && searches.containsKey(uid)) { // May not be set for internal calls - Map> search = searches.get(uid).searcherMap; - for (Entry> entry : search.entrySet()) { + public void freeSearcher(SearchBucket search) throws LuceneException { + if (search != null) { + for (Entry> entry : search.searcherMap.entrySet()) { String name = entry.getKey(); List subReaders = entry.getValue(); try { @@ -851,7 +844,6 @@ public void freeSearcher(Long uid) throws LuceneException { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); } } - searches.remove(uid); } } @@ -924,8 +916,7 @@ private void init() { : 5; aggregateFiles = props.getBoolean("aggregateFiles", false); - timer = new Timer("LuceneCommitTimer"); - timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); + initTimer(); icatUnits = new IcatUnits(props.getString("units", "")); @@ -940,6 +931,14 @@ private void init() { aggregateFiles); } + /** + * Starts a timer and schedules regular commits of the IndexWriter. 
+ */ + private void initTimer() { + timer = new Timer("LuceneCommitTimer"); + timer.schedule(new CommitTimerTask(), luceneCommitMillis, luceneCommitMillis); + } + class CommitTimerTask extends TimerTask { @Override public void run() { @@ -1235,18 +1234,16 @@ private void addFacetResults(int maxLabels, Map facete * @throws LuceneException */ private String searchEntity(HttpServletRequest request, String searchAfter, int maxResults, String sort, - SearchType searchType) - throws LuceneException { - Long uid = null; + SearchType searchType) throws LuceneException { + SearchBucket search = null; try { - uid = bucketNum.getAndIncrement(); - SearchBucket search = new SearchBucket(this, searchType, request, sort, searchAfter); - searches.put(uid, search); + search = new SearchBucket(this, searchType, request, sort, searchAfter); return luceneSearchResult(searchType.toString(), search, searchAfter, maxResults); } catch (IOException | QueryNodeException e) { logger.error("Error", e); - freeSearcher(uid); throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); + } finally { + freeSearcher(search); } } From 885b876907c6222e20154301587ad6343b084757 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 24 Nov 2022 14:14:09 +0000 Subject: [PATCH 62/73] Replace numRamDocs with hasUncommittedChanges #18 --- src/main/java/org/icatproject/lucene/Lucene.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 72e6cec..641cc8e 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -152,12 +152,11 @@ public ShardBucket(java.nio.file.Path shardPath) throws IOException { * @throws IOException */ public int commit() throws IOException { - int cached = indexWriter.numRamDocs(); - if (cached > 0) { + if (indexWriter.hasUncommittedChanges()) { indexWriter.commit(); searcherManager.maybeRefreshBlocking(); } - return cached; + return indexWriter.numRamDocs(); } } From ee9da0295f748f8883db3ed871142b52ab06b579 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 20 Jan 2023 17:04:23 +0000 Subject: [PATCH 63/73] Cache state for facets #18 --- .../icatproject/lucene/FacetedDimension.java | 4 + .../java/org/icatproject/lucene/Lucene.java | 78 ++++++++++++------- 2 files changed, 56 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/FacetedDimension.java b/src/main/java/org/icatproject/lucene/FacetedDimension.java index 98c51c5..6f9cd3e 100644 --- a/src/main/java/org/icatproject/lucene/FacetedDimension.java +++ b/src/main/java/org/icatproject/lucene/FacetedDimension.java @@ -102,4 +102,8 @@ public String getDimension() { return dimension; } + public String toString() { + return dimension + ": " + labels + ", " + counts; + } + } diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 641cc8e..8f0f639 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -107,6 +107,7 @@ private class ShardBucket { private FSDirectory directory; private IndexWriter indexWriter; private SearcherManager searcherManager; + private DefaultSortedSetDocValuesReaderState state; private AtomicLong documentCount; /** @@ -133,15 +134,10 @@ public ShardBucket(java.nio.file.Path shardPath) throws IOException { logger.debug("Now have " + indexWriter.getDocStats().numDocs + " 
documents indexed"); } searcherManager = new SearcherManager(indexWriter, null); - IndexSearcher indexSearcher = null; - int numDocs; - try { - indexSearcher = searcherManager.acquire(); - numDocs = indexSearcher.getIndexReader().numDocs(); - documentCount = new AtomicLong(numDocs); - } finally { - searcherManager.release(indexSearcher); - } + IndexSearcher indexSearcher = searcherManager.acquire(); + int numDocs = indexSearcher.getIndexReader().numDocs(); + documentCount = new AtomicLong(numDocs); + initState(indexSearcher); logger.info("Created ShardBucket for directory {} with {} Documents", directory.getDirectory(), numDocs); } @@ -155,9 +151,28 @@ public int commit() throws IOException { if (indexWriter.hasUncommittedChanges()) { indexWriter.commit(); searcherManager.maybeRefreshBlocking(); + initState(searcherManager.acquire()); } return indexWriter.numRamDocs(); } + + /** + * Creates a new DefaultSortedSetDocValuesReaderState object for this shard. This can be expensive for indices with a large number of faceted dimensions and labels, so should only be done when needed. + * + * @param indexSearcher The underlying reader of this searcher is used to build the state + * @throws IOException + */ + private void initState(IndexSearcher indexSearcher) throws IOException { + try { + state = new DefaultSortedSetDocValuesReaderState(indexSearcher.getIndexReader()); + } catch (IllegalArgumentException e) { + // This can occur if no fields in the index have been faceted + logger.error( + "No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage()); + } finally { + searcherManager.release(indexSearcher); + } + } } /** @@ -180,6 +195,7 @@ private class IndexBucket { */ public IndexBucket(String entityName) { try { + logger.trace("Initialising bucket for {}", entityName); this.entityName = entityName.toLowerCase(); Long shardIndex = 0L; java.nio.file.Path shardPath = luceneDirectory.resolve(entityName); @@ -208,6 +224,7 @@ public IndexBucket(String entityName) { public List acquireSearchers() throws IOException { List subSearchers = new ArrayList<>(); for (ShardBucket shardBucket : shardList) { + logger.trace("Acquiring searcher for shard"); subSearchers.add(shardBucket.searcherManager.acquire()); } return subSearchers; @@ -826,7 +843,7 @@ public String facet(@PathParam("entityName") String entityName, @Context HttpSer } /** - * Releases all IndexSearchers associated with uid. + * Releases all IndexSearchers associated with a SearchBucket. * * @param search SearchBucket to be freed. 
* @throws LuceneException @@ -857,8 +874,10 @@ public void freeSearcher(SearchBucket search) throws LuceneException { private List getSearchers(Map> searcherMap, String name) throws IOException { String nameLowercase = name.toLowerCase(); + logger.trace("Get searchers for {}", nameLowercase); List subSearchers = searcherMap.get(nameLowercase); if (subSearchers == null) { + logger.trace("No searchers found for {}", nameLowercase); subSearchers = indexBuckets.computeIfAbsent(nameLowercase, k -> new IndexBucket(k)).acquireSearchers(); searcherMap.put(nameLowercase, subSearchers); logger.debug("Remember searcher for {}", nameLowercase); @@ -1076,16 +1095,21 @@ private String luceneFacetResult(String name, SearchBucket search, String search logger.warn("Cannot facet when maxResults={}, maxLabels={}, returning empty list", maxResults, maxLabels); } else { // Iterate over shards and aggregate the facets from each - List searchers = getSearchers(search.searcherMap, name); logger.debug("Faceting {} with {} after {} ", name, search.query, searchAfter); - for (IndexSearcher indexSearcher : searchers) { + List shards = getShards(name); + for (ShardBucket shard : shards) { FacetsCollector facetsCollector = new FacetsCollector(); - TopDocs results = FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); - logger.debug("{}", results.totalHits); - for (FacetedDimension facetedDimension : search.dimensions.values()) { - facetStrings = facetRanges(maxLabels, facetStrings, facetsCollector, facetedDimension); + IndexSearcher indexSearcher = shard.searcherManager.acquire(); + try { + TopDocs results = FacetsCollector.search(indexSearcher, search.query, maxResults, facetsCollector); + logger.debug("{}", results.totalHits); + for (FacetedDimension facetedDimension : search.dimensions.values()) { + facetStrings = facetRanges(maxLabels, facetStrings, facetsCollector, facetedDimension); + } + facetStrings(search, maxLabels, sparse, facetStrings, indexSearcher, facetsCollector, shard.state); + } finally { + shard.searcherManager.release(indexSearcher); } - facetStrings(search, maxLabels, sparse, facetStrings, indexSearcher, facetsCollector); } } // Build results @@ -1150,28 +1174,30 @@ private boolean facetRanges(int maxLabels, boolean facetStrings, FacetsCollector * @param facetStrings Whether specific String dimensions should be faceted * @param indexSearcher Lucene IndexSearcher used to generate the ReaderState * @param facetsCollector Lucene FacetsCollector used to count results + * @param state Lucene State used to count results * @throws IOException */ private void facetStrings(SearchBucket search, int maxLabels, boolean sparse, boolean facetStrings, - IndexSearcher indexSearcher, FacetsCollector facetsCollector) throws IOException { + IndexSearcher indexSearcher, FacetsCollector facetsCollector, DefaultSortedSetDocValuesReaderState state) + throws IOException { try { + if (state == null) { + logger.debug("State not set, this is most likely due to not having any facetable fields"); + return; + } + logger.trace("String faceting"); + Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); if (sparse) { // Facet all applicable string fields - DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState( - indexSearcher.getIndexReader()); - Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); addFacetResults(maxLabels, search.dimensions, facets); - logger.trace("Sparse faceting found results for {} dimensions", 
search.dimensions.size()); + logger.trace("Sparse string faceting found results for {} dimensions", search.dimensions.size()); } else if (facetStrings) { // Only add facets to the results if they match one of the requested dimensions - DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState( - indexSearcher.getIndexReader()); - Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); List facetResults = facets.getAllDims(maxLabels); for (FacetResult facetResult : facetResults) { String dimension = facetResult.dim.replace(".keyword", ""); FacetedDimension facetedDimension = search.dimensions.get(dimension); - logger.debug("String facets found for {}, requested dimensions were {}", dimension, + logger.trace("String facets found for {}, requested dimensions were {}", dimension, search.dimensions.keySet()); if (facetedDimension != null) { facetedDimension.addResult(facetResult); From 421020b5e920d81e341dbb56ee1208c57b908f0d Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Mon, 23 Jan 2023 16:21:45 +0000 Subject: [PATCH 64/73] InvestigationFacilityCycle support --- .../icatproject/lucene/DocumentMapping.java | 10 +- .../java/org/icatproject/lucene/Lucene.java | 140 ++++++++++-------- 2 files changed, 85 insertions(+), 65 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java index 95c2692..2d63fa7 100644 --- a/src/main/java/org/icatproject/lucene/DocumentMapping.java +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -53,17 +53,17 @@ public ParentRelationship(String parentName, String joiningField, String... fiel Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate", "fileSize", "fileCount")); sortFields.addAll( - Arrays.asList("datafile.id", "dataset.id", "investigation.id", "instrument.id", "id", "sample.id", - "sample.investigation.id", "date", "name", "stringValue", "dateTimeValue", "numericValue", - "numericValueSI", "fileSize", "fileCount")); + Arrays.asList("datafile.id", "dataset.id", "facilitycycle.id", "investigation.id", "instrument.id", + "id", "sample.id", "sample.investigation.id", "date", "name", "stringValue", "dateTimeValue", + "numericValue", "numericValueSI", "fileSize", "fileCount")); textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name", "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name", "sample.type.name", "technique.name", "technique.description", "technique.pid", "title", "summary", "facility.name", "user.fullName", "type.name", "doi")); indexedEntities.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "DatafileParameter", - "DatasetParameter", "DatasetTechnique", "InstrumentScientist", "InvestigationInstrument", - "InvestigationParameter", "InvestigationUser", "Sample", "SampleParameter")); + "DatasetParameter", "DatasetTechnique", "InstrumentScientist", "InvestigationFacilityCycle", + "InvestigationInstrument", "InvestigationParameter", "InvestigationUser", "Sample", "SampleParameter")); relationships.put("Instrument", new ParentRelationship[] { new ParentRelationship("InvestigationInstrument", "instrument.id", diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 8f0f639..25e46a3 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -157,18 +157,23 @@ 
public int commit() throws IOException { } /** - * Creates a new DefaultSortedSetDocValuesReaderState object for this shard. This can be expensive for indices with a large number of faceted dimensions and labels, so should only be done when needed. + * Creates a new DefaultSortedSetDocValuesReaderState object for this shard. + * This can be expensive for indices with a large number of faceted dimensions + * and labels, so should only be done when needed. * - * @param indexSearcher The underlying reader of this searcher is used to build the state + * @param indexSearcher The underlying reader of this searcher is used to build + * the state * @throws IOException */ private void initState(IndexSearcher indexSearcher) throws IOException { try { state = new DefaultSortedSetDocValuesReaderState(indexSearcher.getIndexReader()); } catch (IllegalArgumentException e) { - // This can occur if no fields in the index have been faceted + // This can occur if no fields in the index have been faceted, in which case set + // state to null to ensure we don't (erroneously) use the old state logger.error( "No facets found in index, resulting in error: " + e.getClass() + " " + e.getMessage()); + state = null; } finally { searcherManager.release(indexSearcher); } @@ -550,25 +555,27 @@ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFi for (ShardBucket shardBucket : indexBucket.shardList) { shardBucket.commit(); IndexSearcher searcher = shardBucket.searcherManager.acquire(); - Term idTerm = new Term("id", entityId); - TopDocs topDocs = searcher.search(new TermQuery(idTerm), 1); - if (topDocs.totalHits.value == 1) { - int docId = topDocs.scoreDocs[0].doc; - Document document = searcher.doc(docId); + try { + Term idTerm = new Term("id", entityId); + TopDocs topDocs = searcher.search(new TermQuery(idTerm), 1); + if (topDocs.totalHits.value == 1) { + int docId = topDocs.scoreDocs[0].doc; + Document document = searcher.doc(docId); + Set prunedFields = new HashSet<>(); + List fieldsToAdd = new ArrayList<>(); + + incrementFileStatistic("fileSize", deltaFileSize, document, prunedFields, fieldsToAdd); + incrementFileStatistic("fileCount", deltaFileCount, document, prunedFields, fieldsToAdd); + + Document newDocument = pruneDocument(prunedFields, document); + fieldsToAdd.forEach(field -> newDocument.add(field)); + shardBucket.indexWriter.updateDocument(idTerm, facetsConfig.build(newDocument)); + shardBucket.commit(); + break; + } + } finally { shardBucket.searcherManager.release(searcher); - Set prunedFields = new HashSet<>(); - List fieldsToAdd = new ArrayList<>(); - - incrementFileStatistic("fileSize", deltaFileSize, document, prunedFields, fieldsToAdd); - incrementFileStatistic("fileCount", deltaFileCount, document, prunedFields, fieldsToAdd); - - Document newDocument = pruneDocument(prunedFields, document); - fieldsToAdd.forEach(field -> newDocument.add(field)); - shardBucket.indexWriter.updateDocument(idTerm, facetsConfig.build(newDocument)); - shardBucket.commit(); - break; } - shardBucket.searcherManager.release(searcher); } } } @@ -687,21 +694,23 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio if (aggregateFiles && entityName.equals("Datafile")) { for (ShardBucket shardBucket : bucket.shardList) { IndexSearcher datafileSearcher = shardBucket.searcherManager.acquire(); - TopDocs topDocs = datafileSearcher.search(new TermQuery(term), 1); - if (topDocs.totalHits.value == 1) { - int docId = topDocs.scoreDocs[0].doc; - Document datasetDocument = 
datafileSearcher.doc(docId); - long sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue(); - if (sizeToSubtract > 0) { - String datasetId = datasetDocument.getField("dataset.id").stringValue(); - String investigationId = datasetDocument.getField("investigation.id").stringValue(); - aggregateFileSize(0, sizeToSubtract, -1, datasetId, "dataset"); - aggregateFileSize(0, sizeToSubtract, -1, investigationId, "investigation"); + try { + TopDocs topDocs = datafileSearcher.search(new TermQuery(term), 1); + if (topDocs.totalHits.value == 1) { + int docId = topDocs.scoreDocs[0].doc; + Document datasetDocument = datafileSearcher.doc(docId); + long sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue(); + if (sizeToSubtract > 0) { + String datasetId = datasetDocument.getField("dataset.id").stringValue(); + String investigationId = datasetDocument.getField("investigation.id").stringValue(); + aggregateFileSize(0, sizeToSubtract, -1, datasetId, "dataset"); + aggregateFileSize(0, sizeToSubtract, -1, investigationId, "investigation"); + } + break; } + } finally { shardBucket.searcherManager.release(datafileSearcher); - break; } - shardBucket.searcherManager.release(datafileSearcher); } } for (ShardBucket shardBucket : bucket.shardList) { @@ -1036,27 +1045,31 @@ public void lock(@PathParam("entityName") String entityName, @QueryParam("minId" for (ShardBucket shardBucket : bucket.shardList) { IndexSearcher searcher = shardBucket.searcherManager.acquire(); - Query query; - if (minId == null && maxId == null) { - query = new MatchAllDocsQuery(); - } else { - if (minId == null) { - minId = Long.MIN_VALUE; + try { + Query query; + if (minId == null && maxId == null) { + query = new MatchAllDocsQuery(); + } else { + if (minId == null) { + minId = Long.MIN_VALUE; + } + if (maxId == null) { + maxId = Long.MAX_VALUE; + } + query = LongPoint.newRangeQuery("id.long", minId + 1, maxId); } - if (maxId == null) { - maxId = Long.MAX_VALUE; + TopDocs topDoc = searcher.search(query, 1); + if (topDoc.scoreDocs.length != 0) { + // If we have any results in the populating range, unlock and throw + bucket.locked.compareAndSet(true, false); + Document doc = searcher.doc(topDoc.scoreDocs[0].doc); + String id = doc.get("id"); + String message = "While locking index, found id " + id + " in specified range"; + logger.error(message); + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message); } - query = LongPoint.newRangeQuery("id.long", minId + 1, maxId); - } - TopDocs topDoc = searcher.search(query, 1); - if (topDoc.scoreDocs.length != 0) { - // If we have any results in the populating range, unlock and throw - bucket.locked.compareAndSet(true, false); - Document doc = searcher.doc(topDoc.scoreDocs[0].doc); - String id = doc.get("id"); - String message = "While locking index, found id " + id + " in specified range"; - logger.error(message); - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message); + } finally { + shardBucket.searcherManager.release(searcher); } } } catch (IOException e) { @@ -1106,6 +1119,13 @@ private String luceneFacetResult(String name, SearchBucket search, String search for (FacetedDimension facetedDimension : search.dimensions.values()) { facetStrings = facetRanges(maxLabels, facetStrings, facetsCollector, facetedDimension); } + if (shard.state == null) { + logger.debug("State not set, this is most likely due to not having any facetable fields"); + continue; + } else if (shard.state.reader != indexSearcher.getIndexReader()) { 
+ logger.warn("Attempted search with outdated state, create new one from current IndexReader"); + shard.state = new DefaultSortedSetDocValuesReaderState(indexSearcher.getIndexReader()); + } facetStrings(search, maxLabels, sparse, facetStrings, indexSearcher, facetsCollector, shard.state); } finally { shard.searcherManager.release(indexSearcher); @@ -1181,10 +1201,6 @@ private void facetStrings(SearchBucket search, int maxLabels, boolean sparse, bo IndexSearcher indexSearcher, FacetsCollector facetsCollector, DefaultSortedSetDocValuesReaderState state) throws IOException { try { - if (state == null) { - logger.debug("State not set, this is most likely due to not having any facetable fields"); - return; - } logger.trace("String faceting"); Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector); if (sparse) { @@ -1342,12 +1358,16 @@ private TopFieldDocs searchShards(SearchBucket search, int maxResults, List Date: Wed, 6 Sep 2023 15:50:29 +0000 Subject: [PATCH 65/73] Replace javax with jakarta in new files --- .../icatproject/lucene/FacetedDimension.java | 4 ++-- .../java/org/icatproject/lucene/Field.java | 2 +- .../org/icatproject/lucene/SearchBucket.java | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/FacetedDimension.java b/src/main/java/org/icatproject/lucene/FacetedDimension.java index 6f9cd3e..bfd1e7f 100644 --- a/src/main/java/org/icatproject/lucene/FacetedDimension.java +++ b/src/main/java/org/icatproject/lucene/FacetedDimension.java @@ -3,8 +3,8 @@ import java.util.ArrayList; import java.util.List; -import javax.json.Json; -import javax.json.JsonObjectBuilder; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.LabelAndValue; diff --git a/src/main/java/org/icatproject/lucene/Field.java b/src/main/java/org/icatproject/lucene/Field.java index c043a8b..0231546 100644 --- a/src/main/java/org/icatproject/lucene/Field.java +++ b/src/main/java/org/icatproject/lucene/Field.java @@ -1,6 +1,6 @@ package org.icatproject.lucene; -import javax.json.JsonObject; +import jakarta.json.JsonObject; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index 3a28375..e843e97 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -15,15 +15,15 @@ import java.util.TimeZone; import java.util.Map.Entry; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonNumber; -import javax.json.JsonObject; -import javax.json.JsonReader; -import javax.json.JsonString; -import javax.json.JsonValue; -import javax.json.JsonValue.ValueType; -import javax.servlet.http.HttpServletRequest; +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonNumber; +import jakarta.json.JsonObject; +import jakarta.json.JsonReader; +import jakarta.json.JsonString; +import jakarta.json.JsonValue; +import jakarta.json.JsonValue.ValueType; +import jakarta.servlet.http.HttpServletRequest; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.LongPoint; From 453a7252fb8a7a8d5d3dd5f4b02ae3ff05c86028 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 8 Sep 2023 16:09:46 +0000 Subject: [PATCH 66/73] 3.0.0 release notes --- 
src/site/xhtml/installation.xhtml.vm | 19 +++++++++++++++++++ src/site/xhtml/release-notes.xhtml | 12 ++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/site/xhtml/installation.xhtml.vm b/src/site/xhtml/installation.xhtml.vm index 37ec5ef..7158410 100644 --- a/src/site/xhtml/installation.xhtml.vm +++ b/src/site/xhtml/installation.xhtml.vm @@ -56,6 +56,11 @@
    the interval in seconds between committing lucene changes to disk and updating the index.
    +
    maxShardSize
    +
    The maximum number of documents to store in a single index before "sharding" + into an additional index. All sharded indices are searched at once when + performing a search. Has a maximum value of 2147483648 (max int + 1).
    +
    ip
    Ranges of ip addresses to accept requests from. This should be as restrictive as possible - just list the icats you need to @@ -63,6 +68,20 @@ take the form of an IPV4 or IPV6 address followed by the number of bits (starting from the most significant) to consider. For example 127.0.0.1/32 is the IPV4 value for localhost.
    + +
    units
    +
Recognised unit names/symbols. Each symbol recognised by indriya's + SimpleUnitFormat should be followed by a colon, and then a comma-separated + list of units measuring the same property. If the unit is simply an alias + (e.g. "K: kelvin") this is sufficient. If a conversion is required, it + should be followed by this factor (e.g. "J: eV 1.602176634e-19"). Different + units can be separated by a semi-colon. A sample run.properties follows this list.
    + +
    aggregateFiles
    +
Aggregate file sizes/counts for Datasets and Investigations as Datafiles are + added or modified (i.e. in real time). This can have a significant + performance impact when writing to the index. If "false", icat.server can + instead be configured to update sizes at regular intervals.
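Putting these options together, a complete run.properties might look like the following sketch; the values are illustrative only, and the units entry simply reuses the alias and conversion-factor examples quoted above:

# Illustrative run.properties sketch - adjust paths and values for your deployment
directory = ${HOME}/data/search
commitSeconds = 5
maxShardSize = 2147483648
ip = 127.0.0.1/32
# "K: kelvin" is a plain alias; "J: eV 1.602176634e-19" applies a conversion factor
units = K: kelvin; J: eV 1.602176634e-19
aggregateFiles = false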
    diff --git a/src/site/xhtml/release-notes.xhtml b/src/site/xhtml/release-notes.xhtml index 1b43c6b..145a2f9 100644 --- a/src/site/xhtml/release-notes.xhtml +++ b/src/site/xhtml/release-notes.xhtml @@ -6,6 +6,18 @@

    ICAT Lucene Server Release Notes

    +

    3.0.0

    +

    Significant changes to the functionality and performance of searches:

    +
      +
    • Ability to search on over 2 billion documents
    • +
    • Enable sorting on specific entity fields
    • +
    • "Infinitely" search the data by using the searchAfter parameter
    • +
    • Faceted searches
    • +
    • Replace single "text" field with specific fields that reflect the ICAT schema to allow field targeting
    • +
    • Support for unit conversion on numeric Parameters
    • +
    • Support for synonym injection
    • +
    +
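The searchAfter behaviour listed above maps onto Lucene's own paging API rather than anything specific to this service; the sketch below shows that underlying mechanism against a throwaway in-memory index, where the field name and document contents are illustrative assumptions:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.ByteBuffersDirectory;

public class SearchAfterSketch {
    public static void main(String[] args) throws Exception {
        ByteBuffersDirectory directory = new ByteBuffersDirectory();
        try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer()))) {
            for (int i = 0; i < 25; i++) {
                Document doc = new Document();
                doc.add(new StringField("name", "doc" + i, Store.YES));
                writer.addDocument(doc);
            }
        }
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            ScoreDoc last = null; // acts as the search_after cursor
            while (true) {
                // Each call resumes from the last hit of the previous page
                TopDocs page = searcher.searchAfter(last, new MatchAllDocsQuery(), 10);
                if (page.scoreDocs.length == 0) {
                    break;
                }
                for (ScoreDoc hit : page.scoreDocs) {
                    System.out.println(searcher.doc(hit.doc).get("name"));
                }
                last = page.scoreDocs[page.scoreDocs.length - 1];
            }
        }
    }
}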

    2.0.2

    Fix compatibility with indexes built by icat.lucene 1.x
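Likewise, the faceted searches noted for 3.0.0, and the per-shard DefaultSortedSetDocValuesReaderState cached in the patches above, rest on Lucene's sorted-set doc values facets. A minimal sketch of that pattern follows; the "type.name" dimension matches one of this project's faceted fields, but the documents and values are invented:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.ByteBuffersDirectory;

public class FacetSketch {
    public static void main(String[] args) throws Exception {
        ByteBuffersDirectory directory = new ByteBuffersDirectory();
        FacetsConfig config = new FacetsConfig();
        try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer()))) {
            for (String type : new String[] { "raw", "raw", "processed" }) {
                Document doc = new Document();
                doc.add(new SortedSetDocValuesFacetField("type.name", type));
                // config.build adds the doc values that the facet counts need
                writer.addDocument(config.build(doc));
            }
        }
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Building this state walks the facet ordinals and throws
            // IllegalArgumentException if nothing was faceted, which is why the
            // service above builds it once per reader and caches it
            DefaultSortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(reader);
            FacetsCollector collector = new FacetsCollector();
            FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, collector);
            Facets facets = new SortedSetDocValuesFacetCounts(state, collector);
            for (FacetResult result : facets.getAllDims(10)) {
                System.out.println(result); // e.g. dim=type.name ... raw (2) processed (1)
            }
        }
    }
}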

    From d31e5b7c93130fee8759e4f53c07065eb0b0ec42 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 26 Sep 2023 10:07:01 +0000 Subject: [PATCH 67/73] Index id as long instead of String #18 --- .../icatproject/lucene/DocumentMapping.java | 23 +- .../java/org/icatproject/lucene/Field.java | 8 - .../java/org/icatproject/lucene/Lucene.java | 149 ++++++---- .../org/icatproject/lucene/SearchBucket.java | 274 ++++++++++-------- 4 files changed, 257 insertions(+), 197 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java index b1d88f6..4b7998a 100644 --- a/src/main/java/org/icatproject/lucene/DocumentMapping.java +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -33,7 +33,7 @@ public ParentRelationship(String parentName, String joiningField, String... fiel } } - private static Analyzer analyzer = new IcatSynonymAnalyzer();; + private static Analyzer analyzer = new IcatSynonymAnalyzer();; public static final Set doubleFields = new HashSet<>(); public static final Set facetFields = new HashSet<>(); @@ -44,12 +44,12 @@ public ParentRelationship(String parentName, String joiningField, String... fiel public static final Map relationships = new HashMap<>(); public static final StandardQueryParser genericParser = buildParser(); - public static final StandardQueryParser datafileParser = buildParser("name", "description", "location", "datafileFormat.name", "visitId", - "sample.name", "sample.type.name", "doi"); - public static final StandardQueryParser datasetParser = buildParser("name", "description", "sample.name", "sample.type.name", "type.name", - "visitId", "doi"); - public static final StandardQueryParser investigationParser = buildParser("name", "visitId", "title", "summary", "facility.name", - "type.name", "doi"); + public static final StandardQueryParser datafileParser = buildParser("name", "description", "location", + "datafileFormat.name", "visitId", "sample.name", "sample.type.name", "doi"); + public static final StandardQueryParser datasetParser = buildParser("name", "description", "sample.name", + "sample.type.name", "type.name", "visitId", "doi"); + public static final StandardQueryParser investigationParser = buildParser("name", "visitId", "title", "summary", + "facility.name", "type.name", "doi"); public static final StandardQueryParser sampleParser = buildParser("sample.name", "sample.type.name"); static { @@ -58,10 +58,13 @@ public ParentRelationship(String parentName, String joiningField, String... 
fiel facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue", "technique.name")); longFields.addAll( Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate", "fileSize", - "fileCount")); + "fileCount", "datafile.id", "datafileFormat.id", "dataset.id", "facility.id", + "facilityCycle.id", "investigation.id", "instrument.id", "id", "sample.id", + "sample.investigation.id", "sample.type.id", "technique.id", "type.id", "user.id")); sortFields.addAll( - Arrays.asList("datafile.id", "dataset.id", "facilitycycle.id", "investigation.id", "instrument.id", - "id", "sample.id", "sample.investigation.id", "date", "name", "stringValue", "dateTimeValue", + Arrays.asList("datafile.id", "datafileFormat.id", "dataset.id", "facility.id", "facilityCycle.id", + "investigation.id", "instrument.id", "id", "sample.id", "sample.investigation.id", + "technique.id", "type.id", "user.id", "date", "name", "stringValue", "dateTimeValue", "numericValue", "numericValueSI", "fileSize", "fileCount")); textFields.addAll(Arrays.asList("name", "visitId", "description", "location", "dataset.name", "investigation.name", "instrument.name", "instrument.fullName", "datafileFormat.name", "sample.name", diff --git a/src/main/java/org/icatproject/lucene/Field.java b/src/main/java/org/icatproject/lucene/Field.java index 0231546..ea33aa3 100644 --- a/src/main/java/org/icatproject/lucene/Field.java +++ b/src/main/java/org/icatproject/lucene/Field.java @@ -41,14 +41,6 @@ public InnerStringField(String value) { @Override public void addSortable(Document document) throws NumberFormatException { if (DocumentMapping.sortFields.contains(name)) { - if (name.equals("id")) { - // Id is a special case, as we need to to be SORTED as a byte ref to allow joins - // but also SORTED_NUMERIC to ensure a deterministic order to results - Long longValue = new Long(value); - document.add(new NumericDocValuesField("id.long", longValue)); - document.add(new StoredField("id.long", longValue)); - document.add(new LongPoint("id.long", longValue)); - } document.add(new SortedDocValuesField(name, new BytesRef(value))); } } diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index 3e84a52..a173aa4 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -76,7 +76,6 @@ import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException; import org.apache.lucene.search.SortedNumericSortField; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TimeLimitingCollector; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldCollector; @@ -248,18 +247,29 @@ public void addDocument(Document document) throws IOException { } /** - * Updates documents matching the term with the provided document. + * Deletes a document from the appropriate shard for this index. * - * @param term Term identifying the old document(s) to be updated. - * @param document The document that will replace the old document(s). + * @param icatId The ICAT id of the document to be deleted. 
* @throws IOException */ - public void updateDocument(Term term, Document document) throws IOException { + public void deleteDocument(long icatId) throws IOException { for (ShardBucket shardBucket : shardList) { - shardBucket.indexWriter.updateDocument(term, document); + shardBucket.indexWriter.deleteDocuments(LongPoint.newExactQuery("id", icatId)); } } + /** + * Updates the document with the provided ICAT id. + * + * @param icatId The ICAT id of the document to be updated. + * @param document The document that will replace the old document. + * @throws IOException + */ + public void updateDocument(long icatId, Document document) throws IOException { + deleteDocument(icatId); + addDocument(document); + } + /** * Creates a new ShardBucket and stores it in the shardMap. * @@ -485,7 +495,7 @@ public void commit() throws LuceneException { for (Entry entry : indexBuckets.entrySet()) { IndexBucket bucket = entry.getValue(); if (!bucket.locked.get()) { - logger.info("{} is unlocked", entry.getKey()); + logger.trace("{} is unlocked", entry.getKey()); bucket.commit("Synch", entry.getKey()); } } @@ -523,9 +533,8 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx if (aggregateFiles && entityName.equals("Datafile")) { JsonNumber jsonFileSize = documentObject.getJsonNumber("fileSize"); if (jsonFileSize != null) { - String datasetId = documentObject.getString("dataset.id", null); - String investigationId = documentObject.getString("investigation.id", null); - logger.trace("Aggregating {} to {}, {}", jsonFileSize.longValue(), datasetId, investigationId); + JsonNumber datasetId = documentObject.getJsonNumber("dataset.id"); + JsonNumber investigationId = documentObject.getJsonNumber("investigation.id"); aggregateFileSize(jsonFileSize.longValueExact(), 0, 1, datasetId, "dataset"); aggregateFileSize(jsonFileSize.longValueExact(), 0, 1, investigationId, "investigation"); } @@ -543,22 +552,42 @@ private void create(JsonObject operationBody) throws NumberFormatException, IOEx * @param sizeToSubtract Decreases the fileSize of the entity by this much. * Should be 0 for creates. * @param deltaFileCount Changes the file count by this much. - * @param entityId Icat id of entity to update. + * @param entityId Icat id of entity to update as a JsonNumber. * @param index Index (entity) to update. * @throws IOException */ - private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFileCount, String entityId, - String index) - throws IOException { + private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFileCount, JsonNumber entityId, + String index) throws IOException { + if (entityId != null) { + aggregateFileSize(sizeToAdd, sizeToSubtract, deltaFileCount, entityId.longValueExact(), index); + } + } + + /** + * Changes the fileSize on an entity by the specified amount. This is used to + * aggregate the individual fileSize of Datafiles up to Dataset and + * Investigation sizes. + * + * @param sizeToAdd Increases the fileSize of the entity by this much. + * Should be 0 for deletes. + * @param sizeToSubtract Decreases the fileSize of the entity by this much. + * Should be 0 for creates. + * @param deltaFileCount Changes the file count by this much. + * @param entityId Icat id of entity to update as a long. + * @param index Index (entity) to update. 
+ * @throws IOException + */ + private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFileCount, long entityId, + String index) throws IOException { long deltaFileSize = sizeToAdd - sizeToSubtract; - if (entityId != null && (deltaFileSize != 0 || deltaFileCount != 0)) { + if (deltaFileSize != 0 || deltaFileCount != 0) { IndexBucket indexBucket = indexBuckets.computeIfAbsent(index, k -> new IndexBucket(k)); for (ShardBucket shardBucket : indexBucket.shardList) { shardBucket.commit(); IndexSearcher searcher = shardBucket.searcherManager.acquire(); try { - Term idTerm = new Term("id", entityId); - TopDocs topDocs = searcher.search(new TermQuery(idTerm), 1); + Query idQuery = LongPoint.newExactQuery("id", entityId); + TopDocs topDocs = searcher.search(idQuery, 1); if (topDocs.totalHits.value == 1) { int docId = topDocs.scoreDocs[0].doc; Document document = searcher.doc(docId); @@ -570,7 +599,8 @@ private void aggregateFileSize(long sizeToAdd, long sizeToSubtract, long deltaFi Document newDocument = pruneDocument(prunedFields, document); fieldsToAdd.forEach(field -> newDocument.add(field)); - shardBucket.indexWriter.updateDocument(idTerm, facetsConfig.build(newDocument)); + shardBucket.indexWriter.deleteDocuments(idQuery); + shardBucket.indexWriter.addDocument(facetsConfig.build(newDocument)); shardBucket.commit(); break; } @@ -682,7 +712,7 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio updateByRelation(operationBody, true); } if (DocumentMapping.indexedEntities.contains(entityName)) { - String icatId = operationBody.getString("_id"); + long icatId = operationBody.getJsonNumber("_id").longValueExact(); try { IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); if (bucket.locked.get()) { @@ -690,20 +720,21 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio "Lucene locked for " + entityName); } logger.trace("delete {} {}", entityName, icatId); + Query idQuery = LongPoint.newExactQuery("id", icatId); // Special case for filesizes - Term term = new Term("id", icatId); if (aggregateFiles && entityName.equals("Datafile")) { for (ShardBucket shardBucket : bucket.shardList) { IndexSearcher datafileSearcher = shardBucket.searcherManager.acquire(); try { - TopDocs topDocs = datafileSearcher.search(new TermQuery(term), 1); + TopDocs topDocs = datafileSearcher.search(idQuery, 1); if (topDocs.totalHits.value == 1) { int docId = topDocs.scoreDocs[0].doc; Document datasetDocument = datafileSearcher.doc(docId); long sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue(); if (sizeToSubtract > 0) { - String datasetId = datasetDocument.getField("dataset.id").stringValue(); - String investigationId = datasetDocument.getField("investigation.id").stringValue(); + long datasetId = datasetDocument.getField("dataset.id").numericValue().longValue(); + long investigationId = datasetDocument.getField("investigation.id").numericValue() + .longValue(); aggregateFileSize(0, sizeToSubtract, -1, datasetId, "dataset"); aggregateFileSize(0, sizeToSubtract, -1, investigationId, "investigation"); } @@ -715,7 +746,7 @@ private void delete(JsonObject operationBody) throws LuceneException, IOExceptio } } for (ShardBucket shardBucket : bucket.shardList) { - shardBucket.indexWriter.deleteDocuments(term); + shardBucket.indexWriter.deleteDocuments(idQuery); } } catch (IOException e) { throw new LuceneException(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); @@ 
-755,20 +786,20 @@ private void encodeResult(String entityName, JsonGenerator gen, ScoreDoc hit, In List shards = getShards(joinedEntityName); SearchBucket joinedSearch = new SearchBucket(this); String fld; - String parentId; + long parentId; if (joinedEntityName.toLowerCase().contains("investigation")) { fld = "investigation.id"; if (entityName.equalsIgnoreCase("investigation")) { - parentId = document.get("id"); + parentId = document.getField("id").numericValue().longValue(); } else { - parentId = document.get("investigation.id"); + parentId = document.getField("investigation.id").numericValue().longValue(); } } else { fld = entityName.toLowerCase() + ".id"; - parentId = document.get("id"); + parentId = document.getField("id").numericValue().longValue(); } - joinedSearch.query = new TermQuery(new Term(fld, parentId)); - joinedSearch.sort = new Sort(new SortedNumericSortField("id.long", Type.LONG)); + joinedSearch.query = LongPoint.newExactQuery(fld, parentId); + joinedSearch.sort = new Sort(new SortedNumericSortField("id", Type.LONG)); TopFieldDocs topFieldDocs = searchShards(joinedSearch, 100, shards); gen.writeStartArray(joinedEntityName.toLowerCase()); for (ScoreDoc joinedHit : topFieldDocs.scoreDocs) { @@ -939,7 +970,7 @@ private void init() { } luceneCommitMillis = props.getPositiveInt("commitSeconds") * 1000; - luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), new Long(Integer.MAX_VALUE + 1)); + luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), Long.valueOf(Integer.MAX_VALUE + 1)); maxSearchTimeSeconds = props.has("maxSearchTimeSeconds") ? props.getPositiveLong("maxSearchTimeSeconds") : 5; aggregateFiles = props.getBoolean("aggregateFiles", false); @@ -1057,14 +1088,14 @@ public void lock(@PathParam("entityName") String entityName, @QueryParam("minId" if (maxId == null) { maxId = Long.MAX_VALUE; } - query = LongPoint.newRangeQuery("id.long", minId + 1, maxId); + query = LongPoint.newRangeQuery("id", minId + 1, maxId); } TopDocs topDoc = searcher.search(query, 1); if (topDoc.scoreDocs.length != 0) { // If we have any results in the populating range, unlock and throw bucket.locked.compareAndSet(true, false); Document doc = searcher.doc(topDoc.scoreDocs[0].doc); - String id = doc.get("id"); + long id = doc.getField("id").numericValue().longValue(); String message = "While locking index, found id " + id + " in specified range"; logger.error(message); throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message); @@ -1469,14 +1500,14 @@ private void encodeSearchAfterField(JsonGenerator gen, SortField sortField, Scor if (indexableField.numericValue() != null) { gen.write(indexableField.numericValue().longValue()); } else if (indexableField.stringValue() != null) { - gen.write(new Long(indexableField.stringValue())); + gen.write(Long.valueOf(indexableField.stringValue())); } break; case DOUBLE: if (indexableField.numericValue() != null) { gen.write(indexableField.numericValue().doubleValue()); } else if (indexableField.stringValue() != null) { - gen.write(new Double(indexableField.stringValue())); + gen.write(Double.valueOf(indexableField.stringValue())); } break; case STRING: @@ -1511,15 +1542,18 @@ private Document parseDocument(JsonObject json) { * @param json A JsonObject representing the Document to be built * @param document The new Document being built * @param key A key present in json + * @retrun Whether a conversion has been performed or not */ - private void convertUnits(JsonObject json, Document document, String key) { + private 
boolean convertUnits(JsonObject json, Document document, String key) { // Whenever the units are set or changed, convert to SI if (key.equals("type.units")) { String unitString = json.getString("type.units"); convertValue(document, json, unitString, "numericValue"); convertValue(document, json, unitString, "rangeTop"); convertValue(document, json, unitString, "rangeBottom"); + return true; } + return false; } /** @@ -1566,20 +1600,28 @@ private void convertValue(Document document, JsonObject json, String unitString, * @param oldDocument Lucene Document to be updated. * @return Lucene Document with updated fields. */ - private Document updateDocument(JsonObject json, Document oldDocument) { + private Document updateDocumentFields(JsonObject json, Document oldDocument) { Document newDocument = new Document(); + List fieldsSI = new ArrayList<>(); + boolean hasNewUnits = false; for (IndexableField field : oldDocument.getFields()) { String fieldName = field.name(); if (json.containsKey(fieldName)) { Field jsonField = new Field(json, fieldName); jsonField.addToDocument(newDocument); - convertUnits(json, newDocument, fieldName); + hasNewUnits = hasNewUnits || convertUnits(json, newDocument, fieldName); + } else if (fieldName.endsWith("SI")) { + fieldsSI.add(new Field(field)); } else { - Field sortField = new Field(field); - sortField.addSortable(newDocument); - newDocument.add(field); + Field oldField = new Field(field); + oldField.addToDocument(newDocument); } } + if (!hasNewUnits) { + fieldsSI.forEach((field) -> { + field.addToDocument(newDocument); + }); + } return newDocument; } @@ -1644,7 +1686,7 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm updateByRelation(operationBody, false); } if (DocumentMapping.indexedEntities.contains(entityName)) { - String icatId = operationBody.getString("_id"); + long icatId = operationBody.getJsonNumber("_id").longValueExact(); JsonObject documentObject = operationBody.getJsonObject("doc"); Document document = parseDocument(documentObject); IndexBucket bucket = indexBuckets.computeIfAbsent(entityName.toLowerCase(), k -> new IndexBucket(k)); @@ -1659,15 +1701,15 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm long sizeToSubtract = 0; List datafileSearchers = bucket.acquireSearchers(); for (IndexSearcher datafileSearcher : datafileSearchers) { - TopDocs topDocs = datafileSearcher.search(new TermQuery(new Term("id", icatId)), 1); + TopDocs topDocs = datafileSearcher.search(LongPoint.newExactQuery("id", icatId), 1); if (topDocs.totalHits.value == 1) { int docId = topDocs.scoreDocs[0].doc; Document datasetDocument = datafileSearcher.doc(docId); sizeToSubtract = datasetDocument.getField("fileSize").numericValue().longValue(); long sizeToAdd = jsonFileSize.longValueExact(); if (sizeToAdd != sizeToSubtract) { - String datasetId = documentObject.getString("dataset.id", null); - String investigationId = documentObject.getString("investigation.id", null); + JsonNumber datasetId = documentObject.getJsonNumber("dataset.id"); + JsonNumber investigationId = documentObject.getJsonNumber("investigation.id"); aggregateFileSize(sizeToAdd, sizeToSubtract, 0, datasetId, "dataset"); aggregateFileSize(sizeToAdd, sizeToSubtract, 0, investigationId, "investigation"); } @@ -1677,7 +1719,7 @@ private void update(JsonObject operationBody) throws LuceneException, NumberForm } } logger.trace("update: {}", document); - bucket.updateDocument(new Term("id", icatId), facetsConfig.build(document)); + 
bucket.updateDocument(icatId, facetsConfig.build(document)); } } @@ -1700,7 +1742,7 @@ private void updateByRelation(JsonObject operationBody, boolean delete) throws LuceneException, NumberFormatException, IOException { for (DocumentMapping.ParentRelationship parentRelationship : DocumentMapping.relationships .get(operationBody.getString("_index"))) { - String childId = operationBody.getString("_id"); + long childId = operationBody.getJsonNumber("_id").longValueExact(); IndexBucket bucket = indexBuckets.computeIfAbsent(parentRelationship.parentName.toLowerCase(), k -> new IndexBucket(k)); if (bucket.locked.get()) { @@ -1710,18 +1752,17 @@ private void updateByRelation(JsonObject operationBody, boolean delete) IndexSearcher searcher = getSearcher(new HashMap<>(), parentRelationship.parentName); int blockSize = 10000; - TermQuery query = new TermQuery(new Term(parentRelationship.joiningField, childId)); - Sort sort = new Sort(new SortField("id", Type.STRING)); + Query query = LongPoint.newExactQuery(parentRelationship.joiningField, childId); + Sort sort = new Sort(new SortField("id", Type.LONG)); ScoreDoc[] scoreDocs = searcher.search(query, blockSize, sort).scoreDocs; while (scoreDocs.length != 0) { - TopDocs topDocs = searcher.search(query, blockSize); - for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + for (ScoreDoc scoreDoc : scoreDocs) { Document oldDocument = searcher.doc(scoreDoc.doc); - String parentId = oldDocument.get("id"); + long parentId = oldDocument.getField("id").numericValue().longValue(); Document newDocument = delete ? pruneDocument(parentRelationship.fields, oldDocument) - : updateDocument(operationBody.getJsonObject("doc"), oldDocument); + : updateDocumentFields(operationBody.getJsonObject("doc"), oldDocument); logger.trace("updateByRelation: {}", newDocument); - bucket.updateDocument(new Term("id", parentId), facetsConfig.build(newDocument)); + bucket.updateDocument(parentId, facetsConfig.build(newDocument)); } scoreDocs = searcher.searchAfter(scoreDocs[scoreDocs.length - 1], query, blockSize, sort).scoreDocs; } diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index e843e97..18ad0c1 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -85,6 +85,7 @@ public enum SearchType { */ public SearchBucket(Lucene lucene) { this.lucene = lucene; + searcherMap = new HashMap<>(); } /** @@ -110,122 +111,137 @@ public SearchBucket(Lucene lucene, SearchType searchType, HttpServletRequest req parseFields(o); parseDimensions(o); JsonObject jsonQuery = o.getJsonObject("query"); - BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); - String userName; - String text; switch (searchType) { case GENERIC: - parseGenericQuery(jsonQuery, luceneQuery); + parseGenericQuery(jsonQuery); return; case DATAFILE: - parseSearchAfter(searchAfter); - buildFilterQueries("datafile", jsonQuery, luceneQuery); + parseDatafileQuery(searchAfter, jsonQuery); + return; + case DATASET: + parseDatasetQuery(searchAfter, jsonQuery); + return; + case INVESTIGATION: + parseInvestigationQuery(searchAfter, jsonQuery); + return; + } + } catch (QueryNodeParseException e) { + String message = "Search term could not be parsed due to syntax errors"; + throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message); + } + } - userName = jsonQuery.getString("user", null); - if (userName != null) { - buildUserNameQuery(userName, luceneQuery, "investigation.id"); - 
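The Long.class argument threaded through every JoinUtil.createJoinQuery call in this patch selects Lucene's numeric join overload, which reads doc values on the "from" field and turns the collected values into a point query on the "to" field. That only works if each id is indexed in several forms at once. A rough helper showing the combination these queries rely on; the helper is invented for illustration, and exactly which doc-values flavour each API consumes should be checked against the Lucene version in use:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.document.SortedNumericDocValuesField;
    import org.apache.lucene.document.StoredField;

    final class LongFields {
        private LongFields() {
        }

        // Index one logical long value as the cooperating fields Lucene needs.
        static void addLong(Document doc, String name, long value) {
            doc.add(new LongPoint(name, value));                   // exact/range/set queries; "to" side of joins
            doc.add(new SortedNumericDocValuesField(name, value)); // SortedNumericSortField sorting; "from" side of joins
            doc.add(new StoredField(name, value));                 // lets getField(name).numericValue() work on hits
        }
    }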
} + private void parseDatafileQuery(String searchAfter, JsonObject jsonQuery) + throws LuceneException, IOException, QueryNodeException { + BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); + parseSearchAfter(searchAfter); + buildFilterQueries("datafile", jsonQuery, luceneQuery); - text = jsonQuery.getString("text", null); - if (text != null) { - luceneQuery.add(DocumentMapping.datafileParser.parse(text, null), Occur.MUST); - } + String userName = jsonQuery.getString("user", null); + if (userName != null) { + buildUserNameQuery(userName, luceneQuery, "investigation.id"); + } - buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "date"); + String text = jsonQuery.getString("text", null); + if (text != null) { + luceneQuery.add(DocumentMapping.datafileParser.parse(text, null), Occur.MUST); + } - if (jsonQuery.containsKey("parameters")) { - JsonArray parameters = jsonQuery.getJsonArray("parameters"); - IndexSearcher datafileParameterSearcher = lucene.getSearcher(searcherMap, "DatafileParameter"); - for (JsonValue p : parameters) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("datafile.id", false, "id", paramQuery.build(), - datafileParameterSearcher, ScoreMode.None); - luceneQuery.add(toQuery, Occur.MUST); - } - } - query = maybeEmptyQuery(luceneQuery); - return; - case DATASET: - parseSearchAfter(searchAfter); - buildFilterQueries("dataset", jsonQuery, luceneQuery); + buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "date"); - userName = jsonQuery.getString("user", null); - if (userName != null) { - buildUserNameQuery(userName, luceneQuery, "investigation.id"); - } + if (jsonQuery.containsKey("parameters")) { + JsonArray parameters = jsonQuery.getJsonArray("parameters"); + IndexSearcher datafileParameterSearcher = lucene.getSearcher(searcherMap, "DatafileParameter"); + for (JsonValue p : parameters) { + BooleanQuery.Builder paramQuery = parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("datafile.id", false, "id", Long.class, paramQuery.build(), + datafileParameterSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); + } + } + query = maybeEmptyQuery(luceneQuery); + } - text = jsonQuery.getString("text", null); - if (text != null) { - luceneQuery.add(DocumentMapping.datasetParser.parse(text, null), Occur.MUST); - } + private void parseDatasetQuery(String searchAfter, JsonObject jsonQuery) + throws LuceneException, IOException, QueryNodeException { + BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); + parseSearchAfter(searchAfter); + buildFilterQueries("dataset", jsonQuery, luceneQuery); - buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate"); + String userName = jsonQuery.getString("user", null); + if (userName != null) { + buildUserNameQuery(userName, luceneQuery, "investigation.id"); + } - if (jsonQuery.containsKey("parameters")) { - JsonArray parameters = jsonQuery.getJsonArray("parameters"); - IndexSearcher parameterSearcher = lucene.getSearcher(searcherMap, "DatasetParameter"); - for (JsonValue p : parameters) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("dataset.id", false, "id", paramQuery.build(), - parameterSearcher, ScoreMode.None); - luceneQuery.add(toQuery, Occur.MUST); - } - } - query = maybeEmptyQuery(luceneQuery); - return; - case INVESTIGATION: - parseSearchAfter(searchAfter); - buildFilterQueries("investigation", jsonQuery, luceneQuery); + String text = 
jsonQuery.getString("text", null); + if (text != null) { + luceneQuery.add(DocumentMapping.datasetParser.parse(text, null), Occur.MUST); + } - userName = jsonQuery.getString("user", null); - if (userName != null) { - buildUserNameQuery(userName, luceneQuery, "id"); - } + buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate"); - text = jsonQuery.getString("text", null); - if (text != null) { - Builder textBuilder = new BooleanQuery.Builder(); - textBuilder.add(DocumentMapping.investigationParser.parse(text, null), Occur.SHOULD); + if (jsonQuery.containsKey("parameters")) { + JsonArray parameters = jsonQuery.getJsonArray("parameters"); + IndexSearcher parameterSearcher = lucene.getSearcher(searcherMap, "DatasetParameter"); + for (JsonValue p : parameters) { + BooleanQuery.Builder paramQuery = parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("dataset.id", false, "id", Long.class, paramQuery.build(), + parameterSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); + } + } + query = maybeEmptyQuery(luceneQuery); + } - IndexSearcher sampleSearcher = lucene.getSearcher(searcherMap, "Sample"); - Query joinedSampleQuery = JoinUtil.createJoinQuery("sample.investigation.id", false, "id", - DocumentMapping.sampleParser.parse(text, null), sampleSearcher, ScoreMode.Avg); - textBuilder.add(joinedSampleQuery, Occur.SHOULD); - luceneQuery.add(textBuilder.build(), Occur.MUST); - } + private void parseInvestigationQuery(String searchAfter, JsonObject jsonQuery) + throws LuceneException, IOException, QueryNodeException { + BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); + parseSearchAfter(searchAfter); + buildFilterQueries("investigation", jsonQuery, luceneQuery); - buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate"); - - if (jsonQuery.containsKey("parameters")) { - JsonArray parameters = jsonQuery.getJsonArray("parameters"); - IndexSearcher parameterSearcher = lucene.getSearcher(searcherMap, "InvestigationParameter"); - for (JsonValue p : parameters) { - BooleanQuery.Builder paramQuery = parseParameter(p); - Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", - paramQuery.build(), - parameterSearcher, ScoreMode.None); - luceneQuery.add(toQuery, Occur.MUST); - } - } + String userName = jsonQuery.getString("user", null); + if (userName != null) { + buildUserNameQuery(userName, luceneQuery, "id"); + } - String userFullName = jsonQuery.getString("userFullName", null); - if (userFullName != null) { - BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); - userFullNameQuery.add(DocumentMapping.genericParser.parse(userFullName, "user.fullName"), - Occur.MUST); - IndexSearcher investigationUserSearcher = lucene.getSearcher(searcherMap, "InvestigationUser"); - Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", - userFullNameQuery.build(), - investigationUserSearcher, ScoreMode.None); - luceneQuery.add(toQuery, Occur.MUST); - } - query = maybeEmptyQuery(luceneQuery); - return; + String text = jsonQuery.getString("text", null); + if (text != null) { + Builder textBuilder = new BooleanQuery.Builder(); + textBuilder.add(DocumentMapping.investigationParser.parse(text, null), Occur.SHOULD); + + IndexSearcher sampleSearcher = lucene.getSearcher(searcherMap, "Sample"); + Query joinedSampleQuery = JoinUtil.createJoinQuery("sample.investigation.id", false, "id", Long.class, + DocumentMapping.sampleParser.parse(text, null), sampleSearcher, ScoreMode.Avg); + 
textBuilder.add(joinedSampleQuery, Occur.SHOULD); + luceneQuery.add(textBuilder.build(), Occur.MUST); + } + + buildDateRanges(luceneQuery, jsonQuery, "lower", "upper", "startDate", "endDate"); + + if (jsonQuery.containsKey("parameters")) { + JsonArray parameters = jsonQuery.getJsonArray("parameters"); + IndexSearcher parameterSearcher = lucene.getSearcher(searcherMap, "InvestigationParameter"); + for (JsonValue p : parameters) { + BooleanQuery.Builder paramQuery = parseParameter(p); + Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", Long.class, + paramQuery.build(), + parameterSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); } - } catch (QueryNodeParseException e) { - String message = "Search term could not be parsed due to syntax errors"; - throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, message); } + + String userFullName = jsonQuery.getString("userFullName", null); + if (userFullName != null) { + BooleanQuery.Builder userFullNameQuery = new BooleanQuery.Builder(); + userFullNameQuery.add(DocumentMapping.genericParser.parse(userFullName, "user.fullName"), + Occur.MUST); + IndexSearcher investigationUserSearcher = lucene.getSearcher(searcherMap, "InvestigationUser"); + Query toQuery = JoinUtil.createJoinQuery("investigation.id", false, "id", Long.class, + userFullNameQuery.build(), + investigationUserSearcher, ScoreMode.None); + luceneQuery.add(toQuery, Occur.MUST); + } + query = maybeEmptyQuery(luceneQuery); } /** @@ -301,10 +317,10 @@ private void buildFilterQueries(String target, JsonObject requestedQuery, Builde IndexSearcher nestedSearcher = lucene.getSearcher(searcherMap, filterTarget); Query nestedQuery; if (filterTarget.equals("sample") && !target.equals("investigation")) { - nestedQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", dimensionQuery, - nestedSearcher, ScoreMode.None); + nestedQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", Long.class, + dimensionQuery, nestedSearcher, ScoreMode.None); } else { - nestedQuery = JoinUtil.createJoinQuery(target + ".id", false, "id", dimensionQuery, + nestedQuery = JoinUtil.createJoinQuery(target + ".id", false, "id", Long.class, dimensionQuery, nestedSearcher, ScoreMode.None); } queryBuilder.add(nestedQuery, Occur.FILTER); @@ -357,16 +373,16 @@ private Query parseFilter(String target, String fld, JsonValue value) throws IOE }); if (fld.contains("sample") && !target.equals("investigation")) { // Datasets and Datafiles join by sample.id on both fields - return JoinUtil.createJoinQuery("sample.id", false, "sample.id", nestedBoolBuilder.build(), - nestedSearcher, ScoreMode.None); + return JoinUtil.createJoinQuery("sample.id", false, "sample.id", Long.class, + nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None); } else if (fld.equals("sampleparameter") && target.equals("investigation")) { - Query sampleQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", + Query sampleQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", Long.class, nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None); - return JoinUtil.createJoinQuery("sample.investigation.id", false, "id", sampleQuery, + return JoinUtil.createJoinQuery("sample.investigation.id", false, "id", Long.class, sampleQuery, lucene.getSearcher(searcherMap, "sample"), ScoreMode.None); } else { - return JoinUtil.createJoinQuery(target + ".id", false, "id", nestedBoolBuilder.build(), - nestedSearcher, ScoreMode.None); + return JoinUtil.createJoinQuery(target + ".id", false, 
"id", Long.class, + nestedBoolBuilder.build(), nestedSearcher, ScoreMode.None); } } else { // Single range of values for a field @@ -498,11 +514,11 @@ private void buildNestedRangeQuery(String fld, JsonObject valueObject, BooleanQu private void buildUserNameQuery(String userName, BooleanQuery.Builder luceneQuery, String toField) throws IOException, LuceneException { TermQuery fromQuery = new TermQuery(new Term("user.name", userName)); - Query investigationUserQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, fromQuery, - lucene.getSearcher(searcherMap, "InvestigationUser"), ScoreMode.None); - Query instrumentScientistQuery = JoinUtil.createJoinQuery("instrument.id", false, "instrument.id", fromQuery, - lucene.getSearcher(searcherMap, "InstrumentScientist"), ScoreMode.None); - Query investigationInstrumentQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, + Query investigationUserQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, Long.class, + fromQuery, lucene.getSearcher(searcherMap, "InvestigationUser"), ScoreMode.None); + Query instrumentScientistQuery = JoinUtil.createJoinQuery("instrument.id", false, "instrument.id", Long.class, + fromQuery, lucene.getSearcher(searcherMap, "InstrumentScientist"), ScoreMode.None); + Query investigationInstrumentQuery = JoinUtil.createJoinQuery("investigation.id", false, toField, Long.class, instrumentScientistQuery, lucene.getSearcher(searcherMap, "InvestigationInstrument"), ScoreMode.None); Builder userNameQueryBuilder = new BooleanQuery.Builder(); userNameQueryBuilder.add(investigationUserQuery, Occur.SHOULD).add(investigationInstrumentQuery, Occur.SHOULD); @@ -684,7 +700,8 @@ public void parseFields(JsonObject jsonObject) throws LuceneException { * @throws LuceneException If the types of the JsonValues in the query do not * match those supported by icat.lucene */ - private void parseGenericQuery(JsonObject jsonQuery, BooleanQuery.Builder luceneQuery) throws LuceneException { + private void parseGenericQuery(JsonObject jsonQuery) throws LuceneException { + BooleanQuery.Builder luceneQuery = new BooleanQuery.Builder(); for (Entry entry : jsonQuery.entrySet()) { String field = entry.getKey(); ValueType valueType = entry.getValue().getValueType(); @@ -707,19 +724,26 @@ private void parseGenericQuery(JsonObject jsonQuery, BooleanQuery.Builder lucene } break; case ARRAY: - // Only support array of String as list of ICAT ids is currently only use case + Query arrayQuery; JsonArray arrayValue = (JsonArray) entry.getValue(); - ArrayList bytesArray = new ArrayList<>(); - String valueAsString; - for (JsonValue value : arrayValue) { - if (value.getValueType().equals(ValueType.STRING)) { - valueAsString = ((JsonString) value).getString(); - } else { - valueAsString = value.toString(); - } - bytesArray.add(new BytesRef(valueAsString)); + ValueType arrayValueType = arrayValue.get(0).getValueType(); + switch (arrayValueType) { + case NUMBER: + ArrayList longList = new ArrayList<>(); + for (JsonValue value : arrayValue) { + longList.add(((JsonNumber) value).longValueExact()); + } + arrayQuery = LongPoint.newSetQuery(field, longList); + break; + default: + ArrayList bytesRefList = new ArrayList<>(); + for (JsonValue value : arrayValue) { + bytesRefList.add(new BytesRef(((JsonString) value).getString())); + } + arrayQuery = new TermInSetQuery(field, bytesRefList); + break; } - luceneQuery.add(new TermInSetQuery(field, bytesArray), Occur.MUST); + luceneQuery.add(arrayQuery, Occur.MUST); break; default: throw new 
LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, @@ -846,7 +870,7 @@ private void parseSearchAfter(String searchAfter) throws LuceneException { public void parseSort(String sortString) throws LuceneException { if (sortString == null || sortString.equals("") || sortString.equals("{}")) { scored = true; - sort = new Sort(SortField.FIELD_SCORE, new SortedNumericSortField("id.long", Type.LONG)); + sort = new Sort(SortField.FIELD_SCORE, new SortedNumericSortField("id", Type.LONG)); return; } try (JsonReader reader = Json.createReader(new ByteArrayInputStream(sortString.getBytes()))) { @@ -872,7 +896,7 @@ public void parseSort(String sortString) throws LuceneException { fields.add(new SortField(key, Type.STRING, reverse)); } } - fields.add(new SortedNumericSortField("id.long", Type.LONG)); + fields.add(new SortedNumericSortField("id", Type.LONG)); scored = false; sort = new Sort(fields.toArray(new SortField[0])); } From 3dc957a970765b97e6d167b1694c186d6089aad4 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 28 Sep 2023 10:00:48 +0000 Subject: [PATCH 68/73] Refactor facetable fields into run.properties #18 --- .../icatproject/lucene/DocumentMapping.java | 2 - .../java/org/icatproject/lucene/Field.java | 13 ++++-- .../java/org/icatproject/lucene/Lucene.java | 15 ++++--- .../org/icatproject/lucene/SearchBucket.java | 40 +++++++++++-------- src/main/resources/run.properties | 1 + src/site/xhtml/installation.xhtml.vm | 9 +++++ 6 files changed, 53 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/DocumentMapping.java b/src/main/java/org/icatproject/lucene/DocumentMapping.java index 4b7998a..42f0e87 100644 --- a/src/main/java/org/icatproject/lucene/DocumentMapping.java +++ b/src/main/java/org/icatproject/lucene/DocumentMapping.java @@ -36,7 +36,6 @@ public ParentRelationship(String parentName, String joiningField, String... fiel private static Analyzer analyzer = new IcatSynonymAnalyzer();; public static final Set doubleFields = new HashSet<>(); - public static final Set facetFields = new HashSet<>(); public static final Set longFields = new HashSet<>(); public static final Set sortFields = new HashSet<>(); public static final Set textFields = new HashSet<>(); @@ -55,7 +54,6 @@ public ParentRelationship(String parentName, String joiningField, String... 
fiel static { doubleFields.addAll(Arrays.asList("numericValue", "numericValueSI", "rangeTop", "rangeTopSI", "rangeBottom", "rangeBottomSI")); - facetFields.addAll(Arrays.asList("type.name", "datafileFormat.name", "stringValue", "technique.name")); longFields.addAll( Arrays.asList("date", "startDate", "endDate", "dateTimeValue", "investigation.startDate", "fileSize", "fileCount", "datafile.id", "datafileFormat.id", "dataset.id", "facility.id", diff --git a/src/main/java/org/icatproject/lucene/Field.java b/src/main/java/org/icatproject/lucene/Field.java index ea33aa3..ad24647 100644 --- a/src/main/java/org/icatproject/lucene/Field.java +++ b/src/main/java/org/icatproject/lucene/Field.java @@ -2,6 +2,8 @@ import jakarta.json.JsonObject; +import java.util.List; + import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.LongPoint; @@ -49,7 +51,7 @@ public void addSortable(Document document) throws NumberFormatException { public void addToDocument(Document document) throws NumberFormatException { addSortable(document); - if (DocumentMapping.facetFields.contains(name)) { + if (facetable) { document.add(new SortedSetDocValuesFacetField(name + ".keyword", value)); document.add(new StringField(name + ".keyword", value, Store.NO)); } @@ -115,15 +117,18 @@ public void addToDocument(Document document) throws NumberFormatException { private String name; private InnerField innerField; + private boolean facetable; /** * Creates a wrapper for a Field. * * @param object JsonObject containing representations of multiple fields * @param key Key of a specific field in object + * @param facetFields List of String field names which should be stored as a facetable keyword */ - public Field(JsonObject object, String key) { + public Field(JsonObject object, String key, List facetFields) { name = key; + facetable = facetFields.contains(name); if (DocumentMapping.doubleFields.contains(name)) { innerField = new InnerDoubleField(object.getJsonNumber(name).doubleValue()); } else if (DocumentMapping.longFields.contains(name)) { @@ -137,9 +142,11 @@ public Field(JsonObject object, String key) { * Creates a wrapper for a Field. 
* * @param indexableField A Lucene IndexableField + * @param facetFields List of String fields which should be stored as a facetable keyword */ - public Field(IndexableField indexableField) { + public Field(IndexableField indexableField, List facetFields) { name = indexableField.name(); + facetable = facetFields.contains(name); if (DocumentMapping.doubleFields.contains(name)) { innerField = new InnerDoubleField(indexableField.numericValue().doubleValue()); } else if (DocumentMapping.longFields.contains(name)) { diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index a173aa4..d0c97ca 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -381,6 +381,7 @@ public void releaseSearchers(List subSearchers) throws IOExceptio private Map indexBuckets = new ConcurrentHashMap<>(); private Timer timer; + public List facetFields = new ArrayList<>(); public IcatUnits icatUnits; /** @@ -979,6 +980,10 @@ private void init() { icatUnits = new IcatUnits(props.getString("units", "")); + String facetFieldsString = props.getString("facetFields", ""); + for (String facetField : facetFieldsString.split("\\s+")) { + facetFields.add(facetField); + } } catch (Exception e) { logger.error(fatal, e.getMessage()); throw new IllegalStateException(e.getMessage()); @@ -1528,7 +1533,7 @@ private void encodeSearchAfterField(JsonGenerator gen, SortField sortField, Scor private Document parseDocument(JsonObject json) { Document document = new Document(); for (String key : json.keySet()) { - Field field = new Field(json, key); + Field field = new Field(json, key, facetFields); field.addToDocument(document); convertUnits(json, document, key); } @@ -1607,13 +1612,13 @@ private Document updateDocumentFields(JsonObject json, Document oldDocument) { for (IndexableField field : oldDocument.getFields()) { String fieldName = field.name(); if (json.containsKey(fieldName)) { - Field jsonField = new Field(json, fieldName); + Field jsonField = new Field(json, fieldName, facetFields); jsonField.addToDocument(newDocument); hasNewUnits = hasNewUnits || convertUnits(json, newDocument, fieldName); } else if (fieldName.endsWith("SI")) { - fieldsSI.add(new Field(field)); + fieldsSI.add(new Field(field, facetFields)); } else { - Field oldField = new Field(field); + Field oldField = new Field(field, facetFields); oldField.addToDocument(newDocument); } } @@ -1638,7 +1643,7 @@ private Document pruneDocument(Set fields, Document oldDocument) { Document newDocument = new Document(); for (IndexableField field : oldDocument.getFields()) { if (!fields.contains(field.name())) { - Field fieldToAdd = new Field(field); + Field fieldToAdd = new Field(field, facetFields); fieldToAdd.addToDocument(newDocument); } } diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index 18ad0c1..aca88fa 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -37,6 +37,7 @@ import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -708,7 +709,7 @@ private void parseGenericQuery(JsonObject jsonQuery) throws LuceneException { switch (valueType) { case 
STRING: JsonString stringValue = (JsonString) entry.getValue(); - String fld = DocumentMapping.facetFields.contains(field) ? field + ".keyword" : field; + String fld = lucene.facetFields.contains(field) ? field + ".keyword" : field; luceneQuery.add(new TermQuery(new Term(fld, stringValue.getString())), Occur.MUST); break; case NUMBER: @@ -724,26 +725,31 @@ } break; case ARRAY: - Query arrayQuery; + ArrayList<Long> longList = new ArrayList<>(); + ArrayList<BytesRef> bytesRefList = new ArrayList<>(); JsonArray arrayValue = (JsonArray) entry.getValue(); - ValueType arrayValueType = arrayValue.get(0).getValueType(); - switch (arrayValueType) { - case NUMBER: - ArrayList<Long> longList = new ArrayList<>(); - for (JsonValue value : arrayValue) { + for (JsonValue value : arrayValue) { + ValueType arrayValueType = value.getValueType(); + switch (arrayValueType) { + case NUMBER: longList.add(((JsonNumber) value).longValueExact()); - } - arrayQuery = LongPoint.newSetQuery(field, longList); - break; - default: - ArrayList<BytesRef> bytesRefList = new ArrayList<>(); - for (JsonValue value : arrayValue) { + break; + default: bytesRefList.add(new BytesRef(((JsonString) value).getString())); - } - arrayQuery = new TermInSetQuery(field, bytesRefList); - break; + break; + } + } + + if (longList.size() == 0 && bytesRefList.size() == 0) { + query = new MatchNoDocsQuery("Tried filtering " + field + " with an empty array"); + return; + } + if (longList.size() != 0) { + luceneQuery.add(LongPoint.newSetQuery(field, longList), Occur.MUST); + } + if (bytesRefList.size() != 0) { + luceneQuery.add(new TermInSetQuery(field, bytesRefList), Occur.MUST); } - luceneQuery.add(arrayQuery, Occur.MUST); break; default: throw new LuceneException(HttpURLConnection.HTTP_BAD_REQUEST, diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index c86b66d..5031cfb 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -6,4 +6,5 @@ commitSeconds = 5 maxShardSize = 2147483648 ip = 127.0.0.1/32 units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin +facetFields = datafileFormat.name instrument.name sample.type.name stringValue technique.name type.name aggregateFiles = false diff --git a/src/site/xhtml/installation.xhtml.vm b/src/site/xhtml/installation.xhtml.vm index 7158410..8d4f801 100644 --- a/src/site/xhtml/installation.xhtml.vm +++ b/src/site/xhtml/installation.xhtml.vm @@ -77,6 +77,15 @@ should be followed by this factor (e.g. "J: eV 1.602176634e-19"). Different units can be separated by a semi-colon. +
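To make the units syntax concrete: each alternative unit name is followed by the factor that converts it into the SI unit named before the colon, so "J: eV 1.602176634e-19" declares that one electronvolt is 1.602176634e-19 joules. A parameter recorded as 5 eV would therefore be indexed with a numericValueSI of roughly 8.011e-19 and a type.unitsSI of "J". A toy illustration of the multiplication, not the project's actual parser:

    public class UnitConversionExample {
        // Factor taken from the run.properties entry "J: eV 1.602176634e-19"
        private static final double EV_TO_JOULE = 1.602176634e-19;

        public static void main(String[] args) {
            double electronVolts = 5.0;
            double joules = electronVolts * EV_TO_JOULE; // 8.01088317e-19
            System.out.printf("%.1f eV = %.3e J%n", electronVolts, joules);
        }
    }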
    facetFields
    +
    The names of fields which should be stored as facetable. The names should + correspond to how the field appears in the Lucene index, which may be + different to how it is represented in the ICAT database due to the flattening of + one-to-one relationships between entities. Accurate field names can be taken + from the `getDoc` function(s) in icat.server. Note that in order to be available + at search time, the field must have been specified when indexing the + documents.
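As the Field.java hunk earlier in this patch shows, a field named in facetFields is written twice under a ".keyword" suffix: once as a SortedSetDocValuesFacetField that drives facet counting, and once as an un-stored StringField that supports exact filtering on the facet label. A condensed sketch of that pattern (the helper and values are invented):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;

    final class FacetableFields {
        private FacetableFields() {
        }

        // Store one facetable string under the ".keyword" convention used here.
        static void addFacetable(Document doc, String name, String value) {
            doc.add(new SortedSetDocValuesFacetField(name + ".keyword", value)); // facet counting
            doc.add(new StringField(name + ".keyword", value, Store.NO));        // exact term filtering
        }
    }

Documents holding a SortedSetDocValuesFacetField must be passed through FacetsConfig.build before being handed to the writer, which is why every add and update in Lucene.java goes via facetsConfig.build(...).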
    +
    aggregateFiles
    Aggregate file sizes/counts for Datasets and Investigations as Datafiles are added or modified (i.e. in real time). This can have a significant From c9f21549a39b46bf76c01b86aa364425b2520fc1 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Thu, 5 Oct 2023 14:42:53 +0000 Subject: [PATCH 69/73] Add short explanations of new properties #18 --- src/main/config/run.properties.example | 6 ++++++ src/main/resources/run.properties | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/main/config/run.properties.example b/src/main/config/run.properties.example index dbe555b..76b1475 100644 --- a/src/main/config/run.properties.example +++ b/src/main/config/run.properties.example @@ -5,4 +5,10 @@ directory = ${HOME}/data/search commitSeconds = 5 maxShardSize = 2147483648 ip = 127.0.0.1/32 +# List of units to enable conversion to SI units when querying on numerical parameters +!units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin +# List of fields that should be stored for facet filtering when searching +# In order to be available, these fields must be set when indexing the data +facetFields = datafileFormat.name instrument.name sample.type.name stringValue technique.name type.name +# Aggregate file sizes and counts in real time (this will have a performance impact on write operations) aggregateFiles = false diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index 5031cfb..085272d 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -5,6 +5,10 @@ directory = ${HOME}/data/search commitSeconds = 5 maxShardSize = 2147483648 ip = 127.0.0.1/32 +# List of units to enable conversion to SI units when querying on numerical parameters units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin +# List of fields that should be stored for facet filtering when searching +# In order to be available, these fields must be set when indexing the data facetFields = datafileFormat.name instrument.name sample.type.name stringValue technique.name type.name +# Aggregate file sizes and counts in real time (this will have a performance impact on write operations) aggregateFiles = false From b6d3e602163e0b28fe66d2807b187b157f2d0e06 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 6 Oct 2023 14:27:30 +0000 Subject: [PATCH 70/73] Add special handling for InvestigationInstrument filters #18 --- src/main/java/org/icatproject/lucene/SearchBucket.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index aca88fa..31d12a3 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -320,6 +320,9 @@ private void buildFilterQueries(String target, JsonObject requestedQuery, Builde if (filterTarget.equals("sample") && !target.equals("investigation")) { nestedQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", Long.class, dimensionQuery, nestedSearcher, ScoreMode.None); + } else if (filterTarget.toLowerCase().equals("investigationinstrument") && !target.equals("investigation")) { + nestedQuery = JoinUtil.createJoinQuery("investigation.id", false, "investigation.id", Long.class, dimensionQuery, + nestedSearcher, ScoreMode.None); } else { nestedQuery = JoinUtil.createJoinQuery(target + ".id", false, "id", Long.class, dimensionQuery, nestedSearcher, ScoreMode.None); From 61301a251324f3925bec37954624932712ba87da Mon Sep 17 00:00:00 2001 From: 
Patrick Austin Date: Tue, 10 Oct 2023 09:43:17 +0000 Subject: [PATCH 71/73] Fix for Investigation Sample filtering #18 --- src/main/java/org/icatproject/lucene/SearchBucket.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index 31d12a3..f9b5d24 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -317,8 +317,8 @@ private void buildFilterQueries(String target, JsonObject requestedQuery, Builde // just a nested entity) IndexSearcher nestedSearcher = lucene.getSearcher(searcherMap, filterTarget); Query nestedQuery; - if (filterTarget.equals("sample") && !target.equals("investigation")) { - nestedQuery = JoinUtil.createJoinQuery("sample.id", false, "sample.id", Long.class, + if (filterTarget.equals("sample") && target.equals("investigation")) { + nestedQuery = JoinUtil.createJoinQuery("sample.investigation.id", false, "id", Long.class, dimensionQuery, nestedSearcher, ScoreMode.None); } else if (filterTarget.toLowerCase().equals("investigationinstrument") && !target.equals("investigation")) { nestedQuery = JoinUtil.createJoinQuery("investigation.id", false, "investigation.id", Long.class, dimensionQuery, From e3f393e0d39e523fa1f3d30d655b3ad095468038 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 22 Mar 2024 11:48:04 +0000 Subject: [PATCH 72/73] Account for IcatUnits refactors --- .../java/org/icatproject/lucene/Lucene.java | 24 +++++++------- .../org/icatproject/lucene/SearchBucket.java | 31 ++++++++++--------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index d0c97ca..dbd3247 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -88,7 +88,7 @@ import org.icatproject.lucene.exceptions.LuceneException; import org.icatproject.utils.CheckedProperties; import org.icatproject.utils.IcatUnits; -import org.icatproject.utils.IcatUnits.SystemValue; +import org.icatproject.utils.IcatUnits.Value; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.Marker; @@ -1573,24 +1573,22 @@ private boolean convertUnits(JsonObject json, Document document, String key) { */ private void convertValue(Document document, JsonObject json, String unitString, String numericFieldName) { IndexableField field = document.getField(numericFieldName); - double value; + double numericalValue; if (field != null) { - value = NumericUtils.sortableLongToDouble(field.numericValue().longValue()); + numericalValue = NumericUtils.sortableLongToDouble(field.numericValue().longValue()); } else if (json.containsKey(numericFieldName)) { - value = json.getJsonNumber(numericFieldName).doubleValue(); + numericalValue = json.getJsonNumber(numericFieldName).doubleValue(); } else { // If we aren't dealing with the desired numeric field don't convert return; } - logger.trace("Attempting to convert {} {}", value, unitString); - SystemValue systemValue = icatUnits.new SystemValue(value, unitString); - if (systemValue.units != null) { - document.add(new StringField("type.unitsSI", systemValue.units, Store.YES)); - } - if (systemValue.value != null) { - document.add(new DoublePoint(numericFieldName + "SI", systemValue.value)); - document.add(new StoredField(numericFieldName + "SI", systemValue.value)); - long sortableLong = 
NumericUtils.doubleToSortableLong(systemValue.value); + logger.trace("Attempting to convert {} {}", numericalValue, unitString); + Value value = icatUnits.convertValueToSiUnits(numericalValue, unitString); + if (value != null) { + document.add(new StringField("type.unitsSI", value.units, Store.YES)); + document.add(new DoublePoint(numericFieldName + "SI", value.numericalValue)); + document.add(new StoredField(numericFieldName + "SI", value.numericalValue)); + long sortableLong = NumericUtils.doubleToSortableLong(value.numericalValue); document.add(new NumericDocValuesField(numericFieldName + "SI", sortableLong)); } } diff --git a/src/main/java/org/icatproject/lucene/SearchBucket.java b/src/main/java/org/icatproject/lucene/SearchBucket.java index f9b5d24..2c51f76 100644 --- a/src/main/java/org/icatproject/lucene/SearchBucket.java +++ b/src/main/java/org/icatproject/lucene/SearchBucket.java @@ -52,7 +52,7 @@ import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.util.BytesRef; import org.icatproject.lucene.exceptions.LuceneException; -import org.icatproject.utils.IcatUnits.SystemValue; +import org.icatproject.utils.IcatUnits.Value; /** * Bucket for information relating to a single search. @@ -424,17 +424,18 @@ private void buildNestedExactQuery(String fld, JsonObject valueObject, BooleanQu double exact = valueObject.getJsonNumber("exact").doubleValue(); String units = valueObject.getString("units", null); if (units != null) { - SystemValue exactValue = lucene.icatUnits.new SystemValue(exact, units); - if (exactValue.value != null) { + Value exactValue = lucene.icatUnits.convertValueToSiUnits(exact, units); + if (exactValue != null) { // If we were able to parse the units, apply query to the SI value - rangeBuilder.add( - DoublePoint.newRangeQuery("rangeTopSI", exactValue.value, Double.POSITIVE_INFINITY), - Occur.FILTER); - rangeBuilder.add( - DoublePoint.newRangeQuery("rangeBottomSI", Double.NEGATIVE_INFINITY, exactValue.value), - Occur.FILTER); + Query topQuery = DoublePoint.newRangeQuery("rangeTopSI", exactValue.numericalValue, + Double.POSITIVE_INFINITY); + Query bottomQuery = DoublePoint.newRangeQuery("rangeBottomSI", Double.NEGATIVE_INFINITY, + exactValue.numericalValue); + Query exactQuery = DoublePoint.newExactQuery(fld + "SI", exactValue.numericalValue); + rangeBuilder.add(topQuery, Occur.FILTER); + rangeBuilder.add(bottomQuery, Occur.FILTER); exactOrRangeBuilder.add(rangeBuilder.build(), Occur.SHOULD); - exactOrRangeBuilder.add(DoublePoint.newExactQuery(fld + "SI", exactValue.value), Occur.SHOULD); + exactOrRangeBuilder.add(exactQuery, Occur.SHOULD); builder.add(exactOrRangeBuilder.build(), Occur.FILTER); } else { // If units could not be parsed, make them part of the query on the raw data @@ -487,11 +488,13 @@ private void buildNestedRangeQuery(String fld, JsonObject valueObject, BooleanQu double to = valueObject.getJsonNumber("to").doubleValue(); String units = valueObject.getString("units", null); if (units != null) { - SystemValue fromValue = lucene.icatUnits.new SystemValue(from, units); - SystemValue toValue = lucene.icatUnits.new SystemValue(to, units); - if (fromValue.value != null && toValue.value != null) { + Value fromValue = lucene.icatUnits.convertValueToSiUnits(from, units); + Value toValue = lucene.icatUnits.convertValueToSiUnits(to, units); + if (fromValue != null && toValue != null) { // If we were able to parse the units, apply query to the SI value - builder.add(DoublePoint.newRangeQuery(fld + "SI", fromValue.value, toValue.value), Occur.FILTER); 
+ Query rangeQuery = DoublePoint.newRangeQuery(fld + "SI", fromValue.numericalValue, + toValue.numericalValue); + builder.add(rangeQuery, Occur.FILTER); } else { // If units could not be parsed, make them part of the query on the raw data builder.add(DoublePoint.newRangeQuery(fld, from, to), Occur.FILTER); From bcbe497813cd0eee886fe4964df18ccbe2c7afb8 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Mon, 8 Apr 2024 11:20:36 +0000 Subject: [PATCH 73/73] Add new properties to init logging --- src/main/config/run.properties.example | 2 ++ src/main/java/org/icatproject/lucene/Lucene.java | 14 +++++++++----- src/main/resources/run.properties | 2 ++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/main/config/run.properties.example b/src/main/config/run.properties.example index 76b1475..7702881 100644 --- a/src/main/config/run.properties.example +++ b/src/main/config/run.properties.example @@ -5,6 +5,8 @@ directory = ${HOME}/data/search commitSeconds = 5 maxShardSize = 2147483648 ip = 127.0.0.1/32 +# A search taking longer than this will be cancelled to avoid blocking other users' searches +maxSearchTimeSeconds = 5 # List of units to enable conversion to SI units when querying on numerical parameters !units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin # List of fields that should be stored for facet filtering when searching diff --git a/src/main/java/org/icatproject/lucene/Lucene.java b/src/main/java/org/icatproject/lucene/Lucene.java index dbd3247..31efaea 100755 --- a/src/main/java/org/icatproject/lucene/Lucene.java +++ b/src/main/java/org/icatproject/lucene/Lucene.java @@ -962,6 +962,8 @@ private List getShards(String name) { private void init() { logger.info("Initialising icat.lucene"); CheckedProperties props = new CheckedProperties(); + String unitsString; + int commitSeconds; try { props.loadFromResource("run.properties"); @@ -970,7 +972,8 @@ private void init() { throw new Exception(luceneDirectory + " is not a directory"); } - luceneCommitMillis = props.getPositiveInt("commitSeconds") * 1000; + commitSeconds = props.getPositiveInt("commitSeconds"); + luceneCommitMillis = commitSeconds * 1000; luceneMaxShardSize = Math.max(props.getPositiveLong("maxShardSize"), Long.valueOf(Integer.MAX_VALUE + 1)); maxSearchTimeSeconds = props.has("maxSearchTimeSeconds") ? 
props.getPositiveLong("maxSearchTimeSeconds") : 5; @@ -978,7 +981,8 @@ private void init() { initTimer(); - icatUnits = new IcatUnits(props.getString("units", "")); + unitsString = props.getString("units", ""); + icatUnits = new IcatUnits(unitsString); String facetFieldsString = props.getString("facetFields", ""); for (String facetField : facetFieldsString.split("\\s+")) { @@ -990,9 +994,9 @@ private void init() { } String format = "Initialised icat.lucene with directory {}, commitSeconds {}, maxShardSize {}, " - + "maxSearchTimeSeconds {}, aggregateFiles {}"; - logger.info(format, luceneDirectory, luceneCommitMillis, luceneMaxShardSize, maxSearchTimeSeconds, - aggregateFiles); + + "maxSearchTimeSeconds {}, aggregateFiles {}, units {}, facetFields {}"; + logger.info(format, luceneDirectory, commitSeconds, luceneMaxShardSize, maxSearchTimeSeconds, + aggregateFiles, unitsString, facetFields); } /** diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index 085272d..0e3c2ed 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -5,6 +5,8 @@ directory = ${HOME}/data/search commitSeconds = 5 maxShardSize = 2147483648 ip = 127.0.0.1/32 +# A search taking longer than this will be cancelled to avoid blocking other users' searches +maxSearchTimeSeconds = 5 # List of units to enable conversion to SI units when querying on numerical parameters units = J: eV 1.602176634e-19; \u2103: celsius, degC; K: kelvin # List of fields that should be stored for facet filtering when searching