diff --git a/build.gradle b/build.gradle
index 2336ac54..04d1196d 100644
--- a/build.gradle
+++ b/build.gradle
@@ -11,7 +11,7 @@ buildscript {
}
}
-version "1.4.11"
+version "1.4.12"
group "au.org.ala"
apply plugin:"eclipse"
@@ -76,7 +76,7 @@ dependencies {
compile(group: 'org.grails.plugins', name: 'ala-auth', version:'3.1.0') {
exclude group: 'javax.servlet', module: 'servlet-api'
}
- compile group: 'org.grails.plugins', name: 'ala-admin-plugin', version: '2.2'
+ compile group: 'org.grails.plugins', name: 'ala-admin-plugin', version: '2.1'
compile "org.grails.plugins:grails-spring-websocket:2.3.0"
compile 'org.webjars:swagger-ui:3.18.2'
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 00000000..976a29eb
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,22 @@
+# Docker Images
+
+Subdirectories contain docker descriptions that can be used for testing and
+debugging
+
+## Solr
+
+The solr image contains a solr server with pre-configured `bie` and `bie-offline` cores.
+To build a solr6 image for use with the bie-index, run the following from this directory
+
+```shell
+docker build solr6 -t bie-solr:v1
+```
+
+To run the resulting image in a fresh container, use
+
+```shell
+docker run -p 8983:8983 bie-solr:v1
+```
+
+Set the solr connections in the configuration to
+`http://localhost:8983/solr/bie` and `http://localhost:8983/solr/bie-offline`
\ No newline at end of file
diff --git a/docker/solr6/Dockerfile b/docker/solr6/Dockerfile
new file mode 100644
index 00000000..70322760
--- /dev/null
+++ b/docker/solr6/Dockerfile
@@ -0,0 +1,15 @@
+FROM solr:6.6
+WORKDIR /opt/solr/server/solr
+COPY --chown=solr:solr solr.xml solr.xml
+COPY --chown=solr:solr zoo.cfg zoo.cfg
+#COPY --chown=solr:solr lib /opt/solr/server/solr-webapp/webapp/WEB-INF/lib/
+RUN mkdir -p bie bie/conf bie/data bie/suggest-infix
+COPY --chown=solr:solr conf bie/conf
+COPY --chown=solr:solr bie.core.properties bie/core.properties
+RUN chown -R solr.solr bie
+RUN mkdir -p bie-offline bie-offline/conf bie-offline/data bie-offline/suggest-infix
+COPY --chown=solr:solr conf bie-offline/conf
+COPY --chown=solr:solr bie-offline.core.properties bie-offline/core.properties
+RUN chown -R solr.solr bie-offline
+COPY solr.in.sh /etc/default/solr.in.sh
+
diff --git a/docker/solr6/bie-offline.core.properties b/docker/solr6/bie-offline.core.properties
new file mode 100644
index 00000000..422d6f8a
--- /dev/null
+++ b/docker/solr6/bie-offline.core.properties
@@ -0,0 +1,3 @@
+config=solrconfig.xml
+name=bie-offline
+dataDir=data
\ No newline at end of file
diff --git a/docker/solr6/bie.core.properties b/docker/solr6/bie.core.properties
new file mode 100644
index 00000000..d8624c39
--- /dev/null
+++ b/docker/solr6/bie.core.properties
@@ -0,0 +1,3 @@
+config=solrconfig.xml
+name=bie
+dataDir=data
\ No newline at end of file
diff --git a/docker/solr6/conf/bie_stopwords.txt b/docker/solr6/conf/bie_stopwords.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/docker/solr6/conf/elevate.xml b/docker/solr6/conf/elevate.xml
new file mode 100644
index 00000000..7630ebe2
--- /dev/null
+++ b/docker/solr6/conf/elevate.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docker/solr6/conf/mapping-ISOLatin1Accent.txt b/docker/solr6/conf/mapping-ISOLatin1Accent.txt
new file mode 100644
index 00000000..ede77425
--- /dev/null
+++ b/docker/solr6/conf/mapping-ISOLatin1Accent.txt
@@ -0,0 +1,246 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Syntax:
+# "source" => "target"
+# "source".length() > 0 (source cannot be empty.)
+# "target".length() >= 0 (target can be empty.)
+
+# example:
+# "À" => "A"
+# "\u00C0" => "A"
+# "\u00C0" => "\u0041"
+# "ß" => "ss"
+# "\t" => " "
+# "\n" => ""
+
+# À => A
+"\u00C0" => "A"
+
+# Á => A
+"\u00C1" => "A"
+
+# Â => A
+"\u00C2" => "A"
+
+# Ã => A
+"\u00C3" => "A"
+
+# Ä => A
+"\u00C4" => "A"
+
+# Å => A
+"\u00C5" => "A"
+
+# Æ => AE
+"\u00C6" => "AE"
+
+# Ç => C
+"\u00C7" => "C"
+
+# È => E
+"\u00C8" => "E"
+
+# É => E
+"\u00C9" => "E"
+
+# Ê => E
+"\u00CA" => "E"
+
+# Ë => E
+"\u00CB" => "E"
+
+# Ì => I
+"\u00CC" => "I"
+
+# Í => I
+"\u00CD" => "I"
+
+# Î => I
+"\u00CE" => "I"
+
+# Ï => I
+"\u00CF" => "I"
+
+# IJ => IJ
+"\u0132" => "IJ"
+
+# Ð => D
+"\u00D0" => "D"
+
+# Ñ => N
+"\u00D1" => "N"
+
+# Ò => O
+"\u00D2" => "O"
+
+# Ó => O
+"\u00D3" => "O"
+
+# Ô => O
+"\u00D4" => "O"
+
+# Õ => O
+"\u00D5" => "O"
+
+# Ö => O
+"\u00D6" => "O"
+
+# Ø => O
+"\u00D8" => "O"
+
+# Œ => OE
+"\u0152" => "OE"
+
+# Þ
+"\u00DE" => "TH"
+
+# Ù => U
+"\u00D9" => "U"
+
+# Ú => U
+"\u00DA" => "U"
+
+# Û => U
+"\u00DB" => "U"
+
+# Ü => U
+"\u00DC" => "U"
+
+# Ý => Y
+"\u00DD" => "Y"
+
+# Ÿ => Y
+"\u0178" => "Y"
+
+# à => a
+"\u00E0" => "a"
+
+# á => a
+"\u00E1" => "a"
+
+# â => a
+"\u00E2" => "a"
+
+# ã => a
+"\u00E3" => "a"
+
+# ä => a
+"\u00E4" => "a"
+
+# å => a
+"\u00E5" => "a"
+
+# æ => ae
+"\u00E6" => "ae"
+
+# ç => c
+"\u00E7" => "c"
+
+# è => e
+"\u00E8" => "e"
+
+# é => e
+"\u00E9" => "e"
+
+# ê => e
+"\u00EA" => "e"
+
+# ë => e
+"\u00EB" => "e"
+
+# ì => i
+"\u00EC" => "i"
+
+# í => i
+"\u00ED" => "i"
+
+# î => i
+"\u00EE" => "i"
+
+# ï => i
+"\u00EF" => "i"
+
+# ij => ij
+"\u0133" => "ij"
+
+# ð => d
+"\u00F0" => "d"
+
+# ñ => n
+"\u00F1" => "n"
+
+# ò => o
+"\u00F2" => "o"
+
+# ó => o
+"\u00F3" => "o"
+
+# ô => o
+"\u00F4" => "o"
+
+# õ => o
+"\u00F5" => "o"
+
+# ö => o
+"\u00F6" => "o"
+
+# ø => o
+"\u00F8" => "o"
+
+# œ => oe
+"\u0153" => "oe"
+
+# ß => ss
+"\u00DF" => "ss"
+
+# þ => th
+"\u00FE" => "th"
+
+# ù => u
+"\u00F9" => "u"
+
+# ú => u
+"\u00FA" => "u"
+
+# û => u
+"\u00FB" => "u"
+
+# ü => u
+"\u00FC" => "u"
+
+# ý => y
+"\u00FD" => "y"
+
+# ÿ => y
+"\u00FF" => "y"
+
+# ff => ff
+"\uFB00" => "ff"
+
+# fi => fi
+"\uFB01" => "fi"
+
+# fl => fl
+"\uFB02" => "fl"
+
+# ffi => ffi
+"\uFB03" => "ffi"
+
+# ffl => ffl
+"\uFB04" => "ffl"
+
+# ſt => ft
+"\uFB05" => "ft"
+
+# st => st
+"\uFB06" => "st"
diff --git a/docker/solr6/conf/protwords.txt b/docker/solr6/conf/protwords.txt
new file mode 100644
index 00000000..1dfc0abe
--- /dev/null
+++ b/docker/solr6/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/docker/solr6/conf/schema.xml b/docker/solr6/conf/schema.xml
new file mode 100644
index 00000000..85c6bdb1
--- /dev/null
+++ b/docker/solr6/conf/schema.xml
@@ -0,0 +1,534 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docker/solr6/conf/solrconfig.xml b/docker/solr6/conf/solrconfig.xml
new file mode 100644
index 00000000..b34351cb
--- /dev/null
+++ b/docker/solr6/conf/solrconfig.xml
@@ -0,0 +1,1814 @@
+
+
+
+
+
+
+
+
+ 6.6.5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+ 15000
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 102400
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+ browse
+ layout
+ Solritas
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
+ 3
+
+
+ on
+ cat
+ manu_exact
+ content_type
+ author_s
+ ipod
+ GB
+ 1
+ cat,inStock
+ after
+ price
+ 0
+ 600
+ 50
+ popularity
+ 0
+ 10
+ 3
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+ on
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title
+ 0
+ name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ ignored_
+
+
+ true
+ links
+ ignored_
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solrpingquery
+
+
+ all
+
+
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+ infixSuggester
+ true
+ 10
+
+
+ suggest
+
+
+
+
+
+
+
+ infixSuggester
+ BlendedInfixLookupFactory
+ DocumentDictionaryFactory
+ auto_text
+ suggestWeight
+ guid
+ idxtype -->
+ true
+ false
+
+ text_suggest
+ false
+
+
+
+
+
+
+ textSpell
+
+
+
+
+
+ default
+ auto_text_edge
+ solr.DirectSolrSpellChecker
+
+ internal
+
+ 0.5
+
+ 2
+
+ 1
+
+ 5
+
+ 4
+
+ 0.01
+
+
+
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ auto_text_edge
+ true
+ true
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ text
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+
+ 20
+
+
+ clustering/carrot2
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+ false
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+ text
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,,
+ ,]]>
+ ]]>
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+
+ WORD
+
+
+ en
+ US
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
+
+
+
+
+
+
+ 5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *:*
+
+
+
diff --git a/docker/solr6/conf/stopwords.txt b/docker/solr6/conf/stopwords.txt
new file mode 100644
index 00000000..b5824da3
--- /dev/null
+++ b/docker/solr6/conf/stopwords.txt
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+#Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+
diff --git a/docker/solr6/conf/synonyms.txt b/docker/solr6/conf/synonyms.txt
new file mode 100644
index 00000000..b0e31cb7
--- /dev/null
+++ b/docker/solr6/conf/synonyms.txt
@@ -0,0 +1,31 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaa => aaaa
+bbb => bbbb1 bbbb2
+ccc => cccc1,cccc2
+a\=>a => b\=>b
+a\,a => b\,b
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/docker/solr6/solr.in.sh b/docker/solr6/solr.in.sh
new file mode 100644
index 00000000..fd2f449f
--- /dev/null
+++ b/docker/solr6/solr.in.sh
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# By default the script will use JAVA_HOME to determine which java
+# to use, but you can set a specific path for Solr to use without
+# affecting other Java applications on your server/workstation.
+#SOLR_JAVA_HOME=""
+
+# Increase Java Heap as needed to support your indexing / query needs
+SOLR_HEAP="1g"
+
+# Expert: If you want finer control over memory options, specify them directly
+# Comment out SOLR_HEAP if you are using this though, that takes precedence
+#SOLR_JAVA_MEM="-Xms512m -Xmx512m"
+
+# Enable verbose GC logging
+GC_LOG_OPTS=""
+
+# These GC settings have shown to work well for a number of common Solr workloads
+GC_TUNE="-XX:NewRatio=3 \
+-XX:SurvivorRatio=4 \
+-XX:TargetSurvivorRatio=90 \
+-XX:MaxTenuringThreshold=8 \
+-XX:+UseConcMarkSweepGC \
+-XX:ConcGCThreads=4 -XX:ParallelGCThreads=4 \
+-XX:+CMSScavengeBeforeRemark \
+-XX:PretenureSizeThreshold=64m \
+-XX:+UseCMSInitiatingOccupancyOnly \
+-XX:CMSInitiatingOccupancyFraction=50 \
+-XX:CMSMaxAbortablePrecleanTime=6000 \
+-XX:+CMSParallelRemarkEnabled \
+-XX:+ParallelRefProcEnabled"
+
+# Set the ZooKeeper connection string if using an external ZooKeeper ensemble
+# e.g. host1:2181,host2:2181/chroot
+# Leave empty if not using SolrCloud
+ZK_HOST=""
+
+# Set the ZooKeeper client timeout (for SolrCloud mode)
+ZK_CLIENT_TIMEOUT="15000"
+
+# By default the start script uses "localhost"; override the hostname here
+# for production SolrCloud environments to control the hostname exposed to cluster state
+SOLR_HOST="localhost"
+
+# By default the start script uses UTC; override the timezone if needed
+#SOLR_TIMEZONE="UTC"
+
+# Set to true to activate the JMX RMI connector to allow remote JMX client applications
+# to monitor the JVM hosting Solr; set to "false" to disable that behavior
+# (false is recommended in production environments)
+ENABLE_REMOTE_JMX_OPTS="false"
+
+# The script will use SOLR_PORT+10000 for the RMI_PORT or you can set it here
+# RMI_PORT=18983
+
+# Set the thread stack size
+SOLR_OPTS="$SOLR_OPTS -Xss256k"
+
+# Anything you add to the SOLR_OPTS variable will be included in the java
+# start command line as-is, in ADDITION to other options. If you specify the
+# -a option on start script, those options will be appended as well. Examples:
+#SOLR_OPTS="$SOLR_OPTS -Dsolr.autoSoftCommit.maxTime=3000"
+#SOLR_OPTS="$SOLR_OPTS -Dsolr.autoCommit.maxTime=60000"
+#SOLR_OPTS="$SOLR_OPTS -Dsolr.clustering.enabled=true"
+
+# Location where the bin/solr script will save PID files for running instances
+# If not set, the script will create PID files in $SOLR_TIP/bin
+#SOLR_PID_DIR=
+
+# Path to a directory for Solr to store cores and their data. By default, Solr will use server/solr
+# If solr.xml is not stored in ZooKeeper, this directory needs to contain solr.xml
+#SOLR_HOME=/data/solr/data
+
+# Solr provides a default Log4J configuration properties file in server/resources
+# however, you may want to customize the log settings and file appender location
+# so you can point the script to use a different log4j.properties file
+#LOG4J_PROPS=/data/solr/log4j.properties
+
+# Location where Solr should write logs to; should agree with the file appender
+# settings in server/resources/log4j.properties
+#SOLR_LOGS_DIR=/data/solr/logs
+
+# Sets the port Solr binds to, default is 8983
+SOLR_PORT=8983
+
+# Uncomment to set SSL-related system properties
+# Be sure to update the paths to the correct keystore for your environment
+#SOLR_SSL_KEY_STORE=etc/solr-ssl.keystore.jks
+#SOLR_SSL_KEY_STORE_PASSWORD=secret
+#SOLR_SSL_TRUST_STORE=etc/solr-ssl.keystore.jks
+#SOLR_SSL_TRUST_STORE_PASSWORD=secret
+#SOLR_SSL_NEED_CLIENT_AUTH=false
+#SOLR_SSL_WANT_CLIENT_AUTH=false
+
+# Uncomment if you want to override previously defined SSL values for HTTP client
+# otherwise keep them commented and the above values will automatically be set for HTTP clients
+#SOLR_SSL_CLIENT_KEY_STORE=
+#SOLR_SSL_CLIENT_KEY_STORE_PASSWORD=
+#SOLR_SSL_CLIENT_TRUST_STORE=
+#SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD=
+
+# Settings for authentication
+#SOLR_AUTHENTICATION_CLIENT_CONFIGURER=
+#SOLR_AUTHENTICATION_OPTS=
+
+SOLR_MODE=""
diff --git a/docker/solr6/solr.xml b/docker/solr6/solr.xml
new file mode 100644
index 00000000..68b15ba0
--- /dev/null
+++ b/docker/solr6/solr.xml
@@ -0,0 +1,53 @@
+
+
+
+
+
+
+
+
+
+ ${host:}
+ ${jetty.port:8983}
+ ${hostContext:solr}
+
+ ${genericCoreNodeNames:true}
+
+ ${zkClientTimeout:30000}
+ ${distribUpdateSoTimeout:600000}
+ ${distribUpdateConnTimeout:60000}
+ ${zkCredentialsProvider:org.apache.solr.common.cloud.DefaultZkCredentialsProvider}
+ ${zkACLProvider:org.apache.solr.common.cloud.DefaultZkACLProvider}
+
+
+
+
+ ${socketTimeout:600000}
+ ${connTimeout:60000}
+
+
+
diff --git a/docker/solr6/zoo.cfg b/docker/solr6/zoo.cfg
new file mode 100644
index 00000000..7e42d8c8
--- /dev/null
+++ b/docker/solr6/zoo.cfg
@@ -0,0 +1,31 @@
+# The number of milliseconds of each tick
+tickTime=2000
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+
+# the directory where the snapshot is stored.
+# dataDir=/opt/zookeeper/data
+# NOTE: Solr defaults the dataDir to /zoo_data
+
+# the port at which the clients will connect
+# clientPort=2181
+# NOTE: Solr sets this based on zkRun / zkHost params
+
+# the maximum number of client connections.
+# increase this if you need to handle more clients
+#maxClientCnxns=60
+#
+# Be sure to read the maintenance section of the
+# administrator guide before turning on autopurge.
+#
+# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
+#
+# The number of snapshots to retain in dataDir
+#autopurge.snapRetainCount=3
+# Purge task interval in hours
+# Set to "0" to disable auto purge feature
+#autopurge.purgeInterval=1
diff --git a/grails-app/conf/application.yml b/grails-app/conf/application.yml
index e72c56e6..350b3863 100644
--- a/grails-app/conf/application.yml
+++ b/grails-app/conf/application.yml
@@ -189,7 +189,7 @@ vernacularListsUrl: /default-vernacular-lists.json
# Location of locality keywords (null for default)
localityKeywordsUrl: /default-locality-keywords.json
#nationalSpeciesDatasets: dr2699,dr2700,dr2702,dr2704,dr2703,dr3118
-defaultDownloadFields: guid,rank,scientificName,scientificNameAuthorship,taxonomicStatus,establishmentMeans,rk_genus,rk_family,rk_order,rk_class,rk_phylum,rk_kingdom,datasetName,parentGuid,acceptedConceptName,acceptedConceptID
+defaultDownloadFields: guid,rank,scientificName,scientificNameAuthorship,taxonomicStatus,establishmentMeans,rk_genus,rk_family,rk_order,rk_class,rk_phylum,rk_kingdom,datasetName,parentGuid,acceptedConceptName,acceptedConceptID,idxtype,name
additionalResultFields: ""
#toggle for the population of occurrence counts
diff --git a/grails-app/controllers/au/org/ala/bie/ImportController.groovy b/grails-app/controllers/au/org/ala/bie/ImportController.groovy
index 257eefa1..497595bd 100755
--- a/grails-app/controllers/au/org/ala/bie/ImportController.groovy
+++ b/grails-app/controllers/au/org/ala/bie/ImportController.groovy
@@ -15,7 +15,6 @@ package au.org.ala.bie
import au.org.ala.bie.util.Job
import grails.converters.JSON
-import org.apache.commons.lang.BooleanUtils
import au.org.ala.web.AlaSecured
/**
* Controller for data import into the system.
@@ -70,7 +69,7 @@ class ImportController {
return
}
- def clearIndex = BooleanUtils.toBooleanObject(params.clear_index ?: "false")
+ def clearIndex = params.getBoolean('clear_index', false)
def dwcDir = params.dwca_dir
if(new File(dwcDir).exists()){
@@ -120,8 +119,9 @@ class ImportController {
*/
// Documented in openapi.yml
def importCollectory(){
+ boolean online = params.getBoolean('online', false)
if(grailsApplication.config.collectory.service){
- def job = execute("importCollectory", "admin.button.importcollectory", { importService.importCollectory() })
+ def job = execute("importCollectory", "admin.button.importcollectory", { importService.importCollectory(online) })
asJson(job.status())
} else {
asJson([success: false, message: 'collectoryServicesUrl not configured'])
@@ -135,8 +135,9 @@ class ImportController {
*/
// Documented in openapi.yml
def importLayers(){
+ boolean online = params.getBoolean('online', false)
if(grailsApplication.config.layers.service){
- def job = execute("importLayers", "admin.button.importlayer", { importService.importLayers() })
+ def job = execute("importLayers", "admin.button.importlayer", { importService.importLayers(online) })
asJson(job.status())
} else {
asJson([success: false, message: 'layers.service not configured'])
@@ -150,8 +151,9 @@ class ImportController {
*/
// Documented in openapi.yml
def importLocalities(){
+ boolean online = params.getBoolean('online', false)
if(grailsApplication.config.layers.service && grailsApplication.config.layers.gazetteerId){
- def job = execute("importLocalities", "admin.button.importlocalities", { importService.importLocalities() })
+ def job = execute("importLocalities", "admin.button.importlocalities", { importService.importLocalities(online) })
asJson(job.status())
} else {
asJson([success: false, message: 'layers.services not configured or layers.gazetteerId not configured'])
@@ -165,8 +167,9 @@ class ImportController {
*/
// Documented in openapi.yml
def importRegions(){
+ boolean online = params.getBoolean('online', false)
if(grailsApplication.config.layers.service){
- def job = execute("importRegions", "admin.button.importregions", { importService.importRegions() })
+ def job = execute("importRegions", "admin.button.importregions", { importService.importRegions(online) })
asJson(job.status())
} else {
asJson([success: false, message: 'layers.service not configured'])
@@ -191,7 +194,8 @@ class ImportController {
*/
// Documented in openapi.yml
def importWordPress(){
- def job = execute("importWordPressPages", "admin.button.loadwordpress", { importService.importWordPressPages() })
+ boolean online = params.getBoolean('online', false)
+ def job = execute("importWordPressPages", "admin.button.loadwordpress", { importService.importWordPressPages(online) })
asJson(job.status())
}
@@ -201,7 +205,8 @@ class ImportController {
* @return
*/
def importKnowledgeBase(){
- def job = execute("importKnowledgeBasePages", "admin.button.loadknowledgebase", { importService.importKnowledgeBasePages() })
+ boolean online = params.getBoolean('online', false)
+ def job = execute("importKnowledgeBasePages", "admin.button.loadknowledgebase", { importService.importKnowledgeBasePages(online) })
asJson(job.status())
}
@@ -212,7 +217,8 @@ class ImportController {
*/
// Documented in openapi.yml
def importOccurrences(){
- def job = execute("importOccurrences", "admin.button.loadoccurrence", { importService.importOccurrenceData() })
+ def online = params.getBoolean('online', false)
+ def job = execute("importOccurrences", "admin.button.loadoccurrence", { importService.importOccurrenceData(online) })
asJson (job.status())
}
@@ -242,7 +248,7 @@ class ImportController {
// Documented in openapi.yml
def buildLinkIdentifiers() {
- def online = BooleanUtils.toBooleanObject(params.online ?: "false")
+ def online = params.getBoolean('online', false)
def job = execute("buildLinkIdentifiers", "admin.button.buildLinks", { importService.buildLinkIdentifiers(online) })
asJson (job.status())
@@ -250,7 +256,7 @@ class ImportController {
// Documented in openapi.yml
def denormaliseTaxa() {
- def online = BooleanUtils.toBooleanObject(params.online ?: "false")
+ def online = params.getBoolean('online', false)
def job = execute("denormaliseTaxa", "admin.button.denormalise", { importService.denormaliseTaxa(online) })
asJson (job.status())
@@ -258,7 +264,7 @@ class ImportController {
// Documented in openapi.yml
def buildFavourites() {
- def online = BooleanUtils.toBooleanObject(params.online ?: "false")
+ def online = params.getBoolean('online', false)
def job = execute("buildFavourites", "admin.button.buildfavourites", { importService.buildFavourites(online) })
asJson (job.status())
@@ -266,7 +272,7 @@ class ImportController {
// Documented in openapi.yml
def buildWeights() {
- def online = BooleanUtils.toBooleanObject(params.online ?: "false")
+ def online = params.getBoolean('online', false)
def job = execute("buildWeights", "admin.button.buildweights", { importService.buildWeights(online) })
asJson (job.status())
@@ -274,7 +280,7 @@ class ImportController {
// Documented in openapi.yml
def buildSuggestIndex() {
- def online = BooleanUtils.toBooleanObject(params.online ?: "false")
+ def online = params.getBoolean('online', false)
def job = execute("buildSuggestIndex", "admin.button.buildsuggestindex", { importService.buildSuggestIndex(online) })
asJson (job.status())
@@ -288,24 +294,18 @@ class ImportController {
*/
// Documented in openapi.yml
def loadPreferredImages() {
- def online = BooleanUtils.toBooleanObject(params.online ?: "false")
+ def online = params.getBoolean('online', false)
def job = execute("loadImages", "admin.button.loadimagespref", { importService.loadPreferredImages(online) })
asJson (job.status())
}
// Documented in openapi.yml
def loadImages() {
- def online = BooleanUtils.toBooleanObject(params.online ?: "false")
+ def online = params.getBoolean('online', false)
def job = execute("loadImages", "admin.button.loadimagesall", { importService.loadImages(online) })
asJson (job.status())
}
- // Documented in openapi.yml
- def ranks() {
- asJson(importService.ranks())
- }
-
-
private def asJson = { model ->
response.setContentType("application/json;charset=UTF-8")
render (model as JSON)
diff --git a/grails-app/controllers/au/org/ala/bie/MiscController.groovy b/grails-app/controllers/au/org/ala/bie/MiscController.groovy
index 827ee27e..44bb6d79 100644
--- a/grails-app/controllers/au/org/ala/bie/MiscController.groovy
+++ b/grails-app/controllers/au/org/ala/bie/MiscController.groovy
@@ -28,6 +28,11 @@ class MiscController {
return
}
+ // Documented in openapi.yml
+ def ranks() {
+ render importService.ranks() as JSON
+ }
+
def indexFields() {
def fields = indexService.getIndexFieldDetails(null)
diff --git a/grails-app/controllers/au/org/ala/bie/SearchController.groovy b/grails-app/controllers/au/org/ala/bie/SearchController.groovy
index 0f639a62..808511df 100755
--- a/grails-app/controllers/au/org/ala/bie/SearchController.groovy
+++ b/grails-app/controllers/au/org/ala/bie/SearchController.groovy
@@ -63,10 +63,11 @@ class SearchController implements GrailsConfigurationAware {
*/
// Documented in openapi.yml
def imageLinkSearch() {
- def showNoImage = params.containsKey("showNoImage") ? params.boolean("showNoImage") : true
+ def showNoImage = params.boolean('showNoImage', true)
def guid = regularise(params.id)
def locales = [request.locale, defaultLocale]
- def url = searchService.imageLinkSearch(guid, params.imageType, params.qc, locales)
+ def imageType = params.imageType
+ def url = searchService.imageLinkSearch(guid, imageType, params.qc, locales)
if (!url && showNoImage) {
url = resource(dir: "images", file: "noImage85.jpg", absolute: true)
@@ -86,13 +87,15 @@ class SearchController implements GrailsConfigurationAware {
// Documented in openapi.yml
def childConcepts(){
def taxonID = params.id
- if(!taxonID){
+ if(!taxonID) {
response.sendError(400, "Please provide a GUID")
return null
}
- def within = params.within && params.within.isInteger() ? params.within as Integer : 2000
- def unranked = params.unranked ? params.unranked.toBoolean() : true
- render (searchService.getChildConcepts(regularise(taxonID), request.queryString, within, unranked) as JSON)
+ def within = params.int('within', 2000)
+ def unranked = params.boolean('unranked', true)
+ ['within', 'unranked', 'controller', 'action', 'id'].each {params.remove(it) }
+ def extra = params.toQueryString().replaceFirst('^\\?', '')
+ render (searchService.getChildConcepts(regularise(taxonID), extra, within, unranked) as JSON)
}
// Documented in openapi.yml
diff --git a/grails-app/controllers/bie/index/UrlMappings.groovy b/grails-app/controllers/bie/index/UrlMappings.groovy
index 198c426a..f66e648d 100755
--- a/grails-app/controllers/bie/index/UrlMappings.groovy
+++ b/grails-app/controllers/bie/index/UrlMappings.groovy
@@ -35,7 +35,7 @@ class UrlMappings {
"/habitat/ids/$guid**"(controller: "search", action: "getHabitatIDs")
"/habitat/$guid**"(controller: "search", action: "getHabitat")
- "/ranks"(controller: "import", action: "ranks")
+ "/ranks"(controller: "misc", action: "ranks")
"/admin"(controller: "admin")
"/admin/"(controller: "admin")
diff --git a/grails-app/i18n/messages.properties b/grails-app/i18n/messages.properties
index ef0ee0e7..4dbd5f43 100755
--- a/grails-app/i18n/messages.properties
+++ b/grails-app/i18n/messages.properties
@@ -82,6 +82,8 @@ download.rk_phylum=phylum
download.rk_subkingdom=subkingdom
download.rk_kingdom=kingdom
download.datasetName=datasetName
+download.idxtype=type
+download.name=title
admin.ala.label=ALA admin functions
admin.ala.lead=view config, build info & set banner message
diff --git a/grails-app/services/au/org/ala/bie/DownloadService.groovy b/grails-app/services/au/org/ala/bie/DownloadService.groovy
index dac73d39..cbf7cc52 100644
--- a/grails-app/services/au/org/ala/bie/DownloadService.groovy
+++ b/grails-app/services/au/org/ala/bie/DownloadService.groovy
@@ -18,7 +18,6 @@ import au.org.ala.bie.util.Encoder
class DownloadService {
def grailsApplication
- def indexService
def messageSource
/**
@@ -31,10 +30,11 @@ class DownloadService {
*/
def download(params, OutputStream outputStream, Locale locale){
def q = Encoder.escapeQuery(params.q ?: "*:*")
- def fq = params.list('fq')
- def fields = params.fields ?: grailsApplication.config.defaultDownloadFields ?: "guid,rank,scientificName,rk_genus,rk_family,rk_order,rk_class,rk_phylum,rk_kingdom,datasetName"
+ def fq = new ArrayList<>(params.list('fq')) // Make modifiable
+ def fields = params.fields ?: grailsApplication.config.getRequiredProperty('defaultDownloadFields')
def fqs = ''
+ grailsApplication.config.solr.search.fq.each { fq << it }
if (fq) {
fqs = '&fq=' + fq.collect({ Encoder.escapeQuery(it) }).join("&fq=")
}
diff --git a/grails-app/services/au/org/ala/bie/ImportService.groovy b/grails-app/services/au/org/ala/bie/ImportService.groovy
index ee1fcedd..072515fb 100644
--- a/grails-app/services/au/org/ala/bie/ImportService.groovy
+++ b/grails-app/services/au/org/ala/bie/ImportService.groovy
@@ -100,6 +100,8 @@ class ImportService implements GrailsConfigurationAware {
static BATCH_SIZE = 5000
// Buffer size for commits
static BUFFER_SIZE = 1000
+ // The count size for count requests
+ static COUNT_SIZE = 20
// Accepted status
static ACCEPTED_STATUS = TaxonomicType.values().findAll({ it.accepted }).collect({ "taxonomicStatus:${it.term}" }).join(' OR ')
// Synonym status
@@ -214,7 +216,7 @@ class ImportService implements GrailsConfigurationAware {
try {
switch (step) {
case 'collectory':
- importCollectory()
+ importCollectory(false)
break
case 'conservation-lists':
importConservationSpeciesLists()
@@ -230,19 +232,19 @@ class ImportService implements GrailsConfigurationAware {
loadImages(false)
break
case 'layers':
- importLayers()
+ importLayers(false)
break
case 'link-identifiers':
buildLinkIdentifiers(false)
break
case 'localities':
- importLocalities()
+ importLocalities(false)
break
case 'occurrences':
- importOccurrenceData()
+ importOccurrenceData(false)
break
case 'regions':
- importRegions()
+ importRegions(false)
break
case 'suggest-index':
buildSuggestIndex(false)
@@ -257,10 +259,10 @@ class ImportService implements GrailsConfigurationAware {
buildWeights(false)
break
case 'wordpress':
- importWordPressPages()
+ importWordPressPages(false)
break
case 'knowledgebase':
- importKnowledgeBasePages()
+ importKnowledgeBasePages(false)
break
default:
log("Unknown step ${step}")
@@ -290,11 +292,14 @@ class ImportService implements GrailsConfigurationAware {
*
* @return
*/
- def importLayers() {
+ def importLayers(boolean online) {
log "Starting layer import"
+ if (online) {
+ log "Layers may be temporarily unavailable"
+ }
def layers = layerService.layers()
def batch = []
- indexService.deleteFromIndex(IndexDocType.LAYER)
+ indexService.deleteFromIndex(IndexDocType.LAYER, online)
layers.each { layer ->
def doc = [:]
doc["id"] = layer.name
@@ -306,20 +311,23 @@ class ImportService implements GrailsConfigurationAware {
log "Importing layer ${layer.displayname}"
batch << doc
}
- indexService.indexBatch(batch)
+ indexService.indexBatch(batch, online)
log "Finished indexing ${layers.size()} layers"
log "Finsihed layer import"
}
- def importLocalities() {
+ def importLocalities(boolean online) {
log "Starting localities import"
- indexService.deleteFromIndex(IndexDocType.LOCALITY)
+ if (online) {
+ log "Localities may be temporarily unavailable"
+ }
+ indexService.deleteFromIndex(IndexDocType.LOCALITY, online)
if (gazetteerId) {
log("Starting indexing ${gazetteerId}")
log("Getting metadata for layer: ${gazetteerId}")
def layer = layerService.get(gazetteerId)
log("Starting indexing ${layer.id} - ${layer.name} gazetteer layer")
- importLayer(layer)
+ importLayer(layer, online)
log("Finished indexing ${layer.id} - ${layer.name} gazetteer layer")
} else {
log("Skipping localities, no gazetteer layer ID configured")
@@ -327,13 +335,16 @@ class ImportService implements GrailsConfigurationAware {
log "Finished localities import"
}
- def importRegions() {
+ def importRegions(boolean online) {
log "Starting regions import"
- indexService.deleteFromIndex(IndexDocType.REGION)
+ if (online) {
+ log "Regions may be temporarily unavailable"
+ }
+ indexService.deleteFromIndex(IndexDocType.REGION, online)
def layers = layerService.layers()
layers.each { layer ->
if (layer.type == "Contextual") {
- importLayer(layer)
+ importLayer(layer, online)
}
}
log "Finished indexing ${layers.size()} region layers"
@@ -347,7 +358,7 @@ class ImportService implements GrailsConfigurationAware {
* @param layer
* @return
*/
- private def importLayer(layer) {
+ private def importLayer(layer, boolean online) {
log("Loading regions from layer " + layer.name)
def file = layerService.getRegions(layer.id)
@@ -400,13 +411,13 @@ class ImportService implements GrailsConfigurationAware {
batch << doc
if (batch.size() >= BATCH_SIZE) {
- indexService.indexBatch(batch)
+ indexService.indexBatch(batch, online)
batch.clear()
}
}
}
if (batch) {
- indexService.indexBatch(batch)
+ indexService.indexBatch(batch, online)
batch.clear()
}
}
@@ -415,7 +426,7 @@ class ImportService implements GrailsConfigurationAware {
def importHabitats() {
def batch = []
- indexService.deleteFromIndex(IndexDocType.HABITAT)
+ indexService.deleteFromIndex(IndexDocType.HABITAT, false)
//read the DwC metadata
Archive archive = ArchiveFactory.openArchive(new File("/data/habitat/"));
@@ -452,7 +463,7 @@ class ImportService implements GrailsConfigurationAware {
*
* @return
*/
- def importCollectory() {
+ def importCollectory(boolean online) {
log "Starting collectory import"
[
"dataResource": IndexDocType.DATARESOURCE,
@@ -464,7 +475,10 @@ class ImportService implements GrailsConfigurationAware {
def drLists = collectoryService.resources(entityType)
log("About to import ${drLists.size()} ${entityType}")
log("Clearing existing: ${entityType}")
- indexService.deleteFromIndex(indexDocType)
+ if (online) {
+ log "Search may be temporarily unavailable for ${entityType}"
+ }
+ indexService.deleteFromIndex(indexDocType, online)
log("Cleared")
drLists.each {
@@ -488,12 +502,12 @@ class ImportService implements GrailsConfigurationAware {
entities << doc
if (entities.size() > BUFFER_SIZE) {
- indexService.indexBatch(entities)
+ indexService.indexBatch(entities, online)
entities.clear()
}
}
if (entities) {
- indexService.indexBatch(entities)
+ indexService.indexBatch(entities, online)
}
log("Finished indexing ${drLists.size()} ${entityType}")
}
@@ -503,22 +517,16 @@ class ImportService implements GrailsConfigurationAware {
/**
* Index WordPress pages
*/
- def importWordPressPages() throws Exception {
+ def importWordPressPages(boolean online) throws Exception {
log "Starting wordpress import"
- // clear the existing WP index
- indexService.deleteFromIndex(IndexDocType.WORDPRESS)
- if (!wordPressSitemap) {
- return
- }
-
- // get List of WordPress document URLs (each page's URL)
- def pages = wordpressService.resources()
+ def buffer = []
+ def pages = wordPressSitemap ? wordpressService.resources() : []
def documentCount = 0
def totalDocs = pages.size()
- def buffer = []
log("WordPress pages found: ${totalDocs}") // update user via socket
// slurp and build each SOLR doc (add to buffer)
+ // We're expecting less than 1000 documents here so we can delete and update in one batch
pages.each { pageUrl ->
log "indexing url: ${pageUrl}"
try {
@@ -565,7 +573,11 @@ class ImportService implements GrailsConfigurationAware {
}
}
log("Committing to ${buffer.size()} documents to SOLR...")
- indexService.indexBatch(buffer)
+ if (online) {
+ log "Search for wordpress pages may be temporarily unavailable"
+ }
+ indexService.deleteFromIndex(IndexDocType.WORDPRESS, online)
+ indexService.indexBatch(buffer, online)
updateProgressBar(100, 100) // complete progress bar
log "Finished wordpress import"
}
@@ -573,10 +585,8 @@ class ImportService implements GrailsConfigurationAware {
/**
* Index Knowledge Base pages.
*/
- def importKnowledgeBasePages() throws Exception {
- log "Starting knowledge base import"
- // clear the existing WP index
- indexService.deleteFromIndex(IndexDocType.KNOWLEDGEBASE)
+ def importKnowledgeBasePages(boolean online) throws Exception {
+ log "Starting knowledge base import."
// get List of Knowledge Base document URLs (each page's URL)
def pages = knowledgeBaseService.resources()
@@ -618,7 +628,11 @@ class ImportService implements GrailsConfigurationAware {
}
}
log("Committing to ${buffer.size()} documents to SOLR...")
- indexService.indexBatch(buffer)
+ if (online) {
+ log "Search for knowledge base pages may be temporarily unavailable"
+ }
+ indexService.deleteFromIndex(IndexDocType.KNOWLEDGEBASE, online)
+ indexService.indexBatch(buffer, online)
updateProgressBar(100, 100) // complete progress bar
log "Finished knowledge base import"
}
@@ -726,202 +740,86 @@ class ImportService implements GrailsConfigurationAware {
}
/**
- * Paginate through taxon docs in SOLR and update their occurrence status via either:
- * - checking against a list of datasetID codes (config)
- * OR
- * - searching for occurrence records with the (taxon concept) GUID
- *
- * Example cursor search
- * http://bie-dev.ala.org.au/solr/bie/select?q=idxtype:TAXON+AND+taxonomicStatus:accepted&wt=json&rows=100&indent=true&sort=id+asc&cursorMark=*
- * Pagination via cursor: https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results
- **/
- def importOccurrenceData() throws Exception {
- def pageSize = BATCH_SIZE
- def paramsMap = [
- q : "idxtype:${IndexDocType.TAXON.name()} AND (${ACCEPTED_STATUS})",
- //fq: "datasetID:dr2699", // testing only with AFD
- cursorMark: CursorMarkParams.CURSOR_MARK_START, // gets updated by subsequent searches
- fl : "id,idxtype,guid,scientificName,datasetID", // will restrict results to dos with these fields (bit like fq)
- rows : pageSize,
- sort : "id asc" // needed for cursor searching
- ]
-
- // first get a count of results so we can determine number of pages to process
- Map countMap = paramsMap.clone(); // shallow clone is OK
- countMap.rows = 0
- countMap.remove("cursorMark")
- def searchCount = searchService.getCursorSearchResults(new MapSolrParams(countMap), true)
- // could throw exception
- def totalDocs = searchCount?.results?.numFound ?: 0
- int totalPages = (totalDocs + pageSize - 1) / pageSize
- log.debug "totalDocs = ${totalDocs} || totalPages = ${totalPages}"
- log("Processing " + String.format("%,d", totalDocs) + " taxa (via ${paramsMap.q})...
") // send to browser
-
- def promiseList = new PromiseList() // for biocache queries
- Queue commitQueue = new ConcurrentLinkedQueue() // queue to put docs to be indexes
- ExecutorService executor = Executors.newSingleThreadExecutor() // consumer of queue - single blocking thread
- executor.execute {
- indexDocInQueue(commitQueue, "initialised") // will keep polling the queue until terminated via cancel()
- }
-
- // iterate over pages
- (1..totalPages).each { page ->
- try {
- MapSolrParams solrParams = new MapSolrParams(paramsMap)
- log.debug "${page}. paramsMap = ${paramsMap}"
- def searchResults = searchService.getCursorSearchResults(solrParams, true)
- // use offline index to search
- def resultsDocs = searchResults?.results ?: []
+ * Go through the index and get occurrence counts for accepted taxa
+ */
+ def importOccurrenceData(online) {
+ int pageSize = BATCH_SIZE
+ int processed = 0
+ def typeQuery = "idxtype:\"${IndexDocType.TAXON.name()}\" AND (${ACCEPTED_STATUS})"
+ def prevCursor
+ def cursor
- // buckets to group results into
- def taxaLocatedInHubCountry = [] // automatically get included
- def taxaToSearchOccurrences = [] // need to search biocache to see if they are located in hub country
+ log("Starting occurrence count scan for ${online ? 'online' : 'offline'} index")
+ try {
+ prevCursor = ""
+ cursor = CursorMarkParams.CURSOR_MARK_START
+ processed = 0
+ while (prevCursor != cursor) {
+ def startTime = System.currentTimeMillis()
+ SolrQuery query = new SolrQuery(typeQuery)
+ query.setParam('cursorMark', cursor)
+ query.setSort("id", SolrQuery.ORDER.asc)
+ query.setRows(pageSize)
+ def response = indexService.query(query, online)
+ def docs = response.results
+ int total = docs.numFound
+ def buffer = []
+ def guids = []
+ def updates = [:]
- // iterate over the result set
- resultsDocs.each { doc ->
+ docs.each { doc ->
+ def taxonID = doc.guid
+ def update = [id: doc.id, idxtype: [set: doc.idxtype], guid: [set: taxonID], occurrenceCount: [set: 0]]
if (nationalSpeciesDatasets && nationalSpeciesDatasets.contains(doc.datasetID)) {
- taxaLocatedInHubCountry.add(doc)
- // in national list so _assume_ it is located in host/hub county
- } else {
- taxaToSearchOccurrences.add(doc)
- // search occurrence records to determine if it is located in host/hub county
+ update.locatedInHubCountry = ["set": true]
}
- }
-
- // update national list without occurrence record lookup
- updateTaxaWithLocationInfo(taxaLocatedInHubCountry, commitQueue)
- // update the rest via occurrence search (non blocking via promiseList)
- promiseList << { searchOccurrencesWithGuids(resultsDocs, commitQueue) }
- // update cursor
- paramsMap.cursorMark = searchResults?.nextCursorMark ?: ""
- // update view via via JS
- updateProgressBar(totalPages, page)
- log("${page}. taxaLocatedInHubCountry = ${taxaLocatedInHubCountry.size()} | taxaToSearchOccurrences = ${taxaToSearchOccurrences.size()}")
- } catch (Exception ex) {
- log.warn "Error calling BIE SOLR: ${ex.message}", ex
- log("ERROR calling SOLR: ${ex.message}")
- }
- }
-
- log("Waiting for all occurrence searches and SOLR commits to finish (could take some time)")
-
- //promiseList.get() // block until all promises are complete
- promiseList.onComplete { List results ->
- //executor.shutdownNow()
- isKeepIndexing = false // stop indexing thread
- executor.shutdown()
- log("Total taxa found with occurrence records = ${results.sum()}")
- log("waiting for indexing to finish...")
- }
- }
-
- /**
- * Batch update of SOLR docs for occurrence/location info
- * TODO extract field name into config: "locatedInHubCountry"
- *
- * @param docs
- * @param commitQueue
- * @return
- */
- def updateTaxaWithLocationInfo(List docs, Queue commitQueue) {
- def totalDocumentsUpdated = 0
-
- docs.each { Map doc ->
- if (doc.containsKey("id") && doc.containsKey("guid") && doc.containsKey("idxtype")) {
- Map updateDoc = [:]
- updateDoc["id"] = doc.id // doc key
- updateDoc["idxtype"] = ["set": doc.idxtype] // required field
- updateDoc["guid"] = ["set": doc.guid] // required field
- updateDoc["locatedInHubCountry"] = ["set": true]
- if (doc.containsKey("occurrenceCount")) {
- updateDoc["occurrenceCount"] = ["set": doc["occurrenceCount"]]
- }
- commitQueue.offer(updateDoc) // throw it on the queue
- totalDocumentsUpdated++
- } else {
- log.warn "Updating doc error: missing keys ${doc}"
- }
- }
-
- totalDocumentsUpdated
- }
-
- /**
- * Poll the queue of docs and index in batches
- *
- * @param updateDocs
- * @return
- */
- def indexDocInQueue(Queue updateDocs, msg) {
- int batchSize = BUFFER_SIZE
-
- while (isKeepIndexing || updateDocs.size() > 0) {
- if (updateDocs.size() > 0) {
- log.info "Starting indexing of ${updateDocs.size()} docs"
- try {
- // batch index docs
- List batchDocs = []
- int end = (batchSize < updateDocs.size()) ? batchSize : updateDocs.size()
-
- (1..end).each {
- if (updateDocs.peek()) {
- batchDocs.add(updateDocs.poll())
+ guids << taxonID
+ updates[taxonID] = update
+
+ if (guids.size() >= COUNT_SIZE) {
+ def cts = biocacheService.counts(guids, occurrenceCountFilter)
+ guids.each { guid ->
+ def val = cts[guid]
+ def upd = updates[guid]
+ if (val && upd)
+ upd.occurrenceCount = [set: val]
+ buffer << upd
}
+ guids = []
+ updates = [:]
}
-
- indexService.indexBatch(batchDocs) // index
- } catch (Exception ex) {
- log.warn "Error batch indexing: ${ex.message}", ex
- log.warn "updateDocs = ${updateDocs}"
- log("ERROR batch indexing: ${ex.message}
${ex.stackTrace}
")
+ if (buffer.size() >= BUFFER_SIZE) {
+ indexService.indexBatch(buffer, online)
+ buffer = []
+ }
+ processed++
}
- } else {
- sleep(500)
- }
- }
-
- log("Indexing thread is done: ${msg}")
- }
-
- /**
- * Extract a list of GUIDs from input list of docs and do paginated/batch search of occurrence records,
- * updating index with occurrence status info (could be presence or record counts, etc)
- *
- * @param docs
- * @return
- */
- def searchOccurrencesWithGuids(List docs, Queue commitQueue) {
- int batchSize = 20 // even with POST SOLR throws 400 code is batchSize is more than 100
- List guids = docs.collect { it.guid }
- int totalPages = ((guids.size() + batchSize - 1) / batchSize) - 1
- log.debug "total = ${guids.size()} || batchSize = ${batchSize} || totalPages = ${totalPages}"
- List docsWithRecs = [] // docs to index
- //log("Getting occurrence data for ${docs.size()} docs")
-
- (0..totalPages).each { index ->
- try {
- int start = index * batchSize
- int end = (start + batchSize < guids.size()) ? start + batchSize - 1 : guids.size()
- log.debug "paging biocache search - ${start} to ${end}"
- def guidSubset = guids.subList(start, end)
- def counts = biocacheService.counts(guidSubset, occurrenceCountFilter)
- counts?.each { guid, count ->
- def docWithRecs = docs.find { it.guid == guid }
- if (docWithRecs) {
- docWithRecs["occurrenceCount"] = count
- docsWithRecs.add(docWithRecs)
+ if (guids.size() > 0) {
+ def cts = biocacheService.counts(guids, occurrenceCountFilter)
+ guids.each { guid ->
+ def val = cts[guid]
+ def update = updates[guid]
+ if (val && update)
+ update.occurrenceCount = [set: val]
+ }
+ buffer.addAll(updates.values())
}
- } catch (Exception ex) {
- log.warn "Error calling biocache SOLR: ${ex.message}", ex
- log("ERROR calling biocache SOLR: ${ex.message}")
+ if (!buffer.isEmpty())
+ indexService.indexBatch(buffer, online)
+ def percentage = total ? Math.round(processed * 100 / total) : 100
+ def speed = total ? Math.round((pageSize * 1000) / (System.currentTimeMillis() - startTime)) : 0
+ log("Processed ${processed} taxa (${percentage}%) speed ${speed} records per second")
+ if (total > 0) {
+ updateProgressBar(total, processed)
+ }
+ prevCursor = cursor
+ cursor = response.nextCursorMark
}
+ log("Finished scan")
+ } catch (Exception ex) {
+ log.error("Unable to perform occurrence scan", ex)
+ log("Error during scan: " + ex.getMessage())
}
- if (docsWithRecs.size() > 0) {
- log.debug "docsWithRecs size = ${docsWithRecs.size()} vs docs size = ${docs.size()}"
- updateTaxaWithLocationInfo(docsWithRecs, commitQueue)
- }
-
}
/**
@@ -1132,10 +1030,10 @@ class ImportService implements GrailsConfigurationAware {
def clearTaxaIndex() {
log("Deleting existing taxon entries in index...")
- indexService.deleteFromIndex(IndexDocType.TAXON)
- indexService.deleteFromIndex(IndexDocType.COMMON)
- indexService.deleteFromIndex(IndexDocType.IDENTIFIER)
- indexService.deleteFromIndex(IndexDocType.TAXONVARIANT)
+ indexService.deleteFromIndex(IndexDocType.TAXON, false)
+ indexService.deleteFromIndex(IndexDocType.COMMON, false)
+ indexService.deleteFromIndex(IndexDocType.IDENTIFIER, false)
+ indexService.deleteFromIndex(IndexDocType.TAXONVARIANT, false)
log("Cleared.")
}
diff --git a/grails-app/services/au/org/ala/bie/IndexService.groovy b/grails-app/services/au/org/ala/bie/IndexService.groovy
index ca597ef1..72e932e2 100644
--- a/grails-app/services/au/org/ala/bie/IndexService.groovy
+++ b/grails-app/services/au/org/ala/bie/IndexService.groovy
@@ -63,9 +63,10 @@ class IndexService implements GrailsConfigurationAware {
* @param docType
* @return
*/
- def deleteFromIndex(IndexDocType docType){
+ def deleteFromIndex(IndexDocType docType, boolean online) {
log.info("Deleting from index: " + docType.name() + "....")
- offlineSolrClient.deleteByQuery("idxtype:" + docType.name())
+ def client = online ? updatingLiveSolrClient : offlineSolrClient
+ client.deleteByQuery("idxtype:" + docType.name())
log.info("Deleted from index: " + docType.name())
}
@@ -75,7 +76,7 @@ class IndexService implements GrailsConfigurationAware {
* @param docsToIndex
* @param offline Use the offline index (defaults to true)
*/
- def indexBatch(List docsToIndex, online = false) throws Exception {
+ def indexBatch(List docsToIndex, boolean online = false) throws Exception {
def client = online ? updatingLiveSolrClient : offlineSolrClient
def buffer = []
diff --git a/grails-app/services/au/org/ala/bie/SearchService.groovy b/grails-app/services/au/org/ala/bie/SearchService.groovy
index 9a503dc7..c6446878 100644
--- a/grails-app/services/au/org/ala/bie/SearchService.groovy
+++ b/grails-app/services/au/org/ala/bie/SearchService.groovy
@@ -990,19 +990,19 @@ class SearchService {
"infoSourceName" : it.datasetName,
"infoSourceURL" : "${grailsApplication.config.collectory.base}/public/show/${it.datasetID}"
]
- } else if (it.idxtype == IndexDocType.COMMON.name()){
+ } else if (it.idxtype == IndexDocType.COMMON.name()) {
doc = [
- "id" : it.id, // needed for highlighting
- "guid" : it.guid,
- "taxonGuid" : it.taxonGuid,
- "linkIdentifier" : it.linkIdentifier,
- "idxtype": it.idxtype,
- "name" : it.name,
- "language" : it.language,
+ "id" : it.id, // needed for highlighting
+ "guid" : it.guid,
+ "taxonGuid" : it.taxonGuid,
+ "linkIdentifier" : it.linkIdentifier,
+ "idxtype" : it.idxtype,
+ "name" : it.name,
+ "language" : it.language,
"acceptedConceptName": it.acceptedConceptName,
- "favourite": it.favourite,
- "infoSourceName" : it.datasetName,
- "infoSourceURL" : "${grailsApplication.config.collectory.base}/public/show/${it.datasetID}"
+ "favourite" : it.favourite,
+ "infoSourceName" : it.datasetName,
+ "infoSourceURL" : "${grailsApplication.config.collectory.base}/public/show/${it.datasetID}"
]
if (it.image) {
doc.put("image", it.image)
@@ -1029,6 +1029,9 @@ class SearchService {
if(it.favourite){
doc.put("favourite", it.favourite)
}
+ if (it.content){
+ doc.put("content", it.content)
+ }
}
if (doc) {
if(getAdditionalResultFields()){
diff --git a/grails-app/views/import/collectory.gsp b/grails-app/views/import/collectory.gsp
index 1b7afa46..87186a3a 100644
--- a/grails-app/views/import/collectory.gsp
+++ b/grails-app/views/import/collectory.gsp
@@ -17,10 +17,20 @@
-
+
+
+
+
+
+
+ function importCollectory(){
+ loadInfo("${createLink(controller:'import', action:'importCollectory')}?online=" + $('#use-online').is(':checked'));
+ }
+
+