From 2b785c3f6a1ee6a1617d720516aefdd6eb0c7ef9 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Wed, 11 Sep 2024 11:17:11 -0400 Subject: [PATCH 01/52] Enable test for invalid ontology IDs in AnnData --- test/js/lib/validate-anndata.test.js | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js index ec9e0a5ad..26c9044dc 100644 --- a/test/js/lib/validate-anndata.test.js +++ b/test/js/lib/validate-anndata.test.js @@ -57,19 +57,18 @@ describe('Client-side file validation for AnnData', () => { expect(issues).toHaveLength(0) }) - // TODO: Uncomment this after row-level AnnData parsing PR is merged - // it('Parses AnnData rows and reports invalid ontology IDs', async () => { - // // eslint-disable-next-line max-len - // const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test_invalid_disease.h5ad' - // const parseResults = await parseAnnDataFile(url) + it('Parses AnnData rows and reports invalid ontology IDs', async () => { + // eslint-disable-next-line max-len + const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test_invalid_disease.h5ad' + const parseResults = await parseAnnDataFile(url) - // expect(parseResults.issues).toHaveLength(1) + expect(parseResults.issues).toHaveLength(1) - // const expectedIssue = [ - // 'error', - // 'ontology:label-lookup-error', - // 'Ontology ID "FOO_0000042" is not among accepted ontologies (MONDO, PATO) for key "disease"' - // ] - // expect(parseResults.issues[0]).toEqual(expectedIssue) - // }) + const expectedIssue = [ + 'error', + 'ontology:label-lookup-error', + 'Ontology ID "FOO_0000042" is not among accepted ontologies (MONDO, PATO) for key "disease"' + ] + expect(parseResults.issues[0]).toEqual(expectedIssue) + }) }) From a99b7e007cfab5b84b3cecbea57b3c88432080a5 Mon Sep 17 00:00:00 2001 From: bistline Date: Wed, 11 Sep 2024 15:43:16 -0400 Subject: [PATCH 02/52] Create default X_umap cluster fragment unless specified --- app/models/ann_data_file_info.rb | 13 ++++++++++++- app/models/ann_data_ingest_parameters.rb | 2 +- test/models/ann_data_file_info_test.rb | 13 ++++++++++++- test/models/ann_data_ingest_parameters_test.rb | 2 +- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb index 312228173..1a9033696 100644 --- a/app/models/ann_data_file_info.rb +++ b/app/models/ann_data_file_info.rb @@ -39,7 +39,7 @@ class AnnDataFileInfo # } # { _id: '6033f531e241391884633748', data_type: :expression, description: 'log(TMP) expression' } field :data_fragments, type: Array, default: [] - before_validation :sanitize_fragments! + before_validation :set_default_cluster_fragments!, :sanitize_fragments! validate :validate_fragments after_validation :update_expression_file_info @@ -195,6 +195,17 @@ def sanitize_fragments! self.data_fragments = sanitized_fragments end + # create the default cluster data_fragment entries + def set_default_cluster_fragments! + return false if fragments_by_type(:cluster).any? 
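+    # Illustrative sketch (the _id value here is hypothetical): with the
+    # default obsm_keys of ['X_umap'], the fragment built below looks like
+    #   { _id: '66e1f0c4e241391884633748', data_type: :cluster, name: 'umap', obsm_key_name: 'X_umap' }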
+ + default_obsm_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys] + default_obsm_keys.each do |obsm_key_name| + name = obsm_key_name.delete_prefix('X_') + data_fragments << { _id: BSON::ObjectId.new.to_s, data_type: :cluster, name:, obsm_key_name: } + end + end + # ensure all fragments have required keys and are unique def validate_fragments REQUIRED_FRAGMENT_KEYS.each do |data_type, keys| diff --git a/app/models/ann_data_ingest_parameters.rb b/app/models/ann_data_ingest_parameters.rb index 82729cd91..be3f26fd8 100644 --- a/app/models/ann_data_ingest_parameters.rb +++ b/app/models/ann_data_ingest_parameters.rb @@ -32,7 +32,7 @@ class AnnDataIngestParameters PARAM_DEFAULTS = { ingest_anndata: true, anndata_file: nil, - obsm_keys: %w[X_umap X_tsne], + obsm_keys: %w[X_umap], ingest_cluster: false, cluster_file: nil, name: nil, diff --git a/test/models/ann_data_file_info_test.rb b/test/models/ann_data_file_info_test.rb index 8463fc937..94db34b3d 100644 --- a/test/models/ann_data_file_info_test.rb +++ b/test/models/ann_data_file_info_test.rb @@ -120,6 +120,17 @@ def generate_id assert_equal %w[X_umap X_tsne], ann_data_info.obsm_key_names end + test 'should set default cluster fragments' do + ann_data_info = AnnDataFileInfo.new + assert ann_data_info.valid? + default_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys] + default_keys.each do |obsm_key_name| + name = obsm_key_name.delete_prefix('X_') + matcher = { data_type: :cluster, name:, obsm_key_name: }.with_indifferent_access + assert ann_data_info.find_fragment(**matcher).present? + end + end + test 'should validate data fragments' do ann_data_info = AnnDataFileInfo.new( data_fragments: [ @@ -224,7 +235,7 @@ def generate_id ] ) assert anndata_info.valid? # invokes validations - exp_frag = anndata_info.data_fragments.first + exp_frag = anndata_info.fragments_by_type(:expression).first assert_nil exp_frag.with_indifferent_access.dig(:expression_file_info, :units) end end diff --git a/test/models/ann_data_ingest_parameters_test.rb b/test/models/ann_data_ingest_parameters_test.rb index 1a3023bc4..734d83cc7 100644 --- a/test/models/ann_data_ingest_parameters_test.rb +++ b/test/models/ann_data_ingest_parameters_test.rb @@ -56,7 +56,7 @@ class AnnDataIngestParametersTest < ActiveSupport::TestCase assert extraction.send(attr).blank? end - cmd = '--ingest-anndata --anndata-file gs://bucket_id/test.h5ad --obsm-keys ["X_umap", "X_tsne"] --extract ' \ + cmd = '--ingest-anndata --anndata-file gs://bucket_id/test.h5ad --obsm-keys ["X_umap"] --extract ' \ '["cluster", "metadata", "processed_expression", "raw_counts"]' assert_equal cmd, extraction.to_options_array.join(' ') assert_equal 'n2d-highmem-32', extraction.machine_type From 03ceb12ab0b592c73c8479e2593e587018828a2d Mon Sep 17 00:00:00 2001 From: bistline Date: Wed, 11 Sep 2024 15:55:52 -0400 Subject: [PATCH 03/52] Add missing spatial_cluster_associations field --- app/models/ann_data_file_info.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb index 1a9033696..a0ce6f4d5 100644 --- a/app/models/ann_data_file_info.rb +++ b/app/models/ann_data_file_info.rb @@ -202,7 +202,10 @@ def set_default_cluster_fragments! 
default_obsm_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys] default_obsm_keys.each do |obsm_key_name| name = obsm_key_name.delete_prefix('X_') - data_fragments << { _id: BSON::ObjectId.new.to_s, data_type: :cluster, name:, obsm_key_name: } + fragment = { + _id: BSON::ObjectId.new.to_s, data_type: :cluster, name:, obsm_key_name:, spatial_cluster_associations: [] + } + data_fragments << fragment end end From 3156dde49b5e4312866409916d1b097c03669c0e Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Wed, 11 Sep 2024 22:46:15 -0400 Subject: [PATCH 04/52] Fetch minified ontologies, modularize --- .../lib/validation/ontology-validation.js | 90 +++++++++++++++++++ .../lib/validation/validate-anndata.js | 22 +---- 2 files changed, 92 insertions(+), 20 deletions(-) create mode 100644 app/javascript/lib/validation/ontology-validation.js diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js new file mode 100644 index 000000000..0711d24fe --- /dev/null +++ b/app/javascript/lib/validation/ontology-validation.js @@ -0,0 +1,90 @@ +import { decompressSync, strFromU8 } from 'fflate' + +import { + metadataSchema, REQUIRED_CONVENTION_COLUMNS +} from './shared-validation' + +const ONTOLOGY_BASE_URL = + 'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' + + 'ew-minify-uberon/ingest/validation/ontologies/' + +/** Fetch .gz file, decompress it, return plaintext */ +async function fetchGzipped(url) { + const response = await fetch(url) + const blob = await response.blob(); + const uint8Array = new Uint8Array(await blob.arrayBuffer()); + const plaintext = strFromU8(decompressSync(uint8Array)); + return plaintext +} + +/** Fetch minified ontologies */ +async function fetchOntologies() { + const ontologies = {} + + fetchOntologies + const ontologyNames = getOntologyShortNames() + + for (let i = 0; i < ontologyNames.length; i++) { + const ontologyName = ontologyNames[i] + const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz` + const tsv = await fetchGzipped(ontologyUrl) + + const lines = tsv.split('\n') + + ontologies[ontologyName] = {} + + for (let i = 0; i < lines.length; i++) { + const line = lines[i] + const [ontologyId, label, rawSynonyms] = line.split('\t') + let names = [label] + if (rawSynonyms) { + const synonyms = rawSynonyms.split('||') + names = names.concat(synonyms) + } + ontologies[ontologyName][ontologyId] = names + } + } + + return ontologies +} + +window.fetchOntologies = fetchOntologies + +/** Get lowercase shortnames for all required ontologies */ +function getOntologyShortNames() { + let requiredOntologies = [] + + // Validate IDs for species, organ, disease, and library preparation protocol + for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) { + const column = REQUIRED_CONVENTION_COLUMNS[i] + if (!column.endsWith('__ontology_label')) {continue} + const key = column.split('__ontology_label')[0] + const ontologies = getAcceptedOntologies(key, metadataSchema) + requiredOntologies = requiredOntologies.concat(ontologies) + } + + requiredOntologies = Array.from( + new Set(requiredOntologies.map(o => o.toLowerCase())) + ) + + return requiredOntologies +} + +/** + * Get list of ontology names accepted for key from metadata schema + * + * E.g. "disease" -> ["MONDO", "PATO"] + */ +export function getAcceptedOntologies(key, metadataSchema) { + // E.g. 
"ontology_browser_url": "https://www.ebi.ac.uk/ols/ontologies/mondo,https://www.ebi.ac.uk/ols/ontologies/pato" + const olsUrls = metadataSchema.properties[key].ontology + + const acceptedOntologies = + olsUrls?.split(',').map(url => url.split('/').slice(-1)[0].toUpperCase()) + + if (acceptedOntologies.includes('NCBITAXON')) { + acceptedOntologies.push('NCBITaxon') + } + + return acceptedOntologies +} diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index d05fbe7bd..837d023f9 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -1,10 +1,11 @@ import {openH5File} from 'hdf5-indexed-reader' +import { getOAuthToken } from '~/lib/scp-api' import { validateUnique, validateRequiredMetadataColumns, metadataSchema, REQUIRED_CONVENTION_COLUMNS } from './shared-validation' -import { getOAuthToken } from '~/lib/scp-api' +import { getAcceptedOntologies } from './ontology-validation' /** Get ontology ID values for key in AnnData file */ async function getOntologyIds(key, hdf5File) { @@ -90,25 +91,6 @@ export async function getAnnDataHeaders(hdf5File) { return headers } -/** - * Get list of ontology names accepted for key from metadata schema - * - * E.g. "disease" -> ["MONDO", "PATO"] - */ -function getAcceptedOntologies(key, metadataSchema) { - // E.g. "ontology_browser_url": "https://www.ebi.ac.uk/ols/ontologies/mondo,https://www.ebi.ac.uk/ols/ontologies/pato" - const olsUrls = metadataSchema.properties[key].ontology - - const acceptedOntologies = - olsUrls?.split(',').map(url => url.split('/').slice(-1)[0].toUpperCase()) - - if (acceptedOntologies.includes('NCBITAXON')) { - acceptedOntologies.push('NCBITaxon') - } - - return acceptedOntologies -} - /** * Check format of ontology IDs for key, return updated issues array * From eb17bbb163730e851dc64ebc668d07f166f2e244 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 12 Sep 2024 10:07:19 -0400 Subject: [PATCH 05/52] Reuse fetched, processed ontologies from this page load --- .../lib/validation/ontology-validation.js | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 0711d24fe..33ff68dc2 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -6,7 +6,7 @@ import { const ONTOLOGY_BASE_URL = 'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' + - 'ew-minify-uberon/ingest/validation/ontologies/' + 'ew-refine-mini-ontologies/ingest/validation/ontologies/' /** Fetch .gz file, decompress it, return plaintext */ async function fetchGzipped(url) { @@ -17,11 +17,32 @@ async function fetchGzipped(url) { return plaintext } -/** Fetch minified ontologies */ +/** + * Fetch minified ontologies, transform into object of object of arrays, e.g.: + * + * { + * 'mondo': { + * 'MONDO_0008315': ['prostate cancer', 'prostate neoplasm', 'prostatic neoplasm'], + * 'MONDO_0018076': ['tuberculosis', 'TB'], + * ... + * }, + * 'ncbitaxon': { + * 'NCBITaxon_9606': ['Homo sapiens', 'human'], + * 'NCBITaxon_10090': ['Mus musculus', 'house mouse', 'mouse'], + * ... + * }, + * ... 
+ * } + */ async function fetchOntologies() { + if (window.SCP.ontologies) { + // Reuse fetched, processed ontologies from this page load + return window.SCP.ontologies + } + + const t0 = Date.now() const ontologies = {} - fetchOntologies const ontologyNames = getOntologyShortNames() for (let i = 0; i < ontologyNames.length; i++) { @@ -45,6 +66,10 @@ async function fetchOntologies() { } } + const t1 = Date.now() + console.log(t1-t0) + + window.SCP.ontologies = ontologies return ontologies } From 62de36506bd3a74a1db1e46bd17caf714e82e35e Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 12 Sep 2024 11:20:00 -0400 Subject: [PATCH 06/52] Use service worker cache for minified ontologies --- .../lib/validation/ontology-validation.js | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 33ff68dc2..964f72792 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -8,6 +8,32 @@ const ONTOLOGY_BASE_URL = 'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' + 'ew-refine-mini-ontologies/ingest/validation/ontologies/' +/** Quickly retrieve current version cache key for ontologies */ +async function fetchOntologyCacheVersion() { + const response = await fetch(`${ONTOLOGY_BASE_URL}version.txt`) + const text = await response.text() + const version = text.trim().split('#')[0] + return version +} + +/** Get frontend SW cache object for minified ontologies */ +async function getServiceWorkerCache() { + const version = await fetchOntologyCacheVersion() + const currentOntologies = `ontologies-${version}` + + // Delete other versions of ontologies cache; there should be 1 per dodmain + const cacheNames = await caches.keys() + cacheNames.forEach(name => { + if (name.startsWith('ontologies-') && name !== currentOntologies) { + caches.delete(name) + } + }) + + const cache = await caches.open(currentOntologies) + + return cache +} + /** Fetch .gz file, decompress it, return plaintext */ async function fetchGzipped(url) { const response = await fetch(url) @@ -17,6 +43,26 @@ async function fetchGzipped(url) { return plaintext } +/** Fetch from service worker cache if available, from remote otherwise */ +export async function cacheFetch(url) { + const cache = await getServiceWorkerCache() + + const decompressedUrl = url.replace('.gz', '') + const response = await cache.match(decompressedUrl) + if (typeof response === 'undefined') { + // If cache miss, then fetch, decompress, and put response in cache + const data = await fetchGzipped(url) + const contentLength = data.length + const decompressedResponse = new Response( + new Blob([data], { type: 'text/tab-separated-values' }), + { headers: new Headers({ 'Content-Length': contentLength }) } + ) + await cache.put(decompressedUrl, decompressedResponse) + return await cache.match(decompressedUrl) + } + return await cache.match(decompressedUrl) +} + /** * Fetch minified ontologies, transform into object of object of arrays, e.g.: * @@ -48,7 +94,8 @@ async function fetchOntologies() { for (let i = 0; i < ontologyNames.length; i++) { const ontologyName = ontologyNames[i] const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz` - const tsv = await fetchGzipped(ontologyUrl) + const response = await cacheFetch(ontologyUrl) + const tsv = await response.text() const lines = tsv.split('\n') From 
90a6da6766c6a2e6200fae0e36f4562a119a5072 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 12 Sep 2024 12:25:00 -0400 Subject: [PATCH 07/52] Move, rename AnnData test files for clarity --- test/test_data/{ => anndata}/anndata_test.h5ad | Bin .../invalid_disease_id.h5ad} | Bin .../anndata/invalid_disease_label.h5ad | Bin 0 -> 72768 bytes .../invalid_header_no_species.h5ad} | Bin 4 files changed, 0 insertions(+), 0 deletions(-) rename test/test_data/{ => anndata}/anndata_test.h5ad (100%) rename test/test_data/{anndata_test_invalid_disease.h5ad => anndata/invalid_disease_id.h5ad} (100%) create mode 100644 test/test_data/anndata/invalid_disease_label.h5ad rename test/test_data/{anndata_test_bad_header_no_species.h5ad => anndata/invalid_header_no_species.h5ad} (100%) diff --git a/test/test_data/anndata_test.h5ad b/test/test_data/anndata/anndata_test.h5ad similarity index 100% rename from test/test_data/anndata_test.h5ad rename to test/test_data/anndata/anndata_test.h5ad diff --git a/test/test_data/anndata_test_invalid_disease.h5ad b/test/test_data/anndata/invalid_disease_id.h5ad similarity index 100% rename from test/test_data/anndata_test_invalid_disease.h5ad rename to test/test_data/anndata/invalid_disease_id.h5ad diff --git a/test/test_data/anndata/invalid_disease_label.h5ad b/test/test_data/anndata/invalid_disease_label.h5ad new file mode 100644 index 0000000000000000000000000000000000000000..54da5df2ead12d2a16b119001c7a1f6692504b6a GIT binary patch literal 72768 zcmeG_33wDm)}4eSToO26)!|VKyfi1tN*L6_hvdBngo*e6EpRF-PNz^)vH&p z>m6NP)ALaLkfAM_c4#W`oTYN-;g5`CJTH222 zfk=9c%{*B>kpSsm5{}n`c|6)>5<{B3z|OBTvePnB9r--ZEyyDskfh7C*W*Wz8Nz)C zs)}?R{RU~hZv)YD0P$w zc>4iFxJt9Qm@y;Ov7CHuiw?h&Ege0Zr=Ot&^ z3-U5%3xVM6snU+u`ToRP+>T4FuPB`qgAClBgKA4tx4KfoJ_bg6zr0Ht*e@InFSTErth3UUhU znaNps+4khZtc-l{eIn6#Q!@+m3mkbk8zhPNpm~uD28N5MEw&>*;^JbWW8xAMV-lld z;2a&qnS@01#f6uc7@Zgw2bghy7#%}+{sP@KuDIw#AQv4CSTS&R;h|e};i<%;;}T;y zF~B8y9dz^nL8TZ2c^nWWyz6y%KnGZh<2cZLVj|(ud~}T?S|tY^M?=4fE_$7G^kA4O zDc-$SE;s1#RFS}7c;9>=)Nf}U9@GH>NJQ}H!m}VA&BJ^!2@oCG^YZL-c-4(qPkI^L zJ5;qNH|ff$K^I*);opqqWS>ItK6llZ^T`LPo58qH-5TEqKF>RV7wIC3)56AXocU8VD`Li+#Qqw_ldT~o5 z5TEol6`134a&=dYq(@?VvNOY`#|33OrbEC1x2?L)q3tMaZ=?sKv0kqSW3b#v4-yR8 zjn$RYydI}3r+Gad%d4B$iHK)3uP0zV#WOTZCSo~_U(9!xI!L!*eKq=U=}7@Jl8{bY z%(HW{gDy5_Z^e3=ub0pV74tU4ujzc7g!mNy=~ZAd*0;etBO9WsEJs=f*g36gO~HDy z$A%^WC$g|Uy}0S4;~f9~oG$*N;1}ycOnI za~zr6G-o;rXK2iBJKvaucA@5=Mu?OJjWkMwDJUQS`ICB%}E4Ia#4$()mwo6g72eJMxHMSPcB zh2M1OdC?Lm-1c)ch9eDca!u&mq7Uto(7UnUP7Ut_oTDYAK=5fBZzP%3Sj|t5iH}GyL zc{gd)M1AN&PWj5Y7LL%teC0=5&!6nIa9|2 z1TB2C4(0)xww^zMYhnH-M+*n&V7{WNt?#LW19dQu6t(#LL0Jo{E4&Hc#Y!*ijg=xM z43)~>j+JaFu~Pi=vC{F`u~NXYSSk07Sn1r6Sl=e_xauo;H}qE5b#cTafuc$U;Sm6< zF*d@(0oLAYzDBM--eG+_!0Ng-!ovVop92vd46wS6fiQnppsv>-JOp5MJs;s%fNz3> z9!`hCWwXXj8hCp(@b&RkS6UD=1!~k4351gYR#y%Y=BtTXn6Crri!q=&^Bj@W;+L7b}tQ{#||I^l|1FW)w{P1-> zb?pw}I{?;>dpZ)gdftO zTp!C^G^+)-0$}# ziPu)2^!t@>sW3x$MgCY2*x!Cf^ZG~ZHrlz<6OvI^K@6M4MIq~cg{L3j$n z$Kw#;Po9}!Uv7jk8~q-lq`weII?AQG-$MjM3Pe8X_CLU=!?JK6Mgzh zL!(EI=jI@B+yr42pn~{8asXehU3b4nq<%Opw>|e(i{1A8)t?5r2RMg;#No}41 zafrI=19+5Qc^q;A=WR+Qm8tE#)y5%wYMCJ?ZZhck*1sJ^Vqu8ue-8|?^L zBry>8IBFl~*nlMIcT$X?UsrKX3DOf(tDaqUA-ZZ^eJ(M>u5%H)?7BgmqxxaMqn!NT z<*&EQ#(7&(={DAO-fH6<{+7iIIgJLo6zAyIM{3KzHqIGi68{N6*L9!ankUk4U;c6z zQceB${NV!MrKbY)oa>1MjQqFj#SY5BEdL$+H_?HD{kZk7uKf4Kzi(3Xkm!)~1iLrP zNWadF6etQ});+&wud4(E2-eMuH^xE-B(Dfun69gMWB&*gyg^2GEEnyd0b0E)cKHi~ zxPZTt;)(Jrj|)O^ejQio(aEhRR~FA|;{rPnH6t#V3Unzh(7z9vkvHh^gDRG=&i2&Y!T8!@mI;nE|lx$Llwk&R$zu-ng%kg?tL5iZOX6Qe|N-r 
zd%V(}tF8az{+o6q@(_gPZ87S(d)`_q^=kweeV0s)6QGg)_xj#QNL^otYx>TF50=w< z2)&1;@47TFXh(D?r{C*0GQe+)<)k;_qgz2C1RL?)#DE@I6(hc>ocN>hQa@Bqe5?DN zpf4&Xe40)|6L8}cEfTb{&7*V%D4k`^oV~nc2)3u zB0kO6g2Jheywt)>`1(=4#h#zWZ8A@RA~PIEFRa(!-ykeE8b@z~c71f^G>%|hIo*d4 zEGIpfFgSMHn9*<};8ygHkX+yMGP)0826S=zo1q@y>*FeN<@cT^k_Yim+hVAk_y-o% zMCDU6b8@)L!0lCra;NPV)Q;%kwirXc`MYVJ^zCq4ovWPq#_e^ka_Wb_W95nNZ|qP$ z`@@2R`wGr&YOV47kH&gEAA_-+=<4k!y^hhh8=|8}{Kx9biT|NkPV*upc(x_0FcUtY zmF3`7ak_fF{fWnVJs-oc+=&0-2JJ@Z%8CDxx^m*5zdPWG{P8qLmOT?VA1#Uv=M7)8 z=Sj~ug}Fw22qxe~H+R;sf}+4gW-E{sb(ianZiq ziC9i?LG;Leq5Z>xBYWfi(p!w`b^BP|?fZv?hV>8ajRxdaUB6?-g%1sdseGHBZcIo> zcz9$4wVR}`C%%yGWTSdCJX5fqrw&iv#r?_Nj^r(=qh-fJp%jrIi8a6H@EI2eM zG+xZu6s)Itt)I7iZHXu9kLD9!8{&!Lp4idjF`vwK0KsvWTb>) z?nUau`t=JAksPA=Md~BM`-g=|SG4-1dzD`jH6k=(PeUN84-ET=eFuRl2k?Q(VHWH0X2 zmDByr)0Gpx8f-zGp05JLCw*SJtekG_XI6U*tj0=|N+lA2|JKidF*jsgB%2KaXy z;LkO{pJ#x7j{*L@2Ke(0@b5Fg$6_gf9@wbBlY1nfkw9Ipv(UN(pK`SRUycDp#Te0P zZP!1zKcas4nu8hqbDLtg{+}+!fmyZt`c9%lp!h?y_MGdB1Ps^zX#$}j%v%31!GJ-* ze%$(3SNj6Ye$W1E55YP`2m*4C* z3>uSNDA&yg1N3@RY=&Lu&w4Jq?j=O2|NR0M1D;4%zW(2+r=UP9xB{x}yybB}&09Wq z%#ag*MsWFWhv1ujA6Z-ewf>uHv>E*KK3w-%uPD`#8woB>vlL?=xKUMEaeC_CH+|SNk3!{eFQ+dWy|FSv`?} zQ5>{teq{mFbsR+0X@8CWe`fXnOM9{Z+vQtaE>_OBg_fIR2=A_1d#BpBB(NDg%!kC30) zzgYC3U`KAfx?0u~(oFZuH-cD=@=e-vl!J z3;cU7>^lp>I!tpFYDJ^-S(sQ>j~XV;=jB0KEpLnF8y96;B=8y>%W!91S07v zHuGflL;^)(UA?MLnXcaTZ^_txHLxaNuU zJK^sFuBrdt|CGQdJr$7WTwf$$H$ z)8n8%ARCgG-!no`Nhq({{=Op}>AA@1F37%aNBgX+uD75W{yi9^`NzdUt4)uC`1?&W z8CQz%`A7iq;5x(o+F>&h5{CB`w6ud!3c40Q!6$7+- zS8Rq|?+;RR*>#NP&HH`kF4O&YB#g%lISl~1)+MlzYm%2zoS4l zc4$o741n+1>`x`7YP{o?+BbnBnPwnciC3a zgMuBo_3vu&-zQA>-$Ot)q~8f!grMrmf4_kAQ2bRgvI`Zu`A`M1o)wtkmtsMNF1v2J z|2}8B|Bi$4m?5WlpiBN+|9;VJKWgv4hnd9xaP57DYo18Ii(eOTP5rmuE`d*aDj?6f zzDU5xe^+d;EP=ZA-(nv@L+LnnlYr}BXtpH%x>!6`EZrs=5Z-o#%aPw=q!*087bZOY z?}t|*UIhL=hw${j*Rdd8319U36DHF8a;IQUK_1<9>}rXF=r1IiR+6%N99L$K8v-@@ z-rfkfCV3^5if-yE4k|%kAlyu zR?>Cwt4BvRc*KnC=|M9cdHESR*=i-6jQ&5I{EOBejaTV42AaS(5+M8Jzef}(SYF-l zF;BiJ5G&6@Z0yPY@)9tLwpcwKP8?sGx>9&DWBBqJe_#noGj7dPyxn zU;L%ioXoyG)EpsG1L<;!3pt0IrpEWkBM;IT(ZHL>tkb8A8A~MbX9EeKR85Z zg4+Iazklto>Z?&d?(ZuVm+d+uc#WfT?Q-0;&zkc2H`I-siF+)y#OFG3h z^A!LftwK9kTYb~U1>EVI!9V}hrT_$^ea%yIBerishBOl7etb_lHt=!BIs`_0c z(X*GgphWhG?~&$750l+$W$6r>z>`I4=PR{y|G$#(+uq)Spxe(D9ahn$vc@78=i2JM zHf{&;xcqF z5Kj4!F#j6)i))(i-e!$fb|-F7mVUj;8qmU5-tysRQK#~q@?DE2D-~lE)}ZOkS=x$Y(4zCZBQ0S+~cR9*+2#?Rfq{wsPdB)!C32StDN%x4cp4rZe3z+`Bi6@(7e`KIa4mB9}>kJ`6b zTOJ?G;@%#x``Hasc9-4!R&@Kfg5>hhU**i%!L0Yb_PcxZT*I3Ee)G;7y6%uK1b(l) zWa((z6qCVT>9bT08WE$EyfmQnp>B%2zj%)`KXH)lOlbqwgMgHYfu@D(!Eo+=JUN~oM=aUuHv3scX@t+@HO@_Z{o%4uYe(v-G^1$WKIoChD zS#B2mEL)NkVy_~_t{aERLD%iChC&>4nImbSH_BLzR)Y3swy?_<;Y=;*#;yD<6G)qwW2kv*lGU1jy?X&a>o< z`{lNS?q;`^-_4dhwzzbK9B11a*i6|u?-8Y?_X*Zg>B6Mp?d3C1oOb4XzEA!pzoE5v z+XeEJgtfK@XMQJJCr+1r*DsR~&$^#Y9I~J7y<4^(I6q!adU%T5@%?D_{+5PHSg@}= zVDCtIc*S;EI@`dSmAHiMocNji>cG9ukrg|v*S>qd^VcR~JX&ezJ&tqWOH>=C3h+EQ@_Za#;>!c4_@FL_Sy5Y_wM`I z+NVQoj=+`j-Cf$r?VF943uAUhJ@nN#*7M&_XU@EvYnPady1N z7joZaKRI6+c0nHhc2D`P#WyR>$R6LY7joOl^L|;w zP8WI0kA2+Uw&{`O?88qt$j@y0KzVWPqssg;i|q;NpUO{XhC35Jn5zVJpKsgUG=S~d z_OR{eop;MgkL*(RborpP+uC5ZB66*@ap@Q~ydcxob&}mWBJqdP^Up1j51)HP-cdY4 zDcJdnGHmYDXv^{4Y<%%HIrYv@+CWzE_)1%hBf@&($t0zd&r12GjSXz^Ti#{+It+21 zni|P$ZB{y$-SZoJ@RwHdj~iFYqjyFrjT#+vo_eE|^7Bc%{Pwyj@}1vrljRCuIe+8d z;X!yDP@a$n`QvC+!(8EMwX{xew~ z(>`=}yUrIBuZ*#_1JeK>BqvUAlc>zi@Covl+^DGL&>x4kz1 zemQ*dlh#H%`pJDK9B0!fy{>e)r-SvQpE}rXf3&-tH!j|`XlFBJ)0%+Nvu&rav!{;8 z_dFfX(r<4s7mrx2bX-~#HGa~U?3T|Jkkjg&aod*4qlVti?!2Q&zW0l_ng2`i%9MW( zvW*Vxpgex_N^8OSGWOGJDN4}_uc*$Jv+}8DyIOy09&25YHCf)6Udnd-8ez+GoNzu@ 
zcu;Ya=0;a+dV)2W9q1hYb)a+Epe@!9r~X6Uv2BrUQ&z9&Wqn>}53K7czrA`xROfTA z$s3+sC;J_`Megz2XR_4tZQJ^{yp?~pPqNMXX^;HGtd@%JngjB@UgK6>ka#Y}<>)G%_jco4@?yWc*j8lHh ze#`lE+Cyw;`0QQwI~vG-CC@mmDGix(fv>Ic=swE+xxM76Q;XQ#(al--2iG&7M!RGl zm2`lkHynJw8;_&-0?{Hk_$xvFdan$Q6L9ck#`D&lK$o7Y12BN{!v77^$E)5d7w&kK z;46<;Z|fyUCLm>z)n&ZOpP+ET~$G9^cM^)W(YQjeXk?3GEPgYMP zpnp!a;JD}Oo?rO?9`mYt7?#9e*i5 z-Ja`!IzM0f=A})=^uqOOvICUU^K2nrc)>p>t34p2`rk#7efu4g+nz9dX0%D+&7|<( zeFTH9b!Hzy*7f}B?Opt_K;a~TTS`TA@u$f3&%?HVU79U=9^QZY(s*P)e1E$%-e0Ab z+f^@^0yJ+Z53aQO;U4Y&FWlGsisqeWqEtrkKpqKrB;b*NM*h*nJvH$h!96fmSeis}%yXU~KhMZ)B@9Q<-Ur)cr z?hNfP@B3r_VMR}UFs;qcM_KV!>79E!ZDa2TJ<#j*SNFMKDqo&vOStdwSyt10SAP4_ zq}_6j@riD`e$PZqeD4&a{2oVxZjTI=8RbOx%*a9|`nc1DXC&t(EuPM~FZ-}8MiM_`S0rK z3#!%=y`KGhzS-=;x9rwex5j!edY8Q%HFWr<&&$}xU)QEKTX2j~{fqCEPn^-`2uo|X zdH$?}XI=TP-^_h<-}bYtuJhN%?szXUbu+tX@^7C#-SG?dueKHA=J)y1mD|>>`=aNo zU$N!;|NHOuo8D#HTYs{zWaXPIY{{rGYk!d0Z3TDyciz8`R>}8W`=+n*%?3t!!l}o) z1b%UrnVtVoe$9K!T71dE&nGe*7<(spty1b;r_=^^gr zRhJh;w5S+;;B>WkdO9h)cKLk$URC-;&wi?8hwaoE);Mxe)Y_RPZ1|LQCl7pnj6D_7 zqrd%@gKS~gh3{tiA7zARS+HgM-Dzd)$jkdvHoktg#`&JpJI^fHP;-2u|4sCAB>3JXcymisTRp}GG(ZNfK?>}{t9XPh&uHV}4V)u`Ibzeck2A22x4?Ayh9ChV{ z_ZM;au3=pMI+UNfz{J6QQ@Vc^iOs+T0beqbv8Y_z85N& z(E5pHpp_9kkVgU@33w#nk$^`69tr#vGe-s>i46EQ+4aa&o_&ELp2aRQtW;~w&O`i1p@!#WdI$RD0XX}Y213*l3e|upT~9b;IgR6eXNUnc%?Gp z^5ID@o6DuQq7LP5B#-96>ost2S+|Cx2ONFicnXfX7z53lH(@MC;2_xu>YlSl0v-u? zBw&sNs#`y4hTYOW4DJ7Q3-f1DF)p!w;{Us#!AYUTn6+J70X8HGS8k0ItpWy+a|;CH{-| zi@UM?fXHjhKaYQi|Cw-Y2LH2wuF-b|ikiU!4J1W9({}}6ZAzE0+LP6D3Fv>XLHoyp zaUPL)#H^B}%D--++RM*L|IXoeF#NE;y41hfz?K>MHwWk%>7Q>)kN#c$Y^c92jP$SM z8qzo+8-nUu|8koP%1g1oy41hBfORwU?{1)Lq<=-NJ^ELFwdwLlZlr&~IFE>;S^9Sl z{e>U)SC{(7-?y2efAfH@k^cF%_2?hX8Bbol1dQ~rqz!4Dpg~Zb`@d@i<)zqPUFskI zB)J*-cQ4R2(!crFd-SjRS?9qZ2^i^LaC_1?fk{wZ=^yoW86&r>f`8mx_eJYAfDXn%O-dhXZBp)T$2d>EA( JcK1G@`~S=?zi9vf literal 0 HcmV?d00001 diff --git a/test/test_data/anndata_test_bad_header_no_species.h5ad b/test/test_data/anndata/invalid_header_no_species.h5ad similarity index 100% rename from test/test_data/anndata_test_bad_header_no_species.h5ad rename to test/test_data/anndata/invalid_header_no_species.h5ad From 7d27f6df5ec272681a7d3e574a3be98d61f16345 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 13 Sep 2024 11:42:54 -0400 Subject: [PATCH 08/52] Enrich invalid-label H5AD test data --- .../anndata/invalid_disease_label.h5ad | Bin 72768 -> 72768 bytes .../anndata/{anndata_test.h5ad => valid.h5ad} | Bin 2 files changed, 0 insertions(+), 0 deletions(-) rename test/test_data/anndata/{anndata_test.h5ad => valid.h5ad} (100%) diff --git a/test/test_data/anndata/invalid_disease_label.h5ad b/test/test_data/anndata/invalid_disease_label.h5ad index 54da5df2ead12d2a16b119001c7a1f6692504b6a..23cbfa526cb97e53f4013e1d96a6e69ce145af30 100644 GIT binary patch delta 1303 zcmYk4e{54#6vy9b%k~BxtDDj`)4ZDCAM|aqm4WP6T#4C6a4@v8wR`Pe$Lnyeg(;|s z83~h^z#k;&SJ(^@mZ%uboWqA~5pnZZghV2mh|y^5A4W6NKSp9mjFNcneXH3??z!iC z&*$8C?tOP*&a*J*nRzL^o?MDoQIv@oJGL4gE*?d#bL__P@B_37?}vk>O|01v%~X{} zi*c-qI&mi)+`Nld*6=7+r#UWNp-qlgtA(@b4J&w1Uq!?zKRvO z9LFnl7PGpB-*q2u^4LPb=t10C>&MBsia=$6>ZDp=HR1J9L@2(RPvaUjNP4r(cPAYBA*qfee36~(~)DUFg`piv5G_Wc-3`Ge*% zFP@07_GnlArmayWjSI{S$Zq$_^I9nD*C>Mx><6)({XPt_e-g*p58)m5PvIK-VJx!G zqRiANhYjra%UhZ>NJ%X-_mi^;2!zH?Gqusvp0mB^vcKLLC zJZhrhl7Sl^TC@YT!#$X~Qf|L?>6*tCaw@v)FE4Y&PFWFlDq7|(TLqP=*prTLZSi0h~SQ@H!8 zftjx?8j)K)ZRf6@bJH_+`Pbi+(Lwz3{b70meK*tNQ>A1e?+(~?KaAFmXH_!BRh6vq z!Wud>n^(!}9?w>AXnI9hs1&Oea&?_ptrFpH)nvSE24?f?$g_%nIydh*%D?hI`^tCw delta 1221 zcmZ9KUrbwd6vunoA{SOh3k2?TOZze<+~V58%r-zF(ZLi1OdXWo!Yy=kNO48c2h%0K 
zEHTC=18GaG8=)C{uixScn` zJsY)4Lwpn$#DnY?VO)Cz-)U9h(t^rC_6y^_|Hb_rKZMhoAAf1v@o3PCYTl#4svYG8 zU542&j7RWTumNubPff1`OmF>$Mm~gNm1TkCtEo}eT1&&~RzC%?xwVGs;j8e|PGo)jh& zS8zD4Qmg3NBsW*CK&?JVw=V) z+r!voI|iTOrD)#4n?KckW^}_UVm^SYi_NT z8XK81^`wVN?AYO`H9r{slcrZ{5gL!0_fzlfLN@m-y@i{(Bx$H0@8e-Ob~26NNga8Y zY8t0*xSg|gXdW|YmwEK`U7`@Wauf8SAQ3?taQsXfjc0VT=j_`q=Ve>|gxT`ZT|(#C zGQ_z1q5yjZ*oUr<)A-|@ZhFr%uxtY@=HH(&;5lyt>jJb2(1x;wG(Ml#&6NcPez$?A z%`Yx9u)JUc8v?Wo(19PWq%n0xH&0ys(e30xw_fm^`Rz>x-nlA(vsA-k&fu}J-rbG0 zTWREOr7-uUj%MB~^{Sa&+U}w#j(^?GQ_~Wea6YWzLZly^QH2ci+&8}wB{ARJ^lnqc zdE>uQMp{l_{&pJC+q#+k{(To^&9db?B{Yd&*N*a(#n+ScilxIZYaZNldCkyIxeylD z^%+H>)Xarq(x(meT3#WKYbK-6)HTCnx%aoQn=hAaxs|PyY=xDrCU? Date: Fri, 13 Sep 2024 11:43:55 -0400 Subject: [PATCH 09/52] Consolidate URLs, clarify names, stub new tests --- test/js/lib/validate-anndata.test.js | 47 ++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js index 26c9044dc..be43f8809 100644 --- a/test/js/lib/validate-anndata.test.js +++ b/test/js/lib/validate-anndata.test.js @@ -2,10 +2,15 @@ import { getHdf5File, parseAnnDataFile, getAnnDataHeaders, checkOntologyIdFormat } from 'lib/validation/validate-anndata' +const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data' + +// TODO: Uncomment this after PR merge +// const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata' + describe('Client-side file validation for AnnData', () => { it('Parses AnnData headers', async () => { - // eslint-disable-next-line max-len - const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test.h5ad' + const url = `${BASE_URL}/anndata_test.h5ad` + // const url = `${BASE_URL}/valid.h5ad` // Uncomment after PR merge const expectedHeaders = [ '_index', 'biosample_id', @@ -20,22 +25,22 @@ describe('Client-side file validation for AnnData', () => { 'species', 'species__ontology_label' ] - const remoteProps = {url} + const remoteProps = { url } const hdf5File = await getHdf5File(url, remoteProps) const headers = await getAnnDataHeaders(hdf5File) expect(headers).toEqual(expectedHeaders) }) it('Reports AnnData with valid headers as valid', async () => { - // eslint-disable-next-line max-len - const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test.h5ad' + const url = `${BASE_URL}/anndata_test.h5ad` + // const url = `${BASE_URL}/valid.h5ad` // Uncomment after PR merge const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(0) }) it('Reports AnnData with invalid headers as invalid', async () => { - // eslint-disable-next-line max-len - const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test_bad_header_no_species.h5ad' + const url = `${BASE_URL}/anndata_test_bad_header_no_species.h5ad` + // const url = `${BASE_URL}/invalid_header_no_species.h5ad` // Uncomment after PR merge const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(1) @@ -58,8 +63,8 @@ describe('Client-side file validation for AnnData', () => { }) it('Parses AnnData rows and reports invalid ontology IDs', async () => { - // eslint-disable-next-line max-len - const url = 
'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test_invalid_disease.h5ad' + const url = `${BASE_URL}/anndata_test_invalid_disease.h5ad` + // const url = `${BASE_URL}/invalid_disease_id.h5ad` // Uncomment after PR merge const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(1) @@ -71,4 +76,28 @@ describe('Client-side file validation for AnnData', () => { ] expect(parseResults.issues[0]).toEqual(expectedIssue) }) + + // it('Reports valid ontology labels as valid', async () => { + // const issues = await checkOntologyLabels( + // // Underscore or colon can delimit shortname and number; + // // disease can use MONDO or PATO IDs. + // 'disease', ['MONDO_0000001', 'MONDO:0000001', 'PATO:0000001'] + // ) + // expect(issues).toHaveLength(0) + // }) + + // TODO: Uncomment after PR merge + // it('Parses AnnData rows and reports invalid ontology labels', async () => { + // const url = `${BASE_URL}/invalid_disease_id.h5ad` + // const parseResults = await parseAnnDataFile(url) + + // expect(parseResults.issues).toHaveLength(1) + + // const expectedIssue = [ + // 'error', + // 'ontology:label-lookup-error', + // 'Ontology ID "FOO_0000042" is not among accepted ontologies (MONDO, PATO) for key "disease"' + // ] + // expect(parseResults.issues[0]).toEqual(expectedIssue) + // }) }) From e2f15a6f0cb682eb5464172f114eb2a3d6f79ef0 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 13 Sep 2024 11:47:36 -0400 Subject: [PATCH 10/52] Scaffold getOntologyLabels to collate AnnData IDs and labels --- .../lib/validation/ontology-validation.js | 8 +- .../lib/validation/validate-anndata.js | 82 ++++++++++++++++++- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 964f72792..837c8bcaf 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -6,7 +6,7 @@ import { const ONTOLOGY_BASE_URL = 'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' + - 'ew-refine-mini-ontologies/ingest/validation/ontologies/' + 'development/ingest/validation/ontologies/' /** Quickly retrieve current version cache key for ontologies */ async function fetchOntologyCacheVersion() { @@ -80,13 +80,12 @@ export async function cacheFetch(url) { * ... 
* } */ -async function fetchOntologies() { +export async function fetchOntologies() { if (window.SCP.ontologies) { // Reuse fetched, processed ontologies from this page load return window.SCP.ontologies } - const t0 = Date.now() const ontologies = {} const ontologyNames = getOntologyShortNames() @@ -113,9 +112,6 @@ async function fetchOntologies() { } } - const t1 = Date.now() - console.log(t1-t0) - window.SCP.ontologies = ontologies return ontologies } diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 837d023f9..65b87e08e 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -5,7 +5,7 @@ import { validateUnique, validateRequiredMetadataColumns, metadataSchema, REQUIRED_CONVENTION_COLUMNS } from './shared-validation' -import { getAcceptedOntologies } from './ontology-validation' +import { getAcceptedOntologies, fetchOntologies } from './ontology-validation' /** Get ontology ID values for key in AnnData file */ async function getOntologyIds(key, hdf5File) { @@ -38,6 +38,7 @@ async function getOntologyIds(key, hdf5File) { return ontologyIds } +window.getOntologyIds = getOntologyIds /** Get annotation headers for a key (e.g. obs) from an HDF5 file */ async function getAnnotationHeaders(key, hdf5File) { @@ -120,6 +121,81 @@ export function checkOntologyIdFormat(key, ontologyIds) { return issues } +/** Validate author's annotation labels match those in ontologies */ +async function checkOntologyLabels(hdf5File) { + +} + +/** Get ontology ID values for key in AnnData file */ +async function getOntologyLabels(key, hdf5File) { + let ontologyIds = [] + + const obs = await hdf5File.get('obs') + const obsValues = await Promise.all(obs.values) + + // Old versions of the AnnData spec used __categories as an obs. + // However, in new versions (since before 2023-01-23) of AnnData spec, + // categorical arrays are encoded as self-contained groups containing their + // own `categories` and `codes`. + // See e.g. https://github.com/scverse/anndata/issues/879 + const internalCategories = obsValues.find(o => o.name.endsWith('__categories')) + + let resolvedCategories = obsValues + if (internalCategories) { + resolvedCategories = await Promise.all(internalCategories.values) + } + const group = resolvedCategories.find(o => o.name.endsWith(key)) + if (group) { + if (internalCategories) { + ontologyIds = await group.value + } else { + // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, + // sex) into a container with a `categories` frame and a `code` frame. + // + // - categories: external values, non-redundant array. E.g.: + // ["tuberculosis", "TB", "foo"] or ["female"] + // + // - codes: internal values, redundant array of integers that specify + // the index (position) of each category value in the array of obs + // (cells) + // + // This organization greatly decreases filesize, but requires more code + // to map paired obs annotations like `disease` (ontology IDs) to + // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. 
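+      // Hypothetical illustration (values not from a real file): categories
+      // ['MONDO_0018076', 'MONDO_0005109'] with codes [0, 0, 1] decode to
+      // per-cell values ['MONDO_0018076', 'MONDO_0018076', 'MONDO_0005109'],
+      // i.e. codes.map(i => categories[i])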
+ const categories = await group.values[0] + ontologyIds = await categories.value + + const codes = await group.values[1] + const indexes = await codes.value + } + } + + return ontologyIds +} + +/** Validate ontology labels for required metadata columns in AnnData file */ +async function validateOntologyLabels(hdf5File) { + window.hdf5File = hdf5File + let issues = [] + + return issues + + // Validate IDs for species, organ, disease, and library preparation protocol + for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) { + const column = REQUIRED_CONVENTION_COLUMNS[i] + if (!column.endsWith('__ontology_label')) {continue} + const key = column.split('__ontology_label')[0] + const ontologyIds = await getOntologyIds(key, hdf5File) + + issues = issues.concat( + checkOntologyIdFormat(key, ontologyIds) + ) + } + + return issues +} + + /** Validate ontology IDs for required metadata columns in AnnData file */ async function validateOntologyIdFormat(hdf5File) { let issues = [] @@ -154,8 +230,12 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { const requiredMetadataIssues = validateRequiredMetadataColumns([headers], true) let ontologyIdFormatIssues = [] + let ontologyLabelIssues = [] if (requiredMetadataIssues.length === 0) { ontologyIdFormatIssues = await validateOntologyIdFormat(hdf5File) + if (ontologyIdFormatIssues.length === 0) { + ontologyLabelIssues = await validateOntologyLabels(hdf5File) + } } issues = issues.concat( From 0dbed6d21cddc36641b84b6aec2a52cf735be032 Mon Sep 17 00:00:00 2001 From: bistline Date: Mon, 16 Sep 2024 11:30:36 -0400 Subject: [PATCH 11/52] Handle missing cluster-based annotation in plot cache --- app/javascript/components/explore/plot-data-cache.js | 3 ++- db/seed/synthetic_studies/chicken_raw_counts/study_info.json | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/app/javascript/components/explore/plot-data-cache.js b/app/javascript/components/explore/plot-data-cache.js index 78ac277ea..abdd92178 100644 --- a/app/javascript/components/explore/plot-data-cache.js +++ b/app/javascript/components/explore/plot-data-cache.js @@ -289,7 +289,8 @@ export function createCache() { Fields.cellsAndCoords.merge(cacheEntry, scatter) // only merge in annotation values if the annotation matches (or the default was requested, so // we can then assume the response matches) - if (!requestedAnnotation.name || scatter.annotParams.name === requestedAnnotation.name) { + // annotParams may be undefined in spatial UX if a cluster-based annotation does not exist for the plot + if (!requestedAnnotation.name || scatter.annotParams?.name === requestedAnnotation.name) { Fields.annotation.merge(cacheEntry, scatter) } if (scatter.genes.length && scatter.genes.join('') === requestedGenes.join('')) { diff --git a/db/seed/synthetic_studies/chicken_raw_counts/study_info.json b/db/seed/synthetic_studies/chicken_raw_counts/study_info.json index 4d51a06b9..228d16593 100644 --- a/db/seed/synthetic_studies/chicken_raw_counts/study_info.json +++ b/db/seed/synthetic_studies/chicken_raw_counts/study_info.json @@ -39,6 +39,11 @@ "is_spatial": true, "spatial_cluster_associations": ["X cluster"] }, + { + "type": "Cluster", + "filename": "cluster_odd_labels.txt", + "name": "cluster_odd_labels" + }, { "type": "Cluster", "filename": "cluster_square.txt", From f7b86fbc073cad34cefca0fb18a0fc34c548d285 Mon Sep 17 00:00:00 2001 From: bistline Date: Mon, 16 Sep 2024 13:40:22 -0400 Subject: [PATCH 12/52] Fixing test instability issues, NeMO API changes --- 
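Note: a sketch of the collection payload shape the taxon_names change
accommodates, mirroring the assertions updated in nemo_test.rb below
(values come from that test, not a live NeMO call):

    load_study['taxa'] # => [{ 'name' => 'human', 'cv_term_id' => 'NCBI:txid9606' }]
    taxon_names        # => ['human']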
app/models/import_service_config/nemo.rb | 2 +- test/factories/study_file.rb | 3 ++- .../import_service_config/nemo_test.rb | 4 ++-- test/models/ingest_job_test.rb | 23 +++++++++++++------ 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/app/models/import_service_config/nemo.rb b/app/models/import_service_config/nemo.rb index c6f5f1f82..97d4e7a4b 100644 --- a/app/models/import_service_config/nemo.rb +++ b/app/models/import_service_config/nemo.rb @@ -72,7 +72,7 @@ def study_file_default_settings # retrieve common species names from associated collection def taxon_names - load_study&.[]('taxonomies') || [] + load_study&.[]('taxa')&.map { |t| t['name']} || [] end # map attribute names SCP Study attributes onto NeMO attribute names diff --git a/test/factories/study_file.rb b/test/factories/study_file.rb index f6559f8db..a6ca5f08b 100644 --- a/test/factories/study_file.rb +++ b/test/factories/study_file.rb @@ -173,6 +173,7 @@ # phex: [['cellA', 0.6],['cellB', 6.1], ['cellC', 4.5]] # } expression_input { {} } + has_raw_counts { true } end after(:create) do |file, evaluator| file.build_ann_data_file_info @@ -195,7 +196,7 @@ file.build_expression_file_info(library_preparation_protocol: "10x 5' v3") file.expression_file_info.save file.ann_data_file_info.has_expression = true - file.ann_data_file_info.has_raw_counts = true + file.ann_data_file_info.has_raw_counts = evaluator.has_raw_counts %w[raw processed].each do |matrix_type| FactoryBot.create(:data_array, array_type: 'cells', diff --git a/test/integration/external/import_service_config/nemo_test.rb b/test/integration/external/import_service_config/nemo_test.rb index 26de92b66..13abe6d23 100644 --- a/test/integration/external/import_service_config/nemo_test.rb +++ b/test/integration/external/import_service_config/nemo_test.rb @@ -96,8 +96,8 @@ class NemoTest < ActiveSupport::TestCase test 'should load study analog' do study = @configuration.load_study assert_equal '"Human variation study (10x), GRU"', study['name'] - assert_equal "10x chromium 3' v3 sequencing", study['technique'] - assert_equal %w[human], study['taxonomies'] + assert_equal ["10x chromium 3' v3 sequencing"], study['techniques'] + assert_equal [{"name"=>"human", "cv_term_id"=>"NCBI:txid9606"}], study['taxa'] end # TODO: SCP-5565 Check with NeMO re API, update and re-enable this test diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb index f8e7c1368..ca9dec9fd 100644 --- a/test/models/ingest_job_test.rb +++ b/test/models/ingest_job_test.rb @@ -187,13 +187,22 @@ class IngestJobTest < ActiveSupport::TestCase test 'should identify AnnData parses with extraction in mixpanel' do # parsed AnnData - ann_data_file = FactoryBot.create(:ann_data_file, name: 'test.h5ad', study: @basic_study) - ann_data_file.ann_data_file_info.reference_file = false - ann_data_file.ann_data_file_info.data_fragments = [ - { _id: BSON::ObjectId.new.to_s, data_type: :cluster, obsm_key_name: 'X_umap', name: 'UMAP' } - ] - ann_data_file.upload_file_size = 1.megabyte - ann_data_file.save + ann_data_file = FactoryBot.create(:ann_data_file, + name: 'test.h5ad', + study: @basic_study, + upload_file_size: 1.megabyte, + cell_input: %w[A B C D], + has_raw_counts: false, + reference_file: false, + annotation_input: [ + { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] } + ], + coordinate_input: [ + { X_umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } } + ], + expression_input: { + 'phex' => [['A', 0.3], ['B', 1.0], ['C', 0.5], ['D', 0.1]] + }) params_object = 
AnnDataIngestParameters.new( anndata_file: ann_data_file.gs_url, obsm_keys: ann_data_file.ann_data_file_info.obsm_key_names, file_size: ann_data_file.upload_file_size From 8b26d845f9e614ddc4857f64e3997f08115ea146 Mon Sep 17 00:00:00 2001 From: bistline Date: Mon, 16 Sep 2024 15:10:43 -0400 Subject: [PATCH 13/52] Further addressing test instability, OOO issues --- test/models/ingest_job_test.rb | 111 +++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 40 deletions(-) diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb index ca9dec9fd..f663e3853 100644 --- a/test/models/ingest_job_test.rb +++ b/test/models/ingest_job_test.rb @@ -9,15 +9,14 @@ class IngestJobTest < ActiveSupport::TestCase user: @user, test_array: @@studies_to_clean) - @basic_study_exp_file = FactoryBot.create(:study_file, + @basic_study_exp_file = FactoryBot.create(:expression_file, name: 'dense.txt', file_type: 'Expression Matrix', - study: @basic_study) + study: @basic_study, + expression_input: { + 'PTEN' => [['A', 0],['B', 3],['C', 1.5]] + }) - @pten_gene = FactoryBot.create(:gene_with_expression, - name: 'PTEN', - study_file: @basic_study_exp_file, - expression_input: [['A', 0],['B', 3],['C', 1.5]]) @basic_study_exp_file.build_expression_file_info(is_raw_counts: false, library_preparation_protocol: 'MARS-seq', modality: 'Transcriptomic: unbiased', @@ -26,11 +25,6 @@ class IngestJobTest < ActiveSupport::TestCase @basic_study_exp_file.upload_file_size = 1024 @basic_study_exp_file.save! - # insert "all cells" array for this expression file - DataArray.create!(study_id: @basic_study.id, study_file_id: @basic_study_exp_file.id, values: %w(A B C), - name: "#{@basic_study_exp_file.name} Cells", array_type: 'cells', linear_data_type: 'Study', - linear_data_id: @basic_study.id, array_index: 0, cluster_name: @basic_study_exp_file.name) - @other_matrix = FactoryBot.create(:study_file, name: 'dense_2.txt', file_type: 'Expression Matrix', @@ -39,6 +33,13 @@ class IngestJobTest < ActiveSupport::TestCase modality: 'Transcriptomic: unbiased', biosample_input_type: 'Whole cell') @other_matrix.upload_file_size = 2048 @other_matrix.save! + @basic_study.reload + end + + teardown do + @basic_study.default_options[:annotation] = nil + @basic_study.save + @basic_study_exp_file.update(parse_status: 'parsed') end test 'should hold ingest until other matrix validates' do @@ -89,7 +90,25 @@ class IngestJobTest < ActiveSupport::TestCase test 'should gather job properties to report to mixpanel' do # positive test - job = IngestJob.new(study: @basic_study, study_file: @basic_study_exp_file, user: @user, action: :ingest_expression) + study = FactoryBot.create(:detached_study, + name_prefix: 'IngestJob Mixpanel Test', + user: @user, + test_array: @@studies_to_clean) + + study_file = FactoryBot.create(:expression_file, + name: 'matrix.txt', + study:, + expression_input: { + 'Phex' => [['A', 1],['B', 2],['C', 0.5]] + }) + + study_file.build_expression_file_info(is_raw_counts: false, + library_preparation_protocol: "10x 5' v3", + modality: 'Transcriptomic: unbiased', + biosample_input_type: 'Whole cell') + study_file.upload_file_size = 1.megabyte + study_file.save! 
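+    # the mocked pipeline below reports createTime one minute before endTime,
+    # which is what yields the expected perfTime of 60000 ms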
+ job = IngestJob.new(study:, study_file:, user: @user, action: :ingest_expression) mock = Minitest::Mock.new now = DateTime.now.in_time_zone mock_metadata = { @@ -112,20 +131,20 @@ class IngestJobTest < ActiveSupport::TestCase mock.expect :error, nil mock.expect :done?, true - cells = @basic_study.expression_matrix_cells(@basic_study_exp_file) - num_cells = cells.present? ? cells.count : 0 + cells = study.expression_matrix_cells(study_file) + num_cells = cells.count ApplicationController.life_sciences_api_client.stub :get_pipeline, mock do expected_outputs = { perfTime: 60000, - fileType: @basic_study_exp_file.file_type, - fileSize: @basic_study_exp_file.upload_file_size, - fileName: @basic_study_exp_file.name, + fileType: study_file.file_type, + fileSize: study_file.upload_file_size, + fileName: study_file.name, trigger: 'upload', action: :ingest_expression, - studyAccession: @basic_study.accession, + studyAccession: study.accession, jobStatus: 'success', - numGenes: @basic_study.genes.count, + numGenes: study.genes.count, is_raw_counts: false, numCells: num_cells, machineType: 'n2d-highmem-4', @@ -139,7 +158,15 @@ class IngestJobTest < ActiveSupport::TestCase end # negative test - job = IngestJob.new(study: @basic_study, study_file: @other_matrix, user: @user, action: :ingest_expression) + other_file = FactoryBot.create(:study_file, + name: 'matrix_2.txt', + file_type: 'Expression Matrix', + study:) + other_file.build_expression_file_info(is_raw_counts: false, library_preparation_protocol: 'MARS-seq', + modality: 'Transcriptomic: unbiased', biosample_input_type: 'Whole cell') + other_file.upload_file_size = 2048 + other_file.save! + job = IngestJob.new(study:, study_file: other_file, user: @user, action: :ingest_expression) mock = Minitest::Mock.new now = DateTime.now.in_time_zone mock_metadata = { @@ -164,12 +191,12 @@ class IngestJobTest < ActiveSupport::TestCase ApplicationController.life_sciences_api_client.stub :get_pipeline, mock do expected_outputs = { perfTime: 120000, - fileType: @other_matrix.file_type, - fileSize: @other_matrix.upload_file_size, - fileName: @other_matrix.name, + fileType: other_file.file_type, + fileSize: other_file.upload_file_size, + fileName: other_file.name, trigger: "upload", action: :ingest_expression, - studyAccession: @basic_study.accession, + studyAccession: study.accession, jobStatus: 'failed', numCells: 0, is_raw_counts: false, @@ -493,40 +520,44 @@ class IngestJobTest < ActiveSupport::TestCase end test 'should set default annotation even if not visualizable' do - assert @basic_study.default_annotation.nil? + study = FactoryBot.create(:detached_study, + name_prefix: 'Default Annotation Test', + user: @user, + test_array: @@studies_to_clean) + assert study.default_annotation.nil? 
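+    # default_annotation identifiers take the form {name}--{type}--{scope};
+    # the 'invalid' scope asserted below marks annotations that exist but
+    # cannot be visualized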
# test metadata file with a single annotation with only one unique value metadata_file = FactoryBot.create(:metadata_file, name: 'metadata.txt', - study: @basic_study, + study:, cell_input: %w[A B C], annotation_input: [ { name: 'species', type: 'group', values: %w[dog dog dog] } ]) - job = IngestJob.new(study: @basic_study, study_file: metadata_file, user: @user, action: :ingest_cell_metadata) + job = IngestJob.new(study:, study_file: metadata_file, user: @user, action: :ingest_cell_metadata) job.set_study_default_options - @basic_study.reload - assert_equal 'species--group--invalid', @basic_study.default_annotation + study.reload + assert_equal 'species--group--invalid', study.default_annotation # reset default annotation, then test cluster file with a single annotation with only one unique value - @basic_study.cell_metadata.destroy_all - @basic_study.default_options = {} - @basic_study.save - assert @basic_study.default_annotation.nil? - assert @basic_study.default_cluster.nil? + study.cell_metadata.destroy_all + study.default_options = {} + study.save + assert study.default_annotation.nil? + assert study.default_cluster.nil? cluster_file = FactoryBot.create(:cluster_file, - name: 'cluster.txt', study: @basic_study, + name: 'cluster.txt', study:, cell_input: { x: [1, 4, 6], y: [7, 5, 3], cells: %w[A B C] }, annotation_input: [{ name: 'foo', type: 'group', values: %w[bar bar bar] }]) - job = IngestJob.new(study: @basic_study, study_file: cluster_file, user: @user, action: :ingest_cluster) + job = IngestJob.new(study:, study_file: cluster_file, user: @user, action: :ingest_cluster) job.set_study_default_options - @basic_study.reload - cluster = @basic_study.cluster_groups.first - assert_equal cluster, @basic_study.default_cluster - assert_equal 'foo--group--invalid', @basic_study.default_annotation + study.reload + cluster = study.cluster_groups.first + assert_equal cluster, study.default_cluster + assert_equal 'foo--group--invalid', study.default_annotation end test 'should launch DE jobs if study is eligible' do From ebd05061d87687b8dc5c6ce1aab5ea9bd251e6a4 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Mon, 16 Sep 2024 17:45:16 -0400 Subject: [PATCH 14/52] Add more id-label collation --- .../lib/validation/validate-anndata.js | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 65b87e08e..02286cd12 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -122,8 +122,45 @@ export function checkOntologyIdFormat(key, ontologyIds) { } /** Validate author's annotation labels match those in ontologies */ -async function checkOntologyLabels(hdf5File) { +async function checkOntologyLabels(ids, idIndexes, labels, labelIndexes) { + let issues = [] + + const ontologies = await fetchOntologies() + const labelIdPairs = [] + for (let i = 0; i < idIndexes.length; i++) { + const id = ids[idIndexes[i]] + // console.log('id', id) + for (let j = 0; j < labelIndexes.length; j++) { + const label = labels[labelIndexes[j]] + // console.log('label', label) + labelIdPairs.push(`${id} || ${label}`) + } + } + const rawUniques = Array.from(new Set(labelIdPairs)) + + rawUniques.map(r => { + const [id, label] = r.split(' || ') + const ontologyShortNameLc = id.split(/[_:]/)[0].toLowerCase() + const ontology = ontologies[ontologyShortNameLc] + if (!(id in ontology)) { + const msg = `Invalid ontology ID: ${id}` + 
issues.push([ + 'error', 'ontology:label-lookup-error', msg, + { subtype: 'ontology:invalid-id' } + ]) + } else { + const validLabels = ontology[id] + if (!(validLabels.includes(label))) { + const validLabelsClause = `Valid labels: ${validLabels.join(', ')}` + const msg = `Invalid ontology label "${id}". ${validLabelsClause}` + issues.push([ + 'error', 'ontology:label-lookup-error', msg, + { subtype: 'ontology:invalid-label' } + ]) + } + } +}) } /** Get ontology ID values for key in AnnData file */ @@ -167,6 +204,7 @@ async function getOntologyLabels(key, hdf5File) { const codes = await group.values[1] const indexes = await codes.value + } } From 90d4c34b441ae31ae3b3daa999daab0d913a954d Mon Sep 17 00:00:00 2001 From: bistline Date: Tue, 17 Sep 2024 09:43:48 -0400 Subject: [PATCH 15/52] Retain plot title when missing annotation --- .../components/visualization/ScatterPlot.jsx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/app/javascript/components/visualization/ScatterPlot.jsx b/app/javascript/components/visualization/ScatterPlot.jsx index 0062c3949..4cca8f749 100644 --- a/app/javascript/components/visualization/ScatterPlot.jsx +++ b/app/javascript/components/visualization/ScatterPlot.jsx @@ -598,17 +598,15 @@ function RawScatterPlot({ return (
{ ErrorComponent } + { hasMissingAnnot &&
"{cluster}" does not have the requested annotation "{loadedAnnotation}"
} - { !hasMissingAnnot && - - }
From: Eric Weitz
Date: Tue, 17 Sep 2024 10:08:16 -0400 Subject: [PATCH 16/52] Finish integrating ontology label validation --- .../lib/validation/validate-anndata.js | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 02286cd12..25e7d8657 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -122,18 +122,16 @@ export function checkOntologyIdFormat(key, ontologyIds) { } /** Validate author's annotation labels match those in ontologies */ -async function checkOntologyLabels(ids, idIndexes, labels, labelIndexes) { - let issues = [] +async function checkOntologyLabels(key, ids, idIndexes, labels, labelIndexes) { + const issues = [] const ontologies = await fetchOntologies() const labelIdPairs = [] for (let i = 0; i < idIndexes.length; i++) { const id = ids[idIndexes[i]] - // console.log('id', id) for (let j = 0; j < labelIndexes.length; j++) { const label = labels[labelIndexes[j]] - // console.log('label', label) labelIdPairs.push(`${id} || ${label}`) } } @@ -152,19 +150,22 @@ async function checkOntologyLabels(ids, idIndexes, labels, labelIndexes) { } else { const validLabels = ontology[id] if (!(validLabels.includes(label))) { - const validLabelsClause = `Valid labels: ${validLabels.join(', ')}` - const msg = `Invalid ontology label "${id}". ${validLabelsClause}` + const prettyLabels = validLabels.join(', ') + const validLabelsClause = `Valid labels for ${id}: ${prettyLabels}` + const msg = `Invalid ${key} label "${label}". ${validLabelsClause}` issues.push([ 'error', 'ontology:label-lookup-error', msg, { subtype: 'ontology:invalid-label' } ]) } } -}) + }) + + return issues } /** Get ontology ID values for key in AnnData file */ -async function getOntologyLabels(key, hdf5File) { +async function getOntologyIdsAndLabels(requiredName, hdf5File) { let ontologyIds = [] const obs = await hdf5File.get('obs') @@ -181,10 +182,16 @@ async function getOntologyLabels(key, hdf5File) { if (internalCategories) { resolvedCategories = await Promise.all(internalCategories.values) } - const group = resolvedCategories.find(o => o.name.endsWith(key)) - if (group) { + + const idKey = requiredName + const labelKey = `${requiredName}__ontology_label` + + const idGroup = obsValues.find(o => o.name.endsWith(idKey)) + const labelGroup = obsValues.find(o => o.name.endsWith(labelKey)) + + if (idGroup) { if (internalCategories) { - ontologyIds = await group.value + ontologyIds = await idGroup.value } else { // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, // sex) into a container with a `categories` frame and a `code` frame. @@ -199,16 +206,19 @@ async function getOntologyLabels(key, hdf5File) { // This organization greatly decreases filesize, but requires more code // to map paired obs annotations like `disease` (ontology IDs) to // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. 
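+      // e.g. (hypothetical values): idGroup might decode to per-cell IDs via
+      // ids ['MONDO_0018076'] + idIndexes [0, 0], while labelGroup decodes via
+      // labels ['tuberculosis'] + labelIndexes [0, 0]; entry i of each codes
+      // array refers to the same cell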
From e67c580d1e7a5f58e39d5dc1f3f7bf0b9c5f92c2 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Tue, 17 Sep 2024 11:15:45 -0400
Subject: [PATCH 17/52] Fetch ontologies once, not n times

---
 .../lib/validation/ontology-validation.js | 2 ++
 app/javascript/lib/validation/validate-anndata.js | 13 ++++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js
index 837c8bcaf..d0c837976 100644
--- a/app/javascript/lib/validation/ontology-validation.js
+++ b/app/javascript/lib/validation/ontology-validation.js
@@ -10,9 +10,11 @@ const ONTOLOGY_BASE_URL =
 
 /** Quickly retrieve current version cache key for ontologies */
 async function fetchOntologyCacheVersion() {
+  if (window.SCP.ontologiesVersion) { return window.SCP.ontologiesVersion }
   const response = await fetch(`${ONTOLOGY_BASE_URL}version.txt`)
   const text = await response.text()
   const version = text.trim().split('#')[0]
+  window.SCP.ontologiesVersion = version
   return version
 }
 
diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js
index 25e7d8657..e44d90ad7 100644
--- a/app/javascript/lib/validation/validate-anndata.js
+++ b/app/javascript/lib/validation/validate-anndata.js
@@ -122,10 +122,11 @@ export function checkOntologyIdFormat(key, ontologyIds) {
 }
 
 /** Validate author's annotation labels match those in ontologies */
-async function checkOntologyLabels(key, ids, idIndexes, labels, labelIndexes) {
-  const issues = []
+async function checkOntologyLabels(key, ontologies, groups) {
 
-  const ontologies = await fetchOntologies()
+  const [ids, idIndexes, labels, labelIndexes] = groups
+
+  const issues = []
 
   const labelIdPairs = []
   for (let i = 0; i < idIndexes.length; i++) {
@@ -226,15 +227,17 @@ async function validateOntologyLabels(hdf5File) {
   window.hdf5File = hdf5File
   let issues = []
 
+  const ontologies = await fetchOntologies()
+
   // Validate IDs for species, organ, disease, and library preparation protocol
   for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) {
     const column = REQUIRED_CONVENTION_COLUMNS[i]
     if (!column.endsWith('__ontology_label')) {continue}
     const key = column.split('__ontology_label')[0]
-    const [ids, idIndexes, labels, labelIndexes] = await getOntologyIdsAndLabels(key, hdf5File)
+    const groups = await getOntologyIdsAndLabels(key, hdf5File)
 
     issues = issues.concat(
-      await checkOntologyLabels(key, ids, idIndexes, labels, labelIndexes)
+      await checkOntologyLabels(key, ontologies, groups)
     )
   }
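PATCH 17 applies two complementary ideas: memoize the version lookup on a global, and hoist the ontologies fetch out of the per-column loop so it runs once instead of n times. A minimal sketch of the shared pattern, with illustrative names (`getOnce` and `fetchFn` are not part of SCP's code):

    // Cache the first result at module scope so n callers share one fetch
    let cached = null

    async function getOnce(fetchFn) {
      if (!cached) { cached = await fetchFn() }
      return cached
    }

    // Usage: each loop iteration reuses the same resolved object
    // const ontologies = await getOnce(fetchOntologies)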
From 51ef29295f0aa8249337f6d57e332fcd3483c9b7 Mon Sep 17 00:00:00 2001
From: bistline
Date: Tue, 17 Sep 2024 12:43:12 -0400
Subject: [PATCH 18/52] Ensure email delivery on 'special action' failures

---
 app/mailers/single_cell_mailer.rb | 8 +--
 app/models/ingest_job.rb | 8 +--
 test/models/ingest_job_test.rb | 81 +++++++++++++++++++++++++++++--
 3 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/app/mailers/single_cell_mailer.rb b/app/mailers/single_cell_mailer.rb
index ddbf12865..faa90cce0 100644
--- a/app/mailers/single_cell_mailer.rb
+++ b/app/mailers/single_cell_mailer.rb
@@ -38,7 +38,7 @@ def notify_admin_upload_fail(study_file, error)
     @study_file = study_file
     @error = error
     @user = @study.user
-    mail(to: emails, subject: '[Single Cell Portal ERROR] FireCloud auto-upload fail in ' + @study.accession) do |format|
+    mail(to: emails, subject: "[Single Cell Portal ERROR] FireCloud auto-upload fail in #{@study.accession}") do |format|
       format.html
     end
   end
@@ -51,7 +51,7 @@ def notify_user_upload_fail(study_file, study, user)
     dev_email_config = AdminConfiguration.find_by(config_type: 'QA Dev Email')
     dev_email = dev_email_config.present? ? dev_email_config.value : nil
     title = "#{study_file.upload_file_name} did not finish uploading"
-    mail(to: user.email, bcc: dev_email, subject: '[Single Cell Portal Notifier] ' + title)
+    mail(to: user.email, bcc: dev_email, subject: "[Single Cell Portal Notifier] #{title}")
   end
 
   def notify_user_parse_complete(email, title, message, study)
@@ -72,7 +72,7 @@ def notify_admin_parse_fail(user_email, title, contents)
       dev_email = dev_email_config.value
       @contents = contents
       @user_email = user_email
-      mail(to: dev_email, subject: '[Single Cell Portal Admin Notification] ' + title)
+      mail(to: dev_email, subject: "[Single Cell Portal Admin Notification] #{title}")
     end
   end
 
@@ -91,7 +91,7 @@ def notify_admin_parse_launch_fail(study, study_file, corresponding_user, ingest
     @error = error
     @action = ingest_action
     title = 'Ingest Pipeline Launch Failure'
-    mail(to: emails, subject: '[Single Cell Portal Admin Notification] ' + title)
+    mail(to: emails, subject: "[Single Cell Portal Admin Notification] #{title}")
   end
 
   def daily_disk_status
diff --git a/app/models/ingest_job.rb b/app/models/ingest_job.rb
index 6d8869b27..16f6d1592 100644
--- a/app/models/ingest_job.rb
+++ b/app/models/ingest_job.rb
@@ -377,7 +377,8 @@ def poll_for_completion(run_at: 1.minute.from_now)
         log_error_messages
         log_to_mixpanel # log before queuing file for deletion to preserve properties
         # don't delete files or notify users if this is a 'special action', like DE or image pipeline jobs
-        handle_ingest_failure unless special_action?
+        subject = "Error: #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has failed"
+        handle_ingest_failure(subject) unless special_action?
         admin_email_content = generate_error_email_body(email_type: :dev)
         SingleCellMailer.notify_admin_parse_fail(user.email, subject, admin_email_content).deliver_now
       else
@@ -390,7 +391,7 @@ def poll_for_completion(run_at: 1.minute.from_now)
   # will automatically clean up data and notify user
   # in case of subsampling, only subsampled data cleanup is run and all other data is left in place
   # this reduces churn for study owners as full-resolution data is still valid
-  def handle_ingest_failure
+  def handle_ingest_failure(email_subject)
     if action.to_sym == :ingest_subsample
       study_file.update(parse_status: 'parsed') # reset parse flag
       cluster_name = cluster_name_by_file_type
@@ -408,9 +409,8 @@ def handle_ingest_failure
       end
     end
   end
-    subject = "Error: #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has failed"
     user_email_content = generate_error_email_body
-    SingleCellMailer.notify_user_parse_fail(user.email, subject, user_email_content, study).deliver_now
+    SingleCellMailer.notify_user_parse_fail(user.email, email_subject, user_email_content, study).deliver_now
   end
 
   # TODO (SCP-4709, SCP-4710) Processed and Raw expression files
diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb
index f663e3853..fe0c0e251 100644
--- a/test/models/ingest_job_test.rb
+++ b/test/models/ingest_job_test.rb
@@ -362,9 +362,9 @@ class IngestJobTest < ActiveSupport::TestCase
           }
         ]
       },
-      createTime: (now - 1.minutes).to_default_s,
-      startTime: (now - 1.minutes).to_default_s,
-      endTime: now.to_default_s
+      createTime: (now - 1.minutes).to_s,
+      startTime: (now - 1.minutes).to_s,
+      endTime: now.to_s
     }.with_indifferent_access
 
     pipeline_mock = MiniTest::Mock.new
@@ -713,7 +713,7 @@ class IngestJobTest < ActiveSupport::TestCase
     ls_mock.expect :get_pipeline, pipeline_mock, [{ name: pipeline_name }]
     ApplicationController.stub :firecloud_client, mock do
       ApplicationController.stub :life_sciences_api_client, ls_mock do
-        job.handle_ingest_failure
+        job.handle_ingest_failure('parse failure')
         cluster_file.reload
         study.reload
         cluster.reload
@@ -754,7 +754,7 @@ class IngestJobTest < ActiveSupport::TestCase
     ls_mock.expect :get_pipeline, pipeline_mock, [{ name: pipeline_name }]
     ApplicationController.stub :firecloud_client, mock do
       ApplicationController.stub :life_sciences_api_client, ls_mock do
-        failed_job.handle_ingest_failure
+        failed_job.handle_ingest_failure('parse failure')
         mock.verify
       end
     end
@@ -800,6 +800,77 @@ class IngestJobTest < ActiveSupport::TestCase
     end
   end
 
+  test 'should ensure email delivery on special action failures' do
+    study = FactoryBot.create(:detached_study,
+                              name_prefix: 'Special Action Email',
+                              user: @user,
+                              test_array: @@studies_to_clean)
+    study_file = FactoryBot.create(:ann_data_file,
+                                   name: 'matrix.h5ad',
+                                   study:,
+                                   upload_file_size: 1.megabyte,
+                                   cell_input: %w[A B C D],
+                                   annotation_input: [
+                                     { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
+                                   ],
+                                   coordinate_input: [
+                                     { X_umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+                                   ])
+    annotation_file = 'gs://test_bucket/anndata/h5ad_frag.metadata.tsv'
+    cluster_file = 'gs://test_bucket/anndata/h5ad_frag.cluster.X_umap.tsv'
+    params_object = DifferentialExpressionParameters.new(
+      matrix_file_path: 'gs://test_bucket/matrix.h5ad', matrix_file_type: 'h5ad', file_size: study_file.upload_file_size,
+      annotation_file:, cluster_file:, cluster_name: 'umap', annotation_name: 'disease', annotation_scope: 'study'
+    )
+    pipeline_name = SecureRandom.uuid
+    job = IngestJob.new(
+      pipeline_name:, study:, study_file:, user: @user, action: :differential_expression, params_object:
+    )
+    now = DateTime.now.in_time_zone
+    mock_metadata = {
+      events: [
+        { timestamp: now.to_s },
+        { timestamp: (now + 2.minutes).to_s, containerStopped: { exitStatus: 1 } }
+      ],
+      pipeline: {
+        actions: [
+          {
+            commands: [
+              'python', 'ingest_pipeline.py', '--study-id', study.id.to_s, '--study-file-id',
+              study_file.id.to_s, 'differential_expression', '--differential-expression', '--cluster-file', cluster_file,
+              '--annotation-file', annotation_file, '--cluster-name', 'umap', '--annotation-name', 'disease',
+              '--annotation-scope', 'study'
+            ]
+          }
+        ],
+        resources: {
+          virtualMachine: {
+            machineType: 'n2d-highmem-8',
+            bootDiskSizeGb: 300
+          }
+        }
+      },
+      createTime: (now - 3.minutes).to_s,
+      startTime: now.to_s,
+      endTime: now.to_s
+    }.with_indifferent_access
+    mock = Minitest::Mock.new
+    6.times { mock.expect :metadata, mock_metadata }
+    3.times do
+      mock.expect :error, { code: 1, message: 'mock message' }
+      mock.expect :done?, true
+    end
+    email_mock = Minitest::Mock.new
+    email_mock.expect :deliver_now, true
+    ApplicationController.life_sciences_api_client.stub :get_pipeline, mock do
+      SingleCellMailer.stub :notify_admin_parse_fail, email_mock do
+        job.poll_for_completion
+        mock.verify
+        email_mock.verify
+      end
+    end
+  end
+
   test 'should always unset subsampling flags' do
     study = FactoryBot.create(:detached_study,
                               name_prefix: 'Subsample Flag Test',
From b7e2305af074b116373ec72fc95559654a4b8ebb Mon Sep 17 00:00:00 2001
From: bistline
Date: Tue, 17 Sep 2024 13:47:29 -0400
Subject: [PATCH 19/52] Working on persistent raw_counts test failure

---
 test/models/ingest_job_test.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb
index fe0c0e251..edbcb60b6 100644
--- a/test/models/ingest_job_test.rb
+++ b/test/models/ingest_job_test.rb
@@ -234,6 +234,7 @@ class IngestJobTest < ActiveSupport::TestCase
       anndata_file: ann_data_file.gs_url, obsm_keys: ann_data_file.ann_data_file_info.obsm_key_names,
       file_size: ann_data_file.upload_file_size
     )
+    assert_not params_object.extract.include?('raw_counts')
     job = IngestJob.new(
       study: @basic_study, study_file: ann_data_file, user: @user, action: :ingest_anndata, params_object:
     )
From 4374c34a7b22dcb22d0f60ed1b2d8e4689737c05 Mon Sep 17 00:00:00 2001
From: bistline
Date: Tue, 17 Sep 2024 13:48:58 -0400
Subject: [PATCH 20/52] Working on persistent raw_counts test failure

---
 app/mailers/single_cell_mailer.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/mailers/single_cell_mailer.rb b/app/mailers/single_cell_mailer.rb
index faa90cce0..61c8bbebc 100644
--- a/app/mailers/single_cell_mailer.rb
+++ b/app/mailers/single_cell_mailer.rb
@@ -57,13 +57,13 @@ def notify_user_upload_fail(study_file, study, user)
   def notify_user_parse_complete(email, title, message, study)
     @message = message
     @study = study
-    mail(to: email, subject: '[Single Cell Portal Notifier] ' + title)
+    mail(to: email, subject: "[Single Cell Portal Notifier] #{title}")
   end
 
   def notify_user_parse_fail(email, title, error, study)
     @error = error
     @study = study
-    mail(to: email, subject: '[Single Cell Portal Notifier] ' + title)
+    mail(to: email, subject: "[Single Cell Portal Notifier] #{title}")
   end
 
   def notify_admin_parse_fail(user_email, title, contents)
From ca17df8e203f10d3d7a076af7e53d925730d7919 Mon Sep 17 00:00:00 2001
From: bistline
Date: Tue, 17 Sep 2024 17:03:12 -0400
Subject: [PATCH 21/52] Use change event for remote validation on bucket path

---
 .../components/upload/FileUploadControl.jsx | 29 +++++++++++++++++--
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx
index be806d26c..7213f0f14 100644
--- a/app/javascript/components/upload/FileUploadControl.jsx
+++ b/app/javascript/components/upload/FileUploadControl.jsx
@@ -28,6 +28,10 @@ export default function FileUploadControl({
   const [showUploadButton, setShowUploadButton] = useState(true)
   const [showBucketPath, setShowBucketPath] = useState(false)
   const ToggleUploadButton = () => {
+    // this is an inverted check since the user is clicking and the value is about to change
+    if (!showUploadButton) {
+      unsetRemoteLocation()
+    }
     setShowUploadButton(!showUploadButton)
     setShowBucketPath(!showBucketPath)
   }
@@ -80,10 +84,22 @@ export default function FileUploadControl({
     }
   }
 
+  // keep track of pending timeout for remote validation via bucket path
+  const [timeOutId, setTimeOutID] = useState(null)
+
+  // clear out remote_location and hasRemoteFile to allow switching back to file upload button
+  function unsetRemoteLocation() {
+    updateFile(file._id, {remote_location: '', hasRemoteFile: false})
+  }
+
   // perform CSFV on remote file when specifying a GS URL or bucket path
   // will sanitize GS URL before calling validateRemoteFile
-  async function handleBucketLocationEntry(e) {
-    const path = e.target.value
+  async function handleBucketLocationEntry(path) {
+    if (path === "") {
+      unsetRemoteLocation()
+      return false
+    }
+
     const matcher = new RegExp(`(gs:\/\/)?${bucketName}\/?`)
     const trimmedPath = path.replace(matcher, '')
     if (!trimmedPath) {
@@ -183,7 +199,14 @@ export default function FileUploadControl({
           size={60}
           id={`remote_location-input-${file._id}`}
           placeholder='GS URL or path to file in GCP bucket'
-          onBlur={handleBucketLocationEntry}/>
+          onChange={ (e) => {
+            const newBucketPath = e.target.value
+            if (timeOutId) {
+              clearTimeout(timeOutId)
+            }
+            const newTimeout = setTimeout(handleBucketLocationEntry, 500, newBucketPath)
+            setTimeOutID(newTimeout)
+          }}/>
       }
       &nbsp; &nbsp;
       { !isFileOnServer && (showBucketPath || file.hasRemoteFile ) && googleBucketLink }
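The onChange handler above implements a debounce: each keystroke cancels the pending timer, so remote validation fires only after typing pauses. A hedged standalone sketch of that pattern, with illustrative names that are not SCP's API:

    // Illustrative debounce helper: `validate` runs once, 500 ms after
    // the most recent call to `onInputChange`
    let pendingTimeoutId = null

    function onInputChange(value, validate) {
      if (pendingTimeoutId) { clearTimeout(pendingTimeoutId) }
      pendingTimeoutId = setTimeout(validate, 500, value)
    }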
From b7ce2700e39f64ad8aee1b50d2fbde4176b72f8e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 18 Sep 2024 02:15:31 +0000
Subject: [PATCH 22/52] Bump vite from 4.5.3 to 4.5.5

Bumps [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite) from 4.5.3 to 4.5.5.
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/v4.5.5/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v4.5.5/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot]
---
 package.json | 2 +-
 yarn.lock | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/package.json b/package.json
index a86bdf0dd..63230c6f9 100644
--- a/package.json
+++ b/package.json
@@ -106,7 +106,7 @@
     "prop-types": "^15.7.2",
     "react-select-event": "^5.3.0",
     "stylelint-prettier": "^1.1.2",
-    "vite": "^4.0.0",
+    "vite": "^4.5.5",
     "vite-plugin-ruby": "^3.0.5"
   }
 }
diff --git a/yarn.lock b/yarn.lock
index 08a604739..52f5500f1 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -9387,10 +9387,10 @@ vite-plugin-ruby@^3.0.5:
     debug "^4.3.4"
     fast-glob "^3.2.12"
 
-vite@^4.0.0:
-  version "4.5.3"
-  resolved "https://registry.npmjs.org/vite/-/vite-4.5.3.tgz"
-  integrity sha512-kQL23kMeX92v3ph7IauVkXkikdDRsYMGTVl5KY2E9OY4ONLvkHf04MDTbnfo6NKxZiDLWzVpP5oTa8hQD8U3dg==
+vite@^4.5.5:
+  version "4.5.5"
+  resolved "https://registry.yarnpkg.com/vite/-/vite-4.5.5.tgz#639b9feca5c0a3bfe3c60cb630ef28bf219d742e"
+  integrity sha512-ifW3Lb2sMdX+WU91s3R0FyQlAyLxOzCSCP37ujw0+r5POeHPwe6udWVIElKQq8gk3t7b8rkmvqC6IHBpCff4GQ==
   dependencies:
     esbuild "^0.18.10"
     postcss "^8.4.27"
From 1eeee20a467b5a330ce406095b2f2af7dc477400 Mon Sep 17 00:00:00 2001
From: bistline
Date: Wed, 18 Sep 2024 12:43:53 -0400
Subject: [PATCH 23/52] Fix logic for saveAnnDataFileHelper

---
 app/javascript/components/upload/UploadWizard.jsx | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/javascript/components/upload/UploadWizard.jsx b/app/javascript/components/upload/UploadWizard.jsx
index 4da9cc59e..18b934366 100644
--- a/app/javascript/components/upload/UploadWizard.jsx
+++ b/app/javascript/components/upload/UploadWizard.jsx
@@ -275,12 +275,17 @@ export function RawUploadWizard({ studyAccession, name }) {
     setTimeout(() => deleteFileFromServer(requestCanceller.fileId), 500)
   }
 
+  /** helper for determining when to use saveAnnDataFileHelper (sets ids/values correctly for AnnData UX **/
+  function useAnnDataFileHelper(file) {
+    return isAnnDataExperience && (file?.file_type === 'AnnData' || Object.keys(file).includes("data_type"))
+  }
+
   /** save the given file and perform an upload if a selected file is present */
   async function saveFile(file) {
     let fileToSave = file
     let studyFileId = file._id
 
-    if (isAnnDataExperience && fileToSave?.file_type === 'AnnData') {
+    if (useAnnDataFileHelper(fileToSave)) {
       fileToSave = saveAnnDataFileHelper(file, fileToSave)
       studyFileId = fileToSave._id
     }
From 9ab452293050edcb468c6685cfb23a69314d4de4 Mon Sep 17 00:00:00 2001
From: bistline
Date: Wed, 18 Sep 2024 16:25:56 -0400
Subject: [PATCH 24/52] Add spinner for remote validation, improved timing

---
 .../components/upload/FileUploadControl.jsx | 41 +++++++++++++------
 .../upload-wizard/file-upload-control.test.js | 33 ++++++++++++++-
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx
index 7213f0f14..38527b3eb 100644
--- a/app/javascript/components/upload/FileUploadControl.jsx
+++ b/app/javascript/components/upload/FileUploadControl.jsx
@@ -42,7 +42,7 @@ export default function FileUploadControl({
     'Upload a file from your computer' :
     "Input a path to a file that is already in this study's bucket"
   const uploadToggle =
       {toggleText}
@@ -55,7 +55,7 @@ export default function FileUploadControl({
   const googleBucketLink =
-
       Browse bucket
@@ -95,18 +95,32 @@ export default function FileUploadControl({
   // perform CSFV on remote file when specifying a GS URL or bucket path
   // will sanitize GS URL before calling validateRemoteFile
   async function handleBucketLocationEntry(path) {
-    if (path === "") {
-      unsetRemoteLocation()
-      return false
-    }
-
     const matcher = new RegExp(`(gs:\/\/)?${bucketName}\/?`)
     const trimmedPath = path.replace(matcher, '')
     if (!trimmedPath) {
+      unsetRemoteLocation()
+      setFileValidation({ validating: false, issues: {}, fileName: null })
       return false
     }
 
+    // don't continue unless a dot is present (otherwise, no valid file extension)
+    if (trimmedPath.indexOf('.') < 0 ) { return false }
+
+    const fileType = file.file_type
+    const fileExtension = `.${trimmedPath.split('.').slice(-1)[0]}`
+    if (!inputAcceptExts.includes(fileExtension)) {
+      const invalidExt = {
+        errors: [
+          [
+            'error', 'file:invalid-ext',
+            `${fileExtension} is not an allowed file extension for ${fileType} files: ${inputAcceptExts.join(', ')}`
+          ]
+        ]
+      }
+      setFileValidation({ validating: false, issues: invalidExt, fileName: trimmedPath })
+      return false
+    }
+
     const fileOptions = fileType === 'Metadata' ? { use_metadata_convention: file?.use_metadata_convention } : {}
     setFileValidation({ validating: true, issues: {}, fileName: trimmedPath })
@@ -191,6 +205,7 @@ export default function FileUploadControl({
         />
       }
 
+
       {!isFileOnServer && (showBucketPath || file.hasRemoteFile ) &&
       // we can't use TextFormField since we need a custom onBlur event
       // onBlur is the React equivalent of onfocusout, which will fire after the user is done updating the input
@@ -198,22 +213,22 @@ export default function FileUploadControl({
           type="text"
           size={60}
           id={`remote_location-input-${file._id}`}
+          data-testid="remote-location-input"
           placeholder='GS URL or path to file in GCP bucket'
           onChange={ (e) => {
             const newBucketPath = e.target.value
-            if (timeOutId) {
-              clearTimeout(timeOutId)
-            }
-            const newTimeout = setTimeout(handleBucketLocationEntry, 500, newBucketPath)
+            if (timeOutId) { clearTimeout(timeOutId) }
+            const newTimeout = setTimeout(handleBucketLocationEntry, 300, newBucketPath)
             setTimeOutID(newTimeout)
           }}/>
       }
-      &nbsp; &nbsp;
       { !isFileOnServer && (showBucketPath || file.hasRemoteFile ) && googleBucketLink }
-      &nbsp; &nbsp;
       { !isFileOnServer && uploadToggle }
+      { showBucketPath && fileValidation.validating &&
+        Validating...
+      }
diff --git a/test/js/upload-wizard/file-upload-control.test.js b/test/js/upload-wizard/file-upload-control.test.js
index be30fe737..dd1d03a03 100644
--- a/test/js/upload-wizard/file-upload-control.test.js
+++ b/test/js/upload-wizard/file-upload-control.test.js
@@ -348,7 +348,36 @@ describe('file upload control validates the selected file', () => {
     expect(screen.queryAllByText('Use bucket path')).toHaveLength(1)
     const bucketToggle = screen.getByText('Use bucket path')
     fireEvent.click(bucketToggle)
-    expect(container.querySelector('#remote_location-input-123')).toBeVisible();
+    expect(container.querySelector('#remote_location-input-123')).toBeVisible()
+  })
+
+  it('validates file extension on bucket path entries', async () => {
+    const file = {
+      _id: '123',
+      name: '',
+      status: 'new',
+      file_type: 'Cluster'
+    }
+    render((
+
+
+
+    ))
+    expect(screen.queryAllByText('Use bucket path')).toHaveLength(1)
+    const bucketToggle = screen.getByText('Use bucket path')
+    fireEvent.click(bucketToggle)
+    const bucketPathInput = screen.getByTestId('remote-location-input')
+    expect(bucketPathInput).toBeVisible()
+    fireEvent.change(bucketPathInput, { target: { value: 'bad.pdf' } })
+    expect(bucketPathInput).toHaveValue('bad.pdf')
+    await waitFor(() => {
+      expect(screen.getByTestId('validation-error')).toBeInTheDocument()
+      expect(screen.getByText('.pdf is not an allowed file extension for Cluster files: .tsv')).toBeVisible()
+    })
   })
 })
From 7d7663fffb3fcf75a5c2e3db3ee8df095c1a3b9f Mon Sep 17 00:00:00 2001
From: bistline
Date: Wed, 18 Sep 2024 16:32:55 -0400
Subject: [PATCH 25/52] Match extension code/message to upload

---
 app/javascript/components/upload/FileUploadControl.jsx | 4 ++--
 test/js/upload-wizard/file-upload-control.test.js | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx
index 38527b3eb..cbf4bcadc 100644
--- a/app/javascript/components/upload/FileUploadControl.jsx
+++ b/app/javascript/components/upload/FileUploadControl.jsx
@@ -112,8 +112,8 @@ export default function FileUploadControl({
       const invalidExt = {
         errors: [
           [
-            'error', 'file:invalid-ext',
-            `${fileExtension} is not an allowed file extension for ${fileType} files: ${inputAcceptExts.join(', ')}`
+            'error', 'filename:extension',
+            `Allowed extensions are ${allowedFileExts.join(', ')}`
           ]
         ]
       }
diff --git a/test/js/upload-wizard/file-upload-control.test.js b/test/js/upload-wizard/file-upload-control.test.js
index dd1d03a03..be30fe737 100644
--- a/test/js/upload-wizard/file-upload-control.test.js
+++ b/test/js/upload-wizard/file-upload-control.test.js
@@ -382,7 +382,7 @@ describe('file upload control validates the selected file', () => {
     expect(bucketPathInput).toHaveValue('bad.pdf')
     await waitFor(() => {
       expect(screen.getByTestId('validation-error')).toBeInTheDocument()
-      expect(screen.getByText('.pdf is not an allowed file extension for Cluster files: .tsv')).toBeVisible()
+      expect(screen.getByText('Allowed extensions are: .tsv')).toBeVisible()
     })
   })
 })
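A hedged walkthrough of the extension check these two patches converge on, using made-up inputs; here `inputAcceptExts` stands in for the allow-list a cluster file would get:

    const inputAcceptExts = ['.tsv']  // illustrative allow-list
    const trimmedPath = 'exprs/bad.pdf'

    const fileExtension = `.${trimmedPath.split('.').slice(-1)[0]}`  // '.pdf'
    const isInvalid = trimmedPath.includes('.') && !inputAcceptExts.includes(fileExtension)
    console.log(isInvalid)  // true, so a 'filename:extension' error issue is reported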
From b696ab4e0ab3d092e78bfeb62e1deafefc3d70e0 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 16:58:51 -0400
Subject: [PATCH 26/52] Add ontology validation test, with web Caches mock

---
 .../lib/validation/ontology-validation.js | 13 +-
 app/javascript/lib/validation/validate-anndata.js | 1 -
 test/js/lib/ontology-validation.test.js | 191 ++++++++++++++++++
 3 files changed, 202 insertions(+), 3 deletions(-)
 create mode 100644 test/js/lib/ontology-validation.test.js

diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js
index d0c837976..baaba3122 100644
--- a/app/javascript/lib/validation/ontology-validation.js
+++ b/app/javascript/lib/validation/ontology-validation.js
@@ -25,6 +25,7 @@ async function getServiceWorkerCache() {
 
   // Delete other versions of ontologies cache; there should be 1 per domain
   const cacheNames = await caches.keys()
+  console.log('cacheNames', cacheNames)
   cacheNames.forEach(name => {
     if (name.startsWith('ontologies-') && name !== currentOntologies) {
       caches.delete(name)
@@ -37,7 +38,7 @@ async function getServiceWorkerCache() {
 }
 
 /** Fetch .gz file, decompress it, return plaintext */
-async function fetchGzipped(url) {
+export async function fetchGzipped(url) {
   const response = await fetch(url)
   const blob = await response.blob();
   const uint8Array = new Uint8Array(await blob.arrayBuffer());
@@ -51,10 +52,12 @@ export async function cacheFetch(url) {
 
   const decompressedUrl = url.replace('.gz', '')
   const response = await cache.match(decompressedUrl)
+  console.log('in cacheFetch, typeof response', typeof response)
   if (typeof response === 'undefined') {
     // If cache miss, then fetch, decompress, and put response in cache
     const data = await fetchGzipped(url)
     const contentLength = data.length
+    // console.log('data', data)
     const decompressedResponse = new Response(
       new Blob([data], { type: 'text/tab-separated-values' }),
       { headers: new Headers({ 'Content-Length': contentLength }) }
@@ -65,6 +68,8 @@ export async function cacheFetch(url) {
   return await cache.match(decompressedUrl)
 }
 
+
+
 /**
  * Fetch minified ontologies, transform into object of object of arrays, e.g.:
  *
@@ -95,9 +100,14 @@ export async function fetchOntologies() {
   for (let i = 0; i < ontologyNames.length; i++) {
     const ontologyName = ontologyNames[i]
     const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz`
+    console.log('ontologyUrl', ontologyUrl)
     const response = await cacheFetch(ontologyUrl)
+
+    console.log('response', response)
+
     const tsv = await response.text()
+    // console.log('tsv', tsv)
     const lines = tsv.split('\n')
     ontologies[ontologyName] = {}
 
@@ -117,7 +127,6 @@ export async function fetchOntologies() {
   window.SCP.ontologies = ontologies
   return ontologies
 }
-
 window.fetchOntologies = fetchOntologies
 
 /** Get lowercase shortnames for all required ontologies */
diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js
index e44d90ad7..6553505d1 100644
--- a/app/javascript/lib/validation/validate-anndata.js
+++ b/app/javascript/lib/validation/validate-anndata.js
@@ -224,7 +224,6 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) {
 
 /** Validate ontology labels for required metadata columns in AnnData file */
 async function validateOntologyLabels(hdf5File) {
-  window.hdf5File = hdf5File
   let issues = []
 
   const ontologies = await fetchOntologies()
diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js
new file mode 100644
index 000000000..7ad489b72
--- /dev/null
+++ b/test/js/lib/ontology-validation.test.js
@@ -0,0 +1,191 @@
+const fetch = require('node-fetch')
+const { Readable } = require('stream');
+
+import {
+  fetchOntologies
+} from 'lib/validation/ontology-validation'
+
+
+describe('Client-side file validation for AnnData', () => {
+  beforeAll(() => {
+    global.fetch = fetch
+
+    const mockCaches = {
+      _cacheStores: {},
+
+      open: jest.fn((cacheName) => {
+        console.log('cacheName', cacheName)
+        // Initialize the store for the cache name if it doesn't exist
+        if (!mockCaches._cacheStores[cacheName]) {
+          mockCaches._cacheStores[cacheName] = {};
+        }
+
+        console.log('mockCaches._cacheStores[cacheName]', mockCaches._cacheStores[cacheName])
+
+        return Promise.resolve({
+          add: jest.fn((request) => {
+            const response = new Response(`Response for ${request.url}`); // Mocking a response
+            return mockCaches.put(request, response, cacheName);
+          }),
+          addAll: jest.fn((requests) => {
+            const responses = requests.map(req => new Response(`Response for ${req.url}`));
+            return Promise.all(responses.map((response, index) => {
+              return mockCaches.put(requests[index], response, cacheName);
+            }));
+          }),
+          delete: jest.fn((request) => {
+            const key = request.url || request;
+            if (mockCaches._cacheStores[cacheName][key]) {
+              delete mockCaches._cacheStores[cacheName][key];
+              return Promise.resolve(true);
+            }
+            return Promise.resolve(false);
+          }),
+          match: jest.fn((request) => {
+            const key = request.url || request;
+            return Promise.resolve(mockCaches._cacheStores[cacheName][key] || undefined);
+          }),
+          put: jest.fn((request, response) => {
+            console.log('in put, cacheName, request', cacheName, request)
+            const key = request.url || request;
+            mockCaches._cacheStores[cacheName][key] = response;
+            return Promise.resolve();
+          }),
+          keys: jest.fn(() => {
+            console.log('in keys, mockCaches._cacheStores', mockCaches._cacheStores)
+            return Promise.resolve(Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key)));
+          }),
+        });
+      }),
+
+      match: jest.fn((request, cacheName) => {
+        const key = request.url || request;
+        return Promise.resolve(mockCaches._cacheStores[cacheName]?.[key] || null);
+      }),
+
+      delete: jest.fn((request, cacheName) => {
+        const key = request.url || request;
+        if (mockCaches._cacheStores[cacheName] && mockCaches._cacheStores[cacheName][key]) {
+          delete mockCaches._cacheStores[cacheName][key];
+          return Promise.resolve(true);
+        }
+        return Promise.resolve(false);
+      }),
+
+      keys: jest.fn(() => {
+        // Collect all cache keys across all cache stores
+        console.log('in mockCaches._cacheStores', mockCaches._cacheStores)
+        return Promise.resolve(Object.keys(mockCaches._cacheStores).flatMap(cacheName =>
+          Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key))
+        ));
+      }),
+    };
+
+    global.caches = mockCaches;
+
+    // Mock Request
+    global.Request = class {
+      constructor(url, options = {}) {
+        this.url = url;
+        this.method = options.method || 'GET';
+        this.headers = new Headers(options.headers);
+      }
+    };
+
+    // Mock Response
+    global.Response = class {
+      constructor(body, options = {}) {
+        this.status = options.status || 200;
+        this.ok = this.status >= 200 && this.status < 300;
+        this.headers = new Headers(options.headers);
+        this.bodyUsed = false;
+
+        // Ensure the body is a string or Buffer
+        const readableBody = typeof body === 'string' ? body : body.toString();
+
+        // Create a ReadableStream
+        const stream = new Readable({
+          read() {
+            this.push(readableBody); // Push the string or buffer into the stream
+            this.push(null); // Signal the end of the stream
+          },
+        });
+
+        this.body = stream; // Assign the readable stream to body
+      }
+
+      // Define text() method to read from the stream
+      async text() {
+        if (this.bodyUsed) {
+          return Promise.reject(new TypeError('Already read'));
+        }
+        this.bodyUsed = true;
+        return new Promise((resolve, reject) => {
+          const chunks = [];
+          this.body.on('data', (chunk) => {
+            chunks.push(chunk);
+          });
+          this.body.on('end', () => {
+            resolve(Buffer.concat(chunks).toString());
+          });
+          this.body.on('error', (err) => {
+            reject(err);
+          });
+        });
+      }
+
+      // You can also implement json() if needed
+      async json() {
+        const text = await this.text();
+        return JSON.parse(text);
+      }
+    };
+
+
+    global.Headers = class {
+      constructor(headers) {
+        this.map = new Map();
+        if (headers) {
+          for (const [key, value] of Object.entries(headers)) {
+            this.append(key, value);
+          }
+        }
+      }
+
+      append(key, value) {
+        const existingValue = this.map.get(key.toLowerCase());
+        if (existingValue) {
+          this.map.set(key.toLowerCase(), existingValue + ', ' + value);
+        } else {
+          this.map.set(key.toLowerCase(), value);
+        }
+      }
+
+      get(key) {
+        return this.map.get(key.toLowerCase()) || null;
+      }
+
+      has(key) {
+        return this.map.has(key.toLowerCase());
+      }
+
+      set(key, value) {
+        this.map.set(key.toLowerCase(), value);
+      }
+
+      delete(key) {
+        this.map.delete(key.toLowerCase());
+      }
+
+      // Additional methods if needed
+    }
+
+
+  })
+
+  it('Parses AnnData headers', async () => {
+    const ontologies = await fetchOntologies()
+    console.log('ontologies')
+    expect(1).toEqual(1)
+  })
+})
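The mocked `caches` object above mimics the browser's Cache API closely enough for `cacheFetch` to run under Jest. A hedged sketch of the round trip it enables; the cache name, URL, and TSV content are made up:

    // Open a named cache, store a response, read it back: the same
    // round trip cacheFetch performs against the real browser API
    const cache = await caches.open('ontologies-2024-09-18')  // illustrative name
    await cache.put('https://example.org/mondo.min.tsv', new Response('id\tlabel'))

    const hit = await cache.match('https://example.org/mondo.min.tsv')
    console.log(await hit.text())  // 'id\tlabel'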
From 5af3fa97cea07546fd6537f8dfeb04df123e498 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 17:16:08 -0400
Subject: [PATCH 27/52] Modularize mock Cache interface for Web API

---
 test/js/lib/mock-cache.js | 75 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 test/js/lib/mock-cache.js

diff --git a/test/js/lib/mock-cache.js b/test/js/lib/mock-cache.js
new file mode 100644
index 000000000..73142c024
--- /dev/null
+++ b/test/js/lib/mock-cache.js
@@ -0,0 +1,75 @@
+/** @fileoverview Mock for the Cache interface of the Web API
+ *
+ * https://developer.mozilla.org/en-US/docs/Web/API/Cache
+*/
+
+export const mockCaches = {
+  _cacheStores: {},
+
+  open: jest.fn((cacheName) => {
+    console.log('cacheName', cacheName)
+    // Initialize the store for the cache name if it doesn't exist
+    if (!mockCaches._cacheStores[cacheName]) {
+      mockCaches._cacheStores[cacheName] = {};
+    }
+
+    console.log('mockCaches._cacheStores[cacheName]', mockCaches._cacheStores[cacheName])
+
+    return Promise.resolve({
+      add: jest.fn((request) => {
+        const response = new Response(`Response for ${request.url}`); // Mocking a response
+        return mockCaches.put(request, response, cacheName);
+      }),
+      addAll: jest.fn((requests) => {
+        const responses = requests.map(req => new Response(`Response for ${req.url}`));
+        return Promise.all(responses.map((response, index) => {
+          return mockCaches.put(requests[index], response, cacheName);
+        }));
+      }),
+      delete: jest.fn((request) => {
+        const key = request.url || request;
+        if (mockCaches._cacheStores[cacheName][key]) {
+          delete mockCaches._cacheStores[cacheName][key];
+          return Promise.resolve(true);
+        }
+        return Promise.resolve(false);
+      }),
+      match: jest.fn((request) => {
+        const key = request.url || request;
+        return Promise.resolve(mockCaches._cacheStores[cacheName][key] || undefined);
+      }),
+      put: jest.fn((request, response) => {
+        console.log('in put, cacheName, request', cacheName, request)
+        const key = request.url || request;
+        mockCaches._cacheStores[cacheName][key] = response;
+        return Promise.resolve();
+      }),
+      keys: jest.fn(() => {
+        console.log('in keys, mockCaches._cacheStores', mockCaches._cacheStores)
+        return Promise.resolve(Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key)));
+      }),
+    });
+  }),
+
+  match: jest.fn((request, cacheName) => {
+    const key = request.url || request;
+    return Promise.resolve(mockCaches._cacheStores[cacheName]?.[key] || null);
+  }),
+
+  delete: jest.fn((request, cacheName) => {
+    const key = request.url || request;
+    if (mockCaches._cacheStores[cacheName] && mockCaches._cacheStores[cacheName][key]) {
+      delete mockCaches._cacheStores[cacheName][key];
+      return Promise.resolve(true);
+    }
+    return Promise.resolve(false);
+  }),
+
+  keys: jest.fn(() => {
+    // Collect all cache keys across all cache stores
+    console.log('in mockCaches._cacheStores', mockCaches._cacheStores)
+    return Promise.resolve(Object.keys(mockCaches._cacheStores).flatMap(cacheName =>
+      Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key))
+    ));
+  }),
+};
From 9ab443683be3d968e95d3102126b6431e0884220 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 17:16:42 -0400
Subject: [PATCH 28/52] Rename to mock Web API

---
 test/js/lib/{mock-cache.js => mock-web-api.js} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/js/lib/{mock-cache.js => mock-web-api.js} (100%)

diff --git a/test/js/lib/mock-cache.js b/test/js/lib/mock-web-api.js
similarity index 100%
rename from test/js/lib/mock-cache.js
rename to test/js/lib/mock-web-api.js
From fd04109e650c8f420615dc686724f5b12d9c8cba Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 17:23:19 -0400
Subject: [PATCH 29/52] Modularize node web APIs used for mocking

---
 test/js/lib/mock-web-api.js | 75 ----------
 test/js/lib/node-web-api.js | 171 ++++++++++++++++++++++
 test/js/lib/ontology-validation.test.js | 179 +-----------------------
 3 files changed, 178 insertions(+), 247 deletions(-)
 delete mode 100644 test/js/lib/mock-web-api.js
 create mode 100644 test/js/lib/node-web-api.js

diff --git a/test/js/lib/mock-web-api.js b/test/js/lib/mock-web-api.js
deleted file mode 100644
index 73142c024..000000000
--- a/test/js/lib/mock-web-api.js
+++ /dev/null
@@ -1,75 +0,0 @@
-/** @fileoverview Mock for the Cache interface of the Web API
- *
- * https://developer.mozilla.org/en-US/docs/Web/API/Cache
-*/
-
-export const mockCaches = {
-  _cacheStores: {},
-
-  open: jest.fn((cacheName) => {
-    console.log('cacheName', cacheName)
-    // Initialize the store for the cache name if it doesn't exist
-    if (!mockCaches._cacheStores[cacheName]) {
-      mockCaches._cacheStores[cacheName] = {};
-    }
-
-    console.log('mockCaches._cacheStores[cacheName]', mockCaches._cacheStores[cacheName])
-
-    return Promise.resolve({
-      add: jest.fn((request) => {
-        const response = new Response(`Response for ${request.url}`); // Mocking a response
-        return mockCaches.put(request, response, cacheName);
-      }),
-      addAll: jest.fn((requests) => {
-        const responses = requests.map(req => new Response(`Response for ${req.url}`));
-        return Promise.all(responses.map((response, index) => {
-          return mockCaches.put(requests[index], response, cacheName);
-        }));
-      }),
-      delete: jest.fn((request) => {
-        const key = request.url || request;
-        if (mockCaches._cacheStores[cacheName][key]) {
-          delete mockCaches._cacheStores[cacheName][key];
-          return Promise.resolve(true);
-        }
-        return Promise.resolve(false);
-      }),
-      match: jest.fn((request) => {
-        const key = request.url || request;
-        return Promise.resolve(mockCaches._cacheStores[cacheName][key] || undefined);
-      }),
-      put: jest.fn((request, response) => {
-        console.log('in put, cacheName, request', cacheName, request)
-        const key = request.url || request;
-        mockCaches._cacheStores[cacheName][key] = response;
-        return Promise.resolve();
-      }),
-      keys: jest.fn(() => {
-        console.log('in keys, mockCaches._cacheStores', mockCaches._cacheStores)
-        return Promise.resolve(Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key)));
-      }),
-    });
-  }),
-
-  match: jest.fn((request, cacheName) => {
-    const key = request.url || request;
-    return Promise.resolve(mockCaches._cacheStores[cacheName]?.[key] || null);
-  }),
-
-  delete: jest.fn((request, cacheName) => {
-    const key = request.url || request;
-    if (mockCaches._cacheStores[cacheName] && mockCaches._cacheStores[cacheName][key]) {
-      delete mockCaches._cacheStores[cacheName][key];
-      return Promise.resolve(true);
-    }
-    return Promise.resolve(false);
-  }),
-
-  keys: jest.fn(() => {
-    // Collect all cache keys across all cache stores
-    console.log('in mockCaches._cacheStores', mockCaches._cacheStores)
-    return Promise.resolve(Object.keys(mockCaches._cacheStores).flatMap(cacheName =>
-      Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key))
-    ));
-  }),
-};
diff --git a/test/js/lib/node-web-api.js b/test/js/lib/node-web-api.js
new file mode 100644
index 000000000..4ef5783f2
--- /dev/null
+++ b/test/js/lib/node-web-api.js
@@ -0,0 +1,171 @@
+/** @fileoverview Node port for interfaces of Web API; helps mocking
+ *
+ * https://developer.mozilla.org/en-US/docs/Web/API/Cache
+*/
+const { Readable } = require('stream');
+
+export const nodeCaches = {
+  _cacheStores: {},
+
+  open: jest.fn((cacheName) => {
+    console.log('cacheName', cacheName)
+    // Initialize the store for the cache name if it doesn't exist
+    if (!nodeCaches._cacheStores[cacheName]) {
+      nodeCaches._cacheStores[cacheName] = {};
+    }
+
+    console.log('nodeCaches._cacheStores[cacheName]', nodeCaches._cacheStores[cacheName])
+
+    return Promise.resolve({
+      add: jest.fn((request) => {
+        const response = new Response(`Response for ${request.url}`);
+        return nodeCaches.put(request, response, cacheName);
+      }),
+      addAll: jest.fn((requests) => {
+        const responses = requests.map(req => new Response(`Response for ${req.url}`));
+        return Promise.all(responses.map((response, index) => {
+          return nodeCaches.put(requests[index], response, cacheName);
+        }));
+      }),
+      delete: jest.fn((request) => {
+        const key = request.url || request;
+        if (nodeCaches._cacheStores[cacheName][key]) {
+          delete nodeCaches._cacheStores[cacheName][key];
+          return Promise.resolve(true);
+        }
+        return Promise.resolve(false);
+      }),
+      match: jest.fn((request) => {
+        const key = request.url || request;
+        return Promise.resolve(nodeCaches._cacheStores[cacheName][key] || undefined);
+      }),
+      put: jest.fn((request, response) => {
+        console.log('in put, cacheName, request', cacheName, request)
+        const key = request.url || request;
+        nodeCaches._cacheStores[cacheName][key] = response;
+        return Promise.resolve();
+      }),
+      keys: jest.fn(() => {
+        console.log('in keys, nodeCaches._cacheStores', nodeCaches._cacheStores)
+        return Promise.resolve(Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key)));
+      }),
+    });
+  }),
+
+  match: jest.fn((request, cacheName) => {
+    const key = request.url || request;
+    return Promise.resolve(nodeCaches._cacheStores[cacheName]?.[key] || null);
+  }),
+
+  delete: jest.fn((request, cacheName) => {
+    const key = request.url || request;
+    if (nodeCaches._cacheStores[cacheName] && nodeCaches._cacheStores[cacheName][key]) {
+      delete nodeCaches._cacheStores[cacheName][key];
+      return Promise.resolve(true);
+    }
+    return Promise.resolve(false);
+  }),
+
+  keys: jest.fn(() => {
+    // Collect all cache keys across all cache stores
+    console.log('in nodeCaches._cacheStores', nodeCaches._cacheStores)
+    return Promise.resolve(Object.keys(nodeCaches._cacheStores).flatMap(cacheName =>
+      Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key))
+    ));
+  }),
+};
+
+export const nodeRequest = class {
+  constructor(url, options = {}) {
+    this.url = url;
+    this.method = options.method || 'GET';
+    this.headers = new Headers(options.headers);
+  }
+};
+
+export const nodeResponse = class {
+  constructor(body, options = {}) {
+    this.status = options.status || 200;
+    this.ok = this.status >= 200 && this.status < 300;
+    this.headers = new Headers(options.headers);
+    this.bodyUsed = false;
+
+    // Ensure the body is a string or Buffer
+    const readableBody = typeof body === 'string' ? body : body.toString();
+
+    // Create a ReadableStream
+    const stream = new Readable({
+      read() {
+        this.push(readableBody); // Push the string or buffer into the stream
+        this.push(null); // Signal the end of the stream
+      },
+    });
+
+    this.body = stream; // Assign the readable stream to body
+  }
+
+  // Define text() method to read from the stream
+  async text() {
+    if (this.bodyUsed) {
+      return Promise.reject(new TypeError('Already read'));
+    }
+    this.bodyUsed = true;
+    return new Promise((resolve, reject) => {
+      const chunks = [];
+      this.body.on('data', (chunk) => {
+        chunks.push(chunk);
+      });
+      this.body.on('end', () => {
+        resolve(Buffer.concat(chunks).toString());
+      });
+      this.body.on('error', (err) => {
+        reject(err);
+      });
+    });
+  }
+
+  // You can also implement json() if needed
+  async json() {
+    const text = await this.text();
+    return JSON.parse(text);
+  }
+};
+
+
+export const nodeHeaders = class {
+  constructor(headers) {
+    this.map = new Map();
+    if (headers) {
+      for (const [key, value] of Object.entries(headers)) {
+        this.append(key, value);
+      }
+    }
+  }
+
+  append(key, value) {
+    const existingValue = this.map.get(key.toLowerCase());
+    if (existingValue) {
+      this.map.set(key.toLowerCase(), existingValue + ', ' + value);
+    } else {
+      this.map.set(key.toLowerCase(), value);
+    }
+  }
+
+  get(key) {
+    return this.map.get(key.toLowerCase()) || null;
+  }
+
+  has(key) {
+    return this.map.has(key.toLowerCase());
+  }
+
+  set(key, value) {
+    this.map.set(key.toLowerCase(), value);
+  }
+
+  delete(key) {
+    this.map.delete(key.toLowerCase());
+  }
+
+  // Additional methods if needed
+}
diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js
index 7ad489b72..8362d9803 100644
--- a/test/js/lib/ontology-validation.test.js
+++ b/test/js/lib/ontology-validation.test.js
@@ -1,186 +1,21 @@
 const fetch = require('node-fetch')
-const { Readable } = require('stream');
 
 import {
   fetchOntologies
 } from 'lib/validation/ontology-validation'
+import {
+  nodeCaches, nodeHeaders, nodeRequest, nodeResponse
+} from './node-web-api'
 
 
 describe('Client-side file validation for AnnData', () => {
   beforeAll(() => {
     global.fetch = fetch
 
-    const mockCaches = {
-      _cacheStores: {},
-
-      open: jest.fn((cacheName) => {
-        console.log('cacheName', cacheName)
-        // Initialize the store for the cache name if it doesn't exist
-        if (!mockCaches._cacheStores[cacheName]) {
-          mockCaches._cacheStores[cacheName] = {};
-        }
-
-        console.log('mockCaches._cacheStores[cacheName]', mockCaches._cacheStores[cacheName])
-
-        return Promise.resolve({
-          add: jest.fn((request) => {
-            const response = new Response(`Response for ${request.url}`); // Mocking a response
-            return mockCaches.put(request, response, cacheName);
-          }),
-          addAll: jest.fn((requests) => {
-            const responses = requests.map(req => new Response(`Response for ${req.url}`));
-            return Promise.all(responses.map((response, index) => {
-              return mockCaches.put(requests[index], response, cacheName);
-            }));
-          }),
-          delete: jest.fn((request) => {
-            const key = request.url || request;
-            if (mockCaches._cacheStores[cacheName][key]) {
-              delete mockCaches._cacheStores[cacheName][key];
-              return Promise.resolve(true);
-            }
-            return Promise.resolve(false);
-          }),
-          match: jest.fn((request) => {
-            const key = request.url || request;
-            return Promise.resolve(mockCaches._cacheStores[cacheName][key] || undefined);
-          }),
-          put: jest.fn((request, response) => {
-            console.log('in put, cacheName, request', cacheName, request)
-            const key = request.url || request;
-            mockCaches._cacheStores[cacheName][key] = response;
-            return Promise.resolve();
-          }),
-          keys: jest.fn(() => {
-            console.log('in keys, mockCaches._cacheStores', mockCaches._cacheStores)
-            return Promise.resolve(Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key)));
-          }),
-        });
-      }),
-
-      match: jest.fn((request, cacheName) => {
-        const key = request.url || request;
-        return Promise.resolve(mockCaches._cacheStores[cacheName]?.[key] || null);
-      }),
-
-      delete: jest.fn((request, cacheName) => {
-        const key = request.url || request;
-        if (mockCaches._cacheStores[cacheName] && mockCaches._cacheStores[cacheName][key]) {
-          delete mockCaches._cacheStores[cacheName][key];
-          return Promise.resolve(true);
-        }
-        return Promise.resolve(false);
-      }),
-
-      keys: jest.fn(() => {
-        // Collect all cache keys across all cache stores
-        console.log('in mockCaches._cacheStores', mockCaches._cacheStores)
-        return Promise.resolve(Object.keys(mockCaches._cacheStores).flatMap(cacheName =>
-          Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key))
-        ));
-      }),
-    };
-
-    global.caches = mockCaches;
-
-    // Mock Request
-    global.Request = class {
-      constructor(url, options = {}) {
-        this.url = url;
-        this.method = options.method || 'GET';
-        this.headers = new Headers(options.headers);
-      }
-    };
-
-    // Mock Response
-    global.Response = class {
-      constructor(body, options = {}) {
-        this.status = options.status || 200;
-        this.ok = this.status >= 200 && this.status < 300;
-        this.headers = new Headers(options.headers);
-        this.bodyUsed = false;
-
-        // Ensure the body is a string or Buffer
-        const readableBody = typeof body === 'string' ? body : body.toString();
-
-        // Create a ReadableStream
-        const stream = new Readable({
-          read() {
-            this.push(readableBody); // Push the string or buffer into the stream
-            this.push(null); // Signal the end of the stream
-          },
-        });
-
-        this.body = stream; // Assign the readable stream to body
-      }
-
-      // Define text() method to read from the stream
-      async text() {
-        if (this.bodyUsed) {
-          return Promise.reject(new TypeError('Already read'));
-        }
-        this.bodyUsed = true;
-        return new Promise((resolve, reject) => {
-          const chunks = [];
-          this.body.on('data', (chunk) => {
-            chunks.push(chunk);
-          });
-          this.body.on('end', () => {
-            resolve(Buffer.concat(chunks).toString());
-          });
-          this.body.on('error', (err) => {
-            reject(err);
-          });
-        });
-      }
-
-      // You can also implement json() if needed
-      async json() {
-        const text = await this.text();
-        return JSON.parse(text);
-      }
-    };
-
-
-    global.Headers = class {
-      constructor(headers) {
-        this.map = new Map();
-        if (headers) {
-          for (const [key, value] of Object.entries(headers)) {
-            this.append(key, value);
-          }
-        }
-      }
-
-      append(key, value) {
-        const existingValue = this.map.get(key.toLowerCase());
-        if (existingValue) {
-          this.map.set(key.toLowerCase(), existingValue + ', ' + value);
-        } else {
-          this.map.set(key.toLowerCase(), value);
-        }
-      }
-
-      get(key) {
-        return this.map.get(key.toLowerCase()) || null;
-      }
-
-      has(key) {
-        return this.map.has(key.toLowerCase());
-      }
-
-      set(key, value) {
-        this.map.set(key.toLowerCase(), value);
-      }
-
-      delete(key) {
-        this.map.delete(key.toLowerCase());
-      }
-
-      // Additional methods if needed
-    }
-
-
+    global.caches = nodeCaches;
+    global.Response = nodeResponse
+    global.Request = nodeRequest
+    global.Headers = nodeHeaders
   })
 
   it('Parses AnnData headers', async () => {
     const ontologies = await fetchOntologies()
     console.log('ontologies')
     expect(1).toEqual(1)
   })
 })
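The `nodeResponse.text()` ported above drains a Node Readable stream into a string. The same technique in isolation, as a hedged sketch using only Node's built-in stream module (the helper name is illustrative):

    const { Readable } = require('stream')

    /** Collect a Readable stream's chunks into one string */
    function streamToText(stream) {
      return new Promise((resolve, reject) => {
        const chunks = []
        stream.on('data', chunk => chunks.push(Buffer.from(chunk)))
        stream.on('end', () => resolve(Buffer.concat(chunks).toString()))
        stream.on('error', reject)
      })
    }

    // streamToText(Readable.from(['a', 'b'])).then(console.log)  // 'ab'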
From f60fa7be39b0e1a0a142d58977b7e813c827a170 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 17:37:51 -0400
Subject: [PATCH 30/52] Lint, fix caches.keys()

---
 .../lib/validation/ontology-validation.js | 7 -
 test/js/lib/node-web-api.js | 147 +++++++++---------
 test/js/lib/ontology-validation.test.js | 2 +-
 3 files changed, 71 insertions(+), 85 deletions(-)

diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js
index baaba3122..5de2cfa6d 100644
--- a/app/javascript/lib/validation/ontology-validation.js
+++ b/app/javascript/lib/validation/ontology-validation.js
@@ -52,12 +52,10 @@ export async function cacheFetch(url) {
 
   const decompressedUrl = url.replace('.gz', '')
   const response = await cache.match(decompressedUrl)
-  console.log('in cacheFetch, typeof response', typeof response)
   if (typeof response === 'undefined') {
     // If cache miss, then fetch, decompress, and put response in cache
     const data = await fetchGzipped(url)
     const contentLength = data.length
-    // console.log('data', data)
     const decompressedResponse = new Response(
       new Blob([data], { type: 'text/tab-separated-values' }),
       { headers: new Headers({ 'Content-Length': contentLength }) }
@@ -100,14 +98,9 @@ export async function fetchOntologies() {
   for (let i = 0; i < ontologyNames.length; i++) {
     const ontologyName = ontologyNames[i]
     const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz`
-    console.log('ontologyUrl', ontologyUrl)
     const response = await cacheFetch(ontologyUrl)
-
-    console.log('response', response)
-
     const tsv = await response.text()
-
-    // console.log('tsv', tsv)
     const lines = tsv.split('\n')
     ontologies[ontologyName] = {}
 
diff --git a/test/js/lib/node-web-api.js b/test/js/lib/node-web-api.js
index 4ef5783f2..9ccb62aa8 100644
--- a/test/js/lib/node-web-api.js
+++ b/test/js/lib/node-web-api.js
@@ -1,171 +1,163 @@
+/* eslint-disable require-jsdoc */
 /** @fileoverview Node port for interfaces of Web API; helps mocking
  *
  * https://developer.mozilla.org/en-US/docs/Web/API/Cache
 */
-const { Readable } = require('stream');
+const { Readable } = require('stream')
 
 export const nodeCaches = {
   _cacheStores: {},
 
-  open: jest.fn((cacheName) => {
-    console.log('cacheName', cacheName)
+  open: jest.fn(cacheName => {
     // Initialize the store for the cache name if it doesn't exist
     if (!nodeCaches._cacheStores[cacheName]) {
-      nodeCaches._cacheStores[cacheName] = {};
+      nodeCaches._cacheStores[cacheName] = {}
     }
 
-    console.log('nodeCaches._cacheStores[cacheName]', nodeCaches._cacheStores[cacheName])
-
     return Promise.resolve({
-      add: jest.fn((request) => {
-        const response = new Response(`Response for ${request.url}`);
-        return nodeCaches.put(request, response, cacheName);
+      add: jest.fn(request => {
+        const response = new Response(`Response for ${request.url}`)
+        return nodeCaches.put(request, response, cacheName)
       }),
-      addAll: jest.fn((requests) => {
-        const responses = requests.map(req => new Response(`Response for ${req.url}`));
+      addAll: jest.fn(requests => {
+        const responses = requests.map(req => new Response(`Response for ${req.url}`))
         return Promise.all(responses.map((response, index) => {
-          return nodeCaches.put(requests[index], response, cacheName);
-        }));
+          return nodeCaches.put(requests[index], response, cacheName)
+        }))
       }),
-      delete: jest.fn((request) => {
-        const key = request.url || request;
+      delete: jest.fn(request => {
+        const key = request.url || request
         if (nodeCaches._cacheStores[cacheName][key]) {
-          delete nodeCaches._cacheStores[cacheName][key];
-          return Promise.resolve(true);
+          delete nodeCaches._cacheStores[cacheName][key]
+          return Promise.resolve(true)
         }
-        return Promise.resolve(false);
+        return Promise.resolve(false)
       }),
-      match: jest.fn((request) => {
-        const key = request.url || request;
-        return Promise.resolve(nodeCaches._cacheStores[cacheName][key] || undefined);
+      match: jest.fn(request => {
+        const key = request.url || request
+        return Promise.resolve(nodeCaches._cacheStores[cacheName][key] || undefined)
       }),
       put: jest.fn((request, response) => {
-        console.log('in put, cacheName, request', cacheName, request)
-        const key = request.url || request;
-        nodeCaches._cacheStores[cacheName][key] = response;
-        return Promise.resolve();
+        const key = request.url || request
+        nodeCaches._cacheStores[cacheName][key] = response
+        return Promise.resolve()
       }),
       keys: jest.fn(() => {
-        console.log('in keys, nodeCaches._cacheStores', nodeCaches._cacheStores)
-        return Promise.resolve(Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key)));
-      }),
-    });
+        return Promise.resolve(Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key)))
+      })
+    })
   }),
 
   match: jest.fn((request, cacheName) => {
-    const key = request.url || request;
-    return Promise.resolve(nodeCaches._cacheStores[cacheName]?.[key] || null);
+    const key = request.url || request
+    return Promise.resolve(nodeCaches._cacheStores[cacheName]?.[key] || null)
   }),
 
   delete: jest.fn((request, cacheName) => {
-    const key = request.url || request;
+    const key = request.url || request
    if (nodeCaches._cacheStores[cacheName] && nodeCaches._cacheStores[cacheName][key]) {
-      delete nodeCaches._cacheStores[cacheName][key];
-      return Promise.resolve(true);
+      delete nodeCaches._cacheStores[cacheName][key]
+      return Promise.resolve(true)
    }
-    return Promise.resolve(false);
+    return Promise.resolve(false)
  }),
 
   keys: jest.fn(() => {
     // Collect all cache keys across all cache stores
-    console.log('in nodeCaches._cacheStores', nodeCaches._cacheStores)
-    return Promise.resolve(Object.keys(nodeCaches._cacheStores).flatMap(cacheName =>
-      Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key))
-    ));
-  }),
-};
+    return Object.keys(nodeCaches._cacheStores)
+  })
+}
 
 export const nodeRequest = class {
   constructor(url, options = {}) {
-    this.url = url;
-    this.method = options.method || 'GET';
-    this.headers = new Headers(options.headers);
+    this.url = url
+    this.method = options.method || 'GET'
+    this.headers = new Headers(options.headers)
   }
-};
+}
 
 export const nodeResponse = class {
   constructor(body, options = {}) {
-    this.status = options.status || 200;
-    this.ok = this.status >= 200 && this.status < 300;
-    this.headers = new Headers(options.headers);
-    this.bodyUsed = false;
+    this.status = options.status || 200
+    this.ok = this.status >= 200 && this.status < 300
+    this.headers = new Headers(options.headers)
+    this.bodyUsed = false
 
     // Ensure the body is a string or Buffer
-    const readableBody = typeof body === 'string' ? body : body.toString();
+    const readableBody = typeof body === 'string' ? body : body.toString()
 
     // Create a ReadableStream
     const stream = new Readable({
       read() {
-        this.push(readableBody); // Push the string or buffer into the stream
-        this.push(null); // Signal the end of the stream
+        this.push(readableBody) // Push the string or buffer into the stream
+        this.push(null) // Signal the end of the stream
       },
-    });
+    })
 
-    this.body = stream; // Assign the readable stream to body
+    this.body = stream // Assign the readable stream to body
   }
 
   // Define text() method to read from the stream
   async text() {
     if (this.bodyUsed) {
-      return Promise.reject(new TypeError('Already read'));
+      return Promise.reject(new TypeError('Already read'))
     }
-    this.bodyUsed = true;
+    this.bodyUsed = true
     return new Promise((resolve, reject) => {
-      const chunks = [];
-      this.body.on('data', (chunk) => {
-        chunks.push(chunk);
-      });
+      const chunks = []
+      this.body.on('data', chunk => {
+        chunks.push(chunk)
+      })
       this.body.on('end', () => {
-        resolve(Buffer.concat(chunks).toString());
-      });
+        resolve(Buffer.concat(chunks).toString())
+      })
       this.body.on('error', (err) => {
-        reject(err);
-      });
-    });
+        reject(err)
+      })
+    })
   }
 
   // You can also implement json() if needed
   async json() {
-    const text = await this.text();
-    return JSON.parse(text);
+    const text = await this.text()
+    return JSON.parse(text)
   }
-};
+}
 
 
 export const nodeHeaders = class {
   constructor(headers) {
-    this.map = new Map();
+    this.map = new Map()
     if (headers) {
       for (const [key, value] of Object.entries(headers)) {
-        this.append(key, value);
+        this.append(key, value)
       }
     }
   }
 
   append(key, value) {
-    const existingValue = this.map.get(key.toLowerCase());
+    const existingValue = this.map.get(key.toLowerCase())
     if (existingValue) {
-      this.map.set(key.toLowerCase(), existingValue + ', ' + value);
+      this.map.set(key.toLowerCase(), existingValue + ', ' + value)
     } else {
-      this.map.set(key.toLowerCase(), value);
+      this.map.set(key.toLowerCase(), value)
     }
   }
 
   get(key) {
-    return this.map.get(key.toLowerCase()) || null;
+    return this.map.get(key.toLowerCase()) || null
   }
 
   has(key) {
-    return this.map.has(key.toLowerCase());
+    return this.map.has(key.toLowerCase())
   }
 
   set(key, value) {
-    this.map.set(key.toLowerCase(), value);
+    this.map.set(key.toLowerCase(), value)
   }
 
   delete(key) {
-    this.map.delete(key.toLowerCase());
+    this.map.delete(key.toLowerCase())
   }
 
   // Additional methods if needed
diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js
index 8362d9803..f85db0b98 100644
--- a/test/js/lib/ontology-validation.test.js
+++ b/test/js/lib/ontology-validation.test.js
@@ -20,7 +20,7 @@ describe('Client-side file validation for AnnData', () => {
 
   it('Parses AnnData headers', async () => {
     const ontologies = await fetchOntologies()
-    console.log('ontologies')
+    console.log('ontologies', ontologies)
     expect(1).toEqual(1)
   })
 })
From 2d06a9e7949ff430dfa2eec69591b4f319226a89 Mon Sep 17 00:00:00 2001
From: bistline
Date: Thu, 19 Sep 2024 09:49:43 -0400
Subject: [PATCH 31/52] Fixing test regressions

---
 test/js/upload-wizard/file-upload-control.test.js | 2 +-
 test/models/ingest_job_test.rb | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/js/upload-wizard/file-upload-control.test.js b/test/js/upload-wizard/file-upload-control.test.js
index dd1d03a03..829dd760f 100644
--- a/test/js/upload-wizard/file-upload-control.test.js
+++ b/test/js/upload-wizard/file-upload-control.test.js
@@ -382,7 +382,7 @@ describe('file upload control validates the selected file', () => {
     expect(bucketPathInput).toHaveValue('bad.pdf')
     await waitFor(() => {
       expect(screen.getByTestId('validation-error')).toBeInTheDocument()
-      expect(screen.getByText('Allowed extensions are: .tsv')).toBeVisible()
+      expect(screen.getByText('Allowed extensions are .tsv')).toBeVisible()
     })
   })
 })
diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb
index edbcb60b6..4a8d0b4fa 100644
--- a/test/models/ingest_job_test.rb
+++ b/test/models/ingest_job_test.rb
@@ -219,7 +219,7 @@ class IngestJobTest < ActiveSupport::TestCase
                                       study: @basic_study,
                                       upload_file_size: 1.megabyte,
                                       cell_input: %w[A B C D],
-                                      has_raw_counts: false,
+                                      has_raw_counts: true,
                                       reference_file: false,
                                       annotation_input: [
                                         { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
@@ -232,9 +232,9 @@ class IngestJobTest < ActiveSupport::TestCase
     })
     params_object = AnnDataIngestParameters.new(
       anndata_file: ann_data_file.gs_url, obsm_keys: ann_data_file.ann_data_file_info.obsm_key_names,
-      file_size: ann_data_file.upload_file_size
+      file_size: ann_data_file.upload_file_size, extract_raw_counts: true
     )
-    assert_not params_object.extract.include?('raw_counts')
+    assert params_object.extract.include?('raw_counts')
     job = IngestJob.new(
       study: @basic_study, study_file: ann_data_file, user: @user, action: :ingest_anndata, params_object:
     )
@@ -269,7 +269,7 @@ class IngestJobTest < ActiveSupport::TestCase
       studyAccession: @basic_study.accession,
       jobStatus: 'success',
       referenceAnnDataFile: false,
-      extractedFileTypes: %w[cluster metadata processed_expression],
+      extractedFileTypes: %w[cluster metadata processed_expression raw_counts],
       machineType: 'n2d-highmem-4',
       bootDiskSizeGb: 300,
       exitStatus: 0
From b6376fbf17706f4eb0d3ff4b86a5b1dae57b5b08 Mon Sep 17 00:00:00 2001
From: bistline
Date: Thu, 19 Sep 2024 10:28:36 -0400
Subject: [PATCH 32/52] Apply suggestions from code review

Co-authored-by: Eric Weitz
---
 app/javascript/components/upload/FileUploadControl.jsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx
index cbf4bcadc..a53f67801 100644
--- a/app/javascript/components/upload/FileUploadControl.jsx
a/app/javascript/components/upload/FileUploadControl.jsx +++ b/app/javascript/components/upload/FileUploadControl.jsx @@ -104,11 +104,11 @@ export default function FileUploadControl({ } // don't continue unless a dot is present (otherwise, no valid file extension) - if (trimmedPath.indexOf('.') < 0 ) { return false } + if (!trimmedPath.includes('.')) { return false } const fileType = file.file_type const fileExtension = `.${trimmedPath.split('.').slice(-1)[0]}` - if (!inputAcceptExts.includes(fileExtension)) { + if (fileExtension.length > 1 && !inputAcceptExts.includes(fileExtension)) { const invalidExt = { errors: [ [ From 56ce531d173992da70cb34eaaabe117db8a83cd7 Mon Sep 17 00:00:00 2001 From: bistline Date: Thu, 19 Sep 2024 11:07:00 -0400 Subject: [PATCH 33/52] Handle corner case of nil file size --- app/lib/download_quota_service.rb | 4 ++-- test/services/download_quota_service_test.rb | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/app/lib/download_quota_service.rb b/app/lib/download_quota_service.rb index 6877d28af..60f014b95 100644 --- a/app/lib/download_quota_service.rb +++ b/app/lib/download_quota_service.rb @@ -19,7 +19,7 @@ def self.grant_user_exemption(user) def self.download_exceeds_quota?(user, requested_bytes) return false if user.daily_download_quota.nil? - user_quota = user.daily_download_quota + requested_bytes + user_quota = user.daily_download_quota + requested_bytes.to_i user_quota > download_quota end @@ -28,7 +28,7 @@ def self.increment_user_quota(user, requested_bytes) # skip incrementing quota if exemption is set via nil value return true if user.daily_download_quota.nil? - user_quota = user.daily_download_quota + requested_bytes + user_quota = user.daily_download_quota + requested_bytes.to_i user.update(daily_download_quota: user_quota) end diff --git a/test/services/download_quota_service_test.rb b/test/services/download_quota_service_test.rb index e65f7ec38..020af3acd 100644 --- a/test/services/download_quota_service_test.rb +++ b/test/services/download_quota_service_test.rb @@ -57,4 +57,11 @@ class DownloadQuotaServiceTest < ActiveSupport::TestCase ) assert_equal 1.megabyte, DownloadQuotaService.download_quota end + + test 'should handle nil file size' do + starting_quota = @user.daily_download_quota + DownloadQuotaService.increment_user_quota(@user, nil) + @user.reload + assert_equal starting_quota, @user.daily_download_quota + end end From db5791f3fd96d45ec7eed9916376501d0ce7b93f Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 19 Sep 2024 13:40:43 -0400 Subject: [PATCH 34/52] Make test meaningful, refine cacheFetch to work with simpler mock --- .../lib/validation/ontology-validation.js | 16 ++++++++++------ test/js/lib/node-web-api.js | 4 +++- test/js/lib/ontology-validation.test.js | 8 +++++--- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 5de2cfa6d..695d70aac 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -25,7 +25,6 @@ async function getServiceWorkerCache() { // Delete other versions of ontologies cache; there should be 1 per dodmain const cacheNames = await caches.keys() - console.log('cacheNames', cacheNames) cacheNames.forEach(name => { if (name.startsWith('ontologies-') && name !== currentOntologies) { caches.delete(name) @@ -40,9 +39,9 @@ async function getServiceWorkerCache() { /** Fetch .gz file, decompress it, 
return plaintext */ export async function fetchGzipped(url) { const response = await fetch(url) - const blob = await response.blob(); - const uint8Array = new Uint8Array(await blob.arrayBuffer()); - const plaintext = strFromU8(decompressSync(uint8Array)); + const blob = await response.blob() + const uint8Array = new Uint8Array(await blob.arrayBuffer()) + const plaintext = strFromU8(decompressSync(uint8Array)) return plaintext } @@ -57,8 +56,13 @@ export async function cacheFetch(url) { const data = await fetchGzipped(url) const contentLength = data.length const decompressedResponse = new Response( - new Blob([data], { type: 'text/tab-separated-values' }), - { headers: new Headers({ 'Content-Length': contentLength }) } + data, + { + headers: new Headers({ + 'Content-Length': contentLength, + 'Content-Type': 'text/tab-separated-values' + }) + } ) await cache.put(decompressedUrl, decompressedResponse) return await cache.match(decompressedUrl) diff --git a/test/js/lib/node-web-api.js b/test/js/lib/node-web-api.js index 9ccb62aa8..8e0370658 100644 --- a/test/js/lib/node-web-api.js +++ b/test/js/lib/node-web-api.js @@ -1,6 +1,7 @@ /* eslint-disable require-jsdoc */ /** @fileoverview Node port for interfaces of Web API; helps mocking * + * https://developer.mozilla.org/en-US/docs/Web/API/CacheStorage * https://developer.mozilla.org/en-US/docs/Web/API/Cache */ const { Readable } = require('stream') @@ -8,6 +9,7 @@ const { Readable } = require('stream') export const nodeCaches = { _cacheStores: {}, + /** Returns a Promise that resolves to the requested Cache object. */ open: jest.fn(cacheName => { // Initialize the store for the cache name if it doesn't exist if (!nodeCaches._cacheStores[cacheName]) { @@ -91,7 +93,7 @@ export const nodeResponse = class { read() { this.push(readableBody) // Push the string or buffer into the stream this.push(null) // Signal the end of the stream - }, + } }) this.body = stream // Assign the readable stream to body diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js index f85db0b98..f0f06e171 100644 --- a/test/js/lib/ontology-validation.test.js +++ b/test/js/lib/ontology-validation.test.js @@ -18,9 +18,11 @@ describe('Client-side file validation for AnnData', () => { global.Headers = nodeHeaders }) - it('Parses AnnData headers', async () => { + it('Parses minified ontologies', async () => { const ontologies = await fetchOntologies() - console.log('ontologies', ontologies) - expect(1).toEqual(1) + const expectedOntologyNames = ['mondo', 'pato', 'efo', 'uberon', 'ncbitaxon'] + expect(Object.keys(ontologies)).toEqual(expectedOntologyNames) + const expectedSpeciesNames = ['Homo sapiens', 'human'] + expect(ontologies.ncbitaxon['NCBITaxon_9606']).toEqual(expectedSpeciesNames) }) }) From 1ecf29608fc53083718c3bf56197d9b7fc2e8ed8 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 19 Sep 2024 13:53:54 -0400 Subject: [PATCH 35/52] Remove "BETA" qualifier from AnnData upload UI --- app/javascript/components/upload/UploadExperienceSplitter.jsx | 2 +- app/javascript/components/upload/WizardNavPanel.jsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/javascript/components/upload/UploadExperienceSplitter.jsx b/app/javascript/components/upload/UploadExperienceSplitter.jsx index 363bb2e64..5816ad64f 100644 --- a/app/javascript/components/upload/UploadExperienceSplitter.jsx +++ b/app/javascript/components/upload/UploadExperienceSplitter.jsx @@ -46,7 +46,7 @@ function RawUploadExperienceSplitter({ onClick={() => { 
setIsAnnDataExperience(true) setOverrideExperienceMode(true) }}> AnnData
Upload one AnnData (.h5ad) file diff --git a/app/javascript/components/upload/WizardNavPanel.jsx b/app/javascript/components/upload/WizardNavPanel.jsx index 44123b1d2..b7e955852 100644 --- a/app/javascript/components/upload/WizardNavPanel.jsx +++ b/app/javascript/components/upload/WizardNavPanel.jsx @@ -222,7 +222,7 @@ function MainStepsDisplay(formState, serverState, currentStep, setCurrentStep, m - AnnData BETA + AnnData From 4e14c4329048821b4ff315b2241d77274919cac5 Mon Sep 17 00:00:00 2001 From: bistline Date: Thu, 19 Sep 2024 13:54:38 -0400 Subject: [PATCH 36/52] Clear out existing cached filename on validation failure --- .../components/upload/FileUploadControl.jsx | 7 ++++ .../upload-wizard/file-upload-control.test.js | 42 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx index be806d26c..4c9d3b40d 100644 --- a/app/javascript/components/upload/FileUploadControl.jsx +++ b/app/javascript/components/upload/FileUploadControl.jsx @@ -77,6 +77,13 @@ export default function FileUploadControl({ name: newName, notes }) + } else if (issues.errors.length > 0 && file.uploadSelection) { + // clear out a previous known good file, if present + updateFile(file._id, { + uploadSelection: null, + upload_file_name: '', + name: '' + }) } } diff --git a/test/js/upload-wizard/file-upload-control.test.js b/test/js/upload-wizard/file-upload-control.test.js index 2db1de448..65177d335 100644 --- a/test/js/upload-wizard/file-upload-control.test.js +++ b/test/js/upload-wizard/file-upload-control.test.js @@ -356,4 +356,46 @@ describe('file upload control validates the selected file', () => { fireEvent.click(bucketToggle) expect(container.querySelector('#remote_location-input-123')).toBeVisible(); }) + + it ('clears out previous uploaded file after replace fails to validate', async () => { + const file = { + _id: '123', + name: 'cluster.txt', + status: 'new', + file_type: 'Cluster', + upload_file_name: 'cluster.txt', + uploadSelection: {} // needed to mock a file already being selected + } + const updateFileHolder = { updateFile: () => {} } + const updateFileSpy = jest.spyOn(updateFileHolder, 'updateFile') + render(( + + + + )) + + const replaceLink = screen.getByText('Replace') + expect(replaceLink).toBeVisible() + // test replacing with bad file & clearing out state + // this won't update the button state back to 'Choose file' since updateFile() is mocked above + // unsetting uploadSelection and upload_file_name achieves this in actual usage + fireFileSelectionEvent(screen.getByTestId('file-input'), { + fileName: 'cluster_bad.txt', + content: 'foo,X,Y\nTYPE,numeric,numeric\nCell1,1,0\n' + },true) + await waitForElementToBeRemoved(() => screen.getByTestId('file-validation-spinner')) + expect(updateFileSpy).toHaveBeenLastCalledWith('123', { + uploadSelection: null, + name: '', + upload_file_name: '' + }) + expect(screen.getByTestId('validation-error')).toHaveTextContent('after correcting cluster_bad.txt') + expect(screen.getByTestId('validation-error')) + .toHaveTextContent('First row, first column must be "NAME" (case insensitive). 
Your value was "foo') + }) }) From 7e42c7ab2bed760b6e8044dd2ce6d6ad39c6850d Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 19 Sep 2024 15:44:15 -0400 Subject: [PATCH 37/52] Skip ontology label validation for old-spec AnnData; no relevant test data available --- .../lib/validation/validate-anndata.js | 72 +++++++++---------- test/js/lib/validate-anndata.test.js | 18 ++--- 2 files changed, 40 insertions(+), 50 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 6553505d1..1e8cab04e 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -167,8 +167,6 @@ async function checkOntologyLabels(key, ontologies, groups) { /** Get ontology ID values for key in AnnData file */ async function getOntologyIdsAndLabels(requiredName, hdf5File) { - let ontologyIds = [] - const obs = await hdf5File.get('obs') const obsValues = await Promise.all(obs.values) @@ -178,10 +176,12 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { // own `categories` and `codes`. // See e.g. https://github.com/scverse/anndata/issues/879 const internalCategories = obsValues.find(o => o.name.endsWith('__categories')) - - let resolvedCategories = obsValues if (internalCategories) { - resolvedCategories = await Promise.all(internalCategories.values) + console.debug( + 'Encountered old-spec AnnData, skipping ontology label validation. ' + + 'Server-side processing will validate this' + ) + return null } const idKey = requiredName @@ -190,36 +190,30 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { const idGroup = obsValues.find(o => o.name.endsWith(idKey)) const labelGroup = obsValues.find(o => o.name.endsWith(labelKey)) - if (idGroup) { - if (internalCategories) { - ontologyIds = await idGroup.value - } else { - // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, - // sex) into a container with a `categories` frame and a `code` frame. - // - // - categories: external values, non-redundant array. E.g.: - // ["tuberculosis", "TB", "foo"] or ["female"] - // - // - codes: internal values, redundant array of integers that specify - // the index (position) of each category value in the array of obs - // (cells) - // - // This organization greatly decreases filesize, but requires more code - // to map paired obs annotations like `disease` (ontology IDs) to - // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. - const idCategories = await idGroup.values[0] - const idCodes = await idGroup.values[1] - const ids = await idCategories.value - const idIndexes = await idCodes.value - - const labelCategories = await labelGroup.values[0] - const labelCodes = await labelGroup.values[1] - const labels = await labelCategories.value - const labelIndexes = await labelCodes.value - - return [ids, idIndexes, labels, labelIndexes] - } - } + // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, + // sex) into a container with a `categories` frame and a `code` frame. + // + // - categories: external values, non-redundant array. 
E.g.: + // ["tuberculosis", "TB", "foo"] or ["female"] + // + // - codes: internal values, redundant array of integers that specify + // the index (position) of each category value in the array of obs + // (cells) + // + // This organization greatly decreases filesize, but requires more code + // to map paired obs annotations like `disease` (ontology IDs) to + // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. + const idCategories = await idGroup.values[0] + const idCodes = await idGroup.values[1] + const ids = await idCategories.value + const idIndexes = await idCodes.value + + const labelCategories = await labelGroup.values[0] + const labelCodes = await labelGroup.values[1] + const labels = await labelCategories.value + const labelIndexes = await labelCodes.value + + return [ids, idIndexes, labels, labelIndexes] } /** Validate ontology labels for required metadata columns in AnnData file */ @@ -235,9 +229,11 @@ async function validateOntologyLabels(hdf5File) { const key = column.split('__ontology_label')[0] const groups = await getOntologyIdsAndLabels(key, hdf5File) - issues = issues.concat( - await checkOntologyLabels(key, ontologies, groups) - ) + if (groups) { + issues = issues.concat( + await checkOntologyLabels(key, ontologies, groups) + ) + } } return issues diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js index be43f8809..b89d27c98 100644 --- a/test/js/lib/validate-anndata.test.js +++ b/test/js/lib/validate-anndata.test.js @@ -77,18 +77,9 @@ describe('Client-side file validation for AnnData', () => { expect(parseResults.issues[0]).toEqual(expectedIssue) }) - // it('Reports valid ontology labels as valid', async () => { - // const issues = await checkOntologyLabels( - // // Underscore or colon can delimit shortname and number; - // // disease can use MONDO or PATO IDs. - // 'disease', ['MONDO_0000001', 'MONDO:0000001', 'PATO:0000001'] - // ) - // expect(issues).toHaveLength(0) - // }) - - // TODO: Uncomment after PR merge + // // TODO: Uncomment after PR merge // it('Parses AnnData rows and reports invalid ontology labels', async () => { - // const url = `${BASE_URL}/invalid_disease_id.h5ad` + // const url = `${BASE_URL}/invalid_disease_label.h5ad` // const parseResults = await parseAnnDataFile(url) // expect(parseResults.issues).toHaveLength(1) @@ -96,7 +87,10 @@ describe('Client-side file validation for AnnData', () => { // const expectedIssue = [ // 'error', // 'ontology:label-lookup-error', - // 'Ontology ID "FOO_0000042" is not among accepted ontologies (MONDO, PATO) for key "disease"' + // 'Invalid disease label "foo". 
Valid labels for MONDO_0018076: tuberculosis, tuberculosis disease, active tuberculosis, Kochs disease, TB', + // { + // 'subtype': 'ontology:invalid-label' + // } // ] // expect(parseResults.issues[0]).toEqual(expectedIssue) // }) From ba8d809c0dbd5851cddbcc72e4c09866f5b4abbd Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 19 Sep 2024 16:41:41 -0400 Subject: [PATCH 38/52] Convert from O(n^2) to O(n) to avoid OOM --- .../lib/validation/validate-anndata.js | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 1e8cab04e..ff86a1423 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -128,15 +128,19 @@ async function checkOntologyLabels(key, ontologies, groups) { const issues = [] - const labelIdPairs = [] + console.log('in checkOntologyLabels') + const labelIdPairs = new Set() for (let i = 0; i < idIndexes.length; i++) { const id = ids[idIndexes[i]] - for (let j = 0; j < labelIndexes.length; j++) { - const label = labels[labelIndexes[j]] - labelIdPairs.push(`${id} || ${label}`) + const label = labels[labelIndexes[i]] + if (label === undefined) { + console.log("label === 'undefined', i, labelIndexes[i]", i, labelIndexes[i]) } + labelIdPairs.add(`${id} || ${label}`) } - const rawUniques = Array.from(new Set(labelIdPairs)) + console.log('labelIdPairs', labelIdPairs) + const rawUniques = Array.from(labelIdPairs) + console.log('rawUniques', rawUniques) rawUniques.map(r => { const [id, label] = r.split(' || ') @@ -162,6 +166,8 @@ async function checkOntologyLabels(key, ontologies, groups) { } }) + console.log('issues', issues) + return issues } @@ -203,15 +209,23 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { // This organization greatly decreases filesize, but requires more code // to map paired obs annotations like `disease` (ontology IDs) to // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. 
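//
// For illustration, with hypothetical values (not taken from a real file):
// categories ['MONDO_0018076', 'MONDO_0000001'] and codes [0, 0, 1] expand
// to one value per cell, e.g.:
//
//   const categories = ['MONDO_0018076', 'MONDO_0000001']
//   const codes = [0, 0, 1] // may be a typed array in HDF5, hence Array.from
//   const perCell = Array.from(codes, i => categories[i])
//   // perCell: ['MONDO_0018076', 'MONDO_0018076', 'MONDO_0000001']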
+ console.log('idGroup', idGroup) + console.log('labelGroup', labelGroup) const idCategories = await idGroup.values[0] + console.log('idCategories', idCategories) const idCodes = await idGroup.values[1] + console.log('idGroup', idCodes) const ids = await idCategories.value + console.log('ids', ids) const idIndexes = await idCodes.value + console.log('idIndexes', idIndexes) const labelCategories = await labelGroup.values[0] const labelCodes = await labelGroup.values[1] const labels = await labelCategories.value + console.log('labels', labels) const labelIndexes = await labelCodes.value + console.log('labelIndexes', labelIndexes) return [ids, idIndexes, labels, labelIndexes] } From 3480860e4f60e01b24f54a29f2a665573555974c Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 20 Sep 2024 10:00:19 -0400 Subject: [PATCH 39/52] Skip ontology label validation for remote AnnData --- .../lib/validation/validate-anndata.js | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index ff86a1423..03ec4eb4b 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -123,24 +123,17 @@ export function checkOntologyIdFormat(key, ontologyIds) { /** Validate author's annotation labels match those in ontologies */ async function checkOntologyLabels(key, ontologies, groups) { - const [ids, idIndexes, labels, labelIndexes] = groups const issues = [] - console.log('in checkOntologyLabels') const labelIdPairs = new Set() for (let i = 0; i < idIndexes.length; i++) { const id = ids[idIndexes[i]] const label = labels[labelIndexes[i]] - if (label === undefined) { - console.log("label === 'undefined', i, labelIndexes[i]", i, labelIndexes[i]) - } labelIdPairs.add(`${id} || ${label}`) } - console.log('labelIdPairs', labelIdPairs) const rawUniques = Array.from(labelIdPairs) - console.log('rawUniques', rawUniques) rawUniques.map(r => { const [id, label] = r.split(' || ') @@ -209,23 +202,15 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { // This organization greatly decreases filesize, but requires more code // to map paired obs annotations like `disease` (ontology IDs) to // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. - console.log('idGroup', idGroup) - console.log('labelGroup', labelGroup) const idCategories = await idGroup.values[0] - console.log('idCategories', idCategories) const idCodes = await idGroup.values[1] - console.log('idGroup', idCodes) const ids = await idCategories.value - console.log('ids', ids) const idIndexes = await idCodes.value - console.log('idIndexes', idIndexes) const labelCategories = await labelGroup.values[0] const labelCodes = await labelGroup.values[1] const labels = await labelCategories.value - console.log('labels', labels) const labelIndexes = await labelCodes.value - console.log('labelIndexes', labelIndexes) return [ids, idIndexes, labels, labelIndexes] } @@ -291,7 +276,14 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { let ontologyLabelIssues = [] if (requiredMetadataIssues.length === 0) { ontologyIdFormatIssues = await validateOntologyIdFormat(hdf5File) - if (ontologyIdFormatIssues.length === 0) { + if ( + ontologyIdFormatIssues.length === 0 && + + // TODO (SCP-5813): + // For big remote AnnData, hdf5-indexed-reader sometimes corrupts index codes, + // so skip ontology label validation for e.g. 
"Use bucket path" + 'url' in remoteProps === false + ) { ontologyLabelIssues = await validateOntologyLabels(hdf5File) } } From c974650111410f4431183248224456e248330383 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 20 Sep 2024 10:14:40 -0400 Subject: [PATCH 40/52] Fix tests --- app/javascript/lib/validation/validate-anndata.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 03ec4eb4b..9bc49a488 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -282,7 +282,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { // TODO (SCP-5813): // For big remote AnnData, hdf5-indexed-reader sometimes corrupts index codes, // so skip ontology label validation for e.g. "Use bucket path" - 'url' in remoteProps === false + remoteProps && 'url' in remoteProps === false ) { ontologyLabelIssues = await validateOntologyLabels(hdf5File) } From 0a73e06460291f7307fd21b7881bae005d28b9e1 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 20 Sep 2024 10:18:53 -0400 Subject: [PATCH 41/52] Remove obsolete code, refine TODO note --- app/javascript/lib/validation/validate-anndata.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 9bc49a488..374d23c1a 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -266,11 +266,6 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { const headers = await getAnnDataHeaders(hdf5File) - // TODO (SCP-5770): Extend AnnData CSFV to remote files, then remove this - if (!headers) { - return { issues } - } - const requiredMetadataIssues = validateRequiredMetadataColumns([headers], true) let ontologyIdFormatIssues = [] let ontologyLabelIssues = [] @@ -279,9 +274,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { if ( ontologyIdFormatIssues.length === 0 && - // TODO (SCP-5813): - // For big remote AnnData, hdf5-indexed-reader sometimes corrupts index codes, - // so skip ontology label validation for e.g. "Use bucket path" + // TODO (SCP-5813): Enable ontology validation for remote AnnData remoteProps && 'url' in remoteProps === false ) { ontologyLabelIssues = await validateOntologyLabels(hdf5File) From 7d978fa4aaecd07e72ee2b4b0efb82eb5af87011 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 20 Sep 2024 12:12:17 -0400 Subject: [PATCH 42/52] Improve docs, remove debug --- .../lib/validation/ontology-validation.js | 15 ++++++++++++--- app/javascript/lib/validation/validate-anndata.js | 8 +++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 695d70aac..80dc1b4e2 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -1,3 +1,15 @@ +/** + * @fileoverview Validates ontology labels and IDs in files added by users + * + * SCP requires uploaded data to content certain metadata annotations, e.g. + * species, organ, disease, and library preparation protocol. This module + * loads ontology reference data, and it to required metadata in the + * user's uploaded or transferred file. 
+ * + * More context, demo: + * https://github.com/broadinstitute/single_cell_portal_core/pull/2129 + */ + import { decompressSync, strFromU8 } from 'fflate' import { @@ -70,8 +82,6 @@ export async function cacheFetch(url) { return await cache.match(decompressedUrl) } - - /** * Fetch minified ontologies, transform into object of object of arrays, e.g.: * @@ -124,7 +134,6 @@ export async function fetchOntologies() { window.SCP.ontologies = ontologies return ontologies } -window.fetchOntologies = fetchOntologies /** Get lowercase shortnames for all required ontologies */ function getOntologyShortNames() { diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 374d23c1a..117918171 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -38,7 +38,6 @@ async function getOntologyIds(key, hdf5File) { return ontologyIds } -window.getOntologyIds = getOntologyIds /** Get annotation headers for a key (e.g. obs) from an HDF5 file */ async function getAnnotationHeaders(key, hdf5File) { @@ -127,6 +126,7 @@ async function checkOntologyLabels(key, ontologies, groups) { const issues = [] + // Determine unique (ontology ID, ontology label) pairs const labelIdPairs = new Set() for (let i = 0; i < idIndexes.length; i++) { const id = ids[idIndexes[i]] @@ -139,7 +139,9 @@ async function checkOntologyLabels(key, ontologies, groups) { const [id, label] = r.split(' || ') const ontologyShortNameLc = id.split(/[_:]/)[0].toLowerCase() const ontology = ontologies[ontologyShortNameLc] + if (!(id in ontology)) { + // Register invalid ontology ID const msg = `Invalid ontology ID: ${id}` issues.push([ 'error', 'ontology:label-lookup-error', msg, @@ -147,7 +149,9 @@ async function checkOntologyLabels(key, ontologies, groups) { ]) } else { const validLabels = ontology[id] + if (!(validLabels.includes(label))) { + // Register invalid ontology label const prettyLabels = validLabels.join(', ') const validLabelsClause = `Valid labels for ${id}: ${prettyLabels}` const msg = `Invalid ${key} label "${label}". ${validLabelsClause}` @@ -159,8 +163,6 @@ async function checkOntologyLabels(key, ontologies, groups) { } }) - console.log('issues', issues) - return issues } From 9a9bd8f156ac467fac8b3f1caebd65212ef4ce8b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Sep 2024 19:24:27 +0000 Subject: [PATCH 43/52] Bump puma from 5.6.8 to 5.6.9 Bumps [puma](https://github.com/puma/puma) from 5.6.8 to 5.6.9. - [Release notes](https://github.com/puma/puma/releases) - [Changelog](https://github.com/puma/puma/blob/master/History.md) - [Commits](https://github.com/puma/puma/compare/v5.6.8...v5.6.9) --- updated-dependencies: - dependency-name: puma dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 1e5ea7a51..90ed9b5fa 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -362,7 +362,7 @@ GEM psych (5.1.2) stringio public_suffix (5.0.4) - puma (5.6.8) + puma (5.6.9) nio4r (~> 2.0) racc (1.8.0) rack (2.2.9) From e457f57e29e33e98e773c927a3493baf7d619ba9 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Mon, 23 Sep 2024 13:11:55 -0400 Subject: [PATCH 44/52] Refine docs, variable names --- .../lib/validation/ontology-validation.js | 4 ++-- app/javascript/lib/validation/validate-anndata.js | 14 +++++++------- test/js/lib/node-web-api.js | 3 +++ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 80dc1b4e2..505b970af 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -3,8 +3,8 @@ * * SCP requires uploaded data to content certain metadata annotations, e.g. * species, organ, disease, and library preparation protocol. This module - * loads ontology reference data, and it to required metadata in the - * user's uploaded or transferred file. + * loads ontology reference data, and uses it to check required metadata in + * the user's uploaded or transferred file. * * More context, demo: * https://github.com/broadinstitute/single_cell_portal_core/pull/2129 diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 117918171..7e5f4fc49 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -120,8 +120,8 @@ export function checkOntologyIdFormat(key, ontologyIds) { return issues } -/** Validate author's annotation labels match those in ontologies */ -async function checkOntologyLabels(key, ontologies, groups) { +/** Validate author's annotation labels and IDs match those in ontologies */ +async function checkOntologyLabelsAndIds(key, ontologies, groups) { const [ids, idIndexes, labels, labelIndexes] = groups const issues = [] @@ -218,7 +218,7 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { } /** Validate ontology labels for required metadata columns in AnnData file */ -async function validateOntologyLabels(hdf5File) { +async function validateOntologyLabelsAndIds(hdf5File) { let issues = [] const ontologies = await fetchOntologies() @@ -232,7 +232,7 @@ async function validateOntologyLabels(hdf5File) { if (groups) { issues = issues.concat( - await checkOntologyLabels(key, ontologies, groups) + await checkOntologyLabelsAndIds(key, ontologies, groups) ) } } @@ -270,7 +270,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { const requiredMetadataIssues = validateRequiredMetadataColumns([headers], true) let ontologyIdFormatIssues = [] - let ontologyLabelIssues = [] + let ontologyLabelAndIdIssues = [] if (requiredMetadataIssues.length === 0) { ontologyIdFormatIssues = await validateOntologyIdFormat(hdf5File) if ( @@ -279,7 +279,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { // TODO (SCP-5813): Enable ontology validation for remote AnnData remoteProps && 'url' in remoteProps === false ) { - ontologyLabelIssues = await validateOntologyLabels(hdf5File) + ontologyLabelAndIdIssues = await validateOntologyLabelsAndIds(hdf5File) } } @@ -287,7 +287,7 @@ export async function 
parseAnnDataFile(fileOrUrl, remoteProps) { validateUnique(headers), requiredMetadataIssues, ontologyIdFormatIssues, - ontologyLabelIssues + ontologyLabelAndIdIssues ) return { issues } diff --git a/test/js/lib/node-web-api.js b/test/js/lib/node-web-api.js index 8e0370658..6c8ef04d2 100644 --- a/test/js/lib/node-web-api.js +++ b/test/js/lib/node-web-api.js @@ -3,6 +3,9 @@ * * https://developer.mozilla.org/en-US/docs/Web/API/CacheStorage * https://developer.mozilla.org/en-US/docs/Web/API/Cache + * https://developer.mozilla.org/en-US/docs/Web/API/Request + * https://developer.mozilla.org/en-US/docs/Web/API/Response + * https://developer.mozilla.org/en-US/docs/Web/API/Headers */ const { Readable } = require('stream') From 0cb12e509b20b1d472eef7d630abcdc67e67bc7a Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Mon, 23 Sep 2024 14:20:53 -0400 Subject: [PATCH 45/52] Fix typo --- app/javascript/lib/validation/ontology-validation.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 505b970af..aee6ec4c5 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -1,10 +1,10 @@ /** * @fileoverview Validates ontology labels and IDs in files added by users * - * SCP requires uploaded data to content certain metadata annotations, e.g. + * SCP requires cells to have certain metadata annotations, e.g. * species, organ, disease, and library preparation protocol. This module - * loads ontology reference data, and uses it to check required metadata in - * the user's uploaded or transferred file. + * loads ontology reference data, and uses it to check required cell metadata + * in the user's uploaded or transferred file. * * More context, demo: * https://github.com/broadinstitute/single_cell_portal_core/pull/2129 From 8af4797a4d7c0902027e56af50660ae6cfe25ffa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:30:07 +0000 Subject: [PATCH 46/52] Bump webrick from 1.8.1 to 1.8.2 Bumps [webrick](https://github.com/ruby/webrick) from 1.8.1 to 1.8.2. - [Release notes](https://github.com/ruby/webrick/releases) - [Commits](https://github.com/ruby/webrick/compare/v1.8.1...v1.8.2) --- updated-dependencies: - dependency-name: webrick dependency-type: indirect ... Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 90ed9b5fa..f66521c49 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -520,7 +520,7 @@ GEM zeitwerk (~> 2.2) warden (1.2.9) rack (>= 2.0.9) - webrick (1.8.1) + webrick (1.8.2) websocket-driver (0.7.6) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) From dedb89dfcba95ee1c0091819fb6270d4cb29c03b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:30:24 +0000 Subject: [PATCH 47/52] Bump rollup from 3.29.4 to 3.29.5 Bumps [rollup](https://github.com/rollup/rollup) from 3.29.4 to 3.29.5. - [Release notes](https://github.com/rollup/rollup/releases) - [Changelog](https://github.com/rollup/rollup/blob/master/CHANGELOG.md) - [Commits](https://github.com/rollup/rollup/compare/v3.29.4...v3.29.5) --- updated-dependencies: - dependency-name: rollup dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yarn.lock b/yarn.lock index 52f5500f1..2ed24b066 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8305,9 +8305,9 @@ rimraf@^3.0.0: glob "^7.1.3" rollup@^3.27.1: - version "3.29.4" - resolved "https://registry.npmjs.org/rollup/-/rollup-3.29.4.tgz" - integrity sha512-oWzmBZwvYrU0iJHtDmhsm662rC15FRXmcjCk1xD771dFDx5jJ02ufAQQTn0etB2emNk4J9EZg/yWKpsn9BWGRw== + version "3.29.5" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.29.5.tgz#8a2e477a758b520fb78daf04bca4c522c1da8a54" + integrity sha512-GVsDdsbJzzy4S/v3dqWPJ7EfvZJfCHiDqe80IyrF59LYuP+e6U1LJoUqeuqRbwAWoMNoXivMNeNAOf5E22VA1w== optionalDependencies: fsevents "~2.3.2" From a302e3e38207a2634299ff42dc32e63351d1797e Mon Sep 17 00:00:00 2001 From: bistline Date: Tue, 24 Sep 2024 16:07:18 -0400 Subject: [PATCH 48/52] Adding HomePageLink feature --- app/assets/images/cellarium.png | Bin 0 -> 1920 bytes app/controllers/site_controller.rb | 2 + app/javascript/styles/_brand.scss | 8 ++- app/javascript/styles/_global.scss | 10 +++ app/models/home_page_link.rb | 72 +++++++++++++++++++ app/views/layouts/application.html.erb | 10 +-- app/views/site/_home_page_link.html.erb | 11 +++ app/views/site/_main_banner_content.html.erb | 7 +- test/models/home_page_link_test.rb | 48 +++++++++++++ 9 files changed, 159 insertions(+), 9 deletions(-) create mode 100644 app/assets/images/cellarium.png create mode 100644 app/models/home_page_link.rb create mode 100644 app/views/site/_home_page_link.html.erb create mode 100644 test/models/home_page_link_test.rb diff --git a/app/assets/images/cellarium.png b/app/assets/images/cellarium.png new file mode 100644 index 0000000000000000000000000000000000000000..01c96b45e18955f2a2224c8db1a2f93acc06ecdd GIT binary patch literal 1920 zcmV-`2Y>j9P)Px+I!Q!9R9HuyR}EB?*BO4^8x%AY!Bc-gr!+V=svfAg?aZH91zHe2bQ9P0^t8@) z^SG%+T0|zbPS;b;sjJYT&~}}QScZbMh}v$I5FAH}cCD2nKkm@LVnY-j6T=>WIkQ!8v{cU@?Gs0H)kk=Utr83Qi3VwWseMuUQ(8 zpk8yR0-&FM<98eNY6IG)^`RNv9@S|7dDwX~`d(A5;qB?)zn}dv0etmP?%PDzQZcOs zRnu-l9WWIeo=indOf+I*Vm!9jtd7I6&h?}Dx*p!1J_6m4@TdUvn%pn@{00}MhkXqH zNt5y6-#_#?=+t+h?qV(K>pwwLd#hX@1Ayd*x2OB`HDa+V;^0gh_iL{X)T@i9HzIAv z4y63-30!Hof~>6HNn;v&FruxqzzN`-!w)JfvCKx8BXHD+JHe4ZyM9^T<+A$WLL&fl zXmSTigPYVJ1+_z4Q!Qna5|Ut3>+WrN2cyNZ4vy&4lPIx)A7G0AD@an8OH=pQ4O-=PC3E=wD!)46DQ_lo_37zVueQ+7zu9a?Sy?$w7N7L!q?jV14b)?R+3~nJ0v6`LrNB)X8}?-HL4151YIU^; z4GTqNWTgDPfBylGUZ8U_22^(WKg1b;Ue(}G6w)MshWG+bATWiyFDKBb7;seGFlMd= zv*fSuL9JHfxy{eRXf)!bjF*s{ycS1}6yQwxnKAV_P(_!L-zg!Er^xA&yC%+|;=l=| z4o&!vPXoch!HA2G!)B?|cy*9UW3WB_#!wm6i7eFH50%{hu2FEFumt z@g?pq;8jaG98i238D0lN0(!k3yD~Cm6edFS(G!0$G4LR%Bq9Ky*J(!KK$TBBiAjmb z%-rRZS5$NYMaN}(kDKUR>M_wB()keGa&)7#|8G8x?8?kQQc{vno``~|g^3WozwtPi zzS(d9-=Lz$HvnJZAJP=z-5<<0Cx%)!3Dsim{Hm-DtP1rY{kkIIB;OU zPbZ@4MfFdFsO0IQM$XtG4)Jl*(SILDNJt2J`+5-*9SyPTm$Ucc^5x5tf9iB8X3TIO zvqN-m?~fM*6+K&20#ZFC%YXnE2Z}q6%HvlY$D-v9AIV2gPY+($ktXe)C@#X>xpN_M ze&a?nZZtLF!iBr@!g!qvIT_=lo8RM3;hnhwlz#OtD$jm^y1F_XeEpBIuhVwEAWt}b zM+Y`-dR9Ju5518)5nm%@n^w z|I>2!BH(~~Oe%Q3^fU)HD#YL-4uuF8+TOyxeXn5EqpLh15)&Us=I%_79q}*{WVKjO zSojtU2E&M5_6KY5fT;t-og;X}u)tI1dA~ZvokVaEBmfr!E;bi>fe7#qK*;nE#Ky)V zHg*Y?#4dr=I*7KmHsrjPn5PSdOYhru#_RwBg>#V-0YF^F zem%O|-A}meSALJU)hoT6h%{vHJ%CPUAX+0g`UD^rO*!#ioAx=YXKk-AI~fIvqI8g$ z>6pB10B&3Vivj%waI5#AB6A+P6o&A}#_il^1ctOSg*nIwVwSNH zU?K)10RxP|B90Rp`z`O@oewL self.id).exists? 
+ existing = HomePageLink.published + errors.add( + :published, + "link exists: '#{existing.name}' (#{existing.id}), please unpublish first with HomePageLink.unpublish" + ) + puts errors.full_messages.to_sentence + end + end +end diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb index dffd0e816..1f462e1a2 100644 --- a/app/views/layouts/application.html.erb +++ b/app/views/layouts/application.html.erb @@ -192,10 +192,12 @@ © <%= Date.today.year %> The Broad Institute of MIT and Harvard
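
A minimal end-to-end sketch of the client-side AnnData validation built up in
the patches above. The import path is abbreviated and the URL is hypothetical;
parseAnnDataFile and the [level, type, message] issue shape are taken from the
tests in this series:

    import { parseAnnDataFile } from 'lib/validation/validate-anndata'

    // Hypothetical remote .h5ad; a local File object can be passed the same way
    const url = 'https://example.com/study.h5ad'
    const { issues } = await parseAnnDataFile(url)
    issues.forEach(([level, type, message]) => {
      console.log(level, type, message)
    })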