From 2b785c3f6a1ee6a1617d720516aefdd6eb0c7ef9 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Wed, 11 Sep 2024 11:17:11 -0400 Subject: [PATCH 01/52] Enable test for invalid ontology IDs in AnnData --- test/js/lib/validate-anndata.test.js | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js index ec9e0a5ad..26c9044dc 100644 --- a/test/js/lib/validate-anndata.test.js +++ b/test/js/lib/validate-anndata.test.js @@ -57,19 +57,18 @@ describe('Client-side file validation for AnnData', () => { expect(issues).toHaveLength(0) }) - // TODO: Uncomment this after row-level AnnData parsing PR is merged - // it('Parses AnnData rows and reports invalid ontology IDs', async () => { - // // eslint-disable-next-line max-len - // const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test_invalid_disease.h5ad' - // const parseResults = await parseAnnDataFile(url) + it('Parses AnnData rows and reports invalid ontology IDs', async () => { + // eslint-disable-next-line max-len + const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test_invalid_disease.h5ad' + const parseResults = await parseAnnDataFile(url) - // expect(parseResults.issues).toHaveLength(1) + expect(parseResults.issues).toHaveLength(1) - // const expectedIssue = [ - // 'error', - // 'ontology:label-lookup-error', - // 'Ontology ID "FOO_0000042" is not among accepted ontologies (MONDO, PATO) for key "disease"' - // ] - // expect(parseResults.issues[0]).toEqual(expectedIssue) - // }) + const expectedIssue = [ + 'error', + 'ontology:label-lookup-error', + 'Ontology ID "FOO_0000042" is not among accepted ontologies (MONDO, PATO) for key "disease"' + ] + expect(parseResults.issues[0]).toEqual(expectedIssue) + }) }) From a99b7e007cfab5b84b3cecbea57b3c88432080a5 Mon Sep 17 00:00:00 2001 From: bistline Date: Wed, 11 Sep 2024 15:43:16 -0400 Subject: [PATCH 02/52] Create default X_umap cluster fragment unless specified --- app/models/ann_data_file_info.rb | 13 ++++++++++++- app/models/ann_data_ingest_parameters.rb | 2 +- test/models/ann_data_file_info_test.rb | 13 ++++++++++++- test/models/ann_data_ingest_parameters_test.rb | 2 +- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb index 312228173..1a9033696 100644 --- a/app/models/ann_data_file_info.rb +++ b/app/models/ann_data_file_info.rb @@ -39,7 +39,7 @@ class AnnDataFileInfo # } # { _id: '6033f531e241391884633748', data_type: :expression, description: 'log(TMP) expression' } field :data_fragments, type: Array, default: [] - before_validation :sanitize_fragments! + before_validation :set_default_cluster_fragments!, :sanitize_fragments! validate :validate_fragments after_validation :update_expression_file_info @@ -195,6 +195,17 @@ def sanitize_fragments! self.data_fragments = sanitized_fragments end + # create the default cluster data_fragment entries + def set_default_cluster_fragments! + return false if fragments_by_type(:cluster).any? 
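+    # Illustrative sketch (the _id value here is hypothetical): with the
+    # default obsm_keys of ['X_umap'], the fragment built below looks like
+    #   { _id: '66e1f0c4e241391884633748', data_type: :cluster, name: 'umap', obsm_key_name: 'X_umap' }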
+ + default_obsm_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys] + default_obsm_keys.each do |obsm_key_name| + name = obsm_key_name.delete_prefix('X_') + data_fragments << { _id: BSON::ObjectId.new.to_s, data_type: :cluster, name:, obsm_key_name: } + end + end + # ensure all fragments have required keys and are unique def validate_fragments REQUIRED_FRAGMENT_KEYS.each do |data_type, keys| diff --git a/app/models/ann_data_ingest_parameters.rb b/app/models/ann_data_ingest_parameters.rb index 82729cd91..be3f26fd8 100644 --- a/app/models/ann_data_ingest_parameters.rb +++ b/app/models/ann_data_ingest_parameters.rb @@ -32,7 +32,7 @@ class AnnDataIngestParameters PARAM_DEFAULTS = { ingest_anndata: true, anndata_file: nil, - obsm_keys: %w[X_umap X_tsne], + obsm_keys: %w[X_umap], ingest_cluster: false, cluster_file: nil, name: nil, diff --git a/test/models/ann_data_file_info_test.rb b/test/models/ann_data_file_info_test.rb index 8463fc937..94db34b3d 100644 --- a/test/models/ann_data_file_info_test.rb +++ b/test/models/ann_data_file_info_test.rb @@ -120,6 +120,17 @@ def generate_id assert_equal %w[X_umap X_tsne], ann_data_info.obsm_key_names end + test 'should set default cluster fragments' do + ann_data_info = AnnDataFileInfo.new + assert ann_data_info.valid? + default_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys] + default_keys.each do |obsm_key_name| + name = obsm_key_name.delete_prefix('X_') + matcher = { data_type: :cluster, name:, obsm_key_name: }.with_indifferent_access + assert ann_data_info.find_fragment(**matcher).present? + end + end + test 'should validate data fragments' do ann_data_info = AnnDataFileInfo.new( data_fragments: [ @@ -224,7 +235,7 @@ def generate_id ] ) assert anndata_info.valid? # invokes validations - exp_frag = anndata_info.data_fragments.first + exp_frag = anndata_info.fragments_by_type(:expression).first assert_nil exp_frag.with_indifferent_access.dig(:expression_file_info, :units) end end diff --git a/test/models/ann_data_ingest_parameters_test.rb b/test/models/ann_data_ingest_parameters_test.rb index 1a3023bc4..734d83cc7 100644 --- a/test/models/ann_data_ingest_parameters_test.rb +++ b/test/models/ann_data_ingest_parameters_test.rb @@ -56,7 +56,7 @@ class AnnDataIngestParametersTest < ActiveSupport::TestCase assert extraction.send(attr).blank? end - cmd = '--ingest-anndata --anndata-file gs://bucket_id/test.h5ad --obsm-keys ["X_umap", "X_tsne"] --extract ' \ + cmd = '--ingest-anndata --anndata-file gs://bucket_id/test.h5ad --obsm-keys ["X_umap"] --extract ' \ '["cluster", "metadata", "processed_expression", "raw_counts"]' assert_equal cmd, extraction.to_options_array.join(' ') assert_equal 'n2d-highmem-32', extraction.machine_type From 03ceb12ab0b592c73c8479e2593e587018828a2d Mon Sep 17 00:00:00 2001 From: bistline Date: Wed, 11 Sep 2024 15:55:52 -0400 Subject: [PATCH 03/52] Add missing spatial_cluster_associations field --- app/models/ann_data_file_info.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb index 1a9033696..a0ce6f4d5 100644 --- a/app/models/ann_data_file_info.rb +++ b/app/models/ann_data_file_info.rb @@ -202,7 +202,10 @@ def set_default_cluster_fragments! 
default_obsm_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys] default_obsm_keys.each do |obsm_key_name| name = obsm_key_name.delete_prefix('X_') - data_fragments << { _id: BSON::ObjectId.new.to_s, data_type: :cluster, name:, obsm_key_name: } + fragment = { + _id: BSON::ObjectId.new.to_s, data_type: :cluster, name:, obsm_key_name:, spatial_cluster_associations: [] + } + data_fragments << fragment end end From 3156dde49b5e4312866409916d1b097c03669c0e Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Wed, 11 Sep 2024 22:46:15 -0400 Subject: [PATCH 04/52] Fetch minified ontologies, modularize --- .../lib/validation/ontology-validation.js | 90 +++++++++++++++++++ .../lib/validation/validate-anndata.js | 22 +---- 2 files changed, 92 insertions(+), 20 deletions(-) create mode 100644 app/javascript/lib/validation/ontology-validation.js diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js new file mode 100644 index 000000000..0711d24fe --- /dev/null +++ b/app/javascript/lib/validation/ontology-validation.js @@ -0,0 +1,90 @@ +import { decompressSync, strFromU8 } from 'fflate' + +import { + metadataSchema, REQUIRED_CONVENTION_COLUMNS +} from './shared-validation' + +const ONTOLOGY_BASE_URL = + 'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' + + 'ew-minify-uberon/ingest/validation/ontologies/' + +/** Fetch .gz file, decompress it, return plaintext */ +async function fetchGzipped(url) { + const response = await fetch(url) + const blob = await response.blob(); + const uint8Array = new Uint8Array(await blob.arrayBuffer()); + const plaintext = strFromU8(decompressSync(uint8Array)); + return plaintext +} + +/** Fetch minified ontologies */ +async function fetchOntologies() { + const ontologies = {} + + fetchOntologies + const ontologyNames = getOntologyShortNames() + + for (let i = 0; i < ontologyNames.length; i++) { + const ontologyName = ontologyNames[i] + const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz` + const tsv = await fetchGzipped(ontologyUrl) + + const lines = tsv.split('\n') + + ontologies[ontologyName] = {} + + for (let i = 0; i < lines.length; i++) { + const line = lines[i] + const [ontologyId, label, rawSynonyms] = line.split('\t') + let names = [label] + if (rawSynonyms) { + const synonyms = rawSynonyms.split('||') + names = names.concat(synonyms) + } + ontologies[ontologyName][ontologyId] = names + } + } + + return ontologies +} + +window.fetchOntologies = fetchOntologies + +/** Get lowercase shortnames for all required ontologies */ +function getOntologyShortNames() { + let requiredOntologies = [] + + // Validate IDs for species, organ, disease, and library preparation protocol + for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) { + const column = REQUIRED_CONVENTION_COLUMNS[i] + if (!column.endsWith('__ontology_label')) {continue} + const key = column.split('__ontology_label')[0] + const ontologies = getAcceptedOntologies(key, metadataSchema) + requiredOntologies = requiredOntologies.concat(ontologies) + } + + requiredOntologies = Array.from( + new Set(requiredOntologies.map(o => o.toLowerCase())) + ) + + return requiredOntologies +} + +/** + * Get list of ontology names accepted for key from metadata schema + * + * E.g. "disease" -> ["MONDO", "PATO"] + */ +export function getAcceptedOntologies(key, metadataSchema) { + // E.g. 
"ontology_browser_url": "https://www.ebi.ac.uk/ols/ontologies/mondo,https://www.ebi.ac.uk/ols/ontologies/pato" + const olsUrls = metadataSchema.properties[key].ontology + + const acceptedOntologies = + olsUrls?.split(',').map(url => url.split('/').slice(-1)[0].toUpperCase()) + + if (acceptedOntologies.includes('NCBITAXON')) { + acceptedOntologies.push('NCBITaxon') + } + + return acceptedOntologies +} diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index d05fbe7bd..837d023f9 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -1,10 +1,11 @@ import {openH5File} from 'hdf5-indexed-reader' +import { getOAuthToken } from '~/lib/scp-api' import { validateUnique, validateRequiredMetadataColumns, metadataSchema, REQUIRED_CONVENTION_COLUMNS } from './shared-validation' -import { getOAuthToken } from '~/lib/scp-api' +import { getAcceptedOntologies } from './ontology-validation' /** Get ontology ID values for key in AnnData file */ async function getOntologyIds(key, hdf5File) { @@ -90,25 +91,6 @@ export async function getAnnDataHeaders(hdf5File) { return headers } -/** - * Get list of ontology names accepted for key from metadata schema - * - * E.g. "disease" -> ["MONDO", "PATO"] - */ -function getAcceptedOntologies(key, metadataSchema) { - // E.g. "ontology_browser_url": "https://www.ebi.ac.uk/ols/ontologies/mondo,https://www.ebi.ac.uk/ols/ontologies/pato" - const olsUrls = metadataSchema.properties[key].ontology - - const acceptedOntologies = - olsUrls?.split(',').map(url => url.split('/').slice(-1)[0].toUpperCase()) - - if (acceptedOntologies.includes('NCBITAXON')) { - acceptedOntologies.push('NCBITaxon') - } - - return acceptedOntologies -} - /** * Check format of ontology IDs for key, return updated issues array * From eb17bbb163730e851dc64ebc668d07f166f2e244 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 12 Sep 2024 10:07:19 -0400 Subject: [PATCH 05/52] Reuse fetched, processed ontologies from this page load --- .../lib/validation/ontology-validation.js | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 0711d24fe..33ff68dc2 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -6,7 +6,7 @@ import { const ONTOLOGY_BASE_URL = 'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' + - 'ew-minify-uberon/ingest/validation/ontologies/' + 'ew-refine-mini-ontologies/ingest/validation/ontologies/' /** Fetch .gz file, decompress it, return plaintext */ async function fetchGzipped(url) { @@ -17,11 +17,32 @@ async function fetchGzipped(url) { return plaintext } -/** Fetch minified ontologies */ +/** + * Fetch minified ontologies, transform into object of object of arrays, e.g.: + * + * { + * 'mondo': { + * 'MONDO_0008315': ['prostate cancer', 'prostate neoplasm', 'prostatic neoplasm'], + * 'MONDO_0018076': ['tuberculosis', 'TB'], + * ... + * }, + * 'ncbitaxon': { + * 'NCBITaxon_9606': ['Homo sapiens', 'human'], + * 'NCBITaxon_10090': ['Mus musculus', 'house mouse', 'mouse'], + * ... + * }, + * ... 
+ * } + */ async function fetchOntologies() { + if (window.SCP.ontologies) { + // Reuse fetched, processed ontologies from this page load + return window.SCP.ontologies + } + + const t0 = Date.now() const ontologies = {} - fetchOntologies const ontologyNames = getOntologyShortNames() for (let i = 0; i < ontologyNames.length; i++) { @@ -45,6 +66,10 @@ async function fetchOntologies() { } } + const t1 = Date.now() + console.log(t1-t0) + + window.SCP.ontologies = ontologies return ontologies } From 62de36506bd3a74a1db1e46bd17caf714e82e35e Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 12 Sep 2024 11:20:00 -0400 Subject: [PATCH 06/52] Use service worker cache for minified ontologies --- .../lib/validation/ontology-validation.js | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 33ff68dc2..964f72792 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -8,6 +8,32 @@ const ONTOLOGY_BASE_URL = 'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' + 'ew-refine-mini-ontologies/ingest/validation/ontologies/' +/** Quickly retrieve current version cache key for ontologies */ +async function fetchOntologyCacheVersion() { + const response = await fetch(`${ONTOLOGY_BASE_URL}version.txt`) + const text = await response.text() + const version = text.trim().split('#')[0] + return version +} + +/** Get frontend SW cache object for minified ontologies */ +async function getServiceWorkerCache() { + const version = await fetchOntologyCacheVersion() + const currentOntologies = `ontologies-${version}` + + // Delete other versions of ontologies cache; there should be 1 per dodmain + const cacheNames = await caches.keys() + cacheNames.forEach(name => { + if (name.startsWith('ontologies-') && name !== currentOntologies) { + caches.delete(name) + } + }) + + const cache = await caches.open(currentOntologies) + + return cache +} + /** Fetch .gz file, decompress it, return plaintext */ async function fetchGzipped(url) { const response = await fetch(url) @@ -17,6 +43,26 @@ async function fetchGzipped(url) { return plaintext } +/** Fetch from service worker cache if available, from remote otherwise */ +export async function cacheFetch(url) { + const cache = await getServiceWorkerCache() + + const decompressedUrl = url.replace('.gz', '') + const response = await cache.match(decompressedUrl) + if (typeof response === 'undefined') { + // If cache miss, then fetch, decompress, and put response in cache + const data = await fetchGzipped(url) + const contentLength = data.length + const decompressedResponse = new Response( + new Blob([data], { type: 'text/tab-separated-values' }), + { headers: new Headers({ 'Content-Length': contentLength }) } + ) + await cache.put(decompressedUrl, decompressedResponse) + return await cache.match(decompressedUrl) + } + return await cache.match(decompressedUrl) +} + /** * Fetch minified ontologies, transform into object of object of arrays, e.g.: * @@ -48,7 +94,8 @@ async function fetchOntologies() { for (let i = 0; i < ontologyNames.length; i++) { const ontologyName = ontologyNames[i] const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz` - const tsv = await fetchGzipped(ontologyUrl) + const response = await cacheFetch(ontologyUrl) + const tsv = await response.text() const lines = tsv.split('\n') From 
90a6da6766c6a2e6200fae0e36f4562a119a5072 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 12 Sep 2024 12:25:00 -0400 Subject: [PATCH 07/52] Move, rename AnnData test files for clarity --- test/test_data/{ => anndata}/anndata_test.h5ad | Bin .../invalid_disease_id.h5ad} | Bin .../anndata/invalid_disease_label.h5ad | Bin 0 -> 72768 bytes .../invalid_header_no_species.h5ad} | Bin 4 files changed, 0 insertions(+), 0 deletions(-) rename test/test_data/{ => anndata}/anndata_test.h5ad (100%) rename test/test_data/{anndata_test_invalid_disease.h5ad => anndata/invalid_disease_id.h5ad} (100%) create mode 100644 test/test_data/anndata/invalid_disease_label.h5ad rename test/test_data/{anndata_test_bad_header_no_species.h5ad => anndata/invalid_header_no_species.h5ad} (100%) diff --git a/test/test_data/anndata_test.h5ad b/test/test_data/anndata/anndata_test.h5ad similarity index 100% rename from test/test_data/anndata_test.h5ad rename to test/test_data/anndata/anndata_test.h5ad diff --git a/test/test_data/anndata_test_invalid_disease.h5ad b/test/test_data/anndata/invalid_disease_id.h5ad similarity index 100% rename from test/test_data/anndata_test_invalid_disease.h5ad rename to test/test_data/anndata/invalid_disease_id.h5ad diff --git a/test/test_data/anndata/invalid_disease_label.h5ad b/test/test_data/anndata/invalid_disease_label.h5ad new file mode 100644 index 0000000000000000000000000000000000000000..54da5df2ead12d2a16b119001c7a1f6692504b6a GIT binary patch literal 72768 zcmeG_33wDm)}4eSToO26)!|VKyfi1tN*L6_hvdBngo*e6EpRF-PNz^)vH&p z>m6NP)ALaLkfAM_c4#W`oTYN-;g5`CJTH222 zfk=9c%{*B>kpSsm5{}n`c|6)>5<{B3z|OBTvePnB9r--ZEyyDskfh7C*W*Wz8Nz)C zs)}?R{RU~hZv)YD0P$w zc>4iFxJt9Qm@y;Ov7CHuiw?h&Ege0Zr=Ot&^ z3-U5%3xVM6snU+u`ToRP+>T4FuPB`qgAClBgKA4tx4KfoJ_bg6zr0Ht*e@InFSTErth3UUhU znaNps+4khZtc-l{eIn6#Q!@+m3mkbk8zhPNpm~uD28N5MEw&>*;^JbWW8xAMV-lld z;2a&qnS@01#f6uc7@Zgw2bghy7#%}+{sP@KuDIw#AQv4CSTS&R;h|e};i<%;;}T;y zF~B8y9dz^nL8TZ2c^nWWyz6y%KnGZh<2cZLVj|(ud~}T?S|tY^M?=4fE_$7G^kA4O zDc-$SE;s1#RFS}7c;9>=)Nf}U9@GH>NJQ}H!m}VA&BJ^!2@oCG^YZL-c-4(qPkI^L zJ5;qNH|ff$K^I*);opqqWS>ItK6llZ^T`LPo58qH-5TEqKF>RV7wIC3)56AXocU8VD`Li+#Qqw_ldT~o5 z5TEol6`134a&=dYq(@?VvNOY`#|33OrbEC1x2?L)q3tMaZ=?sKv0kqSW3b#v4-yR8 zjn$RYydI}3r+Gad%d4B$iHK)3uP0zV#WOTZCSo~_U(9!xI!L!*eKq=U=}7@Jl8{bY z%(HW{gDy5_Z^e3=ub0pV74tU4ujzc7g!mNy=~ZAd*0;etBO9WsEJs=f*g36gO~HDy z$A%^WC$g|Uy}0S4;~f9~oG$*N;1}ycOnI za~zr6G-o;rXK2iBJKvaucA@5=Mu?OJjWkMwDJUQS`ICB%}E4Ia#4$()mwo6g72eJMxHMSPcB zh2M1OdC?Lm-1c)ch9eDca!u&mq7Uto(7UnUP7Ut_oTDYAK=5fBZzP%3Sj|t5iH}GyL zc{gd)M1AN&PWj5Y7LL%teC0=5&!6nIa9|2 z1TB2C4(0)xww^zMYhnH-M+*n&V7{WNt?#LW19dQu6t(#LL0Jo{E4&Hc#Y!*ijg=xM z43)~>j+JaFu~Pi=vC{F`u~NXYSSk07Sn1r6Sl=e_xauo;H}qE5b#cTafuc$U;Sm6< zF*d@(0oLAYzDBM--eG+_!0Ng-!ovVop92vd46wS6fiQnppsv>-JOp5MJs;s%fNz3> z9!`hCWwXXj8hCp(@b&RkS6UD=1!~k4351gYR#y%Y=BtTXn6Crri!q=&^Bj@W;+L7b}tQ{#||I^l|1FW)w{P1-> zb?pw}I{?;>dpZ)gdftO zTp!C^G^+)-0$}# ziPu)2^!t@>sW3x$MgCY2*x!Cf^ZG~ZHrlz<6OvI^K@6M4MIq~cg{L3j$n z$Kw#;Po9}!Uv7jk8~q-lq`weII?AQG-$MjM3Pe8X_CLU=!?JK6Mgzh zL!(EI=jI@B+yr42pn~{8asXehU3b4nq<%Opw>|e(i{1A8)t?5r2RMg;#No}41 zafrI=19+5Qc^q;A=WR+Qm8tE#)y5%wYMCJ?ZZhck*1sJ^Vqu8ue-8|?^L zBry>8IBFl~*nlMIcT$X?UsrKX3DOf(tDaqUA-ZZ^eJ(M>u5%H)?7BgmqxxaMqn!NT z<*&EQ#(7&(={DAO-fH6<{+7iIIgJLo6zAyIM{3KzHqIGi68{N6*L9!ankUk4U;c6z zQceB${NV!MrKbY)oa>1MjQqFj#SY5BEdL$+H_?HD{kZk7uKf4Kzi(3Xkm!)~1iLrP zNWadF6etQ});+&wud4(E2-eMuH^xE-B(Dfun69gMWB&*gyg^2GEEnyd0b0E)cKHi~ zxPZTt;)(Jrj|)O^ejQio(aEhRR~FA|;{rPnH6t#V3Unzh(7z9vkvHh^gDRG=&i2&Y!T8!@mI;nE|lx$Llwk&R$zu-ng%kg?tL5iZOX6Qe|N-r 
zd%V(}tF8az{+o6q@(_gPZ87S(d)`_q^=kweeV0s)6QGg)_xj#QNL^otYx>TF50=w< z2)&1;@47TFXh(D?r{C*0GQe+)<)k;_qgz2C1RL?)#DE@I6(hc>ocN>hQa@Bqe5?DN zpf4&Xe40)|6L8}cEfTb{&7*V%D4k`^oV~nc2)3u zB0kO6g2Jheywt)>`1(=4#h#zWZ8A@RA~PIEFRa(!-ykeE8b@z~c71f^G>%|hIo*d4 zEGIpfFgSMHn9*<};8ygHkX+yMGP)0826S=zo1q@y>*FeN<@cT^k_Yim+hVAk_y-o% zMCDU6b8@)L!0lCra;NPV)Q;%kwirXc`MYVJ^zCq4ovWPq#_e^ka_Wb_W95nNZ|qP$ z`@@2R`wGr&YOV47kH&gEAA_-+=<4k!y^hhh8=|8}{Kx9biT|NkPV*upc(x_0FcUtY zmF3`7ak_fF{fWnVJs-oc+=&0-2JJ@Z%8CDxx^m*5zdPWG{P8qLmOT?VA1#Uv=M7)8 z=Sj~ug}Fw22qxe~H+R;sf}+4gW-E{sb(ianZiq ziC9i?LG;Leq5Z>xBYWfi(p!w`b^BP|?fZv?hV>8ajRxdaUB6?-g%1sdseGHBZcIo> zcz9$4wVR}`C%%yGWTSdCJX5fqrw&iv#r?_Nj^r(=qh-fJp%jrIi8a6H@EI2eM zG+xZu6s)Itt)I7iZHXu9kLD9!8{&!Lp4idjF`vwK0KsvWTb>) z?nUau`t=JAksPA=Md~BM`-g=|SG4-1dzD`jH6k=(PeUN84-ET=eFuRl2k?Q(VHWH0X2 zmDByr)0Gpx8f-zGp05JLCw*SJtekG_XI6U*tj0=|N+lA2|JKidF*jsgB%2KaXy z;LkO{pJ#x7j{*L@2Ke(0@b5Fg$6_gf9@wbBlY1nfkw9Ipv(UN(pK`SRUycDp#Te0P zZP!1zKcas4nu8hqbDLtg{+}+!fmyZt`c9%lp!h?y_MGdB1Ps^zX#$}j%v%31!GJ-* ze%$(3SNj6Ye$W1E55YP`2m*4C* z3>uSNDA&yg1N3@RY=&Lu&w4Jq?j=O2|NR0M1D;4%zW(2+r=UP9xB{x}yybB}&09Wq z%#ag*MsWFWhv1ujA6Z-ewf>uHv>E*KK3w-%uPD`#8woB>vlL?=xKUMEaeC_CH+|SNk3!{eFQ+dWy|FSv`?} zQ5>{teq{mFbsR+0X@8CWe`fXnOM9{Z+vQtaE>_OBg_fIR2=A_1d#BpBB(NDg%!kC30) zzgYC3U`KAfx?0u~(oFZuH-cD=@=e-vl!J z3;cU7>^lp>I!tpFYDJ^-S(sQ>j~XV;=jB0KEpLnF8y96;B=8y>%W!91S07v zHuGflL;^)(UA?MLnXcaTZ^_txHLxaNuU zJK^sFuBrdt|CGQdJr$7WTwf$$H$ z)8n8%ARCgG-!no`Nhq({{=Op}>AA@1F37%aNBgX+uD75W{yi9^`NzdUt4)uC`1?&W z8CQz%`A7iq;5x(o+F>&h5{CB`w6ud!3c40Q!6$7+- zS8Rq|?+;RR*>#NP&HH`kF4O&YB#g%lISl~1)+MlzYm%2zoS4l zc4$o741n+1>`x`7YP{o?+BbnBnPwnciC3a zgMuBo_3vu&-zQA>-$Ot)q~8f!grMrmf4_kAQ2bRgvI`Zu`A`M1o)wtkmtsMNF1v2J z|2}8B|Bi$4m?5WlpiBN+|9;VJKWgv4hnd9xaP57DYo18Ii(eOTP5rmuE`d*aDj?6f zzDU5xe^+d;EP=ZA-(nv@L+LnnlYr}BXtpH%x>!6`EZrs=5Z-o#%aPw=q!*087bZOY z?}t|*UIhL=hw${j*Rdd8319U36DHF8a;IQUK_1<9>}rXF=r1IiR+6%N99L$K8v-@@ z-rfkfCV3^5if-yE4k|%kAlyu zR?>Cwt4BvRc*KnC=|M9cdHESR*=i-6jQ&5I{EOBejaTV42AaS(5+M8Jzef}(SYF-l zF;BiJ5G&6@Z0yPY@)9tLwpcwKP8?sGx>9&DWBBqJe_#noGj7dPyxn zU;L%ioXoyG)EpsG1L<;!3pt0IrpEWkBM;IT(ZHL>tkb8A8A~MbX9EeKR85Z zg4+Iazklto>Z?&d?(ZuVm+d+uc#WfT?Q-0;&zkc2H`I-siF+)y#OFG3h z^A!LftwK9kTYb~U1>EVI!9V}hrT_$^ea%yIBerishBOl7etb_lHt=!BIs`_0c z(X*GgphWhG?~&$750l+$W$6r>z>`I4=PR{y|G$#(+uq)Spxe(D9ahn$vc@78=i2JM zHf{&;xcqF z5Kj4!F#j6)i))(i-e!$fb|-F7mVUj;8qmU5-tysRQK#~q@?DE2D-~lE)}ZOkS=x$Y(4zCZBQ0S+~cR9*+2#?Rfq{wsPdB)!C32StDN%x4cp4rZe3z+`Bi6@(7e`KIa4mB9}>kJ`6b zTOJ?G;@%#x``Hasc9-4!R&@Kfg5>hhU**i%!L0Yb_PcxZT*I3Ee)G;7y6%uK1b(l) zWa((z6qCVT>9bT08WE$EyfmQnp>B%2zj%)`KXH)lOlbqwgMgHYfu@D(!Eo+=JUN~oM=aUuHv3scX@t+@HO@_Z{o%4uYe(v-G^1$WKIoChD zS#B2mEL)NkVy_~_t{aERLD%iChC&>4nImbSH_BLzR)Y3swy?_<;Y=;*#;yD<6G)qwW2kv*lGU1jy?X&a>o< z`{lNS?q;`^-_4dhwzzbK9B11a*i6|u?-8Y?_X*Zg>B6Mp?d3C1oOb4XzEA!pzoE5v z+XeEJgtfK@XMQJJCr+1r*DsR~&$^#Y9I~J7y<4^(I6q!adU%T5@%?D_{+5PHSg@}= zVDCtIc*S;EI@`dSmAHiMocNji>cG9ukrg|v*S>qd^VcR~JX&ezJ&tqWOH>=C3h+EQ@_Za#;>!c4_@FL_Sy5Y_wM`I z+NVQoj=+`j-Cf$r?VF943uAUhJ@nN#*7M&_XU@EvYnPady1N z7joZaKRI6+c0nHhc2D`P#WyR>$R6LY7joOl^L|;w zP8WI0kA2+Uw&{`O?88qt$j@y0KzVWPqssg;i|q;NpUO{XhC35Jn5zVJpKsgUG=S~d z_OR{eop;MgkL*(RborpP+uC5ZB66*@ap@Q~ydcxob&}mWBJqdP^Up1j51)HP-cdY4 zDcJdnGHmYDXv^{4Y<%%HIrYv@+CWzE_)1%hBf@&($t0zd&r12GjSXz^Ti#{+It+21 zni|P$ZB{y$-SZoJ@RwHdj~iFYqjyFrjT#+vo_eE|^7Bc%{Pwyj@}1vrljRCuIe+8d z;X!yDP@a$n`QvC+!(8EMwX{xew~ z(>`=}yUrIBuZ*#_1JeK>BqvUAlc>zi@Covl+^DGL&>x4kz1 zemQ*dlh#H%`pJDK9B0!fy{>e)r-SvQpE}rXf3&-tH!j|`XlFBJ)0%+Nvu&rav!{;8 z_dFfX(r<4s7mrx2bX-~#HGa~U?3T|Jkkjg&aod*4qlVti?!2Q&zW0l_ng2`i%9MW( zvW*Vxpgex_N^8OSGWOGJDN4}_uc*$Jv+}8DyIOy09&25YHCf)6Udnd-8ez+GoNzu@ 
zcu;Ya=0;a+dV)2W9q1hYb)a+Epe@!9r~X6Uv2BrUQ&z9&Wqn>}53K7czrA`xROfTA z$s3+sC;J_`Megz2XR_4tZQJ^{yp?~pPqNMXX^;HGtd@%JngjB@UgK6>ka#Y}<>)G%_jco4@?yWc*j8lHh ze#`lE+Cyw;`0QQwI~vG-CC@mmDGix(fv>Ic=swE+xxM76Q;XQ#(al--2iG&7M!RGl zm2`lkHynJw8;_&-0?{Hk_$xvFdan$Q6L9ck#`D&lK$o7Y12BN{!v77^$E)5d7w&kK z;46<;Z|fyUCLm>z)n&ZOpP+ET~$G9^cM^)W(YQjeXk?3GEPgYMP zpnp!a;JD}Oo?rO?9`mYt7?#9e*i5 z-Ja`!IzM0f=A})=^uqOOvICUU^K2nrc)>p>t34p2`rk#7efu4g+nz9dX0%D+&7|<( zeFTH9b!Hzy*7f}B?Opt_K;a~TTS`TA@u$f3&%?HVU79U=9^QZY(s*P)e1E$%-e0Ab z+f^@^0yJ+Z53aQO;U4Y&FWlGsisqeWqEtrkKpqKrB;b*NM*h*nJvH$h!96fmSeis}%yXU~KhMZ)B@9Q<-Ur)cr z?hNfP@B3r_VMR}UFs;qcM_KV!>79E!ZDa2TJ<#j*SNFMKDqo&vOStdwSyt10SAP4_ zq}_6j@riD`e$PZqeD4&a{2oVxZjTI=8RbOx%*a9|`nc1DXC&t(EuPM~FZ-}8MiM_`S0rK z3#!%=y`KGhzS-=;x9rwex5j!edY8Q%HFWr<&&$}xU)QEKTX2j~{fqCEPn^-`2uo|X zdH$?}XI=TP-^_h<-}bYtuJhN%?szXUbu+tX@^7C#-SG?dueKHA=J)y1mD|>>`=aNo zU$N!;|NHOuo8D#HTYs{zWaXPIY{{rGYk!d0Z3TDyciz8`R>}8W`=+n*%?3t!!l}o) z1b%UrnVtVoe$9K!T71dE&nGe*7<(spty1b;r_=^^gr zRhJh;w5S+;;B>WkdO9h)cKLk$URC-;&wi?8hwaoE);Mxe)Y_RPZ1|LQCl7pnj6D_7 zqrd%@gKS~gh3{tiA7zARS+HgM-Dzd)$jkdvHoktg#`&JpJI^fHP;-2u|4sCAB>3JXcymisTRp}GG(ZNfK?>}{t9XPh&uHV}4V)u`Ibzeck2A22x4?Ayh9ChV{ z_ZM;au3=pMI+UNfz{J6QQ@Vc^iOs+T0beqbv8Y_z85N& z(E5pHpp_9kkVgU@33w#nk$^`69tr#vGe-s>i46EQ+4aa&o_&ELp2aRQtW;~w&O`i1p@!#WdI$RD0XX}Y213*l3e|upT~9b;IgR6eXNUnc%?Gp z^5ID@o6DuQq7LP5B#-96>ost2S+|Cx2ONFicnXfX7z53lH(@MC;2_xu>YlSl0v-u? zBw&sNs#`y4hTYOW4DJ7Q3-f1DF)p!w;{Us#!AYUTn6+J70X8HGS8k0ItpWy+a|;CH{-| zi@UM?fXHjhKaYQi|Cw-Y2LH2wuF-b|ikiU!4J1W9({}}6ZAzE0+LP6D3Fv>XLHoyp zaUPL)#H^B}%D--++RM*L|IXoeF#NE;y41hfz?K>MHwWk%>7Q>)kN#c$Y^c92jP$SM z8qzo+8-nUu|8koP%1g1oy41hBfORwU?{1)Lq<=-NJ^ELFwdwLlZlr&~IFE>;S^9Sl z{e>U)SC{(7-?y2efAfH@k^cF%_2?hX8Bbol1dQ~rqz!4Dpg~Zb`@d@i<)zqPUFskI zB)J*-cQ4R2(!crFd-SjRS?9qZ2^i^LaC_1?fk{wZ=^yoW86&r>f`8mx_eJYAfDXn%O-dhXZBp)T$2d>EA( JcK1G@`~S=?zi9vf literal 0 HcmV?d00001 diff --git a/test/test_data/anndata_test_bad_header_no_species.h5ad b/test/test_data/anndata/invalid_header_no_species.h5ad similarity index 100% rename from test/test_data/anndata_test_bad_header_no_species.h5ad rename to test/test_data/anndata/invalid_header_no_species.h5ad From 7d27f6df5ec272681a7d3e574a3be98d61f16345 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 13 Sep 2024 11:42:54 -0400 Subject: [PATCH 08/52] Enrich invalid-label H5AD test data --- .../anndata/invalid_disease_label.h5ad | Bin 72768 -> 72768 bytes .../anndata/{anndata_test.h5ad => valid.h5ad} | Bin 2 files changed, 0 insertions(+), 0 deletions(-) rename test/test_data/anndata/{anndata_test.h5ad => valid.h5ad} (100%) diff --git a/test/test_data/anndata/invalid_disease_label.h5ad b/test/test_data/anndata/invalid_disease_label.h5ad index 54da5df2ead12d2a16b119001c7a1f6692504b6a..23cbfa526cb97e53f4013e1d96a6e69ce145af30 100644 GIT binary patch delta 1303 zcmYk4e{54#6vy9b%k~BxtDDj`)4ZDCAM|aqm4WP6T#4C6a4@v8wR`Pe$Lnyeg(;|s z83~h^z#k;&SJ(^@mZ%uboWqA~5pnZZghV2mh|y^5A4W6NKSp9mjFNcneXH3??z!iC z&*$8C?tOP*&a*J*nRzL^o?MDoQIv@oJGL4gE*?d#bL__P@B_37?}vk>O|01v%~X{} zi*c-qI&mi)+`Nld*6=7+r#UWNp-qlgtA(@b4J&w1Uq!?zKRvO z9LFnl7PGpB-*q2u^4LPb=t10C>&MBsia=$6>ZDp=HR1J9L@2(RPvaUjNP4r(cPAYBA*qfee36~(~)DUFg`piv5G_Wc-3`Ge*% zFP@07_GnlArmayWjSI{S$Zq$_^I9nD*C>Mx><6)({XPt_e-g*p58)m5PvIK-VJx!G zqRiANhYjra%UhZ>NJ%X-_mi^;2!zH?Gqusvp0mB^vcKLLC zJZhrhl7Sl^TC@YT!#$X~Qf|L?>6*tCaw@v)FE4Y&PFWFlDq7|(TLqP=*prTLZSi0h~SQ@H!8 zftjx?8j)K)ZRf6@bJH_+`Pbi+(Lwz3{b70meK*tNQ>A1e?+(~?KaAFmXH_!BRh6vq z!Wud>n^(!}9?w>AXnI9hs1&Oea&?_ptrFpH)nvSE24?f?$g_%nIydh*%D?hI`^tCw delta 1221 zcmZ9KUrbwd6vunoA{SOh3k2?TOZze<+~V58%r-zF(ZLi1OdXWo!Yy=kNO48c2h%0K 
zEHTC=18GaG8=)C{uixScn` zJsY)4Lwpn$#DnY?VO)Cz-)U9h(t^rC_6y^_|Hb_rKZMhoAAf1v@o3PCYTl#4svYG8 zU542&j7RWTumNubPff1`OmF>$Mm~gNm1TkCtEo}eT1&&~RzC%?xwVGs;j8e|PGo)jh& zS8zD4Qmg3NBsW*CK&?JVw=V) z+r!voI|iTOrD)#4n?KckW^}_UVm^SYi_NT z8XK81^`wVN?AYO`H9r{slcrZ{5gL!0_fzlfLN@m-y@i{(Bx$H0@8e-Ob~26NNga8Y zY8t0*xSg|gXdW|YmwEK`U7`@Wauf8SAQ3?taQsXfjc0VT=j_`q=Ve>|gxT`ZT|(#C zGQ_z1q5yjZ*oUr<)A-|@ZhFr%uxtY@=HH(&;5lyt>jJb2(1x;wG(Ml#&6NcPez$?A z%`Yx9u)JUc8v?Wo(19PWq%n0xH&0ys(e30xw_fm^`Rz>x-nlA(vsA-k&fu}J-rbG0 zTWREOr7-uUj%MB~^{Sa&+U}w#j(^?GQ_~Wea6YWzLZly^QH2ci+&8}wB{ARJ^lnqc zdE>uQMp{l_{&pJC+q#+k{(To^&9db?B{Yd&*N*a(#n+ScilxIZYaZNldCkyIxeylD z^%+H>)Xarq(x(meT3#WKYbK-6)HTCnx%aoQn=hAaxs|PyY=xDrCU? Date: Fri, 13 Sep 2024 11:43:55 -0400 Subject: [PATCH 09/52] Consolidate URLs, clarify names, stub new tests --- test/js/lib/validate-anndata.test.js | 47 ++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js index 26c9044dc..be43f8809 100644 --- a/test/js/lib/validate-anndata.test.js +++ b/test/js/lib/validate-anndata.test.js @@ -2,10 +2,15 @@ import { getHdf5File, parseAnnDataFile, getAnnDataHeaders, checkOntologyIdFormat } from 'lib/validation/validate-anndata' +const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data' + +// TODO: Uncomment this after PR merge +// const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata' + describe('Client-side file validation for AnnData', () => { it('Parses AnnData headers', async () => { - // eslint-disable-next-line max-len - const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test.h5ad' + const url = `${BASE_URL}/anndata_test.h5ad` + // const url = `${BASE_URL}/valid.h5ad` // Uncomment after PR merge const expectedHeaders = [ '_index', 'biosample_id', @@ -20,22 +25,22 @@ describe('Client-side file validation for AnnData', () => { 'species', 'species__ontology_label' ] - const remoteProps = {url} + const remoteProps = { url } const hdf5File = await getHdf5File(url, remoteProps) const headers = await getAnnDataHeaders(hdf5File) expect(headers).toEqual(expectedHeaders) }) it('Reports AnnData with valid headers as valid', async () => { - // eslint-disable-next-line max-len - const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test.h5ad' + const url = `${BASE_URL}/anndata_test.h5ad` + // const url = `${BASE_URL}/valid.h5ad` // Uncomment after PR merge const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(0) }) it('Reports AnnData with invalid headers as invalid', async () => { - // eslint-disable-next-line max-len - const url = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test_bad_header_no_species.h5ad' + const url = `${BASE_URL}/anndata_test_bad_header_no_species.h5ad` + // const url = `${BASE_URL}/invalid_header_no_species.h5ad` // Uncomment after PR merge const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(1) @@ -58,8 +63,8 @@ describe('Client-side file validation for AnnData', () => { }) it('Parses AnnData rows and reports invalid ontology IDs', async () => { - // eslint-disable-next-line max-len - const url = 
'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata_test_invalid_disease.h5ad' + const url = `${BASE_URL}/anndata_test_invalid_disease.h5ad` + // const url = `${BASE_URL}/invalid_disease_id.h5ad` // Uncomment after PR merge const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(1) @@ -71,4 +76,28 @@ describe('Client-side file validation for AnnData', () => { ] expect(parseResults.issues[0]).toEqual(expectedIssue) }) + + // it('Reports valid ontology labels as valid', async () => { + // const issues = await checkOntologyLabels( + // // Underscore or colon can delimit shortname and number; + // // disease can use MONDO or PATO IDs. + // 'disease', ['MONDO_0000001', 'MONDO:0000001', 'PATO:0000001'] + // ) + // expect(issues).toHaveLength(0) + // }) + + // TODO: Uncomment after PR merge + // it('Parses AnnData rows and reports invalid ontology labels', async () => { + // const url = `${BASE_URL}/invalid_disease_id.h5ad` + // const parseResults = await parseAnnDataFile(url) + + // expect(parseResults.issues).toHaveLength(1) + + // const expectedIssue = [ + // 'error', + // 'ontology:label-lookup-error', + // 'Ontology ID "FOO_0000042" is not among accepted ontologies (MONDO, PATO) for key "disease"' + // ] + // expect(parseResults.issues[0]).toEqual(expectedIssue) + // }) }) From e2f15a6f0cb682eb5464172f114eb2a3d6f79ef0 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 13 Sep 2024 11:47:36 -0400 Subject: [PATCH 10/52] Scaffold getOntologyLabels to collate AnnData IDs and labels --- .../lib/validation/ontology-validation.js | 8 +- .../lib/validation/validate-anndata.js | 82 ++++++++++++++++++- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 964f72792..837c8bcaf 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -6,7 +6,7 @@ import { const ONTOLOGY_BASE_URL = 'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' + - 'ew-refine-mini-ontologies/ingest/validation/ontologies/' + 'development/ingest/validation/ontologies/' /** Quickly retrieve current version cache key for ontologies */ async function fetchOntologyCacheVersion() { @@ -80,13 +80,12 @@ export async function cacheFetch(url) { * ... 
* } */ -async function fetchOntologies() { +export async function fetchOntologies() { if (window.SCP.ontologies) { // Reuse fetched, processed ontologies from this page load return window.SCP.ontologies } - const t0 = Date.now() const ontologies = {} const ontologyNames = getOntologyShortNames() @@ -113,9 +112,6 @@ async function fetchOntologies() { } } - const t1 = Date.now() - console.log(t1-t0) - window.SCP.ontologies = ontologies return ontologies } diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 837d023f9..65b87e08e 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -5,7 +5,7 @@ import { validateUnique, validateRequiredMetadataColumns, metadataSchema, REQUIRED_CONVENTION_COLUMNS } from './shared-validation' -import { getAcceptedOntologies } from './ontology-validation' +import { getAcceptedOntologies, fetchOntologies } from './ontology-validation' /** Get ontology ID values for key in AnnData file */ async function getOntologyIds(key, hdf5File) { @@ -38,6 +38,7 @@ async function getOntologyIds(key, hdf5File) { return ontologyIds } +window.getOntologyIds = getOntologyIds /** Get annotation headers for a key (e.g. obs) from an HDF5 file */ async function getAnnotationHeaders(key, hdf5File) { @@ -120,6 +121,81 @@ export function checkOntologyIdFormat(key, ontologyIds) { return issues } +/** Validate author's annotation labels match those in ontologies */ +async function checkOntologyLabels(hdf5File) { + +} + +/** Get ontology ID values for key in AnnData file */ +async function getOntologyLabels(key, hdf5File) { + let ontologyIds = [] + + const obs = await hdf5File.get('obs') + const obsValues = await Promise.all(obs.values) + + // Old versions of the AnnData spec used __categories as an obs. + // However, in new versions (since before 2023-01-23) of AnnData spec, + // categorical arrays are encoded as self-contained groups containing their + // own `categories` and `codes`. + // See e.g. https://github.com/scverse/anndata/issues/879 + const internalCategories = obsValues.find(o => o.name.endsWith('__categories')) + + let resolvedCategories = obsValues + if (internalCategories) { + resolvedCategories = await Promise.all(internalCategories.values) + } + const group = resolvedCategories.find(o => o.name.endsWith(key)) + if (group) { + if (internalCategories) { + ontologyIds = await group.value + } else { + // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, + // sex) into a container with a `categories` frame and a `code` frame. + // + // - categories: external values, non-redundant array. E.g.: + // ["tuberculosis", "TB", "foo"] or ["female"] + // + // - codes: internal values, redundant array of integers that specify + // the index (position) of each category value in the array of obs + // (cells) + // + // This organization greatly decreases filesize, but requires more code + // to map paired obs annotations like `disease` (ontology IDs) to + // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. 
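+      // Hypothetical illustration (values not from a real file): categories
+      // ['MONDO_0018076', 'MONDO_0005109'] with codes [0, 0, 1] decode to
+      // per-cell values ['MONDO_0018076', 'MONDO_0018076', 'MONDO_0005109'],
+      // i.e. codes.map(i => categories[i])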
+ const categories = await group.values[0] + ontologyIds = await categories.value + + const codes = await group.values[1] + const indexes = await codes.value + } + } + + return ontologyIds +} + +/** Validate ontology labels for required metadata columns in AnnData file */ +async function validateOntologyLabels(hdf5File) { + window.hdf5File = hdf5File + let issues = [] + + return issues + + // Validate IDs for species, organ, disease, and library preparation protocol + for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) { + const column = REQUIRED_CONVENTION_COLUMNS[i] + if (!column.endsWith('__ontology_label')) {continue} + const key = column.split('__ontology_label')[0] + const ontologyIds = await getOntologyIds(key, hdf5File) + + issues = issues.concat( + checkOntologyIdFormat(key, ontologyIds) + ) + } + + return issues +} + + /** Validate ontology IDs for required metadata columns in AnnData file */ async function validateOntologyIdFormat(hdf5File) { let issues = [] @@ -154,8 +230,12 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { const requiredMetadataIssues = validateRequiredMetadataColumns([headers], true) let ontologyIdFormatIssues = [] + let ontologyLabelIssues = [] if (requiredMetadataIssues.length === 0) { ontologyIdFormatIssues = await validateOntologyIdFormat(hdf5File) + if (ontologyIdFormatIssues.length === 0) { + ontologyLabelIssues = await validateOntologyLabels(hdf5File) + } } issues = issues.concat( From 0dbed6d21cddc36641b84b6aec2a52cf735be032 Mon Sep 17 00:00:00 2001 From: bistline Date: Mon, 16 Sep 2024 11:30:36 -0400 Subject: [PATCH 11/52] Handle missing cluster-based annotation in plot cache --- app/javascript/components/explore/plot-data-cache.js | 3 ++- db/seed/synthetic_studies/chicken_raw_counts/study_info.json | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/app/javascript/components/explore/plot-data-cache.js b/app/javascript/components/explore/plot-data-cache.js index 78ac277ea..abdd92178 100644 --- a/app/javascript/components/explore/plot-data-cache.js +++ b/app/javascript/components/explore/plot-data-cache.js @@ -289,7 +289,8 @@ export function createCache() { Fields.cellsAndCoords.merge(cacheEntry, scatter) // only merge in annotation values if the annotation matches (or the default was requested, so // we can then assume the response matches) - if (!requestedAnnotation.name || scatter.annotParams.name === requestedAnnotation.name) { + // annotParams may be undefined in spatial UX if a cluster-based annotation does not exist for the plot + if (!requestedAnnotation.name || scatter.annotParams?.name === requestedAnnotation.name) { Fields.annotation.merge(cacheEntry, scatter) } if (scatter.genes.length && scatter.genes.join('') === requestedGenes.join('')) { diff --git a/db/seed/synthetic_studies/chicken_raw_counts/study_info.json b/db/seed/synthetic_studies/chicken_raw_counts/study_info.json index 4d51a06b9..228d16593 100644 --- a/db/seed/synthetic_studies/chicken_raw_counts/study_info.json +++ b/db/seed/synthetic_studies/chicken_raw_counts/study_info.json @@ -39,6 +39,11 @@ "is_spatial": true, "spatial_cluster_associations": ["X cluster"] }, + { + "type": "Cluster", + "filename": "cluster_odd_labels.txt", + "name": "cluster_odd_labels" + }, { "type": "Cluster", "filename": "cluster_square.txt", From f7b86fbc073cad34cefca0fb18a0fc34c548d285 Mon Sep 17 00:00:00 2001 From: bistline Date: Mon, 16 Sep 2024 13:40:22 -0400 Subject: [PATCH 12/52] Fixing test instability issues, NeMO API changes --- 
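Note: a sketch of the collection payload shape the taxon_names change
accommodates, mirroring the assertions updated in nemo_test.rb below
(values come from that test, not a live NeMO call):

    load_study['taxa'] # => [{ 'name' => 'human', 'cv_term_id' => 'NCBI:txid9606' }]
    taxon_names        # => ['human']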
app/models/import_service_config/nemo.rb | 2 +- test/factories/study_file.rb | 3 ++- .../import_service_config/nemo_test.rb | 4 ++-- test/models/ingest_job_test.rb | 23 +++++++++++++------ 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/app/models/import_service_config/nemo.rb b/app/models/import_service_config/nemo.rb index c6f5f1f82..97d4e7a4b 100644 --- a/app/models/import_service_config/nemo.rb +++ b/app/models/import_service_config/nemo.rb @@ -72,7 +72,7 @@ def study_file_default_settings # retrieve common species names from associated collection def taxon_names - load_study&.[]('taxonomies') || [] + load_study&.[]('taxa')&.map { |t| t['name']} || [] end # map attribute names SCP Study attributes onto NeMO attribute names diff --git a/test/factories/study_file.rb b/test/factories/study_file.rb index f6559f8db..a6ca5f08b 100644 --- a/test/factories/study_file.rb +++ b/test/factories/study_file.rb @@ -173,6 +173,7 @@ # phex: [['cellA', 0.6],['cellB', 6.1], ['cellC', 4.5]] # } expression_input { {} } + has_raw_counts { true } end after(:create) do |file, evaluator| file.build_ann_data_file_info @@ -195,7 +196,7 @@ file.build_expression_file_info(library_preparation_protocol: "10x 5' v3") file.expression_file_info.save file.ann_data_file_info.has_expression = true - file.ann_data_file_info.has_raw_counts = true + file.ann_data_file_info.has_raw_counts = evaluator.has_raw_counts %w[raw processed].each do |matrix_type| FactoryBot.create(:data_array, array_type: 'cells', diff --git a/test/integration/external/import_service_config/nemo_test.rb b/test/integration/external/import_service_config/nemo_test.rb index 26de92b66..13abe6d23 100644 --- a/test/integration/external/import_service_config/nemo_test.rb +++ b/test/integration/external/import_service_config/nemo_test.rb @@ -96,8 +96,8 @@ class NemoTest < ActiveSupport::TestCase test 'should load study analog' do study = @configuration.load_study assert_equal '"Human variation study (10x), GRU"', study['name'] - assert_equal "10x chromium 3' v3 sequencing", study['technique'] - assert_equal %w[human], study['taxonomies'] + assert_equal ["10x chromium 3' v3 sequencing"], study['techniques'] + assert_equal [{"name"=>"human", "cv_term_id"=>"NCBI:txid9606"}], study['taxa'] end # TODO: SCP-5565 Check with NeMO re API, update and re-enable this test diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb index f8e7c1368..ca9dec9fd 100644 --- a/test/models/ingest_job_test.rb +++ b/test/models/ingest_job_test.rb @@ -187,13 +187,22 @@ class IngestJobTest < ActiveSupport::TestCase test 'should identify AnnData parses with extraction in mixpanel' do # parsed AnnData - ann_data_file = FactoryBot.create(:ann_data_file, name: 'test.h5ad', study: @basic_study) - ann_data_file.ann_data_file_info.reference_file = false - ann_data_file.ann_data_file_info.data_fragments = [ - { _id: BSON::ObjectId.new.to_s, data_type: :cluster, obsm_key_name: 'X_umap', name: 'UMAP' } - ] - ann_data_file.upload_file_size = 1.megabyte - ann_data_file.save + ann_data_file = FactoryBot.create(:ann_data_file, + name: 'test.h5ad', + study: @basic_study, + upload_file_size: 1.megabyte, + cell_input: %w[A B C D], + has_raw_counts: false, + reference_file: false, + annotation_input: [ + { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] } + ], + coordinate_input: [ + { X_umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } } + ], + expression_input: { + 'phex' => [['A', 0.3], ['B', 1.0], ['C', 0.5], ['D', 0.1]] + }) params_object = 
AnnDataIngestParameters.new( anndata_file: ann_data_file.gs_url, obsm_keys: ann_data_file.ann_data_file_info.obsm_key_names, file_size: ann_data_file.upload_file_size From 8b26d845f9e614ddc4857f64e3997f08115ea146 Mon Sep 17 00:00:00 2001 From: bistline Date: Mon, 16 Sep 2024 15:10:43 -0400 Subject: [PATCH 13/52] Further addressing test instability, OOO issues --- test/models/ingest_job_test.rb | 111 +++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 40 deletions(-) diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb index ca9dec9fd..f663e3853 100644 --- a/test/models/ingest_job_test.rb +++ b/test/models/ingest_job_test.rb @@ -9,15 +9,14 @@ class IngestJobTest < ActiveSupport::TestCase user: @user, test_array: @@studies_to_clean) - @basic_study_exp_file = FactoryBot.create(:study_file, + @basic_study_exp_file = FactoryBot.create(:expression_file, name: 'dense.txt', file_type: 'Expression Matrix', - study: @basic_study) + study: @basic_study, + expression_input: { + 'PTEN' => [['A', 0],['B', 3],['C', 1.5]] + }) - @pten_gene = FactoryBot.create(:gene_with_expression, - name: 'PTEN', - study_file: @basic_study_exp_file, - expression_input: [['A', 0],['B', 3],['C', 1.5]]) @basic_study_exp_file.build_expression_file_info(is_raw_counts: false, library_preparation_protocol: 'MARS-seq', modality: 'Transcriptomic: unbiased', @@ -26,11 +25,6 @@ class IngestJobTest < ActiveSupport::TestCase @basic_study_exp_file.upload_file_size = 1024 @basic_study_exp_file.save! - # insert "all cells" array for this expression file - DataArray.create!(study_id: @basic_study.id, study_file_id: @basic_study_exp_file.id, values: %w(A B C), - name: "#{@basic_study_exp_file.name} Cells", array_type: 'cells', linear_data_type: 'Study', - linear_data_id: @basic_study.id, array_index: 0, cluster_name: @basic_study_exp_file.name) - @other_matrix = FactoryBot.create(:study_file, name: 'dense_2.txt', file_type: 'Expression Matrix', @@ -39,6 +33,13 @@ class IngestJobTest < ActiveSupport::TestCase modality: 'Transcriptomic: unbiased', biosample_input_type: 'Whole cell') @other_matrix.upload_file_size = 2048 @other_matrix.save! + @basic_study.reload + end + + teardown do + @basic_study.default_options[:annotation] = nil + @basic_study.save + @basic_study_exp_file.update(parse_status: 'parsed') end test 'should hold ingest until other matrix validates' do @@ -89,7 +90,25 @@ class IngestJobTest < ActiveSupport::TestCase test 'should gather job properties to report to mixpanel' do # positive test - job = IngestJob.new(study: @basic_study, study_file: @basic_study_exp_file, user: @user, action: :ingest_expression) + study = FactoryBot.create(:detached_study, + name_prefix: 'IngestJob Mixpanel Test', + user: @user, + test_array: @@studies_to_clean) + + study_file = FactoryBot.create(:expression_file, + name: 'matrix.txt', + study:, + expression_input: { + 'Phex' => [['A', 1],['B', 2],['C', 0.5]] + }) + + study_file.build_expression_file_info(is_raw_counts: false, + library_preparation_protocol: "10x 5' v3", + modality: 'Transcriptomic: unbiased', + biosample_input_type: 'Whole cell') + study_file.upload_file_size = 1.megabyte + study_file.save! 
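+    # the mocked pipeline below reports createTime one minute before endTime,
+    # which is what yields the expected perfTime of 60000 ms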
+ job = IngestJob.new(study:, study_file:, user: @user, action: :ingest_expression) mock = Minitest::Mock.new now = DateTime.now.in_time_zone mock_metadata = { @@ -112,20 +131,20 @@ class IngestJobTest < ActiveSupport::TestCase mock.expect :error, nil mock.expect :done?, true - cells = @basic_study.expression_matrix_cells(@basic_study_exp_file) - num_cells = cells.present? ? cells.count : 0 + cells = study.expression_matrix_cells(study_file) + num_cells = cells.count ApplicationController.life_sciences_api_client.stub :get_pipeline, mock do expected_outputs = { perfTime: 60000, - fileType: @basic_study_exp_file.file_type, - fileSize: @basic_study_exp_file.upload_file_size, - fileName: @basic_study_exp_file.name, + fileType: study_file.file_type, + fileSize: study_file.upload_file_size, + fileName: study_file.name, trigger: 'upload', action: :ingest_expression, - studyAccession: @basic_study.accession, + studyAccession: study.accession, jobStatus: 'success', - numGenes: @basic_study.genes.count, + numGenes: study.genes.count, is_raw_counts: false, numCells: num_cells, machineType: 'n2d-highmem-4', @@ -139,7 +158,15 @@ class IngestJobTest < ActiveSupport::TestCase end # negative test - job = IngestJob.new(study: @basic_study, study_file: @other_matrix, user: @user, action: :ingest_expression) + other_file = FactoryBot.create(:study_file, + name: 'matrix_2.txt', + file_type: 'Expression Matrix', + study:) + other_file.build_expression_file_info(is_raw_counts: false, library_preparation_protocol: 'MARS-seq', + modality: 'Transcriptomic: unbiased', biosample_input_type: 'Whole cell') + other_file.upload_file_size = 2048 + other_file.save! + job = IngestJob.new(study:, study_file: other_file, user: @user, action: :ingest_expression) mock = Minitest::Mock.new now = DateTime.now.in_time_zone mock_metadata = { @@ -164,12 +191,12 @@ class IngestJobTest < ActiveSupport::TestCase ApplicationController.life_sciences_api_client.stub :get_pipeline, mock do expected_outputs = { perfTime: 120000, - fileType: @other_matrix.file_type, - fileSize: @other_matrix.upload_file_size, - fileName: @other_matrix.name, + fileType: other_file.file_type, + fileSize: other_file.upload_file_size, + fileName: other_file.name, trigger: "upload", action: :ingest_expression, - studyAccession: @basic_study.accession, + studyAccession: study.accession, jobStatus: 'failed', numCells: 0, is_raw_counts: false, @@ -493,40 +520,44 @@ class IngestJobTest < ActiveSupport::TestCase end test 'should set default annotation even if not visualizable' do - assert @basic_study.default_annotation.nil? + study = FactoryBot.create(:detached_study, + name_prefix: 'Default Annotation Test', + user: @user, + test_array: @@studies_to_clean) + assert study.default_annotation.nil? 
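+    # default_annotation identifiers take the form {name}--{type}--{scope};
+    # the 'invalid' scope asserted below marks annotations that exist but
+    # cannot be visualized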
# test metadata file with a single annotation with only one unique value metadata_file = FactoryBot.create(:metadata_file, name: 'metadata.txt', - study: @basic_study, + study:, cell_input: %w[A B C], annotation_input: [ { name: 'species', type: 'group', values: %w[dog dog dog] } ]) - job = IngestJob.new(study: @basic_study, study_file: metadata_file, user: @user, action: :ingest_cell_metadata) + job = IngestJob.new(study:, study_file: metadata_file, user: @user, action: :ingest_cell_metadata) job.set_study_default_options - @basic_study.reload - assert_equal 'species--group--invalid', @basic_study.default_annotation + study.reload + assert_equal 'species--group--invalid', study.default_annotation # reset default annotation, then test cluster file with a single annotation with only one unique value - @basic_study.cell_metadata.destroy_all - @basic_study.default_options = {} - @basic_study.save - assert @basic_study.default_annotation.nil? - assert @basic_study.default_cluster.nil? + study.cell_metadata.destroy_all + study.default_options = {} + study.save + assert study.default_annotation.nil? + assert study.default_cluster.nil? cluster_file = FactoryBot.create(:cluster_file, - name: 'cluster.txt', study: @basic_study, + name: 'cluster.txt', study:, cell_input: { x: [1, 4, 6], y: [7, 5, 3], cells: %w[A B C] }, annotation_input: [{ name: 'foo', type: 'group', values: %w[bar bar bar] }]) - job = IngestJob.new(study: @basic_study, study_file: cluster_file, user: @user, action: :ingest_cluster) + job = IngestJob.new(study:, study_file: cluster_file, user: @user, action: :ingest_cluster) job.set_study_default_options - @basic_study.reload - cluster = @basic_study.cluster_groups.first - assert_equal cluster, @basic_study.default_cluster - assert_equal 'foo--group--invalid', @basic_study.default_annotation + study.reload + cluster = study.cluster_groups.first + assert_equal cluster, study.default_cluster + assert_equal 'foo--group--invalid', study.default_annotation end test 'should launch DE jobs if study is eligible' do From ebd05061d87687b8dc5c6ce1aab5ea9bd251e6a4 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Mon, 16 Sep 2024 17:45:16 -0400 Subject: [PATCH 14/52] Add more id-label collation --- .../lib/validation/validate-anndata.js | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 65b87e08e..02286cd12 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -122,8 +122,45 @@ export function checkOntologyIdFormat(key, ontologyIds) { } /** Validate author's annotation labels match those in ontologies */ -async function checkOntologyLabels(hdf5File) { +async function checkOntologyLabels(ids, idIndexes, labels, labelIndexes) { + let issues = [] + + const ontologies = await fetchOntologies() + const labelIdPairs = [] + for (let i = 0; i < idIndexes.length; i++) { + const id = ids[idIndexes[i]] + // console.log('id', id) + for (let j = 0; j < labelIndexes.length; j++) { + const label = labels[labelIndexes[j]] + // console.log('label', label) + labelIdPairs.push(`${id} || ${label}`) + } + } + const rawUniques = Array.from(new Set(labelIdPairs)) + + rawUniques.map(r => { + const [id, label] = r.split(' || ') + const ontologyShortNameLc = id.split(/[_:]/)[0].toLowerCase() + const ontology = ontologies[ontologyShortNameLc] + if (!(id in ontology)) { + const msg = `Invalid ontology ID: ${id}` + 
issues.push([ + 'error', 'ontology:label-lookup-error', msg, + { subtype: 'ontology:invalid-id' } + ]) + } else { + const validLabels = ontology[id] + if (!(validLabels.includes(label))) { + const validLabelsClause = `Valid labels: ${validLabels.join(', ')}` + const msg = `Invalid ontology label "${id}". ${validLabelsClause}` + issues.push([ + 'error', 'ontology:label-lookup-error', msg, + { subtype: 'ontology:invalid-label' } + ]) + } + } +}) } /** Get ontology ID values for key in AnnData file */ @@ -167,6 +204,7 @@ async function getOntologyLabels(key, hdf5File) { const codes = await group.values[1] const indexes = await codes.value + } } From 90d4c34b441ae31ae3b3daa999daab0d913a954d Mon Sep 17 00:00:00 2001 From: bistline Date: Tue, 17 Sep 2024 09:43:48 -0400 Subject: [PATCH 15/52] Retain plot title when missing annotation --- .../components/visualization/ScatterPlot.jsx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/app/javascript/components/visualization/ScatterPlot.jsx b/app/javascript/components/visualization/ScatterPlot.jsx index 0062c3949..4cca8f749 100644 --- a/app/javascript/components/visualization/ScatterPlot.jsx +++ b/app/javascript/components/visualization/ScatterPlot.jsx @@ -598,17 +598,15 @@ function RawScatterPlot({ return (
{ ErrorComponent } + { hasMissingAnnot &&
"{cluster}" does not have the requested annotation "{loadedAnnotation}"
} - { !hasMissingAnnot && - - }
From: Eric Weitz
Date: Tue, 17 Sep 2024 10:08:16 -0400 Subject: [PATCH 16/52] Finish integrating ontology label validation --- .../lib/validation/validate-anndata.js | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 02286cd12..25e7d8657 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -122,18 +122,16 @@ export function checkOntologyIdFormat(key, ontologyIds) { } /** Validate author's annotation labels match those in ontologies */ -async function checkOntologyLabels(ids, idIndexes, labels, labelIndexes) { - let issues = [] +async function checkOntologyLabels(key, ids, idIndexes, labels, labelIndexes) { + const issues = [] const ontologies = await fetchOntologies() const labelIdPairs = [] for (let i = 0; i < idIndexes.length; i++) { const id = ids[idIndexes[i]] - // console.log('id', id) for (let j = 0; j < labelIndexes.length; j++) { const label = labels[labelIndexes[j]] - // console.log('label', label) labelIdPairs.push(`${id} || ${label}`) } } @@ -152,19 +150,22 @@ async function checkOntologyLabels(ids, idIndexes, labels, labelIndexes) { } else { const validLabels = ontology[id] if (!(validLabels.includes(label))) { - const validLabelsClause = `Valid labels: ${validLabels.join(', ')}` - const msg = `Invalid ontology label "${id}". ${validLabelsClause}` + const prettyLabels = validLabels.join(', ') + const validLabelsClause = `Valid labels for ${id}: ${prettyLabels}` + const msg = `Invalid ${key} label "${label}". ${validLabelsClause}` issues.push([ 'error', 'ontology:label-lookup-error', msg, { subtype: 'ontology:invalid-label' } ]) } } -}) + }) + + return issues } /** Get ontology ID values for key in AnnData file */ -async function getOntologyLabels(key, hdf5File) { +async function getOntologyIdsAndLabels(requiredName, hdf5File) { let ontologyIds = [] const obs = await hdf5File.get('obs') @@ -181,10 +182,16 @@ async function getOntologyLabels(key, hdf5File) { if (internalCategories) { resolvedCategories = await Promise.all(internalCategories.values) } - const group = resolvedCategories.find(o => o.name.endsWith(key)) - if (group) { + + const idKey = requiredName + const labelKey = `${requiredName}__ontology_label` + + const idGroup = obsValues.find(o => o.name.endsWith(idKey)) + const labelGroup = obsValues.find(o => o.name.endsWith(labelKey)) + + if (idGroup) { if (internalCategories) { - ontologyIds = await group.value + ontologyIds = await idGroup.value } else { // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, // sex) into a container with a `categories` frame and a `code` frame. @@ -199,16 +206,19 @@ async function getOntologyLabels(key, hdf5File) { // This organization greatly decreases filesize, but requires more code // to map paired obs annotations like `disease` (ontology IDs) to // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. 
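+      // e.g. (hypothetical values): idGroup might decode to per-cell IDs via
+      // ids ['MONDO_0018076'] + idIndexes [0, 0], while labelGroup decodes via
+      // labels ['tuberculosis'] + labelIndexes [0, 0]; entry i of each codes
+      // array refers to the same cell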
From e67c580d1e7a5f58e39d5dc1f3f7bf0b9c5f92c2 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Tue, 17 Sep 2024 11:15:45 -0400
Subject: [PATCH 17/52] Fetch ontologies once, not n times

---
 .../lib/validation/ontology-validation.js | 2 ++
 app/javascript/lib/validation/validate-anndata.js | 13 ++++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js
index 837c8bcaf..d0c837976 100644
--- a/app/javascript/lib/validation/ontology-validation.js
+++ b/app/javascript/lib/validation/ontology-validation.js
@@ -10,9 +10,11 @@ const ONTOLOGY_BASE_URL =
 
 /** Quickly retrieve current version cache key for ontologies */
 async function fetchOntologyCacheVersion() {
+  if (window.SCP.ontologiesVersion) { return window.SCP.ontologiesVersion }
   const response = await fetch(`${ONTOLOGY_BASE_URL}version.txt`)
   const text = await response.text()
   const version = text.trim().split('#')[0]
+  window.SCP.ontologiesVersion = version
   return version
 }
 
diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js
index 25e7d8657..e44d90ad7 100644
--- a/app/javascript/lib/validation/validate-anndata.js
+++ b/app/javascript/lib/validation/validate-anndata.js
@@ -122,10 +122,11 @@ export function checkOntologyIdFormat(key, ontologyIds) {
 }
 
 /** Validate author's annotation labels match those in ontologies */
-async function checkOntologyLabels(key, ids, idIndexes, labels, labelIndexes) {
-  const issues = []
+async function checkOntologyLabels(key, ontologies, groups) {
 
-  const ontologies = await fetchOntologies()
+  const [ids, idIndexes, labels, labelIndexes] = groups
+
+  const issues = []
 
   const labelIdPairs = []
   for (let i = 0; i < idIndexes.length; i++) {
@@ -226,15 +227,17 @@ async function validateOntologyLabels(hdf5File) {
   window.hdf5File = hdf5File
   let issues = []
 
+  const ontologies = await fetchOntologies()
+
   // Validate IDs for species, organ, disease, and library preparation protocol
   for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) {
     const column = REQUIRED_CONVENTION_COLUMNS[i]
     if (!column.endsWith('__ontology_label')) {continue}
     const key = column.split('__ontology_label')[0]
-    const [ids, idIndexes, labels, labelIndexes] = await getOntologyIdsAndLabels(key, hdf5File)
+    const groups = await getOntologyIdsAndLabels(key, hdf5File)
 
     issues = issues.concat(
-      await checkOntologyLabels(key, ids, idIndexes, labels, labelIndexes)
+      await checkOntologyLabels(key, ontologies, groups)
     )
   }
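PATCH 17 applies two complementary ideas: memoize the version lookup on a global, and hoist the ontologies fetch out of the per-column loop so it runs once instead of n times. A minimal sketch of the shared pattern, with illustrative names (`getOnce` and `fetchFn` are not part of SCP's code):

    // Cache the first result at module scope so n callers share one fetch
    let cached = null

    async function getOnce(fetchFn) {
      if (!cached) { cached = await fetchFn() }
      return cached
    }

    // Usage: each loop iteration reuses the same resolved object
    // const ontologies = await getOnce(fetchOntologies)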
From 51ef29295f0aa8249337f6d57e332fcd3483c9b7 Mon Sep 17 00:00:00 2001
From: bistline
Date: Tue, 17 Sep 2024 12:43:12 -0400
Subject: [PATCH 18/52] Ensure email delivery on 'special action' failures

---
 app/mailers/single_cell_mailer.rb | 8 +--
 app/models/ingest_job.rb | 8 +--
 test/models/ingest_job_test.rb | 81 +++++++++++++++++++++++++++++--
 3 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/app/mailers/single_cell_mailer.rb b/app/mailers/single_cell_mailer.rb
index ddbf12865..faa90cce0 100644
--- a/app/mailers/single_cell_mailer.rb
+++ b/app/mailers/single_cell_mailer.rb
@@ -38,7 +38,7 @@ def notify_admin_upload_fail(study_file, error)
     @study_file = study_file
     @error = error
     @user = @study.user
-    mail(to: emails, subject: '[Single Cell Portal ERROR] FireCloud auto-upload fail in ' + @study.accession) do |format|
+    mail(to: emails, subject: "[Single Cell Portal ERROR] FireCloud auto-upload fail in #{@study.accession}") do |format|
       format.html
     end
   end
@@ -51,7 +51,7 @@ def notify_user_upload_fail(study_file, study, user)
     dev_email_config = AdminConfiguration.find_by(config_type: 'QA Dev Email')
     dev_email = dev_email_config.present? ? dev_email_config.value : nil
     title = "#{study_file.upload_file_name} did not finish uploading"
-    mail(to: user.email, bcc: dev_email, subject: '[Single Cell Portal Notifier] ' + title)
+    mail(to: user.email, bcc: dev_email, subject: "[Single Cell Portal Notifier] #{title}")
   end
 
   def notify_user_parse_complete(email, title, message, study)
@@ -72,7 +72,7 @@ def notify_admin_parse_fail(user_email, title, contents)
       dev_email = dev_email_config.value
       @contents = contents
       @user_email = user_email
-      mail(to: dev_email, subject: '[Single Cell Portal Admin Notification] ' + title)
+      mail(to: dev_email, subject: "[Single Cell Portal Admin Notification] #{title}")
     end
   end
 
@@ -91,7 +91,7 @@ def notify_admin_parse_launch_fail(study, study_file, corresponding_user, ingest
     @error = error
     @action = ingest_action
     title = 'Ingest Pipeline Launch Failure'
-    mail(to: emails, subject: '[Single Cell Portal Admin Notification] ' + title)
+    mail(to: emails, subject: "[Single Cell Portal Admin Notification] #{title}")
   end
 
   def daily_disk_status
diff --git a/app/models/ingest_job.rb b/app/models/ingest_job.rb
index 6d8869b27..16f6d1592 100644
--- a/app/models/ingest_job.rb
+++ b/app/models/ingest_job.rb
@@ -377,7 +377,8 @@ def poll_for_completion(run_at: 1.minute.from_now)
         log_error_messages
         log_to_mixpanel # log before queuing file for deletion to preserve properties
         # don't delete files or notify users if this is a 'special action', like DE or image pipeline jobs
-        handle_ingest_failure unless special_action?
+        subject = "Error: #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has failed"
+        handle_ingest_failure(subject) unless special_action?
         admin_email_content = generate_error_email_body(email_type: :dev)
         SingleCellMailer.notify_admin_parse_fail(user.email, subject, admin_email_content).deliver_now
       else
@@ -390,7 +391,7 @@ def poll_for_completion(run_at: 1.minute.from_now)
   # will automatically clean up data and notify user
   # in case of subsampling, only subsampled data cleanup is run and all other data is left in place
   # this reduces churn for study owners as full-resolution data is still valid
-  def handle_ingest_failure
+  def handle_ingest_failure(email_subject)
     if action.to_sym == :ingest_subsample
       study_file.update(parse_status: 'parsed') # reset parse flag
       cluster_name = cluster_name_by_file_type
@@ -408,9 +409,8 @@ def handle_ingest_failure
       end
     end
   end
-    subject = "Error: #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has failed"
     user_email_content = generate_error_email_body
-    SingleCellMailer.notify_user_parse_fail(user.email, subject, user_email_content, study).deliver_now
+    SingleCellMailer.notify_user_parse_fail(user.email, email_subject, user_email_content, study).deliver_now
   end
 
   # TODO (SCP-4709, SCP-4710) Processed and Raw expression files
diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb
index f663e3853..fe0c0e251 100644
--- a/test/models/ingest_job_test.rb
+++ b/test/models/ingest_job_test.rb
@@ -362,9 +362,9 @@ class IngestJobTest < ActiveSupport::TestCase
           }
         ]
       },
-      createTime: (now - 1.minutes).to_default_s,
-      startTime: (now - 1.minutes).to_default_s,
-      endTime: now.to_default_s
+      createTime: (now - 1.minutes).to_s,
+      startTime: (now - 1.minutes).to_s,
+      endTime: now.to_s
     }.with_indifferent_access
 
     pipeline_mock = MiniTest::Mock.new
@@ -713,7 +713,7 @@ class IngestJobTest < ActiveSupport::TestCase
     ls_mock.expect :get_pipeline, pipeline_mock, [{ name: pipeline_name }]
     ApplicationController.stub :firecloud_client, mock do
       ApplicationController.stub :life_sciences_api_client, ls_mock do
-        job.handle_ingest_failure
+        job.handle_ingest_failure('parse failure')
         cluster_file.reload
         study.reload
         cluster.reload
@@ -754,7 +754,7 @@ class IngestJobTest < ActiveSupport::TestCase
     ls_mock.expect :get_pipeline, pipeline_mock, [{ name: pipeline_name }]
     ApplicationController.stub :firecloud_client, mock do
       ApplicationController.stub :life_sciences_api_client, ls_mock do
-        failed_job.handle_ingest_failure
+        failed_job.handle_ingest_failure('parse failure')
         mock.verify
       end
     end
@@ -800,6 +800,77 @@ class IngestJobTest < ActiveSupport::TestCase
     end
   end
 
+  test 'should ensure email delivery on special action failures' do
+    study = FactoryBot.create(:detached_study,
+                              name_prefix: 'Special Action Email',
+                              user: @user,
+                              test_array: @@studies_to_clean)
+    study_file = FactoryBot.create(:ann_data_file,
+                                   name: 'matrix.h5ad',
+                                   study:,
+                                   upload_file_size: 1.megabyte,
+                                   cell_input: %w[A B C D],
+                                   annotation_input: [
+                                     { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
+                                   ],
+                                   coordinate_input: [
+                                     { X_umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+                                   ])
+    annotation_file = 'gs://test_bucket/anndata/h5ad_frag.metadata.tsv'
+    cluster_file = 'gs://test_bucket/anndata/h5ad_frag.cluster.X_umap.tsv'
+    params_object = DifferentialExpressionParameters.new(
+      matrix_file_path: 'gs://test_bucket/matrix.h5ad', matrix_file_type: 'h5ad', file_size: study_file.upload_file_size,
+      annotation_file:, cluster_file:, cluster_name: 'umap', annotation_name: 'disease', annotation_scope: 'study'
+    )
+    pipeline_name = SecureRandom.uuid
+    job = IngestJob.new(
+      pipeline_name:, study:, study_file:, user: @user, action: :differential_expression, params_object:
+    )
+    now = DateTime.now.in_time_zone
+    mock_metadata = {
+      events: [
+        { timestamp: now.to_s },
+        { timestamp: (now + 2.minutes).to_s, containerStopped: { exitStatus: 1 } }
+      ],
+      pipeline: {
+        actions: [
+          {
+            commands: [
+              'python', 'ingest_pipeline.py', '--study-id', study.id.to_s, '--study-file-id',
+              study_file.id.to_s, 'differential_expression', '--differential-expression', '--cluster-file', cluster_file,
+              '--annotation-file', annotation_file, '--cluster-name', 'umap', '--annotation-name', 'disease',
+              '--annotation-scope', 'study'
+            ]
+          }
+        ],
+        resources: {
+          virtualMachine: {
+            machineType: 'n2d-highmem-8',
+            bootDiskSizeGb: 300
+          }
+        }
+      },
+      createTime: (now - 3.minutes).to_s,
+      startTime: now.to_s,
+      endTime: now.to_s
+    }.with_indifferent_access
+    mock = Minitest::Mock.new
+    6.times { mock.expect :metadata, mock_metadata }
+    3.times do
+      mock.expect :error, { code: 1, message: 'mock message' }
+      mock.expect :done?, true
+    end
+    email_mock = Minitest::Mock.new
+    email_mock.expect :deliver_now, true
+    ApplicationController.life_sciences_api_client.stub :get_pipeline, mock do
+      SingleCellMailer.stub :notify_admin_parse_fail, email_mock do
+        job.poll_for_completion
+        mock.verify
+        email_mock.verify
+      end
+    end
+  end
+
   test 'should always unset subsampling flags' do
     study = FactoryBot.create(:detached_study,
                               name_prefix: 'Subsample Flag Test',
From b7e2305af074b116373ec72fc95559654a4b8ebb Mon Sep 17 00:00:00 2001
From: bistline
Date: Tue, 17 Sep 2024 13:47:29 -0400
Subject: [PATCH 19/52] Working on persistent raw_counts test failure

---
 test/models/ingest_job_test.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb
index fe0c0e251..edbcb60b6 100644
--- a/test/models/ingest_job_test.rb
+++ b/test/models/ingest_job_test.rb
@@ -234,6 +234,7 @@ class IngestJobTest < ActiveSupport::TestCase
       anndata_file: ann_data_file.gs_url, obsm_keys: ann_data_file.ann_data_file_info.obsm_key_names,
       file_size: ann_data_file.upload_file_size
     )
+    assert_not params_object.extract.include?('raw_counts')
     job = IngestJob.new(
       study: @basic_study, study_file: ann_data_file, user: @user, action: :ingest_anndata, params_object:
     )
From 4374c34a7b22dcb22d0f60ed1b2d8e4689737c05 Mon Sep 17 00:00:00 2001
From: bistline
Date: Tue, 17 Sep 2024 13:48:58 -0400
Subject: [PATCH 20/52] Working on persistent raw_counts test failure

---
 app/mailers/single_cell_mailer.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/mailers/single_cell_mailer.rb b/app/mailers/single_cell_mailer.rb
index faa90cce0..61c8bbebc 100644
--- a/app/mailers/single_cell_mailer.rb
+++ b/app/mailers/single_cell_mailer.rb
@@ -57,13 +57,13 @@ def notify_user_upload_fail(study_file, study, user)
   def notify_user_parse_complete(email, title, message, study)
     @message = message
     @study = study
-    mail(to: email, subject: '[Single Cell Portal Notifier] ' + title)
+    mail(to: email, subject: "[Single Cell Portal Notifier] #{title}")
   end
 
   def notify_user_parse_fail(email, title, error, study)
     @error = error
     @study = study
-    mail(to: email, subject: '[Single Cell Portal Notifier] ' + title)
+    mail(to: email, subject: "[Single Cell Portal Notifier] #{title}")
   end
 
   def notify_admin_parse_fail(user_email, title, contents)
From ca17df8e203f10d3d7a076af7e53d925730d7919 Mon Sep 17 00:00:00 2001
From: bistline
Date: Tue, 17 Sep 2024 17:03:12 -0400
Subject: [PATCH 21/52] Use change event for remote validation on bucket path

---
 .../components/upload/FileUploadControl.jsx | 29 +++++++++++++++++--
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx
index be806d26c..7213f0f14 100644
--- a/app/javascript/components/upload/FileUploadControl.jsx
+++ b/app/javascript/components/upload/FileUploadControl.jsx
@@ -28,6 +28,10 @@ export default function FileUploadControl({
   const [showUploadButton, setShowUploadButton] = useState(true)
   const [showBucketPath, setShowBucketPath] = useState(false)
   const ToggleUploadButton = () => {
+    // this is an inverted check since the user is clicking and the value is about to change
+    if (!showUploadButton) {
+      unsetRemoteLocation()
+    }
     setShowUploadButton(!showUploadButton)
     setShowBucketPath(!showBucketPath)
   }
@@ -80,10 +84,22 @@ export default function FileUploadControl({
     }
   }
 
+  // keep track of pending timeout for remote validation via bucket path
+  const [timeOutId, setTimeOutID] = useState(null)
+
+  // clear out remote_location and hasRemoteFile to allow switching back to file upload button
+  function unsetRemoteLocation() {
+    updateFile(file._id, {remote_location: '', hasRemoteFile: false})
+  }
+
   // perform CSFV on remote file when specifying a GS URL or bucket path
   // will sanitize GS URL before calling validateRemoteFile
-  async function handleBucketLocationEntry(e) {
-    const path = e.target.value
+  async function handleBucketLocationEntry(path) {
+    if (path === "") {
+      unsetRemoteLocation()
+      return false
+    }
+
     const matcher = new RegExp(`(gs:\/\/)?${bucketName}\/?`)
     const trimmedPath = path.replace(matcher, '')
     if (!trimmedPath) {
@@ -183,7 +199,14 @@ export default function FileUploadControl({
           size={60}
           id={`remote_location-input-${file._id}`}
           placeholder='GS URL or path to file in GCP bucket'
-          onBlur={handleBucketLocationEntry}/>
+          onChange={ (e) => {
+            const newBucketPath = e.target.value
+            if (timeOutId) {
+              clearTimeout(timeOutId)
+            }
+            const newTimeout = setTimeout(handleBucketLocationEntry, 500, newBucketPath)
+            setTimeOutID(newTimeout)
+          }}/>
       }
       &nbsp; &nbsp;
       { !isFileOnServer && (showBucketPath || file.hasRemoteFile ) && googleBucketLink }
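The onChange handler above implements a debounce: each keystroke cancels the pending timer, so remote validation fires only after typing pauses. A hedged standalone sketch of that pattern, with illustrative names that are not SCP's API:

    // Illustrative debounce helper: `validate` runs once, 500 ms after
    // the most recent call to `onInputChange`
    let pendingTimeoutId = null

    function onInputChange(value, validate) {
      if (pendingTimeoutId) { clearTimeout(pendingTimeoutId) }
      pendingTimeoutId = setTimeout(validate, 500, value)
    }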
From b7ce2700e39f64ad8aee1b50d2fbde4176b72f8e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 18 Sep 2024 02:15:31 +0000
Subject: [PATCH 22/52] Bump vite from 4.5.3 to 4.5.5

Bumps [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite) from 4.5.3 to 4.5.5.
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/v4.5.5/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v4.5.5/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot]
---
 package.json | 2 +-
 yarn.lock | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/package.json b/package.json
index a86bdf0dd..63230c6f9 100644
--- a/package.json
+++ b/package.json
@@ -106,7 +106,7 @@
     "prop-types": "^15.7.2",
     "react-select-event": "^5.3.0",
     "stylelint-prettier": "^1.1.2",
-    "vite": "^4.0.0",
+    "vite": "^4.5.5",
     "vite-plugin-ruby": "^3.0.5"
   }
 }
diff --git a/yarn.lock b/yarn.lock
index 08a604739..52f5500f1 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -9387,10 +9387,10 @@ vite-plugin-ruby@^3.0.5:
     debug "^4.3.4"
     fast-glob "^3.2.12"
 
-vite@^4.0.0:
-  version "4.5.3"
-  resolved "https://registry.npmjs.org/vite/-/vite-4.5.3.tgz"
-  integrity sha512-kQL23kMeX92v3ph7IauVkXkikdDRsYMGTVl5KY2E9OY4ONLvkHf04MDTbnfo6NKxZiDLWzVpP5oTa8hQD8U3dg==
+vite@^4.5.5:
+  version "4.5.5"
+  resolved "https://registry.yarnpkg.com/vite/-/vite-4.5.5.tgz#639b9feca5c0a3bfe3c60cb630ef28bf219d742e"
+  integrity sha512-ifW3Lb2sMdX+WU91s3R0FyQlAyLxOzCSCP37ujw0+r5POeHPwe6udWVIElKQq8gk3t7b8rkmvqC6IHBpCff4GQ==
   dependencies:
     esbuild "^0.18.10"
     postcss "^8.4.27"
From 1eeee20a467b5a330ce406095b2f2af7dc477400 Mon Sep 17 00:00:00 2001
From: bistline
Date: Wed, 18 Sep 2024 12:43:53 -0400
Subject: [PATCH 23/52] Fix logic for saveAnnDataFileHelper

---
 app/javascript/components/upload/UploadWizard.jsx | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/javascript/components/upload/UploadWizard.jsx b/app/javascript/components/upload/UploadWizard.jsx
index 4da9cc59e..18b934366 100644
--- a/app/javascript/components/upload/UploadWizard.jsx
+++ b/app/javascript/components/upload/UploadWizard.jsx
@@ -275,12 +275,17 @@ export function RawUploadWizard({ studyAccession, name }) {
     setTimeout(() => deleteFileFromServer(requestCanceller.fileId), 500)
   }
 
+  /** helper for determining when to use saveAnnDataFileHelper (sets ids/values correctly for AnnData UX **/
+  function useAnnDataFileHelper(file) {
+    return isAnnDataExperience && (file?.file_type === 'AnnData' || Object.keys(file).includes("data_type"))
+  }
+
   /** save the given file and perform an upload if a selected file is present */
   async function saveFile(file) {
     let fileToSave = file
     let studyFileId = file._id
 
-    if (isAnnDataExperience && fileToSave?.file_type === 'AnnData') {
+    if (useAnnDataFileHelper(fileToSave)) {
       fileToSave = saveAnnDataFileHelper(file, fileToSave)
       studyFileId = fileToSave._id
     }
From 9ab452293050edcb468c6685cfb23a69314d4de4 Mon Sep 17 00:00:00 2001
From: bistline
Date: Wed, 18 Sep 2024 16:25:56 -0400
Subject: [PATCH 24/52] Add spinner for remote validation, improved timing

---
 .../components/upload/FileUploadControl.jsx | 41 +++++++++++++------
 .../upload-wizard/file-upload-control.test.js | 33 ++++++++++++++-
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx
index 7213f0f14..38527b3eb 100644
--- a/app/javascript/components/upload/FileUploadControl.jsx
+++ b/app/javascript/components/upload/FileUploadControl.jsx
@@ -42,7 +42,7 @@ export default function FileUploadControl({
     'Upload a file from your computer' :
     "Input a path to a file that is already in this study's bucket"
   const uploadToggle =
       {toggleText}
@@ -55,7 +55,7 @@ export default function FileUploadControl({
   const googleBucketLink =
-
       Browse bucket
@@ -95,18 +95,32 @@ export default function FileUploadControl({
   // perform CSFV on remote file when specifying a GS URL or bucket path
   // will sanitize GS URL before calling validateRemoteFile
   async function handleBucketLocationEntry(path) {
-    if (path === "") {
-      unsetRemoteLocation()
-      return false
-    }
-
     const matcher = new RegExp(`(gs:\/\/)?${bucketName}\/?`)
     const trimmedPath = path.replace(matcher, '')
     if (!trimmedPath) {
+      unsetRemoteLocation()
+      setFileValidation({ validating: false, issues: {}, fileName: null })
       return false
     }
 
+    // don't continue unless a dot is present (otherwise, no valid file extension)
+    if (trimmedPath.indexOf('.') < 0 ) { return false }
+
+    const fileType = file.file_type
+    const fileExtension = `.${trimmedPath.split('.').slice(-1)[0]}`
+    if (!inputAcceptExts.includes(fileExtension)) {
+      const invalidExt = {
+        errors: [
+          [
+            'error', 'file:invalid-ext',
+            `${fileExtension} is not an allowed file extension for ${fileType} files: ${inputAcceptExts.join(', ')}`
+          ]
+        ]
+      }
+      setFileValidation({ validating: false, issues: invalidExt, fileName: trimmedPath })
+      return false
+    }
+
     const fileOptions = fileType === 'Metadata' ? { use_metadata_convention: file?.use_metadata_convention } : {}
     setFileValidation({ validating: true, issues: {}, fileName: trimmedPath })
@@ -191,6 +205,7 @@ export default function FileUploadControl({
         />
       }
 
+
       {!isFileOnServer && (showBucketPath || file.hasRemoteFile ) &&
       // we can't use TextFormField since we need a custom onBlur event
       // onBlur is the React equivalent of onfocusout, which will fire after the user is done updating the input
@@ -198,22 +213,22 @@ export default function FileUploadControl({
           type="text"
           size={60}
           id={`remote_location-input-${file._id}`}
+          data-testid="remote-location-input"
           placeholder='GS URL or path to file in GCP bucket'
           onChange={ (e) => {
             const newBucketPath = e.target.value
-            if (timeOutId) {
-              clearTimeout(timeOutId)
-            }
-            const newTimeout = setTimeout(handleBucketLocationEntry, 500, newBucketPath)
+            if (timeOutId) { clearTimeout(timeOutId) }
+            const newTimeout = setTimeout(handleBucketLocationEntry, 300, newBucketPath)
             setTimeOutID(newTimeout)
           }}/>
       }
-      &nbsp; &nbsp;
       { !isFileOnServer && (showBucketPath || file.hasRemoteFile ) && googleBucketLink }
-      &nbsp; &nbsp;
       { !isFileOnServer && uploadToggle }
+      { showBucketPath && fileValidation.validating &&
+        Validating...
+      }
diff --git a/test/js/upload-wizard/file-upload-control.test.js b/test/js/upload-wizard/file-upload-control.test.js
index be30fe737..dd1d03a03 100644
--- a/test/js/upload-wizard/file-upload-control.test.js
+++ b/test/js/upload-wizard/file-upload-control.test.js
@@ -348,7 +348,36 @@ describe('file upload control validates the selected file', () => {
     expect(screen.queryAllByText('Use bucket path')).toHaveLength(1)
     const bucketToggle = screen.getByText('Use bucket path')
     fireEvent.click(bucketToggle)
-    expect(container.querySelector('#remote_location-input-123')).toBeVisible();
+    expect(container.querySelector('#remote_location-input-123')).toBeVisible()
+  })
+
+  it('validates file extension on bucket path entries', async () => {
+    const file = {
+      _id: '123',
+      name: '',
+      status: 'new',
+      file_type: 'Cluster'
+    }
+    render((
+
+
+
+    ))
+    expect(screen.queryAllByText('Use bucket path')).toHaveLength(1)
+    const bucketToggle = screen.getByText('Use bucket path')
+    fireEvent.click(bucketToggle)
+    const bucketPathInput = screen.getByTestId('remote-location-input')
+    expect(bucketPathInput).toBeVisible()
+    fireEvent.change(bucketPathInput, { target: { value: 'bad.pdf' } })
+    expect(bucketPathInput).toHaveValue('bad.pdf')
+    await waitFor(() => {
+      expect(screen.getByTestId('validation-error')).toBeInTheDocument()
+      expect(screen.getByText('.pdf is not an allowed file extension for Cluster files: .tsv')).toBeVisible()
+    })
   })
 })
From 7d7663fffb3fcf75a5c2e3db3ee8df095c1a3b9f Mon Sep 17 00:00:00 2001
From: bistline
Date: Wed, 18 Sep 2024 16:32:55 -0400
Subject: [PATCH 25/52] Match extension code/message to upload

---
 app/javascript/components/upload/FileUploadControl.jsx | 4 ++--
 test/js/upload-wizard/file-upload-control.test.js | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx
index 38527b3eb..cbf4bcadc 100644
--- a/app/javascript/components/upload/FileUploadControl.jsx
+++ b/app/javascript/components/upload/FileUploadControl.jsx
@@ -112,8 +112,8 @@ export default function FileUploadControl({
       const invalidExt = {
         errors: [
           [
-            'error', 'file:invalid-ext',
-            `${fileExtension} is not an allowed file extension for ${fileType} files: ${inputAcceptExts.join(', ')}`
+            'error', 'filename:extension',
+            `Allowed extensions are ${allowedFileExts.join(', ')}`
           ]
         ]
       }
diff --git a/test/js/upload-wizard/file-upload-control.test.js b/test/js/upload-wizard/file-upload-control.test.js
index dd1d03a03..be30fe737 100644
--- a/test/js/upload-wizard/file-upload-control.test.js
+++ b/test/js/upload-wizard/file-upload-control.test.js
@@ -382,7 +382,7 @@ describe('file upload control validates the selected file', () => {
     expect(bucketPathInput).toHaveValue('bad.pdf')
     await waitFor(() => {
       expect(screen.getByTestId('validation-error')).toBeInTheDocument()
-      expect(screen.getByText('.pdf is not an allowed file extension for Cluster files: .tsv')).toBeVisible()
+      expect(screen.getByText('Allowed extensions are: .tsv')).toBeVisible()
     })
   })
 })
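A hedged walkthrough of the extension check these two patches converge on, using made-up inputs; here `inputAcceptExts` stands in for the allow-list a cluster file would get:

    const inputAcceptExts = ['.tsv']  // illustrative allow-list
    const trimmedPath = 'exprs/bad.pdf'

    const fileExtension = `.${trimmedPath.split('.').slice(-1)[0]}`  // '.pdf'
    const isInvalid = trimmedPath.includes('.') && !inputAcceptExts.includes(fileExtension)
    console.log(isInvalid)  // true, so a 'filename:extension' error issue is reported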
From b696ab4e0ab3d092e78bfeb62e1deafefc3d70e0 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 16:58:51 -0400
Subject: [PATCH 26/52] Add ontology validation test, with web Caches mock

---
 .../lib/validation/ontology-validation.js | 13 +-
 app/javascript/lib/validation/validate-anndata.js | 1 -
 test/js/lib/ontology-validation.test.js | 191 ++++++++++++++++++
 3 files changed, 202 insertions(+), 3 deletions(-)
 create mode 100644 test/js/lib/ontology-validation.test.js

diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js
index d0c837976..baaba3122 100644
--- a/app/javascript/lib/validation/ontology-validation.js
+++ b/app/javascript/lib/validation/ontology-validation.js
@@ -25,6 +25,7 @@ async function getServiceWorkerCache() {
 
   // Delete other versions of ontologies cache; there should be 1 per domain
   const cacheNames = await caches.keys()
+  console.log('cacheNames', cacheNames)
   cacheNames.forEach(name => {
     if (name.startsWith('ontologies-') && name !== currentOntologies) {
       caches.delete(name)
@@ -37,7 +38,7 @@ async function getServiceWorkerCache() {
 }
 
 /** Fetch .gz file, decompress it, return plaintext */
-async function fetchGzipped(url) {
+export async function fetchGzipped(url) {
   const response = await fetch(url)
   const blob = await response.blob();
   const uint8Array = new Uint8Array(await blob.arrayBuffer());
@@ -51,10 +52,12 @@ export async function cacheFetch(url) {
 
   const decompressedUrl = url.replace('.gz', '')
   const response = await cache.match(decompressedUrl)
+  console.log('in cacheFetch, typeof response', typeof response)
   if (typeof response === 'undefined') {
     // If cache miss, then fetch, decompress, and put response in cache
     const data = await fetchGzipped(url)
     const contentLength = data.length
+    // console.log('data', data)
     const decompressedResponse = new Response(
       new Blob([data], { type: 'text/tab-separated-values' }),
       { headers: new Headers({ 'Content-Length': contentLength }) }
@@ -65,6 +68,8 @@ export async function cacheFetch(url) {
   return await cache.match(decompressedUrl)
 }
 
+
+
 /**
  * Fetch minified ontologies, transform into object of object of arrays, e.g.:
  *
@@ -95,9 +100,14 @@ export async function fetchOntologies() {
   for (let i = 0; i < ontologyNames.length; i++) {
     const ontologyName = ontologyNames[i]
     const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz`
+    console.log('ontologyUrl', ontologyUrl)
     const response = await cacheFetch(ontologyUrl)
+
+    console.log('response', response)
+
     const tsv = await response.text()
+    // console.log('tsv', tsv)
     const lines = tsv.split('\n')
     ontologies[ontologyName] = {}
 
@@ -117,7 +127,6 @@ export async function fetchOntologies() {
   window.SCP.ontologies = ontologies
   return ontologies
 }
-
 window.fetchOntologies = fetchOntologies
 
 /** Get lowercase shortnames for all required ontologies */
diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js
index e44d90ad7..6553505d1 100644
--- a/app/javascript/lib/validation/validate-anndata.js
+++ b/app/javascript/lib/validation/validate-anndata.js
@@ -224,7 +224,6 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) {
 
 /** Validate ontology labels for required metadata columns in AnnData file */
 async function validateOntologyLabels(hdf5File) {
-  window.hdf5File = hdf5File
   let issues = []
 
   const ontologies = await fetchOntologies()
diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js
new file mode 100644
index 000000000..7ad489b72
--- /dev/null
+++ b/test/js/lib/ontology-validation.test.js
@@ -0,0 +1,191 @@
+const fetch = require('node-fetch')
+const { Readable } = require('stream');
+
+import {
+  fetchOntologies
+} from 'lib/validation/ontology-validation'
+
+
+describe('Client-side file validation for AnnData', () => {
+  beforeAll(() => {
+    global.fetch = fetch
+
+    const mockCaches = {
+      _cacheStores: {},
+
+      open: jest.fn((cacheName) => {
+        console.log('cacheName', cacheName)
+        // Initialize the store for the cache name if it doesn't exist
+        if (!mockCaches._cacheStores[cacheName]) {
+          mockCaches._cacheStores[cacheName] = {};
+        }
+
+        console.log('mockCaches._cacheStores[cacheName]', mockCaches._cacheStores[cacheName])
+
+        return Promise.resolve({
+          add: jest.fn((request) => {
+            const response = new Response(`Response for ${request.url}`); // Mocking a response
+            return mockCaches.put(request, response, cacheName);
+          }),
+          addAll: jest.fn((requests) => {
+            const responses = requests.map(req => new Response(`Response for ${req.url}`));
+            return Promise.all(responses.map((response, index) => {
+              return mockCaches.put(requests[index], response, cacheName);
+            }));
+          }),
+          delete: jest.fn((request) => {
+            const key = request.url || request;
+            if (mockCaches._cacheStores[cacheName][key]) {
+              delete mockCaches._cacheStores[cacheName][key];
+              return Promise.resolve(true);
+            }
+            return Promise.resolve(false);
+          }),
+          match: jest.fn((request) => {
+            const key = request.url || request;
+            return Promise.resolve(mockCaches._cacheStores[cacheName][key] || undefined);
+          }),
+          put: jest.fn((request, response) => {
+            console.log('in put, cacheName, request', cacheName, request)
+            const key = request.url || request;
+            mockCaches._cacheStores[cacheName][key] = response;
+            return Promise.resolve();
+          }),
+          keys: jest.fn(() => {
+            console.log('in keys, mockCaches._cacheStores', mockCaches._cacheStores)
+            return Promise.resolve(Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key)));
+          }),
+        });
+      }),
+
+      match: jest.fn((request, cacheName) => {
+        const key = request.url || request;
+        return Promise.resolve(mockCaches._cacheStores[cacheName]?.[key] || null);
+      }),
+
+      delete: jest.fn((request, cacheName) => {
+        const key = request.url || request;
+        if (mockCaches._cacheStores[cacheName] && mockCaches._cacheStores[cacheName][key]) {
+          delete mockCaches._cacheStores[cacheName][key];
+          return Promise.resolve(true);
+        }
+        return Promise.resolve(false);
+      }),
+
+      keys: jest.fn(() => {
+        // Collect all cache keys across all cache stores
+        console.log('in mockCaches._cacheStores', mockCaches._cacheStores)
+        return Promise.resolve(Object.keys(mockCaches._cacheStores).flatMap(cacheName =>
+          Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key))
+        ));
+      }),
+    };
+
+    global.caches = mockCaches;
+
+    // Mock Request
+    global.Request = class {
+      constructor(url, options = {}) {
+        this.url = url;
+        this.method = options.method || 'GET';
+        this.headers = new Headers(options.headers);
+      }
+    };
+
+    // Mock Response
+    global.Response = class {
+      constructor(body, options = {}) {
+        this.status = options.status || 200;
+        this.ok = this.status >= 200 && this.status < 300;
+        this.headers = new Headers(options.headers);
+        this.bodyUsed = false;
+
+        // Ensure the body is a string or Buffer
+        const readableBody = typeof body === 'string' ? body : body.toString();
+
+        // Create a ReadableStream
+        const stream = new Readable({
+          read() {
+            this.push(readableBody); // Push the string or buffer into the stream
+            this.push(null); // Signal the end of the stream
+          },
+        });
+
+        this.body = stream; // Assign the readable stream to body
+      }
+
+      // Define text() method to read from the stream
+      async text() {
+        if (this.bodyUsed) {
+          return Promise.reject(new TypeError('Already read'));
+        }
+        this.bodyUsed = true;
+        return new Promise((resolve, reject) => {
+          const chunks = [];
+          this.body.on('data', (chunk) => {
+            chunks.push(chunk);
+          });
+          this.body.on('end', () => {
+            resolve(Buffer.concat(chunks).toString());
+          });
+          this.body.on('error', (err) => {
+            reject(err);
+          });
+        });
+      }
+
+      // You can also implement json() if needed
+      async json() {
+        const text = await this.text();
+        return JSON.parse(text);
+      }
+    };
+
+
+    global.Headers = class {
+      constructor(headers) {
+        this.map = new Map();
+        if (headers) {
+          for (const [key, value] of Object.entries(headers)) {
+            this.append(key, value);
+          }
+        }
+      }
+
+      append(key, value) {
+        const existingValue = this.map.get(key.toLowerCase());
+        if (existingValue) {
+          this.map.set(key.toLowerCase(), existingValue + ', ' + value);
+        } else {
+          this.map.set(key.toLowerCase(), value);
+        }
+      }
+
+      get(key) {
+        return this.map.get(key.toLowerCase()) || null;
+      }
+
+      has(key) {
+        return this.map.has(key.toLowerCase());
+      }
+
+      set(key, value) {
+        this.map.set(key.toLowerCase(), value);
+      }
+
+      delete(key) {
+        this.map.delete(key.toLowerCase());
+      }
+
+      // Additional methods if needed
+    }
+
+
+  })
+
+  it('Parses AnnData headers', async () => {
+    const ontologies = await fetchOntologies()
+    console.log('ontologies')
+    expect(1).toEqual(1)
+  })
+})
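The mocked `caches` object above mimics the browser's Cache API closely enough for `cacheFetch` to run under Jest. A hedged sketch of the round trip it enables; the cache name, URL, and TSV content are made up:

    // Open a named cache, store a response, read it back: the same
    // round trip cacheFetch performs against the real browser API
    const cache = await caches.open('ontologies-2024-09-18')  // illustrative name
    await cache.put('https://example.org/mondo.min.tsv', new Response('id\tlabel'))

    const hit = await cache.match('https://example.org/mondo.min.tsv')
    console.log(await hit.text())  // 'id\tlabel'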
From 5af3fa97cea07546fd6537f8dfeb04df123e498 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 17:16:08 -0400
Subject: [PATCH 27/52] Modularize mock Cache interface for Web API

---
 test/js/lib/mock-cache.js | 75 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 test/js/lib/mock-cache.js

diff --git a/test/js/lib/mock-cache.js b/test/js/lib/mock-cache.js
new file mode 100644
index 000000000..73142c024
--- /dev/null
+++ b/test/js/lib/mock-cache.js
@@ -0,0 +1,75 @@
+/** @fileoverview Mock for the Cache interface of the Web API
+ *
+ * https://developer.mozilla.org/en-US/docs/Web/API/Cache
+*/
+
+export const mockCaches = {
+  _cacheStores: {},
+
+  open: jest.fn((cacheName) => {
+    console.log('cacheName', cacheName)
+    // Initialize the store for the cache name if it doesn't exist
+    if (!mockCaches._cacheStores[cacheName]) {
+      mockCaches._cacheStores[cacheName] = {};
+    }
+
+    console.log('mockCaches._cacheStores[cacheName]', mockCaches._cacheStores[cacheName])
+
+    return Promise.resolve({
+      add: jest.fn((request) => {
+        const response = new Response(`Response for ${request.url}`); // Mocking a response
+        return mockCaches.put(request, response, cacheName);
+      }),
+      addAll: jest.fn((requests) => {
+        const responses = requests.map(req => new Response(`Response for ${req.url}`));
+        return Promise.all(responses.map((response, index) => {
+          return mockCaches.put(requests[index], response, cacheName);
+        }));
+      }),
+      delete: jest.fn((request) => {
+        const key = request.url || request;
+        if (mockCaches._cacheStores[cacheName][key]) {
+          delete mockCaches._cacheStores[cacheName][key];
+          return Promise.resolve(true);
+        }
+        return Promise.resolve(false);
+      }),
+      match: jest.fn((request) => {
+        const key = request.url || request;
+        return Promise.resolve(mockCaches._cacheStores[cacheName][key] || undefined);
+      }),
+      put: jest.fn((request, response) => {
+        console.log('in put, cacheName, request', cacheName, request)
+        const key = request.url || request;
+        mockCaches._cacheStores[cacheName][key] = response;
+        return Promise.resolve();
+      }),
+      keys: jest.fn(() => {
+        console.log('in keys, mockCaches._cacheStores', mockCaches._cacheStores)
+        return Promise.resolve(Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key)));
+      }),
+    });
+  }),
+
+  match: jest.fn((request, cacheName) => {
+    const key = request.url || request;
+    return Promise.resolve(mockCaches._cacheStores[cacheName]?.[key] || null);
+  }),
+
+  delete: jest.fn((request, cacheName) => {
+    const key = request.url || request;
+    if (mockCaches._cacheStores[cacheName] && mockCaches._cacheStores[cacheName][key]) {
+      delete mockCaches._cacheStores[cacheName][key];
+      return Promise.resolve(true);
+    }
+    return Promise.resolve(false);
+  }),
+
+  keys: jest.fn(() => {
+    // Collect all cache keys across all cache stores
+    console.log('in mockCaches._cacheStores', mockCaches._cacheStores)
+    return Promise.resolve(Object.keys(mockCaches._cacheStores).flatMap(cacheName =>
+      Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key))
+    ));
+  }),
+};
From 9ab443683be3d968e95d3102126b6431e0884220 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 17:16:42 -0400
Subject: [PATCH 28/52] Rename to mock Web API

---
 test/js/lib/{mock-cache.js => mock-web-api.js} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/js/lib/{mock-cache.js => mock-web-api.js} (100%)

diff --git a/test/js/lib/mock-cache.js b/test/js/lib/mock-web-api.js
similarity index 100%
rename from test/js/lib/mock-cache.js
rename to test/js/lib/mock-web-api.js
From fd04109e650c8f420615dc686724f5b12d9c8cba Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 17:23:19 -0400
Subject: [PATCH 29/52] Modularize node web APIs used for mocking

---
 test/js/lib/mock-web-api.js | 75 ----------
 test/js/lib/node-web-api.js | 171 ++++++++++++++++++++++
 test/js/lib/ontology-validation.test.js | 179 +-----------------------
 3 files changed, 178 insertions(+), 247 deletions(-)
 delete mode 100644 test/js/lib/mock-web-api.js
 create mode 100644 test/js/lib/node-web-api.js

diff --git a/test/js/lib/mock-web-api.js b/test/js/lib/mock-web-api.js
deleted file mode 100644
index 73142c024..000000000
--- a/test/js/lib/mock-web-api.js
+++ /dev/null
@@ -1,75 +0,0 @@
-/** @fileoverview Mock for the Cache interface of the Web API
- *
- * https://developer.mozilla.org/en-US/docs/Web/API/Cache
-*/
-
-export const mockCaches = {
-  _cacheStores: {},
-
-  open: jest.fn((cacheName) => {
-    console.log('cacheName', cacheName)
-    // Initialize the store for the cache name if it doesn't exist
-    if (!mockCaches._cacheStores[cacheName]) {
-      mockCaches._cacheStores[cacheName] = {};
-    }
-
-    console.log('mockCaches._cacheStores[cacheName]', mockCaches._cacheStores[cacheName])
-
-    return Promise.resolve({
-      add: jest.fn((request) => {
-        const response = new Response(`Response for ${request.url}`); // Mocking a response
-        return mockCaches.put(request, response, cacheName);
-      }),
-      addAll: jest.fn((requests) => {
-        const responses = requests.map(req => new Response(`Response for ${req.url}`));
-        return Promise.all(responses.map((response, index) => {
-          return mockCaches.put(requests[index], response, cacheName);
-        }));
-      }),
-      delete: jest.fn((request) => {
-        const key = request.url || request;
-        if (mockCaches._cacheStores[cacheName][key]) {
-          delete mockCaches._cacheStores[cacheName][key];
-          return Promise.resolve(true);
-        }
-        return Promise.resolve(false);
-      }),
-      match: jest.fn((request) => {
-        const key = request.url || request;
-        return Promise.resolve(mockCaches._cacheStores[cacheName][key] || undefined);
-      }),
-      put: jest.fn((request, response) => {
-        console.log('in put, cacheName, request', cacheName, request)
-        const key = request.url || request;
-        mockCaches._cacheStores[cacheName][key] = response;
-        return Promise.resolve();
-      }),
-      keys: jest.fn(() => {
-        console.log('in keys, mockCaches._cacheStores', mockCaches._cacheStores)
-        return Promise.resolve(Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key)));
-      }),
-    });
-  }),
-
-  match: jest.fn((request, cacheName) => {
-    const key = request.url || request;
-    return Promise.resolve(mockCaches._cacheStores[cacheName]?.[key] || null);
-  }),
-
-  delete: jest.fn((request, cacheName) => {
-    const key = request.url || request;
-    if (mockCaches._cacheStores[cacheName] && mockCaches._cacheStores[cacheName][key]) {
-      delete mockCaches._cacheStores[cacheName][key];
-      return Promise.resolve(true);
-    }
-    return Promise.resolve(false);
-  }),
-
-  keys: jest.fn(() => {
-    // Collect all cache keys across all cache stores
-    console.log('in mockCaches._cacheStores', mockCaches._cacheStores)
-    return Promise.resolve(Object.keys(mockCaches._cacheStores).flatMap(cacheName =>
-      Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key))
-    ));
-  }),
-};
diff --git a/test/js/lib/node-web-api.js b/test/js/lib/node-web-api.js
new file mode 100644
index 000000000..4ef5783f2
--- /dev/null
+++ b/test/js/lib/node-web-api.js
@@ -0,0 +1,171 @@
+/** @fileoverview Node port for interfaces of Web API; helps mocking
+ *
+ * https://developer.mozilla.org/en-US/docs/Web/API/Cache
+*/
+const { Readable } = require('stream');
+
+export const nodeCaches = {
+  _cacheStores: {},
+
+  open: jest.fn((cacheName) => {
+    console.log('cacheName', cacheName)
+    // Initialize the store for the cache name if it doesn't exist
+    if (!nodeCaches._cacheStores[cacheName]) {
+      nodeCaches._cacheStores[cacheName] = {};
+    }
+
+    console.log('nodeCaches._cacheStores[cacheName]', nodeCaches._cacheStores[cacheName])
+
+    return Promise.resolve({
+      add: jest.fn((request) => {
+        const response = new Response(`Response for ${request.url}`);
+        return nodeCaches.put(request, response, cacheName);
+      }),
+      addAll: jest.fn((requests) => {
+        const responses = requests.map(req => new Response(`Response for ${req.url}`));
+        return Promise.all(responses.map((response, index) => {
+          return nodeCaches.put(requests[index], response, cacheName);
+        }));
+      }),
+      delete: jest.fn((request) => {
+        const key = request.url || request;
+        if (nodeCaches._cacheStores[cacheName][key]) {
+          delete nodeCaches._cacheStores[cacheName][key];
+          return Promise.resolve(true);
+        }
+        return Promise.resolve(false);
+      }),
+      match: jest.fn((request) => {
+        const key = request.url || request;
+        return Promise.resolve(nodeCaches._cacheStores[cacheName][key] || undefined);
+      }),
+      put: jest.fn((request, response) => {
+        console.log('in put, cacheName, request', cacheName, request)
+        const key = request.url || request;
+        nodeCaches._cacheStores[cacheName][key] = response;
+        return Promise.resolve();
+      }),
+      keys: jest.fn(() => {
+        console.log('in keys, nodeCaches._cacheStores', nodeCaches._cacheStores)
+        return Promise.resolve(Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key)));
+      }),
+    });
+  }),
+
+  match: jest.fn((request, cacheName) => {
+    const key = request.url || request;
+    return Promise.resolve(nodeCaches._cacheStores[cacheName]?.[key] || null);
+  }),
+
+  delete: jest.fn((request, cacheName) => {
+    const key = request.url || request;
+    if (nodeCaches._cacheStores[cacheName] && nodeCaches._cacheStores[cacheName][key]) {
+      delete nodeCaches._cacheStores[cacheName][key];
+      return Promise.resolve(true);
+    }
+    return Promise.resolve(false);
+  }),
+
+  keys: jest.fn(() => {
+    // Collect all cache keys across all cache stores
+    console.log('in nodeCaches._cacheStores', nodeCaches._cacheStores)
+    return Promise.resolve(Object.keys(nodeCaches._cacheStores).flatMap(cacheName =>
+      Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key))
+    ));
+  }),
+};
+
+export const nodeRequest = class {
+  constructor(url, options = {}) {
+    this.url = url;
+    this.method = options.method || 'GET';
+    this.headers = new Headers(options.headers);
+  }
+};
+
+export const nodeResponse = class {
+  constructor(body, options = {}) {
+    this.status = options.status || 200;
+    this.ok = this.status >= 200 && this.status < 300;
+    this.headers = new Headers(options.headers);
+    this.bodyUsed = false;
+
+    // Ensure the body is a string or Buffer
+    const readableBody = typeof body === 'string' ? body : body.toString();
+
+    // Create a ReadableStream
+    const stream = new Readable({
+      read() {
+        this.push(readableBody); // Push the string or buffer into the stream
+        this.push(null); // Signal the end of the stream
+      },
+    });
+
+    this.body = stream; // Assign the readable stream to body
+  }
+
+  // Define text() method to read from the stream
+  async text() {
+    if (this.bodyUsed) {
+      return Promise.reject(new TypeError('Already read'));
+    }
+    this.bodyUsed = true;
+    return new Promise((resolve, reject) => {
+      const chunks = [];
+      this.body.on('data', (chunk) => {
+        chunks.push(chunk);
+      });
+      this.body.on('end', () => {
+        resolve(Buffer.concat(chunks).toString());
+      });
+      this.body.on('error', (err) => {
+        reject(err);
+      });
+    });
+  }
+
+  // You can also implement json() if needed
+  async json() {
+    const text = await this.text();
+    return JSON.parse(text);
+  }
+};
+
+
+export const nodeHeaders = class {
+  constructor(headers) {
+    this.map = new Map();
+    if (headers) {
+      for (const [key, value] of Object.entries(headers)) {
+        this.append(key, value);
+      }
+    }
+  }
+
+  append(key, value) {
+    const existingValue = this.map.get(key.toLowerCase());
+    if (existingValue) {
+      this.map.set(key.toLowerCase(), existingValue + ', ' + value);
+    } else {
+      this.map.set(key.toLowerCase(), value);
+    }
+  }
+
+  get(key) {
+    return this.map.get(key.toLowerCase()) || null;
+  }
+
+  has(key) {
+    return this.map.has(key.toLowerCase());
+  }
+
+  set(key, value) {
+    this.map.set(key.toLowerCase(), value);
+  }
+
+  delete(key) {
+    this.map.delete(key.toLowerCase());
+  }
+
+  // Additional methods if needed
+}
diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js
index 7ad489b72..8362d9803 100644
--- a/test/js/lib/ontology-validation.test.js
+++ b/test/js/lib/ontology-validation.test.js
@@ -1,186 +1,21 @@
 const fetch = require('node-fetch')
-const { Readable } = require('stream');
 
 import {
   fetchOntologies
 } from 'lib/validation/ontology-validation'
+import {
+  nodeCaches, nodeHeaders, nodeRequest, nodeResponse
+} from './node-web-api'
 
 
 describe('Client-side file validation for AnnData', () => {
   beforeAll(() => {
     global.fetch = fetch
 
-    const mockCaches = {
-      _cacheStores: {},
-
-      open: jest.fn((cacheName) => {
-        console.log('cacheName', cacheName)
-        // Initialize the store for the cache name if it doesn't exist
-        if (!mockCaches._cacheStores[cacheName]) {
-          mockCaches._cacheStores[cacheName] = {};
-        }
-
-        console.log('mockCaches._cacheStores[cacheName]', mockCaches._cacheStores[cacheName])
-
-        return Promise.resolve({
-          add: jest.fn((request) => {
-            const response = new Response(`Response for ${request.url}`); // Mocking a response
-            return mockCaches.put(request, response, cacheName);
-          }),
-          addAll: jest.fn((requests) => {
-            const responses = requests.map(req => new Response(`Response for ${req.url}`));
-            return Promise.all(responses.map((response, index) => {
-              return mockCaches.put(requests[index], response, cacheName);
-            }));
-          }),
-          delete: jest.fn((request) => {
-            const key = request.url || request;
-            if (mockCaches._cacheStores[cacheName][key]) {
-              delete mockCaches._cacheStores[cacheName][key];
-              return Promise.resolve(true);
-            }
-            return Promise.resolve(false);
-          }),
-          match: jest.fn((request) => {
-            const key = request.url || request;
-            return Promise.resolve(mockCaches._cacheStores[cacheName][key] || undefined);
-          }),
-          put: jest.fn((request, response) => {
-            console.log('in put, cacheName, request', cacheName, request)
-            const key = request.url || request;
-            mockCaches._cacheStores[cacheName][key] = response;
-            return Promise.resolve();
-          }),
-          keys: jest.fn(() => {
-            console.log('in keys, mockCaches._cacheStores', mockCaches._cacheStores)
-            return Promise.resolve(Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key)));
-          }),
-        });
-      }),
-
-      match: jest.fn((request, cacheName) => {
-        const key = request.url || request;
-        return Promise.resolve(mockCaches._cacheStores[cacheName]?.[key] || null);
-      }),
-
-      delete: jest.fn((request, cacheName) => {
-        const key = request.url || request;
-        if (mockCaches._cacheStores[cacheName] && mockCaches._cacheStores[cacheName][key]) {
-          delete mockCaches._cacheStores[cacheName][key];
-          return Promise.resolve(true);
-        }
-        return Promise.resolve(false);
-      }),
-
-      keys: jest.fn(() => {
-        // Collect all cache keys across all cache stores
-        console.log('in mockCaches._cacheStores', mockCaches._cacheStores)
-        return Promise.resolve(Object.keys(mockCaches._cacheStores).flatMap(cacheName =>
-          Object.keys(mockCaches._cacheStores[cacheName]).map(key => new Request(key))
-        ));
-      }),
-    };
-
-    global.caches = mockCaches;
-
-    // Mock Request
-    global.Request = class {
-      constructor(url, options = {}) {
-        this.url = url;
-        this.method = options.method || 'GET';
-        this.headers = new Headers(options.headers);
-      }
-    };
-
-    // Mock Response
-    global.Response = class {
-      constructor(body, options = {}) {
-        this.status = options.status || 200;
-        this.ok = this.status >= 200 && this.status < 300;
-        this.headers = new Headers(options.headers);
-        this.bodyUsed = false;
-
-        // Ensure the body is a string or Buffer
-        const readableBody = typeof body === 'string' ? body : body.toString();
-
-        // Create a ReadableStream
-        const stream = new Readable({
-          read() {
-            this.push(readableBody); // Push the string or buffer into the stream
-            this.push(null); // Signal the end of the stream
-          },
-        });
-
-        this.body = stream; // Assign the readable stream to body
-      }
-
-      // Define text() method to read from the stream
-      async text() {
-        if (this.bodyUsed) {
-          return Promise.reject(new TypeError('Already read'));
-        }
-        this.bodyUsed = true;
-        return new Promise((resolve, reject) => {
-          const chunks = [];
-          this.body.on('data', (chunk) => {
-            chunks.push(chunk);
-          });
-          this.body.on('end', () => {
-            resolve(Buffer.concat(chunks).toString());
-          });
-          this.body.on('error', (err) => {
-            reject(err);
-          });
-        });
-      }
-
-      // You can also implement json() if needed
-      async json() {
-        const text = await this.text();
-        return JSON.parse(text);
-      }
-    };
-
-
-    global.Headers = class {
-      constructor(headers) {
-        this.map = new Map();
-        if (headers) {
-          for (const [key, value] of Object.entries(headers)) {
-            this.append(key, value);
-          }
-        }
-      }
-
-      append(key, value) {
-        const existingValue = this.map.get(key.toLowerCase());
-        if (existingValue) {
-          this.map.set(key.toLowerCase(), existingValue + ', ' + value);
-        } else {
-          this.map.set(key.toLowerCase(), value);
-        }
-      }
-
-      get(key) {
-        return this.map.get(key.toLowerCase()) || null;
-      }
-
-      has(key) {
-        return this.map.has(key.toLowerCase());
-      }
-
-      set(key, value) {
-        this.map.set(key.toLowerCase(), value);
-      }
-
-      delete(key) {
-        this.map.delete(key.toLowerCase());
-      }
-
-      // Additional methods if needed
-    }
-
-
+    global.caches = nodeCaches;
+    global.Response = nodeResponse
+    global.Request = nodeRequest
+    global.Headers = nodeHeaders
   })
 
   it('Parses AnnData headers', async () => {
     const ontologies = await fetchOntologies()
     console.log('ontologies')
     expect(1).toEqual(1)
   })
 })
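The `nodeResponse.text()` ported above drains a Node Readable stream into a string. The same technique in isolation, as a hedged sketch using only Node's built-in stream module (the helper name is illustrative):

    const { Readable } = require('stream')

    /** Collect a Readable stream's chunks into one string */
    function streamToText(stream) {
      return new Promise((resolve, reject) => {
        const chunks = []
        stream.on('data', chunk => chunks.push(Buffer.from(chunk)))
        stream.on('end', () => resolve(Buffer.concat(chunks).toString()))
        stream.on('error', reject)
      })
    }

    // streamToText(Readable.from(['a', 'b'])).then(console.log)  // 'ab'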
From f60fa7be39b0e1a0a142d58977b7e813c827a170 Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Wed, 18 Sep 2024 17:37:51 -0400
Subject: [PATCH 30/52] Lint, fix caches.keys()

---
 .../lib/validation/ontology-validation.js | 7 -
 test/js/lib/node-web-api.js | 147 +++++++++---------
 test/js/lib/ontology-validation.test.js | 2 +-
 3 files changed, 71 insertions(+), 85 deletions(-)

diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js
index baaba3122..5de2cfa6d 100644
--- a/app/javascript/lib/validation/ontology-validation.js
+++ b/app/javascript/lib/validation/ontology-validation.js
@@ -52,12 +52,10 @@ export async function cacheFetch(url) {
 
   const decompressedUrl = url.replace('.gz', '')
   const response = await cache.match(decompressedUrl)
-  console.log('in cacheFetch, typeof response', typeof response)
   if (typeof response === 'undefined') {
     // If cache miss, then fetch, decompress, and put response in cache
     const data = await fetchGzipped(url)
     const contentLength = data.length
-    // console.log('data', data)
     const decompressedResponse = new Response(
       new Blob([data], { type: 'text/tab-separated-values' }),
       { headers: new Headers({ 'Content-Length': contentLength }) }
@@ -100,14 +98,9 @@ export async function fetchOntologies() {
   for (let i = 0; i < ontologyNames.length; i++) {
     const ontologyName = ontologyNames[i]
     const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz`
-    console.log('ontologyUrl', ontologyUrl)
     const response = await cacheFetch(ontologyUrl)
-
-    console.log('response', response)
-
     const tsv = await response.text()
-
-    // console.log('tsv', tsv)
     const lines = tsv.split('\n')
     ontologies[ontologyName] = {}
 
diff --git a/test/js/lib/node-web-api.js b/test/js/lib/node-web-api.js
index 4ef5783f2..9ccb62aa8 100644
--- a/test/js/lib/node-web-api.js
+++ b/test/js/lib/node-web-api.js
@@ -1,171 +1,163 @@
+/* eslint-disable require-jsdoc */
 /** @fileoverview Node port for interfaces of Web API; helps mocking
  *
  * https://developer.mozilla.org/en-US/docs/Web/API/Cache
 */
-const { Readable } = require('stream');
+const { Readable } = require('stream')
 
 export const nodeCaches = {
   _cacheStores: {},
 
-  open: jest.fn((cacheName) => {
-    console.log('cacheName', cacheName)
+  open: jest.fn(cacheName => {
     // Initialize the store for the cache name if it doesn't exist
     if (!nodeCaches._cacheStores[cacheName]) {
-      nodeCaches._cacheStores[cacheName] = {};
+      nodeCaches._cacheStores[cacheName] = {}
     }
 
-    console.log('nodeCaches._cacheStores[cacheName]', nodeCaches._cacheStores[cacheName])
-
     return Promise.resolve({
-      add: jest.fn((request) => {
-        const response = new Response(`Response for ${request.url}`);
-        return nodeCaches.put(request, response, cacheName);
+      add: jest.fn(request => {
+        const response = new Response(`Response for ${request.url}`)
+        return nodeCaches.put(request, response, cacheName)
       }),
-      addAll: jest.fn((requests) => {
-        const responses = requests.map(req => new Response(`Response for ${req.url}`));
+      addAll: jest.fn(requests => {
+        const responses = requests.map(req => new Response(`Response for ${req.url}`))
         return Promise.all(responses.map((response, index) => {
-          return nodeCaches.put(requests[index], response, cacheName);
-        }));
+          return nodeCaches.put(requests[index], response, cacheName)
+        }))
       }),
-      delete: jest.fn((request) => {
-        const key = request.url || request;
+      delete: jest.fn(request => {
+        const key = request.url || request
         if (nodeCaches._cacheStores[cacheName][key]) {
-          delete nodeCaches._cacheStores[cacheName][key];
-          return Promise.resolve(true);
+          delete nodeCaches._cacheStores[cacheName][key]
+          return Promise.resolve(true)
         }
-        return Promise.resolve(false);
+        return Promise.resolve(false)
       }),
-      match: jest.fn((request) => {
-        const key = request.url || request;
-        return Promise.resolve(nodeCaches._cacheStores[cacheName][key] || undefined);
+      match: jest.fn(request => {
+        const key = request.url || request
+        return Promise.resolve(nodeCaches._cacheStores[cacheName][key] || undefined)
       }),
       put: jest.fn((request, response) => {
-        console.log('in put, cacheName, request', cacheName, request)
-        const key = request.url || request;
-        nodeCaches._cacheStores[cacheName][key] = response;
-        return Promise.resolve();
+        const key = request.url || request
+        nodeCaches._cacheStores[cacheName][key] = response
+        return Promise.resolve()
       }),
       keys: jest.fn(() => {
-        console.log('in keys, nodeCaches._cacheStores', nodeCaches._cacheStores)
-        return Promise.resolve(Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key)));
-      }),
-    });
+        return Promise.resolve(Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key)))
+      })
+    })
   }),
 
   match: jest.fn((request, cacheName) => {
-    const key = request.url || request;
-    return Promise.resolve(nodeCaches._cacheStores[cacheName]?.[key] || null);
+    const key = request.url || request
+    return Promise.resolve(nodeCaches._cacheStores[cacheName]?.[key] || null)
   }),
 
   delete: jest.fn((request, cacheName) => {
-    const key = request.url || request;
+    const key = request.url || request
    if (nodeCaches._cacheStores[cacheName] && nodeCaches._cacheStores[cacheName][key]) {
-      delete nodeCaches._cacheStores[cacheName][key];
-      return Promise.resolve(true);
+      delete nodeCaches._cacheStores[cacheName][key]
+      return Promise.resolve(true)
    }
-    return Promise.resolve(false);
+    return Promise.resolve(false)
  }),
 
   keys: jest.fn(() => {
     // Collect all cache keys across all cache stores
-    console.log('in nodeCaches._cacheStores', nodeCaches._cacheStores)
-    return Promise.resolve(Object.keys(nodeCaches._cacheStores).flatMap(cacheName =>
-      Object.keys(nodeCaches._cacheStores[cacheName]).map(key => new Request(key))
-    ));
-  }),
-};
+    return Object.keys(nodeCaches._cacheStores)
+  })
+}
 
 export const nodeRequest = class {
   constructor(url, options = {}) {
-    this.url = url;
-    this.method = options.method || 'GET';
-    this.headers = new Headers(options.headers);
+    this.url = url
+    this.method = options.method || 'GET'
+    this.headers = new Headers(options.headers)
   }
-};
+}
 
 export const nodeResponse = class {
   constructor(body, options = {}) {
-    this.status = options.status || 200;
-    this.ok = this.status >= 200 && this.status < 300;
-    this.headers = new Headers(options.headers);
-    this.bodyUsed = false;
+    this.status = options.status || 200
+    this.ok = this.status >= 200 && this.status < 300
+    this.headers = new Headers(options.headers)
+    this.bodyUsed = false
 
     // Ensure the body is a string or Buffer
-    const readableBody = typeof body === 'string' ? body : body.toString();
+    const readableBody = typeof body === 'string' ? body : body.toString()
 
     // Create a ReadableStream
     const stream = new Readable({
       read() {
-        this.push(readableBody); // Push the string or buffer into the stream
-        this.push(null); // Signal the end of the stream
+        this.push(readableBody) // Push the string or buffer into the stream
+        this.push(null) // Signal the end of the stream
       },
-    });
+    })
 
-    this.body = stream; // Assign the readable stream to body
+    this.body = stream // Assign the readable stream to body
   }
 
   // Define text() method to read from the stream
   async text() {
     if (this.bodyUsed) {
-      return Promise.reject(new TypeError('Already read'));
+      return Promise.reject(new TypeError('Already read'))
     }
-    this.bodyUsed = true;
+    this.bodyUsed = true
     return new Promise((resolve, reject) => {
-      const chunks = [];
-      this.body.on('data', (chunk) => {
-        chunks.push(chunk);
-      });
+      const chunks = []
+      this.body.on('data', chunk => {
+        chunks.push(chunk)
+      })
       this.body.on('end', () => {
-        resolve(Buffer.concat(chunks).toString());
-      });
+        resolve(Buffer.concat(chunks).toString())
+      })
       this.body.on('error', (err) => {
-        reject(err);
-      });
-    });
+        reject(err)
+      })
+    })
   }
 
   // You can also implement json() if needed
   async json() {
-    const text = await this.text();
-    return JSON.parse(text);
+    const text = await this.text()
+    return JSON.parse(text)
   }
-};
+}
 
 
 export const nodeHeaders = class {
   constructor(headers) {
-    this.map = new Map();
+    this.map = new Map()
     if (headers) {
       for (const [key, value] of Object.entries(headers)) {
-        this.append(key, value);
+        this.append(key, value)
       }
     }
   }
 
   append(key, value) {
-    const existingValue = this.map.get(key.toLowerCase());
+    const existingValue = this.map.get(key.toLowerCase())
     if (existingValue) {
-      this.map.set(key.toLowerCase(), existingValue + ', ' + value);
+      this.map.set(key.toLowerCase(), existingValue + ', ' + value)
     } else {
-      this.map.set(key.toLowerCase(), value);
+      this.map.set(key.toLowerCase(), value)
     }
   }
 
   get(key) {
-    return this.map.get(key.toLowerCase()) || null;
+    return this.map.get(key.toLowerCase()) || null
   }
 
   has(key) {
-    return this.map.has(key.toLowerCase());
+    return this.map.has(key.toLowerCase())
   }
 
   set(key, value) {
-    this.map.set(key.toLowerCase(), value);
+    this.map.set(key.toLowerCase(), value)
   }
 
   delete(key) {
-    this.map.delete(key.toLowerCase());
+    this.map.delete(key.toLowerCase())
   }
 
   // Additional methods if needed
diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js
index 8362d9803..f85db0b98 100644
--- a/test/js/lib/ontology-validation.test.js
+++ b/test/js/lib/ontology-validation.test.js
@@ -20,7 +20,7 @@ describe('Client-side file validation for AnnData', () => {
 
   it('Parses AnnData headers', async () => {
     const ontologies = await fetchOntologies()
-    console.log('ontologies')
+    console.log('ontologies', ontologies)
     expect(1).toEqual(1)
   })
 })
From 2d06a9e7949ff430dfa2eec69591b4f319226a89 Mon Sep 17 00:00:00 2001
From: bistline
Date: Thu, 19 Sep 2024 09:49:43 -0400
Subject: [PATCH 31/52] Fixing test regressions

---
 test/js/upload-wizard/file-upload-control.test.js | 2 +-
 test/models/ingest_job_test.rb | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/js/upload-wizard/file-upload-control.test.js b/test/js/upload-wizard/file-upload-control.test.js
index dd1d03a03..829dd760f 100644
--- a/test/js/upload-wizard/file-upload-control.test.js
+++ b/test/js/upload-wizard/file-upload-control.test.js
@@ -382,7 +382,7 @@ describe('file upload control validates the selected file', () => {
     expect(bucketPathInput).toHaveValue('bad.pdf')
     await waitFor(() => {
       expect(screen.getByTestId('validation-error')).toBeInTheDocument()
-      expect(screen.getByText('Allowed extensions are: .tsv')).toBeVisible()
+      expect(screen.getByText('Allowed extensions are .tsv')).toBeVisible()
     })
   })
 })
diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb
index edbcb60b6..4a8d0b4fa 100644
--- a/test/models/ingest_job_test.rb
+++ b/test/models/ingest_job_test.rb
@@ -219,7 +219,7 @@ class IngestJobTest < ActiveSupport::TestCase
                                       study: @basic_study,
                                       upload_file_size: 1.megabyte,
                                       cell_input: %w[A B C D],
-                                      has_raw_counts: false,
+                                      has_raw_counts: true,
                                       reference_file: false,
                                       annotation_input: [
                                         { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
@@ -232,9 +232,9 @@ class IngestJobTest < ActiveSupport::TestCase
     })
     params_object = AnnDataIngestParameters.new(
       anndata_file: ann_data_file.gs_url, obsm_keys: ann_data_file.ann_data_file_info.obsm_key_names,
-      file_size: ann_data_file.upload_file_size
+      file_size: ann_data_file.upload_file_size, extract_raw_counts: true
     )
-    assert_not params_object.extract.include?('raw_counts')
+    assert params_object.extract.include?('raw_counts')
     job = IngestJob.new(
       study: @basic_study, study_file: ann_data_file, user: @user, action: :ingest_anndata, params_object:
     )
@@ -269,7 +269,7 @@ class IngestJobTest < ActiveSupport::TestCase
       studyAccession: @basic_study.accession,
       jobStatus: 'success',
       referenceAnnDataFile: false,
-      extractedFileTypes: %w[cluster metadata processed_expression],
+      extractedFileTypes: %w[cluster metadata processed_expression raw_counts],
       machineType: 'n2d-highmem-4',
       bootDiskSizeGb: 300,
       exitStatus: 0
From b6376fbf17706f4eb0d3ff4b86a5b1dae57b5b08 Mon Sep 17 00:00:00 2001
From: bistline
Date: Thu, 19 Sep 2024 10:28:36 -0400
Subject: [PATCH 32/52] Apply suggestions from code review

Co-authored-by: Eric Weitz
---
 app/javascript/components/upload/FileUploadControl.jsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx
index cbf4bcadc..a53f67801 100644
--- a/app/javascript/components/upload/FileUploadControl.jsx
a/app/javascript/components/upload/FileUploadControl.jsx +++ b/app/javascript/components/upload/FileUploadControl.jsx @@ -104,11 +104,11 @@ export default function FileUploadControl({ } // don't continue unless a dot is present (otherwise, no valid file extension) - if (trimmedPath.indexOf('.') < 0 ) { return false } + if (!trimmedPath.includes('.')) { return false } const fileType = file.file_type const fileExtension = `.${trimmedPath.split('.').slice(-1)[0]}` - if (!inputAcceptExts.includes(fileExtension)) { + if (fileExtension.length > 1 && !inputAcceptExts.includes(fileExtension)) { const invalidExt = { errors: [ [ From 56ce531d173992da70cb34eaaabe117db8a83cd7 Mon Sep 17 00:00:00 2001 From: bistline Date: Thu, 19 Sep 2024 11:07:00 -0400 Subject: [PATCH 33/52] Handle corner case of nil file size --- app/lib/download_quota_service.rb | 4 ++-- test/services/download_quota_service_test.rb | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/app/lib/download_quota_service.rb b/app/lib/download_quota_service.rb index 6877d28af..60f014b95 100644 --- a/app/lib/download_quota_service.rb +++ b/app/lib/download_quota_service.rb @@ -19,7 +19,7 @@ def self.grant_user_exemption(user) def self.download_exceeds_quota?(user, requested_bytes) return false if user.daily_download_quota.nil? - user_quota = user.daily_download_quota + requested_bytes + user_quota = user.daily_download_quota + requested_bytes.to_i user_quota > download_quota end @@ -28,7 +28,7 @@ def self.increment_user_quota(user, requested_bytes) # skip incrementing quota if exemption is set via nil value return true if user.daily_download_quota.nil? - user_quota = user.daily_download_quota + requested_bytes + user_quota = user.daily_download_quota + requested_bytes.to_i user.update(daily_download_quota: user_quota) end diff --git a/test/services/download_quota_service_test.rb b/test/services/download_quota_service_test.rb index e65f7ec38..020af3acd 100644 --- a/test/services/download_quota_service_test.rb +++ b/test/services/download_quota_service_test.rb @@ -57,4 +57,11 @@ class DownloadQuotaServiceTest < ActiveSupport::TestCase ) assert_equal 1.megabyte, DownloadQuotaService.download_quota end + + test 'should handle nil file size' do + starting_quota = @user.daily_download_quota + DownloadQuotaService.increment_user_quota(@user, nil) + @user.reload + assert_equal starting_quota, @user.daily_download_quota + end end From db5791f3fd96d45ec7eed9916376501d0ce7b93f Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 19 Sep 2024 13:40:43 -0400 Subject: [PATCH 34/52] Make test meaningful, refine cacheFetch to work with simpler mock --- .../lib/validation/ontology-validation.js | 16 ++++++++++------ test/js/lib/node-web-api.js | 4 +++- test/js/lib/ontology-validation.test.js | 8 +++++--- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 5de2cfa6d..695d70aac 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -25,7 +25,6 @@ async function getServiceWorkerCache() { // Delete other versions of ontologies cache; there should be 1 per dodmain const cacheNames = await caches.keys() - console.log('cacheNames', cacheNames) cacheNames.forEach(name => { if (name.startsWith('ontologies-') && name !== currentOntologies) { caches.delete(name) @@ -40,9 +39,9 @@ async function getServiceWorkerCache() { /** Fetch .gz file, decompress it, 
return plaintext */ export async function fetchGzipped(url) { const response = await fetch(url) - const blob = await response.blob(); - const uint8Array = new Uint8Array(await blob.arrayBuffer()); - const plaintext = strFromU8(decompressSync(uint8Array)); + const blob = await response.blob() + const uint8Array = new Uint8Array(await blob.arrayBuffer()) + const plaintext = strFromU8(decompressSync(uint8Array)) return plaintext } @@ -57,8 +56,13 @@ export async function cacheFetch(url) { const data = await fetchGzipped(url) const contentLength = data.length const decompressedResponse = new Response( - new Blob([data], { type: 'text/tab-separated-values' }), - { headers: new Headers({ 'Content-Length': contentLength }) } + data, + { + headers: new Headers({ + 'Content-Length': contentLength, + 'Content-Type': 'text/tab-separated-values' + }) + } ) await cache.put(decompressedUrl, decompressedResponse) return await cache.match(decompressedUrl) diff --git a/test/js/lib/node-web-api.js b/test/js/lib/node-web-api.js index 9ccb62aa8..8e0370658 100644 --- a/test/js/lib/node-web-api.js +++ b/test/js/lib/node-web-api.js @@ -1,6 +1,7 @@ /* eslint-disable require-jsdoc */ /** @fileoverview Node port for interfaces of Web API; helps mocking * + * https://developer.mozilla.org/en-US/docs/Web/API/CacheStorage * https://developer.mozilla.org/en-US/docs/Web/API/Cache */ const { Readable } = require('stream') @@ -8,6 +9,7 @@ const { Readable } = require('stream') export const nodeCaches = { _cacheStores: {}, + /** Returns a Promise that resolves to the requested Cache object. */ open: jest.fn(cacheName => { // Initialize the store for the cache name if it doesn't exist if (!nodeCaches._cacheStores[cacheName]) { @@ -91,7 +93,7 @@ export const nodeResponse = class { read() { this.push(readableBody) // Push the string or buffer into the stream this.push(null) // Signal the end of the stream - }, + } }) this.body = stream // Assign the readable stream to body diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js index f85db0b98..f0f06e171 100644 --- a/test/js/lib/ontology-validation.test.js +++ b/test/js/lib/ontology-validation.test.js @@ -18,9 +18,11 @@ describe('Client-side file validation for AnnData', () => { global.Headers = nodeHeaders }) - it('Parses AnnData headers', async () => { + it('Parses minified ontologies', async () => { const ontologies = await fetchOntologies() - console.log('ontologies', ontologies) - expect(1).toEqual(1) + const expectedOntologyNames = ['mondo', 'pato', 'efo', 'uberon', 'ncbitaxon'] + expect(Object.keys(ontologies)).toEqual(expectedOntologyNames) + const expectedSpeciesNames = ['Homo sapiens', 'human'] + expect(ontologies.ncbitaxon['NCBITaxon_9606']).toEqual(expectedSpeciesNames) }) }) From 1ecf29608fc53083718c3bf56197d9b7fc2e8ed8 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 19 Sep 2024 13:53:54 -0400 Subject: [PATCH 35/52] Remove "BETA" qualifier from AnnData upload UI --- app/javascript/components/upload/UploadExperienceSplitter.jsx | 2 +- app/javascript/components/upload/WizardNavPanel.jsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/javascript/components/upload/UploadExperienceSplitter.jsx b/app/javascript/components/upload/UploadExperienceSplitter.jsx index 363bb2e64..5816ad64f 100644 --- a/app/javascript/components/upload/UploadExperienceSplitter.jsx +++ b/app/javascript/components/upload/UploadExperienceSplitter.jsx @@ -46,7 +46,7 @@ function RawUploadExperienceSplitter({ onClick={() => { 
setIsAnnDataExperience(true) setOverrideExperienceMode(true) }}> AnnData
Upload one AnnData (.h5ad) file diff --git a/app/javascript/components/upload/WizardNavPanel.jsx b/app/javascript/components/upload/WizardNavPanel.jsx index 44123b1d2..b7e955852 100644 --- a/app/javascript/components/upload/WizardNavPanel.jsx +++ b/app/javascript/components/upload/WizardNavPanel.jsx @@ -222,7 +222,7 @@ function MainStepsDisplay(formState, serverState, currentStep, setCurrentStep, m - AnnData BETA + AnnData From 4e14c4329048821b4ff315b2241d77274919cac5 Mon Sep 17 00:00:00 2001 From: bistline Date: Thu, 19 Sep 2024 13:54:38 -0400 Subject: [PATCH 36/52] Clear out existing cached filename on validation failure --- .../components/upload/FileUploadControl.jsx | 7 ++++ .../upload-wizard/file-upload-control.test.js | 42 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/app/javascript/components/upload/FileUploadControl.jsx b/app/javascript/components/upload/FileUploadControl.jsx index be806d26c..4c9d3b40d 100644 --- a/app/javascript/components/upload/FileUploadControl.jsx +++ b/app/javascript/components/upload/FileUploadControl.jsx @@ -77,6 +77,13 @@ export default function FileUploadControl({ name: newName, notes }) + } else if (issues.errors.length > 0 && file.uploadSelection) { + // clear out a previous known good file, if present + updateFile(file._id, { + uploadSelection: null, + upload_file_name: '', + name: '' + }) } } diff --git a/test/js/upload-wizard/file-upload-control.test.js b/test/js/upload-wizard/file-upload-control.test.js index 2db1de448..65177d335 100644 --- a/test/js/upload-wizard/file-upload-control.test.js +++ b/test/js/upload-wizard/file-upload-control.test.js @@ -356,4 +356,46 @@ describe('file upload control validates the selected file', () => { fireEvent.click(bucketToggle) expect(container.querySelector('#remote_location-input-123')).toBeVisible(); }) + + it ('clears out previous uploaded file after replace fails to validate', async () => { + const file = { + _id: '123', + name: 'cluster.txt', + status: 'new', + file_type: 'Cluster', + upload_file_name: 'cluster.txt', + uploadSelection: {} // needed to mock a file already being selected + } + const updateFileHolder = { updateFile: () => {} } + const updateFileSpy = jest.spyOn(updateFileHolder, 'updateFile') + render(( + + + + )) + + const replaceLink = screen.getByText('Replace') + expect(replaceLink).toBeVisible() + // test replacing with bad file & clearing out state + // this won't update the button state back to 'Choose file' since updateFile() is mocked above + // unsetting uploadSelection and upload_file_name achieves this in actual usage + fireFileSelectionEvent(screen.getByTestId('file-input'), { + fileName: 'cluster_bad.txt', + content: 'foo,X,Y\nTYPE,numeric,numeric\nCell1,1,0\n' + },true) + await waitForElementToBeRemoved(() => screen.getByTestId('file-validation-spinner')) + expect(updateFileSpy).toHaveBeenLastCalledWith('123', { + uploadSelection: null, + name: '', + upload_file_name: '' + }) + expect(screen.getByTestId('validation-error')).toHaveTextContent('after correcting cluster_bad.txt') + expect(screen.getByTestId('validation-error')) + .toHaveTextContent('First row, first column must be "NAME" (case insensitive). 
Your value was "foo') + }) }) From 7e42c7ab2bed760b6e8044dd2ce6d6ad39c6850d Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 19 Sep 2024 15:44:15 -0400 Subject: [PATCH 37/52] Skip ontology label validation for old-spec AnnData; no relevant test data available --- .../lib/validation/validate-anndata.js | 72 +++++++++---------- test/js/lib/validate-anndata.test.js | 18 ++--- 2 files changed, 40 insertions(+), 50 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 6553505d1..1e8cab04e 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -167,8 +167,6 @@ async function checkOntologyLabels(key, ontologies, groups) { /** Get ontology ID values for key in AnnData file */ async function getOntologyIdsAndLabels(requiredName, hdf5File) { - let ontologyIds = [] - const obs = await hdf5File.get('obs') const obsValues = await Promise.all(obs.values) @@ -178,10 +176,12 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { // own `categories` and `codes`. // See e.g. https://github.com/scverse/anndata/issues/879 const internalCategories = obsValues.find(o => o.name.endsWith('__categories')) - - let resolvedCategories = obsValues if (internalCategories) { - resolvedCategories = await Promise.all(internalCategories.values) + console.debug( + 'Encountered old-spec AnnData, skipping ontology label validation. ' + + 'Server-side processing will validate this' + ) + return null } const idKey = requiredName @@ -190,36 +190,30 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { const idGroup = obsValues.find(o => o.name.endsWith(idKey)) const labelGroup = obsValues.find(o => o.name.endsWith(labelKey)) - if (idGroup) { - if (internalCategories) { - ontologyIds = await idGroup.value - } else { - // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, - // sex) into a container with a `categories` frame and a `code` frame. - // - // - categories: external values, non-redundant array. E.g.: - // ["tuberculosis", "TB", "foo"] or ["female"] - // - // - codes: internal values, redundant array of integers that specify - // the index (position) of each category value in the array of obs - // (cells) - // - // This organization greatly decreases filesize, but requires more code - // to map paired obs annotations like `disease` (ontology IDs) to - // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. - const idCategories = await idGroup.values[0] - const idCodes = await idGroup.values[1] - const ids = await idCategories.value - const idIndexes = await idCodes.value - - const labelCategories = await labelGroup.values[0] - const labelCodes = await labelGroup.values[1] - const labels = await labelCategories.value - const labelIndexes = await labelCodes.value - - return [ids, idIndexes, labels, labelIndexes] - } - } + // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, + // sex) into a container with a `categories` frame and a `code` frame. + // + // - categories: external values, non-redundant array. 
E.g.: + // ["tuberculosis", "TB", "foo"] or ["female"] + // + // - codes: internal values, redundant array of integers that specify + // the index (position) of each category value in the array of obs + // (cells) + // + // This organization greatly decreases filesize, but requires more code + // to map paired obs annotations like `disease` (ontology IDs) to + // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. + const idCategories = await idGroup.values[0] + const idCodes = await idGroup.values[1] + const ids = await idCategories.value + const idIndexes = await idCodes.value + + const labelCategories = await labelGroup.values[0] + const labelCodes = await labelGroup.values[1] + const labels = await labelCategories.value + const labelIndexes = await labelCodes.value + + return [ids, idIndexes, labels, labelIndexes] } /** Validate ontology labels for required metadata columns in AnnData file */ @@ -235,9 +229,11 @@ async function validateOntologyLabels(hdf5File) { const key = column.split('__ontology_label')[0] const groups = await getOntologyIdsAndLabels(key, hdf5File) - issues = issues.concat( - await checkOntologyLabels(key, ontologies, groups) - ) + if (groups) { + issues = issues.concat( + await checkOntologyLabels(key, ontologies, groups) + ) + } } return issues diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js index be43f8809..b89d27c98 100644 --- a/test/js/lib/validate-anndata.test.js +++ b/test/js/lib/validate-anndata.test.js @@ -77,18 +77,9 @@ describe('Client-side file validation for AnnData', () => { expect(parseResults.issues[0]).toEqual(expectedIssue) }) - // it('Reports valid ontology labels as valid', async () => { - // const issues = await checkOntologyLabels( - // // Underscore or colon can delimit shortname and number; - // // disease can use MONDO or PATO IDs. - // 'disease', ['MONDO_0000001', 'MONDO:0000001', 'PATO:0000001'] - // ) - // expect(issues).toHaveLength(0) - // }) - - // TODO: Uncomment after PR merge + // // TODO: Uncomment after PR merge // it('Parses AnnData rows and reports invalid ontology labels', async () => { - // const url = `${BASE_URL}/invalid_disease_id.h5ad` + // const url = `${BASE_URL}/invalid_disease_label.h5ad` // const parseResults = await parseAnnDataFile(url) // expect(parseResults.issues).toHaveLength(1) @@ -96,7 +87,10 @@ describe('Client-side file validation for AnnData', () => { // const expectedIssue = [ // 'error', // 'ontology:label-lookup-error', - // 'Ontology ID "FOO_0000042" is not among accepted ontologies (MONDO, PATO) for key "disease"' + // 'Invalid disease label "foo". 
Valid labels for MONDO_0018076: tuberculosis, tuberculosis disease, active tuberculosis, Kochs disease, TB', + // { + // 'subtype': 'ontology:invalid-label' + // } // ] // expect(parseResults.issues[0]).toEqual(expectedIssue) // }) From ba8d809c0dbd5851cddbcc72e4c09866f5b4abbd Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Thu, 19 Sep 2024 16:41:41 -0400 Subject: [PATCH 38/52] Convert from O(n^2) to O(n) to avoid OOM --- .../lib/validation/validate-anndata.js | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 1e8cab04e..ff86a1423 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -128,15 +128,19 @@ async function checkOntologyLabels(key, ontologies, groups) { const issues = [] - const labelIdPairs = [] + console.log('in checkOntologyLabels') + const labelIdPairs = new Set() for (let i = 0; i < idIndexes.length; i++) { const id = ids[idIndexes[i]] - for (let j = 0; j < labelIndexes.length; j++) { - const label = labels[labelIndexes[j]] - labelIdPairs.push(`${id} || ${label}`) + const label = labels[labelIndexes[i]] + if (label === undefined) { + console.log("label === 'undefined', i, labelIndexes[i]", i, labelIndexes[i]) } + labelIdPairs.add(`${id} || ${label}`) } - const rawUniques = Array.from(new Set(labelIdPairs)) + console.log('labelIdPairs', labelIdPairs) + const rawUniques = Array.from(labelIdPairs) + console.log('rawUniques', rawUniques) rawUniques.map(r => { const [id, label] = r.split(' || ') @@ -162,6 +166,8 @@ async function checkOntologyLabels(key, ontologies, groups) { } }) + console.log('issues', issues) + return issues } @@ -203,15 +209,23 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { // This organization greatly decreases filesize, but requires more code // to map paired obs annotations like `disease` (ontology IDs) to // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. 
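//
// For illustration, with hypothetical values (not taken from a real file):
// categories ['MONDO_0018076', 'MONDO_0000001'] and codes [0, 0, 1] expand
// to one value per cell, e.g.:
//
//   const categories = ['MONDO_0018076', 'MONDO_0000001']
//   const codes = [0, 0, 1] // may be a typed array in HDF5, hence Array.from
//   const perCell = Array.from(codes, i => categories[i])
//   // perCell: ['MONDO_0018076', 'MONDO_0018076', 'MONDO_0000001']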
+ console.log('idGroup', idGroup) + console.log('labelGroup', labelGroup) const idCategories = await idGroup.values[0] + console.log('idCategories', idCategories) const idCodes = await idGroup.values[1] + console.log('idGroup', idCodes) const ids = await idCategories.value + console.log('ids', ids) const idIndexes = await idCodes.value + console.log('idIndexes', idIndexes) const labelCategories = await labelGroup.values[0] const labelCodes = await labelGroup.values[1] const labels = await labelCategories.value + console.log('labels', labels) const labelIndexes = await labelCodes.value + console.log('labelIndexes', labelIndexes) return [ids, idIndexes, labels, labelIndexes] } From 3480860e4f60e01b24f54a29f2a665573555974c Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 20 Sep 2024 10:00:19 -0400 Subject: [PATCH 39/52] Skip ontology label validation for remote AnnData --- .../lib/validation/validate-anndata.js | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index ff86a1423..03ec4eb4b 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -123,24 +123,17 @@ export function checkOntologyIdFormat(key, ontologyIds) { /** Validate author's annotation labels match those in ontologies */ async function checkOntologyLabels(key, ontologies, groups) { - const [ids, idIndexes, labels, labelIndexes] = groups const issues = [] - console.log('in checkOntologyLabels') const labelIdPairs = new Set() for (let i = 0; i < idIndexes.length; i++) { const id = ids[idIndexes[i]] const label = labels[labelIndexes[i]] - if (label === undefined) { - console.log("label === 'undefined', i, labelIndexes[i]", i, labelIndexes[i]) - } labelIdPairs.add(`${id} || ${label}`) } - console.log('labelIdPairs', labelIdPairs) const rawUniques = Array.from(labelIdPairs) - console.log('rawUniques', rawUniques) rawUniques.map(r => { const [id, label] = r.split(' || ') @@ -209,23 +202,15 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { // This organization greatly decreases filesize, but requires more code // to map paired obs annotations like `disease` (ontology IDs) to // `disease__ontology_label` (ontology names) than needed for e.g. TSVs. - console.log('idGroup', idGroup) - console.log('labelGroup', labelGroup) const idCategories = await idGroup.values[0] - console.log('idCategories', idCategories) const idCodes = await idGroup.values[1] - console.log('idGroup', idCodes) const ids = await idCategories.value - console.log('ids', ids) const idIndexes = await idCodes.value - console.log('idIndexes', idIndexes) const labelCategories = await labelGroup.values[0] const labelCodes = await labelGroup.values[1] const labels = await labelCategories.value - console.log('labels', labels) const labelIndexes = await labelCodes.value - console.log('labelIndexes', labelIndexes) return [ids, idIndexes, labels, labelIndexes] } @@ -291,7 +276,14 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { let ontologyLabelIssues = [] if (requiredMetadataIssues.length === 0) { ontologyIdFormatIssues = await validateOntologyIdFormat(hdf5File) - if (ontologyIdFormatIssues.length === 0) { + if ( + ontologyIdFormatIssues.length === 0 && + + // TODO (SCP-5813): + // For big remote AnnData, hdf5-indexed-reader sometimes corrupts index codes, + // so skip ontology label validation for e.g. 
"Use bucket path" + 'url' in remoteProps === false + ) { ontologyLabelIssues = await validateOntologyLabels(hdf5File) } } From c974650111410f4431183248224456e248330383 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 20 Sep 2024 10:14:40 -0400 Subject: [PATCH 40/52] Fix tests --- app/javascript/lib/validation/validate-anndata.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 03ec4eb4b..9bc49a488 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -282,7 +282,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { // TODO (SCP-5813): // For big remote AnnData, hdf5-indexed-reader sometimes corrupts index codes, // so skip ontology label validation for e.g. "Use bucket path" - 'url' in remoteProps === false + remoteProps && 'url' in remoteProps === false ) { ontologyLabelIssues = await validateOntologyLabels(hdf5File) } From 0a73e06460291f7307fd21b7881bae005d28b9e1 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 20 Sep 2024 10:18:53 -0400 Subject: [PATCH 41/52] Remove obsolete code, refine TODO note --- app/javascript/lib/validation/validate-anndata.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 9bc49a488..374d23c1a 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -266,11 +266,6 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { const headers = await getAnnDataHeaders(hdf5File) - // TODO (SCP-5770): Extend AnnData CSFV to remote files, then remove this - if (!headers) { - return { issues } - } - const requiredMetadataIssues = validateRequiredMetadataColumns([headers], true) let ontologyIdFormatIssues = [] let ontologyLabelIssues = [] @@ -279,9 +274,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { if ( ontologyIdFormatIssues.length === 0 && - // TODO (SCP-5813): - // For big remote AnnData, hdf5-indexed-reader sometimes corrupts index codes, - // so skip ontology label validation for e.g. "Use bucket path" + // TODO (SCP-5813): Enable ontology validation for remote AnnData remoteProps && 'url' in remoteProps === false ) { ontologyLabelIssues = await validateOntologyLabels(hdf5File) From 7d978fa4aaecd07e72ee2b4b0efb82eb5af87011 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Fri, 20 Sep 2024 12:12:17 -0400 Subject: [PATCH 42/52] Improve docs, remove debug --- .../lib/validation/ontology-validation.js | 15 ++++++++++++--- app/javascript/lib/validation/validate-anndata.js | 8 +++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 695d70aac..80dc1b4e2 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -1,3 +1,15 @@ +/** + * @fileoverview Validates ontology labels and IDs in files added by users + * + * SCP requires uploaded data to content certain metadata annotations, e.g. + * species, organ, disease, and library preparation protocol. This module + * loads ontology reference data, and it to required metadata in the + * user's uploaded or transferred file. 
+ * + * More context, demo: + * https://github.com/broadinstitute/single_cell_portal_core/pull/2129 + */ + import { decompressSync, strFromU8 } from 'fflate' import { @@ -70,8 +82,6 @@ export async function cacheFetch(url) { return await cache.match(decompressedUrl) } - - /** * Fetch minified ontologies, transform into object of object of arrays, e.g.: * @@ -124,7 +134,6 @@ export async function fetchOntologies() { window.SCP.ontologies = ontologies return ontologies } -window.fetchOntologies = fetchOntologies /** Get lowercase shortnames for all required ontologies */ function getOntologyShortNames() { diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 374d23c1a..117918171 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -38,7 +38,6 @@ async function getOntologyIds(key, hdf5File) { return ontologyIds } -window.getOntologyIds = getOntologyIds /** Get annotation headers for a key (e.g. obs) from an HDF5 file */ async function getAnnotationHeaders(key, hdf5File) { @@ -127,6 +126,7 @@ async function checkOntologyLabels(key, ontologies, groups) { const issues = [] + // Determine unique (ontology ID, ontology label) pairs const labelIdPairs = new Set() for (let i = 0; i < idIndexes.length; i++) { const id = ids[idIndexes[i]] @@ -139,7 +139,9 @@ async function checkOntologyLabels(key, ontologies, groups) { const [id, label] = r.split(' || ') const ontologyShortNameLc = id.split(/[_:]/)[0].toLowerCase() const ontology = ontologies[ontologyShortNameLc] + if (!(id in ontology)) { + // Register invalid ontology ID const msg = `Invalid ontology ID: ${id}` issues.push([ 'error', 'ontology:label-lookup-error', msg, @@ -147,7 +149,9 @@ async function checkOntologyLabels(key, ontologies, groups) { ]) } else { const validLabels = ontology[id] + if (!(validLabels.includes(label))) { + // Register invalid ontology label const prettyLabels = validLabels.join(', ') const validLabelsClause = `Valid labels for ${id}: ${prettyLabels}` const msg = `Invalid ${key} label "${label}". ${validLabelsClause}` @@ -159,8 +163,6 @@ async function checkOntologyLabels(key, ontologies, groups) { } }) - console.log('issues', issues) - return issues } From 9a9bd8f156ac467fac8b3f1caebd65212ef4ce8b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Sep 2024 19:24:27 +0000 Subject: [PATCH 43/52] Bump puma from 5.6.8 to 5.6.9 Bumps [puma](https://github.com/puma/puma) from 5.6.8 to 5.6.9. - [Release notes](https://github.com/puma/puma/releases) - [Changelog](https://github.com/puma/puma/blob/master/History.md) - [Commits](https://github.com/puma/puma/compare/v5.6.8...v5.6.9) --- updated-dependencies: - dependency-name: puma dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 1e5ea7a51..90ed9b5fa 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -362,7 +362,7 @@ GEM psych (5.1.2) stringio public_suffix (5.0.4) - puma (5.6.8) + puma (5.6.9) nio4r (~> 2.0) racc (1.8.0) rack (2.2.9) From e457f57e29e33e98e773c927a3493baf7d619ba9 Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Mon, 23 Sep 2024 13:11:55 -0400 Subject: [PATCH 44/52] Refine docs, variable names --- .../lib/validation/ontology-validation.js | 4 ++-- app/javascript/lib/validation/validate-anndata.js | 14 +++++++------- test/js/lib/node-web-api.js | 3 +++ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 80dc1b4e2..505b970af 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -3,8 +3,8 @@ * * SCP requires uploaded data to content certain metadata annotations, e.g. * species, organ, disease, and library preparation protocol. This module - * loads ontology reference data, and it to required metadata in the - * user's uploaded or transferred file. + * loads ontology reference data, and uses it to check required metadata in + * the user's uploaded or transferred file. * * More context, demo: * https://github.com/broadinstitute/single_cell_portal_core/pull/2129 diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 117918171..7e5f4fc49 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -120,8 +120,8 @@ export function checkOntologyIdFormat(key, ontologyIds) { return issues } -/** Validate author's annotation labels match those in ontologies */ -async function checkOntologyLabels(key, ontologies, groups) { +/** Validate author's annotation labels and IDs match those in ontologies */ +async function checkOntologyLabelsAndIds(key, ontologies, groups) { const [ids, idIndexes, labels, labelIndexes] = groups const issues = [] @@ -218,7 +218,7 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { } /** Validate ontology labels for required metadata columns in AnnData file */ -async function validateOntologyLabels(hdf5File) { +async function validateOntologyLabelsAndIds(hdf5File) { let issues = [] const ontologies = await fetchOntologies() @@ -232,7 +232,7 @@ async function validateOntologyLabels(hdf5File) { if (groups) { issues = issues.concat( - await checkOntologyLabels(key, ontologies, groups) + await checkOntologyLabelsAndIds(key, ontologies, groups) ) } } @@ -270,7 +270,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { const requiredMetadataIssues = validateRequiredMetadataColumns([headers], true) let ontologyIdFormatIssues = [] - let ontologyLabelIssues = [] + let ontologyLabelAndIdIssues = [] if (requiredMetadataIssues.length === 0) { ontologyIdFormatIssues = await validateOntologyIdFormat(hdf5File) if ( @@ -279,7 +279,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { // TODO (SCP-5813): Enable ontology validation for remote AnnData remoteProps && 'url' in remoteProps === false ) { - ontologyLabelIssues = await validateOntologyLabels(hdf5File) + ontologyLabelAndIdIssues = await validateOntologyLabelsAndIds(hdf5File) } } @@ -287,7 +287,7 @@ export async function 
parseAnnDataFile(fileOrUrl, remoteProps) { validateUnique(headers), requiredMetadataIssues, ontologyIdFormatIssues, - ontologyLabelIssues + ontologyLabelAndIdIssues ) return { issues } diff --git a/test/js/lib/node-web-api.js b/test/js/lib/node-web-api.js index 8e0370658..6c8ef04d2 100644 --- a/test/js/lib/node-web-api.js +++ b/test/js/lib/node-web-api.js @@ -3,6 +3,9 @@ * * https://developer.mozilla.org/en-US/docs/Web/API/CacheStorage * https://developer.mozilla.org/en-US/docs/Web/API/Cache + * https://developer.mozilla.org/en-US/docs/Web/API/Request + * https://developer.mozilla.org/en-US/docs/Web/API/Response + * https://developer.mozilla.org/en-US/docs/Web/API/Headers */ const { Readable } = require('stream') From 0cb12e509b20b1d472eef7d630abcdc67e67bc7a Mon Sep 17 00:00:00 2001 From: Eric Weitz Date: Mon, 23 Sep 2024 14:20:53 -0400 Subject: [PATCH 45/52] Fix typo --- app/javascript/lib/validation/ontology-validation.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 505b970af..aee6ec4c5 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -1,10 +1,10 @@ /** * @fileoverview Validates ontology labels and IDs in files added by users * - * SCP requires uploaded data to content certain metadata annotations, e.g. + * SCP requires cells to have certain metadata annotations, e.g. * species, organ, disease, and library preparation protocol. This module - * loads ontology reference data, and uses it to check required metadata in - * the user's uploaded or transferred file. + * loads ontology reference data, and uses it to check required cell metadata + * in the user's uploaded or transferred file. * * More context, demo: * https://github.com/broadinstitute/single_cell_portal_core/pull/2129 From 8af4797a4d7c0902027e56af50660ae6cfe25ffa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:30:07 +0000 Subject: [PATCH 46/52] Bump webrick from 1.8.1 to 1.8.2 Bumps [webrick](https://github.com/ruby/webrick) from 1.8.1 to 1.8.2. - [Release notes](https://github.com/ruby/webrick/releases) - [Commits](https://github.com/ruby/webrick/compare/v1.8.1...v1.8.2) --- updated-dependencies: - dependency-name: webrick dependency-type: indirect ... Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 90ed9b5fa..f66521c49 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -520,7 +520,7 @@ GEM zeitwerk (~> 2.2) warden (1.2.9) rack (>= 2.0.9) - webrick (1.8.1) + webrick (1.8.2) websocket-driver (0.7.6) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) From dedb89dfcba95ee1c0091819fb6270d4cb29c03b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:30:24 +0000 Subject: [PATCH 47/52] Bump rollup from 3.29.4 to 3.29.5 Bumps [rollup](https://github.com/rollup/rollup) from 3.29.4 to 3.29.5. - [Release notes](https://github.com/rollup/rollup/releases) - [Changelog](https://github.com/rollup/rollup/blob/master/CHANGELOG.md) - [Commits](https://github.com/rollup/rollup/compare/v3.29.4...v3.29.5) --- updated-dependencies: - dependency-name: rollup dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yarn.lock b/yarn.lock index 52f5500f1..2ed24b066 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8305,9 +8305,9 @@ rimraf@^3.0.0: glob "^7.1.3" rollup@^3.27.1: - version "3.29.4" - resolved "https://registry.npmjs.org/rollup/-/rollup-3.29.4.tgz" - integrity sha512-oWzmBZwvYrU0iJHtDmhsm662rC15FRXmcjCk1xD771dFDx5jJ02ufAQQTn0etB2emNk4J9EZg/yWKpsn9BWGRw== + version "3.29.5" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.29.5.tgz#8a2e477a758b520fb78daf04bca4c522c1da8a54" + integrity sha512-GVsDdsbJzzy4S/v3dqWPJ7EfvZJfCHiDqe80IyrF59LYuP+e6U1LJoUqeuqRbwAWoMNoXivMNeNAOf5E22VA1w== optionalDependencies: fsevents "~2.3.2" From a302e3e38207a2634299ff42dc32e63351d1797e Mon Sep 17 00:00:00 2001 From: bistline Date: Tue, 24 Sep 2024 16:07:18 -0400 Subject: [PATCH 48/52] Adding HomePageLink feature --- app/assets/images/cellarium.png | Bin 0 -> 1920 bytes app/controllers/site_controller.rb | 2 + app/javascript/styles/_brand.scss | 8 ++- app/javascript/styles/_global.scss | 10 +++ app/models/home_page_link.rb | 72 +++++++++++++++++++ app/views/layouts/application.html.erb | 10 +-- app/views/site/_home_page_link.html.erb | 11 +++ app/views/site/_main_banner_content.html.erb | 7 +- test/models/home_page_link_test.rb | 48 +++++++++++++ 9 files changed, 159 insertions(+), 9 deletions(-) create mode 100644 app/assets/images/cellarium.png create mode 100644 app/models/home_page_link.rb create mode 100644 app/views/site/_home_page_link.html.erb create mode 100644 test/models/home_page_link_test.rb diff --git a/app/assets/images/cellarium.png b/app/assets/images/cellarium.png new file mode 100644 index 0000000000000000000000000000000000000000..01c96b45e18955f2a2224c8db1a2f93acc06ecdd GIT binary patch literal 1920 zcmV-`2Y>j9P)Px+I!Q!9R9HuyR}EB?*BO4^8x%AY!Bc-gr!+V=svfAg?aZH91zHe2bQ9P0^t8@) z^SG%+T0|zbPS;b;sjJYT&~}}QScZbMh}v$I5FAH}cCD2nKkm@LVnY-j6T=>WIkQ!8v{cU@?Gs0H)kk=Utr83Qi3VwWseMuUQ(8 zpk8yR0-&FM<98eNY6IG)^`RNv9@S|7dDwX~`d(A5;qB?)zn}dv0etmP?%PDzQZcOs zRnu-l9WWIeo=indOf+I*Vm!9jtd7I6&h?}Dx*p!1J_6m4@TdUvn%pn@{00}MhkXqH zNt5y6-#_#?=+t+h?qV(K>pwwLd#hX@1Ayd*x2OB`HDa+V;^0gh_iL{X)T@i9HzIAv z4y63-30!Hof~>6HNn;v&FruxqzzN`-!w)JfvCKx8BXHD+JHe4ZyM9^T<+A$WLL&fl zXmSTigPYVJ1+_z4Q!Qna5|Ut3>+WrN2cyNZ4vy&4lPIx)A7G0AD@an8OH=pQ4O-=PC3E=wD!)46DQ_lo_37zVueQ+7zu9a?Sy?$w7N7L!q?jV14b)?R+3~nJ0v6`LrNB)X8}?-HL4151YIU^; z4GTqNWTgDPfBylGUZ8U_22^(WKg1b;Ue(}G6w)MshWG+bATWiyFDKBb7;seGFlMd= zv*fSuL9JHfxy{eRXf)!bjF*s{ycS1}6yQwxnKAV_P(_!L-zg!Er^xA&yC%+|;=l=| z4o&!vPXoch!HA2G!)B?|cy*9UW3WB_#!wm6i7eFH50%{hu2FEFumt z@g?pq;8jaG98i238D0lN0(!k3yD~Cm6edFS(G!0$G4LR%Bq9Ky*J(!KK$TBBiAjmb z%-rRZS5$NYMaN}(kDKUR>M_wB()keGa&)7#|8G8x?8?kQQc{vno``~|g^3WozwtPi zzS(d9-=Lz$HvnJZAJP=z-5<<0Cx%)!3Dsim{Hm-DtP1rY{kkIIB;OU zPbZ@4MfFdFsO0IQM$XtG4)Jl*(SILDNJt2J`+5-*9SyPTm$Ucc^5x5tf9iB8X3TIO zvqN-m?~fM*6+K&20#ZFC%YXnE2Z}q6%HvlY$D-v9AIV2gPY+($ktXe)C@#X>xpN_M ze&a?nZZtLF!iBr@!g!qvIT_=lo8RM3;hnhwlz#OtD$jm^y1F_XeEpBIuhVwEAWt}b zM+Y`-dR9Ju5518)5nm%@n^w z|I>2!BH(~~Oe%Q3^fU)HD#YL-4uuF8+TOyxeXn5EqpLh15)&Us=I%_79q}*{WVKjO zSojtU2E&M5_6KY5fT;t-og;X}u)tI1dA~ZvokVaEBmfr!E;bi>fe7#qK*;nE#Ky)V zHg*Y?#4dr=I*7KmHsrjPn5PSdOYhru#_RwBg>#V-0YF^F zem%O|-A}meSALJU)hoT6h%{vHJ%CPUAX+0g`UD^rO*!#ioAx=YXKk-AI~fIvqI8g$ z>6pB10B&3Vivj%waI5#AB6A+P6o&A}#_il^1ctOSg*nIwVwSNH zU?K)10RxP|B90Rp`z`O@oewL self.id).exists? 
+ existing = HomePageLink.published + errors.add( + :published, + "link exists: '#{existing.name}' (#{existing.id}), please unpublish first with HomePageLink.unpublish" + ) + puts errors.full_messages.to_sentence + end + end +end diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb index dffd0e816..1f462e1a2 100644 --- a/app/views/layouts/application.html.erb +++ b/app/views/layouts/application.html.erb @@ -192,10 +192,12 @@ © <%= Date.today.year %> The Broad Institute of MIT and Harvard
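
A minimal end-to-end sketch of the client-side AnnData validation built up in
the patches above. The import path is abbreviated and the URL is hypothetical;
parseAnnDataFile and the [level, type, message] issue shape are taken from the
tests in this series:

    import { parseAnnDataFile } from 'lib/validation/validate-anndata'

    // Hypothetical remote .h5ad; a local File object can be passed the same way
    const url = 'https://example.com/study.h5ad'
    const { issues } = await parseAnnDataFile(url)
    issues.forEach(([level, type, message]) => {
      console.log(level, type, message)
    })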