Commit

Merge pull request #2139 from broadinstitute/development
Release 1.80.0
eweitz authored Sep 25, 2024
2 parents 699d208 + 0eaa7e3 commit 175b1d4
Showing 42 changed files with 1,087 additions and 150 deletions.
4 changes: 2 additions & 2 deletions Gemfile.lock
@@ -362,7 +362,7 @@ GEM
psych (5.1.2)
stringio
public_suffix (5.0.4)
puma (5.6.8)
puma (5.6.9)
nio4r (~> 2.0)
racc (1.8.0)
rack (2.2.9)
@@ -520,7 +520,7 @@ GEM
zeitwerk (~> 2.2)
warden (1.2.9)
rack (>= 2.0.9)
webrick (1.8.1)
webrick (1.8.2)
websocket-driver (0.7.6)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.5)
Binary file added app/assets/images/cellarium.png
1 change: 1 addition & 0 deletions app/controllers/site_controller.rb
@@ -61,6 +61,7 @@ def index
@cell_count = 0
end

@home_page_link = HomePageLink.published
end

def covid
3 changes: 2 additions & 1 deletion app/javascript/components/explore/plot-data-cache.js
@@ -289,7 +289,8 @@ export function createCache() {
Fields.cellsAndCoords.merge(cacheEntry, scatter)
// only merge in annotation values if the annotation matches (or the default was requested, so
// we can then assume the response matches)
if (!requestedAnnotation.name || scatter.annotParams.name === requestedAnnotation.name) {
// annotParams may be undefined in spatial UX if a cluster-based annotation does not exist for the plot
if (!requestedAnnotation.name || scatter.annotParams?.name === requestedAnnotation.name) {
Fields.annotation.merge(cacheEntry, scatter)
}
if (scatter.genes.length && scatter.genes.join('') === requestedGenes.join('')) {
59 changes: 52 additions & 7 deletions app/javascript/components/upload/FileUploadControl.jsx
@@ -28,6 +28,10 @@ export default function FileUploadControl({
const [showUploadButton, setShowUploadButton] = useState(true)
const [showBucketPath, setShowBucketPath] = useState(false)
const ToggleUploadButton = () => {
// this is an inverted check since the user is clicking and the value is about to change
if (!showUploadButton) {
unsetRemoteLocation()
}
setShowUploadButton(!showUploadButton)
setShowBucketPath(!showBucketPath)
}
@@ -38,7 +42,7 @@
'Upload a file from your computer' :
"Input a path to a file that is already in this study's bucket"
const uploadToggle = <span
className='btn btn-default'
className='btn btn-default margin-left'
onClick={ToggleUploadButton}
data-toggle="tooltip"
data-original-title={toggleTooltip}>{toggleText}
@@ -51,7 +55,7 @@
</Popover>
const googleBucketLink =
<OverlayTrigger trigger={['hover', 'focus']} rootClose placement="top" overlay={bucketPopover} delayHide={1500}>
<a className='btn btn-default'
<a className='btn btn-default margin-left'
href={`https://accounts.google.com/AccountChooser?continue=https://console.cloud.google.com/storage/browser/${bucketName}`}
target='_blank'><FontAwesomeIcon icon={faExternalLinkSquareAlt} /> Browse bucket</a>
</OverlayTrigger>
@@ -77,20 +81,53 @@
name: newName,
notes
})
} else if (issues.errors.length > 0 && file.uploadSelection) {
// clear out a previous known good file, if present
updateFile(file._id, {
uploadSelection: null,
upload_file_name: '',
name: ''
})
}
}

// keep track of pending timeout for remote validation via bucket path
const [timeOutId, setTimeOutID] = useState(null)

// clear out remote_location and hasRemoteFile to allow switching back to file upload button
function unsetRemoteLocation() {
updateFile(file._id, {remote_location: '', hasRemoteFile: false})
}

// perform CSFV on remote file when specifying a GS URL or bucket path
// will sanitize GS URL before calling validateRemoteFile
async function handleBucketLocationEntry(e) {
const path = e.target.value
async function handleBucketLocationEntry(path) {
const matcher = new RegExp(`(gs:\/\/)?${bucketName}\/?`)
const trimmedPath = path.replace(matcher, '')
if (!trimmedPath) {
unsetRemoteLocation()
setFileValidation({ validating: false, issues: {}, fileName: null })
return false
}

// don't continue unless a dot is present (otherwise, no valid file extension)
if (!trimmedPath.includes('.')) { return false }

const fileType = file.file_type
const fileExtension = `.${trimmedPath.split('.').slice(-1)[0]}`
if (fileExtension.length > 1 && !inputAcceptExts.includes(fileExtension)) {
const invalidExt = {
errors: [
[
'error', 'filename:extension',
`Allowed extensions are ${allowedFileExts.join(', ')}`
]
]
}
setFileValidation({ validating: false, issues: invalidExt, fileName: trimmedPath })
return false
}

const fileOptions = fileType === 'Metadata' ? { use_metadata_convention: file?.use_metadata_convention } : {}

setFileValidation({ validating: true, issues: {}, fileName: trimmedPath })
@@ -175,22 +212,30 @@ export default function FileUploadControl({
/>
</button>
}

{!isFileOnServer && (showBucketPath || file.hasRemoteFile ) &&
// we can't use TextFormField since we need a custom onBlur event
// onBlur is the React equivalent of onfocusout, which will fire after the user is done updating the input
<input className="form-control"
type="text"
size={60}
id={`remote_location-input-${file._id}`}
data-testid="remote-location-input"
placeholder='GS URL or path to file in GCP bucket'
onBlur={handleBucketLocationEntry}/>
onChange={ (e) => {
const newBucketPath = e.target.value
if (timeOutId) { clearTimeout(timeOutId) }
const newTimeout = setTimeout(handleBucketLocationEntry, 300, newBucketPath)
setTimeOutID(newTimeout)
}}/>
}
&nbsp;&nbsp;
{ !isFileOnServer && (showBucketPath || file.hasRemoteFile ) && googleBucketLink }

&nbsp;&nbsp;
{ !isFileOnServer && uploadToggle }

{ showBucketPath && fileValidation.validating &&
<span className='margin-left' id='remote-location-validation'>Validating... <LoadingSpinner testId="file-validation-spinner"/></span>
}
<ValidationMessage
studyAccession={study.accession}
issues={fileValidation.issues}
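
The FileUploadControl changes above replace the old onBlur-based bucket path check with debounced validation: each keystroke clears any pending timer and schedules handleBucketLocationEntry 300 ms later, and the handler strips an optional gs:// prefix plus the bucket name before checking the file extension and kicking off remote validation. A minimal standalone sketch of that debounce-and-sanitize pattern (hypothetical names, not part of this commit):

let pendingTimeoutId = null

/** Strip an optional gs:// prefix and the bucket name from a user-entered path */
function trimBucketPath(path, bucketName) {
  const matcher = new RegExp(`(gs://)?${bucketName}/?`)
  return path.replace(matcher, '')
}

/** Debounce a validation callback so it fires 300 ms after the last keystroke */
function scheduleValidation(path, bucketName, validate) {
  if (pendingTimeoutId) { clearTimeout(pendingTimeoutId) }
  pendingTimeoutId = setTimeout(() => {
    const trimmedPath = trimBucketPath(path, bucketName)
    if (trimmedPath.includes('.')) { validate(trimmedPath) } // skip paths with no file extension
  }, 300)
}
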
@@ -46,7 +46,7 @@ function RawUploadExperienceSplitter({
onClick={() => {
setIsAnnDataExperience(true)
setOverrideExperienceMode(true)
}}> AnnData <sup>BETA</sup>
}}> AnnData
</a>
<div className='col'>
Upload one AnnData (.h5ad) file
7 changes: 6 additions & 1 deletion app/javascript/components/upload/UploadWizard.jsx
@@ -275,12 +275,17 @@ export function RawUploadWizard({ studyAccession, name }) {
setTimeout(() => deleteFileFromServer(requestCanceller.fileId), 500)
}

/** Helper for determining when to use saveAnnDataFileHelper (sets ids/values correctly for AnnData UX) */
function useAnnDataFileHelper(file) {
return isAnnDataExperience && (file?.file_type === 'AnnData' || Object.keys(file).includes("data_type"))
}

/** save the given file and perform an upload if a selected file is present */
async function saveFile(file) {
let fileToSave = file
let studyFileId = file._id

if (isAnnDataExperience && fileToSave?.file_type === 'AnnData') {
if (useAnnDataFileHelper(fileToSave)) {
fileToSave = saveAnnDataFileHelper(file, fileToSave)
studyFileId = fileToSave._id
}
2 changes: 1 addition & 1 deletion app/javascript/components/upload/WizardNavPanel.jsx
@@ -222,7 +222,7 @@ function MainStepsDisplay(formState, serverState, currentStep, setCurrentStep, m
</span>
<span>
<a className="action link" role="link">
AnnData <sup>BETA</sup> <AnnDataHelpMessage/> <FontAwesomeIcon icon={expansionIcon}/>
AnnData <AnnDataHelpMessage/> <FontAwesomeIcon icon={expansionIcon}/>
</a>
</span>
</button>
10 changes: 4 additions & 6 deletions app/javascript/components/visualization/ScatterPlot.jsx
@@ -598,17 +598,15 @@ function RawScatterPlot({
return (
<div className="plot">
{ ErrorComponent }
<PlotTitle
titleTexts={titleTexts}
isCorrelatedScatter={isCorrelatedScatter}
correlation={bulkCorrelation}/>
{ hasMissingAnnot &&
<div className="alert-warning text-center error-boundary">
"{cluster}" does not have the requested annotation "{loadedAnnotation}"
</div>
}
{ !hasMissingAnnot &&
<PlotTitle
titleTexts={titleTexts}
isCorrelatedScatter={isCorrelatedScatter}
correlation={bulkCorrelation}/>
}
<div
className="scatter-graph"
id={graphElementId}
176 changes: 176 additions & 0 deletions app/javascript/lib/validation/ontology-validation.js
@@ -0,0 +1,176 @@
/**
* @fileoverview Validates ontology labels and IDs in files added by users
*
* SCP requires cells to have certain metadata annotations, e.g.
* species, organ, disease, and library preparation protocol. This module
* loads ontology reference data, and uses it to check required cell metadata
* in the user's uploaded or transferred file.
*
* More context, demo:
* https://github.com/broadinstitute/single_cell_portal_core/pull/2129
*/

import { decompressSync, strFromU8 } from 'fflate'

import {
metadataSchema, REQUIRED_CONVENTION_COLUMNS
} from './shared-validation'

// TODO: Replace "development" with "main" after next ingest release
const ONTOLOGY_BASE_URL =
'https://raw.githubusercontent.com/broadinstitute/scp-ingest-pipeline/' +
'development/ingest/validation/ontologies/'

/** Quickly retrieve current version cache key for ontologies */
async function fetchOntologyCacheVersion() {
if (window.SCP.ontologiesVersion) { return window.SCP.ontologiesVersion }
const response = await fetch(`${ONTOLOGY_BASE_URL}version.txt`)
const text = await response.text()
const version = text.trim().split('#')[0]
window.SCP.ontologiesVersion = version
return version
}

/** Get frontend SW cache object for minified ontologies */
async function getServiceWorkerCache() {
const version = await fetchOntologyCacheVersion()
const currentOntologies = `ontologies-${version}`

// Delete other versions of ontologies cache; there should be 1 per domain
const cacheNames = await caches.keys()
cacheNames.forEach(name => {
if (name.startsWith('ontologies-') && name !== currentOntologies) {
caches.delete(name)
}
})

const cache = await caches.open(currentOntologies)

return cache
}

/** Fetch .gz file, decompress it, return plaintext */
export async function fetchGzipped(url) {
const response = await fetch(url)
const blob = await response.blob()
const uint8Array = new Uint8Array(await blob.arrayBuffer())
const plaintext = strFromU8(decompressSync(uint8Array))
return plaintext
}

/** Fetch from service worker cache if available, from remote otherwise */
export async function cacheFetch(url) {
const cache = await getServiceWorkerCache()

const decompressedUrl = url.replace('.gz', '')
const response = await cache.match(decompressedUrl)
if (typeof response === 'undefined') {
// If cache miss, then fetch, decompress, and put response in cache
const data = await fetchGzipped(url)
const contentLength = data.length
const decompressedResponse = new Response(
data,
{
headers: new Headers({
'Content-Length': contentLength,
'Content-Type': 'text/tab-separated-values'
})
}
)
await cache.put(decompressedUrl, decompressedResponse)
return await cache.match(decompressedUrl)
}
return await cache.match(decompressedUrl)
}

/**
* Fetch minified ontologies, transform into object of object of arrays, e.g.:
*
* {
* 'mondo': {
* 'MONDO_0008315': ['prostate cancer', 'prostate neoplasm', 'prostatic neoplasm'],
* 'MONDO_0018076': ['tuberculosis', 'TB'],
* ...
* },
* 'ncbitaxon': {
* 'NCBITaxon_9606': ['Homo sapiens', 'human'],
* 'NCBITaxon_10090': ['Mus musculus', 'house mouse', 'mouse'],
* ...
* },
* ...
* }
*/
export async function fetchOntologies() {
if (window.SCP.ontologies) {
// Reuse fetched, processed ontologies from this page load
return window.SCP.ontologies
}

const ontologies = {}

const ontologyNames = getOntologyShortNames()

for (let i = 0; i < ontologyNames.length; i++) {
const ontologyName = ontologyNames[i]
const ontologyUrl = `${ONTOLOGY_BASE_URL + ontologyName}.min.tsv.gz`
const response = await cacheFetch(ontologyUrl)

const tsv = await response.text()
const lines = tsv.split('\n')

ontologies[ontologyName] = {}

for (let i = 0; i < lines.length; i++) {
const line = lines[i]
const [ontologyId, label, rawSynonyms] = line.split('\t')
let names = [label]
if (rawSynonyms) {
const synonyms = rawSynonyms.split('||')
names = names.concat(synonyms)
}
ontologies[ontologyName][ontologyId] = names
}
}

window.SCP.ontologies = ontologies
return ontologies
}

/** Get lowercase shortnames for all required ontologies */
function getOntologyShortNames() {
let requiredOntologies = []

// Validate IDs for species, organ, disease, and library preparation protocol
for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) {
const column = REQUIRED_CONVENTION_COLUMNS[i]
if (!column.endsWith('__ontology_label')) {continue}
const key = column.split('__ontology_label')[0]
const ontologies = getAcceptedOntologies(key, metadataSchema)
requiredOntologies = requiredOntologies.concat(ontologies)
}

requiredOntologies = Array.from(
new Set(requiredOntologies.map(o => o.toLowerCase()))
)

return requiredOntologies
}

/**
* Get list of ontology names accepted for key from metadata schema
*
* E.g. "disease" -> ["MONDO", "PATO"]
*/
export function getAcceptedOntologies(key, metadataSchema) {
// E.g. "ontology_browser_url": "https://www.ebi.ac.uk/ols/ontologies/mondo,https://www.ebi.ac.uk/ols/ontologies/pato"
const olsUrls = metadataSchema.properties[key].ontology

const acceptedOntologies =
olsUrls?.split(',').map(url => url.split('/').slice(-1)[0].toUpperCase())

if (acceptedOntologies.includes('NCBITAXON')) {
acceptedOntologies.push('NCBITaxon')
}

return acceptedOntologies
}
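
Taken together, the new module's exports can be combined to check an ontology ID and label pair from a user's metadata file against the minified reference data. A hypothetical usage sketch (validateOntologyTerm and the import path are illustrative, not part of this commit):

import { fetchOntologies } from '~/lib/validation/ontology-validation'

/** Return true if the ID exists in its ontology and the label matches a known name or synonym */
async function validateOntologyTerm(ontologyId, label) {
  const ontologies = await fetchOntologies()
  const shortName = ontologyId.split('_')[0].toLowerCase() // e.g. "MONDO_0008315" -> "mondo"
  const names = ontologies[shortName]?.[ontologyId]
  return Boolean(names) && names.includes(label)
}

// e.g. await validateOntologyTerm('NCBITaxon_9606', 'Homo sapiens') // true, per the example data above
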
