Merge pull request #2153 from broadinstitute/development
Release 1.82.0
bistline authored Oct 9, 2024
2 parents 5feb742 + 087221a commit af2e02d
Showing 24 changed files with 316 additions and 68 deletions.
2 changes: 1 addition & 1 deletion app/controllers/api/v1/bulk_download_controller.rb
@@ -461,7 +461,7 @@ def self.find_matching_directories(directory_name, file_type, accession)
when 'nodirs'
[]
when 'all'
study.directory_listings.all
study&.directory_listings&.are_synced || []
else
DirectoryListing.where(name: sanitized_dirname, study_id: study.id, sync_status: true, file_type: file_type)
end
33 changes: 20 additions & 13 deletions app/javascript/components/explore/ExploreDisplayPanelManager.jsx
@@ -200,7 +200,7 @@ export default function ExploreDisplayPanelManager({
showDifferentialExpressionPanel, setIsCellSelecting, currentPointsSelected, isCellSelecting, deGenes,
setDeGenes, setShowDeGroupPicker,
cellFaceting, cellFilteringSelection, cellFilterCounts, clusterCanFilter, filterErrorText,
updateFilteredCells, panelToShow, toggleViewOptions
updateFilteredCells, panelToShow, toggleViewOptions, allowCellFiltering
}) {
const [, setRenderForcer] = useState({})
const [dataCache] = useState(createCache())
@@ -244,14 +244,7 @@ export default function ExploreDisplayPanelManager({

const shownAnnotation = getShownAnnotation(exploreParamsWithDefaults.annotation, annotationList)

const isSubsampled = exploreParamsWithDefaults.subsample !== 'all'
let cellFilteringTooltipAttrs = {}
if (isSubsampled) {
cellFilteringTooltipAttrs = {
'data-toggle': 'tooltip',
'data-original-title': 'Clicking will remove subsampling; plots might be noticeably slower.'
}
}

/** Toggle cell filtering panel */
function toggleCellFilterPanel() {
@@ -359,7 +352,7 @@
</button>
</>
}
{showCellFiltering && panelToShow === 'cell-filtering' &&
{showCellFiltering && panelToShow === 'cell-filtering' && allowCellFiltering &&
<CellFilteringPanelHeader
togglePanel={togglePanel}
setShowDifferentialExpressionPanel={setShowDifferentialExpressionPanel}
@@ -370,8 +363,7 @@
setDeGroupB={setDeGroupB}
isAuthorDe={isAuthorDe}
updateFilteredCells={updateFilteredCells}
deGenes={deGenes}
/>
deGenes={deGenes}/>
}
{panelToShow === 'differential-expression' &&
<DifferentialExpressionPanelHeader
@@ -447,7 +439,7 @@
</div>
</>
}
{ showCellFiltering && clusterCanFilter &&
{ showCellFiltering && clusterCanFilter && allowCellFiltering &&
<>
<div className="row">
<div className={`col-xs-12 cell-filtering-button ${shownTab === 'scatter' && !studyHasDe ? 'create-annotation-cell-filtering' : ''}`}>
@@ -462,7 +454,7 @@
</div>
</>
}
{ showCellFiltering && !clusterCanFilter &&
{ showCellFiltering && !clusterCanFilter && allowCellFiltering &&
<>
<div className="row">
<div className={`col-xs-12 cell-filtering-button ${shownTab === 'scatter' && !studyHasDe ? 'create-annotation-cell-filtering' : ''}`}>
@@ -477,6 +469,21 @@
</div>
</>
}
{ showCellFiltering && !allowCellFiltering &&
<>
<div className="row">
<div className={`col-xs-12 cell-filtering-button ${shownTab === 'scatter' && !studyHasDe ? 'create-annotation-cell-filtering' : ''}`}>
<button
disabled="disabled"
className={`btn btn-primary`}
data-testid="cell-filtering-button-disabled"
data-toggle="tooltip"
data-original-title={`Cell filtering cannot be applied in this context (only scatter or distribution tabs) - visible plots are unfiltered`}
>Filtering unavailable</button>
</div>
</div>
</>
}
<SubsampleSelector
annotationList={annotationList}
cluster={exploreParamsWithDefaults.cluster}
13 changes: 13 additions & 0 deletions app/javascript/components/explore/ExploreDisplayTabs.jsx
@@ -302,6 +302,18 @@ export default function ExploreDisplayTabs({

const isCorrelatedScatter = enabledTabs.includes('correlatedScatter')

// decide whether or not to allow the cell filtering UX
// must be in the scatter or distribution tab with less than 2 genes, or using consensus metric
const allowCellFiltering = (exploreParams?.genes?.length <= 1 || exploreParams?.consensus !== null) &&
['scatter', 'distribution'].includes(shownTab)

// hide cell filtering panel in non-applicable settings, but remember state
// will reload if user returns to a scenario where cell filtering can be re-applied
if (panelToShow === 'cell-filtering' && !allowCellFiltering) {
togglePanel('options')
} else if (allowCellFiltering && filteredCells && panelToShow === 'options') {
togglePanel('cell-filtering')
}

// If clustering or annotation changes, then update facets shown for cell filtering
useEffect(() => {
@@ -696,6 +708,7 @@
updateFilteredCells={updateFilteredCells}
panelToShow={panelToShow}
toggleViewOptions={toggleViewOptions}
allowCellFiltering={allowCellFiltering}
/>
</div>
</div>
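To make the new gating concrete, here is a minimal sketch of how the `allowCellFiltering` condition added in ExploreDisplayTabs.jsx evaluates for a few explore states. The helper name and the parameter values are hypothetical, chosen only for illustration; they are not taken from this commit.

```js
// Sketch only: mirrors the condition added above, run against hypothetical inputs
function allowsCellFiltering(exploreParams, shownTab) {
  return (exploreParams?.genes?.length <= 1 || exploreParams?.consensus !== null) &&
    ['scatter', 'distribution'].includes(shownTab)
}

allowsCellFiltering({ genes: ['GAD1'], consensus: null }, 'scatter')                 // true: single gene
allowsCellFiltering({ genes: ['GAD1', 'GAD2'], consensus: null }, 'scatter')         // false: 2+ genes, no consensus
allowsCellFiltering({ genes: ['GAD1', 'GAD2'], consensus: 'mean' }, 'distribution')  // true: genes collapsed by a consensus metric
allowsCellFiltering({ genes: [], consensus: null }, 'heatmap')                        // false: tab never supports filtering
```

When the condition flips to false while the cell-filtering panel is open, the block above falls back to the options panel via `togglePanel('options')`, and it reopens the filtering panel once filtering is allowed again and filtered cells are present.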
5 changes: 4 additions & 1 deletion app/javascript/components/explore/PlotTabs.jsx
@@ -55,7 +55,10 @@ export default function PlotTabs({
const tooltip = disabledTooltips[tabKey]
const numGenes = tooltip.numToSearch
const geneText = `gene${tooltip.isMulti ? 's' : ''}`
const text = `To show this plot, search ${numGenes} ${geneText} using the box at left`
let text = `To show this plot, search ${numGenes} ${geneText} using the box at left`
if (['scatter', 'distribution'].includes(tabKey)) {
text += " or use the 'Collapse genes by' menu for multiple genes"
}
return (
<li key={tabKey}
role="presentation"
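For instance, as I read the updated tooltip logic, a disabled scatter tab whose metadata asks for one gene would now produce the text below; the `tooltip` values here are hypothetical.

```js
// Hypothetical disabled-tab metadata, run through the string-building shown above
const tabKey = 'scatter'
const tooltip = { numToSearch: 1, isMulti: false }
let text = `To show this plot, search ${tooltip.numToSearch} gene${tooltip.isMulti ? 's' : ''} using the box at left`
if (['scatter', 'distribution'].includes(tabKey)) {
  text += " or use the 'Collapse genes by' menu for multiple genes"
}
// text === "To show this plot, search 1 gene using the box at left or use the 'Collapse genes by' menu for multiple genes"
```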
45 changes: 45 additions & 0 deletions app/javascript/lib/validation/shared-validation.js
@@ -231,6 +231,51 @@ export function validateUnique(headers) {
return issues
}

/**
* Check headers for disallowed characters in metadata annotations
*
* This rule exists because BigQuery does not accept e.g. periods in its
* column names, without special quoting. We use BigQuery to enable cross-study
* search on annotations like "species", "disease", etc.
*
* Cluster-specific annotations aren't searchable in cross-study search, so
* we skip this rule for cluster files (via `false` for `hasMetadataAnnotations`).
*
* More context: https://github.com/broadinstitute/single_cell_portal_core/pull/2143
*/
export function validateAlphanumericAndUnderscores(headers, hasMetadataAnnotations=true) {
// eslint-disable-next-line max-len
// Mirrors https://github.com/broadinstitute/scp-ingest-pipeline/blob/7c3ea039683c3df90d6e32f23bf5e6813d8fbaba/ingest/validation/validate_metadata.py#L1223
const issues = []

if (!hasMetadataAnnotations) {
// Skip this validation for cluster files
return issues
}


const uniques = new Set(headers)
const prohibitedChars = new RegExp(/[^A-Za-z0-9_]/)

const problemHeaders = []

// Do headers have prohibited characters?
uniques.forEach(header => {
const hasProhibited = (header.search(prohibitedChars) !== -1)
if (hasProhibited) {
problemHeaders.push(header)
}
})

if (problemHeaders.length > 0) {
const problems = `"${problemHeaders.join('", "')}"`
const msg = `Update these headers to use only letters, numbers, and underscores: ${problems}`
issues.push(['error', 'format:cap:only-alphanumeric-underscore', msg])
}

return issues
}

/** Verifies metadata file has all required columns */
export function validateRequiredMetadataColumns(parsedHeaders, isAnnData=false) {
const issues = []
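A brief usage sketch of the new validator; the header values are made up for illustration, but the issue shape and message follow the code above.

```js
// Hypothetical metadata headers: two contain characters other than letters, numbers, or underscores
const headers = ['NAME', 'cell.type', 'donor id', 'disease__ontology_label']

validateAlphanumericAndUnderscores(headers)
// => [['error', 'format:cap:only-alphanumeric-underscore',
//      'Update these headers to use only letters, numbers, and underscores: "cell.type", "donor id"']]

validateAlphanumericAndUnderscores(headers, false)
// => [] -- cluster files skip this rule, since their annotations are not searchable across studies
```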
4 changes: 3 additions & 1 deletion app/javascript/lib/validation/validate-anndata.js
@@ -1,8 +1,9 @@
import {openH5File} from 'hdf5-indexed-reader'
import { openH5File } from 'hdf5-indexed-reader'

import { getOAuthToken } from '~/lib/scp-api'
import {
validateUnique, validateRequiredMetadataColumns,
validateAlphanumericAndUnderscores,
metadataSchema, REQUIRED_CONVENTION_COLUMNS
} from './shared-validation'
import { getAcceptedOntologies, fetchOntologies } from './ontology-validation'
@@ -285,6 +286,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) {

issues = issues.concat(
validateUnique(headers),
validateAlphanumericAndUnderscores(headers),
requiredMetadataIssues,
ontologyIdFormatIssues,
ontologyLabelAndIdIssues
21 changes: 11 additions & 10 deletions app/javascript/lib/validation/validate-file-content.js
@@ -18,7 +18,7 @@ import {
getParsedHeaderLines, parseLine, ParseException,
validateUniqueCellNamesWithinFile, validateMetadataLabelMatches,
validateGroupColumnCounts, timeOutCSFV, validateUnique,
validateRequiredMetadataColumns
validateRequiredMetadataColumns, validateAlphanumericAndUnderscores
} from './shared-validation'
import { parseDifferentialExpressionFile } from './validate-differential-expression'
import { parseAnnDataFile } from './validate-anndata'
@@ -154,15 +154,16 @@ function validateEqualCount(headers, annotTypes) {
* Cap lines are like meta-information lines in other file formats
* (e.g. VCF), but do not begin with pound signs (#).
*/
function validateCapFormat([headers, annotTypes]) {
function validateCapFormat([headers, annotTypes], isMetadataFile=true) {
let issues = []
if (!headers || !annotTypes) {
return [['error', 'format:cap:no-cap-rows', 'File does not have 2 non-empty header rows']]
}

// Check format rules that apply to both metadata and cluster files
// Check format rules that apply to both metadata and (except one rule) cluster files
issues = issues.concat(
validateUnique(headers),
validateAlphanumericAndUnderscores(headers, isMetadataFile),
validateNameKeyword(headers),
validateTypeKeyword(annotTypes),
validateGroupOrNumeric(annotTypes),
@@ -240,7 +241,7 @@ export async function parseMetadataFile(chunker, mimeType, fileOptions) {
/** parse a cluster file, and return an array of issues, along with file parsing info */
export async function parseClusterFile(chunker, mimeType) {
const { headers, delimiter } = await getParsedHeaderLines(chunker, mimeType)
let issues = validateCapFormat(headers)
let issues = validateCapFormat(headers, false)
issues = issues.concat(validateClusterCoordinates(headers))
// add other header validations here

@@ -264,7 +265,7 @@
* Example: prettyAndOr(['A', 'B', 'C'], 'or') > '"A", "B", or "C"' */
function prettyAndOr(stringArray, operator) {
let phrase
const quoted = stringArray.map(ext => `"${ext}"`)
const quoted = stringArray.map(ext => `".${ext}"`)

if (quoted.length === 1) {
phrase = quoted[0]
@@ -274,7 +275,7 @@
} else if (quoted.length > 2) {
// e.g. "A", "B", or "C"
const last = quoted.slice(-1)[0]
phrase = `${quoted.slice(-1).join(', ') } ${operator} ${last}`
phrase = `${quoted.slice(0, -1).join(', ')}, ${operator} ${last}`
}

return phrase
@@ -315,10 +316,10 @@ export async function validateGzipEncoding(file, fileType) {
}
} else {
if (firstByte === GZIP_MAGIC_NUMBER) {
const prettyExts = prettyAndOr(EXTENSIONS_MUST_GZIP)
const problem = `Only files with extensions ${prettyExts}) may be gzipped`
const solution = 'please decompress file and retry'
throw new ParseException('encoding:missing-gz-extension', `${problem}; ${solution}`)
const prettyExts = prettyAndOr(EXTENSIONS_MUST_GZIP, 'or')
const problem = `Only files with extensions ${prettyExts} may be gzipped`
const solution = 'Please add a ".gz" extension to the file name, or decompress the file, and retry.'
throw new ParseException('encoding:missing-gz-extension', `${problem}. ${solution}`)
} else {
isGzipped = false
}
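As I read the corrected `prettyAndOr`, it now dots each extension and joins every item before the conjunction, instead of repeating the last one. A sketch with hypothetical extension lists (the two-item branch is not shown in this hunk, so only the one- and three-item cases are illustrated):

```js
// Hypothetical inputs; expected outputs per the corrected logic above
prettyAndOr(['txt'], 'or')                // '".txt"'
prettyAndOr(['tsv', 'txt', 'csv'], 'or')  // '".tsv", ".txt", or ".csv"'
// Before this fix, the three-item case collapsed to '"csv" or "csv"' and omitted the periods.
```

The gzip-encoding error below then interpolates that phrase, passing the 'or' operator that the old call omitted.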
2 changes: 1 addition & 1 deletion app/models/ann_data_file_info.rb
@@ -193,7 +193,7 @@ def sanitize_fragments!

# create the default cluster data_fragment entries
def set_default_cluster_fragments!
return false if fragments_by_type(:cluster).any?
return false if fragments_by_type(:cluster).any? || reference_file

default_obsm_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys]
default_obsm_keys.each do |obsm_key_name|
2 changes: 1 addition & 1 deletion app/models/differential_expression_result.rb
@@ -228,7 +228,7 @@ def remove_output_files
# in production, DeleteQueueJob will handle all necessary cleanup
return true if study.nil? || study.detached || study.queued_for_deletion

identifier = "#{study.accession}:#{annotation_identifier}"
identifier = "#{study.accession}:#{annotation_name}--group--#{annotation_scope}"
bucket_files.each do |filepath|
remote = ApplicationController.firecloud_client.get_workspace_file(study.bucket_id, filepath)
if remote.present?
51 changes: 40 additions & 11 deletions app/models/ingest_job.rb
@@ -32,6 +32,9 @@ class IngestJob
# steps that need to be accounted for
SPECIAL_ACTIONS = %i[differential_expression render_expression_arrays image_pipeline].freeze

# main processes that extract or ingest data for core visualizations (scatter, violin, dot, etc)
CORE_ACTIONS = %w[ingest_anndata ingest_expression ingest_cell_metadata ingest_cluster]

# jobs that need parameters objects in order to launch correctly
PARAMS_OBJ_REQUIRED = %i[
differential_expression render_expression_arrays image_pipeline ingest_anndata
@@ -347,7 +350,7 @@ def poll_for_completion(run_at: 1.minute.from_now)
if done? && !failed?
Rails.logger.info "IngestJob poller: #{pipeline_name} is done!"
Rails.logger.info "IngestJob poller: #{pipeline_name} status: #{current_status}"
unless special_action? || action == :ingest_anndata
unless special_action? || (action == :ingest_anndata && study_file.is_viz_anndata?)
study_file.update(parse_status: 'parsed')
study_file.bundled_files.each { |sf| sf.update(parse_status: 'parsed') }
end
@@ -981,7 +984,7 @@ def log_to_mixpanel
mixpanel_log_props = get_job_analytics
# log job properties to Mixpanel
MetricsService.log(mixpanel_event_name, mixpanel_log_props, user)
report_anndata_summary if study_file.is_viz_anndata? && action != :differential_expression
report_anndata_summary if study_file.is_viz_anndata? && action != :ingest_anndata
end

# set a mixpanel event name based on action
@@ -994,7 +997,9 @@ def anndata_summary_props
pipelines = ApplicationController.life_sciences_api_client.list_pipelines
previous_jobs = pipelines.operations.select do |op|
pipeline_args = op.metadata.dig('pipeline', 'actions').first['commands']
pipeline_args.detect { |c| c == study_file.id.to_s } && !pipeline_args.include?('--differential-expression')
op.done? &&
pipeline_args.detect { |c| c == study_file.id.to_s } &&
(pipeline_args & CORE_ACTIONS).any?
end
# get total runtime from initial extract to final parse
initial_extract = previous_jobs.last
@@ -1023,6 +1028,7 @@ def anndata_summary_props
op.metadata.dig('pipeline', 'actions').first['commands'].detect { |c| c == '--extract' } ||
op.error.present?
end.count
num_files_extracted += 1 if extracted_raw_counts?(initial_extract.metadata)
# event properties for Mixpanel summary event
{
perfTime: job_perftime,
@@ -1039,20 +1045,43 @@
}
end

# determine if an ingest_anndata job extracted raw counts data
# reads from the --extract parameter to avoid counting filenames that include 'raw_counts'
def extracted_raw_counts?(pipeline_metadata)
commands = pipeline_metadata.dig('pipeline', 'actions').first['commands']
extract_idx = commands.index('--extract')
return false if extract_idx.nil?

extract_params = commands[extract_idx + 1]
extract_params.include?('raw_counts')
end

# report a summary of all AnnData extraction for this file to Mixpanel, if this is the last job
def report_anndata_summary
study_file.reload
return false if study_file.has_anndata_summary? # don't bother checking if summary is already sent

file_identifier = "#{study_file.upload_file_name} (#{study_file.id})"
Rails.logger.info "Checking AnnData summary for #{file_identifier} after #{action}"
remaining_jobs = DelayedJobAccessor.find_jobs_by_handler_type(IngestJob, study_file)
fragment = params_object.associated_file # fragment file from this job
# discard the job that corresponds with this ingest process
still_processing = remaining_jobs.reject do |job|
# find running jobs associated with this file that are part of primary extraction (expression, metadata, clustering)
still_processing = remaining_jobs.select do |job|
ingest_job = DelayedJobAccessor.dump_job_handler(job).object
ingest_job.params_object.is_a?(AnnDataIngestParameters) && ingest_job.params_object.associated_file == fragment
end.any?
ingest_job.params_object.is_a?(AnnDataIngestParameters) &&
!ingest_job.done? &&
%i[ingest_cluster ingest_cell_metadata ingest_expression].include?(ingest_job.action)
end

# only continue if there are no more jobs and a summary has not been sent yet
study_file.reload
return false if still_processing || study_file.has_anndata_summary?
if still_processing.any?
files = still_processing.map do |job|
ingest_job = DelayedJobAccessor.dump_job_handler(job).object
"#{ingest_job.action} - #{ingest_job.params_object.associated_file}"
end
Rails.logger.info "Found #{still_processing.count} jobs still processing for #{file_identifier} #{files.join(', ')}"
return false
end

Rails.logger.info "Sending AnnData summary for #{file_identifier} after #{action}"
study_file.set_anndata_summary! # prevent race condition leading to duplicate summaries
MetricsService.log('ingestSummary', anndata_summary_props, user)
end