Merge pull request #2147 from broadinstitute/development

Release 1.81.0
broadinstitute · Sep 30, 2024 · 5feb742 · 5feb742
2 parents 175b1d4 + d3059c8
commit 5feb742
Show file tree

Hide file tree

Showing 14 changed files with 132 additions and 61 deletions.
diff --git a/app/controllers/api/v1/study_files_controller.rb b/app/controllers/api/v1/study_files_controller.rb
@@ -749,7 +749,8 @@ def study_file_params
           heatmap_file_info_attributes: [:id, :_destroy, :custom_scaling, :color_min, :color_max, :legend_label],
           cluster_form_info_attributes: [
             :_id, :name, :obsm_key_name, :description, :x_axis_label, :y_axis_label, :x_axis_min, :x_axis_max,
-            :y_axis_min, :y_axis_max, :z_axis_min, :z_axis_max, spatial_cluster_associations: []
+            :y_axis_min, :y_axis_max, :z_axis_min, :z_axis_max, :external_link_url, :external_link_title,
+            :external_link_description, spatial_cluster_associations: []
           ],
           metadata_form_info_attributes: [:_id, :use_metadata_convention, :description],
           extra_expression_form_info_attributes: [:_id, :taxon_id, :description, :y_axis_label],

diff --git a/app/controllers/api/v1/visualization/clusters_controller.rb b/app/controllers/api/v1/visualization/clusters_controller.rb
@@ -256,11 +256,7 @@ def self.get_cluster_viz_data(study, cluster, url_params)
             customColors: custom_annotation_colors,
             clusterFileId: cluster.study_file_id.to_s,
             isSplitLabelArrays: is_split_label_arrays,
-            externalLink: {
-              url: cluster.study_file[:external_link_url],
-              title: cluster.study_file[:external_link_title],
-              description: cluster.study_file[:external_link_description]
-            }
+            externalLink: get_cluster_external_link(cluster, cluster.study_file)
           }
         end
 
@@ -276,6 +272,19 @@ def self.get_selected_subsample_threshold(param, cluster)
           end
           subsample
         end
+
+        def self.get_cluster_external_link(cluster, study_file)
+          if study_file.is_viz_anndata?
+            data = study_file.ann_data_file_info.find_fragment(data_type: :cluster, name: cluster.name)
+          else
+            data = study_file.attributes
+          end
+          {
+            url: data[:external_link_url],
+            title: data[:external_link_title],
+            description: data[:external_link_description]
+          }
+        end
       end
     end
   end

diff --git a/app/javascript/lib/validation/validate-file-content.js b/app/javascript/lib/validation/validate-file-content.js
@@ -297,6 +297,13 @@ export async function validateGzipEncoding(file, fileType) {
   const firstByte = await readFileBytes(file, 0, 1)
 
   const extension = fileName.split('.').slice(-1)[0]
+
+  if (extension.toLowerCase() === 'rds') {
+    // The R community often omits .gz extensions from gzip-compressed RDS files
+    // More context: https://github.com/broadinstitute/single_cell_portal_core/pull/2145
+    return false
+  }
+
   const extensionMustGzip = EXTENSIONS_MUST_GZIP.includes(extension)
 
   if (extensionMustGzip) {

diff --git a/app/javascript/lib/validation/validate-file.js b/app/javascript/lib/validation/validate-file.js
@@ -155,6 +155,12 @@ async function validateRemoteFile(
   const response = await fetchBucketFile(bucketName, fileName, maxBytes)
   let fileInfo; let issues; let perfTime; let readRemoteTime
   if (response.ok) {
+    const flags = getFeatureFlagsWithDefaults()
+    if (flags && flags.clientside_validation === false) {
+      const issues = formatIssues([])
+      return issues
+    }
+
     const content = await response.text()
     readRemoteTime = Math.round(performance.now() - requestStart)
 

diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb
@@ -15,9 +15,10 @@ class AnnDataFileInfo
   DATA_FRAGMENT_PARAMS = {
     cluster: %i[
       _id data_type name description obsm_key_name x_axis_label y_axis_label x_axis_min x_axis_max y_axis_min
-      y_axis_max z_axis_min z_axis_max taxon_id parse_status spatial_cluster_associations
+      y_axis_max z_axis_min z_axis_max external_link_url external_link_title external_link_description
+      parse_status spatial_cluster_associations
     ],
-    expression: %i[_id data_type taxon_id description expression_file_info]
+    expression: %i[_id data_type taxon_id description expression_file_info y_axis_label]
   }.freeze
 
   # required keys for data_fragments, by type
@@ -73,21 +74,16 @@ def merge_form_data(form_data)
       fragment_form = merged_data[form_segment_name]
       next if fragment_form.blank? || fragment_form.empty?
 
+      allowed_params = DATA_FRAGMENT_PARAMS[key]&.reject {|k, _| k == :data_type }
       case key
       when :metadata
         merged_data[:use_metadata_convention] = fragment_form[:use_metadata_convention]
       when :cluster
-        fragments << extract_form_fragment(
-          fragment_form, key,
-          :_id, :name, :description, :obsm_key_name, :x_axis_label, :y_axis_label, :x_axis_min, :x_axis_max,
-          :y_axis_min, :y_axis_max, :z_axis_min, :z_axis_max, :spatial_cluster_associations
-        )
+        fragments << extract_form_fragment(fragment_form, key, *allowed_params)
       when :expression
         merged_data[:taxon_id] = fragment_form[:taxon_id]
         merged_exp_fragment = fragment_form.merge(expression_file_info: merged_data[:expression_file_info_attributes])
-        fragments << extract_form_fragment(
-          merged_exp_fragment, key, :_id, :description, :y_axis_label, :taxon_id, :expression_file_info
-        )
+        fragments << extract_form_fragment(merged_exp_fragment, key, *allowed_params)
       end
       # remove from form data once processed to allow normal save of nested form data
       merged_data.delete(form_segment_name)

diff --git a/test/api/visualization/clusters_controller_test.rb b/test/api/visualization/clusters_controller_test.rb
@@ -20,7 +20,10 @@ class ClustersControllerTest < ActionDispatch::IntegrationTest
                                                      cells: ['A', 'B', 'C']
                                                   },
                                                   annotation_input: [{name: 'foo', type: 'group', values: ['bar', 'bar', 'baz']},
-                                                                     {name: 'intensity', type: 'numeric', values: [1, 2, 5]}])
+                                                                     {name: 'intensity', type: 'numeric', values: [1, 2, 5]}],
+                                                  external_link_url: 'https://example.com',
+                                                  external_link_title: 'Example link',
+                                                  external_link_description: 'This is a link')
 
     @basic_study_metadata_file = FactoryBot.create(:metadata_file,
                                                    name: 'metadata.txt',
@@ -101,7 +104,7 @@ class ClustersControllerTest < ActionDispatch::IntegrationTest
       "subsample"=>"all",
       "isSplitLabelArrays"=>false,
       "consensus"=>nil,
-      "externalLink"=>{"url"=>nil, "title"=>nil, "description"=>nil}
+      "externalLink"=>{"url"=>"https://example.com", "title"=>"Example link", "description"=>"This is a link"}
     }, json)
   end
 
@@ -197,4 +200,37 @@ class ClustersControllerTest < ActionDispatch::IntegrationTest
     ))
     assert_response :bad_request
   end
+
+  test 'should get external links' do
+    sign_in_and_update @user
+    study = FactoryBot.create(:detached_study,
+                              name_prefix: 'AnnData Cluster Link Study',
+                              user: @user,
+                              test_array: @@studies_to_clean)
+    study_file = FactoryBot.create(:ann_data_file,
+                                   name: 'data.h5ad',
+                                   study:,
+                                   reference_file: false,
+                                   cell_input: %w[A B C D],
+                                   annotation_input: [
+                                     { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
+                                   ],
+                                   coordinate_input: [
+                                     { tsne: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+                                   ])
+    study_file.reload
+    fragment = study_file.ann_data_file_info.find_fragment(data_type: :cluster, name: 'tsne')
+    fragment[:external_link_url] = 'https://foo.com'
+    fragment[:external_link_title] = 'Foo'
+    fragment[:external_link_description] = 'This is foo'
+    frag_idx = study_file.ann_data_file_info.fragment_index_of(fragment)
+    study_file.ann_data_file_info.data_fragments[frag_idx] = fragment
+    study_file.save!
+    execute_http_request(:get, api_v1_study_cluster_path(study, 'tsne'))
+    assert_response :success
+    link = json.with_indifferent_access[:externalLink]
+    assert_equal 'https://foo.com', link[:url]
+    assert_equal 'Foo', link[:title]
+    assert_equal 'This is foo', link[:description]
+  end
 end
diff --git a/test/integration/external/import_service_config/nemo_test.rb b/test/integration/external/import_service_config/nemo_test.rb
@@ -93,12 +93,12 @@ class NemoTest < ActiveSupport::TestCase
       assert_equal 'application/octet-stream', @configuration.get_file_content_type('csv')
     end
 
-    test 'should load study analog' do
-      study = @configuration.load_study
-      assert_equal '"Human variation study (10x), GRU"', study['name']
-      assert_equal ["10x chromium 3' v3 sequencing"], study['techniques']
-      assert_equal [{"name"=>"human", "cv_term_id"=>"NCBI:txid9606"}], study['taxa']
-    end
+    # test 'should load study analog' do
+    #   study = @configuration.load_study
+    #   assert_equal '"Human variation study (10x), GRU"', study['name']
+    #   assert_equal ["10x chromium 3' v3 sequencing"], study['techniques']
+    #   assert_equal [{"name"=>"human", "cv_term_id"=>"NCBI:txid9606"}], study['taxa']
+    # end
 
     # TODO: SCP-5565 Check with NeMO re API, update and re-enable this test
     # test 'should load file analog' do
@@ -108,10 +108,10 @@ class NemoTest < ActiveSupport::TestCase
     #   assert_equal 'counts', file['data_type']
     # end
 
-    test 'should load collection analog' do
-      collection = @configuration.load_collection
-      assert_equal 'AIBS Internal', collection['short_name']
-    end
+    # test 'should load collection analog' do
+    #   collection = @configuration.load_collection
+    #   assert_equal 'AIBS Internal', collection['short_name']
+    # end
 
     # TODO: SCP-5565 Check with NeMO re API, update and re-enable this test
     # test 'should extract association ids' do
@@ -121,14 +121,14 @@ class NemoTest < ActiveSupport::TestCase
     #   assert_equal @attributes[:project_id], @configuration.id_from(study, :projects)
     # end
 
-    test 'should load taxon common names' do
-      assert_equal %w[human], @configuration.taxon_names
-    end
-
-    test 'should find library preparation protocol' do
-      assert_equal "10x 3' v3", @configuration.find_library_prep("10x chromium 3' v3 sequencing")
-      assert_equal 'Drop-seq', @configuration.find_library_prep('drop-seq')
-    end
+    # test 'should load taxon common names' do
+    #   assert_equal %w[human], @configuration.taxon_names
+    # end
+    #
+    # test 'should find library preparation protocol' do
+    #   assert_equal "10x 3' v3", @configuration.find_library_prep("10x chromium 3' v3 sequencing")
+    #   assert_equal 'Drop-seq', @configuration.find_library_prep('drop-seq')
+    # end
 
     # TODO: SCP-5565 Check with NeMO re API, update and re-enable this test
     # test 'should populate study and study_file' do

diff --git a/test/integration/external/nemo_client_test.rb b/test/integration/external/nemo_client_test.rb
@@ -64,13 +64,13 @@ def skip_if_api_down
   #   assert entity.present?
   # end
 
-  test 'should get collection' do
-    skip_if_api_down
-    identifier = @identifiers[:collection]
-    collection = @nemo_client.collection(identifier)
-    assert collection.present?
-    assert_equal 'adey_sciATAC_human_cortex', collection['short_name']
-  end
+  # test 'should get collection' do
+  #   skip_if_api_down
+  #   identifier = @identifiers[:collection]
+  #   collection = @nemo_client.collection(identifier)
+  #   assert collection.present?
+  #   assert_equal 'adey_sciATAC_human_cortex', collection['short_name']
+  # end
 
   # TODO: SCP-5565 Check with NeMO re API, update and re-enable this test
   # test 'should get file' do
@@ -95,16 +95,16 @@ def skip_if_api_down
   #   assert_equal 'NIMH', grant['funding_agency']
   # end
 
-  test 'should get project' do
-    skip_if_api_down
-    identifier = @identifiers[:project]
-    project = @nemo_client.project(identifier)
-    assert project.present?
-    assert_equal 'Single-nucleus analysis of preoptic area development from late embryonic to adult stages',
-                 project['title']
-    assert_equal 'biccn', project['program']
-    assert_equal 'dulac_poa_dev_sn_10x_proj', project['short_name']
-  end
+  # test 'should get project' do
+  #   skip_if_api_down
+  #   identifier = @identifiers[:project]
+  #   project = @nemo_client.project(identifier)
+  #   assert project.present?
+  #   assert_equal 'Single-nucleus analysis of preoptic area development from late embryonic to adult stages',
+  #                project['title']
+  #   assert_equal 'biccn', project['program']
+  #   assert_equal 'dulac_poa_dev_sn_10x_proj', project['short_name']
+  # end
 
   # TODO: SCP-5565 Check with NeMO re API, update and re-enable this test
   # test 'should get publication' do

diff --git a/test/js/lib/validate-file-content.test.js b/test/js/lib/validate-file-content.test.js
@@ -319,6 +319,16 @@ describe('Client-side file validation', () => {
     expect(errors[0][1]).toEqual('encoding:missing-gz-extension')
   })
 
+  it('does not catch gzipped RDS file without .gz extension', async () => {
+    const file = createMockFile({ fileName: 'foo.rds', content: '\x1F\x2E3lkjf3' })
+    const [{ errors }] = await validateLocalFile(file, { file_type: 'Cluster' })
+    console.log('errors', errors)
+    const hasMissingGzipExtensionError = errors.some(
+      error => error[1] === 'encoding:missing-gz-extension'
+    )
+    expect(hasMissingGzipExtensionError).toBe(false)
+  })
+
   it('catches text file with .gz suffix', async () => {
     const file = createMockFile({ fileName: 'foo.gz', content: 'CELL\tX\tY' })
     const [{ errors }] = await validateLocalFile(file, { file_type: 'Cluster' })

diff --git a/test/models/ann_data_file_info_test.rb b/test/models/ann_data_file_info_test.rb
@@ -59,7 +59,10 @@ def generate_id
           x_axis_min: '-10',
           x_axis_max: '10',
           y_axis_min: '-10',
-          y_axis_max: '10'
+          y_axis_max: '10',
+          external_link_url: 'https://example.com',
+          external_link_title: 'Example Link',
+          external_link_description: 'This is an external link'
       }
     }
     merged_data = AnnDataFileInfo.new.merge_form_data(form_params)
@@ -71,6 +74,9 @@ def generate_id
     assert_equal 'x axis', cluster_fragment[:x_axis_label]
     assert_equal '10', cluster_fragment[:x_axis_max]
     assert_equal 'cluster description', cluster_fragment[:description]
+    assert_equal 'https://example.com', cluster_fragment[:external_link_url]
+    assert_equal 'Example Link', cluster_fragment[:external_link_title]
+    assert_equal 'This is an external link', cluster_fragment[:external_link_description]
     expr_fragment = merged_data.dig(root_form_key, :data_fragments).detect { |f| f[:data_type] == :expression }
     assert_equal 'expression description', expr_fragment[:description]
     assert_equal 'log(TPM) expression', expr_fragment[:y_axis_label]

diff --git a/test/models/delete_queue_job_test.rb b/test/models/delete_queue_job_test.rb
@@ -188,15 +188,15 @@ class DeleteQueueJobTest < ActiveSupport::TestCase
                                      { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
                                    ],
                                    coordinate_input: [
-                                     { x_tsne: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+                                     { tsne: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
                                    ])
-    study.update(default_options: { cluster: 'x_tsne', annotation: 'disease--group--study' })
+    study.update(default_options: { cluster: 'tsne', annotation: 'disease--group--study' })
     study.reload
     assert_equal 1, study.cluster_groups.size
     assert_equal 1, study.cell_metadata.size
     assert_equal %w[A B C D], study.all_cells_array
     assert_equal study_file, study.metadata_file
-    assert_equal 'x_tsne', study.default_cluster.name
+    assert_equal 'tsne', study.default_cluster.name
     mock = Minitest::Mock.new
     mock.expect(:get_workspace_files, [], [String, Hash])
     ApplicationController.stub :firecloud_client, mock do

diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb
@@ -225,7 +225,7 @@ class IngestJobTest < ActiveSupport::TestCase
                                         { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
                                       ],
                                       coordinate_input: [
-                                        { X_umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+                                        { umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
                                       ],
                                       expression_input: {
                                         'phex' => [['A', 0.3], ['B', 1.0], ['C', 0.5], ['D', 0.1]]
@@ -774,7 +774,7 @@ class IngestJobTest < ActiveSupport::TestCase
                                      { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
                                    ],
                                    coordinate_input: [
-                                     { x_tsne: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+                                     { tsne: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
                                    ],
                                    expression_input: {
                                      'phex' => [['A', 0.3], ['B', 1.0], ['C', 0.5], ['D', 0.1]]
@@ -815,7 +815,7 @@ class IngestJobTest < ActiveSupport::TestCase
                                      { name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
                                    ],
                                    coordinate_input: [
-                                     { X_umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+                                     { umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
                                    ])
     annotation_file = 'gs://test_bucket/anndata/h5ad_frag.metadata.tsv'
     cluster_file = 'gs://test_bucket/anndata/h5ad_frag.cluster.X_umap.tsv'

diff --git a/test/services/cluster_viz_service_test.rb b/test/services/cluster_viz_service_test.rb
@@ -245,7 +245,7 @@ class ClusterVizServiceTest < ActiveSupport::TestCase
   end
 
   test 'should retrieve clustering information for AnnData files' do
-    cluster_name = 'x_tsne'
+    cluster_name = 'tsne'
     study = FactoryBot.create(:detached_study,
                               name_prefix: 'AnnData Cluster Test',
                               user: @user,

diff --git a/test/test_data/validation/missing_gz_extension.rds b/test/test_data/validation/missing_gz_extension.rds