From 618e689d8780a53f6d28a47d56b5c42b71826194 Mon Sep 17 00:00:00 2001 From: myushen Date: Wed, 10 Jul 2024 14:12:47 +1000 Subject: [PATCH 1/2] fix pseudobulk for some file_ids --- DESCRIPTION | 2 +- R/counts.R | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c763033..34057d9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: CuratedAtlasQueryR Title: Queries the Human Cell Atlas -Version: 1.3.6 +Version: 1.3.7 Authors@R: c( person( "Stefano", diff --git a/R/counts.R b/R/counts.R index becb1b2..c39af37 100644 --- a/R/counts.R +++ b/R/counts.R @@ -29,7 +29,7 @@ COUNTS_VERSION <- "0.2.1" #' @noRd pseudobulk_url <- single_line_str( "https://object-store.rc.nectar.org.au/v1/ - AUTH_06d6e008e3e642da99d806ba3ea629c5/pseudobulk-0.1.0" + AUTH_06d6e008e3e642da99d806ba3ea629c5/pseudobulk-0.1.1" ) @@ -332,7 +332,9 @@ group_to_data_container <- function(i, df, dir_prefix, features, grouping_column select(-dplyr::all_of(intersect(names(df), cell_level_anno))) |> distinct() |> mutate( - sample_identifier = glue("{sample_}___{cell_type_harmonised}"), + sample_identifier = ifelse(file_id %in% file_ids, + glue("{sample_}___{cell_type_harmonised}___{disease}___{is_primary_data_x}"), + glue("{sample_}___{cell_type_harmonised}")), original_sample_id = .data$sample_identifier ) |> column_to_rownames("original_sample_id") @@ -351,6 +353,23 @@ group_to_data_container <- function(i, df, dir_prefix, features, grouping_column } } +#' A temporary workaround for duplicated rownames in get_pseudobulk(), which arise because the +#' `disease` and `is_primary_data` columns are not included in `sample_` in the metadata. 
+#' @noRd +# file_ids that are corrupted +file_ids <- c( + "b50b15f1-bf19-4775-ab89-02512ec941a6", + "bffedc04-5ba1-46d4-885c-989a294bedd4", + "cc3ff54f-7587-49ea-b197-1515b6d98c4c", + "0af763e1-0e2f-4de6-9563-5abb0ad2b01e", + "51f114ae-232a-4550-a910-934e175db814", + "327927c7-c365-423c-9ebc-07acb09a0c1a", + "3ae36927-c188-4511-88cc-572ee1edf906", + "6ed2cdc2-dda8-4908-ad6c-cead9afee85e", + "56e0359f-ee8d-4ba5-a51d-159a183643e5", + "5c64f247-5b7c-4842-b290-65c722a65952" +) + #' Synchronises one or more remote assays with a local copy #' @param url A character vector of length one. The base HTTP URL from which to #' obtain the files. From 86ef5a0c755ec9601aee06c95745aa56a565ef82 Mon Sep 17 00:00:00 2001 From: myushen Date: Thu, 11 Jul 2024 09:37:14 +1000 Subject: [PATCH 2/2] unit tests --- tests/testthat/test-query.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 73765c3..4a2cb1b 100755 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -229,4 +229,28 @@ test_that("get_pseudobulk() syncs appropriate files", { expect_gt(1) }) +test_that("get_pseudobulk() syncs appropriate fixed file", { + temp <- tempfile() + ids <- c( + "b50b15f1-bf19-4775-ab89-02512ec941a6", + "bffedc04-5ba1-46d4-885c-989a294bedd4", + "cc3ff54f-7587-49ea-b197-1515b6d98c4c", + "0af763e1-0e2f-4de6-9563-5abb0ad2b01e", + "51f114ae-232a-4550-a910-934e175db814", + "327927c7-c365-423c-9ebc-07acb09a0c1a", + "3ae36927-c188-4511-88cc-572ee1edf906", + "6ed2cdc2-dda8-4908-ad6c-cead9afee85e", + "56e0359f-ee8d-4ba5-a51d-159a183643e5", + "5c64f247-5b7c-4842-b290-65c722a65952" + ) + meta <- get_metadata(cache_directory = temp) |> dplyr::filter(file_id %in% ids) + + # The remote dataset should have many genes + sme <- get_pseudobulk(meta, cache_directory = temp) + sme |> + row.names() |> + length() |> + expect_gt(1) +}) +