diff --git a/DESCRIPTION b/DESCRIPTION index 26e3f91..61ccd32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,6 +12,7 @@ Encoding: UTF-8 LazyData: false Suggests: BiocStyle, + curl, knitr, testthat, rmarkdown diff --git a/NAMESPACE b/NAMESPACE index f6fec95..fe58a0c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -107,6 +107,7 @@ export(subsetSamples) export(subsetValues) export(swapNameVariable) export(transformSignal) +export(translateSSVtoCT2) exportMethods("[") exportMethods("colToRowMatCols<-") exportMethods("rowRanges<-") diff --git a/NEWS b/NEWS index 9096be7..86e87be 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +# v 0.1.49 +* changed all default VAR names + # v 0.1.47 * depends on bugfix in seqsetvis in 1.25.5 diff --git a/R/class_ChIPtsne.R b/R/class_ChIPtsne.R index 07add9f..d3c2e0a 100644 --- a/R/class_ChIPtsne.R +++ b/R/class_ChIPtsne.R @@ -48,10 +48,10 @@ ChIPtsne2 = function( rowToRowMat=matrix(0,0,0), colToRowMatCols=list(), - name_VAR = "sample", - position_VAR = "x", - value_VAR = "y", - region_VAR = "id", + name_VAR = "name", + position_VAR = "position", + value_VAR = "signal", + region_VAR = "region", fetch_config = FetchConfig.null(), ...) { diff --git a/R/class_ChIPtsne_alt_constructors.R b/R/class_ChIPtsne_alt_constructors.R index a82090f..e6b377a 100644 --- a/R/class_ChIPtsne_alt_constructors.R +++ b/R/class_ChIPtsne_alt_constructors.R @@ -2,10 +2,10 @@ #' #' @param prof_dt Profile data.table, as returned from seqsetvis::ssvFetch* functions #' @param query_gr The query GRanges object used to fetch prof_dt. -#' @param name_VAR Variable name that contains sample ids/names. Links prof_dt to meta_dt. Default is "sample". -#' @param position_VAR Variable name that contains positional information in prof_dt. Default is "x". -#' @param value_VAR Variable name that contains signal value information in prof_dt. Default is "y". -#' @param region_VAR Variable name that contains region ID information in prof_dt. Default is "id". 
+#' @param name_VAR Variable name that contains sample ids/names. Links prof_dt to meta_dt. Default is "name". +#' @param position_VAR Variable name that contains positional information in prof_dt. Default is "position". +#' @param value_VAR Variable name that contains signal value information in prof_dt. Default is "signal". +#' @param region_VAR Variable name that contains region ID information in prof_dt. Default is "region". #' @param sample_metadata Metadata for entries in prof_dt's name_VAR, must include name_VAR #' @param region_metadata Metadata to append to rowRanges, mcols of query_gr will also be used. #' @param auto_sample_metadata If true, additional attributes in prof_dt will used for metadata (minus certain region related attributes such as seqnames, start, end, etc.) @@ -30,10 +30,10 @@ ChIPtsne2.from_tidy = function(prof_dt, query_gr, sample_metadata = NULL, region_metadata = NULL, - name_VAR = "sample", - position_VAR = "x", - value_VAR = "y", - region_VAR = "id", + name_VAR = "name", + position_VAR = "position", + value_VAR = "signal", + region_VAR = "region", auto_sample_metadata = TRUE, obj_history = list(), fetch_config = FetchConfig.null(), @@ -135,12 +135,10 @@ ChIPtsne2.from_tidy = function(prof_dt, "end", "width", "strand", - "id", - "y", - "x", - "cluster_id", - position_VAR, - value_VAR) + region_VAR, + value_VAR, + position_VAR + ) sample_metadata = prof_dt %>% # dplyr::select(all_of(c(name_VAR))) %>% dplyr::select(!dplyr::any_of(c(drop_vars))) %>% @@ -253,7 +251,7 @@ ChIPtsne2.from_FetchConfig = function(fetch_config, fetch_res = runFetchAtRegions(fetch_config, query_gr, use_cache = use_cache) prof_dt = fetch_res$prof_dt - + prof_dt = translateSSVtoCT2(prof_dt) ct2 = ChIPtsne2.from_tidy(prof_dt = prof_dt, name_VAR = name_VAR, query_gr = query_gr, diff --git a/R/class_ChIPtsne_no_rowRanges.R b/R/class_ChIPtsne_no_rowRanges.R index f35bc6b..56a9574 100644 --- a/R/class_ChIPtsne_no_rowRanges.R +++ b/R/class_ChIPtsne_no_rowRanges.R @@ 
-49,10 +49,10 @@ ChIPtsne2_no_rowRanges = function( rowToRowMat=matrix(0,0,0), colToRowMatCols=list(), - name_VAR = "sample", - position_VAR = "x", - value_VAR = "y", - region_VAR = "id", + name_VAR = "name", + position_VAR = "position", + value_VAR = "signal", + region_VAR = "region", fetch_config = FetchConfig.null(), ...) { diff --git a/R/functions_examples.R b/R/functions_examples.R index 64d801d..57803c1 100644 --- a/R/functions_examples.R +++ b/R/functions_examples.R @@ -44,6 +44,7 @@ exampleQueryGR = function(){ exampleProfDT = function(){ CTCF_in_10a_profiles_dt = NULL utils::data(list = "CTCF_in_10a_profiles_dt", package = "seqsetvis", overwrite = TRUE, envir = environment()) + CTCF_in_10a_profiles_dt = translateSSVtoCT2(CTCF_in_10a_profiles_dt) CTCF_in_10a_profiles_dt[] } @@ -56,7 +57,8 @@ exampleProfDT = function(){ #' exampleChIPtsne2() exampleChIPtsne2 = function(){ query_gr = exampleQueryGR() - prof_dt = exampleProfDT() + prof_dt = exampleProfDT() %>% + translateSSVtoCT2 ChIPtsne2.from_tidy(prof_dt, query_gr) } @@ -73,9 +75,10 @@ exampleChIPtsne2.with_meta = function(){ query_gr = exampleQueryGR() prof_dt = exampleProfDT() meta_dt = prof_dt %>% - dplyr::select(sample) %>% + translateSSVtoCT2 %>% + dplyr::select(name) %>% unique %>% - tidyr::separate(sample, c("cell", "mark"), sep = "_", remove = FALSE) + tidyr::separate(name, c("cell", "mark"), sep = "_", remove = FALSE) ChIPtsne2.from_tidy(prof_dt, query_gr, sample_metadata = meta_dt) } @@ -169,3 +172,36 @@ exampleDataPaths = function(){ url_dt[, narrowPeak_file := paste0(cell, "_", mark, ".narrowPeak")] url_dt[] } + +#' translateSSVtoCT2 +#' +#' *seqsetvis* creates data.table with certain default variables that *chiptsne2* uses more user-friendly values for. This function checks for *seqsetvis* defaults and converts them to *chiptsne2*. 
+#' +#' @param prof_dt A data.table generated by *seqsetvis* from ssvFetchGRanges +#' +#' @return A data.table in which the *seqsetvis* default columns "id", "x", "sample" and "y", where present, are renamed to "region", "position", "name" and "signal". A data.table input is renamed by reference via data.table::setnames; other inputs are first converted with data.table::as.data.table. +#' @export +#' +#' @examples +#' prof_dt = exampleProfDT() +#' prof_dt = translateSSVtoCT2(prof_dt) +#' ct2 = ChIPtsne2.from_tidy(prof_dt, exampleQueryGR()) +translateSSVtoCT2 = function(prof_dt){ + #convert to data.table if needed + if(!is(prof_dt, "data.table")){ + prof_dt = data.table::as.data.table(prof_dt) + } + if("id" %in% colnames(prof_dt)){ + data.table::setnames(prof_dt, "id", "region") + } + if("x" %in% colnames(prof_dt)){ + data.table::setnames(prof_dt, "x", "position") + } + if("sample" %in% colnames(prof_dt)){ + data.table::setnames(prof_dt, "sample", "name") + } + if("y" %in% colnames(prof_dt)){ + data.table::setnames(prof_dt, "y", "signal") + } + prof_dt[] +} diff --git a/man/ChIPtsne2.Rd b/man/ChIPtsne2.Rd index 1982caa..9a13634 100644 --- a/man/ChIPtsne2.Rd +++ b/man/ChIPtsne2.Rd @@ -7,10 +7,10 @@ ChIPtsne2( rowToRowMat = matrix(0, 0, 0), colToRowMatCols = list(), - name_VAR = "sample", - position_VAR = "x", - value_VAR = "y", - region_VAR = "id", + name_VAR = "name", + position_VAR = "position", + value_VAR = "signal", + region_VAR = "region", fetch_config = FetchConfig.null(), ... ) diff --git a/man/ChIPtsne2.from_tidy.Rd b/man/ChIPtsne2.from_tidy.Rd index 6f1db4e..d52ebb1 100644 --- a/man/ChIPtsne2.from_tidy.Rd +++ b/man/ChIPtsne2.from_tidy.Rd @@ -9,10 +9,10 @@ ChIPtsne2.from_tidy( query_gr, sample_metadata = NULL, region_metadata = NULL, - name_VAR = "sample", - position_VAR = "x", - value_VAR = "y", - region_VAR = "id", + name_VAR = "name", + position_VAR = "position", + value_VAR = "signal", + region_VAR = "region", auto_sample_metadata = TRUE, obj_history = list(), fetch_config = FetchConfig.null(), @@ -28,13 +28,13 @@ ChIPtsne2.from_tidy( \item{region_metadata}{Metadata to append to rowRanges, mcols of query_gr will also be used.} -\item{name_VAR}{Variable name that contains sample ids/names. Links prof_dt to meta_dt. 
Default is "sample".} +\item{name_VAR}{Variable name that contains sample ids/names. Links prof_dt to meta_dt. Default is "name".} -\item{position_VAR}{Variable name that contains positional information in prof_dt. Default is "x".} +\item{position_VAR}{Variable name that contains positional information in prof_dt. Default is "position".} -\item{value_VAR}{Variable name that contains signal value information in prof_dt. Default is "y".} +\item{value_VAR}{Variable name that contains signal value information in prof_dt. Default is "signal".} -\item{region_VAR}{Variable name that contains region ID information in prof_dt. Default is "id".} +\item{region_VAR}{Variable name that contains region ID information in prof_dt. Default is "region".} \item{auto_sample_metadata}{If true, additional attributes in prof_dt will used for metadata (minus certain region related attributes such as seqnames, start, end, etc.)} diff --git a/man/ChIPtsne2_no_rowRanges.Rd b/man/ChIPtsne2_no_rowRanges.Rd index ec6a539..7c8e85e 100644 --- a/man/ChIPtsne2_no_rowRanges.Rd +++ b/man/ChIPtsne2_no_rowRanges.Rd @@ -7,10 +7,10 @@ ChIPtsne2_no_rowRanges( rowToRowMat = matrix(0, 0, 0), colToRowMatCols = list(), - name_VAR = "sample", - position_VAR = "x", - value_VAR = "y", - region_VAR = "id", + name_VAR = "name", + position_VAR = "position", + value_VAR = "signal", + region_VAR = "region", fetch_config = FetchConfig.null(), ... 
) diff --git a/man/translateSSVtoCT2.Rd b/man/translateSSVtoCT2.Rd new file mode 100644 index 0000000..979bb19 --- /dev/null +++ b/man/translateSSVtoCT2.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions_examples.R +\name{translateSSVtoCT2} +\alias{translateSSVtoCT2} +\title{translateSSVtoCT2} +\usage{ +translateSSVtoCT2(prof_dt) +} +\arguments{ +\item{prof_dt}{A data.table generated by \emph{seqsetvis} from ssvFetchGRanges} +} +\description{ +\emph{seqsetvis} creates data.table with certain default variables that \emph{chiptsne2} uses more user-friendly values for. This function checks for \emph{seqsetvis} defaults and converts them to \emph{chiptsne2}. +} +\examples{ +prof_dt = exampleProfDT() +prof_dt = translateSSVtoCT2(prof_dt) +ct2 = ChIPtsne2.from_tidy(prof_dt, exampleQueryGR()) +} diff --git a/tests/testthat/test_ChIPtsne_class.R b/tests/testthat/test_ChIPtsne_class.R index aee12e1..31597d9 100644 --- a/tests/testthat/test_ChIPtsne_class.R +++ b/tests/testthat/test_ChIPtsne_class.R @@ -6,25 +6,26 @@ library(testthat) query_gr = exampleQueryGR() prof_dt = exampleProfDT() -metadata = prof_dt %>% dplyr::select(sample) %>% unique -metadata = metadata %>% tidyr::separate(sample, c("cell", "mark"), sep = "_", remove = FALSE) +metadata = prof_dt %>% dplyr::select(name) %>% unique +metadata = metadata %>% tidyr::separate(name, c("cell", "mark"), sep = "_", remove = FALSE) -map_dt = prof_dt %>% dplyr::select(sample, x) %>% unique %>% - dplyr::mutate(cn = paste(sample, x, sep = "_")) %>% - dplyr::mutate(nr = seq_along(x)) -map_list = split(map_dt$n, map_dt$sample) +map_dt = prof_dt %>% dplyr::select(name, position) %>% unique %>% + dplyr::mutate(cn = paste(name, position, sep = "_")) %>% + dplyr::mutate(nr = seq_along(position)) -tmp_wide = tidyr::pivot_wider(prof_dt, names_from = c("sample", "x"), values_from = "y", id_cols = "id") +map_list = split(map_dt$nr, map_dt$name) + +tmp_wide = 
tidyr::pivot_wider(prof_dt, names_from = c("name", "position"), values_from = "signal", id_cols = "region") prof_mat = as.matrix(tmp_wide[, -1]) -rownames(prof_mat) = tmp_wide$id +rownames(prof_mat) = tmp_wide$region prof_max = prof_dt %>% - dplyr::group_by(id, sample) %>% - dplyr::summarise(y = max(y)) %>% - tidyr::pivot_wider(names_from = "sample", id_cols = "id", values_from = "y") + dplyr::group_by(region, name) %>% + dplyr::summarise(signal = max(signal)) %>% + tidyr::pivot_wider(names_from = "name", id_cols = "region", values_from = "signal") prof_max_mat = as.matrix(prof_max[, -1]) -rownames(prof_max_mat) = prof_max$id +rownames(prof_max_mat) = prof_max$region ct = ChIPtsne2(assay = list(max = prof_max_mat[names(query_gr),]), rowRanges = query_gr, @@ -34,14 +35,18 @@ ct = ChIPtsne2(assay = list(max = prof_max_mat[names(query_gr),]), metadata = list(time = date())) -prof_dt = seqsetvis::ssvSignalClustering(prof_dt, nclust = 4) -region_metadata = prof_dt %>% dplyr::select(id, cluster_id) %>% unique +clust_dt = seqsetvis::ssvSignalClustering(prof_dt, nclust = 4, facet_ = "name", row_ = "region", column_ = "position", fill_ = "signal") +# prof_dt = translateSSVtoCT2(prof_dt) +# clust_dt = translateSSVtoCT2(clust_dt) + +region_metadata = clust_dt %>% dplyr::select(region, cluster_id) %>% unique + ct2 = ChIPtsne2.from_tidy(prof_dt, query_gr, region_metadata = region_metadata) test_that("Constructors - valid", { expect_true(validObject(ct2)) - expect_true(validObject(.ChIPtsne2())) # internal + expect_true(validObject(chiptsne2:::.ChIPtsne2())) # internal expect_true(validObject(ChIPtsne2())) # exported se = as(ct2, "SummarizedExperiment") @@ -56,7 +61,7 @@ test_that("Constructors - invalid", { }) test_that("Gettters", { - expect_identical(rowToRowMat(ct2), prof_mat[levels(prof_dt$id),]) + expect_identical(rowToRowMat(ct2), prof_mat[unique(prof_dt$region),]) expect_identical(rownames(rowToRowMat(ct2)), rownames(ct2)) expect_identical(names(colToRowMatCols(ct2)), 
colnames(ct2)) }) @@ -69,6 +74,7 @@ query_gr = exampleQueryGR()[1:10] suppressWarnings({ ct2.cfg = ChIPtsne2.from_FetchConfig(fetch_config, query_gr) }) + test_that("Constructor FetchConfig", { expect_setequal(rownames(rowToRowMat(ct2.cfg)), names(query_gr)) expect_equal(ncol(rowToRowMat(ct2.cfg)), 400) diff --git a/tests/testthat/test_ChIPtsne_getsetters.R b/tests/testthat/test_ChIPtsne_getsetters.R index 383f02d..ac09299 100644 --- a/tests/testthat/test_ChIPtsne_getsetters.R +++ b/tests/testthat/test_ChIPtsne_getsetters.R @@ -5,25 +5,25 @@ library(testthat) ct2 = exampleChIPtsne2.with_meta() test_that("NameVariable", { - expect_equal(getNameVariable(ct2), "sample") + expect_equal(getNameVariable(ct2), "name") ct2.new = setNameVariable(ct2, "Name") expect_equal(getNameVariable(ct2.new), "Name") }) test_that("RegionVariable", { - expect_equal(getRegionVariable(ct2), "id") + expect_equal(getRegionVariable(ct2), "region") ct2.new = setRegionVariable(ct2, "Region") expect_equal(getRegionVariable(ct2.new), "Region") }) test_that("PositionVariable", { - expect_equal(getPositionVariable(ct2), "x") + expect_equal(getPositionVariable(ct2), "position") ct2.new = setPositionVariable(ct2, "Position") expect_equal(getPositionVariable(ct2.new), "Position") }) test_that("ValueVariable", { - expect_equal(getValueVariable(ct2), "y") + expect_equal(getValueVariable(ct2), "signal") ct2.new = setValueVariable(ct2, "Value") expect_equal(getValueVariable(ct2.new), "Value") }) @@ -79,7 +79,7 @@ test_that("Switch name variable to smaller set", { ct2.10a = swapNameVariable(ct2.by_cell$MCF10A, new_VAR = "mark") expect_equal(getNameVariable(ct2.10a), "mark") expect_equal(getSampleMetaData(ct2.10a)$mark, factor("CTCF")) - expect_equal(getSampleMetaData(ct2.10a)$sample, "MCF10A_CTCF") + expect_equal(getSampleMetaData(ct2.10a)$name, "MCF10A_CTCF") expect_equal(colnames(rowToRowMat(ct2.10a))[1], "CTCF_-325") }) @@ -89,7 +89,7 @@ test_that("When names match for operator", { ct2.diff = 
ct2.by_cell$MCF10A - ct2.by_cell$MCF10AT1 expect_equal(colnames(rowToRowMat(ct2.diff))[1], "CTCF_-325") ct2.diff1 = swapNameVariable(ct2.diff, new_VAR = "cell") - ct2.diff2 = swapNameVariable(ct2.diff, new_VAR = "sample") + ct2.diff2 = swapNameVariable(ct2.diff, new_VAR = "name") expect_equal(colnames(ct2.diff1), "MCF10A - MCF10AT1") expect_equal(colnames(rowToRowMat(ct2.diff1))[1], "MCF10A - MCF10AT1_-325") expect_equal(colnames(ct2.diff2), "MCF10A_CTCF - MCF10AT1_CTCF") @@ -101,7 +101,7 @@ test_that("Switch name variable same set", { expect_equal(getNameVariable(ct2.name_cell), "cell") expect_equal(getSampleMetaData(ct2.name_cell)$mark, rep("CTCF", 3)) expect_equal(colnames(rowToRowMat(ct2.name_cell))[1], "MCF10A_-325") - expect_equal(getSampleMetaData(ct2.name_cell)$sample, c("MCF10A_CTCF", "MCF10AT1_CTCF", "MCF10CA1_CTCF")) + expect_equal(getSampleMetaData(ct2.name_cell)$name, c("MCF10A_CTCF", "MCF10AT1_CTCF", "MCF10CA1_CTCF")) }) diff --git a/tests/testthat/test_ChIPtsne_metadata.R b/tests/testthat/test_ChIPtsne_metadata.R index ac4157a..bfd26cf 100644 --- a/tests/testthat/test_ChIPtsne_metadata.R +++ b/tests/testthat/test_ChIPtsne_metadata.R @@ -5,9 +5,10 @@ library(testthat) query_gr = exampleQueryGR() prof_dt = exampleProfDT() +prof_dt = translateSSVtoCT2(prof_dt) -meta_dt = prof_dt %>% dplyr::select(sample) %>% unique -meta_dt = meta_dt %>% tidyr::separate(sample, c("cell", "mark"), sep = "_", remove = FALSE) +meta_dt = prof_dt %>% dplyr::select(name) %>% unique +meta_dt = meta_dt %>% tidyr::separate(name, c("cell", "mark"), sep = "_", remove = FALSE) ct2 = ChIPtsne2.from_tidy(prof_dt, query_gr, sample_metadata = meta_dt) colData(ct2) @@ -16,7 +17,7 @@ ct2.no_meta = ChIPtsne2.from_tidy(prof_dt, query_gr) colData(ct2.no_meta) prof_dt2 = data.table::copy(prof_dt) -prof_dt2 = prof_dt2 %>% tidyr::separate(sample, c("cell", "mark"), sep = "_", remove = FALSE) +prof_dt2 = prof_dt2 %>% tidyr::separate(name, c("cell", "mark"), sep = "_", remove = FALSE) 
prof_dt2$extra = 1 ct2.auto = ChIPtsne2.from_tidy(prof_dt2, query_gr) diff --git a/tests/testthat/test_ChIPtsne_names.R b/tests/testthat/test_ChIPtsne_names.R index 94626d5..4df60aa 100644 --- a/tests/testthat/test_ChIPtsne_names.R +++ b/tests/testthat/test_ChIPtsne_names.R @@ -7,9 +7,9 @@ test_that("update_rownames", { ct2 = exampleChIPtsne2.with_meta() new_rn = paste0("row_", rownames(ct2)) ct2.new_rn = update_ct2_rownames(ct2, new_names = new_rn) - expect_equal(ct2.new_rn@region_VAR, "id") + expect_equal(ct2.new_rn@region_VAR, "region") expect_equal(rownames(ct2.new_rn), new_rn) - expect_setequal(getTidyProfile(ct2.new_rn)$id, new_rn) + expect_setequal(getTidyProfile(ct2.new_rn)$region, new_rn) expect_equal(rownames(ct2.new_rn@assays@data$max), new_rn) expect_equal(rownames(ct2.new_rn@rowToRowMat), new_rn) expect_equal(names(rowRanges(ct2.new_rn)), new_rn) @@ -19,7 +19,7 @@ test_that("update_rownames old_name_VAR and new_VAR", { update_ct2_rownames = chiptsne2:::.update_ct2_rownames ct2 = exampleChIPtsne2.with_meta() rowData(ct2)$new_id = paste0("new_", rownames(ct2)) - ct2.new_rn = update_ct2_rownames(ct2, old_name_VAR = "id", new_VAR = "new_id") + ct2.new_rn = update_ct2_rownames(ct2, old_name_VAR = "region", new_VAR = "new_id") new_rn = paste0("new_", rownames(ct2)) @@ -40,9 +40,9 @@ test_that("update_colnames", { new_cn = sub("_CTCF", "", colnames(ct2)) ct2.new_cn = update_ct2_colnames(ct2, new_cn) - expect_equal(ct2.new_cn@name_VAR, "sample") - expect_setequal(getTidyProfile(ct2.new_cn)$sample, c("MCF10A", "MCF10AT1", "MCF10CA1")) - expect_setequal(colnames(getTidyProfile(ct2.new_cn)), c("id", "x", "y", "sample")) + expect_equal(ct2.new_cn@name_VAR, "name") + expect_setequal(getTidyProfile(ct2.new_cn)$name, c("MCF10A", "MCF10AT1", "MCF10CA1")) + expect_setequal(colnames(getTidyProfile(ct2.new_cn)), c( "region", "position", "signal", "name")) expect_equal(rownames(ct2.new_cn@colData), c("MCF10A", "MCF10AT1", "MCF10CA1")) 
expect_equal(rownames(colData(ct2.new_cn)), c("MCF10A", "MCF10AT1", "MCF10CA1")) @@ -62,10 +62,10 @@ test_that("update_colnames old_name_VAR and new_VAR", { update_ct2_colnames = chiptsne2:::.update_ct2_colnames ct2 = exampleChIPtsne2.with_meta() - ct2.new_cn = update_ct2_colnames(ct2, old_name_VAR = "sample", new_VAR = "cell") + ct2.new_cn = update_ct2_colnames(ct2, old_name_VAR = "name", new_VAR = "cell") expect_equal(ct2.new_cn@name_VAR, "cell") expect_setequal(getTidyProfile(ct2.new_cn)$cell, c("MCF10A", "MCF10AT1", "MCF10CA1")) - expect_setequal(colnames(getTidyProfile(ct2.new_cn)), c("id", "x", "y", "cell")) + expect_setequal(colnames(getTidyProfile(ct2.new_cn)), c( "region", "position", "signal", "cell")) expect_equal(rownames(ct2.new_cn@colData), c("MCF10A", "MCF10AT1", "MCF10CA1")) expect_equal(rownames(colData(ct2.new_cn)), c("MCF10A", "MCF10AT1", "MCF10CA1")) diff --git a/tests/testthat/test_ChIPtsne_no_rowRanges_class.R b/tests/testthat/test_ChIPtsne_no_rowRanges_class.R index 0a493ef..3e811ee 100644 --- a/tests/testthat/test_ChIPtsne_no_rowRanges_class.R +++ b/tests/testthat/test_ChIPtsne_no_rowRanges_class.R @@ -18,7 +18,7 @@ test_that("valid ChIPtsne2_no_rowRanges from constructor", { expect_equal(rownames(rowData(ct2.nrr))[1:5], c("1", "2", "3", "4", "5")) reg_meta = getRegionMetaData(ct2.nrr) - expect_equal(colnames(reg_meta), c("id", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF")) + expect_equal(colnames(reg_meta), c("region", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF")) expect_equal(rownames(reg_meta)[1:5], c("1", "2", "3", "4", "5")) }) @@ -32,7 +32,7 @@ test_that("valid ChIPtsne2_no_rowRanges from nullify rowRanges", { expect_equal(rownames(rowData(ct2.nrr))[1:5], c("1", "2", "3", "4", "5")) reg_meta = getRegionMetaData(ct2.nrr) - expect_equal(colnames(reg_meta), c("id", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF")) + expect_equal(colnames(reg_meta), c("region", "peak_MCF10A_CTCF", 
"peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF")) expect_equal(rownames(reg_meta)[1:5], c("1", "2", "3", "4", "5")) }) @@ -46,7 +46,7 @@ test_that("valid ChIPtsne2_no_rowRanges from tidy", { expect_equal(rownames(rowData(ct2.nrr))[1:5], c("1", "2", "3", "4", "5")) reg_meta = getRegionMetaData(ct2.nrr) - expect_equal(colnames(reg_meta), c("id", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF")) + expect_equal(colnames(reg_meta), c("region", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF")) expect_equal(rownames(reg_meta)[1:5], c("1", "2", "3", "4", "5")) }) @@ -115,3 +115,4 @@ test_that("ChIPtsne2 []", { expect_equal(nrow(colData(ct2.1x1)), 1) expect_equal(nrow(colData(ct2.nrr.1x1)), 1) }) + diff --git a/tests/testthat/test_ChIPtsne_tidy_profile.R b/tests/testthat/test_ChIPtsne_tidy_profile.R index 810f36d..dac1093 100644 --- a/tests/testthat/test_ChIPtsne_tidy_profile.R +++ b/tests/testthat/test_ChIPtsne_tidy_profile.R @@ -4,8 +4,9 @@ library(testthat) ct2 = exampleChIPtsne2.with_meta() exp_row_cn = c("peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF") -exp_col_cn = c("sample", "cell", "mark") -exp_min_cn = c("id", "x", "y", "sample") +exp_col_cn = c("name", "cell", "mark") +# exp_min_cn = c("id", "x", "y", "sample") +exp_min_cn = c("region", "position", "signal", "name") test_that("getTidyProfile basic", { prof_dt = getTidyProfile(ct2) expect_setequal(colnames(prof_dt), exp_min_cn) @@ -30,3 +31,4 @@ test_that("getTidyProfile bad", { }, regexp = "Invalid meta_VARS specified.") }) + diff --git a/tests/testthat/test_centroid_and_aggregate.R b/tests/testthat/test_centroid_and_aggregate.R index 883694f..0dbbb89 100644 --- a/tests/testthat/test_centroid_and_aggregate.R +++ b/tests/testthat/test_centroid_and_aggregate.R @@ -44,13 +44,13 @@ test_that("aggregateByGroup variable names", { rowData(ct2)$all = "all" ct2.1col = aggregateSamplesByGroup(ct2, "mark") - expect_equal(getRegionVariable(ct2.1col), "id") + 
expect_equal(getRegionVariable(ct2.1col), "region") expect_equal(getNameVariable(ct2.1col), "mark") ct2.1row = aggregateRegionsByGroup(ct2, "all") expect_equal(getRegionVariable(ct2.1row), "all") - expect_equal(getNameVariable(ct2.1row), "sample") + expect_equal(getNameVariable(ct2.1row), "name") ct2.1x1 = aggregateByGroup(ct2, c("all", "mark")) expect_equal(getRegionVariable(ct2.1x1), "all") @@ -65,7 +65,7 @@ test_that("aggregateByGroup variable names", { # plotSignalHeatmap(ct2.1x1) expect_equal(colnames(getRegionMetaData(ct2.1row)), c("all")) - expect_equal(colnames(getRegionMetaData(ct2.1col)), c("id", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF", "all")) + expect_equal(colnames(getRegionMetaData(ct2.1col)), c("region", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF", "all")) expect_equal(colnames(getRegionMetaData(ct2.1x1)), c("all")) expect_equal(nrow(getRegionMetaData(ct2.1row)), 1) diff --git a/tests/testthat/test_converters.R b/tests/testthat/test_converters.R index e4076bc..c29256b 100644 --- a/tests/testthat/test_converters.R +++ b/tests/testthat/test_converters.R @@ -7,9 +7,9 @@ query_gr = exampleQueryGR() prof_dt = exampleProfDT() meta_dt = prof_dt %>% - dplyr::select(sample) %>% + dplyr::select(name) %>% unique %>% - tidyr::separate(sample, c("cell", "mark"), sep = "_", remove = FALSE) + tidyr::separate(name, c("cell", "mark"), sep = "_", remove = FALSE) ct2 = ChIPtsne2.from_tidy(prof_dt, query_gr, sample_metadata = meta_dt) @@ -22,7 +22,7 @@ colData(ct2) prof_dt1.true = getTidyProfile(ct2, meta_VARS = TRUE) test_that("Conversion", { - expect_setequal(colnames(prof_dt1), c("id", "x", "y", "sample")) - expect_setequal(colnames(prof_dt1.cell), c("id", "x", "y", "sample", "cell")) - expect_setequal(colnames(prof_dt1.true), c("id", "x", "y", "sample", "cell", "mark", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF")) + expect_setequal(colnames(prof_dt1), c("region", "position", "signal", "name")) + 
expect_setequal(colnames(prof_dt1.cell), c("region", "position", "signal", "name", "cell")) + expect_setequal(colnames(prof_dt1.true), c("region", "position", "signal", "name", "cell", "mark", "peak_MCF10A_CTCF", "peak_MCF10AT1_CTCF", "peak_MCF10CA1_CTCF")) }) diff --git a/tests/testthat/test_expression_based.R b/tests/testthat/test_expression_based.R index 5bec5ac..5fb0ed1 100644 --- a/tests/testthat/test_expression_based.R +++ b/tests/testthat/test_expression_based.R @@ -23,7 +23,7 @@ test_that("subsetRegions - works", { ct2_1 = subsetRegions(ct2, peak_MCF10AT1_CTCF == TRUE) expect_equal(dim(ct2_1), c(74, 3)) - ct2_2 = subsetRegions(ct2, id == "1") + ct2_2 = subsetRegions(ct2, region == "1") expect_equal(dim(ct2_2), c(1, 3)) ct2_history = ChIPtsne2.history(ct2_1) @@ -33,7 +33,7 @@ test_that("subsetRegions - works", { test_that("subsetSamples - works", { ct2_1 = subsetSamples(ct2, cell == "MCF10A") - ct2_2 = subsetSamples(ct2, sample == "MCF10A_CTCF") + ct2_2 = subsetSamples(ct2, name == "MCF10A_CTCF") expect_equal(dim(ct2_1), c(100, 1)) expect_equal(dim(ct2_2), c(100, 1)) @@ -54,7 +54,7 @@ test_that("mutateRegions - works", { expect_equal(dim(ct2_1), c(100, 3)) expect_equal(sum(rowData(ct2_1)$either_10a_or_at1), 99) - ct2_2 = mutateRegions(ct2, "silly", paste(id, peak_MCF10A_CTCF)) + ct2_2 = mutateRegions(ct2, "silly", paste(region, peak_MCF10A_CTCF)) expect_equal(dim(ct2_1), c(100, 3)) expect_equal(sum(rowData(ct2_1)$either_10a_or_at1), 99) @@ -78,7 +78,7 @@ test_that("mutateSamples - works", { test_that("separateRegions - works", { head(getRegionMetaData(ct2)) - ct2_1 = mutateRegions(ct2, "silly", paste(id, as.character(peak_MCF10A_CTCF))) + ct2_1 = mutateRegions(ct2, "silly", paste(region, as.character(peak_MCF10A_CTCF))) rowData(ct2_1) ct2_1 = separateRegions(ct2_1, "silly", sep = " ", into = c("id2", "peak2")) rowData(ct2_1) @@ -89,7 +89,7 @@ test_that("separateRegions - works", { }) test_that("separateSamples - works", { - ct2_1 = separateSamples(ct2, col = 
"sample", into = c("cell2", "mark2")) + ct2_1 = separateSamples(ct2, col = "name", into = c("cell2", "mark2")) expect_equal(dim(ct2_1), c(100, 3)) colData(ct2_1) expect_equal(colData(ct2_1)$cell, c("MCF10A", "MCF10AT1", "MCF10CA1")) @@ -100,3 +100,4 @@ test_that("separateSamples - works", { ct2_history = ChIPtsne2.history(ct2_1) expect_equal(names(ct2_history)[[length(ct2_history)]], "separateSamples") }) + diff --git a/tests/testthat/test_history.R b/tests/testthat/test_history.R index a067ee1..b1e137a 100644 --- a/tests/testthat/test_history.R +++ b/tests/testthat/test_history.R @@ -7,9 +7,9 @@ query_gr = exampleQueryGR() query_gr = seqsetvis::prepare_fetch_GRanges_width(query_gr, win_size = 50) prof_dt = exampleProfDT() meta_dt = prof_dt %>% - dplyr::select(sample) %>% + dplyr::select(name) %>% unique %>% - tidyr::separate(sample, c("cell", "mark"), sep = "_", remove = FALSE) + tidyr::separate(name, c("cell", "mark"), sep = "_", remove = FALSE) ct2 = ChIPtsne2.from_tidy(prof_dt, query_gr, sample_metadata = meta_dt) ct2.dupe = ChIPtsne2.from_tidy(prof_dt, query_gr, sample_metadata = meta_dt) diff --git a/tests/testthat/test_normalize_RPM.R b/tests/testthat/test_normalize_RPM.R index c28ec1e..3c8ce51 100644 --- a/tests/testthat/test_normalize_RPM.R +++ b/tests/testthat/test_normalize_RPM.R @@ -6,13 +6,13 @@ library(testthat) query_gr = exampleQueryGR() prof_dt = exampleProfDT() meta_dt = prof_dt %>% - dplyr::select(sample) %>% + dplyr::select(name) %>% unique %>% - tidyr::separate(sample, c("cell", "mark"), sep = "_", remove = FALSE) + tidyr::separate(name, c("cell", "mark"), sep = "_", remove = FALSE) meta_dt$mapped_reads = seq(nrow(meta_dt)) meta_dt$test_mapped_reads = seq(nrow(meta_dt))*1000 map_read = meta_dt$mapped_reads -names(map_read) = meta_dt$sample +names(map_read) = meta_dt$name ct2.no_mr = ChIPtsne2.from_tidy(prof_dt, query_gr) ct2.mr = ChIPtsne2.from_tidy(prof_dt, query_gr, sample_metadata = meta_dt) @@ -39,3 +39,4 @@ test_that("normalizeSignalRPM", 
{ names(map_read.bad) = NULL expect_error(normalizeSignalRPM(ct2.no_mr, mapped_reads_data = map_read.bad), "When mapped_reads_data is supplied, names must be set.") }) + diff --git a/tests/testthat/test_normalize_cap.R b/tests/testthat/test_normalize_cap.R index a3ae5b9..4d0c3a0 100644 --- a/tests/testthat/test_normalize_cap.R +++ b/tests/testthat/test_normalize_cap.R @@ -6,13 +6,14 @@ library(testthat) query_gr = exampleQueryGR() prof_dt = exampleProfDT() meta_dt = prof_dt %>% - dplyr::select(sample) %>% + dplyr::select(name) %>% unique %>% - tidyr::separate(sample, c("cell", "mark"), sep = "_", remove = FALSE) + tidyr::separate(name, c("cell", "mark"), sep = "_", remove = FALSE) + meta_dt$cap_value = seq(nrow(meta_dt))*5 meta_dt$test_cap = seq(nrow(meta_dt)) cap_values = meta_dt$cap_value -names(cap_values) = meta_dt$sample +names(cap_values) = meta_dt$name ct2.no_mr = ChIPtsne2.from_tidy(prof_dt, query_gr) ct2.mr = ChIPtsne2.from_tidy(prof_dt, query_gr, sample_metadata = meta_dt) diff --git a/tests/testthat/test_samples_names.R b/tests/testthat/test_samples_names.R index e5966d2..1396837 100644 --- a/tests/testthat/test_samples_names.R +++ b/tests/testthat/test_samples_names.R @@ -25,7 +25,7 @@ ct2.agg = aggregateSamplesByGroup(ct2, group_VAR = "cell") colData(ct2.agg) getNameVariable(ct2) -ct2.sp = split(ct2, "sample") +ct2.sp = split(ct2, "name") ct2.10a = (ct2.sp$MCF10A_rep1 + ct2.sp$MCF10A_rep2) / 2 ct2.at1 = (ct2.sp$MCF10AT1_rep1 + ct2.sp$MCF10AT1_rep2) / 2 @@ -36,3 +36,4 @@ colnames(ct2.avg) = c("MCF10A", "MCF10AT1") ct2.avg = swapNameVariable(ct2.avg, "cell") colData(ct2.avg) + diff --git a/vignettes/chiptsne2_basics.Rmd b/vignettes/chiptsne2_basics.Rmd index 733b1d9..c0f9bf1 100644 --- a/vignettes/chiptsne2_basics.Rmd +++ b/vignettes/chiptsne2_basics.Rmd @@ -6,7 +6,7 @@ output: html_document --- ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) +knitr::opts_chunk$set(echo = TRUE, fig.width = 3, fig.height = 3) ``` # Introduction @@ -23,13 
+23,13 @@ knitr::opts_chunk$set(echo = TRUE) *chiptsne2* key features: -1. *ChIPtsne2* class implemented as an extension of the bioc *SummarizedExperiment* class. This provides greater usability right of the bat. +1. `ChIPtsne2` class implemented as an extension of the bioc `SummarizedExperiment` and, when appropriate, `RangedSummarizedExperiment` classes. This provides greater usability right off the bat. -2. Since you have to create a *ChIPtsne2* object, we can catch errors early and provide useful feedback to resolve them. +2. Since you have to create a `ChIPtsne2` object, we can catch errors early and provide useful feedback to resolve them. -3. All package methods are build for use with the [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) pipe, %>%. This enables concise and readable workflows. +3. All package methods are built for use with the [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) pipe, %>%. This enables concise and readable workflows. -4. *ChIPtsne2* objects maintain a history of all package methods applied to them. Importantly, the history from one object can be applied to any number of other objects. This allows for workflows to be developed and reproducibly deployed. +4. `ChIPtsne2` objects maintain a history of all package methods applied to them. Importantly, the history from one object can be applied to any number of other objects. This allows for workflows to be developed and reproducibly deployed. # Setting up @@ -43,6 +43,7 @@ library(tidyverse) # GenomicRanges is a primary Bioconductor package for manipulating genomic # intervals, ie. ChIP-seq peaks, TSSes etc. 
library(GenomicRanges) +# library(data.table) # Setting mc.cores allows chiptsne2 to use multiple threads and speed up many # functions options(mc.cores = 20) @@ -50,38 +51,76 @@ options(mc.cores = 20) # Quick start +*chiptsne2* provides some toy example data (100 regions in 3 samples) that we can quickly use to demonstrate its primary functions. Most functions require a `ChIPtsne2` object, so we first need to look at how to create one. + +## Creating the object + +There are several ways to create a `ChIPtsne2` object. These objects extend `SummarizedExperiment` and, when appropriate, `RangedSummarizedExperiment`. They contain signal profiles at arbitrary resolutions for one or more samples (columns) at potentially tens of thousands of regions (rows). + +*chiptsne2* will handle fetching signal profiles if you create a `FetchConfig` object to tell it how to do so. + +```{r quick create ct2} +# the simplest way to create a FetchConfig is from a character vector of file paths. +bam_files = exampleBamFiles() +cfg_from_files = FetchConfig.from_files(bam_files) + +# query regions are stored in a GRanges object. +query_gr = exampleQueryGR() + +# this will take a bit of time to run as it has to retrieve signal profiles for the regions provided. +ct2_from_files = ChIPtsne2.from_FetchConfig(cfg_from_files, query_gr) +``` + +There are more powerful and flexible ways to create the `FetchConfig` and `ChIPtsne2` objects. See the full details [here](#create-chiptsne2-object). + +For now, we'll skip to using the example `ChIPtsne2` object from the package. + +```{r quick pkg ct2} +ct2 = exampleChIPtsne2.with_meta() +``` + +This is a barebones *chiptsne2* workflow. 
+ +```{r quick workflow} +ct2 = ct2 %>% + dimReduceTSNE(perplexity = 25) %>% + groupRegionsBySignalCluster(group_VAR = "cluster_id", n_clusters = 3) %>% + groupRegionsByDimReduceCluster(group_VAR = "knn_id", nearest_neighbors = 15) +``` + +```{r quick heatmap} +plotSignalHeatmap(ct2) +``` + ```{r} -dir("../inst/extdata", pattern = "bam$") -dir("../inst/extdata") -exampleBamFiles() -exampleQueryGR() -exampleChIPtsne2() +plotSignalHeatmap(ct2, group_VARS = "cluster_id") ``` # Download full dataset -```{r} -library(data.table) -library(GenomicRanges) -url_dt = fread("../inst/extdata/example_data_paths.csv") -url_dt[, bw_file := paste0(cell, "_", mark, "_FE.bw")] -url_dt[, narrowPeak_file := paste0(cell, "_", mark, ".narrowPeak")] +```{r example data dl} +#chiptsne2 has URL paths to example bigwig and peak files to use +url_df = exampleDataPaths() +#this is where data files will be downloaded to dl_dir = "ct2_data_download" dir.create(dl_dir, recursive = TRUE, showWarnings = FALSE) -url_dt[, bw_file := file.path(dl_dir, bw_file)] -url_dt[, narrowPeak_file := file.path(dl_dir, narrowPeak_file)] +#create final download paths +url_df = url_df %>% + mutate(bw_file = file.path(dl_dir, bw_file)) %>% + mutate(narrowPeak_file = file.path(dl_dir, narrowPeak_file)) -for(i in seq_len(nrow(url_dt))){ - message(url_dt$cell[i]) - if(!file.exists(url_dt$bw_file[i])){ - curl::curl_download(url_dt$bw_url[i], url_dt$bw_file[i]) +#use curl to download the example data +for(i in seq_len(nrow(url_df))){ + message(url_df$cell[i]) + if(!file.exists(url_df$bw_file[i])){ + curl::curl_download(url_df$bw_url[i], url_df$bw_file[i]) } - if(!file.exists(url_dt$narrowPeak_file[i])){ - curl::curl_download(url_dt$narrowPeak_url[i], url_dt$narrowPeak_file[i]) + if(!file.exists(url_df$narrowPeak_file[i])){ + curl::curl_download(url_df$narrowPeak_url[i], url_df$narrowPeak_file[i]) } } @@ -89,6 +128,14 @@ for(i in seq_len(nrow(url_dt))){ # Create ChIPtsne2 object +You can also supply a data.frame linking 
these paths to meta data that will be useful later. +```{r} +bam_df = exampleBam_data.frame() +cfg_from_df = FetchConfig(bam_df) + +ct2_from_df = ChIPtsne2.from_FetchConfig(cfg_from_df, query_gr) +``` + ```{r} print(load("/slipstream/home/joeboyd/R/csaw_test/DB_csaw_results.AF.save")) diff_10a_to_at1 = GenomicRanges::GRanges(comb_res$CTCF$`from MCF10A to MCF10AT1`) @@ -98,7 +145,7 @@ diff_any = union(diff_10a_to_at1, diff_at1_to_ca1) ``` ```{r} -np_grs = seqsetvis::easyLoad_narrowPeak(url_dt$narrowPeak_file, url_dt$cell) +np_grs = seqsetvis::easyLoad_narrowPeak(url_df$narrowPeak_file, url_df$cell) seqsetvis::ssvFeatureUpset(np_grs) ``` @@ -122,9 +169,7 @@ qgr = c(qgr.diff, qgr.ns) ``` ```{r} -library(chiptsne2) - -cfg = FetchConfig(url_dt[, .(bw_file, cell, mark)], read_mode = "bigwig") +cfg = FetchConfig(url_df[, c("bw_file", "cell", "mark")], read_mode = "bigwig") ct2 = ChIPtsne2.from_FetchConfig(cfg, qgr) ct2 = mutateSamples(ct2, mutate_name = "name2", mutate_expression = paste(cell, mark)) ct2 = swapNameVariable(ct2, "name2")