From c606e6219ef7d8415789a3755ddc3a624fd0011d Mon Sep 17 00:00:00 2001 From: emsonder Date: Mon, 21 Jul 2025 21:35:52 +0200 Subject: [PATCH 1/4] Fix: Subsetting by tf-name --- R/contextTfFeatures.R | 12 ++++++------ R/getFeatureMatrix.R | 7 ++++--- tests/testthat/test-contextTfFeatures.R | 12 ++++++++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/R/contextTfFeatures.R b/R/contextTfFeatures.R index dd0ec92..74e95b0 100644 --- a/R/contextTfFeatures.R +++ b/R/contextTfFeatures.R @@ -45,6 +45,7 @@ contextTfFeatures <- function(mae, ...){ .checkObject(mae, checkFor=c("site", "context", "tf"), tfName=tfName) + tf <- tfName whichCol <- match.arg(whichCol, choices=c("All", "OnlyTrain", "Col")) whichContexts <- fifelse(addLabels, "Both", "ATAC") @@ -79,7 +80,7 @@ contextTfFeatures <- function(mae, features <- unique(c(features, "Inserts")) tfCofactors <- unique(unlist(subset(colData(maeSub[[TFFEAT]]), - get(TFNAMECOL)==tfName)[[TFCOFACTORSCOL]])) + get(TFNAMECOL)==tf)[[TFCOFACTORSCOL]])) if(("Cofactor_ChromVAR_Scores" %in% features) & is.null(tfCofactors)){ msg <- c("No cofactors have been specified when computing transcription ", @@ -95,14 +96,13 @@ contextTfFeatures <- function(mae, names(atacFragPaths) <- names(atacFragFilePaths) # get list of motif ranges, this will eventually be refactored anyways - motifPath <- subset(colData(maeSub[[MOTIFEXP]]), - get(MOTIFNAMECOL)==tfName)$origin + motifPath <- subset(colData(maeSub[[MOTIFEXP]]), get(MOTIFNAMECOL)==tf)$origin baseDir <- metadata(colData(maeSub[[MOTIFEXP]]))[[BASEDIRCOL]] motifRanges <- readRDS(file.path(baseDir, motifPath)) if(addLabels){ - colDataChIP <- colData(mae[[CHIPEXP]]) - colDataChIP <- subset(colDataChIP, get(TFNAMECOL)==tfName) + colDataChIP <- as.data.table(colData(mae[[CHIPEXP]])) + colDataChIP <- subset(colDataChIP, get(TFNAMECOL)==tf) labelCols <- colDataChIP$combination names(labelCols) <- colDataChIP[[annoCol]] labels <- lapply(labelCols, function(col){ @@ -194,7 +194,7 @@ contextTfFeatures <- function(mae, warning("ChromVAR activity estimates can not be added if tfFeatures() with `Associated_Motif_Activity` and panContextFeatures() have not been called before")} else{ selActMotifs <- unlist(subset(colData(mae[[TFFEAT]]), - get(TFNAMECOL)==tfName)[[PRESELACTCOL]]) + get(TFNAMECOL)==tf)[[PRESELACTCOL]]) devMat <- t(assays(mae[[ACTEXP]])[[NORMDEVASSAY]][selActMotifs, contexts, drop=FALSE]) devMat <- as.matrix(devMat) diff --git a/R/getFeatureMatrix.R b/R/getFeatureMatrix.R index 6df3395..ebab494 100644 --- a/R/getFeatureMatrix.R +++ b/R/getFeatureMatrix.R @@ -136,6 +136,7 @@ getFeatureMatrix <- function(mae, .checkObject(mae, checkFor=c("site", "context", "tf", "tf-context"), tfName=tfName) + tf <- tfName norm <- match.arg(norm, choices=c("robust", "min-max", "column", "none")) @@ -166,17 +167,17 @@ getFeatureMatrix <- function(mae, # get the cofactors tfCofactors <- unique(unlist(subset(colData(mae[[TFFEAT]]), - get(TFNAMECOL)==tfName)[[TFCOFACTORSCOL]])) + get(TFNAMECOL)==tf)[[TFCOFACTORSCOL]])) message("Attaching Site & TF-Features") selMotifs <- subset(colData(mae[[TFFEAT]]), - get(TFNAMECOL)==tfName)[[PRESELMOTIFCOL]] + get(TFNAMECOL)==tf)[[PRESELMOTIFCOL]] selMotifs <- unlist(selMotifs) motifMat <- assays(mae[[MOTIFEXP]])[[MATCHASSAY]][,selMotifs,drop=FALSE] colnames(motifMat) <- paste(TFFEAT, MOTIFFEATNAME, names(selMotifs), sep="_") selActMotifs <- subset(colData(mae[[TFFEAT]]), - get(TFNAMECOL)==tfName)[[PRESELACTCOL]] + get(TFNAMECOL)==tf)[[PRESELACTCOL]] selActMotifs <- unlist(selActMotifs) actMat <- assays(mae[[ASSOCEXP]])[[ASSOCASSAY]][,selActMotifs,drop=FALSE] colnames(actMat) <- paste(TFFEAT, ACTASSOCFEATNAME, names(selActMotifs), sep="_") #TODO: should be saved with actual motif name or name like names(selActMotifs) diff --git a/tests/testthat/test-contextTfFeatures.R b/tests/testthat/test-contextTfFeatures.R index 5d1c4b6..fe727a9 100644 --- a/tests/testthat/test-contextTfFeatures.R +++ b/tests/testthat/test-contextTfFeatures.R @@ -27,6 +27,18 @@ test_that("Context-TF-features: Correct training context selection", { expect_equal(rownames(colData(maeTest[[CONTEXTTFFEAT]])), "K562_CTCF") }) +test_that("Context-TF-features: Correct labelling",{ + maeTest <- tfFeatures(maeTest, tfName="JUN", tfCofactors="CTCF", + features="Binding_Patterns") + maeTest <- contextTfFeatures(maeTest, tfName="JUN", + whichCol="OnlyTrain", + features=c("Inserts", "Weighted_Inserts")) + expect_equal(assays(maeTest[[CHIPEXP]])[[PEAKASSAY]][,"K562_JUN", + drop=TRUE], + assays(maeTest[[CONTEXTTFFEAT]])[[LABELCOLNAME]][,"K562_JUN", + drop=TRUE]) +}) + test_that("Assays are preserved when computing for new TF", { assayNamesOrig <- names(assays(maeTest[[CONTEXTTFFEAT]])) maeTest <- tfFeatures(maeTest, tfName="JUN", From 869e96aa1ffec4ee64b1153de8e89bbe8e04cea5 Mon Sep 17 00:00:00 2001 From: emsonder Date: Mon, 21 Jul 2025 22:06:24 +0200 Subject: [PATCH 2/4] Switch aggregation strategy --- R/tfFeatures.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/tfFeatures.R b/R/tfFeatures.R index 748c985..2e5f715 100644 --- a/R/tfFeatures.R +++ b/R/tfFeatures.R @@ -202,8 +202,7 @@ if(length(tfCofactors)>0){ cofactBindings <- lapply(tfCofactors, function(tfCol){ - cofactBinding <- Matrix::Matrix( - Matrix::rowMeans(chIPMat[,tfCols==tfCol, drop=FALSE]), ncol=1) + cofactBinding <- .aggregate(chIPMat[,tfCols==tfCol, drop=FALSE], aggVar="tf") colnames(cofactBinding) <- paste(COBINDFEATNAME, tfCol, sep=".") cofactBinding}) names(cofactBindings) <- paste(COBINDFEATNAME, From df8986f53aa85c164b8516671174af19b97d161a Mon Sep 17 00:00:00 2001 From: emsonder Date: Mon, 21 Jul 2025 22:25:01 +0200 Subject: [PATCH 3/4] Fix: Naming of columns/assays --- R/tfFeatures.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/tfFeatures.R b/R/tfFeatures.R index 2e5f715..c0bd35a 100644 --- a/R/tfFeatures.R +++ b/R/tfFeatures.R @@ -358,7 +358,7 @@ tfFeatures <- function(mae, atacMat <- .convertToMatrix(assays(mae[[ATACEXP]])[[TOTALOVERLAPSFEATNAME]]) colnames(atacMat) <- colnames(mae[[ATACEXP]]) whichCol <- which(mae[[CHIPEXP]][[TFNAMECOL]]!=tfName) - chIPMat <- as(assays(mae[[CHIPEXP]])$peaks[,whichCol],"CsparseMatrix") + chIPMat <- as(assays(mae[[CHIPEXP]])[[PEAKASSAY]][,whichCol],"CsparseMatrix") colnames(chIPMat) <- paste(colData(mae[[CHIPEXP]])[whichCol,annoCol], colData(mae[[CHIPEXP]])[whichCol,TFNAMECOL], sep="_") @@ -396,11 +396,12 @@ tfFeatures <- function(mae, ATACPROMEXP %in% names(experiments(mae))){ message("Promoter association Features") - isProm <- which(rowData(mae[[ATACPROMEXP]])[[TFNAMECOL]]==tfName) + # TODO: Adapt the naming (in prepData.R) + isProm <- which(rowData(mae[[ATACPROMEXP]])[["tf_name"]]==tfName) atacPromMat <- atacPromMat[isProm,,drop=FALSE] promAsc <- .getAssociation(atacMat, atacPromMat) - colnames(promAsc) <- paste(promoterPrefix, colnames(promAsc), sep="_") + colnames(promAsc) <- paste(PROMOTERAFFIX, colnames(promAsc), sep="_") colNamesPromAsc <- colnames(promAsc) promAsc <- lapply(colNamesPromAsc, function(col) promAsc[,col,drop=FALSE]) From 592919c65149590b535991796a55711b1c435a88 Mon Sep 17 00:00:00 2001 From: emsonder Date: Tue, 22 Jul 2025 15:05:16 +0200 Subject: [PATCH 4/4] Increased version number: Mostly naming variable/subsetting fixes --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d951e0d..2a5e2b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TFBlearner Title: Functionality for training TF-specific classifiers to predict TF bindings based on ATAC-seq data. -Version: 0.0.1.0000 +Version: 0.0.1.0001 Authors@R: person("Emanuel", "Sonder", , "emanuel.sonder@hest.ethz.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4788-9508"))