|
41 | 41 | # DATA.frame BUILDERS --------------------------------------------------------- |
42 | 42 | # ----------------------------------------------------------------------------- |
43 | 43 | .makeDFfromSCO <- function(input.data, assay = "escape", gene.set = NULL, |
44 | | - group.by = NULL, split.by = NULL, facet.by = NULL) { |
| 44 | + group.by = NULL, split.by = NULL, facet.by = NULL, color.by = NULL) { |
45 | 45 | if (is.null(assay)) |
46 | 46 | stop("Please provide assay name") |
47 | | - cols <- unique(c(group.by, split.by, facet.by)) |
| 47 | + |
| 48 | + # Pull count matrix (features) and metadata |
48 | 49 | cnts <- .cntEval(input.data, assay = assay, type = "data") |
| 50 | + features <- rownames(cnts) |
| 51 | + meta <- .grabMeta(input.data) |
| 52 | + meta.cols <- colnames(meta) |
49 | 53 |
|
50 | | - if (length(gene.set) == 1 && gene.set == "all") |
51 | | - gene.set <- rownames(cnts) |
| 54 | + # All potential column-like arguments |
| 55 | + cols <- unique(c(group.by, split.by, facet.by, color.by)) |
52 | 56 |
|
53 | | - meta <- .grabMeta(input.data) |
54 | | - meta <- meta[, cols, drop = FALSE] |
| 57 | + # Check that each is either metadata or a feature |
| 58 | + bad.cols <- cols[!(cols %in% meta.cols | cols %in% features)] |
| 59 | + if (length(bad.cols) > 0) { |
| 60 | + stop("The following variables are not found in either metadata or features: ", paste(bad.cols, collapse = ", ")) |
| 61 | + } |
| 62 | + |
| 63 | + # Determine if color.by is a feature or meta |
| 64 | + is_feature_color <- !is.null(color.by) && color.by %in% features |
| 65 | + is_meta_color <- !is.null(color.by) && color.by %in% meta.cols |
55 | 66 |
|
| 67 | + # Prepare metadata subset |
| 68 | + meta <- meta[, intersect(cols, meta.cols), drop = FALSE] |
| 69 | + |
| 70 | + # Convert gene.set if "all" |
| 71 | + if (length(gene.set) == 1 && gene.set == "all") { |
| 72 | + gene.set <- features |
| 73 | + } |
| 74 | + |
| 75 | + # Build data frame with expression values |
56 | 76 | if (length(gene.set) == 1) { |
57 | 77 | df <- cbind(value = cnts[gene.set, ], meta) |
58 | 78 | colnames(df)[1] <- gene.set |
59 | 79 | } else { |
60 | 80 | df <- cbind(Matrix::t(cnts[gene.set, , drop = FALSE]), meta) |
61 | 81 | } |
62 | | - df |
| 82 | + |
| 83 | + # Add color.by feature expression if it's a gene but not in gene.set |
| 84 | + if (is_feature_color && !(color.by %in% gene.set)) { |
| 85 | + df[[color.by]] <- cnts[color.by, ] |
| 86 | + } |
| 87 | + |
| 88 | + return(df) |
63 | 89 | } |
64 | 90 |
|
65 | | -.prepData <- function(input.data, assay, gene.set, group.by, split.by, facet.by) { |
| 91 | + |
| 92 | +.prepData <- function(input.data, assay, gene.set, group.by, split.by, facet.by, color.by) { |
66 | 93 | if (.is_seurat_or_sce(input.data)) { |
67 | | - df <- .makeDFfromSCO(input.data, assay, gene.set, group.by, split.by, facet.by) |
| 94 | + df <- .makeDFfromSCO(input.data, assay, gene.set, group.by, split.by, facet.by, color.by) |
| 95 | + |
| 96 | + if (identical(gene.set, "all")) { |
| 97 | + meta_cols <- c(group.by, split.by, facet.by) |
| 98 | + # Do not remove color.by if it's also a feature |
| 99 | + non_gene_color <- if (!is.null(color.by) && color.by %in% colnames(df) && !(color.by %in% gene.set)) color.by else NULL |
| 100 | + gene.set <- setdiff(colnames(df), c(meta_cols, non_gene_color)) |
| 101 | + } |
| 102 | + |
| 103 | + } else { |
| 104 | + all.cols <- unique(c(gene.set, group.by, split.by, facet.by, color.by)) |
| 105 | + missing.cols <- setdiff(all.cols, colnames(input.data)) |
| 106 | + if (length(missing.cols) > 0) { |
| 107 | + stop("The following columns are missing in the input data: ", paste(missing.cols, collapse = ", ")) |
| 108 | + } |
| 109 | + |
68 | 110 | if (identical(gene.set, "all")) { |
69 | | - gene.set <- setdiff(colnames(df), c(group.by, split.by, facet.by)) |
| 111 | + gene.set <- setdiff(colnames(input.data), c(group.by, split.by, facet.by, color.by)) |
70 | 112 | } |
71 | | - } else { # assume plain data.frame / matrix |
72 | | - if (identical(gene.set, "all")) |
73 | | - gene.set <- setdiff(colnames(input.data), c(group.by, split.by, facet.by)) |
74 | | - df <- input.data[, c(gene.set, group.by, split.by, facet.by), drop = FALSE] |
| 113 | + |
| 114 | + df <- input.data[, unique(c(gene.set, group.by, split.by, facet.by, color.by)), drop = FALSE] |
75 | 115 | } |
76 | | - colnames(df) <- c(gene.set, group.by, split.by, facet.by) |
77 | | - df |
| 116 | + |
| 117 | + return(df) |
78 | 118 | } |
79 | 119 |
|
80 | 120 | # ----------------------------------------------------------------------------- |
@@ -443,4 +483,24 @@ utils::globalVariables(c( |
443 | 483 | "gene.set.query", "index" |
444 | 484 | )) |
445 | 485 |
|
| 486 | +# helper to match summary function |
| 487 | +.match_summary_fun <- function(fun) { |
| 488 | + if (is.function(fun)) return(fun) |
| 489 | + if (!is.character(fun) || length(fun) != 1) |
| 490 | + stop("'summary.stat' must be a single character keyword or a function") |
| 491 | + kw <- tolower(fun) |
| 492 | + fn <- switch(kw, |
| 493 | + mean = base::mean, |
| 494 | + median = stats::median, |
| 495 | + sum = base::sum, |
| 496 | + sd = stats::sd, |
| 497 | + max = base::max, |
| 498 | + min = base::min, |
| 499 | + geometric = function(x) exp(mean(log(x + 1e-6))), |
| 500 | + stop("Unsupported summary keyword: ", fun)) |
| 501 | + |
| 502 | + # Attach keyword as attribute |
| 503 | + attr(fn, "keyword") <- kw |
| 504 | + fn |
| 505 | +} |
446 | 506 |
|
0 commit comments