diff --git a/DESCRIPTION b/DESCRIPTION
index 0be95e9..bb73ede 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: MotrpacRatTraining6moWAT
Type: Package
Title: Helper Functions for the MotrpacRatTraining6moWATData Package
-Version: 0.0.0.9000
+Version: 0.1.0
Authors@R:
c(
person(given = "Tyler", family = "Sagendorf",
@@ -10,7 +10,7 @@ Authors@R:
role = c("aut", "cre")),
person(given = "Zhenxin", family = "Hou",
comment = c(ORCID = "0000-0002-1301-0799",
- "Provided WGCNA-related code."),
+ "Provided foundation for run_WGCNA function."),
role = "ctb"),
person(given = "Vladislav", family = "Petyuk",
comment = c(ORCID = "0000-0003-4076-151X",
@@ -23,26 +23,21 @@ License: MIT + file LICENSE
Encoding: UTF-8
Depends:
R (>= 3.5.0),
- MSnbase
+ Biobase
Imports:
BiocFileCache,
circlize,
ComplexHeatmap (>= 2.12.0),
- cowplot,
data.table,
dynamicTreeCut,
edgeR,
fgsea,
- ggbeeswarm,
- ggplot2,
graphics,
grDevices,
latex2exp,
limma,
msigdbr (>= 7.5.1),
ontologyIndex,
- patchwork,
- scales,
statmod,
stats,
tibble,
diff --git a/NAMESPACE b/NAMESPACE
index 5223312..0d16cdd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,37 +3,25 @@
export(enrichmat)
export(fgsea2)
export(fora2)
-export(get_ranking)
-export(ggplotMDS)
export(label_signif)
export(limma_full)
export(msigdbr2)
-export(plot_ORA)
-export(plot_analyte)
-export(plot_eigenfeature)
-export(plot_upset)
-export(plot_volcano)
-export(pval_hist)
export(range_extend)
+export(rank_genes)
export(run_WGCNA)
-export(theme_pub)
export(update_GO_names)
import(ComplexHeatmap)
-import(ggplot2)
import(limma)
import(statmod)
+importFrom(Biobase,exprs)
+importFrom(Biobase,fData)
+importFrom(Biobase,pData)
importFrom(BiocFileCache,BiocFileCache)
importFrom(BiocFileCache,bfcadd)
importFrom(BiocFileCache,bfccount)
importFrom(BiocFileCache,bfcnew)
importFrom(BiocFileCache,bfcquery)
importFrom(BiocFileCache,bfcrpath)
-importFrom(MSnbase,`fData<-`)
-importFrom(MSnbase,`pData<-`)
-importFrom(MSnbase,exprs)
-importFrom(MSnbase,fData)
-importFrom(MSnbase,featureNames)
-importFrom(MSnbase,pData)
importFrom(WGCNA,TOMsimilarity)
importFrom(WGCNA,adjacency)
importFrom(WGCNA,bicor)
@@ -44,12 +32,9 @@ importFrom(WGCNA,pickSoftThreshold)
importFrom(WGCNA,plotDendroAndColors)
importFrom(WGCNA,standardColors)
importFrom(circlize,colorRamp2)
-importFrom(cowplot,get_legend)
-importFrom(cowplot,plot_grid)
importFrom(data.table,`.N`)
importFrom(data.table,`.SD`)
importFrom(data.table,`:=`)
-importFrom(data.table,as.data.table)
importFrom(data.table,dcast)
importFrom(data.table,melt)
importFrom(data.table,rbindlist)
@@ -57,7 +42,6 @@ importFrom(data.table,setDF)
importFrom(data.table,setDT)
importFrom(data.table,setkeyv)
importFrom(data.table,setnames)
-importFrom(data.table,setorder)
importFrom(data.table,setorderv)
importFrom(dynamicTreeCut,cutreeDynamic)
importFrom(edgeR,DGEList)
@@ -66,8 +50,6 @@ importFrom(edgeR,cpm)
importFrom(edgeR,filterByExpr)
importFrom(fgsea,fgseaMultilevel)
importFrom(fgsea,fora)
-importFrom(ggbeeswarm,geom_quasirandom)
-importFrom(ggbeeswarm,position_beeswarm)
importFrom(grDevices,bmp)
importFrom(grDevices,dev.off)
importFrom(grDevices,jpeg)
@@ -83,13 +65,9 @@ importFrom(grid,grid.circle)
importFrom(grid,grid.rect)
importFrom(grid,unit)
importFrom(latex2exp,TeX)
-importFrom(limma,plotMDS)
importFrom(msigdbr,msigdbr)
importFrom(msigdbr,msigdbr_collections)
importFrom(ontologyIndex,get_OBO)
-importFrom(patchwork,wrap_plots)
-importFrom(scales,breaks_extended)
-importFrom(scales,extended_breaks)
importFrom(stats,as.dist)
importFrom(stats,cor)
importFrom(stats,hclust)
diff --git a/R/enrichmat.R b/R/enrichmat.R
index 89910ef..db10fdf 100644
--- a/R/enrichmat.R
+++ b/R/enrichmat.R
@@ -250,7 +250,7 @@ layer_fun <- function(j, i, x, y, w, h, f)
apply(rmat, MARGIN = margin, max, na.rm = TRUE),
FUN = "/")
} else {
- rmat <- rmat/max(rmat, na.rm = TRUE)
+ rmat <- rmat / max(rmat, na.rm = TRUE)
}
grid.circle(
@@ -271,11 +271,14 @@ heatmap_color_fun <- function(NES_mat,
NES_mat <- NES_mat[!is.na(NES_mat)]
extended_range <- range_extend(NES_mat, nearest = 0.1)
- if (all(sign(NES_mat) %in% c(0, +1))) {
- breaks <- c(0, 1, extended_range[2])
+ # if (all(sign(NES_mat) %in% c(0, +1))) {
+ if (all(NES_mat >= -1)) {
+ breaks <- c(-1, 1, extended_range[2])
colors <- c("white", "white", colors[2])
- } else if (all(sign(NES_mat) %in% c(0, -1))) {
- breaks <- c(extended_range[1], -1, 0)
+ }
+ # } else if (all(sign(NES_mat) %in% c(0, -1))) {
+ else if (all(NES_mat <= +1)) {
+ breaks <- c(extended_range[1], -1, 1)
colors <- c(colors[1], "white", "white")
} else {
breaks <- c(extended_range[1], -1, 1, extended_range[2])
diff --git a/R/fgsea2.R b/R/fgsea2.R
index 71a2821..0b12e30 100644
--- a/R/fgsea2.R
+++ b/R/fgsea2.R
@@ -4,14 +4,14 @@
#' output of \code{\link{msigdbr2}}.
#'
#' @param pathways output of \code{\link{msigdbr2}}.
-#' @param stats output of \code{\link{get_ranking}}.
-#' @param gene_column character string; the name of a column in \code{pathways}
+#' @param stats output of \code{\link{rank_genes}}.
+#' @param gene_column character; the name of a column in \code{pathways}
#' containing the genes in each pathway.
-#' @param adjust.method character string; the p-value correction method. Can be
+#' @param adjust.method character; the p-value correction method. Can be
#' abbreviated. See \code{\link[stats]{p.adjust.methods}}.
#' @param adjust.globally logical; should p-values from all contrasts be
-#' adjusted together using \code{adjust.method}? Set to \code{FALSE} if the contrasts
-#' being tested are not closely related.
+#' adjusted together using \code{adjust.method}? Set to \code{FALSE} if the
+#' contrasts being tested are not closely related.
#' @param seed numeric or \code{NULL}; passed to \code{set.seed}.
#' @param ... additional arguments passed to
#' \code{\link[fgsea]{fgseaMultilevel}}.
@@ -60,7 +60,6 @@ fgsea2 <- function(pathways,
res <- rbindlist(res)
setDT(res)
-
pathways <- pathways[, list(gs_subcat, gs_exact_source, gs_description)]
res <- merge(res, pathways, sort = FALSE,
@@ -68,7 +67,10 @@ fgsea2 <- function(pathways,
# p-value adjustment
by <- "gs_subcat"
- if (!adjust.globally) { by <- c(by, "contrast") }
+ if (!adjust.globally) {
+ by <- c(by, "contrast")
+ }
+
res[, padj := p.adjust(pval, method = adjust.method), by = by]
return(res)
diff --git a/R/fora2.R b/R/fora2.R
index a28c4c3..daaf43d 100644
--- a/R/fora2.R
+++ b/R/fora2.R
@@ -3,8 +3,10 @@
#' @description Custom wrapper for \code{\link[fgsea]{fora}}.
#'
#' @param pathways output of \code{link{msigdbr2}}.
-#' @param genes named list of genes.
-#' @param universe character vector of background genes.
+#' @param genes named list of genes. These are usually clusters of related
+#' genes.
+#' @param universe character vector of background genes. Usually, this is all
+#' unique elements of `genes`.
#' @param gene_column character; the name of the column in pathways containing
#' the elements of each gene set.
#' @param minSize integer; minimum size of gene sets allowed for testing.
@@ -16,6 +18,18 @@
#' adjusted together using \code{adjust.method}? Set to \code{FALSE} if the
#' contrasts being tested are not closely related.
#'
+#' @details If \code{adjust.method = "scale"}, the function will calculate the
+#' maximum overlap for each combination of \code{names(genes)} and (if
+#' \code{adjust.globally = TRUE}), the entries of the "gs_subcat" column of
+#' \code{pathways}. For each gene set, it will then calculate the ratio of the
+#' size of that set's overlap to one plus the maximum overlap. The addition of
+#' 1 in the denominator is to penalize small maximum overlaps. The
+#' \code{log10(pval)} is multiplied by this overlap ratio and then
+#' back-transformed to obtain p-values adjusted by how well they describe each
+#' group defined by \code{genes}.
+#'
+#' @seealso \code{\link[fgsea]{fora}}
+#'
#' @importFrom stats p.adjust p.adjust.methods
#' @importFrom data.table setDT rbindlist setorderv `:=`
#' @importFrom tibble deframe
@@ -30,7 +44,7 @@ fora2 <- function(pathways,
universe,
gene_column = "entrez_gene",
minSize = 1,
- maxSize = Inf,
+ maxSize = length(universe) - 1,
adjust.method = "scale",
adjust.globally = TRUE)
{
@@ -63,13 +77,15 @@ fora2 <- function(pathways,
# p-value adjustment
by <- "module"
- if (!adjust.globally) { by <- c(by, "gs_subcat") }
+ if (!adjust.globally) {
+ by <- c(by, "gs_subcat")
+ }
# Transform p-values to account for overlap ratio
if (adjust.method == "scale") {
res[, maxOverlap := max(overlap), by = by]
- res[, overlapRatio := overlap / maxOverlap]
- res[, padj := 10^(log10(pval) * overlapRatio)]
+ res[, overlapRatio := overlap / (maxOverlap + 1)]
+ res[, padj := 10 ^ (log10(pval) * overlapRatio)]
} else {
res[, padj := p.adjust(pval, method = adjust.method), by = by]
}
diff --git a/R/ggplotMDS.R b/R/ggplotMDS.R
deleted file mode 100644
index 6a57198..0000000
--- a/R/ggplotMDS.R
+++ /dev/null
@@ -1,44 +0,0 @@
-#' @title MDS plots with ggplot2
-#'
-#' @description A \code{ggplot2} version of \code{\link[limma]{plotMDS}}
-#' tailored to the MoTrPAC PASS1B data. Samples are labeled by the number of
-#' weeks of exercise training (0, 1, 2, 4, 8) and colored according to sex.
-#'
-#' @param object object of class \code{link[MSnbase:MSnSet-class]{MSnSet}}.
-#'
-#' @return A \code{\link[ggplot2]{ggplot}} object.
-#'
-#' @import ggplot2
-#' @importFrom limma plotMDS
-#' @importFrom MSnbase exprs
-#'
-#' @export ggplotMDS
-
-ggplotMDS <- function(object) {
- mds.obj <- plotMDS(exprs(object), plot = F)
- var_expl <- mds.obj[["var.explained"]]
- var_expl <- sprintf("Leading logFC dim %g (%g%%)",
- seq_along(var_expl), round(100 * var_expl))[1:2]
-
- point_label <- ifelse(object$timepoint == "SED", "0",
- sub("W", "", object$timepoint))
-
- p <- ggplot(mapping = aes(x = mds.obj$x, y = mds.obj$y)) +
- labs(x = var_expl[1], y = var_expl[2]) +
- geom_text(aes(label = point_label, color = object$sex),
- size = 2.46) +
- scale_color_manual(name = "Sex",
- values = c("#ff6eff", "#3366ff"),
- breaks = c("Female", "Male")) +
- labs(subtitle = "Samples labeled by weeks of ExT") +
- theme_pub() +
- theme(axis.line.y.right = element_blank(),
- strip.text = element_text(hjust = 0,
- margin = margin(t=0, r=0, b=5, l=0)),
- legend.key.size = unit(6, "pt"),
- legend.margin = margin(r = 0, l = 0),
- panel.grid.minor = element_line(linewidth = 0))
-
- return(p)
-}
-
diff --git a/R/globals.R b/R/globals.R
index 9abbf4f..3c93510 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -42,7 +42,8 @@ utils::globalVariables(
"moduleNum",
"ME",
"plotlist",
- "x"
+ "x",
+ "rank_metric"
)
)
diff --git a/R/limma_full.R b/R/limma_full.R
index b966c6d..aadafe7 100644
--- a/R/limma_full.R
+++ b/R/limma_full.R
@@ -3,9 +3,10 @@
#' @description A convenience wrapper for differential analysis with LIMMA.
#' Performs moderated t-tests, F-tests, or linear regression.
#'
-#' @param object Object of class \code{\link[MSnbase:MSnSet-class]{MSnSet}}. The
-#' \code{exprs} slot can be either a matrix of log\eqn{_2} relative abundances
-#' or counts. If the latter, the limma--edgeR pipeline
+#' @param object Object of class
+#' \code{\link[Biobase:ExpressionSet-class]{ExpressionSet}}. The \code{exprs}
+#' slot can be either a matrix of log\eqn{_2} relative abundances or counts.
+#' If the latter, the limma--edgeR pipeline
#' (\url{https://doi.org/10.12688/f1000research.9005.3}) will automatically be
#' used.
#' @param model.str character; formulation of the model (e.g. \code{"~ a + b"}).
@@ -30,12 +31,11 @@
#' weight of 1). Weights affect the logFC and P.Value columns of the results.
#' @param block \code{NULL} or character; name of a column in
#' \code{pData(object)} specifying a blocking variable. Passed to
-#' \code{\link[limma]{duplicateCorrelation}}. See
-#' \href{https://support.bioconductor.org/p/125489/#125602}{When to use
-#' \code{duplicateCorrelation}}. Section 9.7 "Multi-level Experiments" of the
-#' \href{https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf}{LIMMA
-#' User's Guide} explains when to use \code{duplicateCorrelation}. Currently
-#' ignored if \code{exprs(object)} is a matrix of counts.
+#' \code{\link[limma]{duplicateCorrelation}}. Section 9.7 "Multi-level
+#' Experiments" of the LIMMA User's Guide (see
+#' \code{\link[limma]{limmaUsersGuide}}) explains when to use
+#' \code{duplicateCorrelation}. Currently ignored if \code{exprs(object)} is a
+#' matrix of counts.
#' @param plot logical; whether to generate diagnostic plots. If \code{TRUE},
#' generates a barplot of weights applied to each sample and a plot of the
#' mean-variance trend using \code{\link[limma]{plotSA}}.
@@ -44,24 +44,23 @@
#' See \code{\link[stats]{p.adjust}} for details.
#' @param adjust.globally logical; should p-values from all contrasts be
#' adjusted together using \code{adjust.method}? Set to \code{FALSE} if the
-#' contrasts being tested are not closely related. See the "Multiple Testing
-#' Across Contrasts" section of the LIMMA User's Guide (linked in "See Also")
-#' for more information.
+#' contrasts being tested are not closely related. See Section 13.3 "Multiple
+#' Testing Across Contrasts" of the LIMMA User's Guide
+#' (\code{\link[limma]{limmaUsersGuide}}) for more information.
#' @param ... additional arguments passed to \code{\link[limma]{plotSA}}.
#'
#' @return \code{data.frame}. Output of \code{\link[limma]{topTable}} with
#' additional columns \code{feature}, \code{contrast}, and column(s) for the
-#' standard error (se) of the logFC. All columns from \code{fData} are also
+#' standard error (SE) of the logFC. All columns from \code{fData} are also
#' included.
#'
-#'
-#' @details An MDS plot (produced by \code{\link[limma]{plotMDS}}) is used to
-#' determine the appropriate value of \code{var.group}. If samples within
-#' phenotype groups tend to cluster well and have similar dispersions, the
-#' default \code{var.group = character(0)} is recommended. If samples within
-#' phenotype groups tend to cluster well, but different groups are more or
-#' less dispersed, it may be beneficial to set \code{var.group} to the name of
-#' the phenotype column. If one or more samples tend to cluster poorly with
+#' @details An MDS plot (\code{\link[limma]{plotMDS}}) is used to determine the
+#' appropriate value of \code{var.group}. If samples within phenotype groups
+#' tend to cluster well and have similar dispersions, the default
+#' \code{var.group = character(0)} is recommended. If samples within phenotype
+#' groups tend to cluster well, but different groups are more or less
+#' dispersed, it may be beneficial to set \code{var.group} to the name of the
+#' phenotype column. If one or more samples tend to cluster poorly with
#' samples of the same phenotype, it may be beneficial to weight by sample.
#' That is, set \code{var.group} to the name of the column in
#' \code{pData(object)} that uniquely identifies each sample. If variation
@@ -78,6 +77,9 @@
#' \href{https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf}{LIMMA
#' User's Guide}
#'
+#' \href{https://support.bioconductor.org/p/125489/#125602}{When to use
+#' \code{duplicateCorrelation}}
+#'
#' \href{https://online.stat.psu.edu/stat555/node/1/}{PennState STAT 555 -
#' Statistical Analysis of Genomics Data}
#'
@@ -117,7 +119,7 @@
#' annals of applied statistics, 10}(2), 946--963.
#' \url{https://doi.org/10.1214/16-AOAS920}
#'
-#' @importFrom MSnbase exprs pData
+#' @importFrom Biobase exprs pData
#' @importFrom stats terms model.matrix p.adjust
#' @importFrom data.table rbindlist
#' @importFrom graphics barplot abline
@@ -219,8 +221,7 @@ limma_full <- function(object,
# MDS plot
if (plot) {
- lcpm <- cpm(dge$counts, log = TRUE,
- prior.count = 0.5)
+ lcpm <- cpm(dge$counts, log = TRUE)
plotMDS(lcpm, label = dge$samples[[coef.str]])
}
diff --git a/R/msigdbr2.R b/R/msigdbr2.R
index b764dcf..69b5261 100644
--- a/R/msigdbr2.R
+++ b/R/msigdbr2.R
@@ -18,7 +18,7 @@
#' (term description), and a list-column of gene IDs specified by
#' \code{genes}.
#'
-#' @importFrom data.table rbindlist `.SD` `:=` setnames setDF
+#' @importFrom data.table rbindlist `.SD` `:=` setnames
#' @importFrom msigdbr msigdbr_collections msigdbr
#' @importFrom ontologyIndex get_OBO
#'
@@ -77,7 +77,6 @@ msigdbr2 <- function(species = "Homo sapiens",
# Update GO descriptions
paths <- update_GO_names(paths, capitalize = capitalize)
- setDF(paths)
return(paths)
}
diff --git a/R/plot_ORA.R b/R/plot_ORA.R
deleted file mode 100644
index 92a1da9..0000000
--- a/R/plot_ORA.R
+++ /dev/null
@@ -1,101 +0,0 @@
-#' @title Plot module ORA results
-#'
-#' @description Dotplot of the top over-represented terms by WGCNA module.
-#'
-#' @param x output of \code{\link[fgsea]{fora}} or \code{\link{fora2}}.
-#' @param n_terms integer; maximum number of top over-represented terms to
-#' display from each module.
-#' @param mods integer; the module numbers to include in the plot. By default,
-#' terms from modules 1-5 (the 5 largest modules) are shown.
-#' @param subset character; the gene set subcategory of terms to plot.
-#' Default is "GO:MF".
-#' @param rel_heights numeric; vector of length 2 specifying the relative
-#' heights of the plot and color legend, respectively. Default is c(0.8, 0.2).
-#'
-#' @return A \code{\link[ggplot2]{ggplot}} object.
-#'
-#' @import ggplot2
-#' @importFrom latex2exp TeX
-#' @importFrom cowplot get_legend plot_grid
-#' @importFrom scales breaks_extended
-#' @importFrom data.table setDT setorder `:=` `.SD`
-#'
-#' @export plot_ORA
-
-plot_ORA <- function(x,
- n_terms = 5, # number of top terms per module
- mods = 1:5, # module numbers to plot
- subset = "GO:MF",
- rel_heights = c(0.8, 0.2))
-{
- # TODO add input checks
-
- mods <- mods[mods %in% 1:nlevels(x$module)]
-
- setDT(x)
- x <- subset(x,
- subset = (gs_subcat %in% subset) &
- (padj < 0.05) &
- (module %in% levels(module)[mods]))
- # x <- subset(x[, , with = FALSE],
- # subset = (gs_subcat %in% gs_subcat) &
- # (padj < 0.05) & (module %in% levels(module)[mods]))
- setorder(x, module, padj, pval)
- x <- x[, head(.SD, n_terms), by = module]
- x[, row_labels := ifelse(nchar(gs_description) > 35 + nchar(pathway) + 5,
- sprintf("%s...(%s)",
- substr(gs_description, 1, 30),
- pathway),
- gs_description)]
- x[, row_labels := factor(row_labels, levels = rev(unique(row_labels)))]
-
- p1 <- ggplot(x) +
- geom_point(aes(x = module, y = row_labels,
- size = -log10(padj),
- color = overlapRatio)) +
- scale_size_area(name = latex2exp::TeX("$-log_{10}$(scaled p-value)"),
- breaks = scales::breaks_extended(),
- max_size = 3) +
- scale_color_viridis_c(name = "Overlap Ratio",
- direction = -1,
- limits = c(0, 1),
- breaks = seq(0, 1, 0.2)) +
- labs(x = NULL, y = NULL) +
- guides(
- size = guide_legend(title.position = "top",
- title.vjust = -0.5,
- keyheight = unit(5, "pt"),
- keywidth = unit(5, "pt"),
- title.theme = element_text(size = 6,
- margin = margin(b = 0)),
- order = 1),
- color = guide_colorbar(
- title.position = "top",
- label.theme = element_text(size = 5, angle = 45,
- hjust = 1, vjust = 1),
- title.theme = element_text(size = 6, margin = margin(r = 5)),
- barheight = unit(5, "pt"),
- barwidth = unit(60, "pt"),
- frame.colour = "black",
- ticks.colour = "black",
- order = 2)) +
- scale_y_discrete(position = "right") +
- theme_pub() +
- theme(line = element_line(linewidth = 0.3, color = "black"),
- axis.ticks = element_line(linetype = 0),
- axis.text.x = element_text(size = 6,
- angle = 90, hjust = 1, vjust = 0.5),
- axis.text.y.right = element_text(size = 6),
- legend.position = "bottom",
- legend.box.just = "left",
- legend.direction = "horizontal",
- legend.margin = margin(t = 4, r = 5, b = 4, l = 8),
- plot.margin = margin(t = 5, r = 5, b = 5, l = 10))
-
- p2 <- p1 + theme(legend.position = "none")
- le1 <- cowplot::get_legend(p1)
- p <- cowplot::plot_grid(p2, le1, nrow = 2, rel_heights = rel_heights)
-
- return(p)
-}
-
diff --git a/R/plot_analyte.R b/R/plot_analyte.R
deleted file mode 100644
index e36c823..0000000
--- a/R/plot_analyte.R
+++ /dev/null
@@ -1,40 +0,0 @@
-#' @title Visualize clinical analyte data
-#'
-#' @description Generate a scatterplot of clinical analyte values.
-#'
-#' @param x a \code{data.frame} with required columns value, sex, and timepoint.
-#' @param analyte character; the name of a column of \code{x}.
-#'
-#' @import ggplot2
-#' @importFrom ggbeeswarm geom_quasirandom
-#'
-#' @export plot_analyte
-
-
-## Function to plot a specific analyte ----
-plot_analyte <- function(x, analyte = "")
-{
- ggplot(x, aes(x = timepoint, y = !!sym(analyte), color = sex)) +
- stat_summary(fun.data = ~ exp(mean_se(log(.x))),
- show.legend = FALSE,
- geom = "crossbar", width = 0.8,
- na.rm = TRUE, fatten = 1, linewidth = 0.4) +
- scale_color_manual(values = c("#ff6eff", "#5555ff"),
- breaks = c("Female", "Male")) +
- guides(color = NULL) +
- geom_quasirandom(color = "black", width = 0.35, size = 0.4, na.rm = T) +
- facet_wrap(~ sex, scales = "free_x") +
- scale_x_discrete(name = NULL) +
- theme_pub() +
- theme(panel.grid = element_line(color = NULL),
- axis.ticks.x = element_blank(),
- axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
- axis.title.y = element_text(margin = margin(r = 2.5,
- unit = "pt")),
- strip.background = element_blank(),
- panel.spacing = unit(-1, "pt"),
- legend.position = "bottom",
- legend.direction = "horizontal",
- legend.margin = margin(t = -3, b = -3, unit = "pt"))
-}
-
diff --git a/R/plot_eigenfeature.R b/R/plot_eigenfeature.R
deleted file mode 100644
index 3dcfbca..0000000
--- a/R/plot_eigenfeature.R
+++ /dev/null
@@ -1,67 +0,0 @@
-#' @title Visualize WGCNA module eigenfeatures
-#'
-#' @description Create scatterplots of each module eigenfeature (principal
-#' eigenvectors).
-#'
-#' @param x output of \code{\link{run_WGCNA}}.
-#'
-#' @import ggplot2
-#' @importFrom scales breaks_extended
-#' @importFrom ggbeeswarm position_beeswarm
-#' @importFrom patchwork wrap_plots
-#' @importFrom data.table `:=` `.N` `.SD` as.data.table
-#'
-#' @export plot_eigenfeature
-
-plot_eigenfeature <- function(x)
-{
- # Module sizes
- mod_size <- as.data.table(x$modules)
- mod_size <- mod_size[, list(n = .N), by = moduleID]
-
- # Module MEs
- x <- as.data.table(x$MEs)
- x <- subset(x, subset = moduleNum != 0) # remove grey module
- x <- merge(x, mod_size, by = "moduleID", all.x = TRUE, all.y = FALSE)
- x[, moduleID := droplevels(moduleID)]
-
- # Combine plots
- plotlist <- lapply(levels(x$moduleID), function(mod_i) {
- xi <- subset(x, moduleID == mod_i)
-
- mod_i_size <- xi[["n"]][1] # module size
-
- set.seed(0)
- ggplot(xi, aes(x = timepoint, y = ME)) +
- stat_summary(fun.data = "mean_sdl",
- fun.args = list(mult = 1),
- mapping = aes(color = sex),
- geom = "crossbar", width = 0.7,
- fatten = 1, linewidth = 0.5) +
- geom_point(size = 0.4, color = "black", shape = 16,
- position = position_beeswarm(cex = 3, dodge.width = 0.34)) +
- facet_wrap(~ sex, nrow = 1) +
- labs(title = sprintf("%s (n = %d)", mod_i, mod_i_size),
- y = NULL, x = NULL) +
- scale_color_manual(name = "Sex",
- values = c("#ff6eff", "#5555ff")) +
- scale_y_continuous(breaks = breaks_extended()) +
- theme_pub() +
- theme(panel.grid.major.y = element_line(linewidth = 0),
- panel.grid.minor.y = element_line(linewidth = 0),
- panel.grid.major.x = element_blank(),
- axis.ticks.x = element_blank(),
- axis.text.x = element_text(angle = 90,
- hjust = 1, vjust = 0.5,
- margin = margin(t = 0)),
- panel.spacing = unit(0, "points"),
- strip.text = element_blank(),
- plot.margin = unit(rep(3, 4), units = "pt")
- )
- })
-
- p <- wrap_plots(plotlist, ncol = 4, guides = "collect")
-
- return(p)
-}
-
diff --git a/R/plot_upset.R b/R/plot_upset.R
deleted file mode 100644
index 09c8720..0000000
--- a/R/plot_upset.R
+++ /dev/null
@@ -1,138 +0,0 @@
-#' @title Create Annotated UpSet Plot
-#'
-#' @description Create an UpSet plot annotated with intersection and set sizes.
-#' Essentially a modified version of \code{\link[ComplexHeatmap]{UpSet}}.
-#'
-#' @inheritParams enrichmat
-#' @param x list; named list of elements in each set. Passed to
-#' \code{\link[ComplexHeatmap]{make_comb_mat}}.
-#' @param top_n_comb numeric; number of largest intersections to show. Default
-#' is 10.
-#' @param comb_title character; title of top barplot annotation. Default is
-#' "Intersection Size".
-#' @param set_title character; title of right barplot annotation. Default is
-#' "Set Size".
-#' @param scale numeric; scaling factor for all plot elements. Improves
-#' appearance. Default is 2.
-#' @param barplot_args list; additional arguments passed to
-#' \code{\link[ComplexHeatmap]{anno_barplot}}.
-#' @param filename character; the filename used to save the UpSet plot. If
-#' provided, the plot will not be displayed.
-#' @param height numeric; the save height.
-#' @param width numeric; the save width.
-#' @param units character; the units of \code{height} and \code{width}. Default
-#' is "in" (inches).
-#' @param upset_args list; additional arguments passed to
-#' \code{\link[ComplexHeatmap]{UpSet}}.
-#'
-#' @details See \code{\link[ComplexHeatmap]{UpSet}} for details and sections
-#' mentioned in Arguments.
-#'
-#' @import ComplexHeatmap
-#' @importFrom grid gpar unit
-#' @importFrom utils modifyList
-#' @importFrom grDevices dev.off
-#'
-#' @export plot_upset
-#'
-#' @references Lex, A., Gehlenborg, N., Strobelt, H., Vuillemot, R., & Pfister,
-#' H. (2014). UpSet: Visualization of Intersecting Sets. \emph{IEEE
-#' transactions on visualization and computer graphics, 20}(12), 1983--1992.
-#' \url{https://doi.org/10.1109/TVCG.2014.2346248}
-
-plot_upset <- function(x,
- top_n_comb = 10,
- comb_title = "Intersection\nSize",
- set_title = "Set Size",
- scale = 2,
- barplot_args = list(),
- filename = character(0),
- height = 2 * scale,
- width = 3.4 * scale,
- units = "in",
- save_args = list(),
- upset_args = list())
-{
- m <- make_comb_mat(x)
- cs <- comb_size(m)
- ss <- set_size(m)
- m <- m[, order(-cs)[1:min(length(cs), top_n_comb)]]
- cs <- comb_size(m)
- row_order <- seq_along(ss)
- column_order <- order(-cs)
-
- ## Base UpSet plot ----
- upset_args <- modifyList(
- x = list(
- m = m,
- row_names_gp = gpar(fontsize = 7*scale),
- width = length(cs)*unit(12, "pt")*scale,
- height = length(ss)*unit(12, "pt")*scale,
- lwd = 1.8*scale,
- pt_size = unit(7*scale, "pt"),
- comb_order = column_order,
- set_order = row_order,
- row_labels = set_name(m)
- ),
- val = upset_args, keep.null = TRUE)
-
- up <- do.call(what = UpSet,
- args = upset_args)
-
- ## Modify barplot annotations ----
- barplot_args <- modifyList(
- x = list(
- height = 6*unit(12, "pt")*scale,
- width = 6*unit(12, "pt")*scale,
- gp = gpar(fill = "black", color = "black"),
- add_numbers = TRUE,
- border = FALSE,
- numbers_gp = gpar(fontsize = 6*scale),
- axis = FALSE
- ),
- val = barplot_args, keep.null = TRUE)
-
- barplot_args[["row_names_max_width"]] <- max_text_width(
- text = upset_args[["row_labels"]],
- gp = upset_args[["row_names_gp"]]
- )
-
-
-
- if (!("top_annotation" %in% names(upset_args))) {
- up@top_annotation <- HeatmapAnnotation(
- comb_size = do.call(what = anno_barplot,
- args = c(list(x = cs), barplot_args)),
- annotation_name_gp = gpar(fontsize = 7*scale),
- annotation_name_rot = 0,
- annotation_label = comb_title,
- annotation_name_side = "left",
- height = unit(12, "pt")*6*scale
- )
- }
-
- if (!("right_annotation" %in% names(upset_args))) {
- up@right_annotation <- HeatmapAnnotation(
- set_size = do.call(what = anno_barplot,
- args = c(list(x = ss), barplot_args)),
- annotation_name_gp = gpar(fontsize = 7*scale),
- annotation_name_rot = 0,
- annotation_label = set_title,
- which = "row",
- width = unit(12, "pt")*4*scale
- )
- }
-
- ## Save plot ----
- if (!identical(filename, character(0))) {
- save_heatmap(filename = filename,
- height = height,
- width = width,
- units = units,
- save_args = save_args)
- on.exit(expr = dev.off())
- }
-
- draw(up)
-}
-
diff --git a/R/plot_volcano.R b/R/plot_volcano.R
deleted file mode 100644
index dff8a0a..0000000
--- a/R/plot_volcano.R
+++ /dev/null
@@ -1,100 +0,0 @@
-#' @title Create Volcano Plot
-#'
-#' @description Create a volcano plot.
-#'
-#' @param x a \code{matrix} or \code{data.frame} containing differential
-#' analysis results. Must include columns logFC, log10_pval, sign_logFC,
-#' contrast, and label.
-#' @param pval_cutoff numeric; cutoff for p-values to be considered significant.
-#' Adds a dashed horizontal line to the plot.
-#' @param colors character; length 3 vector of significantly negative,
-#' significantly positive, and non-significant (NS) logFC colors.
-#'
-#' @returns A \code{ggplot2} object.
-#'
-#' @md
-#'
-#' @import ggplot2
-#' @importFrom scales extended_breaks
-#' @importFrom data.table setDT `:=` `.N`
-#'
-#' @export plot_volcano
-
-plot_volcano <- function(x,
- pval_cutoff = 0.05,
- colors = c("#3366ff", "darkred", "grey"))
-{
- setDT(x)
-
- p <- ggplot(x, aes(x = logFC, y = log10_pval)) +
- geom_point(aes(color = sign_logFC),
- alpha = 0.5, size = 1, shape = 16) +
- scale_x_continuous(
- limits = range(x[["logFC"]], na.rm = T) * 1.1,
- expand = expansion(mult = 0),
- breaks = scales::extended_breaks(n = 5)
- ) +
- theme_pub() +
- theme(axis.line.y.right = element_blank(),
- strip.background = element_blank(),
- strip.text = element_text(hjust = 0,
- margin = margin(t=0, r=0, b=10, l=0)),
- legend.position = "none",
- panel.grid = element_line(linewidth = 0))
-
- # y-axis limits
- y_lims <- range_extend(x[["log10_pval"]], nearest = 1)
- # Use expand_limits instead
- # y_lims[1] <- 0
- # y_lims[2] <- max(3, y_lims[2])
-
- # Horizontal line and secondary axis for p-value cutoff
- p <- p +
- geom_hline(yintercept = -log10(pval_cutoff),
- linewidth = 0.3,
- lty = "dashed") +
- scale_y_continuous(
- breaks = scales::extended_breaks(n = 5),
- limits = y_lims,
- sec.axis = sec_axis(trans = ~ 10 ^ (-.),
- breaks = pval_cutoff),
- expand = expansion(mult = c(0.01, 0.1))
- ) +
- expand_limits(y = c(0, 3)) +
- scale_color_manual(values = colors,
- breaks = levels(x[["sign_logFC"]]))
-
- # add annotations
- p <- p +
- geom_label(data = unique(x[, list(contrast, label)]),
- aes(label = label, x = -Inf, y = Inf),
- size = 5 * 0.35, label.size = NA,
- label.padding = unit(4, "pt"),
- fill = alpha("white", 0.5),
- # label.r = unit(1.5, "pt"),
- hjust = 0.05, vjust = 0) +
- coord_cartesian(clip = "off")
-
- return(p)
-
- ## Label points (old code)
- # if (!missing(label)) {
- # label_args <- list(mapping = aes(label = !!sym(label)),
- # na.rm = TRUE,
- # size = 5*0.352778, # convert points to mm
- # max.overlaps = Inf,
- # nudge_x = x[["nudge_x"]],
- # nudge_y = 0.1,
- # fill = alpha("white", 0.65),
- # force = 10, seed = 0,
- # color = "darkred",
- # min.segment.length = 0,
- # label.padding = 0.15)
- # label_args <- modifyList(x = label_args, val = list(...),
- # keep.null = TRUE)
- #
- # p <- p +
- # do.call(what = geom_label_repel, args = label_args)
- # }
-}
-
diff --git a/R/pval_hist.R b/R/pval_hist.R
deleted file mode 100644
index 31e48dd..0000000
--- a/R/pval_hist.R
+++ /dev/null
@@ -1,48 +0,0 @@
-#' @title Create p-value Histograms
-#'
-#' @description Create p-value histograms with
-#' \code{\link[ggplot2]{ggplot2-package}}.
-#'
-#' @inheritParams plot_volcano
-#' @param pval character; the name of a column in \code{x} containing p-values
-#' or adjusted p-values.
-#' @param ylims numeric vector of length 2. Limits of y-axis.
-#'
-#' @returns A \code{ggplot2} object.
-#'
-#' @import ggplot2
-#' @importFrom scales extended_breaks
-#'
-#' @export pval_hist
-
-pval_hist <- function(x,
- pval = "adj.P.Val",
- ylims = c(0, NA)) {
- ggplot(x) +
- geom_histogram(aes(x = !!sym(pval)),
- breaks = seq(0, 1, 0.05),
- color = "black", fill = "lightgrey") +
- scale_x_continuous(breaks = seq(0, 1, 0.2),
- expand = expansion(5e-3)) +
- scale_y_continuous(name = "Count",
- limits = ylims,
- breaks = scales::extended_breaks(n = 5),
- expand = expansion(mult = c(0.001, 0.05))) +
- theme_pub()
- # theme_bw() +
- # theme(text = element_text(size = 6.5*scale, color = "black"),
- # line = element_line(size = 0.3*scale, color = "black"),
- # panel.border = element_rect(size = 0.4*scale,
- # fill = NULL,
- # color = "black"),
- # axis.line.y.right = element_blank(),
- # axis.text = element_text(size = 5*scale),
- # strip.background = element_blank(),
- # strip.text = element_text(size = 6.5*scale,
- # hjust = 0),
- # panel.spacing = unit(0.08*scale, "in"),
- # plot.title = element_text(size = 7*scale),
- # panel.spacing.x = unit(0.3, "in"),
- # plot.margin = unit(c(5, 10, 5, 5), "pt"))
-}
-
diff --git a/R/get_ranking.R b/R/rank_genes.R
similarity index 61%
rename from R/get_ranking.R
rename to R/rank_genes.R
index 8d29cac..d86a5d9 100644
--- a/R/get_ranking.R
+++ b/R/rank_genes.R
@@ -1,54 +1,50 @@
-#' @title Calculate ranking metric values by contrast
+#' @title Calculate GSEA ranking metric values
#'
-#' @description Calculate ranking metric values for \code{\link{fgsea2}}.
+#' @description Calculate GSEA ranking metric values for \code{\link{fgsea2}}.
#'
#' @param x object of class \code{data.frame}. Typically results from
#' \code{\link{limma_full}}.
-#' @param genes string; the name of a column in \code{x}. A ranking metric value
-#' will be calculated for each unique entry. Can be gene symbols, Entrez IDs,
-#' or Ensembl genes.
+#' @param genes character; the name of a column in \code{x}. A ranking metric
+#' value will be calculated for each unique entry. Usually gene symbols,
+#' Entrez IDs, or Ensembl genes to work with the output of \code{msigdbr2}.
#' @param metric character; specifies how the ranking metric will be calculated
#' from \code{x}. Can be the name of a single column.
#' @param contrast_column character; the name of a column in \code{x}. Entries
#' in this column will be set as the names of the output list.
#'
#' @details The -log\eqn{_{10}}-transformed p-value signed by the fold-change is
-#' similar to using t-statistics, but provides better separation between the
-#' extreme values in the tails and those close to 0 that are not as
-#' interesting. The log\eqn{_2} fold-change should not be used as the ranking
-#' metric, as it does not capture variability in the measurements.
+#' similar to the t-statistic, but more clearly separates extreme values in
+#' the tails from those close to 0 that are not as interesting.
#'
-#' @returns A named vector (if no "contrast" column) or list of sorted ranking
-#' metric values for each contrast in \code{x}.
+#' @returns A named vector (if no "contrast" column) or list of named vectors of
+#' sorted ranking metric values for each contrast in \code{x}.
#'
#' @importFrom data.table `.SD` `:=` setDT
#' @importFrom tibble deframe
#'
-#' @export get_ranking
+#' @export rank_genes
-get_ranking <- function(x,
+rank_genes <- function(x,
genes,
metric = "-log10(P.Value)*sign(logFC)",
contrast_column = "contrast")
{
- stopifnot(is.data.frame(x))
-
if (!(genes %in% colnames(x))) {
stop(sprintf("'%s' is not a column in x.", genes))
}
if (!(contrast_column %in% colnames(x))) {
warning(sprintf("'%s' is not a column in x. Output will be a vector.",
contrast_column))
- stats <- .get_ranking(x, genes = genes, metric = metric)
+ stats <- .rank_genes(x, genes = genes, metric = metric)
} else {
x <- split.data.frame(x = x, f = x[[contrast_column]], drop = TRUE)
- stats <- lapply(x, .get_ranking, genes = genes, metric = metric)
+ stats <- lapply(x, .rank_genes, genes = genes, metric = metric)
}
return(stats)
}
-.get_ranking <- function(x, genes, metric) {
+.rank_genes <- function(x, genes, metric) {
setDT(x)
# Subset to non-missing genes
x <- subset(x, subset = !is.na(get(genes)))
@@ -56,8 +52,8 @@ get_ranking <- function(x,
stop(sprintf("All entries in '%s' are missing.", genes))
}
# Calculate ranking metric for each gene
- x[, rank.metric := eval(parse(text = metric))]
- cols <- c(genes, "rank.metric")
+ x[, rank_metric := eval(parse(text = metric))]
+ cols <- c(genes, "rank_metric")
x <- unique(x[, cols, with = FALSE])
x <- x[, lapply(.SD, mean, na.rm = TRUE), by = genes] # mean by gene
stats <- sort(deframe(x), decreasing = TRUE) # sorting doesn't actually matter
diff --git a/R/run_WGCNA.R b/R/run_WGCNA.R
index e9277f4..76a95b8 100644
--- a/R/run_WGCNA.R
+++ b/R/run_WGCNA.R
@@ -1,43 +1,34 @@
#' @title Run WGCNA Pipeline
#'
-#' @description Run the WGCNA pipeline used in the WAT manuscript.
+#' @description Run the WGCNA pipeline used in the MoTrPAC PASS1B WAT
+#' manuscript.
#'
-#' @param object object of class \code{\link[MSnbase:MSnSet-class]{MSnSet}}.
-#' \code{exprs} should be a matrix of log\eqn{_2}-transformed values. Count
-#' data is not accepted.
+#' @param object object of class
+#' \code{\link[Biobase:ExpressionSet-class]{ExpressionSet}}. \code{exprs}
+#' should be a matrix of log\eqn{_2}-transformed values. Count data is not
+#' accepted.
#' @param power integer; optional soft power vector (length 1 or greater). If
-#' not specified, the lowest value of 1:20 that satisfies scale-free fit
-#' R\eqn{^2} at least equal to \code{RsquaredCut} will be used.
-#' @param RsquaredCut numeric; desired minimum scale-free fit R\eqn{^2}. Ignored
-#' if \code{power} is provided.
-#' @param module_prefix character; what to append to the module numbers to
-#' create the "moduleID" column in \code{fData(object)}.
-#' @param merge_modules logical; if not provided, user input is requested to
-#' determine whether highly-correlated (r=0.85) modules should be combined
-#' based on plots that the function creates. Must be provided if session is
-#' not interactive.
+#' not specified, the lowest value of 1--20 that satisfies scale-free fit
+#' R\eqn{^2 \geq} \code{RsquaredCut} will be used.
+#' @param RsquaredCut numeric; minimum acceptable scale-free fit R\eqn{^2}.
+#' Ignored if \code{power} is provided.
+#' @param module_prefix character; appended to the module numbers to create the
+#' "moduleID" column.
#'
#' @returns Object of class \code{list} of length 2:
#'
#' \itemize{
#' \item "modules": a \code{data.frame} with the following columns, in
-#' addition to all columns in \code{MSnbase::fData(object)}.
+#' addition to all columns in \code{Biobase::fData(object)}.
#' \describe{
#' \item{moduleColor}{moduleColor; unique color assigned to each module.
#' The "grey" module always contains features that are not co-expressed.}
#' \item{moduleID}{factor; \code{module_prefix} followed by a unique
#' module number. The "grey" module is always 0.}
#' }
-#' \item "MEs": a \code{data.frame} with 6 variables.
+#' \item "MEs": a \code{data.frame} with 3 variables, in addition to all
+#' variables in \code{pData(object)}.
#' \describe{
-#' \item{\code{bid}}{integer; unique 5 digit identifier of all samples
-#' collected for an acute test/sample collection period. All samples
-#' collected during that period will have the same BID.}
-#' \item{\code{sex}}{factor; the sex of the rat with levels "Female" and
-#' "Male".}
-#' \item{\code{timepoint}}{factor; exercise training group. Either "SED"
-#' (sedentary) or the number of weeks of training ("1W", "2W", "4W",
-#' "8W").}
#' \item{\code{moduleID}}{factor; \code{module_prefix} followed by a
#' unique module number. The "grey" module is always 0.}
#' \item{\code{ME}}{numeric; module eigenfeature values. One per sample x
@@ -47,7 +38,7 @@
#' }
#' }
#'
-#' @importFrom MSnbase exprs pData fData `fData<-` `pData<-` featureNames
+#' @importFrom Biobase exprs pData fData
#' @importFrom WGCNA pickSoftThreshold adjacency TOMsimilarity labels2colors
#' plotDendroAndColors moduleEigengenes mergeCloseModules standardColors bicor
#' @importFrom stats hclust as.dist cor
@@ -84,9 +75,10 @@
run_WGCNA <- function(object,
power = 1:20,
RsquaredCut = 0.90,
- module_prefix = "", # "P", "M", "T"
- merge_modules)
+ module_prefix = "") # "P", "M", "T"
{
+ on.exit(invisible(gc()))
+
# Transpose for WGCNA
datExpr <- t(exprs(object))
@@ -99,7 +91,7 @@ run_WGCNA <- function(object,
# Choose soft-thresholding power
if (length(power) > 1) {
- message("Choosing soft-thresholding power ----")
+ message("Choosing soft-thresholding power...")
# Choosing the soft-thresholding power
sft <- pickSoftThreshold(data = datExpr,
powerVector = power,
@@ -115,13 +107,13 @@ run_WGCNA <- function(object,
cex1 <- 0.9
plot(sft$fitIndices[, 1],
- -sign(sft$fitIndices[, 3])*sft$fitIndices[, 2],
+ -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
xlab = "Soft Threshold (power)",
ylab = "Scale Free Topology Model Fit, signed R^2",
type = "n",
main = "Scale independence")
text(sft$fitIndices[, 1],
- -sign(sft$fitIndices[, 3])*sft$fitIndices[, 2],
+ -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
labels = power, cex = cex1, col = "red")
abline(h = 0.90, col = "red")
@@ -148,7 +140,7 @@ run_WGCNA <- function(object,
message(sprintf(msg, power))
}
- message("Calculating adjacency and Topological Overlap matrices ----")
+ message("Constructing adjacency matrix...")
## Adjacency matrix
adjacency <- adjacency(datExpr = datExpr,
power = power,
@@ -156,9 +148,8 @@ run_WGCNA <- function(object,
corFnc = bicor,
corOptions = list(use = "pairwise.complete.obs"))
- # Topological Overlap Matrix (TOM) and dissimilarity matrix
- TOM <- TOMsimilarity(adjMat = adjacency, TOMType = "unsigned")
- dissTOM <- 1 - TOM
+ message("Constructing dissimilarity matrix from topological overlap...")
+ dissTOM <- 1 - TOMsimilarity(adjMat = adjacency, TOMType = "unsigned")
# Hierarchical clustering based on dissTOM
geneTree <- hclust(as.dist(dissTOM), method = "average")
@@ -206,35 +197,19 @@ run_WGCNA <- function(object,
MEDissThres <- 0.15 # 0.85 correlation
abline(h = MEDissThres, col = "green")
- # Request user input
- request_input <- missing(merge_modules)
- while (request_input) {
- merge_modules <- readline(
- prompt = "Merge highly correlated modules? (y/n): "
- )
- if (!(merge_modules) %in% c("y", "n")) {
- message("Invalid input. Try again.")
- } else {
- merge_modules <- merge_modules == "y"
- request_input <- FALSE
- }
- }
-
# Merge highly correlated (1-MEDissThres) modules ----
- if (merge_modules) {
- message("Merging highly-correlated modules ----")
- # Call an automatic merging function
- merge <- mergeCloseModules(exprData = datExpr,
- colors = moduleColors,
- MEs = MEs,
- cutHeight = MEDissThres,
- verbose = 3,
- relabel = TRUE)
- # The merged module colors
- moduleColors <- merge$colors
- # Eigengenes of the new merged modules:
- MEs <- merge$newMEs
- }
+ message("Merging highly-correlated modules...")
+ # Call an automatic merging function
+ merge <- mergeCloseModules(exprData = datExpr,
+ colors = moduleColors,
+ MEs = MEs,
+ cutHeight = MEDissThres,
+ verbose = 3,
+ relabel = TRUE)
+ # The merged module colors
+ moduleColors <- merge$colors
+ # Eigengenes of the new merged modules:
+ MEs <- merge$newMEs
# Update dynamicMods
color_lvls <- c("grey", standardColors(n = 50))
@@ -250,16 +225,17 @@ run_WGCNA <- function(object,
colnames(MEs) <- color2id[sub("^ME", "", colnames(MEs))]
- ME_long <- pData(object)[, c("bid", "sex", "timepoint")]
+ ME_long <- pData(object)
+ id.vars <- colnames(ME_long)
setDT(ME_long)
ME_long <- cbind(ME_long, MEs)
- ME_long <- melt(ME_long, id.vars = c("bid", "sex", "timepoint"),
+ ME_long <- melt(ME_long, id.vars = id.vars,
variable.name = "moduleID",
value.name = "ME")
ME_long[, `:=` (moduleNum = as.numeric(
sub(paste0("^", module_prefix), "", moduleID)
- ))]
+ ))]
setorderv(ME_long, cols = "moduleNum")
ME_long[, `:=` (moduleID = factor(moduleID, levels = unique(moduleID)))]
setDF(ME_long)
diff --git a/R/theme_pub.R b/R/theme_pub.R
deleted file mode 100644
index c2c0cf1..0000000
--- a/R/theme_pub.R
+++ /dev/null
@@ -1,35 +0,0 @@
-#' @title Custom ggplot2 theme
-#'
-#' @description Theme used extensively by the plotting functions of this
-#' package, as well as to generate final plots for the associated publication.
-#' Built to adhere to the Nature publishing guidelines.
-#'
-#' @import ggplot2
-#'
-#' @export theme_pub
-
-theme_pub <- function() {
- theme_bw() +
- theme(
- text = element_text(size = 6, color = "black"),
- axis.title = element_text(size = 6, color = "black"),
- axis.text = element_text(size = 5, color = "black"),
- axis.text.y.right = element_text(size = 5, color = "black"),
- axis.line = element_line(size = 0.3, color = "black"),
- axis.ticks = element_line(size = 0.3, color = "black"),
-
- legend.title = element_text(size = 5.5, color = "black"),
- legend.text = element_text(size = 5, color = "black"),
-
- panel.border = element_blank(),
- panel.grid.major = element_line(size = 0.3),
- panel.spacing = unit(5, "pt"),
-
- plot.title = element_text(size = 6.5, color = "black"),
- plot.subtitle = element_text(size = 6, color = "black"),
- plot.background = element_rect(fill = "white", color = NA),
-
- strip.text = element_text(size = 6.5, color = "black")
- )
-}
-
diff --git a/R/update_GO_names.R b/R/update_GO_names.R
index c8a6244..f6c00a9 100644
--- a/R/update_GO_names.R
+++ b/R/update_GO_names.R
@@ -13,13 +13,18 @@
#' description if the first word does not contain a mix of capital and
#' lowercase letters. Improves appearance of plots, such as those produced by
#' \code{\link{enrichmat}}.
+#' @param obo_file character; optional path to the OBO file that should be used
+#' to update Gene Ontology term descriptions. Only provide when the
+#' appropriate OBO file cannot be extracted from the MSigDB Release Notes (See
+#' Details).
#'
#' @returns Object of class \code{data.frame}. The same as \code{x}, but with
#' updated descriptions.
#'
#' @details This function assumes that the phrase "GO-basic obo file released
#' on" is present in the MSigDB release notes for that version and is followed
-#' by a date.
+#' by a date. This date will replace "RELEASE_DATE" in
+#' "http://release.geneontology.org/RELEASE_DATE/ontology/go-basic.obo".
#'
#' @md
#'
@@ -60,7 +65,8 @@
update_GO_names <- function(x,
version = packageVersion("msigdbr"),
- capitalize = FALSE)
+ capitalize = FALSE,
+ obo_file)
{
go_subcats <- c("GO:MF", "GO:CC", "GO:BP")
@@ -76,18 +82,20 @@ update_GO_names <- function(x,
q1 <- bfcquery(bfc, query = rname, field = "rname")
- if (bfccount(q1) == 1L) {
- file <- q1$fpath
+ if (bfccount(q1) == 1L & missing(obo_file)) {
+ obo_file <- q1$fpath
} else {
- message(sprintf("Searching MSigDB %s Release Notes for OBO file date:",
- version))
- file <- obo_file(version = version)
- bfcadd(bfc, fpath = file, rname = rname, fname = "unique")
+ if (missing(obo_file)) {
+ message(sprintf("Searching MSigDB %s Release Notes for OBO file date:",
+ version))
+ obo_file <- obo_file(version = version)
+ }
+ bfcadd(bfc, fpath = obo_file, rname = rname, fname = "unique")
}
- message(paste("Updating GO term descriptions with", file))
+ message(paste("Updating GO term descriptions with", obo_file))
- obo_date <- sub(".*org/([^/]+)/.*", "\\1", file)
+ obo_date <- sub(".*org/([^/]+)/.*", "\\1", obo_file)
obo_rname <- paste0("GO_OBO_data_", obo_date)
q2 <- bfcquery(bfc, query = obo_rname, field = "rname")
@@ -96,7 +104,7 @@ update_GO_names <- function(x,
go.dt <- readRDS(file = bfcrpath(bfc, rnames = obo_rname))
} else {
message("Downloading OBO file to cache:")
- go_basic_list <- get_OBO(file,
+ go_basic_list <- get_OBO(file = obo_file,
propagate_relationships = "is_a",
extract_tags = "minimal")
go.dt <- as.data.frame(go_basic_list)
diff --git a/README.Rmd b/README.Rmd
index 2d26195..a2fa02c 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -15,13 +15,13 @@ knitr::opts_chunk$set(
# MotrpacRatTraining6moWAT
-A collection of functions for the analysis and visualization of the MoTrPAC PASS1B white adipose tissue (WAT) data as provided in the MotrpacRatTraining6moData package. This package was built specifically to generate the MotrpacRatTraining6moWATData package, so much of it is tailored only for use with those datasets, though users may find some of the functionality contained herein to be useful in other projects:
+A collection of functions for the analysis and visualization of the MoTrPAC PASS1B white adipose tissue (WAT) data as provided in the MotrpacRatTraining6moData package. This package was built specifically to generate the MotrpacRatTraining6moWATData package, so much of it is tailored only for use with those datasets, though users may find some of the functionality contained herein to be useful in other projects:
-* Differential analysis (DA) with the limma and edgeR packages
-* Fast Gene Set Enrichment Analysis (FGSEA) with fgsea and msigdbr
-* Kinase-Substrate Enrichment Analysis (KSEA) with PhosphoSitePlus
-* Over-representation analysis (ORA)
-* Weighted Gene Co-expression Network Analysis (WGCNA)
+- Differential analysis (DA) with the limma and edgeR packages
+- Fast Gene Set Enrichment Analysis (FGSEA) with fgsea and msigdbr
+- Kinase-Substrate Enrichment Analysis (KSEA) with PhosphoSitePlus
+- Over-representation analysis (ORA)
+- Weighted Gene Co-expression Network Analysis (WGCNA)
## Installation
@@ -33,3 +33,7 @@ if (!require("devtools", quietly = TRUE)) {
}
devtools::install_github("PNNL-Comp-Mass-Spec/MotrpacRatTraining6moWAT")
```
+
+# Preprint
+
+Many, G. M., Sanford, J. A., Sagendorf, T. J., Hou, Z., Nigro, P., Whytock, K., Amar, D., Caputo, T., Gay, N. R., Gaul, D. A., Hirshman, M., Jimenez-Morales, D., Lindholm, M. E., Muehlbauer, M. J., Vamvini, M., Bergman, B., Fernández, F. M., Goodyear, L. J., Ortlund, E. A., ... Schenk, S. (2023). Sexual dimorphism and the multi-omic response to exercise training in rat subcutaneous white adipose tissue. *BioRxiv*. https://doi.org/10.1101/2023.02.03.527012
diff --git a/README.md b/README.md
index 025dc1c..96bc323 100644
--- a/README.md
+++ b/README.md
@@ -5,21 +5,21 @@
A collection of functions for the analysis and visualization of the
MoTrPAC PASS1B white adipose tissue (WAT) data as provided in the
-MotrpacRatTraining6moData
+MotrpacRatTraining6moData
package. This package was built specifically to generate the
MotrpacRatTraining6moWATData package, so much of it is tailored only for
use with those datasets, though users may find some of the functionality
contained herein to be useful in other projects:
- Differential analysis (DA) with the
- limma
+ limma
and
- edgeR
+ edgeR
packages
- Fast Gene Set Enrichment Analysis (FGSEA) with
- fgsea
+ fgsea
and
- msigdbr
+ msigdbr
- Kinase-Substrate Enrichment Analysis (KSEA) with
PhosphoSitePlus
- Over-representation analysis (ORA)
@@ -35,3 +35,13 @@ if (!require("devtools", quietly = TRUE)) {
}
devtools::install_github("PNNL-Comp-Mass-Spec/MotrpacRatTraining6moWAT")
```
+
+# Preprint
+
+Many, G. M., Sanford, J. A., Sagendorf, T. J., Hou, Z., Nigro, P.,
+Whytock, K., Amar, D., Caputo, T., Gay, N. R., Gaul, D. A., Hirshman,
+M., Jimenez-Morales, D., Lindholm, M. E., Muehlbauer, M. J., Vamvini,
+M., Bergman, B., Fernández, F. M., Goodyear, L. J., Ortlund, E. A., …
+Schenk, S. (2023). Sexual dimorphism and the multi-omic response to
+exercise training in rat subcutaneous white adipose tissue. *BioRxiv*.
+https://doi.org/10.1101/2023.02.03.527012
diff --git a/man/MotrpacRatTraining6moWAT-package.Rd b/man/MotrpacRatTraining6moWAT-package.Rd
index b40a4d8..df54519 100644
--- a/man/MotrpacRatTraining6moWAT-package.Rd
+++ b/man/MotrpacRatTraining6moWAT-package.Rd
@@ -21,7 +21,7 @@ Useful links:
Other contributors:
\itemize{
- \item Zhenxin Hou (\href{https://orcid.org/0000-0002-1301-0799}{ORCID}) (Provided WGCNA-related code.) [contributor]
+ \item Zhenxin Hou (\href{https://orcid.org/0000-0002-1301-0799}{ORCID}) (Provided foundation for run_WGCNA function.) [contributor]
\item Vladislav Petyuk (\href{https://orcid.org/0000-0003-4076-151X}{ORCID}) (Provided foundation for limma_full function.) [contributor]
}
diff --git a/man/fgsea2.Rd b/man/fgsea2.Rd
index 7941be4..6035ea9 100644
--- a/man/fgsea2.Rd
+++ b/man/fgsea2.Rd
@@ -17,17 +17,17 @@ fgsea2(
\arguments{
\item{pathways}{output of \code{\link{msigdbr2}}.}
-\item{stats}{output of \code{\link{get_ranking}}.}
+\item{stats}{output of \code{\link{rank_genes}}.}
-\item{gene_column}{character string; the name of a column in \code{pathways}
+\item{gene_column}{character; the name of a column in \code{pathways}
containing the genes in each pathway.}
-\item{adjust.method}{character string; the p-value correction method. Can be
+\item{adjust.method}{character; the p-value correction method. Can be
abbreviated. See \code{\link[stats]{p.adjust.methods}}.}
\item{adjust.globally}{logical; should p-values from all contrasts be
-adjusted together using \code{adjust.method}? Set to \code{FALSE} if the contrasts
-being tested are not closely related.}
+adjusted together using \code{adjust.method}? Set to \code{FALSE} if the
+contrasts being tested are not closely related.}
\item{seed}{numeric or \code{NULL}; passed to \code{set.seed}.}
diff --git a/man/fora2.Rd b/man/fora2.Rd
index a1de740..0b0b289 100644
--- a/man/fora2.Rd
+++ b/man/fora2.Rd
@@ -10,7 +10,7 @@ fora2(
universe,
gene_column = "entrez_gene",
minSize = 1,
- maxSize = Inf,
+ maxSize = length(universe) - 1,
adjust.method = "scale",
adjust.globally = TRUE
)
@@ -18,9 +18,11 @@ fora2(
\arguments{
\item{pathways}{output of \code{link{msigdbr2}}.}
-\item{genes}{named list of genes.}
+\item{genes}{named list of genes. These are usually clusters of related
+genes.}
-\item{universe}{character vector of background genes.}
+\item{universe}{character vector of background genes. Usually, this is all
+unique elements of \code{genes}.}
\item{gene_column}{character; the name of the column in pathways containing
the elements of each gene set.}
@@ -40,6 +42,20 @@ contrasts being tested are not closely related.}
\description{
Custom wrapper for \code{\link[fgsea]{fora}}.
}
+\details{
+If \code{adjust.method = "scale"}, the function will calculate the
+maximum overlap for each combination of \code{names(genes)} and (if
+\code{adjust.globally = TRUE}), the entries of the "gs_subcat" column of
+\code{pathways}. For each gene set, it will then calculate the ratio of the
+size of that set's overlap to one plus the maximum overlap. The addition of
+1 in the denominator is to penalize small maximum overlaps. The
+\code{log10(pval)} is multiplied by this overlap ratio and then
+back-transformed to obtain p-values adjusted by how well they describe each
+group defined by \code{genes}.
+}
+\seealso{
+\code{\link[fgsea]{fora}}
+}
\author{
Tyler Sagendorf
}
diff --git a/man/get_ranking.Rd b/man/get_ranking.Rd
deleted file mode 100644
index fa65f5e..0000000
--- a/man/get_ranking.Rd
+++ /dev/null
@@ -1,41 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_ranking.R
-\name{get_ranking}
-\alias{get_ranking}
-\title{Calculate ranking metric values by contrast}
-\usage{
-get_ranking(
- x,
- genes,
- metric = "-log10(P.Value)*sign(logFC)",
- contrast_column = "contrast"
-)
-}
-\arguments{
-\item{x}{object of class \code{data.frame}. Typically results from
-\code{\link{limma_full}}.}
-
-\item{genes}{string; the name of a column in \code{x}. A ranking metric value
-will be calculated for each unique entry. Can be gene symbols, Entrez IDs,
-or Ensembl genes.}
-
-\item{metric}{character; specifies how the ranking metric will be calculated
-from \code{x}. Can be the name of a single column.}
-
-\item{contrast_column}{character; the name of a column in \code{x}. Entries
-in this column will be set as the names of the output list.}
-}
-\value{
-A named vector (if no "contrast" column) or list of sorted ranking
-metric values for each contrast in \code{x}.
-}
-\description{
-Calculate ranking metric values for \code{\link{fgsea2}}.
-}
-\details{
-The -log\eqn{_{10}}-transformed p-value signed by the fold-change is
-similar to using t-statistics, but provides better separation between the
-extreme values in the tails and those close to 0 that are not as
-interesting. The log\eqn{_2} fold-change should not be used as the ranking
-metric, as it does not capture variability in the measurements.
-}
diff --git a/man/ggplotMDS.Rd b/man/ggplotMDS.Rd
deleted file mode 100644
index ac65cce..0000000
--- a/man/ggplotMDS.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ggplotMDS.R
-\name{ggplotMDS}
-\alias{ggplotMDS}
-\title{MDS plots with ggplot2}
-\usage{
-ggplotMDS(object)
-}
-\arguments{
-\item{object}{object of class \code{link[MSnbase:MSnSet-class]{MSnSet}}.}
-}
-\value{
-A \code{\link[ggplot2]{ggplot}} object.
-}
-\description{
-A \code{ggplot2} version of \code{\link[limma]{plotMDS}}
-tailored to the MoTrPAC PASS1B data. Samples are labeled by the number of
-weeks of exercise training (0, 1, 2, 4, 8) and colored according to sex.
-}
diff --git a/man/limma_full.Rd b/man/limma_full.Rd
index a90e118..f74197c 100644
--- a/man/limma_full.Rd
+++ b/man/limma_full.Rd
@@ -20,9 +20,10 @@ limma_full(
)
}
\arguments{
-\item{object}{Object of class \code{\link[MSnbase:MSnSet-class]{MSnSet}}. The
-\code{exprs} slot can be either a matrix of log\eqn{_2} relative abundances
-or counts. If the latter, the limma--edgeR pipeline
+\item{object}{Object of class
+\code{\link[Biobase:ExpressionSet-class]{ExpressionSet}}. The \code{exprs}
+slot can be either a matrix of log\eqn{_2} relative abundances or counts.
+If the latter, the limma--edgeR pipeline
(\url{https://doi.org/10.12688/f1000research.9005.3}) will automatically be
used.}
@@ -54,12 +55,11 @@ weight of 1). Weights affect the logFC and P.Value columns of the results.}
\item{block}{\code{NULL} or character; name of a column in
\code{pData(object)} specifying a blocking variable. Passed to
-\code{\link[limma]{duplicateCorrelation}}. See
-\href{https://support.bioconductor.org/p/125489/#125602}{When to use
-\code{duplicateCorrelation}}. Section 9.7 "Multi-level Experiments" of the
-\href{https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf}{LIMMA
-User's Guide} explains when to use \code{duplicateCorrelation}. Currently
-ignored if \code{exprs(object)} is a matrix of counts.}
+\code{\link[limma]{duplicateCorrelation}}. Section 9.7 "Multi-level
+Experiments" of the LIMMA User's Guide (see
+\code{\link[limma]{limmaUsersGuide}}) explains when to use
+\code{duplicateCorrelation}. Currently ignored if \code{exprs(object)} is a
+matrix of counts.}
\item{plot}{logical; whether to generate diagnostic plots. If \code{TRUE},
generates a barplot of weights applied to each sample and a plot of the
@@ -71,16 +71,16 @@ See \code{\link[stats]{p.adjust}} for details.}
\item{adjust.globally}{logical; should p-values from all contrasts be
adjusted together using \code{adjust.method}? Set to \code{FALSE} if the
-contrasts being tested are not closely related. See the "Multiple Testing
-Across Contrasts" section of the LIMMA User's Guide (linked in "See Also")
-for more information.}
+contrasts being tested are not closely related. See Section 13.3 "Multiple
+Testing Across Contrasts" of the LIMMA User's Guide
+(\code{\link[limma]{limmaUsersGuide}}) for more information.}
\item{...}{additional arguments passed to \code{\link[limma]{plotSA}}.}
}
\value{
\code{data.frame}. Output of \code{\link[limma]{topTable}} with
additional columns \code{feature}, \code{contrast}, and column(s) for the
-standard error (se) of the logFC. All columns from \code{fData} are also
+standard error (SE) of the logFC. All columns from \code{fData} are also
included.
}
\description{
@@ -88,13 +88,13 @@ A convenience wrapper for differential analysis with LIMMA.
Performs moderated t-tests, F-tests, or linear regression.
}
\details{
-An MDS plot (produced by \code{\link[limma]{plotMDS}}) is used to
-determine the appropriate value of \code{var.group}. If samples within
-phenotype groups tend to cluster well and have similar dispersions, the
-default \code{var.group = character(0)} is recommended. If samples within
-phenotype groups tend to cluster well, but different groups are more or
-less dispersed, it may be beneficial to set \code{var.group} to the name of
-the phenotype column. If one or more samples tend to cluster poorly with
+An MDS plot (\code{\link[limma]{plotMDS}}) is used to determine the
+appropriate value of \code{var.group}. If samples within phenotype groups
+tend to cluster well and have similar dispersions, the default
+\code{var.group = character(0)} is recommended. If samples within phenotype
+groups tend to cluster well, but different groups are more or less
+dispersed, it may be beneficial to set \code{var.group} to the name of the
+phenotype column. If one or more samples tend to cluster poorly with
samples of the same phenotype, it may be beneficial to weight by sample.
That is, set \code{var.group} to the name of the column in
\code{pData(object)} that uniquely identifies each sample. If variation
@@ -140,6 +140,9 @@ annals of applied statistics, 10}(2), 946--963.
\href{https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf}{LIMMA
User's Guide}
+\href{https://support.bioconductor.org/p/125489/#125602}{When to use
+\code{duplicateCorrelation}}
+
\href{https://online.stat.psu.edu/stat555/node/1/}{PennState STAT 555 -
Statistical Analysis of Genomics Data}
diff --git a/man/plot_ORA.Rd b/man/plot_ORA.Rd
deleted file mode 100644
index fce3cf1..0000000
--- a/man/plot_ORA.Rd
+++ /dev/null
@@ -1,35 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_ORA.R
-\name{plot_ORA}
-\alias{plot_ORA}
-\title{Plot module ORA results}
-\usage{
-plot_ORA(
- x,
- n_terms = 5,
- mods = 1:5,
- subset = "GO:MF",
- rel_heights = c(0.8, 0.2)
-)
-}
-\arguments{
-\item{x}{output of \code{\link[fgsea]{fora}} or \code{\link{fora2}}.}
-
-\item{n_terms}{integer; maximum number of top over-represented terms to
-display from each module.}
-
-\item{mods}{integer; the module numbers to include in the plot. By default,
-terms from modules 1-5 (the 5 largest modules) are shown.}
-
-\item{subset}{character; the gene set subcategory of terms to plot.
-Default is "GO:MF".}
-
-\item{rel_heights}{numeric; vector of length 2 specifying the relative
-heights of the plot and color legend, respectively. Default is c(0.8, 0.2).}
-}
-\value{
-A \code{\link[ggplot2]{ggplot}} object.
-}
-\description{
-Dotplot of the top over-represented terms by WGCNA module.
-}
diff --git a/man/plot_analyte.Rd b/man/plot_analyte.Rd
deleted file mode 100644
index 9f3f630..0000000
--- a/man/plot_analyte.Rd
+++ /dev/null
@@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_analyte.R
-\name{plot_analyte}
-\alias{plot_analyte}
-\title{Visualize clinical analyte data}
-\usage{
-plot_analyte(x, analyte = "")
-}
-\arguments{
-\item{x}{a \code{data.frame} with required columns value, sex, and timepoint.}
-
-\item{analyte}{character; the name of a column of \code{x}.}
-}
-\description{
-Generate a scatterplot of clinical analyte values.
-}
diff --git a/man/plot_eigenfeature.Rd b/man/plot_eigenfeature.Rd
deleted file mode 100644
index 982a13f..0000000
--- a/man/plot_eigenfeature.Rd
+++ /dev/null
@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_eigenfeature.R
-\name{plot_eigenfeature}
-\alias{plot_eigenfeature}
-\title{Visualize WGCNA module eigenfeatures}
-\usage{
-plot_eigenfeature(x)
-}
-\arguments{
-\item{x}{output of \code{\link{run_WGCNA}}.}
-}
-\description{
-Create scatterplots of each module eigenfeature (principal
-eigenvectors).
-}
diff --git a/man/plot_upset.Rd b/man/plot_upset.Rd
deleted file mode 100644
index df9fec6..0000000
--- a/man/plot_upset.Rd
+++ /dev/null
@@ -1,70 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_upset.R
-\name{plot_upset}
-\alias{plot_upset}
-\title{Create Annotated UpSet Plot}
-\usage{
-plot_upset(
- x,
- top_n_comb = 10,
- comb_title = "Intersection\\nSize",
- set_title = "Set Size",
- scale = 2,
- barplot_args = list(),
- filename = character(0),
- height = 2 * scale,
- width = 3.4 * scale,
- units = "in",
- save_args = list(),
- upset_args = list()
-)
-}
-\arguments{
-\item{x}{list; named list of elements in each set. Passed to
-\code{\link[ComplexHeatmap]{make_comb_mat}}.}
-
-\item{top_n_comb}{numeric; number of largest intersections to show. Default
-is 10.}
-
-\item{comb_title}{character; title of top barplot annotation. Default is
-"Intersection Size".}
-
-\item{set_title}{character; title of right barplot annotation. Default is
-"Set Size".}
-
-\item{scale}{numeric; scaling factor for all plot elements. Improves
-appearance. Default is 2.}
-
-\item{barplot_args}{list; additional arguments passed to
-\code{\link[ComplexHeatmap]{anno_barplot}}.}
-
-\item{filename}{character; the filename used to save the UpSet plot. If
-provided, the plot will not be displayed.}
-
-\item{height}{numeric; the save height.}
-
-\item{width}{numeric; the save width.}
-
-\item{units}{character; the units of \code{height} and \code{width}. Default
-is "in" (inches).}
-
-\item{save_args}{list; additional arguments passed to the graphics device.
-See \code{\link[grDevices]{png}} for options.}
-
-\item{upset_args}{list; additional arguments passed to
-\code{\link[ComplexHeatmap]{UpSet}}.}
-}
-\description{
-Create an UpSet plot annotated with intersection and set sizes.
-Essentially a modified version of \code{\link[ComplexHeatmap]{UpSet}}.
-}
-\details{
-See \code{\link[ComplexHeatmap]{UpSet}} for details and sections
-mentioned in Arguments.
-}
-\references{
-Lex, A., Gehlenborg, N., Strobelt, H., Vuillemot, R., & Pfister,
-H. (2014). UpSet: Visualization of Intersecting Sets. \emph{IEEE
-transactions on visualization and computer graphics, 20}(12), 1983--1992.
-\url{https://doi.org/10.1109/TVCG.2014.2346248}
-}
diff --git a/man/plot_volcano.Rd b/man/plot_volcano.Rd
deleted file mode 100644
index 70b243b..0000000
--- a/man/plot_volcano.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot_volcano.R
-\name{plot_volcano}
-\alias{plot_volcano}
-\title{Create Volcano Plot}
-\usage{
-plot_volcano(x, pval_cutoff = 0.05, colors = c("#3366ff", "darkred", "grey"))
-}
-\arguments{
-\item{x}{a \code{matrix} or \code{data.frame} containing differential
-analysis results. Must include columns logFC, log10_pval, sign_logFC,
-contrast, and label.}
-
-\item{pval_cutoff}{numeric; cutoff for p-values to be considered significant.
-Adds a dashed horizontal line to the plot.}
-
-\item{colors}{character; length 3 vector of significantly negative,
-significantly positive, and non-significant (NS) logFC colors.}
-}
-\value{
-A \code{ggplot2} object.
-}
-\description{
-Create a volcano plot.
-}
diff --git a/man/pval_hist.Rd b/man/pval_hist.Rd
deleted file mode 100644
index 08d635b..0000000
--- a/man/pval_hist.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/pval_hist.R
-\name{pval_hist}
-\alias{pval_hist}
-\title{Create p-value Histograms}
-\usage{
-pval_hist(x, pval = "adj.P.Val", ylims = c(0, NA))
-}
-\arguments{
-\item{x}{a \code{matrix} or \code{data.frame} containing differential
-analysis results. Must include columns logFC, log10_pval, sign_logFC,
-contrast, and label.}
-
-\item{pval}{character; the name of a column in \code{x} containing p-values
-or adjusted p-values.}
-
-\item{ylims}{numeric vector of length 2. Limits of y-axis.}
-}
-\value{
-A \code{ggplot2} object.
-}
-\description{
-Create p-value histograms with
-\code{\link[ggplot2]{ggplot2-package}}.
-}
diff --git a/man/rank_genes.Rd b/man/rank_genes.Rd
new file mode 100644
index 0000000..6dab1be
--- /dev/null
+++ b/man/rank_genes.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rank_genes.R
+\name{rank_genes}
+\alias{rank_genes}
+\title{Calculate GSEA ranking metric values}
+\usage{
+rank_genes(
+ x,
+ genes,
+ metric = "-log10(P.Value)*sign(logFC)",
+ contrast_column = "contrast"
+)
+}
+\arguments{
+\item{x}{object of class \code{data.frame}. Typically results from
+\code{\link{limma_full}}.}
+
+\item{genes}{character; the name of a column in \code{x}. A ranking metric
+value will be calculated for each unique entry. Usually gene symbols,
+Entrez IDs, or Ensembl genes to work with the output of \code{msigdbr2}.}
+
+\item{metric}{character; specifies how the ranking metric will be calculated
+from \code{x}. Can be the name of a single column.}
+
+\item{contrast_column}{character; the name of a column in \code{x}. Entries
+in this column will be set as the names of the output list.}
+}
+\value{
+A named vector (if no "contrast" column) or list of named vectors of
+sorted ranking metric values for each contrast in \code{x}.
+}
+\description{
+Calculate GSEA ranking metric values for \code{\link{fgsea2}}.
+}
+\details{
+The -log\eqn{_{10}}-transformed p-value signed by the fold-change is
+similar to the t-statistic, but more clearly separates extreme values in
+the tails from those close to 0 that are not as interesting.
+}
diff --git a/man/run_WGCNA.Rd b/man/run_WGCNA.Rd
index 23fc539..9f46ae8 100644
--- a/man/run_WGCNA.Rd
+++ b/man/run_WGCNA.Rd
@@ -4,56 +4,39 @@
\alias{run_WGCNA}
\title{Run WGCNA Pipeline}
\usage{
-run_WGCNA(
- object,
- power = 1:20,
- RsquaredCut = 0.9,
- module_prefix = "",
- merge_modules
-)
+run_WGCNA(object, power = 1:20, RsquaredCut = 0.9, module_prefix = "")
}
\arguments{
-\item{object}{object of class \code{\link[MSnbase:MSnSet-class]{MSnSet}}.
-\code{exprs} should be a matrix of log\eqn{_2}-transformed values. Count
-data is not accepted.}
+\item{object}{object of class
+\code{\link[Biobase:ExpressionSet-class]{ExpressionSet}}. \code{exprs}
+should be a matrix of log\eqn{_2}-transformed values. Count data is not
+accepted.}
\item{power}{integer; optional soft power vector (length 1 or greater). If
-not specified, the lowest value of 1:20 that satisfies scale-free fit
-R\eqn{^2} at least equal to \code{RsquaredCut} will be used.}
+not specified, the lowest value of 1--20 that satisfies scale-free fit
+R\eqn{^2 \geq} \code{RsquaredCut} will be used.}
-\item{RsquaredCut}{numeric; desired minimum scale-free fit R\eqn{^2}. Ignored
-if \code{power} is provided.}
+\item{RsquaredCut}{numeric; minimum acceptable scale-free fit R\eqn{^2}.
+Ignored if \code{power} is provided.}
-\item{module_prefix}{character; what to append to the module numbers to
-create the "moduleID" column in \code{fData(object)}.}
-
-\item{merge_modules}{logical; if not provided, user input is requested to
-determine whether highly-correlated (r=0.85) modules should be combined
-based on plots that the function creates. Must be provided if session is
-not interactive.}
+\item{module_prefix}{character; appended to the module numbers to create the
+"moduleID" column.}
}
\value{
Object of class \code{list} of length 2:
\itemize{
\item "modules": a \code{data.frame} with the following columns, in
-addition to all columns in \code{MSnbase::fData(object)}.
+addition to all columns in \code{Biobase::fData(object)}.
\describe{
\item{moduleColor}{moduleColor; unique color assigned to each module.
The "grey" module always contains features that are not co-expressed.}
\item{moduleID}{factor; \code{module_prefix} followed by a unique
module number. The "grey" module is always 0.}
}
-\item "MEs": a \code{data.frame} with 6 variables.
+\item "MEs": a \code{data.frame} with 3 variables, in addition to all
+variables in \code{pData(object)}.
\describe{
-\item{\code{bid}}{integer; unique 5 digit identifier of all samples
-collected for an acute test/sample collection period. All samples
-collected during that period will have the same BID.}
-\item{\code{sex}}{factor; the sex of the rat with levels "Female" and
-"Male".}
-\item{\code{timepoint}}{factor; exercise training group. Either "SED"
-(sedentary) or the number of weeks of training ("1W", "2W", "4W",
-"8W").}
\item{\code{moduleID}}{factor; \code{module_prefix} followed by a
unique module number. The "grey" module is always 0.}
\item{\code{ME}}{numeric; module eigenfeature values. One per sample x
@@ -64,7 +47,8 @@ module is always 0.}
}
}
\description{
-Run the WGCNA pipeline used in the WAT manuscript.
+Run the WGCNA pipeline used in the MoTrPAC PASS1B WAT
+manuscript.
}
\references{
Zhang, B., & Horvath, S. (2005). A general framework for weighted
diff --git a/man/theme_pub.Rd b/man/theme_pub.Rd
deleted file mode 100644
index 57a82d6..0000000
--- a/man/theme_pub.Rd
+++ /dev/null
@@ -1,13 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/theme_pub.R
-\name{theme_pub}
-\alias{theme_pub}
-\title{Custom ggplot2 theme}
-\usage{
-theme_pub()
-}
-\description{
-Theme used extensively by the plotting functions of this
-package, as well as to generate final plots for the associated publication.
-Built to adhere to the Nature publishing guidelines.
-}
diff --git a/man/update_GO_names.Rd b/man/update_GO_names.Rd
index ea5e629..b229fce 100644
--- a/man/update_GO_names.Rd
+++ b/man/update_GO_names.Rd
@@ -4,7 +4,12 @@
\alias{update_GO_names}
\title{Update Gene Ontology term descriptions}
\usage{
-update_GO_names(x, version = packageVersion("msigdbr"), capitalize = FALSE)
+update_GO_names(
+ x,
+ version = packageVersion("msigdbr"),
+ capitalize = FALSE,
+ obo_file
+)
}
\arguments{
\item{x}{object of class \code{data.frame} produced by
@@ -18,6 +23,11 @@ Defaults to the current version.}
description if the first word does not contain a mix of capital and
lowercase letters. Improves appearance of plots, such as those produced by
\code{\link{enrichmat}}.}
+
+\item{obo_file}{character; optional path to the OBO file that should be used
+to update Gene Ontology term descriptions. Only provide when the
+appropriate OBO file cannot be extracted from the MSigDB Release Notes (See
+Details).}
}
\value{
Object of class \code{data.frame}. The same as \code{x}, but with
@@ -31,7 +41,8 @@ Update entries in the \code{gs_description} column of
\details{
This function assumes that the phrase "GO-basic obo file released
on" is present in the MSigDB release notes for that version and is followed
-by a date.
+by a date. This date will replace "RELEASE_DATE" in
+"http://release.geneontology.org/RELEASE_DATE/ontology/go-basic.obo".
}
\examples{
x <- msigdbr2(species = "Homo sapiens",