diff --git a/.Rbuildignore b/.Rbuildignore
index b7d3e72..c57d8d6 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -5,3 +5,6 @@
^\.github$
^data-raw$
^vignettes/prerender_figures\.R$
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
diff --git a/.gitignore b/.gitignore
index 0126ae9..90cd220 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,3 +52,4 @@ rsconnect/
.DS_Store
.quarto
inst/doc
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
index ce290a4..177895e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -62,4 +62,5 @@ Depends:
LazyData: true
LazyDataCompression: xz
VignetteBuilder: knitr
+URL: https://bigmindlab.github.io/OmicsKit, https://github.com/BigMindLab/OmicsKit
diff --git a/NAMESPACE b/NAMESPACE
index d7bc4fc..44d21fc 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,9 +2,9 @@
export(add_annotations)
export(addgenesPA)
-export(calc_jaccard)
export(detect_filter)
export(do_clust)
+export(geneset_similarity)
export(get_annotations)
export(get_network_communities)
export(get_stars)
diff --git a/R/dataclust_PA.R b/R/dataclust_PA.R
index f3fb3bb..ff04feb 100644
--- a/R/dataclust_PA.R
+++ b/R/dataclust_PA.R
@@ -8,7 +8,7 @@
#' apoptosis & cell death, cell cycle & DNA damage, immune response &
#' inflammation, and metabolism. Gene set names follow standard database
#' conventions (`KEGG_`, `HALLMARK_`, `GO_`) and gene symbols are real human
-#' genes. Designed to be used as input to [calc_jaccard()].
+#' genes. Designed to be used as input to [geneset_similarity()].
#'
#' @format A named list of 40 elements. Each element is a character vector of
#' human gene symbols (HGNC) belonging to that gene set. Gene set sizes range
@@ -26,11 +26,11 @@
#' # Inspect one gene set
#' geneset_list[["KEGG_APOPTOSIS"]]
#'
-#' # Use with calc_jaccard()
+#' # Use with geneset_similarity()
#' data(camera_results)
-#' jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05)
+#' jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05)
#'
-#' @seealso [calc_jaccard()], [camera_results]
+#' @seealso [geneset_similarity()], [camera_results]
"geneset_list"
@@ -40,7 +40,7 @@
#' analysis, containing significance values for the 40 gene sets in
#' [geneset_list]. Approximately 60% of gene sets have FDR < 0.05, providing
#' enough significant sets for meaningful clustering. Designed to be used
-#' alongside [geneset_list] as input to [calc_jaccard()].
+#' alongside [geneset_list] as input to [geneset_similarity()].
#'
#' @format A data frame with 40 rows and 4 columns:
#' \describe{
@@ -63,9 +63,9 @@
#' # How many gene sets are significant?
#' sum(camera_results$FDR < 0.05)
#'
-#' # Use with calc_jaccard()
+#' # Use with geneset_similarity()
#' data(geneset_list)
-#' jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05)
+#' jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05)
#'
-#' @seealso [calc_jaccard()], [geneset_list]
+#' @seealso [geneset_similarity()], [geneset_list]
"camera_results"
diff --git a/R/doclust_PA.R b/R/doclust_PA.R
index ba3e7db..485e190 100644
--- a/R/doclust_PA.R
+++ b/R/doclust_PA.R
@@ -4,14 +4,14 @@
# community detection, and super-term generation.
#
# Functions:
-# calc_jaccard — Compute Jaccard similarity & distance matrices
+# geneset_similarity — Compute Jaccard similarity & distance matrices
# do_clust — Hierarchical clustering with silhouette selection
# get_superterm — TF-IDF super-term labels for gene set communities
# get_network_communities — Community detection + super-terms in one call
# =============================================================================
########################
-# Function calc_jaccard #
+# Function geneset_similarity #
########################
#' Compute Jaccard similarity and distance matrices for gene sets
@@ -51,7 +51,7 @@
#' )
#'
#' # Only the first three gene sets pass the FDR threshold
-#' jac <- calc_jaccard(geneset_list, results, fdr_th = 0.05)
+#' jac <- geneset_similarity(geneset_list, results, fdr_th = 0.05)
#'
#' jac$jaccard_sim # similarity matrix
#' jac$dist_mat # distance object (usable in UMAP, clustering, etc.)
@@ -63,7 +63,7 @@
#' @importFrom rlang .data
#' @export
-calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) {
+geneset_similarity <- function(geneset_list, results, fdr_th = 0.05) {
if (!is.list(geneset_list) || is.null(names(geneset_list))) {
stop("`geneset_list` must be a named list of character vectors.", call. = FALSE)
@@ -136,7 +136,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) {
#' returns cluster assignments, a silhouette ggplot2 object, and a
#' ComplexHeatmap with dendrogram.
#'
-#' @param x A `JaccardResult` object (output of [calc_jaccard()]) or an
+#' @param x A `JaccardResult` object (output of [geneset_similarity()]) or an
#' object of class `dist`.
#' @param method Agglomeration method passed to [stats::hclust()].
#' Default: `"ward.D2"`.
@@ -168,7 +168,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) {
#' FDR = c(0.01, 0.02, 0.03, 0.04, 0.01)
#' )
#'
-#' jac <- calc_jaccard(geneset_list, results)
+#' jac <- geneset_similarity(geneset_list, results)
#' clust <- do_clust(jac)
#'
#' clust$silhouette_plot # ggplot2 silhouette curve
@@ -177,7 +177,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) {
#' clust$cluster_assignments # tibble: NAME | cluster
#' }
#'
-#' @seealso [calc_jaccard()], [get_network_communities()],
+#' @seealso [geneset_similarity()], [get_network_communities()],
#' [network_clust()], [network_clust_gg()]
#' @import ggplot2
#' @importFrom rlang .data
@@ -204,7 +204,7 @@ do_clust <- function(x, method = "ward.D2", max_k = NULL) {
jaccard_sim <- 1 - as.matrix(x)
} else {
stop(
- "`x` must be a `JaccardResult` object (output of `calc_jaccard()`) ",
+ "`x` must be a `JaccardResult` object (output of `geneset_similarity()`) ",
"or an object of class `dist`.",
call. = FALSE
)
@@ -471,10 +471,10 @@ get_superterm <- function(geneset_names, community_membership,
#' Convenience wrapper that builds a binary adjacency network from a Jaccard
#' similarity matrix, runs a community-detection algorithm, and optionally
#' generates super-term labels for each community via [get_superterm()].
-#' Designed to be the single step between [calc_jaccard()] and the network
+#' Designed to be the single step between [geneset_similarity()] and the network
#' plotting functions [network_clust()] / [network_clust_gg()].
#'
-#' @param x A `JaccardResult` object (output of [calc_jaccard()]).
+#' @param x A `JaccardResult` object (output of [geneset_similarity()]).
#' @param threshold Numeric between 0 and 1. Gene set pairs with a Jaccard
#' similarity above this value are connected in the network. Default: `0.3`.
#' @param method Character. Community detection algorithm to use. One of:
@@ -506,7 +506,7 @@ get_superterm <- function(geneset_names, community_membership,
#' res <- read.csv("path/to/results.csv")
#'
#' # Full workflow
-#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
+#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
#' clust <- do_clust(jac)
#' net <- get_network_communities(jac, threshold = 0.3, method = "louvain")
#'
@@ -524,7 +524,7 @@ get_superterm <- function(geneset_names, community_membership,
#' plots$combined
#' }
#'
-#' @seealso [calc_jaccard()], [do_clust()], [get_superterm()],
+#' @seealso [geneset_similarity()], [do_clust()], [get_superterm()],
#' [network_clust()], [network_clust_gg()]
#' @importFrom magrittr %>%
#' @export
@@ -542,7 +542,7 @@ get_network_communities <- function(x,
}
if (!inherits(x, "JaccardResult")) {
stop(
- "`x` must be a `JaccardResult` object (output of `calc_jaccard()`).",
+ "`x` must be a `JaccardResult` object (output of `geneset_similarity()`).",
call. = FALSE
)
}
diff --git a/R/list_gmts.R b/R/list_gmts.R
index c5f5844..1368364 100644
--- a/R/list_gmts.R
+++ b/R/list_gmts.R
@@ -6,7 +6,7 @@
#'
#' Scans a directory for `.gmt` files, parses them, and returns a single named
#' list where each element is a character vector of gene symbols for one gene
-#' set. The output is ready to be passed directly to [calc_jaccard()].
+#' set. The output is ready to be passed directly to [geneset_similarity()].
#'
#' **GMT format:** each row contains the gene set name in column 1, an optional
#' description in column 2, and gene symbols from column 3 onward. Empty fields
@@ -31,11 +31,11 @@
#' names(geneset_list)[1:5] # first five gene set names
#' geneset_list[["KEGG_APOPTOSIS"]] # genes in a specific set
#'
-#' # Pass directly to calc_jaccard
-#' jac <- calc_jaccard(geneset_list, results_df, fdr_th = 0.05)
+#' # Pass directly to geneset_similarity
+#' jac <- geneset_similarity(geneset_list, results_df, fdr_th = 0.05)
#' }
#'
-#' @seealso [calc_jaccard()]
+#' @seealso [geneset_similarity()]
#' @export
list_gmts <- function(dir) {
diff --git a/R/plotclust_PA.R b/R/plotclust_PA.R
index d66a5f5..ebd333c 100644
--- a/R/plotclust_PA.R
+++ b/R/plotclust_PA.R
@@ -22,7 +22,7 @@
#' For a ggplot2-based version that returns plot objects instead of drawing
#' them, see [network_clust_gg()].
#'
-#' @param x A `JaccardResult` object (output of [calc_jaccard()]).
+#' @param x A `JaccardResult` object (output of [geneset_similarity()]).
#' @param clust_result A list returned by [do_clust()], used to color nodes by
#' hierarchical cluster assignment.
#' @param jaccard_threshold Numeric. Minimum Jaccard similarity required for an
@@ -61,7 +61,7 @@
#' gsl <- list_gmts("path/to/gmt_folder/")
#' res <- read.csv("path/to/results.csv")
#'
-#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
+#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
#' clust <- do_clust(jac)
#' net <- get_network_communities(jac, threshold = 0.3)
#'
@@ -89,7 +89,7 @@
#' dev.off()
#' }
#'
-#' @seealso [calc_jaccard()], [do_clust()], [get_network_communities()],
+#' @seealso [geneset_similarity()], [do_clust()], [get_network_communities()],
#' [get_superterm()], [network_clust_gg()]
#' @importFrom magrittr %>%
#' @importFrom rlang .data
@@ -110,7 +110,7 @@ network_clust <- function(x, clust_result,
# --- Input validation ---------------------------------------------------
if (!inherits(x, "JaccardResult")) {
stop(
- "`x` must be a `JaccardResult` object (output of `calc_jaccard()`).",
+ "`x` must be a `JaccardResult` object (output of `geneset_similarity()`).",
call. = FALSE
)
}
@@ -317,7 +317,7 @@ network_clust <- function(x, clust_result,
#' For a base R igraph version that draws directly to the active graphics
#' device, see [network_clust()].
#'
-#' @param x A `JaccardResult` object (output of [calc_jaccard()]).
+#' @param x A `JaccardResult` object (output of [geneset_similarity()]).
#' @param clust_result A list returned by [do_clust()], used to color nodes by
#' hierarchical cluster assignment.
#' @param jaccard_threshold Numeric. Minimum Jaccard similarity required for an
@@ -356,7 +356,7 @@ network_clust <- function(x, clust_result,
#' gsl <- list_gmts("path/to/gmt_folder/")
#' res <- read.csv("path/to/results.csv")
#'
-#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
+#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
#' clust <- do_clust(jac)
#' net <- get_network_communities(jac, threshold = 0.3)
#'
@@ -389,7 +389,7 @@ network_clust <- function(x, clust_result,
#' plots$clean + plots$superterms
#' }
#'
-#' @seealso [calc_jaccard()], [do_clust()], [get_network_communities()],
+#' @seealso [geneset_similarity()], [do_clust()], [get_network_communities()],
#' [get_superterm()], [network_clust()]
#' @import ggplot2
#' @importFrom magrittr %>%
@@ -417,7 +417,7 @@ network_clust_gg <- function(x, clust_result,
# --- Input validation ---------------------------------------------------
if (!inherits(x, "JaccardResult")) {
stop(
- "`x` must be a `JaccardResult` object (output of `calc_jaccard()`).",
+ "`x` must be a `JaccardResult` object (output of `geneset_similarity()`).",
call. = FALSE
)
}
diff --git a/_pkgdown.yml b/_pkgdown.yml
new file mode 100644
index 0000000..33e8d42
--- /dev/null
+++ b/_pkgdown.yml
@@ -0,0 +1,144 @@
+package: OmicsKit
+title: "OmicsKit: A bioinformatics toolset for multiomics analysis"
+url: https://bigmindlab.github.io/OmicsKit
+
+navbar:
+ structure:
+ left: [reference, articles]
+ right: [github]
+ components:
+ github:
+ icon: fab fa-github
+ aria-label: GitHub
+ href: https://github.com/BigMindLab/OmicsKit
+
+home:
+ title: "OmicsKit"
+ description: >
+ Streamlined utilities for multi-omics analysis and publication-ready visuals.
+ Covers DEA, pathway analysis, dimensionality reduction, and more.
+ links:
+ - text: DEA Workflow
+ href: articles/DEA_workflow.html
+ - text: Pathway Analysis Workflow
+ href: articles/PA_workflow.html
+ - text: PA Clustering Workflow
+ href: articles/PA_clustering.html
+ sidebar:
+ structure: [links, license, authors, dev]
+
+authors:
+ David Requena:
+ href: https://orcid.org/0000-0002-5968-1133
+ Daniel Guevara:
+ href: https://orcid.org/0009-0001-2786-8729
+ Daniel Garbozo:
+ href: https://orcid.org/0009-0003-2495-6568
+ Angela Alarcon:
+ href: https://orcid.org/0000-0003-0293-5603
+
+
+footer:
+ structure:
+ left: [developed_by]
+ right: [built_with]
+ components:
+ developed_by: |
+ Developed by the
+ BigMind Lab.
+ Contact: david.requena@nyulangone.org
+
+reference:
+ - title: "Differential Expression Analysis (DEA)"
+ desc: >
+ Functions for data quality control, normalization, dimensionality reduction,
+ annotation, and differential expression visualization.
+ contents:
+ - power_analysis
+ - tpm
+ - nice_PCA
+ - nice_UMAP
+ - nice_tSNE
+ - get_annotations
+ - add_annotations
+ - save_results
+ - split_cases
+ - nice_Volcano
+ - detect_filter
+ - get_stars
+ - nice_VSB
+ - nice_VSB_DEseq2
+
+ - title: "Genomics"
+ desc: "Survival analysis and genomics visualization utilities."
+ contents:
+ - nice_KM
+
+ - title: "Pathway Analysis (PA)"
+ desc: >
+ Tools for loading, merging, and visualizing GSEA / pathway analysis results,
+ including single- and multi-comparison plots and heatmaps.
+ contents:
+ - list_gmts
+ - merge_PA
+ - getgenesPA
+ - addgenesPA
+ - multiplot_PA
+ - splot_PA
+ - heatmap_PA
+ - heatmap_path_PA
+
+ - title: "PA Clustering"
+ desc: >
+ Pathway clustering via Jaccard similarity, hierarchical clustering,
+ and network-based community detection.
+ contents:
+ - geneset_similarity
+ - do_clust
+ - get_superterm
+ - get_network_communities
+ - network_clust
+ - network_clust_gg
+ - title: "Example datasets"
+ desc: "Built-in datasets for reproducible examples and vignettes."
+ contents:
+ - camera_results
+ - deseq2_results
+ - geneset_list
+ - gsea_results
+ - norm_counts
+ - raw_counts
+ - sampledata
+ - vst_counts
+
+articles:
+ - title: "Workflows"
+ desc: "End-to-end analysis examples with OmicsKit."
+ contents:
+ - DEA_workflow
+ - PA_workflow
+ - PA_clustering
+
+template:
+ bootstrap: 5
+ bootswatch: cosmo
+ bslib:
+ primary: "#6000C6"
+ secondary: "#B3A3FF"
+ body-bg: "#FFFFFF"
+ navbar-bg: "#6000C6"
+ navbar-fg: "#FFFFFF"
+ link-color: "#6000C6"
+ pre-bg: "#F7F7FB"
+ includes:
+ in_header: |
+
+
+development:
+ mode: release
+
+toc:
+ depth: 2
diff --git a/data-raw/example_PA.R b/data-raw/example_PA.R
index 2108929..e8c0ace 100644
--- a/data-raw/example_PA.R
+++ b/data-raw/example_PA.R
@@ -6,7 +6,7 @@
# geneset_list
# A named list of 40 curated gene sets with realistic KEGG / HALLMARK / GO
# naming conventions and real human gene symbols, grouped into four biological
-# themes so that calc_jaccard() + do_clust() + get_network_communities()
+# themes so that geneset_similarity() + do_clust() + get_network_communities()
# produce meaningful clustering results.
#
# Themes:
diff --git a/man/camera_results.Rd b/man/camera_results.Rd
index b374a08..c1635c2 100644
--- a/man/camera_results.Rd
+++ b/man/camera_results.Rd
@@ -26,7 +26,7 @@ A data frame simulating the output of a CAMERA differential expression
analysis, containing significance values for the 40 gene sets in
\link{geneset_list}. Approximately 60\% of gene sets have FDR < 0.05, providing
enough significant sets for meaningful clustering. Designed to be used
-alongside \link{geneset_list} as input to \code{\link[=calc_jaccard]{calc_jaccard()}}.
+alongside \link{geneset_list} as input to \code{\link[=geneset_similarity]{geneset_similarity()}}.
}
\examples{
data(camera_results)
@@ -37,12 +37,12 @@ head(camera_results)
# How many gene sets are significant?
sum(camera_results$FDR < 0.05)
-# Use with calc_jaccard()
+# Use with geneset_similarity()
data(geneset_list)
-jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05)
+jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05)
}
\seealso{
-\code{\link[=calc_jaccard]{calc_jaccard()}}, \link{geneset_list}
+\code{\link[=geneset_similarity]{geneset_similarity()}}, \link{geneset_list}
}
\keyword{datasets}
diff --git a/man/do_clust.Rd b/man/do_clust.Rd
index 2be3419..fb97bb8 100644
--- a/man/do_clust.Rd
+++ b/man/do_clust.Rd
@@ -7,7 +7,7 @@
do_clust(x, method = "ward.D2", max_k = NULL)
}
\arguments{
-\item{x}{A \code{JaccardResult} object (output of \code{\link[=calc_jaccard]{calc_jaccard()}}) or an
+\item{x}{A \code{JaccardResult} object (output of \code{\link[=geneset_similarity]{geneset_similarity()}}) or an
object of class \code{dist}.}
\item{method}{Agglomeration method passed to \code{\link[stats:hclust]{stats::hclust()}}.
@@ -50,7 +50,7 @@ results <- data.frame(
FDR = c(0.01, 0.02, 0.03, 0.04, 0.01)
)
-jac <- calc_jaccard(geneset_list, results)
+jac <- geneset_similarity(geneset_list, results)
clust <- do_clust(jac)
clust$silhouette_plot # ggplot2 silhouette curve
@@ -61,6 +61,6 @@ clust$cluster_assignments # tibble: NAME | cluster
}
\seealso{
-\code{\link[=calc_jaccard]{calc_jaccard()}}, \code{\link[=get_network_communities]{get_network_communities()}},
+\code{\link[=geneset_similarity]{geneset_similarity()}}, \code{\link[=get_network_communities]{get_network_communities()}},
\code{\link[=network_clust]{network_clust()}}, \code{\link[=network_clust_gg]{network_clust_gg()}}
}
diff --git a/man/geneset_list.Rd b/man/geneset_list.Rd
index 01f45a1..00ae84d 100644
--- a/man/geneset_list.Rd
+++ b/man/geneset_list.Rd
@@ -21,7 +21,7 @@ A named list of 40 curated gene sets spanning four biological themes:
apoptosis & cell death, cell cycle & DNA damage, immune response &
inflammation, and metabolism. Gene set names follow standard database
conventions (\code{KEGG_}, \code{HALLMARK_}, \code{GO_}) and gene symbols are real human
-genes. Designed to be used as input to \code{\link[=calc_jaccard]{calc_jaccard()}}.
+genes. Designed to be used as input to \code{\link[=geneset_similarity]{geneset_similarity()}}.
}
\examples{
data(geneset_list)
@@ -32,12 +32,12 @@ length(geneset_list)
# Inspect one gene set
geneset_list[["KEGG_APOPTOSIS"]]
-# Use with calc_jaccard()
+# Use with geneset_similarity()
data(camera_results)
-jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05)
+jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05)
}
\seealso{
-\code{\link[=calc_jaccard]{calc_jaccard()}}, \link{camera_results}
+\code{\link[=geneset_similarity]{geneset_similarity()}}, \link{camera_results}
}
\keyword{datasets}
diff --git a/man/calc_jaccard.Rd b/man/geneset_similarity.Rd
similarity index 93%
rename from man/calc_jaccard.Rd
rename to man/geneset_similarity.Rd
index d6c54e3..573aabd 100644
--- a/man/calc_jaccard.Rd
+++ b/man/geneset_similarity.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/doclust_PA.R
-\name{calc_jaccard}
-\alias{calc_jaccard}
+\name{geneset_similarity}
+\alias{geneset_similarity}
\title{Compute Jaccard similarity and distance matrices for gene sets}
\usage{
-calc_jaccard(geneset_list, results, fdr_th = 0.05)
+geneset_similarity(geneset_list, results, fdr_th = 0.05)
}
\arguments{
\item{geneset_list}{A named list where each element is a character vector of
@@ -48,7 +48,7 @@ results <- data.frame(
)
# Only the first three gene sets pass the FDR threshold
-jac <- calc_jaccard(geneset_list, results, fdr_th = 0.05)
+jac <- geneset_similarity(geneset_list, results, fdr_th = 0.05)
jac$jaccard_sim # similarity matrix
jac$dist_mat # distance object (usable in UMAP, clustering, etc.)
diff --git a/man/get_network_communities.Rd b/man/get_network_communities.Rd
index 9e3813d..effc3eb 100644
--- a/man/get_network_communities.Rd
+++ b/man/get_network_communities.Rd
@@ -15,7 +15,7 @@ get_network_communities(
)
}
\arguments{
-\item{x}{A \code{JaccardResult} object (output of \code{\link[=calc_jaccard]{calc_jaccard()}}).}
+\item{x}{A \code{JaccardResult} object (output of \code{\link[=geneset_similarity]{geneset_similarity()}}).}
\item{threshold}{Numeric between 0 and 1. Gene set pairs with a Jaccard
similarity above this value are connected in the network. Default: \code{0.3}.}
@@ -56,7 +56,7 @@ A named list with four elements:
Convenience wrapper that builds a binary adjacency network from a Jaccard
similarity matrix, runs a community-detection algorithm, and optionally
generates super-term labels for each community via \code{\link[=get_superterm]{get_superterm()}}.
-Designed to be the single step between \code{\link[=calc_jaccard]{calc_jaccard()}} and the network
+Designed to be the single step between \code{\link[=geneset_similarity]{geneset_similarity()}} and the network
plotting functions \code{\link[=network_clust]{network_clust()}} / \code{\link[=network_clust_gg]{network_clust_gg()}}.
}
\examples{
@@ -65,7 +65,7 @@ gsl <- list_gmts("path/to/gmt_folder/")
res <- read.csv("path/to/results.csv")
# Full workflow
-jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
+jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
clust <- do_clust(jac)
net <- get_network_communities(jac, threshold = 0.3, method = "louvain")
@@ -85,6 +85,6 @@ plots$combined
}
\seealso{
-\code{\link[=calc_jaccard]{calc_jaccard()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_superterm]{get_superterm()}},
+\code{\link[=geneset_similarity]{geneset_similarity()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_superterm]{get_superterm()}},
\code{\link[=network_clust]{network_clust()}}, \code{\link[=network_clust_gg]{network_clust_gg()}}
}
diff --git a/man/list_gmts.Rd b/man/list_gmts.Rd
index de832ee..77ddc64 100644
--- a/man/list_gmts.Rd
+++ b/man/list_gmts.Rd
@@ -19,7 +19,7 @@ last occurrence overwrites the earlier one.
\description{
Scans a directory for \code{.gmt} files, parses them, and returns a single named
list where each element is a character vector of gene symbols for one gene
-set. The output is ready to be passed directly to \code{\link[=calc_jaccard]{calc_jaccard()}}.
+set. The output is ready to be passed directly to \code{\link[=geneset_similarity]{geneset_similarity()}}.
}
\details{
\strong{GMT format:} each row contains the gene set name in column 1, an optional
@@ -37,11 +37,11 @@ length(geneset_list) # number of gene sets
names(geneset_list)[1:5] # first five gene set names
geneset_list[["KEGG_APOPTOSIS"]] # genes in a specific set
-# Pass directly to calc_jaccard
-jac <- calc_jaccard(geneset_list, results_df, fdr_th = 0.05)
+# Pass directly to geneset_similarity
+jac <- geneset_similarity(geneset_list, results_df, fdr_th = 0.05)
}
}
\seealso{
-\code{\link[=calc_jaccard]{calc_jaccard()}}
+\code{\link[=geneset_similarity]{geneset_similarity()}}
}
diff --git a/man/network_clust.Rd b/man/network_clust.Rd
index cb85245..8931d94 100644
--- a/man/network_clust.Rd
+++ b/man/network_clust.Rd
@@ -16,7 +16,7 @@ network_clust(
)
}
\arguments{
-\item{x}{A \code{JaccardResult} object (output of \code{\link[=calc_jaccard]{calc_jaccard()}}).}
+\item{x}{A \code{JaccardResult} object (output of \code{\link[=geneset_similarity]{geneset_similarity()}}).}
\item{clust_result}{A list returned by \code{\link[=do_clust]{do_clust()}}, used to color nodes by
hierarchical cluster assignment.}
@@ -78,7 +78,7 @@ them, see \code{\link[=network_clust_gg]{network_clust_gg()}}.
gsl <- list_gmts("path/to/gmt_folder/")
res <- read.csv("path/to/results.csv")
-jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
+jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
clust <- do_clust(jac)
net <- get_network_communities(jac, threshold = 0.3)
@@ -108,6 +108,6 @@ dev.off()
}
\seealso{
-\code{\link[=calc_jaccard]{calc_jaccard()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_network_communities]{get_network_communities()}},
+\code{\link[=geneset_similarity]{geneset_similarity()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_network_communities]{get_network_communities()}},
\code{\link[=get_superterm]{get_superterm()}}, \code{\link[=network_clust_gg]{network_clust_gg()}}
}
diff --git a/man/network_clust_gg.Rd b/man/network_clust_gg.Rd
index 3f6bf93..165e8de 100644
--- a/man/network_clust_gg.Rd
+++ b/man/network_clust_gg.Rd
@@ -16,7 +16,7 @@ network_clust_gg(
)
}
\arguments{
-\item{x}{A \code{JaccardResult} object (output of \code{\link[=calc_jaccard]{calc_jaccard()}}).}
+\item{x}{A \code{JaccardResult} object (output of \code{\link[=geneset_similarity]{geneset_similarity()}}).}
\item{clust_result}{A list returned by \code{\link[=do_clust]{do_clust()}}, used to color nodes by
hierarchical cluster assignment.}
@@ -81,7 +81,7 @@ device, see \code{\link[=network_clust]{network_clust()}}.
gsl <- list_gmts("path/to/gmt_folder/")
res <- read.csv("path/to/results.csv")
-jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
+jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
clust <- do_clust(jac)
net <- get_network_communities(jac, threshold = 0.3)
@@ -116,6 +116,6 @@ plots$clean + plots$superterms
}
\seealso{
-\code{\link[=calc_jaccard]{calc_jaccard()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_network_communities]{get_network_communities()}},
+\code{\link[=geneset_similarity]{geneset_similarity()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_network_communities]{get_network_communities()}},
\code{\link[=get_superterm]{get_superterm()}}, \code{\link[=network_clust]{network_clust()}}
}
diff --git a/vignettes/pathway_analysis_clustering.Rmd b/vignettes/PA_clustering.Rmd
similarity index 97%
rename from vignettes/pathway_analysis_clustering.Rmd
rename to vignettes/PA_clustering.Rmd
index a79dee0..cb48f83 100644
--- a/vignettes/pathway_analysis_clustering.Rmd
+++ b/vignettes/PA_clustering.Rmd
@@ -27,7 +27,7 @@ vignette demonstrates how to use OmicsKit's pathway clustering functions to:
1. **Load gene sets** from GMT files with `list_gmts()`
2. **Quantify redundancy** between gene sets using Jaccard similarity with
- `calc_jaccard()`
+ `geneset_similarity()`
3. **Cluster** redundant gene sets hierarchically with `do_clust()`
4. **Detect communities** in the gene set network and generate interpretable
labels with `get_network_communities()`
@@ -77,7 +77,7 @@ sum(camera_results$FDR < 0.05)
## Step 2 — Jaccard similarity matrix
-`calc_jaccard()` filters the gene sets by FDR threshold and computes all
+`geneset_similarity()` filters the gene sets by FDR threshold and computes all
pairwise Jaccard similarity coefficients:
$$J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$
@@ -85,8 +85,8 @@ $$J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$
A value of 1 means the two gene sets share identical gene membership; 0 means
no overlap at all.
-```{r calc_jaccard}
-jac <- calc_jaccard(
+```{r geneset_similarity}
+jac <- geneset_similarity(
geneset_list = geneset_list,
results = camera_results,
fdr_th = 0.05
@@ -291,7 +291,7 @@ library(OmicsKit)
gsl <- list_gmts("path/to/gmt_folder/")
# 2. Jaccard similarity (filter by FDR < 0.05)
-jac <- calc_jaccard(gsl, camera_results, fdr_th = 0.05)
+jac <- geneset_similarity(gsl, camera_results, fdr_th = 0.05)
# 3. Hierarchical clustering
clust <- do_clust(jac)