diff --git a/.Rbuildignore b/.Rbuildignore index b7d3e72..c57d8d6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,6 @@ ^\.github$ ^data-raw$ ^vignettes/prerender_figures\.R$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.gitignore b/.gitignore index 0126ae9..90cd220 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,4 @@ rsconnect/ .DS_Store .quarto inst/doc +docs diff --git a/DESCRIPTION b/DESCRIPTION index ce290a4..177895e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -62,4 +62,5 @@ Depends: LazyData: true LazyDataCompression: xz VignetteBuilder: knitr +URL: https://bigmindlab.github.io/OmicsKit diff --git a/NAMESPACE b/NAMESPACE index d7bc4fc..44d21fc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,9 +2,9 @@ export(add_annotations) export(addgenesPA) -export(calc_jaccard) export(detect_filter) export(do_clust) +export(geneset_similarity) export(get_annotations) export(get_network_communities) export(get_stars) diff --git a/R/dataclust_PA.R b/R/dataclust_PA.R index f3fb3bb..ff04feb 100644 --- a/R/dataclust_PA.R +++ b/R/dataclust_PA.R @@ -8,7 +8,7 @@ #' apoptosis & cell death, cell cycle & DNA damage, immune response & #' inflammation, and metabolism. Gene set names follow standard database #' conventions (`KEGG_`, `HALLMARK_`, `GO_`) and gene symbols are real human -#' genes. Designed to be used as input to [calc_jaccard()]. +#' genes. Designed to be used as input to [geneset_similarity()]. #' #' @format A named list of 40 elements. Each element is a character vector of #' human gene symbols (HGNC) belonging to that gene set. 
Gene set sizes range @@ -26,11 +26,11 @@ #' # Inspect one gene set #' geneset_list[["KEGG_APOPTOSIS"]] #' -#' # Use with calc_jaccard() +#' # Use with geneset_similarity() #' data(camera_results) -#' jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05) +#' jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05) #' -#' @seealso [calc_jaccard()], [camera_results] +#' @seealso [geneset_similarity()], [camera_results] "geneset_list" @@ -40,7 +40,7 @@ #' analysis, containing significance values for the 40 gene sets in #' [geneset_list]. Approximately 60% of gene sets have FDR < 0.05, providing #' enough significant sets for meaningful clustering. Designed to be used -#' alongside [geneset_list] as input to [calc_jaccard()]. +#' alongside [geneset_list] as input to [geneset_similarity()]. #' #' @format A data frame with 40 rows and 4 columns: #' \describe{ @@ -63,9 +63,9 @@ #' # How many gene sets are significant? #' sum(camera_results$FDR < 0.05) #' -#' # Use with calc_jaccard() +#' # Use with geneset_similarity() #' data(geneset_list) -#' jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05) +#' jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05) #' -#' @seealso [calc_jaccard()], [geneset_list] +#' @seealso [geneset_similarity()], [geneset_list] "camera_results" diff --git a/R/doclust_PA.R b/R/doclust_PA.R index ba3e7db..485e190 100644 --- a/R/doclust_PA.R +++ b/R/doclust_PA.R @@ -4,14 +4,14 @@ # community detection, and super-term generation. 
# # Functions: -# calc_jaccard — Compute Jaccard similarity & distance matrices +# geneset_similarity — Compute Jaccard similarity & distance matrices # do_clust — Hierarchical clustering with silhouette selection # get_superterm — TF-IDF super-term labels for gene set communities # get_network_communities — Community detection + super-terms in one call # ============================================================================= ######################## -# Function calc_jaccard # +# Function geneset_similarity # ######################## #' Compute Jaccard similarity and distance matrices for gene sets @@ -51,7 +51,7 @@ #' ) #' #' # Only the first three gene sets pass the FDR threshold -#' jac <- calc_jaccard(geneset_list, results, fdr_th = 0.05) +#' jac <- geneset_similarity(geneset_list, results, fdr_th = 0.05) #' #' jac$jaccard_sim # similarity matrix #' jac$dist_mat # distance object (usable in UMAP, clustering, etc.) @@ -63,7 +63,7 @@ #' @importFrom rlang .data #' @export -calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) { +geneset_similarity <- function(geneset_list, results, fdr_th = 0.05) { if (!is.list(geneset_list) || is.null(names(geneset_list))) { stop("`geneset_list` must be a named list of character vectors.", call. = FALSE) @@ -136,7 +136,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) { #' returns cluster assignments, a silhouette ggplot2 object, and a #' ComplexHeatmap with dendrogram. #' -#' @param x A `JaccardResult` object (output of [calc_jaccard()]) or an +#' @param x A `JaccardResult` object (output of [geneset_similarity()]) or an #' object of class `dist`. #' @param method Agglomeration method passed to [stats::hclust()]. #' Default: `"ward.D2"`. 
@@ -168,7 +168,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) { #' FDR = c(0.01, 0.02, 0.03, 0.04, 0.01) #' ) #' -#' jac <- calc_jaccard(geneset_list, results) +#' jac <- geneset_similarity(geneset_list, results) #' clust <- do_clust(jac) #' #' clust$silhouette_plot # ggplot2 silhouette curve @@ -177,7 +177,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) { #' clust$cluster_assignments # tibble: NAME | cluster #' } #' -#' @seealso [calc_jaccard()], [get_network_communities()], +#' @seealso [geneset_similarity()], [get_network_communities()], #' [network_clust()], [network_clust_gg()] #' @import ggplot2 #' @importFrom rlang .data @@ -204,7 +204,7 @@ do_clust <- function(x, method = "ward.D2", max_k = NULL) { jaccard_sim <- 1 - as.matrix(x) } else { stop( - "`x` must be a `JaccardResult` object (output of `calc_jaccard()`) ", + "`x` must be a `JaccardResult` object (output of `geneset_similarity()`) ", "or an object of class `dist`.", call. = FALSE ) @@ -471,10 +471,10 @@ get_superterm <- function(geneset_names, community_membership, #' Convenience wrapper that builds a binary adjacency network from a Jaccard #' similarity matrix, runs a community-detection algorithm, and optionally #' generates super-term labels for each community via [get_superterm()]. -#' Designed to be the single step between [calc_jaccard()] and the network +#' Designed to be the single step between [geneset_similarity()] and the network #' plotting functions [network_clust()] / [network_clust_gg()]. #' -#' @param x A `JaccardResult` object (output of [calc_jaccard()]). +#' @param x A `JaccardResult` object (output of [geneset_similarity()]). #' @param threshold Numeric between 0 and 1. Gene set pairs with a Jaccard #' similarity above this value are connected in the network. Default: `0.3`. #' @param method Character. Community detection algorithm to use. 
One of: @@ -506,7 +506,7 @@ get_superterm <- function(geneset_names, community_membership, #' res <- read.csv("path/to/results.csv") #' #' # Full workflow -#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05) +#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05) #' clust <- do_clust(jac) #' net <- get_network_communities(jac, threshold = 0.3, method = "louvain") #' @@ -524,7 +524,7 @@ get_superterm <- function(geneset_names, community_membership, #' plots$combined #' } #' -#' @seealso [calc_jaccard()], [do_clust()], [get_superterm()], +#' @seealso [geneset_similarity()], [do_clust()], [get_superterm()], #' [network_clust()], [network_clust_gg()] #' @importFrom magrittr %>% #' @export @@ -542,7 +542,7 @@ get_network_communities <- function(x, } if (!inherits(x, "JaccardResult")) { stop( - "`x` must be a `JaccardResult` object (output of `calc_jaccard()`).", + "`x` must be a `JaccardResult` object (output of `geneset_similarity()`).", call. = FALSE ) } diff --git a/R/list_gmts.R b/R/list_gmts.R index c5f5844..1368364 100644 --- a/R/list_gmts.R +++ b/R/list_gmts.R @@ -6,7 +6,7 @@ #' #' Scans a directory for `.gmt` files, parses them, and returns a single named #' list where each element is a character vector of gene symbols for one gene -#' set. The output is ready to be passed directly to [calc_jaccard()]. +#' set. The output is ready to be passed directly to [geneset_similarity()]. #' #' **GMT format:** each row contains the gene set name in column 1, an optional #' description in column 2, and gene symbols from column 3 onward. 
Empty fields @@ -31,11 +31,11 @@ #' names(geneset_list)[1:5] # first five gene set names #' geneset_list[["KEGG_APOPTOSIS"]] # genes in a specific set #' -#' # Pass directly to calc_jaccard -#' jac <- calc_jaccard(geneset_list, results_df, fdr_th = 0.05) +#' # Pass directly to geneset_similarity +#' jac <- geneset_similarity(geneset_list, results_df, fdr_th = 0.05) #' } #' -#' @seealso [calc_jaccard()] +#' @seealso [geneset_similarity()] #' @export list_gmts <- function(dir) { diff --git a/R/plotclust_PA.R b/R/plotclust_PA.R index d66a5f5..ebd333c 100644 --- a/R/plotclust_PA.R +++ b/R/plotclust_PA.R @@ -22,7 +22,7 @@ #' For a ggplot2-based version that returns plot objects instead of drawing #' them, see [network_clust_gg()]. #' -#' @param x A `JaccardResult` object (output of [calc_jaccard()]). +#' @param x A `JaccardResult` object (output of [geneset_similarity()]). #' @param clust_result A list returned by [do_clust()], used to color nodes by #' hierarchical cluster assignment. #' @param jaccard_threshold Numeric. Minimum Jaccard similarity required for an @@ -61,7 +61,7 @@ #' gsl <- list_gmts("path/to/gmt_folder/") #' res <- read.csv("path/to/results.csv") #' -#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05) +#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05) #' clust <- do_clust(jac) #' net <- get_network_communities(jac, threshold = 0.3) #' @@ -89,7 +89,7 @@ #' dev.off() #' } #' -#' @seealso [calc_jaccard()], [do_clust()], [get_network_communities()], +#' @seealso [geneset_similarity()], [do_clust()], [get_network_communities()], #' [get_superterm()], [network_clust_gg()] #' @importFrom magrittr %>% #' @importFrom rlang .data @@ -110,7 +110,7 @@ network_clust <- function(x, clust_result, # --- Input validation --------------------------------------------------- if (!inherits(x, "JaccardResult")) { stop( - "`x` must be a `JaccardResult` object (output of `calc_jaccard()`).", + "`x` must be a `JaccardResult` object (output of `geneset_similarity()`).", call. 
= FALSE ) } @@ -317,7 +317,7 @@ network_clust <- function(x, clust_result, #' For a base R igraph version that draws directly to the active graphics #' device, see [network_clust()]. #' -#' @param x A `JaccardResult` object (output of [calc_jaccard()]). +#' @param x A `JaccardResult` object (output of [geneset_similarity()]). #' @param clust_result A list returned by [do_clust()], used to color nodes by #' hierarchical cluster assignment. #' @param jaccard_threshold Numeric. Minimum Jaccard similarity required for an @@ -356,7 +356,7 @@ network_clust <- function(x, clust_result, #' gsl <- list_gmts("path/to/gmt_folder/") #' res <- read.csv("path/to/results.csv") #' -#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05) +#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05) #' clust <- do_clust(jac) #' net <- get_network_communities(jac, threshold = 0.3) #' @@ -389,7 +389,7 @@ network_clust <- function(x, clust_result, #' plots$clean + plots$superterms #' } #' -#' @seealso [calc_jaccard()], [do_clust()], [get_network_communities()], +#' @seealso [geneset_similarity()], [do_clust()], [get_network_communities()], #' [get_superterm()], [network_clust()] #' @import ggplot2 #' @importFrom magrittr %>% @@ -417,7 +417,7 @@ network_clust_gg <- function(x, clust_result, # --- Input validation --------------------------------------------------- if (!inherits(x, "JaccardResult")) { stop( - "`x` must be a `JaccardResult` object (output of `calc_jaccard()`).", + "`x` must be a `JaccardResult` object (output of `geneset_similarity()`).", call. 
= FALSE ) } diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..33e8d42 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,144 @@ +package: OmicsKit +title: "OmicsKit: A bioinformatics toolset for multiomics analysis" +url: https://bigmindlab.github.io/OmicsKit + +navbar: + structure: + left: [reference, articles] + right: [github] + components: + github: + icon: fab fa-github + aria-label: GitHub + href: https://github.com/BigMindLab/OmicsKit + +home: + title: "OmicsKit" + description: > + Streamlined utilities for multi-omics analysis and publication-ready visuals. + Covers DEA, pathway analysis, dimensionality reduction, and more. + links: + - text: DEA Workflow + href: articles/DEA_workflow.html + - text: Pathway Analysis Workflow + href: articles/PA_workflow.html + - text: PA Clustering Workflow + href: articles/PA_clustering.html + sidebar: + structure: [links, license, authors, dev] + +authors: + David Requena: + href: https://orcid.org/0000-0002-5968-1133 + Daniel Guevara: + href: https://orcid.org/0009-0001-2786-8729 + Daniel Garbozo: + href: https://orcid.org/0009-0003-2495-6568 + Angela Alarcon: + href: https://orcid.org/0000-0003-0293-5603 + + +footer: + structure: + left: [developed_by] + right: [built_with] + components: + developed_by: | + Developed by the + BigMind Lab. + Contact: david.requena@nyulangone.org + +reference: + - title: "Differential Expression Analysis (DEA)" + desc: > + Functions for data quality control, normalization, dimensionality reduction, + annotation, and differential expression visualization. + contents: + - power_analysis + - tpm + - nice_PCA + - nice_UMAP + - nice_tSNE + - get_annotations + - add_annotations + - save_results + - split_cases + - nice_Volcano + - detect_filter + - get_stars + - nice_VSB + - nice_VSB_DEseq2 + + - title: "Genomics" + desc: "Survival analysis and genomics visualization utilities." 
+ contents: + - nice_KM + + - title: "Pathway Analysis (PA)" + desc: > + Tools for loading, merging, and visualizing GSEA / pathway analysis results, + including single- and multi-comparison plots and heatmaps. + contents: + - list_gmts + - merge_PA + - getgenesPA + - addgenesPA + - multiplot_PA + - splot_PA + - heatmap_PA + - heatmap_path_PA + + - title: "PA Clustering" + desc: > + Pathway clustering via Jaccard similarity, hierarchical clustering, + and network-based community detection. + contents: + - geneset_similarity + - do_clust + - get_superterm + - get_network_communities + - network_clust + - network_clust_gg + - title: "Example datasets" + desc: "Built-in datasets for reproducible examples and vignettes." + contents: + - camera_results + - deseq2_results + - geneset_list + - gsea_results + - norm_counts + - raw_counts + - sampledata + - vst_counts + +articles: + - title: "Workflows" + desc: "End-to-end analysis examples with OmicsKit." + contents: + - DEA_workflow + - PA_workflow + - PA_clustering + +template: + bootstrap: 5 + bootswatch: cosmo + bslib: + primary: "#6000C6" + secondary: "#B3A3FF" + body-bg: "#FFFFFF" + navbar-bg: "#6000C6" + navbar-fg: "#FFFFFF" + link-color: "#6000C6" + pre-bg: "#F7F7FB" + includes: + in_header: | + + +development: + mode: release + +toc: + depth: 2 diff --git a/data-raw/example_PA.R b/data-raw/example_PA.R index 2108929..e8c0ace 100644 --- a/data-raw/example_PA.R +++ b/data-raw/example_PA.R @@ -6,7 +6,7 @@ # geneset_list # A named list of 40 curated gene sets with realistic KEGG / HALLMARK / GO # naming conventions and real human gene symbols, grouped into four biological -# themes so that calc_jaccard() + do_clust() + get_network_communities() +# themes so that geneset_similarity() + do_clust() + get_network_communities() # produce meaningful clustering results. 
# # Themes: diff --git a/man/camera_results.Rd b/man/camera_results.Rd index b374a08..c1635c2 100644 --- a/man/camera_results.Rd +++ b/man/camera_results.Rd @@ -26,7 +26,7 @@ A data frame simulating the output of a CAMERA differential expression analysis, containing significance values for the 40 gene sets in \link{geneset_list}. Approximately 60\% of gene sets have FDR < 0.05, providing enough significant sets for meaningful clustering. Designed to be used -alongside \link{geneset_list} as input to \code{\link[=calc_jaccard]{calc_jaccard()}}. +alongside \link{geneset_list} as input to \code{\link[=geneset_similarity]{geneset_similarity()}}. } \examples{ data(camera_results) @@ -37,12 +37,12 @@ head(camera_results) # How many gene sets are significant? sum(camera_results$FDR < 0.05) -# Use with calc_jaccard() +# Use with geneset_similarity() data(geneset_list) -jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05) +jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05) } \seealso{ -\code{\link[=calc_jaccard]{calc_jaccard()}}, \link{geneset_list} +\code{\link[=geneset_similarity]{geneset_similarity()}}, \link{geneset_list} } \keyword{datasets} diff --git a/man/do_clust.Rd b/man/do_clust.Rd index 2be3419..fb97bb8 100644 --- a/man/do_clust.Rd +++ b/man/do_clust.Rd @@ -7,7 +7,7 @@ do_clust(x, method = "ward.D2", max_k = NULL) } \arguments{ -\item{x}{A \code{JaccardResult} object (output of \code{\link[=calc_jaccard]{calc_jaccard()}}) or an +\item{x}{A \code{JaccardResult} object (output of \code{\link[=geneset_similarity]{geneset_similarity()}}) or an object of class \code{dist}.} \item{method}{Agglomeration method passed to \code{\link[stats:hclust]{stats::hclust()}}. 
@@ -50,7 +50,7 @@ results <- data.frame( FDR = c(0.01, 0.02, 0.03, 0.04, 0.01) ) -jac <- calc_jaccard(geneset_list, results) +jac <- geneset_similarity(geneset_list, results) clust <- do_clust(jac) clust$silhouette_plot # ggplot2 silhouette curve @@ -61,6 +61,6 @@ clust$cluster_assignments # tibble: NAME | cluster } \seealso{ -\code{\link[=calc_jaccard]{calc_jaccard()}}, \code{\link[=get_network_communities]{get_network_communities()}}, +\code{\link[=geneset_similarity]{geneset_similarity()}}, \code{\link[=get_network_communities]{get_network_communities()}}, \code{\link[=network_clust]{network_clust()}}, \code{\link[=network_clust_gg]{network_clust_gg()}} } diff --git a/man/geneset_list.Rd b/man/geneset_list.Rd index 01f45a1..00ae84d 100644 --- a/man/geneset_list.Rd +++ b/man/geneset_list.Rd @@ -21,7 +21,7 @@ A named list of 40 curated gene sets spanning four biological themes: apoptosis & cell death, cell cycle & DNA damage, immune response & inflammation, and metabolism. Gene set names follow standard database conventions (\code{KEGG_}, \code{HALLMARK_}, \code{GO_}) and gene symbols are real human -genes. Designed to be used as input to \code{\link[=calc_jaccard]{calc_jaccard()}}. +genes. Designed to be used as input to \code{\link[=geneset_similarity]{geneset_similarity()}}. 
} \examples{ data(geneset_list) @@ -32,12 +32,12 @@ length(geneset_list) # Inspect one gene set geneset_list[["KEGG_APOPTOSIS"]] -# Use with calc_jaccard() +# Use with geneset_similarity() data(camera_results) -jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05) +jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05) } \seealso{ -\code{\link[=calc_jaccard]{calc_jaccard()}}, \link{camera_results} +\code{\link[=geneset_similarity]{geneset_similarity()}}, \link{camera_results} } \keyword{datasets} diff --git a/man/calc_jaccard.Rd b/man/geneset_similarity.Rd similarity index 93% rename from man/calc_jaccard.Rd rename to man/geneset_similarity.Rd index d6c54e3..573aabd 100644 --- a/man/calc_jaccard.Rd +++ b/man/geneset_similarity.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/doclust_PA.R -\name{calc_jaccard} -\alias{calc_jaccard} +\name{geneset_similarity} +\alias{geneset_similarity} \title{Compute Jaccard similarity and distance matrices for gene sets} \usage{ -calc_jaccard(geneset_list, results, fdr_th = 0.05) +geneset_similarity(geneset_list, results, fdr_th = 0.05) } \arguments{ \item{geneset_list}{A named list where each element is a character vector of @@ -48,7 +48,7 @@ results <- data.frame( ) # Only the first three gene sets pass the FDR threshold -jac <- calc_jaccard(geneset_list, results, fdr_th = 0.05) +jac <- geneset_similarity(geneset_list, results, fdr_th = 0.05) jac$jaccard_sim # similarity matrix jac$dist_mat # distance object (usable in UMAP, clustering, etc.) 
diff --git a/man/get_network_communities.Rd b/man/get_network_communities.Rd index 9e3813d..effc3eb 100644 --- a/man/get_network_communities.Rd +++ b/man/get_network_communities.Rd @@ -15,7 +15,7 @@ get_network_communities( ) } \arguments{ -\item{x}{A \code{JaccardResult} object (output of \code{\link[=calc_jaccard]{calc_jaccard()}}).} +\item{x}{A \code{JaccardResult} object (output of \code{\link[=geneset_similarity]{geneset_similarity()}}).} \item{threshold}{Numeric between 0 and 1. Gene set pairs with a Jaccard similarity above this value are connected in the network. Default: \code{0.3}.} @@ -56,7 +56,7 @@ A named list with four elements: Convenience wrapper that builds a binary adjacency network from a Jaccard similarity matrix, runs a community-detection algorithm, and optionally generates super-term labels for each community via \code{\link[=get_superterm]{get_superterm()}}. -Designed to be the single step between \code{\link[=calc_jaccard]{calc_jaccard()}} and the network +Designed to be the single step between \code{\link[=geneset_similarity]{geneset_similarity()}} and the network plotting functions \code{\link[=network_clust]{network_clust()}} / \code{\link[=network_clust_gg]{network_clust_gg()}}. 
} \examples{ @@ -65,7 +65,7 @@ gsl <- list_gmts("path/to/gmt_folder/") res <- read.csv("path/to/results.csv") # Full workflow -jac <- calc_jaccard(gsl, res, fdr_th = 0.05) +jac <- geneset_similarity(gsl, res, fdr_th = 0.05) clust <- do_clust(jac) net <- get_network_communities(jac, threshold = 0.3, method = "louvain") @@ -85,6 +85,6 @@ plots$combined } \seealso{ -\code{\link[=calc_jaccard]{calc_jaccard()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_superterm]{get_superterm()}}, +\code{\link[=geneset_similarity]{geneset_similarity()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_superterm]{get_superterm()}}, \code{\link[=network_clust]{network_clust()}}, \code{\link[=network_clust_gg]{network_clust_gg()}} } diff --git a/man/list_gmts.Rd b/man/list_gmts.Rd index de832ee..77ddc64 100644 --- a/man/list_gmts.Rd +++ b/man/list_gmts.Rd @@ -19,7 +19,7 @@ last occurrence overwrites the earlier one. \description{ Scans a directory for \code{.gmt} files, parses them, and returns a single named list where each element is a character vector of gene symbols for one gene -set. The output is ready to be passed directly to \code{\link[=calc_jaccard]{calc_jaccard()}}. +set. The output is ready to be passed directly to \code{\link[=geneset_similarity]{geneset_similarity()}}. 
} \details{ \strong{GMT format:} each row contains the gene set name in column 1, an optional @@ -37,11 +37,11 @@ length(geneset_list) # number of gene sets names(geneset_list)[1:5] # first five gene set names geneset_list[["KEGG_APOPTOSIS"]] # genes in a specific set -# Pass directly to calc_jaccard -jac <- calc_jaccard(geneset_list, results_df, fdr_th = 0.05) +# Pass directly to geneset_similarity +jac <- geneset_similarity(geneset_list, results_df, fdr_th = 0.05) } } \seealso{ -\code{\link[=calc_jaccard]{calc_jaccard()}} +\code{\link[=geneset_similarity]{geneset_similarity()}} } diff --git a/man/network_clust.Rd b/man/network_clust.Rd index cb85245..8931d94 100644 --- a/man/network_clust.Rd +++ b/man/network_clust.Rd @@ -16,7 +16,7 @@ network_clust( ) } \arguments{ -\item{x}{A \code{JaccardResult} object (output of \code{\link[=calc_jaccard]{calc_jaccard()}}).} +\item{x}{A \code{JaccardResult} object (output of \code{\link[=geneset_similarity]{geneset_similarity()}}).} \item{clust_result}{A list returned by \code{\link[=do_clust]{do_clust()}}, used to color nodes by hierarchical cluster assignment.} @@ -78,7 +78,7 @@ them, see \code{\link[=network_clust_gg]{network_clust_gg()}}. 
gsl <- list_gmts("path/to/gmt_folder/") res <- read.csv("path/to/results.csv") -jac <- calc_jaccard(gsl, res, fdr_th = 0.05) +jac <- geneset_similarity(gsl, res, fdr_th = 0.05) clust <- do_clust(jac) net <- get_network_communities(jac, threshold = 0.3) @@ -108,6 +108,6 @@ dev.off() } \seealso{ -\code{\link[=calc_jaccard]{calc_jaccard()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_network_communities]{get_network_communities()}}, +\code{\link[=geneset_similarity]{geneset_similarity()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_network_communities]{get_network_communities()}}, \code{\link[=get_superterm]{get_superterm()}}, \code{\link[=network_clust_gg]{network_clust_gg()}} } diff --git a/man/network_clust_gg.Rd b/man/network_clust_gg.Rd index 3f6bf93..165e8de 100644 --- a/man/network_clust_gg.Rd +++ b/man/network_clust_gg.Rd @@ -16,7 +16,7 @@ network_clust_gg( ) } \arguments{ -\item{x}{A \code{JaccardResult} object (output of \code{\link[=calc_jaccard]{calc_jaccard()}}).} +\item{x}{A \code{JaccardResult} object (output of \code{\link[=geneset_similarity]{geneset_similarity()}}).} \item{clust_result}{A list returned by \code{\link[=do_clust]{do_clust()}}, used to color nodes by hierarchical cluster assignment.} @@ -81,7 +81,7 @@ device, see \code{\link[=network_clust]{network_clust()}}. 
gsl <- list_gmts("path/to/gmt_folder/") res <- read.csv("path/to/results.csv") -jac <- calc_jaccard(gsl, res, fdr_th = 0.05) +jac <- geneset_similarity(gsl, res, fdr_th = 0.05) clust <- do_clust(jac) net <- get_network_communities(jac, threshold = 0.3) @@ -116,6 +116,6 @@ plots$clean + plots$superterms } \seealso{ -\code{\link[=calc_jaccard]{calc_jaccard()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_network_communities]{get_network_communities()}}, +\code{\link[=geneset_similarity]{geneset_similarity()}}, \code{\link[=do_clust]{do_clust()}}, \code{\link[=get_network_communities]{get_network_communities()}}, \code{\link[=get_superterm]{get_superterm()}}, \code{\link[=network_clust]{network_clust()}} } diff --git a/vignettes/pathway_analysis_clustering.Rmd b/vignettes/PA_clustering.Rmd similarity index 97% rename from vignettes/pathway_analysis_clustering.Rmd rename to vignettes/PA_clustering.Rmd index a79dee0..cb48f83 100644 --- a/vignettes/pathway_analysis_clustering.Rmd +++ b/vignettes/PA_clustering.Rmd @@ -27,7 +27,7 @@ vignette demonstrates how to use OmicsKit's pathway clustering functions to: 1. **Load gene sets** from GMT files with `list_gmts()` 2. **Quantify redundancy** between gene sets using Jaccard similarity with - `calc_jaccard()` + `geneset_similarity()` 3. **Cluster** redundant gene sets hierarchically with `do_clust()` 4. **Detect communities** in the gene set network and generate interpretable labels with `get_network_communities()` @@ -77,7 +77,7 @@ sum(camera_results$FDR < 0.05) ## Step 2 — Jaccard similarity matrix -`calc_jaccard()` filters the gene sets by FDR threshold and computes all +`geneset_similarity()` filters the gene sets by FDR threshold and computes all pairwise Jaccard similarity coefficients: $$J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$ @@ -85,8 +85,8 @@ $$J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$ A value of 1 means the two gene sets share identical gene membership; 0 means no overlap at all. 
-```{r calc_jaccard} -jac <- calc_jaccard( +```{r geneset_similarity} +jac <- geneset_similarity( geneset_list = geneset_list, results = camera_results, fdr_th = 0.05 @@ -291,7 +291,7 @@ library(OmicsKit) gsl <- list_gmts("path/to/gmt_folder/") # 2. Jaccard similarity (filter by FDR < 0.05) -jac <- calc_jaccard(gsl, camera_results, fdr_th = 0.05) +jac <- geneset_similarity(gsl, camera_results, fdr_th = 0.05) # 3. Hierarchical clustering clust <- do_clust(jac)