diff --git a/DESCRIPTION b/DESCRIPTION index a6199bc..3ad2dca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,8 +14,15 @@ Description: An R implementation of the GLIPH and GLIPH2 algorithms for algorithm papers: Glanville et al. (2017) and Huang et al. (2020) . License: MIT + file LICENSE +biocViews: + Software, + ImmunoOncology, + Clustering, + SingleCell, + Sequencing, + Visualization Depends: - R (>= 4.0.0) + R (>= 4.5.0) Imports: stringdist, igraph, diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..db52466 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,7 @@ +# immGLIPH 0.99.0 + +* Initial Bioconductor submission +* R implementation of GLIPH and GLIPH2 algorithms for TCR clustering +* Integration with scRepertoire ecosystem via immApex +* Interactive network visualization with plotNetwork() +* De novo TCR sequence generation with deNovoTCRs() diff --git a/R/clusterScoring.R b/R/clusterScoring.R index 0a68e7b..ed6733f 100644 --- a/R/clusterScoring.R +++ b/R/clusterScoring.R @@ -81,21 +81,21 @@ #' enrichment, clonal expansion enrichment, and common HLA enrichment). #' #' @examples -#' \dontrun{ #' utils::data("gliph_input_data") +#' ref_df <- gliph_input_data[, c("CDR3b", "TRBV")] #' #' res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200), ], +#' refdb_beta = ref_df, #' sim_depth = 100, #' n_cores = 1) #' #' scoring_results <- clusterScoring( #' cluster_list = res$cluster_list, #' cdr3_sequences = gliph_input_data[seq_len(200), ], -#' refdb_beta = "human_v2.0_CD48", +#' refdb_beta = ref_df, #' gliph_version = 1, #' sim_depth = 100, #' n_cores = 1) -#' } #' #' @references Glanville, Jacob, et al. #' "Identifying specificity groups in the T cell receptor repertoire." Nature 547.7661 (2017): 94. @@ -188,6 +188,12 @@ clusterScoring <- function(cluster_list, n_cores <- max(1, min(n_cores, parallel::detectCores()-2)) } + ### Early return for empty cluster_list (after all validation) + if(length(cluster_list) == 0) { + message("No clusters to score.") + return(data.frame()) + } + ################################################################# ## Preparation ## ################################################################# diff --git a/R/data.R b/R/data.R index 4b677de..c8f921a 100644 --- a/R/data.R +++ b/R/data.R @@ -12,6 +12,7 @@ #' (e.g. \code{"P17B"}, \code{"P19L"}).} #' } #' +#' @format A data.frame with 365 rows and 3 columns (CDR3b, TRBV, patient). #' @docType data #' @keywords datasets #' @name gliph_input_data @@ -41,6 +42,7 @@ #' This object demonstrates how to pass a SingleCellExperiment directly to #' \code{\link{runGLIPH}}. #' +#' @format A SingleCellExperiment with 2000 genes and 500 cells. #' @docType data #' @keywords datasets #' @name gliph_sce @@ -67,6 +69,7 @@ #' actual sample size is used.} #' } #' +#' @format A list with 2 elements: original and simulated. #' @docType data #' @keywords datasets #' @name ref_cluster_sizes @@ -82,6 +85,7 @@ #' segments that may appear in the CDR3 region. These fragments are used by the #' GLIPH2 algorithm to identify germline-encoded sequence segments. #' +#' @format A list of 3 data.frames: gTRV, gTRD, and gTRJ. #' @docType data #' @keywords datasets #' @name gTRB @@ -137,6 +141,8 @@ #' #' Raw data downloaded from \url{http://50.255.35.37:8080/tools}. #' +#' @format NULL. Data is downloaded on first use via +#' \code{\link{getGLIPHreference}}. #' @name reference_list #' @keywords datasets NULL diff --git a/R/deNovoTCRs.R b/R/deNovoTCRs.R index b3e8e27..c266614 100644 --- a/R/deNovoTCRs.R +++ b/R/deNovoTCRs.R @@ -72,20 +72,26 @@ #' \code{_de_novo.txt} is also written to disk. #' #' @examples -#' \dontrun{ -#' utils::data("gliph_input_data") -#' res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200),], -#' method = "gliph1", -#' sim_depth = 100, -#' n_cores = 1) -#' +#' # Build a minimal clustering output to demonstrate deNovoTCRs +#' fake_cluster <- data.frame( +#' CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPGGTNEKLFF", +#' "CASSLAPGDTNEKLFF", "CASSLAPGETNEKLFF", +#' "CASSLAPGANEKLFF", "CASSLAPGVTNEKLFF"), +#' TRBV = rep("TRBV5-1", 6), +#' stringsAsFactors = FALSE +#' ) +#' fake_output <- list(cluster_list = list("motif-LAP" = fake_cluster)) +#' ref_seqs <- fake_cluster[, c("CDR3b", "TRBV")] #' new_seqs <- deNovoTCRs( -#' convergence_group_tag = res$cluster_properties$tag[1], -#' clustering_output = res, -#' sims = 10000, -#' make_figure = TRUE, -#' n_cores = 1) -#' } +#' convergence_group_tag = "motif-LAP", +#' clustering_output = fake_output, +#' refdb_beta = ref_seqs, +#' sims = 100, +#' num_tops = 10, +#' min_length = 8, +#' make_figure = FALSE, +#' n_cores = 1 +#' ) #' #' @references Glanville, Jacob, et al. #' "Identifying specificity groups in the T cell receptor repertoire." Nature 547.7661 (2017): 94. @@ -197,7 +203,7 @@ deNovoTCRs <- function(convergence_group_tag, if(length(excluded) > 0){ all_crg_cdr3_seqs <- all_crg_cdr3_seqs[-excluded] if(length(all_crg_cdr3_seqs) > 0){ - message("Warning: ", length(excluded), " sequences of the convergence group were excluded from the further procedure due to falling below a minimum length of ", min_length, ".") + warning(length(excluded), " sequences of the convergence group were excluded from the further procedure due to falling below a minimum length of ", min_length, ".", call. = FALSE) } else { stop("No sequences of the convergence group are of minimum length of ", min_length, ". For further procedure, adjust the parameter 'min_length'") } @@ -230,7 +236,7 @@ deNovoTCRs <- function(convergence_group_tag, if(ncol(refseqs) > 1 && v_gene_norm == TRUE){ message("Notification: Second column of reference database is considered as V-gene information.") } else if(v_gene_norm == TRUE){ - message("Warning: Beta sequence reference database is missing column containing V-genes. Without V-gene information normalization may be inaccurate.") + warning("Beta sequence reference database is missing column containing V-genes. Without V-gene information normalization may be inaccurate.", call. = FALSE) v_gene_norm <- FALSE } if(ncol(refseqs) == 1) refseqs <- cbind(refseqs, rep("", nrow(refseqs))) @@ -245,7 +251,7 @@ deNovoTCRs <- function(convergence_group_tag, if(nrow(refseqs) == 0){ normalization <- FALSE v_gene_norm <- FALSE - message("Warning: No reference sequences with a minimum length of ", min_length, " given. Normalization therefore not possible. Adjust min_length to enable normalization.") + warning("No reference sequences with a minimum length of ", min_length, " given. Normalization therefore not possible. Adjust min_length to enable normalization.", call. = FALSE) } else { ref_vgenes <- as.character(refseqs$TRBV) refseqs <- as.character(refseqs$CDR3b) @@ -262,7 +268,7 @@ deNovoTCRs <- function(convergence_group_tag, if(ncol(refseqs) > 1 && v_gene_norm == TRUE){ message("Notification: Second column of reference database is considered as V-gene information.") } else if(v_gene_norm == TRUE){ - message("Warning: Beta sequence reference database is missing column containing V-genes. Without V-gene information normalization may be inaccurate.") + warning("Beta sequence reference database is missing column containing V-genes. Without V-gene information normalization may be inaccurate.", call. = FALSE) v_gene_norm <- FALSE } if(ncol(refseqs) == 1) refseqs <- cbind(refseqs, rep("", nrow(refseqs))) @@ -277,7 +283,7 @@ deNovoTCRs <- function(convergence_group_tag, if(nrow(refseqs) == 0){ normalization <- FALSE v_gene_norm <- FALSE - message("Warning: No reference sequences with a minimum length of ", min_length, " given. Normalization therefore not possible. Adjust min_length to enable normalization.") + warning("No reference sequences with a minimum length of ", min_length, " given. Normalization therefore not possible. Adjust min_length to enable normalization.", call. = FALSE) } else { ref_vgenes <- as.character(refseqs$TRBV) refseqs <- as.character(refseqs$CDR3b) @@ -289,8 +295,8 @@ deNovoTCRs <- function(convergence_group_tag, v_genes <- crg$TRBV } else if(v_gene_norm == TRUE){ v_gene_norm <- FALSE - message("Warning: Without V-gene information of sample sequences normalization may be inaccurate.") - } else message("Warning: Without V-gene restriction normalization may be inaccurate.") + warning("Without V-gene information of sample sequences normalization may be inaccurate.", call. = FALSE) + } else warning("Without V-gene restriction normalization may be inaccurate.", call. = FALSE) } ### Initialization diff --git a/R/getRandomSubsample.R b/R/getRandomSubsample.R index 067a1fe..b2b810e 100644 --- a/R/getRandomSubsample.R +++ b/R/getRandomSubsample.R @@ -33,6 +33,14 @@ #' @return A character vector of length \code{length(motif_region)} drawn from #' \code{refseqs_motif_region}. #' +#' @examples +#' ref_seqs <- c("ASSG", "ASSD", "ASSE", "ASSF", "ASSK", "ASSL") +#' sample_seqs <- c("ASSG", "ASSF", "ASSL") +#' sub <- getRandomSubsample( +#' refseqs_motif_region = ref_seqs, +#' motif_region = sample_seqs +#' ) +#' #' @export getRandomSubsample <- function(cdr3_len_stratify = FALSE, diff --git a/R/global-cutoff.R b/R/global-cutoff.R index 27aa689..cb55bb8 100644 --- a/R/global-cutoff.R +++ b/R/global-cutoff.R @@ -50,6 +50,7 @@ } #' immApex-accelerated global cutoff via buildNetwork() +#' @return A list with edge data and excluded sequence IDs. #' @keywords internal .global_cutoff_immapex <- function(seqs, motif_region, sequences, gccutoff, global_vgene, verbose) { @@ -118,6 +119,7 @@ } #' stringdist + foreach fallback for global cutoff +#' @return A list with edge data and excluded sequence IDs. #' @keywords internal .global_cutoff_stringdist <- function(seqs, motif_region, sequences, gccutoff, global_vgene, no_cores, diff --git a/R/loadGLIPH.R b/R/loadGLIPH.R index 302b037..177ddf5 100644 --- a/R/loadGLIPH.R +++ b/R/loadGLIPH.R @@ -12,6 +12,21 @@ #' \code{cluster_properties}, \code{motif_enrichment}, \code{connections}, and #' \code{parameters}. #' +#' @examples +#' utils::data("gliph_input_data") +#' ref_df <- gliph_input_data[, c("CDR3b", "TRBV")] +#' tmp_dir <- tempfile("gliph_out_") +#' res <- runGLIPH( +#' cdr3_sequences = gliph_input_data[seq_len(200), ], +#' method = "gliph1", +#' refdb_beta = ref_df, +#' result_folder = tmp_dir, +#' sim_depth = 50, +#' n_cores = 1 +#' ) +#' reloaded <- loadGLIPH(result_folder = tmp_dir) +#' unlink(tmp_dir, recursive = TRUE) +#' #' @export loadGLIPH <- function(result_folder = ""){ diff --git a/R/plotNetwork.R b/R/plotNetwork.R index e692072..33c9b07 100644 --- a/R/plotNetwork.R +++ b/R/plotNetwork.R @@ -44,16 +44,16 @@ #' @return A `visNetwork` object containing the interactive network graph. #' #' @examples -#' \dontrun{ #' utils::data("gliph_input_data") +#' ref_df <- gliph_input_data[, c("CDR3b", "TRBV")] #' res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200),], #' method = "gliph1", +#' refdb_beta = ref_df, #' sim_depth = 100, #' n_cores = 1) #' #' plotNetwork(clustering_output = res, #' n_cores = 1) -#' } #' #' @import viridis foreach grDevices #' @export @@ -134,8 +134,10 @@ plotNetwork <- function(clustering_output = NULL, # cluster_properties: contains cluster specific information like all scores parameters <- clustering_output$parameters cluster_list <- clustering_output$cluster_list - if(is.null(cluster_list)) stop("The specified clustering_output does not contain any clusters.") - if(length(cluster_list) == 0) stop("The specified clustering_output does not contain any clusters.") + if(is.null(cluster_list) || length(cluster_list) == 0) { + message("No clusters found in the clustering output.") + return(invisible(NULL)) + } cluster_properties <- clustering_output$cluster_properties hold_ids <- which(as.numeric(cluster_properties$cluster_size) >= cluster_min_size) @@ -539,7 +541,7 @@ plotNetwork <- function(clustering_output = NULL, } if(color_info == "color" && !(any(plotfunctions::isColor(cluster_data_frame[, color_info]) == FALSE))) stop("Column ", color_info, " determining node color has to contain only values that represent colors.") - color.scale = "" + color.scale <- "" if("color" %in% colnames(cluster_data_frame)){ # Use the user specified colors vert.info$color <- cluster_data_frame$color[vert.info$id] @@ -633,13 +635,13 @@ plotNetwork <- function(clustering_output = NULL, v.title <- vertexes v.color <- rep("gray", num.v) v.shadow <- rep(FALSE, num.v) - if ("size" %in% names(vertex.info)) v.size = as.numeric(vertex.info$size) - if ("label" %in% names(vertex.info)) v.label = as.character(vertex.info$label) - if ("group" %in% names(vertex.info)) v.group = as.character(vertex.info$group) - if ("shape" %in% names(vertex.info)) v.shape = as.character(vertex.info$shape) - if ("title" %in% names(vertex.info)) v.title = as.character(vertex.info$title) - if ("color" %in% names(vertex.info)) v.color = as.character(vertex.info$color) - if ("shadow" %in% names(vertex.info)) v.shadow = as.logical(vertex.info$shadow) + if ("size" %in% names(vertex.info)) v.size <- as.numeric(vertex.info$size) + if ("label" %in% names(vertex.info)) v.label <- as.character(vertex.info$label) + if ("group" %in% names(vertex.info)) v.group <- as.character(vertex.info$group) + if ("shape" %in% names(vertex.info)) v.shape <- as.character(vertex.info$shape) + if ("title" %in% names(vertex.info)) v.title <- as.character(vertex.info$title) + if ("color" %in% names(vertex.info)) v.color <- as.character(vertex.info$color) + if ("shadow" %in% names(vertex.info)) v.shadow <- as.logical(vertex.info$shadow) nodes <- data.frame(id = vertexes, color = list(background = v.color, border = "black", highlight = "red"), size=v.size, @@ -661,15 +663,15 @@ plotNetwork <- function(clustering_output = NULL, e.title <- rep("",num.e) e.smooth <- rep(FALSE,num.e) e.shadow <- rep(FALSE,num.e) - if ("length" %in% names(edge.info)) e.length = as.numeric(edge.info$length) - if ("label" %in% names(edge.info)) e.label = as.character(edge.info$label) - if ("width" %in% names(edge.info)) e.width = as.numeric(edge.info$width) - if ("color" %in% names(edge.info)) e.color = as.character(edge.info$color) - if ("arrows" %in% names(edge.info)) e.arrows = as.character(edge.info$arrows) - if ("dashes" %in% names(edge.info)) e.dashes = as.logical(edge.info$dashes) - if ("title" %in% names(edge.info)) e.title = as.character(edge.info$title) - if ("smooth" %in% names(edge.info)) e.smooth = as.logical(edge.info$smooth) - if ("shadow" %in% names(edge.info)) e.shadow = as.logical(edge.info$shadow) + if ("length" %in% names(edge.info)) e.length <- as.numeric(edge.info$length) + if ("label" %in% names(edge.info)) e.label <- as.character(edge.info$label) + if ("width" %in% names(edge.info)) e.width <- as.numeric(edge.info$width) + if ("color" %in% names(edge.info)) e.color <- as.character(edge.info$color) + if ("arrows" %in% names(edge.info)) e.arrows <- as.character(edge.info$arrows) + if ("dashes" %in% names(edge.info)) e.dashes <- as.logical(edge.info$dashes) + if ("title" %in% names(edge.info)) e.title <- as.character(edge.info$title) + if ("smooth" %in% names(edge.info)) e.smooth <- as.logical(edge.info$smooth) + if ("shadow" %in% names(edge.info)) e.shadow <- as.logical(edge.info$shadow) edges <- data.frame(from = eds$from, to = eds$to, length = e.length, width = e.width, diff --git a/R/runGLIPH.R b/R/runGLIPH.R index 37bcdd6..8df3996 100644 --- a/R/runGLIPH.R +++ b/R/runGLIPH.R @@ -213,15 +213,15 @@ #' \doi{10.1038/s41587-020-0505-4} #' #' @examples -#' \dontrun{ #' utils::data("gliph_input_data") +#' ref_df <- gliph_input_data[, c("CDR3b", "TRBV")] #' res <- runGLIPH( #' cdr3_sequences = gliph_input_data[seq_len(200), ], #' method = "gliph2", +#' refdb_beta = ref_df, #' sim_depth = 50, #' n_cores = 1 #' ) -#' } #' #' @import foreach #' @export diff --git a/R/utils-output.R b/R/utils-output.R index 7123b03..be5fdad 100644 --- a/R/utils-output.R +++ b/R/utils-output.R @@ -57,6 +57,7 @@ #' #' @param parameters Named list of parameters #' @param result_folder Path to output folder +#' @return NULL (invisibly). Called for side effect of writing file. #' @keywords internal .save_parameters <- function(parameters, result_folder) { paras <- data.frame( diff --git a/R/utils-parallel.R b/R/utils-parallel.R index 82975bc..19c6219 100644 --- a/R/utils-parallel.R +++ b/R/utils-parallel.R @@ -27,6 +27,7 @@ } #' Stop parallel backend +#' @return NULL (invisibly). Called for side effect. #' @keywords internal .stop_parallel <- function() { doParallel::stopImplicitCluster() diff --git a/R/utils-reference.R b/R/utils-reference.R index 9f9446a..9b63077 100644 --- a/R/utils-reference.R +++ b/R/utils-reference.R @@ -27,7 +27,13 @@ #' \code{cdr3_length_frequencies}. #' #' @examples -#' \dontrun{ +#' # Available reference database names +#' c("human_v1.0_CD4", "human_v1.0_CD8", "human_v1.0_CD48", +#' "human_v2.0_CD4", "human_v2.0_CD8", "human_v2.0_CD48", +#' "mouse_v1.0_CD4", "mouse_v1.0_CD8", "mouse_v1.0_CD48", +#' "gliph_reference") +#' +#' \donttest{ #' ref <- getGLIPHreference() #' names(ref) #' head(ref[["human_v2.0_CD48"]]$refseqs) @@ -79,18 +85,16 @@ getGLIPHreference <- function(force_download = FALSE, verbose = TRUE) { #' @return The reference list. #' @keywords internal .get_reference_list <- function() { - # Session-level cache: store in package namespace environment - pkg_env <- parent.env(environment()) - if (!is.null(pkg_env$.reference_list_cache)) { - return(pkg_env$.reference_list_cache) + if (!is.null(.ref_cache$data)) { + return(.ref_cache$data) } ref <- getGLIPHreference(verbose = TRUE) - pkg_env$.reference_list_cache <- ref + .ref_cache$data <- ref ref } -# Session-level cache placeholder (populated on first use) -.reference_list_cache <- NULL +# Session-level cache environment (survives namespace locking) +.ref_cache <- new.env(parent = emptyenv()) #' Load and prepare reference database #' diff --git a/README.md b/README.md index 69ed4c7..7e632d4 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # immGLIPH - [![R-CMD-check](https://github.com/ncborcherding/immGLIPH/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ncborcherding/immGLIPH/actions/workflows/R-CMD-check.yaml) - [![Codecov test coverage](https://codecov.io/gh/ncborcherding/immGLIPH/graph/badge.svg)](https://app.codecov.io/gh/ncborcherding/immGLIPH) + [![R-CMD-check](https://github.com/BorchLab/immGLIPH/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/BorchLab/immGLIPH/actions/workflows/R-CMD-check.yaml) + [![Codecov test coverage](https://codecov.io/gh/BorchLab/immGLIPH/graph/badge.svg)](https://app.codecov.io/gh/BorchLab/immGLIPH) An R implementation of the **GLIPH** and **GLIPH2** algorithms for clustering diff --git a/man/clusterScoring.Rd b/man/clusterScoring.Rd index 8fbca64..5dd14fa 100644 --- a/man/clusterScoring.Rd +++ b/man/clusterScoring.Rd @@ -110,21 +110,21 @@ enrichment of CDR3 lengths, enrichment of V genes, enrichment of clonal expansions, and enrichment of common HLA alleles. } \examples{ -\dontrun{ utils::data("gliph_input_data") +ref_df <- gliph_input_data[, c("CDR3b", "TRBV")] res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200), ], + refdb_beta = ref_df, sim_depth = 100, n_cores = 1) scoring_results <- clusterScoring( cluster_list = res$cluster_list, cdr3_sequences = gliph_input_data[seq_len(200), ], - refdb_beta = "human_v2.0_CD48", + refdb_beta = ref_df, gliph_version = 1, sim_depth = 100, n_cores = 1) -} } \references{ diff --git a/man/deNovoTCRs.Rd b/man/deNovoTCRs.Rd index 9ff57ce..573d23d 100644 --- a/man/deNovoTCRs.Rd +++ b/man/deNovoTCRs.Rd @@ -103,20 +103,26 @@ from the convergence group, and optionally normalized against a reference database. The top-scoring sequences are returned. } \examples{ -\dontrun{ -utils::data("gliph_input_data") -res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200),], - method = "gliph1", - sim_depth = 100, - n_cores = 1) - +# Build a minimal clustering output to demonstrate deNovoTCRs +fake_cluster <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPGGTNEKLFF", + "CASSLAPGDTNEKLFF", "CASSLAPGETNEKLFF", + "CASSLAPGANEKLFF", "CASSLAPGVTNEKLFF"), + TRBV = rep("TRBV5-1", 6), + stringsAsFactors = FALSE +) +fake_output <- list(cluster_list = list("motif-LAP" = fake_cluster)) +ref_seqs <- fake_cluster[, c("CDR3b", "TRBV")] new_seqs <- deNovoTCRs( - convergence_group_tag = res$cluster_properties$tag[1], - clustering_output = res, - sims = 10000, - make_figure = TRUE, - n_cores = 1) -} + convergence_group_tag = "motif-LAP", + clustering_output = fake_output, + refdb_beta = ref_seqs, + sims = 100, + num_tops = 10, + min_length = 8, + make_figure = FALSE, + n_cores = 1 +) } \references{ diff --git a/man/dot-global_cutoff_immapex.Rd b/man/dot-global_cutoff_immapex.Rd index 01532c8..06c8b09 100644 --- a/man/dot-global_cutoff_immapex.Rd +++ b/man/dot-global_cutoff_immapex.Rd @@ -13,6 +13,9 @@ verbose ) } +\value{ +A list with edge data and excluded sequence IDs. +} \description{ immApex-accelerated global cutoff via buildNetwork() } diff --git a/man/dot-global_cutoff_stringdist.Rd b/man/dot-global_cutoff_stringdist.Rd index 6eefaab..ab505fa 100644 --- a/man/dot-global_cutoff_stringdist.Rd +++ b/man/dot-global_cutoff_stringdist.Rd @@ -14,6 +14,9 @@ verbose ) } +\value{ +A list with edge data and excluded sequence IDs. +} \description{ stringdist + foreach fallback for global cutoff } diff --git a/man/dot-save_parameters.Rd b/man/dot-save_parameters.Rd index cc1c0de..bf71ac7 100644 --- a/man/dot-save_parameters.Rd +++ b/man/dot-save_parameters.Rd @@ -11,6 +11,9 @@ \item{result_folder}{Path to output folder} } +\value{ +NULL (invisibly). Called for side effect of writing file. +} \description{ Save parameter list to file } diff --git a/man/dot-stop_parallel.Rd b/man/dot-stop_parallel.Rd index fb1f10d..8c01239 100644 --- a/man/dot-stop_parallel.Rd +++ b/man/dot-stop_parallel.Rd @@ -6,6 +6,9 @@ \usage{ .stop_parallel() } +\value{ +NULL (invisibly). Called for side effect. +} \description{ Stop parallel backend } diff --git a/man/gTRB.Rd b/man/gTRB.Rd index 44187c7..eb85438 100644 --- a/man/gTRB.Rd +++ b/man/gTRB.Rd @@ -5,7 +5,7 @@ \alias{gTRB} \title{Germline TCR-beta CDR3 fragments} \format{ -An object of class \code{list} of length 3. +A list of 3 data.frames: gTRV, gTRD, and gTRJ. } \source{ Lefranc, M.-P. IMGT, the international ImMunoGeneTics database. diff --git a/man/getGLIPHreference.Rd b/man/getGLIPHreference.Rd index 4db15de..a0b3dc9 100644 --- a/man/getGLIPHreference.Rd +++ b/man/getGLIPHreference.Rd @@ -27,7 +27,13 @@ The cached file contains a named \code{list} with entries for each built-in reference database (see \code{\link{.valid_reference_names}}). } \examples{ -\dontrun{ +# Available reference database names +c("human_v1.0_CD4", "human_v1.0_CD8", "human_v1.0_CD48", + "human_v2.0_CD4", "human_v2.0_CD8", "human_v2.0_CD48", + "mouse_v1.0_CD4", "mouse_v1.0_CD8", "mouse_v1.0_CD48", + "gliph_reference") + +\donttest{ ref <- getGLIPHreference() names(ref) head(ref[["human_v2.0_CD48"]]$refseqs) diff --git a/man/getRandomSubsample.Rd b/man/getRandomSubsample.Rd index f851d9a..cf2e44a 100644 --- a/man/getRandomSubsample.Rd +++ b/man/getRandomSubsample.Rd @@ -62,3 +62,12 @@ enabled, the function preserves the CDR3 length and/or V-gene distribution of the sample in the subsample. This is used internally by the repeated random sampling (RRS) local-similarity method in \code{\link{runGLIPH}}. } +\examples{ +ref_seqs <- c("ASSG", "ASSD", "ASSE", "ASSF", "ASSK", "ASSL") +sample_seqs <- c("ASSG", "ASSF", "ASSL") +sub <- getRandomSubsample( + refseqs_motif_region = ref_seqs, + motif_region = sample_seqs +) + +} diff --git a/man/gliph_input_data.Rd b/man/gliph_input_data.Rd index 5619a00..0dfd85e 100644 --- a/man/gliph_input_data.Rd +++ b/man/gliph_input_data.Rd @@ -5,7 +5,7 @@ \alias{gliph_input_data} \title{Example TCR input data} \format{ -An object of class \code{data.frame} with 365 rows and 3 columns. +A data.frame with 365 rows and 3 columns (CDR3b, TRBV, patient). } \source{ Yost, K. E. et al. Clonal replacement of tumor-specific T cells diff --git a/man/gliph_sce.Rd b/man/gliph_sce.Rd index 6a2a7a5..f28fba8 100644 --- a/man/gliph_sce.Rd +++ b/man/gliph_sce.Rd @@ -5,7 +5,7 @@ \alias{gliph_sce} \title{Example SingleCellExperiment with TCR clonal information} \format{ -An object of class \code{SingleCellExperiment} with 2000 rows and 500 columns. +A SingleCellExperiment with 2000 genes and 500 cells. } \source{ Yost, K. E. et al. Clonal replacement of tumor-specific T cells diff --git a/man/loadGLIPH.Rd b/man/loadGLIPH.Rd index de5198e..813e346 100644 --- a/man/loadGLIPH.Rd +++ b/man/loadGLIPH.Rd @@ -21,3 +21,19 @@ Reads the tab-delimited output files produced by \code{\link{runGLIPH}} (when \code{result_folder} was specified) and reconstructs the same list structure that \code{runGLIPH()} returns. } +\examples{ +utils::data("gliph_input_data") +ref_df <- gliph_input_data[, c("CDR3b", "TRBV")] +tmp_dir <- tempfile("gliph_out_") +res <- runGLIPH( + cdr3_sequences = gliph_input_data[seq_len(200), ], + method = "gliph1", + refdb_beta = ref_df, + result_folder = tmp_dir, + sim_depth = 50, + n_cores = 1 +) +reloaded <- loadGLIPH(result_folder = tmp_dir) +unlink(tmp_dir, recursive = TRUE) + +} diff --git a/man/plotNetwork.Rd b/man/plotNetwork.Rd index aa7df6c..3ae22c2 100644 --- a/man/plotNetwork.Rd +++ b/man/plotNetwork.Rd @@ -75,15 +75,15 @@ zoom, hover over a node for details, and click a node to highlight its direct neighbors. } \examples{ -\dontrun{ utils::data("gliph_input_data") +ref_df <- gliph_input_data[, c("CDR3b", "TRBV")] res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200),], method = "gliph1", + refdb_beta = ref_df, sim_depth = 100, n_cores = 1) plotNetwork(clustering_output = res, n_cores = 1) -} } diff --git a/man/ref_cluster_sizes.Rd b/man/ref_cluster_sizes.Rd index f6d1330..354844b 100644 --- a/man/ref_cluster_sizes.Rd +++ b/man/ref_cluster_sizes.Rd @@ -5,7 +5,7 @@ \alias{ref_cluster_sizes} \title{Cluster size probabilities in naive reference repertoire} \format{ -An object of class \code{list} of length 2. +A list with 2 elements: original and simulated. } \source{ Glanville, J. et al. Identifying specificity groups in the T cell diff --git a/man/reference_list.Rd b/man/reference_list.Rd index 8a3d793..e102570 100644 --- a/man/reference_list.Rd +++ b/man/reference_list.Rd @@ -3,6 +3,10 @@ \name{reference_list} \alias{reference_list} \title{GLIPH reference repertoire list (external data)} +\format{ +NULL. Data is downloaded on first use via +\code{\link{getGLIPHreference}}. +} \source{ Glanville, J. et al. Identifying specificity groups in the T cell receptor repertoire. \emph{Nature} 547, 94--98 (2017). diff --git a/man/runGLIPH.Rd b/man/runGLIPH.Rd index 5ba4da7..22cd6c7 100644 --- a/man/runGLIPH.Rd +++ b/man/runGLIPH.Rd @@ -288,15 +288,15 @@ globally similar CDR3b sequences, clusters them into convergence groups, and scores each group for biological relevance. } \examples{ -\dontrun{ utils::data("gliph_input_data") +ref_df <- gliph_input_data[, c("CDR3b", "TRBV")] res <- runGLIPH( cdr3_sequences = gliph_input_data[seq_len(200), ], method = "gliph2", + refdb_beta = ref_df, sim_depth = 50, n_cores = 1 ) -} } \references{ diff --git a/tests/testthat/test-clusterScoring.R b/tests/testthat/test-clusterScoring.R index a3dd0a7..2b1201b 100644 --- a/tests/testthat/test-clusterScoring.R +++ b/tests/testthat/test-clusterScoring.R @@ -12,7 +12,7 @@ test_that("clusterScoring rejects non-list cluster_list", { test_that("clusterScoring rejects non-data.frame cdr3_sequences", { expect_error( - clusterScoring(cluster_list = list(), + clusterScoring(cluster_list = list(a = data.frame(CDR3b = "X")), cdr3_sequences = list(CDR3b = "CASSLAPGATNEKLFF")), "data.frame" ) @@ -59,7 +59,7 @@ test_that("clusterScoring rejects invalid refdb_beta name", { }) test_that("clusterScoring validates refdb_beta", { - expect_error(clusterScoring(cluster_list = list(), + expect_error(clusterScoring(cluster_list = list(a = data.frame(CDR3b = "X")), cdr3_sequences = data.frame(CDR3b = "CASSLAPGATNEKLFF"), refdb_beta = "nonexistent_ref"), "refdb_beta must be") @@ -93,7 +93,7 @@ test_that("clusterScoring rejects invalid sim_depth", { }) test_that("clusterScoring validates sim_depth minimum", { - expect_error(clusterScoring(cluster_list = list(), + expect_error(clusterScoring(cluster_list = list(a = data.frame(CDR3b = "X")), cdr3_sequences = data.frame(CDR3b = "CASSLAPGATNEKLFF"), sim_depth = 0), "sim_depth") diff --git a/tests/testthat/test-plotNetwork.R b/tests/testthat/test-plotNetwork.R index 67b5560..1c38bbe 100644 --- a/tests/testthat/test-plotNetwork.R +++ b/tests/testthat/test-plotNetwork.R @@ -43,22 +43,26 @@ test_that("plotNetwork rejects cluster_min_size < 1", { "at least 1") }) -test_that("plotNetwork requires clustering output with clusters", { +test_that("plotNetwork returns NULL for clustering output with no clusters", { mock_output <- list( cluster_list = NULL, cluster_properties = NULL, parameters = list(gliph_version = 1) ) - expect_error(plotNetwork(clustering_output = mock_output), "does not contain") + expect_message(result <- plotNetwork(clustering_output = mock_output), + "No clusters found") + expect_null(result) }) -test_that("plotNetwork rejects empty cluster_list", { +test_that("plotNetwork returns NULL for empty cluster_list", { mock_output <- list( cluster_list = list(), cluster_properties = data.frame(), parameters = list(clustering_method = "GLIPH2.0") ) - expect_error(plotNetwork(clustering_output = mock_output), "does not contain") + expect_message(result <- plotNetwork(clustering_output = mock_output), + "No clusters found") + expect_null(result) }) test_that("plotNetwork rejects non-character size_info", { diff --git a/vignettes/immGLIPH.Rmd b/vignettes/immGLIPH.Rmd index e677f73..7330e46 100644 --- a/vignettes/immGLIPH.Rmd +++ b/vignettes/immGLIPH.Rmd @@ -267,10 +267,11 @@ if (!is.null(res_gliph2$connections)) { The `"custom"` method allows independent control over each algorithmic component: -```{r eval=FALSE} +```{r} res_custom <- runGLIPH( cdr3_sequences = gliph_input_data[seq_len(200), ], method = "custom", + refdb_beta = ref_df, local_method = "fisher", # or "rrs" global_method = "cutoff", # or "fisher" clustering_method = "GLIPH1.0", # or "GLIPH2.0" @@ -290,10 +291,11 @@ For the Fisher-based local method (GLIPH2), you can adjust: (default `c(1000, 100, 10)` for 2-mers, 3-mers, 4-mers) - **`kmer_mindepth`**: Minimum motif observations in the sample (default 3) -```{r eval=FALSE} +```{r} res_strict <- runGLIPH( cdr3_sequences = gliph_input_data[seq_len(200), ], method = "gliph2", + refdb_beta = ref_df, lcminp = 0.001, # Stricter p-value lcminove = c(10000, 1000, 100), # Higher fold-change sim_depth = 100, @@ -431,7 +433,7 @@ data): rescored <- clusterScoring( cluster_list = res_gliph1$cluster_list, cdr3_sequences = gliph_input_data[seq_len(200), ], - refdb_beta = "gliph_reference", + refdb_beta = "human_v2.0_CD48", gliph_version = 2, sim_depth = 500, n_cores = 1