Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,15 @@ Description: An R implementation of the GLIPH and GLIPH2 algorithms for
algorithm papers: Glanville et al. (2017) <doi:10.1038/nature22976>
and Huang et al. (2020) <doi:10.1038/s41587-020-0505-4>.
License: MIT + file LICENSE
biocViews:
Software,
ImmunoOncology,
Clustering,
SingleCell,
Sequencing,
Visualization
Depends:
R (>= 4.0.0)
R (>= 4.5.0)
Imports:
stringdist,
igraph,
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# immGLIPH 0.99.0

* Initial Bioconductor submission
* R implementation of GLIPH and GLIPH2 algorithms for TCR clustering
* Integration with scRepertoire ecosystem via immApex
* Interactive network visualization with plotNetwork()
* De novo TCR sequence generation with deNovoTCRs()
12 changes: 9 additions & 3 deletions R/clusterScoring.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,21 +81,21 @@
#' enrichment, clonal expansion enrichment, and common HLA enrichment).
#'
#' @examples
#' \dontrun{
#' utils::data("gliph_input_data")
#' ref_df <- gliph_input_data[, c("CDR3b", "TRBV")]
#'
#' res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200), ],
#' refdb_beta = ref_df,
#' sim_depth = 100,
#' n_cores = 1)
#'
#' scoring_results <- clusterScoring(
#' cluster_list = res$cluster_list,
#' cdr3_sequences = gliph_input_data[seq_len(200), ],
#' refdb_beta = "human_v2.0_CD48",
#' refdb_beta = ref_df,
#' gliph_version = 1,
#' sim_depth = 100,
#' n_cores = 1)
#' }
#'
#' @references Glanville, Jacob, et al.
#' "Identifying specificity groups in the T cell receptor repertoire." Nature 547.7661 (2017): 94.
Expand Down Expand Up @@ -188,6 +188,12 @@ clusterScoring <- function(cluster_list,
n_cores <- max(1, min(n_cores, parallel::detectCores()-2))
}

### Early return for empty cluster_list (after all validation)
if(length(cluster_list) == 0) {
message("No clusters to score.")
return(data.frame())
}

#################################################################
## Preparation ##
#################################################################
Expand Down
6 changes: 6 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#' (e.g. \code{"P17B"}, \code{"P19L"}).}
#' }
#'
#' @format A data.frame with 365 rows and 3 columns (CDR3b, TRBV, patient).
#' @docType data
#' @keywords datasets
#' @name gliph_input_data
Expand Down Expand Up @@ -41,6 +42,7 @@
#' This object demonstrates how to pass a SingleCellExperiment directly to
#' \code{\link{runGLIPH}}.
#'
#' @format A SingleCellExperiment with 2000 genes and 500 cells.
#' @docType data
#' @keywords datasets
#' @name gliph_sce
Expand All @@ -67,6 +69,7 @@
#' actual sample size is used.}
#' }
#'
#' @format A list with 2 elements: original and simulated.
#' @docType data
#' @keywords datasets
#' @name ref_cluster_sizes
Expand All @@ -82,6 +85,7 @@
#' segments that may appear in the CDR3 region. These fragments are used by the
#' GLIPH2 algorithm to identify germline-encoded sequence segments.
#'
#' @format A list of 3 data.frames: gTRV, gTRD, and gTRJ.
#' @docType data
#' @keywords datasets
#' @name gTRB
Expand Down Expand Up @@ -137,6 +141,8 @@
#'
#' Raw data downloaded from \url{http://50.255.35.37:8080/tools}.
#'
#' @format NULL. Data is downloaded on first use via
#' \code{\link{getGLIPHreference}}.
#' @name reference_list
#' @keywords datasets
NULL
46 changes: 26 additions & 20 deletions R/deNovoTCRs.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,26 @@
#' \code{<convergence_group_tag>_de_novo.txt} is also written to disk.
#'
#' @examples
#' \dontrun{
#' utils::data("gliph_input_data")
#' res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200),],
#' method = "gliph1",
#' sim_depth = 100,
#' n_cores = 1)
#'
#' # Build a minimal clustering output to demonstrate deNovoTCRs
#' fake_cluster <- data.frame(
#' CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPGGTNEKLFF",
#' "CASSLAPGDTNEKLFF", "CASSLAPGETNEKLFF",
#' "CASSLAPGANEKLFF", "CASSLAPGVTNEKLFF"),
#' TRBV = rep("TRBV5-1", 6),
#' stringsAsFactors = FALSE
#' )
#' fake_output <- list(cluster_list = list("motif-LAP" = fake_cluster))
#' ref_seqs <- fake_cluster[, c("CDR3b", "TRBV")]
#' new_seqs <- deNovoTCRs(
#' convergence_group_tag = res$cluster_properties$tag[1],
#' clustering_output = res,
#' sims = 10000,
#' make_figure = TRUE,
#' n_cores = 1)
#' }
#' convergence_group_tag = "motif-LAP",
#' clustering_output = fake_output,
#' refdb_beta = ref_seqs,
#' sims = 100,
#' num_tops = 10,
#' min_length = 8,
#' make_figure = FALSE,
#' n_cores = 1
#' )
#'
#' @references Glanville, Jacob, et al.
#' "Identifying specificity groups in the T cell receptor repertoire." Nature 547.7661 (2017): 94.
Expand Down Expand Up @@ -197,7 +203,7 @@ deNovoTCRs <- function(convergence_group_tag,
if(length(excluded) > 0){
all_crg_cdr3_seqs <- all_crg_cdr3_seqs[-excluded]
if(length(all_crg_cdr3_seqs) > 0){
message("Warning: ", length(excluded), " sequences of the convergence group were excluded from the further procedure due to falling below a minimum length of ", min_length, ".")
warning(length(excluded), " sequences of the convergence group were excluded from the further procedure due to falling below a minimum length of ", min_length, ".", call. = FALSE)
} else {
stop("No sequences of the convergence group are of minimum length of ", min_length, ". For further procedure, adjust the parameter 'min_length'")
}
Expand Down Expand Up @@ -230,7 +236,7 @@ deNovoTCRs <- function(convergence_group_tag,
if(ncol(refseqs) > 1 && v_gene_norm == TRUE){
message("Notification: Second column of reference database is considered as V-gene information.")
} else if(v_gene_norm == TRUE){
message("Warning: Beta sequence reference database is missing column containing V-genes. Without V-gene information normalization may be inaccurate.")
warning("Beta sequence reference database is missing column containing V-genes. Without V-gene information normalization may be inaccurate.", call. = FALSE)
v_gene_norm <- FALSE
}
if(ncol(refseqs) == 1) refseqs <- cbind(refseqs, rep("", nrow(refseqs)))
Expand All @@ -245,7 +251,7 @@ deNovoTCRs <- function(convergence_group_tag,
if(nrow(refseqs) == 0){
normalization <- FALSE
v_gene_norm <- FALSE
message("Warning: No reference sequences with a minimum length of ", min_length, " given. Normalization therefore not possible. Adjust min_length to enable normalization.")
warning("No reference sequences with a minimum length of ", min_length, " given. Normalization therefore not possible. Adjust min_length to enable normalization.", call. = FALSE)
} else {
ref_vgenes <- as.character(refseqs$TRBV)
refseqs <- as.character(refseqs$CDR3b)
Expand All @@ -262,7 +268,7 @@ deNovoTCRs <- function(convergence_group_tag,
if(ncol(refseqs) > 1 && v_gene_norm == TRUE){
message("Notification: Second column of reference database is considered as V-gene information.")
} else if(v_gene_norm == TRUE){
message("Warning: Beta sequence reference database is missing column containing V-genes. Without V-gene information normalization may be inaccurate.")
warning("Beta sequence reference database is missing column containing V-genes. Without V-gene information normalization may be inaccurate.", call. = FALSE)
v_gene_norm <- FALSE
}
if(ncol(refseqs) == 1) refseqs <- cbind(refseqs, rep("", nrow(refseqs)))
Expand All @@ -277,7 +283,7 @@ deNovoTCRs <- function(convergence_group_tag,
if(nrow(refseqs) == 0){
normalization <- FALSE
v_gene_norm <- FALSE
message("Warning: No reference sequences with a minimum length of ", min_length, " given. Normalization therefore not possible. Adjust min_length to enable normalization.")
warning("No reference sequences with a minimum length of ", min_length, " given. Normalization therefore not possible. Adjust min_length to enable normalization.", call. = FALSE)
} else {
ref_vgenes <- as.character(refseqs$TRBV)
refseqs <- as.character(refseqs$CDR3b)
Expand All @@ -289,8 +295,8 @@ deNovoTCRs <- function(convergence_group_tag,
v_genes <- crg$TRBV
} else if(v_gene_norm == TRUE){
v_gene_norm <- FALSE
message("Warning: Without V-gene information of sample sequences normalization may be inaccurate.")
} else message("Warning: Without V-gene restriction normalization may be inaccurate.")
warning("Without V-gene information of sample sequences normalization may be inaccurate.", call. = FALSE)
} else warning("Without V-gene restriction normalization may be inaccurate.", call. = FALSE)
}

### Initialization
Expand Down
8 changes: 8 additions & 0 deletions R/getRandomSubsample.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@
#' @return A character vector of length \code{length(motif_region)} drawn from
#' \code{refseqs_motif_region}.
#'
#' @examples
#' ref_seqs <- c("ASSG", "ASSD", "ASSE", "ASSF", "ASSK", "ASSL")
#' sample_seqs <- c("ASSG", "ASSF", "ASSL")
#' sub <- getRandomSubsample(
#' refseqs_motif_region = ref_seqs,
#' motif_region = sample_seqs
#' )
#'
#' @export

getRandomSubsample <- function(cdr3_len_stratify = FALSE,
Expand Down
2 changes: 2 additions & 0 deletions R/global-cutoff.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
}

#' immApex-accelerated global cutoff via buildNetwork()
#' @return A list with edge data and excluded sequence IDs.
#' @keywords internal
.global_cutoff_immapex <- function(seqs, motif_region, sequences,
gccutoff, global_vgene, verbose) {
Expand Down Expand Up @@ -118,6 +119,7 @@
}

#' stringdist + foreach fallback for global cutoff
#' @return A list with edge data and excluded sequence IDs.
#' @keywords internal
.global_cutoff_stringdist <- function(seqs, motif_region, sequences,
gccutoff, global_vgene, no_cores,
Expand Down
15 changes: 15 additions & 0 deletions R/loadGLIPH.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,21 @@
#' \code{cluster_properties}, \code{motif_enrichment}, \code{connections}, and
#' \code{parameters}.
#'
#' @examples
#' utils::data("gliph_input_data")
#' ref_df <- gliph_input_data[, c("CDR3b", "TRBV")]
#' tmp_dir <- tempfile("gliph_out_")
#' res <- runGLIPH(
#' cdr3_sequences = gliph_input_data[seq_len(200), ],
#' method = "gliph1",
#' refdb_beta = ref_df,
#' result_folder = tmp_dir,
#' sim_depth = 50,
#' n_cores = 1
#' )
#' reloaded <- loadGLIPH(result_folder = tmp_dir)
#' unlink(tmp_dir, recursive = TRUE)
#'
#' @export
loadGLIPH <- function(result_folder = ""){

Expand Down
44 changes: 23 additions & 21 deletions R/plotNetwork.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,16 @@
#' @return A `visNetwork` object containing the interactive network graph.
#'
#' @examples
#' \dontrun{
#' utils::data("gliph_input_data")
#' ref_df <- gliph_input_data[, c("CDR3b", "TRBV")]
#' res <- runGLIPH(cdr3_sequences = gliph_input_data[seq_len(200),],
#' method = "gliph1",
#' refdb_beta = ref_df,
#' sim_depth = 100,
#' n_cores = 1)
#'
#' plotNetwork(clustering_output = res,
#' n_cores = 1)
#' }
#'
#' @import viridis foreach grDevices
#' @export
Expand Down Expand Up @@ -134,8 +134,10 @@ plotNetwork <- function(clustering_output = NULL,
# cluster_properties: contains cluster specific information like all scores
parameters <- clustering_output$parameters
cluster_list <- clustering_output$cluster_list
if(is.null(cluster_list)) stop("The specified clustering_output does not contain any clusters.")
if(length(cluster_list) == 0) stop("The specified clustering_output does not contain any clusters.")
if(is.null(cluster_list) || length(cluster_list) == 0) {
message("No clusters found in the clustering output.")
return(invisible(NULL))
}
cluster_properties <- clustering_output$cluster_properties

hold_ids <- which(as.numeric(cluster_properties$cluster_size) >= cluster_min_size)
Expand Down Expand Up @@ -539,7 +541,7 @@ plotNetwork <- function(clustering_output = NULL,
}
if(color_info == "color" && !(any(plotfunctions::isColor(cluster_data_frame[, color_info]) == FALSE))) stop("Column ", color_info, " determining node color has to contain only values that represent colors.")

color.scale = ""
color.scale <- ""
if("color" %in% colnames(cluster_data_frame)){
# Use the user specified colors
vert.info$color <- cluster_data_frame$color[vert.info$id]
Expand Down Expand Up @@ -633,13 +635,13 @@ plotNetwork <- function(clustering_output = NULL,
v.title <- vertexes
v.color <- rep("gray", num.v)
v.shadow <- rep(FALSE, num.v)
if ("size" %in% names(vertex.info)) v.size = as.numeric(vertex.info$size)
if ("label" %in% names(vertex.info)) v.label = as.character(vertex.info$label)
if ("group" %in% names(vertex.info)) v.group = as.character(vertex.info$group)
if ("shape" %in% names(vertex.info)) v.shape = as.character(vertex.info$shape)
if ("title" %in% names(vertex.info)) v.title = as.character(vertex.info$title)
if ("color" %in% names(vertex.info)) v.color = as.character(vertex.info$color)
if ("shadow" %in% names(vertex.info)) v.shadow = as.logical(vertex.info$shadow)
if ("size" %in% names(vertex.info)) v.size <- as.numeric(vertex.info$size)
if ("label" %in% names(vertex.info)) v.label <- as.character(vertex.info$label)
if ("group" %in% names(vertex.info)) v.group <- as.character(vertex.info$group)
if ("shape" %in% names(vertex.info)) v.shape <- as.character(vertex.info$shape)
if ("title" %in% names(vertex.info)) v.title <- as.character(vertex.info$title)
if ("color" %in% names(vertex.info)) v.color <- as.character(vertex.info$color)
if ("shadow" %in% names(vertex.info)) v.shadow <- as.logical(vertex.info$shadow)
nodes <- data.frame(id = vertexes,
color = list(background = v.color, border = "black", highlight = "red"),
size=v.size,
Expand All @@ -661,15 +663,15 @@ plotNetwork <- function(clustering_output = NULL,
e.title <- rep("",num.e)
e.smooth <- rep(FALSE,num.e)
e.shadow <- rep(FALSE,num.e)
if ("length" %in% names(edge.info)) e.length = as.numeric(edge.info$length)
if ("label" %in% names(edge.info)) e.label = as.character(edge.info$label)
if ("width" %in% names(edge.info)) e.width = as.numeric(edge.info$width)
if ("color" %in% names(edge.info)) e.color = as.character(edge.info$color)
if ("arrows" %in% names(edge.info)) e.arrows = as.character(edge.info$arrows)
if ("dashes" %in% names(edge.info)) e.dashes = as.logical(edge.info$dashes)
if ("title" %in% names(edge.info)) e.title = as.character(edge.info$title)
if ("smooth" %in% names(edge.info)) e.smooth = as.logical(edge.info$smooth)
if ("shadow" %in% names(edge.info)) e.shadow = as.logical(edge.info$shadow)
if ("length" %in% names(edge.info)) e.length <- as.numeric(edge.info$length)
if ("label" %in% names(edge.info)) e.label <- as.character(edge.info$label)
if ("width" %in% names(edge.info)) e.width <- as.numeric(edge.info$width)
if ("color" %in% names(edge.info)) e.color <- as.character(edge.info$color)
if ("arrows" %in% names(edge.info)) e.arrows <- as.character(edge.info$arrows)
if ("dashes" %in% names(edge.info)) e.dashes <- as.logical(edge.info$dashes)
if ("title" %in% names(edge.info)) e.title <- as.character(edge.info$title)
if ("smooth" %in% names(edge.info)) e.smooth <- as.logical(edge.info$smooth)
if ("shadow" %in% names(edge.info)) e.shadow <- as.logical(edge.info$shadow)
edges <- data.frame(from = eds$from, to = eds$to,
length = e.length,
width = e.width,
Expand Down
4 changes: 2 additions & 2 deletions R/runGLIPH.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,15 @@
#' \doi{10.1038/s41587-020-0505-4}
#'
#' @examples
#' \dontrun{
#' utils::data("gliph_input_data")
#' ref_df <- gliph_input_data[, c("CDR3b", "TRBV")]
#' res <- runGLIPH(
#' cdr3_sequences = gliph_input_data[seq_len(200), ],
#' method = "gliph2",
#' refdb_beta = ref_df,
#' sim_depth = 50,
#' n_cores = 1
#' )
#' }
#'
#' @import foreach
#' @export
Expand Down
1 change: 1 addition & 0 deletions R/utils-output.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#'
#' @param parameters Named list of parameters
#' @param result_folder Path to output folder
#' @return NULL (invisibly). Called for side effect of writing file.
#' @keywords internal
.save_parameters <- function(parameters, result_folder) {
paras <- data.frame(
Expand Down
1 change: 1 addition & 0 deletions R/utils-parallel.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
}

#' Stop parallel backend
#' @return NULL (invisibly). Called for side effect.
#' @keywords internal
.stop_parallel <- function() {
doParallel::stopImplicitCluster()
Expand Down
Loading