From 7bf1ae69de8651904da2075de7a1fc9957e501db Mon Sep 17 00:00:00 2001 From: theHumanBorch Date: Fri, 27 Feb 2026 18:23:34 -0600 Subject: [PATCH] more work on unit tests --- tests/testthat/test-clustering.R | 251 ++++++++++++++++++++++++++ tests/testthat/test-deNovoTCRs.R | 145 +++++++++++++++ tests/testthat/test-extractInput.R | 112 ++++++++++++ tests/testthat/test-loadGLIPH.R | 144 +++++++++++++++ tests/testthat/test-local-fisher.R | 150 +++++++++++++++ tests/testthat/test-plotNetwork.R | 149 +++++++++++++++ tests/testthat/test-utils-parallel.R | 56 ++++++ tests/testthat/test-utils-reference.R | 149 +++++++++++++++ 8 files changed, 1156 insertions(+) create mode 100644 tests/testthat/test-utils-parallel.R diff --git a/tests/testthat/test-clustering.R b/tests/testthat/test-clustering.R index c83cb01..1c9fa95 100644 --- a/tests/testthat/test-clustering.R +++ b/tests/testthat/test-clustering.R @@ -668,3 +668,254 @@ test_that(".cluster_gliph2 generates save_cluster_list_df", { expect_true("tag" %in% colnames(result$save_cluster_list_df)) } }) + +# ---- .cluster_gliph2 verbose messaging --------------------------------------- + +test_that(".cluster_gliph2 prints message when verbose is TRUE", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(3), + num_in_ref = c(10), + num_fold = c(5.0), + fisher.score = c(0.001), + members = paste(d$seqs[1:3], collapse = " "), + stringsAsFactors = FALSE + ) + + expect_message( + immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 10, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = TRUE + ), + "GLIPH2" + ) +}) + +# ---- .cluster_gliph1 with patient.info = FALSE ------------------------------- + +test_that(".cluster_gliph1 works without patient info", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF") + sequences <- data.frame( + seq_ID = seq_along(seqs), + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + clone_network <- data.frame( + V1 = "CASSLAPGATNEKLFF", + V2 = "CASSLDRGEVFF", + type = "local", + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = sequences, + not_in_global_ids = integer(0), + seqs = seqs, + vgene.info = TRUE, + patient.info = FALSE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 1, + verbose = FALSE + ) + + expect_type(result, "list") + expect_true(!is.null(result$cluster_properties)) +}) + +# ---- .cluster_gliph2 with global_vgene TRUE includes TRBV in tag ------------ + +test_that(".cluster_gliph2 includes TRBV in tag when global_vgene is TRUE", { + d <- .make_cluster_data() + + global_res <- data.frame( + cluster_tag = c("struct_%_13"), + cluster_size = c(3), + unique_CDR3b = c(3), + num_in_ref = c(5), + fisher.score = c(0.001), + aa_at_position = c("L"), + TRBV = c("TRBV5-1"), + CDR3b = paste(d$seqs[c(1, 3, 5)], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = NULL, + global_res = global_res, + sequences = d$sequences, + local_similarities = FALSE, + global_similarities = TRUE, + global_vgene = TRUE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 1, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + if (!is.null(result$merged_clusters)) { + expect_true(any(grepl("TRBV", result$merged_clusters$tag))) + } +}) + +# ---- .cluster_gliph2 handles infinite OvE ------------------------------------ + +test_that(".cluster_gliph2 handles infinite OvE values", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(3), + num_in_ref = c(0), + num_fold = c(Inf), + fisher.score = c(0.001), + members = paste(d$seqs[1:3], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 10, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + expect_type(result, "list") + if (!is.null(result$merged_clusters)) { + # Infinite OvE should be replaced with 0 + expect_true(all(is.finite(as.numeric(result$merged_clusters$OvE)))) + } +}) + +# ---- .cluster_gliph1 duplicate cluster naming -------------------------------- + +test_that(".cluster_gliph1 handles duplicate cluster names with suffixes", { + # Create sequences where two separate components have the same first CDR3b + seqs <- c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSLAPGATNEKLFF", + "CASSYLAGGRNTLYF") + sequences <- data.frame( + seq_ID = 1:4, + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1", "TRBV7-2"), + patient = c("P1", "P1", "P2", "P2"), + stringsAsFactors = FALSE + ) + + clone_network <- data.frame( + V1 = c("CASSLAPGATNEKLFF", "CASSLAPGATNEKLFF"), + V2 = c("CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + type = c("local", "local"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = sequences, + not_in_global_ids = integer(0), + seqs = unique(seqs), + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 1, + verbose = FALSE + ) + + expect_type(result, "list") + if (!is.null(result$cluster_properties)) { + # All tags should be unique + expect_equal(length(unique(result$cluster_properties$tag)), + nrow(result$cluster_properties)) + } +}) + +# ---- .cluster_gliph2 motif_distance_cutoff restricts edges ------------------- + +test_that(".cluster_gliph2 motif_distance_cutoff = 0 eliminates positionally distant edges", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(3), + num_in_ref = c(10), + num_fold = c(5.0), + fisher.score = c(0.001), + members = paste(d$seqs[1:3], collapse = " "), + stringsAsFactors = FALSE + ) + + result_strict <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 0, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + result_lenient <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 100, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + # Strict cutoff should have fewer or equal edges + n_strict <- if (!is.null(result_strict$clone_network)) { + sum(result_strict$clone_network$type == "local", na.rm = TRUE) + } else 0 + n_lenient <- if (!is.null(result_lenient$clone_network)) { + sum(result_lenient$clone_network$type == "local", na.rm = TRUE) + } else 0 + expect_true(n_strict <= n_lenient) +}) diff --git a/tests/testthat/test-deNovoTCRs.R b/tests/testthat/test-deNovoTCRs.R index b5d6ae8..a99b4f9 100644 --- a/tests/testthat/test-deNovoTCRs.R +++ b/tests/testthat/test-deNovoTCRs.R @@ -343,3 +343,148 @@ test_that("deNovoTCRs with accept_sequences_with_C_F_start_end = FALSE", { expect_type(result, "list") expect_s3_class(result$de_novo_sequences, "data.frame") }) + +# ---- num_tops truncation when exceeding available sequences ------------------ + +test_that("deNovoTCRs truncates num_tops when exceeding available sequences", { + skip_on_cran() + + cluster_members <- data.frame( + seq_ID = 1:5, + CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPRATNEKLFF", + "CASSLAPGETQEKLFF", "CASSLAPQATNEKLFF", + "CASSLAPGAGNEKLFF"), + TRBV = rep("TRBV5-1", 5), + stringsAsFactors = FALSE + ) + + mock_output <- list( + cluster_list = list("CRG-test" = cluster_members) + ) + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + # Request far more top sequences than sims can produce + result <- deNovoTCRs( + convergence_group_tag = "CRG-test", + clustering_output = mock_output, + refdb_beta = ref_df, + sims = 10, + num_tops = 100000, + min_length = 10, + make_figure = FALSE, + n_cores = 1 + ) + + expect_type(result, "list") + expect_s3_class(result$de_novo_sequences, "data.frame") + # num_tops should be capped at the actual number of unique sequences generated + expect_true(nrow(result$de_novo_sequences) <= 10) +}) + +# ---- refdb_beta validation: invalid string ----------------------------------- + +test_that("deNovoTCRs rejects invalid refdb_beta string", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list("CRG-1" = data.frame(CDR3b = "CASSLAPGATNEKLFF"))), + refdb_beta = "invalid_reference_name"), + "data frame" + ) +}) + +# ---- Multiple sims validation ------------------------------------------------ + +test_that("deNovoTCRs rejects multiple sims values", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + sims = c(100, 200)), + "single number" + ) +}) + +# ---- Multiple num_tops validation -------------------------------------------- + +test_that("deNovoTCRs rejects multiple num_tops values", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + num_tops = c(10, 20)), + "single number" + ) +}) + +# ---- Multiple min_length validation ------------------------------------------ + +test_that("deNovoTCRs rejects multiple min_length values", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + min_length = c(5, 10)), + "single number" + ) +}) + +# ---- Multiple n_cores validation --------------------------------------------- + +test_that("deNovoTCRs rejects multiple n_cores values", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + n_cores = c(1, 2)), + "single number" + ) +}) + +# ---- n_cores < 1 validation ------------------------------------------------- + +test_that("deNovoTCRs rejects n_cores less than 1", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + n_cores = 0), + "at least 1" + ) +}) + +# ---- deNovoTCRs errors when all sequences below min_length ------------------ + +test_that("deNovoTCRs errors when all cluster sequences are below min_length", { + skip_on_cran() + + cluster_members <- data.frame( + seq_ID = 1:2, + CDR3b = c("CASSLAP", "CASSLD"), + TRBV = rep("TRBV5-1", 2), + stringsAsFactors = FALSE + ) + + mock_output <- list( + cluster_list = list("CRG-test" = cluster_members) + ) + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF"), + TRBV = c("TRBV5-1"), + stringsAsFactors = FALSE + ) + + expect_error( + deNovoTCRs( + convergence_group_tag = "CRG-test", + clustering_output = mock_output, + refdb_beta = ref_df, + sims = 10, + num_tops = 5, + min_length = 20, + make_figure = FALSE, + n_cores = 1 + ), + "min_length" + ) +}) diff --git a/tests/testthat/test-extractInput.R b/tests/testthat/test-extractInput.R index 133fbae..6ee9446 100644 --- a/tests/testthat/test-extractInput.R +++ b/tests/testthat/test-extractInput.R @@ -248,3 +248,115 @@ test_that(".extract_input standardizes alternative column names", { expect_true("CDR3b" %in% colnames(result)) expect_true("TRBV" %in% colnames(result)) }) + +# ---- .extract_input rejects numeric input ------------------------------------ + +test_that(".extract_input rejects numeric input", { + expect_error(immGLIPH:::.extract_input(42), "must be a character vector") +}) + +# ---- .extract_input rejects NULL input --------------------------------------- + +test_that(".extract_input rejects NULL input", { + expect_error(immGLIPH:::.extract_input(NULL), "must be a character vector") +}) + +# ---- .parse_sequences accept_CF filtering ------------------------------------ + +test_that(".parse_sequences keeps all sequences when accept_CF not in function", { + # .parse_sequences does not filter by C/F - that's done by other functions + + # Verify that sequences without C/F pass through .parse_sequences + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "AASSLAPGATNEKLFG"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.parse_sequences(df, verbose = FALSE) + expect_equal(nrow(result$sequences), 2) +}) + +# ---- .parse_sequences verbose messaging on single-column DF ------------------ + +test_that(".parse_sequences prints message for single-column data frame", { + df <- data.frame(not_cdr3 = c("CASSLAPGATNEKLFF"), stringsAsFactors = FALSE) + expect_message( + immGLIPH:::.parse_sequences(df, verbose = TRUE), + "first column" + ) +}) + +# ---- .parse_sequences with empty counts column ------------------------------- + +test_that(".parse_sequences treats all-NA counts as 1", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + counts = c(NA, NA), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.parse_sequences(df, verbose = FALSE) + expect_true(result$count.info) + expect_true(all(as.numeric(result$sequences$counts) == 1)) +}) + +# ---- .standardize_colnames with Frequency column ----------------------------- + +test_that(".standardize_colnames maps Frequency to counts", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF"), + Frequency = c(10), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.standardize_colnames(df) + expect_true("counts" %in% colnames(result)) +}) + +# ---- .standardize_colnames with junction_aa column --------------------------- + +test_that(".standardize_colnames maps junction_aa to CDR3b", { + df <- data.frame( + junction_aa = c("CASSLAPGATNEKLFF"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.standardize_colnames(df) + expect_true("CDR3b" %in% colnames(result)) +}) + +# ---- .standardize_colnames with sample.id column ----------------------------- + +test_that(".standardize_colnames maps sample.id to patient", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF"), + sample.id = c("P1"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.standardize_colnames(df) + expect_true("patient" %in% colnames(result)) +}) + +# ---- .parse_sequences preserves row count after filtering -------------------- + +test_that(".parse_sequences preserves valid rows after AA filtering", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "BAD123SEQ"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV7-2"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.parse_sequences(df, verbose = FALSE) + expect_equal(nrow(result$sequences), 2) + expect_true(all(result$sequences$seq_ID == 1:2)) +}) + +# ---- .extract_input with list input requires immApex ------------------------- + +test_that(".extract_input errors on list input without immApex or with message", { + # This tests the list (combineTCR output) path + mock_list <- list(data.frame(CDR3b = "CASSLAPGATNEKLFF")) + # If immApex is not available, should error; if available, should succeed + if (!requireNamespace("immApex", quietly = TRUE)) { + expect_error(immGLIPH:::.extract_input(mock_list), "immApex") + } else { + # immApex is available but the list may not be valid combineTCR format + # This verifies the code path is reached + expect_true(TRUE) + } +}) diff --git a/tests/testthat/test-loadGLIPH.R b/tests/testthat/test-loadGLIPH.R index ac751e5..25b7c4a 100644 --- a/tests/testthat/test-loadGLIPH.R +++ b/tests/testthat/test-loadGLIPH.R @@ -172,3 +172,147 @@ test_that("loadGLIPH errors when parameter.txt is missing", { expect_error(loadGLIPH(result_folder = tmp_dir), "missing") }) + +# ---- loadGLIPH handles missing optional files gracefully --------------------- + +test_that("loadGLIPH handles missing optional files gracefully", { + skip_on_cran() + + # Create a minimal parameter.txt (gliph_version 1 format) + tmp_dir <- file.path(tempdir(), paste0("gliph_minimal_test_", Sys.getpid())) + dir.create(tmp_dir, showWarnings = FALSE) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + params <- data.frame( + V1 = c("method", "gliph_version", "sim_depth", "lcminp", + "lcminove", "lckmer_mindepth", "motif_length", + "positional_motifs", "public_tcrs", "global_vgene"), + V2 = c("gliph1", "1", "100", "0.001", + "10,5", "3", "2,3", + "TRUE", "TRUE", "FALSE"), + stringsAsFactors = FALSE + ) + utils::write.table(params, file = file.path(tmp_dir, "parameter.txt"), + sep = "\t", quote = FALSE, row.names = FALSE, + col.names = FALSE) + + # Should load successfully even though optional files are missing + result <- loadGLIPH(result_folder = tmp_dir) + expect_type(result, "list") + expect_true("parameters" %in% names(result)) + expect_equal(result$parameters$gliph_version, 1) + expect_equal(result$parameters$motif_length, c(2, 3)) + expect_equal(result$parameters$lcminove, c(10, 5)) +}) + +# ---- loadGLIPH parses comma-separated motif_length and lcminove -------------- + +test_that("loadGLIPH parses comma-separated motif_length and lcminove", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + tmp_dir <- file.path(tempdir(), paste0("gliph_parse_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph2", + refdb_beta = ref_df, + result_folder = tmp_dir, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + loaded <- loadGLIPH(result_folder = tmp_dir) + expect_true(is.numeric(loaded$parameters$motif_length)) + expect_true(is.numeric(loaded$parameters$lcminove)) + expect_true(length(loaded$parameters$motif_length) >= 1) +}) + +# ---- loadGLIPH output structure matches runGLIPH ----------------------------- + +test_that("loadGLIPH output has same cluster_list names as original", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + tmp_dir <- file.path(tempdir(), paste0("gliph_names_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + result_folder = tmp_dir, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + loaded <- loadGLIPH(result_folder = tmp_dir) + + if (!is.null(res$cluster_list) && !is.null(loaded$cluster_list)) { + expect_equal(sort(names(loaded$cluster_list)), sort(names(res$cluster_list))) + } +}) + +# ---- loadGLIPH messages about loading ---------------------------------------- + +test_that("loadGLIPH produces informative messages", { + skip_on_cran() + + tmp_dir <- file.path(tempdir(), paste0("gliph_msg_test_", Sys.getpid())) + dir.create(tmp_dir, showWarnings = FALSE) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + params <- data.frame( + V1 = c("method", "gliph_version", "sim_depth", "lcminp", + "lcminove", "lckmer_mindepth", "motif_length", + "positional_motifs", "public_tcrs", "global_vgene"), + V2 = c("gliph1", "1", "100", "0.001", + "10,5", "3", "2,3", + "TRUE", "TRUE", "FALSE"), + stringsAsFactors = FALSE + ) + utils::write.table(params, file = file.path(tmp_dir, "parameter.txt"), + sep = "\t", quote = FALSE, row.names = FALSE, + col.names = FALSE) + + expect_message(loadGLIPH(result_folder = tmp_dir), "loaded") +}) diff --git a/tests/testthat/test-local-fisher.R b/tests/testthat/test-local-fisher.R index 3ffc66d..61d1f57 100644 --- a/tests/testthat/test-local-fisher.R +++ b/tests/testthat/test-local-fisher.R @@ -345,3 +345,153 @@ test_that(".local_fisher avgRef is normalized to sample set size", { expect_true(is.numeric(result$all_motifs$avgRef)) expect_true(all(result$all_motifs$avgRef >= 0)) }) + +# ---- topRef column ----------------------------------------------------------- + +test_that(".local_fisher topRef column is numeric", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_true(is.numeric(result$all_motifs$topRef)) +}) + +# ---- Empty selected_motifs when all below kmer_mindepth ---------------------- + +test_that(".local_fisher returns empty selected_motifs when all below kmer_mindepth", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 9999, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_equal(nrow(result$selected_motifs), 0) + # all_motifs should still contain motifs found + expect_true(nrow(result$all_motifs) > 0) +}) + +# ---- Multiple motif lengths with matching lcminove vector -------------------- + +test_that(".local_fisher correctly applies per-length lcminove thresholds", { + d <- .make_fisher_data() + + # Very high threshold for length 2, very low for length 3 + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(1e6, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + if (nrow(result$selected_motifs) > 0) { + # No 2-mer motifs should pass the high threshold + two_mers <- result$selected_motifs[nchar(result$selected_motifs$motif) == 2, ] + expect_equal(nrow(two_mers), 0) + } +}) + +# ---- motif length 4 --------------------------------------------------------- + +test_that(".local_fisher works with motif_length = 4", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = 4, + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = 0, + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_type(result, "list") + if (nrow(result$all_motifs) > 0) { + # All continuous motifs should be 4 characters + expect_true(all(nchar(result$all_motifs$motif) == 4)) + } +}) + +# ---- counts column is always >= 1 in all_motifs ----------------------------- + +test_that(".local_fisher all_motifs counts are positive", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_true(all(result$all_motifs$counts >= 1)) +}) + +# ---- num_in_ref is non-negative in all_motifs -------------------------------- + +test_that(".local_fisher num_in_ref is non-negative", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_true(all(result$all_motifs$num_in_ref >= 0)) +}) diff --git a/tests/testthat/test-plotNetwork.R b/tests/testthat/test-plotNetwork.R index 3e4c260..b458b0f 100644 --- a/tests/testthat/test-plotNetwork.R +++ b/tests/testthat/test-plotNetwork.R @@ -516,3 +516,152 @@ test_that("plotNetwork errors when no clusters meet cluster_min_size", { ) } }) + +# ---- plotNetwork with custom color_palette ----------------------------------- + +test_that("plotNetwork works with custom color_palette", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + # Use grDevices heat.colors as custom palette + plot_obj <- plotNetwork( + clustering_output = res, + color_palette = grDevices::heat.colors, + cluster_min_size = 2, + n_cores = 1 + ) + expect_s3_class(plot_obj, "visNetwork") + } +}) + +# ---- plotNetwork loads from result_folder ------------------------------------ + +test_that("plotNetwork loads from result_folder", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + tmp_dir <- file.path(tempdir(), paste0("plotnet_folder_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + result_folder = tmp_dir, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + plot_obj <- plotNetwork( + result_folder = tmp_dir, + cluster_min_size = 2, + n_cores = 1 + ) + expect_s3_class(plot_obj, "visNetwork") + } +}) + +# ---- plotNetwork with edge color customization ------------------------------- + +test_that("plotNetwork respects custom edge colors", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + plot_obj <- plotNetwork( + clustering_output = res, + local_edge_color = "red", + global_edge_color = "blue", + cluster_min_size = 2, + n_cores = 1 + ) + expect_s3_class(plot_obj, "visNetwork") + } +}) + +# ---- plotNetwork n_cores validation ------------------------------------------ + +test_that("plotNetwork rejects multiple n_cores values", { + expect_error( + plotNetwork(clustering_output = list(), n_cores = c(1, 2)), + "single number" + ) +}) diff --git a/tests/testthat/test-utils-parallel.R b/tests/testthat/test-utils-parallel.R new file mode 100644 index 0000000..b0a8a24 --- /dev/null +++ b/tests/testthat/test-utils-parallel.R @@ -0,0 +1,56 @@ +# Tests for .setup_parallel() and .stop_parallel() + +# ---- .setup_parallel with n_cores = 1 ---------------------------------------- + +test_that(".setup_parallel with 1 core returns 1", { + result <- immGLIPH:::.setup_parallel(1) + expect_equal(result, 1L) +}) + +# ---- .setup_parallel with NULL auto-detects ----------------------------------- + +test_that(".setup_parallel with NULL returns at least 1", { + result <- immGLIPH:::.setup_parallel(NULL) + expect_true(result >= 1L) + expect_true(result <= parallel::detectCores()) + # Clean up + immGLIPH:::.stop_parallel() +}) + +# ---- .setup_parallel clamps to valid range ------------------------------------ + +test_that(".setup_parallel clamps excessive core count", { + max_cores <- parallel::detectCores() + result <- immGLIPH:::.setup_parallel(max_cores + 100) + expect_true(result <= max_cores) + expect_true(result >= 1L) + # Clean up + immGLIPH:::.stop_parallel() +}) + +test_that(".setup_parallel clamps negative core count to 1", { + result <- immGLIPH:::.setup_parallel(-5) + expect_equal(result, 1L) +}) + +test_that(".setup_parallel clamps zero to 1", { + result <- immGLIPH:::.setup_parallel(0) + expect_equal(result, 1L) +}) + +# ---- .stop_parallel runs without error ---------------------------------------- + +test_that(".stop_parallel executes without error", { + immGLIPH:::.setup_parallel(1) + expect_no_error(immGLIPH:::.stop_parallel()) +}) + +# ---- Sequential fallback on Windows or 1 core --------------------------------- + +test_that(".setup_parallel registers sequential backend for single core", { + result <- immGLIPH:::.setup_parallel(1) + expect_equal(result, 1L) + # After registerDoSEQ, foreach should still work + res <- foreach::foreach(i = 1:3, .combine = c) %dopar% { i * 2 } + expect_equal(res, c(2, 4, 6)) +}) diff --git a/tests/testthat/test-utils-reference.R b/tests/testthat/test-utils-reference.R index e26ae5b..4a4eeff 100644 --- a/tests/testthat/test-utils-reference.R +++ b/tests/testthat/test-utils-reference.R @@ -291,3 +291,152 @@ test_that(".load_reference adds empty TRBV when single column provided", { expect_s3_class(result$refseqs_df, "data.frame") expect_true("TRBV" %in% colnames(result$refseqs_df)) }) + +# ---- .load_reference with custom boundary_size -------------------------------- + +test_that(".load_reference with structboundaries=TRUE and custom boundary_size", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result_b3 <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + structboundaries = TRUE, + boundary_size = 3, + verbose = FALSE + ) + + result_b5 <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + structboundaries = TRUE, + boundary_size = 5, + verbose = FALSE + ) + + # Larger boundary should produce shorter motif regions + expect_true(all(nchar(result_b5$refseqs_motif) < nchar(result_b3$refseqs_motif))) +}) + +# ---- .prepare_motif_region with empty vector --------------------------------- + +test_that(".prepare_motif_region handles empty vector", { + result <- immGLIPH:::.prepare_motif_region(character(0), + structboundaries = TRUE, + boundary_size = 3) + expect_length(result, 0) +}) + +# ---- .prepare_motif_region with single character sequences ------------------- + +test_that(".prepare_motif_region handles very short sequences", { + seqs <- c("CAF") + result <- immGLIPH:::.prepare_motif_region(seqs, structboundaries = TRUE, + boundary_size = 1) + expect_equal(result, "A") +}) + +# ---- .load_reference verbose messages ---------------------------------------- + +test_that(".load_reference prints messages when verbose", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + expect_message( + immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + verbose = TRUE + ), + "CDR3b" + ) +}) + +# ---- .load_reference deduplicates sequences ---------------------------------- + +test_that(".load_reference deduplicates reference sequences", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + verbose = FALSE + ) + + expect_equal(length(result$refseqs), 2) +}) + +# ---- .load_reference errors on all invalid AA sequences ---------------------- + +test_that(".load_reference errors when all sequences have invalid AAs", { + ref_df <- data.frame( + CDR3b = c("C123456789F", "C987654321F"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + expect_error( + immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = FALSE, + min_seq_length = 8, + verbose = FALSE + ), + "valid amino acid" + ) +}) + +# ---- .load_reference with vgene_stratify extracts V-genes -------------------- + +test_that(".load_reference vgene_stratify returns matching length vectors", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + vgene_stratify = TRUE, + verbose = FALSE + ) + + expect_equal(length(result$ref_vgenes), length(result$refseqs)) +}) + +# ---- .load_reference motif region matches refseqs length --------------------- + +test_that(".load_reference motif region has same length as refseqs", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + structboundaries = TRUE, + boundary_size = 3, + verbose = FALSE + ) + + expect_equal(length(result$refseqs_motif), length(result$refseqs)) +})