diff --git a/tests/testthat/test-clusterScoring.R b/tests/testthat/test-clusterScoring.R index 2b1201b..3559f11 100644 --- a/tests/testthat/test-clusterScoring.R +++ b/tests/testthat/test-clusterScoring.R @@ -19,8 +19,6 @@ test_that("clusterScoring rejects non-data.frame cdr3_sequences", { }) test_that("clusterScoring accepts character vector for cdr3_sequences", { - # Should convert to data.frame internally without error (unless cluster_list is empty) - # The function coerces atomic to data.frame, then proceeds cl <- list( "CRG-1" = data.frame( CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), @@ -35,13 +33,10 @@ test_that("clusterScoring accepts character vector for cdr3_sequences", { stringsAsFactors = FALSE ) - # This should not error on the cdr3_sequences validation - # (may error later if immApex features are unavailable) tryCatch( clusterScoring(cluster_list = cl, cdr3_sequences = seqs, refdb_beta = ref_df, sim_depth = 10, n_cores = 1), error = function(e) { - # Accept errors that are NOT about cdr3_sequences format expect_false(grepl("data.frame", e$message)) } ) @@ -119,14 +114,12 @@ test_that("clusterScoring validates v_usage_freq format", { cl <- list("CRG-1" = data.frame(CDR3b = "CASSLAPGATNEKLFF")) seqs <- data.frame(CDR3b = "CASSLAPGATNEKLFF") - # Non-data.frame should error expect_error( clusterScoring(cluster_list = cl, cdr3_sequences = seqs, v_usage_freq = c(0.1, 0.2)), "v_usage_freq" ) - # Data frame with one column should error expect_error( clusterScoring(cluster_list = cl, cdr3_sequences = seqs, v_usage_freq = data.frame(vgene = "TRBV5-1")), @@ -138,7 +131,6 @@ test_that("clusterScoring validates cdr3_length_freq format", { cl <- list("CRG-1" = data.frame(CDR3b = "CASSLAPGATNEKLFF")) seqs <- data.frame(CDR3b = "CASSLAPGATNEKLFF") - # Non-data.frame should error expect_error( clusterScoring(cluster_list = cl, cdr3_sequences = seqs, cdr3_length_freq = c(0.1, 0.2)), @@ -146,12 +138,34 @@ test_that("clusterScoring validates cdr3_length_freq format", { ) }) +test_that("clusterScoring returns empty data.frame for empty cluster_list", { + seqs <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result <- clusterScoring( + cluster_list = list(), + cdr3_sequences = seqs, + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1 + ) + expect_s3_class(result, "data.frame") + expect_equal(nrow(result), 0) +}) + # ---- Computation tests ------------------------------------------------------- test_that("clusterScoring computes scores with custom refdb_beta data frame", { skip_on_cran() - # Create a small reference ref_df <- data.frame( CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", "CASSLTGGEETQYF", "CASSLGGRETQYF", "CASSLGQAYEQYF", @@ -191,8 +205,7 @@ test_that("clusterScoring computes scores with custom refdb_beta data frame", { expect_true("network.size.score" %in% colnames(result)) expect_true("cdr3.length.score" %in% colnames(result)) expect_true("vgene.score" %in% colnames(result)) - expect_equal(nrow(result), 1) # One cluster - # Scores should be numeric + expect_equal(nrow(result), 1) expect_true(is.numeric(result$total.score)) }) @@ -233,9 +246,144 @@ test_that("clusterScoring gliph_version 2 omits 0.064 multiplier", { refdb_beta = ref_df, gliph_version = 2, sim_depth = 10, n_cores = 1 ) - # Both should return data frames with total.score expect_s3_class(res_v1, "data.frame") expect_s3_class(res_v2, "data.frame") expect_true("total.score" %in% colnames(res_v1)) expect_true("total.score" %in% colnames(res_v2)) }) + +test_that("clusterScoring with counts and HLA information", { + skip_on_cran() + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF", "CASSLGGRETQYF", "CASSLGQAYEQYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1", "TRBV7-2", "TRBV5-1", + "TRBV6-2"), + stringsAsFactors = FALSE + ) + + cl <- list( + "CRG-1" = data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + patient = c("P1", "P2", "P1"), + HLA = c("A*02:01,B*07:02", "A*02:01,B*08:01", "A*02:01,B*07:02"), + counts = c(5, 3, 2), + stringsAsFactors = FALSE + ) + ) + + seqs <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1", "TRBV7-2"), + patient = c("P1", "P2", "P1", "P2"), + HLA = c("A*02:01,B*07:02", "A*02:01,B*08:01", + "A*02:01,B*07:02", "A*02:01,B*08:01"), + counts = c(5, 3, 2, 1), + stringsAsFactors = FALSE + ) + + result <- clusterScoring( + cluster_list = cl, + cdr3_sequences = seqs, + refdb_beta = ref_df, + gliph_version = 1, + sim_depth = 10, + n_cores = 1 + ) + + expect_s3_class(result, "data.frame") + expect_true("clonal.expansion.score" %in% colnames(result)) + expect_true("hla.score" %in% colnames(result)) +}) + +test_that("clusterScoring with multiple clusters", { + skip_on_cran() + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF", "CASSLGGRETQYF", "CASSLGQAYEQYF", + "CASSFSTCSANYGYTF", "CASSPTGGYEQYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1", "TRBV7-2", "TRBV5-1", + "TRBV6-2", "TRBV5-1", "TRBV7-2"), + stringsAsFactors = FALSE + ) + + cl <- list( + "CRG-1" = data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ), + "CRG-2" = data.frame( + CDR3b = c("CASSYLAGGRNTLYF", "CASSLTGGEETQYF"), + TRBV = c("TRBV5-1", "TRBV7-2"), + stringsAsFactors = FALSE + ) + ) + + seqs <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1", "TRBV7-2"), + stringsAsFactors = FALSE + ) + + result <- clusterScoring( + cluster_list = cl, + cdr3_sequences = seqs, + refdb_beta = ref_df, + gliph_version = 1, + sim_depth = 10, + n_cores = 1 + ) + + expect_s3_class(result, "data.frame") + expect_equal(nrow(result), 2) +}) + +test_that("clusterScoring with custom v_usage_freq", { + skip_on_cran() + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF", "CASSLGGRETQYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1", "TRBV7-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + cl <- list( + "CRG-1" = data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + ) + + seqs <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + v_freq <- data.frame( + vgene = c("TRBV5-1", "TRBV6-2", "TRBV7-2"), + freq = c(0.5, 0.3, 0.2), + stringsAsFactors = FALSE + ) + + result <- clusterScoring( + cluster_list = cl, + cdr3_sequences = seqs, + refdb_beta = ref_df, + v_usage_freq = v_freq, + gliph_version = 1, + sim_depth = 10, + n_cores = 1 + ) + + expect_s3_class(result, "data.frame") + expect_true("vgene.score" %in% colnames(result)) +}) diff --git a/tests/testthat/test-clustering.R b/tests/testthat/test-clustering.R index f5d4c3c..c83cb01 100644 --- a/tests/testthat/test-clustering.R +++ b/tests/testthat/test-clustering.R @@ -316,3 +316,355 @@ test_that(".cluster_gliph2 handles no local and no global similarities", { expect_null(result$merged_clusters) expect_equal(length(result$cluster_list), 0) }) + +# ---- .cluster_gliph2 with global_res ---------------------------------------- + +test_that(".cluster_gliph2 handles global results only", { + d <- .make_cluster_data() + + global_res <- data.frame( + cluster_tag = c("struct_%_13"), + cluster_size = c(3), + unique_CDR3b = c(3), + num_in_ref = c(5), + fisher.score = c(0.001), + aa_at_position = c("L"), + TRBV = c("TRBV5-1"), + CDR3b = paste(d$seqs[c(1, 3, 5)], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = NULL, + global_res = global_res, + sequences = d$sequences, + local_similarities = FALSE, + global_similarities = TRUE, + global_vgene = TRUE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 1, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + expect_type(result, "list") + if (!is.null(result$merged_clusters)) { + expect_true(all(result$merged_clusters$type == "global")) + } +}) + +test_that(".cluster_gliph2 with global_vgene FALSE for global clusters", { + d <- .make_cluster_data() + + global_res <- data.frame( + cluster_tag = c("struct_%_13"), + cluster_size = c(3), + unique_CDR3b = c(3), + num_in_ref = c(5), + fisher.score = c(0.001), + aa_at_position = c("L"), + TRBV = c("TRBV5-1"), + CDR3b = paste(d$seqs[c(1, 3, 5)], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = NULL, + global_res = global_res, + sequences = d$sequences, + local_similarities = FALSE, + global_similarities = TRUE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 1, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + expect_type(result, "list") + if (!is.null(result$merged_clusters)) { + # Without global_vgene, tag should NOT include TRBV + expect_false(any(grepl("TRBV", result$merged_clusters$tag))) + } +}) + +# ---- .cluster_gliph2 with both local and global ---------------------------- + +test_that(".cluster_gliph2 merges local and global clusters", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(3), + num_in_ref = c(10), + num_fold = c(5.0), + fisher.score = c(0.001), + members = paste(d$seqs[1:3], collapse = " "), + stringsAsFactors = FALSE + ) + + global_res <- data.frame( + cluster_tag = c("struct_%_5"), + cluster_size = c(2), + unique_CDR3b = c(2), + num_in_ref = c(5), + fisher.score = c(0.01), + aa_at_position = c("G"), + TRBV = c("TRBV7-2"), + CDR3b = paste(d$seqs[4:5], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = global_res, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = TRUE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 10, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + expect_type(result, "list") + if (!is.null(result$merged_clusters)) { + expect_true(any(result$merged_clusters$type == "local")) + expect_true(any(result$merged_clusters$type == "global")) + } +}) + +# ---- .cluster_gliph2 BLOSUM62 filtering ------------------------------------ + +test_that(".cluster_gliph2 applies BLOSUM62 filtering when all_aa_interchangeable is FALSE", { + skip_if_not_installed("immApex") + + d <- .make_cluster_data() + + global_res <- data.frame( + cluster_tag = c("struct_%_5"), + cluster_size = c(2), + unique_CDR3b = c(2), + num_in_ref = c(5), + fisher.score = c(0.01), + aa_at_position = c("G"), + TRBV = c("TRBV5-1"), + CDR3b = paste(d$seqs[c(1, 3)], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = NULL, + global_res = global_res, + sequences = d$sequences, + local_similarities = FALSE, + global_similarities = TRUE, + global_vgene = FALSE, + all_aa_interchangeable = FALSE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 1, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + expect_type(result, "list") +}) + +# ---- .cluster_gliph1 with public_tcrs FALSE -------------------------------- + +test_that(".cluster_gliph1 restricts edges to same donor when public_tcrs is FALSE", { + d <- .make_cluster_data() + + clone_network <- data.frame( + V1 = c("CASSLAPGATNEKLFF", "CASSLAPGATNEKLFF"), + V2 = c("CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + type = c("local", "local"), + stringsAsFactors = FALSE + ) + + # P1 has seq1,2,5; P2 has seq3,4 + # Seq1-Seq2: same donor P1 + # Seq1-Seq3: different donors P1 vs P2 + result <- immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = d$sequences, + not_in_global_ids = integer(0), + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = FALSE, + cluster_min_size = 1, + verbose = FALSE + ) + + expect_type(result, "list") +}) + +# ---- .cluster_gliph1 global_vgene filtering -------------------------------- + +test_that(".cluster_gliph1 filters global edges by V-gene when global_vgene is TRUE", { + d <- .make_cluster_data() + + clone_network <- data.frame( + V1 = c("CASSLAPGATNEKLFF", "CASSLAPGATNEKLFF"), + V2 = c("CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + type = c("global", "global"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = d$sequences, + not_in_global_ids = integer(0), + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = TRUE, + public_tcrs = TRUE, + cluster_min_size = 1, + verbose = FALSE + ) + + expect_type(result, "list") +}) + +# ---- .cluster_gliph1 verbose messaging ------------------------------------- + +test_that(".cluster_gliph1 prints message when verbose is TRUE", { + d <- .make_cluster_data() + + clone_network <- data.frame( + V1 = "CASSLAPGATNEKLFF", + V2 = "CASSLDRGEVFF", + type = "local", + stringsAsFactors = FALSE + ) + + expect_message( + immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = d$sequences, + not_in_global_ids = integer(0), + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 1, + verbose = TRUE + ), + "GLIPH1" + ) +}) + +# ---- .cluster_gliph1 singletons from NULL network --------------------------- + +test_that(".cluster_gliph1 creates singleton network from NULL clone_network", { + d <- .make_cluster_data() + + result <- immGLIPH:::.cluster_gliph1( + clone_network = NULL, + sequences = d$sequences, + not_in_global_ids = c(1L, 2L, 3L), + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 1, + verbose = FALSE + ) + + expect_true(!is.null(result$clone_network)) + expect_true(all(result$clone_network$type == "singleton")) +}) + +# ---- .cluster_gliph2 structboundaries FALSE -------------------------------- + +test_that(".cluster_gliph2 works with structboundaries FALSE", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(3), + num_in_ref = c(10), + num_fold = c(5.0), + fisher.score = c(0.001), + members = paste(d$seqs[1:3], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = FALSE, + boundary_size = 3, + motif_distance_cutoff = 10, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + expect_type(result, "list") +}) + +# ---- .cluster_gliph2 save_cluster_list_df creation ------------------------- + +test_that(".cluster_gliph2 generates save_cluster_list_df", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(3), + num_in_ref = c(10), + num_fold = c(5.0), + fisher.score = c(0.001), + members = paste(d$seqs[1:3], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 10, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + if (!is.null(result$merged_clusters) && length(result$cluster_list) > 0) { + expect_s3_class(result$save_cluster_list_df, "data.frame") + expect_true("tag" %in% colnames(result$save_cluster_list_df)) + } +}) diff --git a/tests/testthat/test-deNovoTCRs.R b/tests/testthat/test-deNovoTCRs.R index bbecabe..b5d6ae8 100644 --- a/tests/testthat/test-deNovoTCRs.R +++ b/tests/testthat/test-deNovoTCRs.R @@ -1,5 +1,7 @@ # Tests for deNovoTCRs() +# ---- Input validation ------------------------------------------------------- + test_that("deNovoTCRs rejects non-character convergence_group_tag", { expect_error(deNovoTCRs(convergence_group_tag = 123), "character") }) @@ -58,6 +60,15 @@ test_that("deNovoTCRs rejects non-logical normalization", { ) }) +test_that("deNovoTCRs rejects non-logical accept_sequences_with_C_F_start_end", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + accept_sequences_with_C_F_start_end = "yes"), + "logical" + ) +}) + test_that("deNovoTCRs rejects tag not in cluster_list", { mock_output <- list( cluster_list = list( @@ -104,12 +115,28 @@ test_that("deNovoTCRs rejects min_length less than 1", { ) }) +test_that("deNovoTCRs rejects non-numeric n_cores", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + n_cores = "abc"), + "numeric" + ) +}) + +test_that("deNovoTCRs rejects multiple result_folder paths", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + result_folder = c("a", "b")), + "single path" + ) +}) + # ---- Functional tests ------------------------------------------------------- test_that("deNovoTCRs generates sequences from a pre-computed cluster", { skip_on_cran() - # Build a mock clustering output with a cluster of similar sequences cluster_members <- data.frame( seq_ID = 1:5, CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPRATNEKLFF", @@ -148,26 +175,171 @@ test_that("deNovoTCRs generates sequences from a pre-computed cluster", { ) expect_type(result, "list") - - # Check output structure expect_true("de_novo_sequences" %in% names(result)) expect_true("sample_sequences_scores" %in% names(result)) expect_true("cdr3_length_probability" %in% names(result)) expect_true("PWM_Scoring" %in% names(result)) expect_true("PWM_Prediction" %in% names(result)) - - # de_novo_sequences should be a data.frame expect_s3_class(result$de_novo_sequences, "data.frame") - - # PWM_Scoring should have amino acid-related columns expect_s3_class(result$PWM_Scoring, "data.frame") expect_true(ncol(result$PWM_Scoring) > 0) - - # PWM_Prediction should be a list of data.frames expect_type(result$PWM_Prediction, "list") - - # cdr3_length_probability should have probabilities summing to ~1 expect_s3_class(result$cdr3_length_probability, "data.frame") probs <- result$cdr3_length_probability[, ncol(result$cdr3_length_probability)] expect_true(abs(sum(as.numeric(probs)) - 1) < 0.01) }) + +test_that("deNovoTCRs with normalization produces norm_score column", { + skip_on_cran() + + cluster_members <- data.frame( + seq_ID = 1:5, + CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPRATNEKLFF", + "CASSLAPGETQEKLFF", "CASSLAPQATNEKLFF", + "CASSLAPGAGNEKLFF"), + TRBV = rep("TRBV5-1", 5), + patient = rep("P1", 5), + stringsAsFactors = FALSE + ) + + mock_output <- list( + cluster_list = list("CRG-CASSLAPGATNEKLFF" = cluster_members) + ) + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF", "CASSLGGRETQYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1", "TRBV7-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + result <- deNovoTCRs( + convergence_group_tag = "CRG-CASSLAPGATNEKLFF", + clustering_output = mock_output, + refdb_beta = ref_df, + normalization = TRUE, + sims = 50, + num_tops = 5, + min_length = 10, + make_figure = FALSE, + n_cores = 1 + ) + + expect_true("norm_score" %in% colnames(result$de_novo_sequences)) + expect_true("norm_scores" %in% colnames(result$sample_sequences_scores)) +}) + +test_that("deNovoTCRs saves output to result_folder", { + skip_on_cran() + + cluster_members <- data.frame( + seq_ID = 1:5, + CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPRATNEKLFF", + "CASSLAPGETQEKLFF", "CASSLAPQATNEKLFF", + "CASSLAPGAGNEKLFF"), + TRBV = rep("TRBV5-1", 5), + stringsAsFactors = FALSE + ) + + mock_output <- list( + cluster_list = list("CRG-CASSLAPGATNEKLFF" = cluster_members) + ) + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + tmp_dir <- file.path(tempdir(), paste0("denovo_save_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + dir.create(tmp_dir, showWarnings = FALSE) + + result <- deNovoTCRs( + convergence_group_tag = "CRG-CASSLAPGATNEKLFF", + clustering_output = mock_output, + result_folder = tmp_dir, + refdb_beta = ref_df, + sims = 50, + num_tops = 5, + min_length = 10, + make_figure = FALSE, + n_cores = 1 + ) + + expected_file <- file.path(tmp_dir, "CRG-CASSLAPGATNEKLFF_de_novo.txt") + expect_true(file.exists(expected_file)) +}) + +test_that("deNovoTCRs warns when sequences fall below min_length", { + skip_on_cran() + + cluster_members <- data.frame( + seq_ID = 1:3, + CDR3b = c("CASSLAPGATNEKLFF", "CASSLAP", "CASSLAPGAGNEKLFF"), + TRBV = rep("TRBV5-1", 3), + stringsAsFactors = FALSE + ) + + mock_output <- list( + cluster_list = list("CRG-test" = cluster_members) + ) + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + expect_warning( + deNovoTCRs( + convergence_group_tag = "CRG-test", + clustering_output = mock_output, + refdb_beta = ref_df, + sims = 50, + num_tops = 5, + min_length = 10, + make_figure = FALSE, + n_cores = 1 + ), + "excluded" + ) +}) + +test_that("deNovoTCRs with accept_sequences_with_C_F_start_end = FALSE", { + skip_on_cran() + + cluster_members <- data.frame( + seq_ID = 1:5, + CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPRATNEKLFF", + "CASSLAPGETQEKLFF", "CASSLAPQATNEKLFF", + "CASSLAPGAGNEKLFF"), + TRBV = rep("TRBV5-1", 5), + stringsAsFactors = FALSE + ) + + mock_output <- list( + cluster_list = list("CRG-test" = cluster_members) + ) + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result <- deNovoTCRs( + convergence_group_tag = "CRG-test", + clustering_output = mock_output, + refdb_beta = ref_df, + accept_sequences_with_C_F_start_end = FALSE, + sims = 50, + num_tops = 5, + min_length = 10, + make_figure = FALSE, + n_cores = 1 + ) + + expect_type(result, "list") + expect_s3_class(result$de_novo_sequences, "data.frame") +}) diff --git a/tests/testthat/test-extractInput.R b/tests/testthat/test-extractInput.R index 02c58a3..133fbae 100644 --- a/tests/testthat/test-extractInput.R +++ b/tests/testthat/test-extractInput.R @@ -94,3 +94,157 @@ test_that(".parse_sequences uses first column for single-column data frame", { result <- immGLIPH:::.parse_sequences(df, verbose = FALSE) expect_equal(nrow(result$sequences), 1) }) + +# ---- .parse_sequences character vector input --------------------------------- + +test_that(".parse_sequences handles character vector input", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLDRGEVFF") + result <- immGLIPH:::.parse_sequences(seqs, verbose = FALSE) + expect_equal(nrow(result$sequences), 2) + expect_false(result$vgene.info) + expect_false(result$patient.info) + expect_false(result$hla.info) + expect_false(result$count.info) +}) + +test_that(".parse_sequences errors on character vector with global_vgene", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLDRGEVFF") + expect_error( + immGLIPH:::.parse_sequences(seqs, global_vgene = TRUE, verbose = FALSE), + "V-gene" + ) +}) + +test_that(".parse_sequences errors on character vector with vgene_stratify", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLDRGEVFF") + expect_error( + immGLIPH:::.parse_sequences(seqs, vgene_stratify = TRUE, verbose = FALSE), + "V-gene" + ) +}) + +# ---- .parse_sequences data frame with missing V-gene ----------------------- + +test_that(".parse_sequences errors on data frame without TRBV when global_vgene is TRUE", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + stringsAsFactors = FALSE + ) + expect_error( + immGLIPH:::.parse_sequences(df, global_vgene = TRUE, verbose = FALSE), + "V-gene" + ) +}) + +test_that(".parse_sequences errors on data frame without TRBV when vgene_stratify is TRUE", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + stringsAsFactors = FALSE + ) + expect_error( + immGLIPH:::.parse_sequences(df, vgene_stratify = TRUE, verbose = FALSE), + "V-gene" + ) +}) + +# ---- .parse_sequences errors on invalid input type ------------------------- + +test_that(".parse_sequences errors on invalid input type", { + expect_error( + immGLIPH:::.parse_sequences(list(a = 1), verbose = FALSE), + "character vector or data frame" + ) +}) + +# ---- .parse_sequences errors on no valid AA sequences ---------------------- + +test_that(".parse_sequences errors when no valid AA sequences found", { + df <- data.frame( + CDR3b = c("12345", "INVALID123"), + stringsAsFactors = FALSE + ) + expect_error( + immGLIPH:::.parse_sequences(df, verbose = FALSE), + "No valid" + ) +}) + +# ---- .parse_sequences adds seq_ID column ----------------------------------- + +test_that(".parse_sequences adds sequential seq_ID column", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.parse_sequences(df, verbose = FALSE) + expect_equal(result$sequences$seq_ID, 1:3) +}) + +# ---- .parse_sequences counts handling ------------------------------------- + +test_that(".parse_sequences handles NA counts", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + counts = c(5, NA), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.parse_sequences(df, verbose = FALSE) + expect_true(result$count.info) + # NA counts should be replaced with 1 + expect_equal(as.numeric(result$sequences$counts[2]), 1) +}) + +# ---- .parse_sequences carries forward extra columns ----------------------- + +test_that(".parse_sequences carries forward additional columns", { + df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + patient = c("P1", "P2"), + custom_col = c("A", "B"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.parse_sequences(df, verbose = FALSE) + expect_true("custom_col" %in% colnames(result$sequences)) +}) + +# ---- .standardize_colnames additional mappings ------------------------------ + +test_that(".standardize_colnames maps HLA and counts alternatives", { + df <- data.frame( + cdr3_aa = c("CASSLAPGATNEKLFF"), + v_call = c("TRBV5-1"), + donor = c("P1"), + hla = c("A*02:01"), + clone_count = c(5), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.standardize_colnames(df) + expect_true("CDR3b" %in% colnames(result)) + expect_true("TRBV" %in% colnames(result)) + expect_true("patient" %in% colnames(result)) + expect_true("HLA" %in% colnames(result)) + expect_true("counts" %in% colnames(result)) +}) + +test_that(".standardize_colnames handles single-column data frame without CDR3b", { + df <- data.frame( + some_random_name = c("CASSLAPGATNEKLFF"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.standardize_colnames(df) + expect_equal(colnames(result), "CDR3b") +}) + +# ---- .extract_input data frame standardization ---------------------------- + +test_that(".extract_input standardizes alternative column names", { + df <- data.frame( + cdr3 = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + v_gene = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.extract_input(df) + expect_true("CDR3b" %in% colnames(result)) + expect_true("TRBV" %in% colnames(result)) +}) diff --git a/tests/testthat/test-getRandomSubsample.R b/tests/testthat/test-getRandomSubsample.R index 0468113..56e76cc 100644 --- a/tests/testthat/test-getRandomSubsample.R +++ b/tests/testthat/test-getRandomSubsample.R @@ -18,7 +18,6 @@ test_that("getRandomSubsample returns correct length without stratification", { ) expect_equal(length(result), length(sample_motifs)) - # All values should come from the reference expect_true(all(result %in% ref_motifs)) }) @@ -78,3 +77,103 @@ test_that("getRandomSubsample with V-gene stratification returns correct length" expect_equal(length(result), length(sample_motifs)) expect_true(all(result %in% ref_motifs)) }) + +test_that("getRandomSubsample with both CDR3 and V-gene stratification", { + set.seed(42) + ref_motifs <- paste0("REF", seq_len(500)) + sample_motifs <- paste0("SAM", seq_len(20)) + + motif_lengths_list <- list("10" = 12, "12" = 8) + ref_motif_lengths_id_list <- list( + "10" = seq_len(250), + "12" = 251:500 + ) + motif_region_vgenes_list <- list("TRBV5-1" = 12, "TRBV6-2" = 8) + ref_motif_vgenes_id_list <- list( + "TRBV5-1" = seq_len(300), + "TRBV6-2" = 301:500 + ) + lengths_vgenes_list <- list( + "10" = list("TRBV5-1" = 7, "TRBV6-2" = 5), + "12" = list("TRBV5-1" = 5, "TRBV6-2" = 3) + ) + ref_lengths_vgenes_list <- list( + "10" = list("TRBV5-1" = seq_len(100), "TRBV6-2" = 101:200), + "12" = list("TRBV5-1" = 201:350, "TRBV6-2" = 351:500) + ) + + result <- getRandomSubsample( + cdr3_len_stratify = TRUE, + vgene_stratify = TRUE, + refseqs_motif_region = ref_motifs, + motif_region = sample_motifs, + motif_lengths_list = motif_lengths_list, + ref_motif_lengths_id_list = ref_motif_lengths_id_list, + motif_region_vgenes_list = motif_region_vgenes_list, + ref_motif_vgenes_id_list = ref_motif_vgenes_id_list, + ref_lengths_vgenes_list = ref_lengths_vgenes_list, + lengths_vgenes_list = lengths_vgenes_list + ) + + expect_equal(length(result), length(sample_motifs)) + expect_true(all(result %in% ref_motifs)) +}) + +test_that("getRandomSubsample CDR3 length stratification handles overflow", { + set.seed(42) + # More sample motifs of length 4 than ref has + ref_motifs <- c(rep("AABB", 3), rep("AABBCC", 300)) + sample_motifs <- c(rep("XXYY", 10), rep("XXYYCC", 5)) + + motif_lengths_list <- list("4" = 10, "6" = 5) + ref_motif_lengths_id_list <- list( + "4" = which(nchar(ref_motifs) == 4), + "6" = which(nchar(ref_motifs) == 6) + ) + + result <- getRandomSubsample( + cdr3_len_stratify = TRUE, + vgene_stratify = FALSE, + refseqs_motif_region = ref_motifs, + motif_region = sample_motifs, + motif_lengths_list = motif_lengths_list, + ref_motif_lengths_id_list = ref_motif_lengths_id_list, + motif_region_vgenes_list = list(), + ref_motif_vgenes_id_list = list(), + ref_lengths_vgenes_list = list(), + lengths_vgenes_list = list() + ) + + # Should still return correct total length even if some strata overflow + expect_equal(length(result), length(sample_motifs)) + expect_true(all(result %in% ref_motifs)) +}) + +test_that("getRandomSubsample V-gene stratification handles overflow", { + set.seed(42) + ref_motifs <- paste0("REF", seq_len(500)) + sample_motifs <- paste0("SAM", seq_len(20)) + + # More sample TRBV5-1 than ref has + motif_region_vgenes_list <- list("TRBV5-1" = 15, "TRBV6-2" = 5) + ref_motif_vgenes_id_list <- list( + "TRBV5-1" = seq_len(10), # only 10 available, need 15 + "TRBV6-2" = 11:500 + ) + + result <- getRandomSubsample( + cdr3_len_stratify = FALSE, + vgene_stratify = TRUE, + refseqs_motif_region = ref_motifs, + motif_region = sample_motifs, + motif_lengths_list = list(), + ref_motif_lengths_id_list = list(), + motif_region_vgenes_list = motif_region_vgenes_list, + ref_motif_vgenes_id_list = ref_motif_vgenes_id_list, + ref_lengths_vgenes_list = list(), + lengths_vgenes_list = list() + ) + + expect_equal(length(result), length(sample_motifs)) + expect_true(all(result %in% ref_motifs)) +}) diff --git a/tests/testthat/test-globalCutoff.R b/tests/testthat/test-globalCutoff.R index e601ff6..2be07f4 100644 --- a/tests/testthat/test-globalCutoff.R +++ b/tests/testthat/test-globalCutoff.R @@ -224,3 +224,119 @@ test_that(".global_cutoff not_in_global_ids tracks isolated sequences", { expect_type(result$not_in_global_ids, "integer") expect_true(length(result$not_in_global_ids) > 0) }) + +# ---- Verbose messaging ------------------------------------------------------- + +test_that(".global_cutoff_stringdist prints messages when verbose is TRUE", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLDRGEVFF") + motif_region <- substr(seqs, 4, nchar(seqs) - 3) + sequences <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + expect_message( + immGLIPH:::.global_cutoff_stringdist( + seqs = seqs, motif_region = motif_region, sequences = sequences, + gccutoff = 5, global_vgene = FALSE, no_cores = 1, verbose = TRUE + ), + "global" + ) +}) + +test_that(".global_cutoff prints verbose dispatch message", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLDRGEVFF") + motif_region <- substr(seqs, 4, nchar(seqs) - 3) + sequences <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + expect_message( + immGLIPH:::.global_cutoff( + seqs = seqs, motif_region = motif_region, sequences = sequences, + gccutoff = 5, global_vgene = FALSE, no_cores = 1, verbose = TRUE + ), + "global" + ) +}) + +# ---- immApex path ----------------------------------------------------------- + +test_that(".global_cutoff_immapex works when immApex available", { + skip_if_not_installed("immApex") + skip_if(!exists("buildNetwork", asNamespace("immApex")), + "immApex::buildNetwork not available") + + seqs <- c("CASSLAPGATNEKLFF", "CASSLAPRQTNEKLFF", + "CASSLDRGEVFF", "CASSLDRGQVFF") + motif_region <- substr(seqs, 4, nchar(seqs) - 3) + sequences <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV5-1", "TRBV6-2", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.global_cutoff_immapex( + seqs = seqs, motif_region = motif_region, sequences = sequences, + gccutoff = 3, global_vgene = FALSE, verbose = FALSE + ) + + expect_type(result, "list") + expect_s3_class(result$edges, "data.frame") + expect_true(all(c("V1", "V2", "type") %in% colnames(result$edges))) +}) + +test_that(".global_cutoff_immapex with global_vgene restricts to same V-gene", { + skip_if_not_installed("immApex") + skip_if(!exists("buildNetwork", asNamespace("immApex")), + "immApex::buildNetwork not available") + + seqs <- c("CASSLAPGATNEKLFF", "CASSLAPRQTNEKLFF") + motif_region <- substr(seqs, 4, nchar(seqs) - 3) + + sequences_same <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV5-1"), + stringsAsFactors = FALSE + ) + sequences_diff <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result_same <- immGLIPH:::.global_cutoff_immapex( + seqs = seqs, motif_region = motif_region, sequences = sequences_same, + gccutoff = 5, global_vgene = TRUE, verbose = FALSE + ) + result_diff <- immGLIPH:::.global_cutoff_immapex( + seqs = seqs, motif_region = motif_region, sequences = sequences_diff, + gccutoff = 5, global_vgene = TRUE, verbose = FALSE + ) + + expect_true(nrow(result_same$edges) >= nrow(result_diff$edges)) +}) + +# ---- High cutoff captures similar sequences -------------------------------- + +test_that(".global_cutoff_stringdist finds edges with high cutoff", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLAPRQTNEKLFF") + motif_region <- substr(seqs, 4, nchar(seqs) - 3) + sequences <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.global_cutoff_stringdist( + seqs = seqs, motif_region = motif_region, sequences = sequences, + gccutoff = 10, global_vgene = FALSE, no_cores = 1, verbose = FALSE + ) + + expect_true(nrow(result$edges) > 0) + expect_equal(result$edges$V1[1], seqs[1]) + expect_equal(result$edges$V2[1], seqs[2]) +}) diff --git a/tests/testthat/test-loadGLIPH.R b/tests/testthat/test-loadGLIPH.R index 3fa685c..ac751e5 100644 --- a/tests/testthat/test-loadGLIPH.R +++ b/tests/testthat/test-loadGLIPH.R @@ -1,5 +1,7 @@ # Tests for loadGLIPH() +# ---- Input validation ------------------------------------------------------- + test_that("loadGLIPH rejects non-character result_folder", { expect_error(loadGLIPH(result_folder = 123), "character") }) @@ -16,3 +18,157 @@ test_that("loadGLIPH rejects non-existent path", { expect_error(loadGLIPH(result_folder = "/tmp/nonexistent_gliph_path_xyz"), "does not exist") }) + +# ---- Functional tests: load saved GLIPH1 results ---------------------------- + +test_that("loadGLIPH loads gliph1 output correctly", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + tmp_dir <- file.path(tempdir(), paste0("gliph1_load_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + result_folder = tmp_dir, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + loaded <- loadGLIPH(result_folder = tmp_dir) + expect_type(loaded, "list") + expect_true("parameters" %in% names(loaded)) + expect_true("cluster_list" %in% names(loaded)) + expect_true("cluster_properties" %in% names(loaded)) + expect_true("connections" %in% names(loaded)) + expect_true("motif_enrichment" %in% names(loaded)) + + # Parameters should be loadable + expect_true("method" %in% names(loaded$parameters)) + expect_equal(loaded$parameters$method, "gliph1") + + # cluster_list should be a named list + if (!is.null(loaded$cluster_list)) { + expect_type(loaded$cluster_list, "list") + expect_true(length(names(loaded$cluster_list)) > 0) + } +}) + +# ---- Functional tests: load saved GLIPH2 results ---------------------------- + +test_that("loadGLIPH loads gliph2 output correctly", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + tmp_dir <- file.path(tempdir(), paste0("gliph2_load_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph2", + refdb_beta = ref_df, + result_folder = tmp_dir, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + loaded <- loadGLIPH(result_folder = tmp_dir) + expect_type(loaded, "list") + expect_true("parameters" %in% names(loaded)) + expect_true("cluster_list" %in% names(loaded)) + expect_true("cluster_properties" %in% names(loaded)) + expect_true("motif_enrichment" %in% names(loaded)) + + expect_equal(loaded$parameters$method, "gliph2") +}) + +# ---- Path normalization ------------------------------------------------------ + +test_that("loadGLIPH adds trailing slash if missing", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + tmp_dir <- file.path(tempdir(), paste0("gliph_slash_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + result_folder = tmp_dir, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + # Load without trailing slash - should still work + path_no_slash <- sub("/$", "", tmp_dir) + loaded <- loadGLIPH(result_folder = path_no_slash) + expect_type(loaded, "list") +}) + +test_that("loadGLIPH errors when parameter.txt is missing", { + tmp_dir <- file.path(tempdir(), paste0("gliph_empty_test_", Sys.getpid())) + dir.create(tmp_dir, showWarnings = FALSE) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + expect_error(loadGLIPH(result_folder = tmp_dir), "missing") +}) diff --git a/tests/testthat/test-local-fisher.R b/tests/testthat/test-local-fisher.R index 8576b6f..3ffc66d 100644 --- a/tests/testthat/test-local-fisher.R +++ b/tests/testthat/test-local-fisher.R @@ -216,3 +216,132 @@ test_that(".local_fisher includes discontinuous motifs when enabled", { disc <- result$all_motifs[grep("\\.", result$all_motifs$motif), ] expect_true(nrow(disc) > 0) }) + +# ---- immApex path ----------------------------------------------------------- + +test_that(".local_fisher works via immApex when available", { + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(1, 1), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_type(result, "list") + expect_s3_class(result$all_motifs, "data.frame") + expect_true(nrow(result$all_motifs) > 0) + expect_true(all(c("motif", "counts", "num_in_ref", "OvE", "p.value") %in% + colnames(result$all_motifs))) +}) + +test_that(".local_fisher immApex path with discontinuous motifs", { + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = TRUE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + disc <- result$all_motifs[grep("\\.", result$all_motifs$motif), ] + expect_true(nrow(disc) > 0) +}) + +# ---- Verbose messaging ------------------------------------------------------- + +test_that(".local_fisher prints messages when verbose is TRUE", { + d <- .make_fisher_data() + expect_message( + immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(1, 1), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = TRUE + ), + "motif" + ) +}) + +# ---- Single lcminove value --------------------------------------------------- + +test_that(".local_fisher works with single lcminove value for multiple motif_lengths", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3, 4), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = 1, + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_type(result, "list") + expect_s3_class(result$all_motifs, "data.frame") +}) + +# ---- avgRef normalization ---------------------------------------------------- + +test_that(".local_fisher avgRef is normalized to sample set size", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_true(is.numeric(result$all_motifs$avgRef)) + expect_true(all(result$all_motifs$avgRef >= 0)) +}) diff --git a/tests/testthat/test-plotNetwork.R b/tests/testthat/test-plotNetwork.R index 1c38bbe..3e4c260 100644 --- a/tests/testthat/test-plotNetwork.R +++ b/tests/testthat/test-plotNetwork.R @@ -1,5 +1,7 @@ # Tests for plotNetwork() +# ---- Input validation ------------------------------------------------------- + test_that("plotNetwork rejects non-character result_folder", { expect_error(plotNetwork(result_folder = 123), "character") }) @@ -77,9 +79,37 @@ test_that("plotNetwork rejects non-character show_additional_columns", { ) }) -# ---- Functional tests ------------------------------------------------------- +test_that("plotNetwork rejects multiple size_info values", { + expect_error( + plotNetwork(clustering_output = list(), size_info = c("a", "b")), + "single column" + ) +}) + +test_that("plotNetwork rejects multiple color_info values", { + expect_error( + plotNetwork(clustering_output = list(), color_info = c("a", "b")), + "single column" + ) +}) + +test_that("plotNetwork rejects non-numeric n_cores", { + expect_error( + plotNetwork(clustering_output = list(), n_cores = "abc"), + "numeric" + ) +}) -test_that("plotNetwork produces visNetwork object from clustering output", { +test_that("plotNetwork rejects n_cores < 1", { + expect_error( + plotNetwork(clustering_output = list(), n_cores = 0), + "at least 1" + ) +}) + +# ---- Functional tests with GLIPH1 output ------------------------------------ + +test_that("plotNetwork produces visNetwork from gliph_version=1 output", { skip_on_cran() skip_if_not_installed("immApex") skip_if(!exists("calculateMotif", asNamespace("immApex")), @@ -110,20 +140,18 @@ test_that("plotNetwork produces visNetwork object from clustering output", { verbose = FALSE ) - # Only run if there are clusters large enough if (!is.null(res$cluster_properties) && - any(res$cluster_properties$cluster_size >= 2)) { + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { plot_obj <- plotNetwork( clustering_output = res, cluster_min_size = 2, n_cores = 1 ) - expect_s3_class(plot_obj, "visNetwork") } }) -test_that("plotNetwork with color_info = 'none' works", { +test_that("plotNetwork works with color_info = 'none'", { skip_on_cran() skip_if_not_installed("immApex") skip_if(!exists("calculateMotif", asNamespace("immApex")), @@ -155,14 +183,336 @@ test_that("plotNetwork with color_info = 'none' works", { ) if (!is.null(res$cluster_properties) && - any(res$cluster_properties$cluster_size >= 2)) { + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { plot_obj <- plotNetwork( clustering_output = res, color_info = "none", cluster_min_size = 2, n_cores = 1 ) + expect_s3_class(plot_obj, "visNetwork") + } +}) + +# ---- Functional tests with GLIPH2 output ------------------------------------ + +test_that("plotNetwork produces visNetwork from gliph_version=2 output", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph2", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + plot_obj <- plotNetwork( + clustering_output = res, + cluster_min_size = 2, + n_cores = 1 + ) expect_s3_class(plot_obj, "visNetwork") } }) + +# ---- plotNetwork with size_info and various coloring ------------------------- + +test_that("plotNetwork handles size_info parameter", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + # Test with size_info = "cluster_size" + plot_obj <- plotNetwork( + clustering_output = res, + size_info = "cluster_size", + absolute_size = FALSE, + cluster_min_size = 2, + n_cores = 1 + ) + expect_s3_class(plot_obj, "visNetwork") + + # Test with absolute_size = TRUE + plot_obj2 <- plotNetwork( + clustering_output = res, + size_info = "cluster_size", + absolute_size = TRUE, + cluster_min_size = 2, + n_cores = 1 + ) + expect_s3_class(plot_obj2, "visNetwork") + } +}) + +test_that("plotNetwork errors for non-existent size_info column", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + expect_error( + plotNetwork( + clustering_output = res, + size_info = "nonexistent_column", + cluster_min_size = 2, + n_cores = 1 + ), + "not found" + ) + } +}) + +test_that("plotNetwork errors for non-existent color_info column", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + expect_error( + plotNetwork( + clustering_output = res, + color_info = "nonexistent_column", + cluster_min_size = 2, + n_cores = 1 + ), + "not found" + ) + } +}) + +test_that("plotNetwork with categorical color_info", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + # Use TRBV as categorical color_info + if ("TRBV" %in% colnames(res$cluster_list[[1]])) { + plot_obj <- plotNetwork( + clustering_output = res, + color_info = "TRBV", + cluster_min_size = 2, + n_cores = 1 + ) + expect_s3_class(plot_obj, "visNetwork") + } + } +}) + +test_that("plotNetwork with show_additional_columns", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(as.numeric(res$cluster_properties$cluster_size) >= 2)) { + plot_obj <- plotNetwork( + clustering_output = res, + show_additional_columns = c("TRBV"), + cluster_min_size = 2, + n_cores = 1 + ) + expect_s3_class(plot_obj, "visNetwork") + } +}) + +test_that("plotNetwork errors when no clusters meet cluster_min_size", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties)) { + expect_error( + plotNetwork( + clustering_output = res, + cluster_min_size = 10000, + n_cores = 1 + ), + "does not contain any clusters" + ) + } +}) diff --git a/tests/testthat/test-utils-output.R b/tests/testthat/test-utils-output.R new file mode 100644 index 0000000..faf221e --- /dev/null +++ b/tests/testthat/test-utils-output.R @@ -0,0 +1,136 @@ +# Tests for utility functions in utils-output.R + +# ---- .coerce_numeric_cols --------------------------------------------------- + +test_that(".coerce_numeric_cols converts character columns to numeric", { + df <- data.frame( + a = c("1", "2", "3"), + b = c("hello", "world", "foo"), + c = c("1.5", "2.5", "3.5"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.coerce_numeric_cols(df) + expect_true(is.numeric(result$a)) + expect_true(is.character(result$b)) + expect_true(is.numeric(result$c)) +}) + +test_that(".coerce_numeric_cols returns non-data.frame input unchanged", { + expect_equal(immGLIPH:::.coerce_numeric_cols("hello"), "hello") + expect_equal(immGLIPH:::.coerce_numeric_cols(42), 42) + expect_null(immGLIPH:::.coerce_numeric_cols(NULL)) +}) + +test_that(".coerce_numeric_cols handles mixed numeric/character columns", { + df <- data.frame( + a = c("1", "abc", "3"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.coerce_numeric_cols(df) + # Should remain character because "abc" can't be coerced + expect_true(is.character(result$a)) +}) + +# ---- .check_existing_files -------------------------------------------------- + +test_that(".check_existing_files returns TRUE when no files exist", { + tmp_dir <- file.path(tempdir(), paste0("check_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + dir.create(tmp_dir, showWarnings = FALSE) + + result <- immGLIPH:::.check_existing_files( + paste0(tmp_dir, "/"), + c("file1.txt", "file2.txt") + ) + expect_true(result) +}) + +test_that(".check_existing_files returns FALSE and warns when files exist", { + tmp_dir <- file.path(tempdir(), paste0("check_test2_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + dir.create(tmp_dir, showWarnings = FALSE) + + writeLines("test", file.path(tmp_dir, "existing.txt")) + + expect_warning( + result <- immGLIPH:::.check_existing_files( + paste0(tmp_dir, "/"), + c("existing.txt", "other.txt") + ), + "already exists" + ) + expect_false(result) +}) + +# ---- .prepare_result_folder ------------------------------------------------- + +test_that(".prepare_result_folder appends trailing slash", { + tmp_dir <- file.path(tempdir(), paste0("prep_test_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + result <- immGLIPH:::.prepare_result_folder(tmp_dir) + expect_equal(substr(result, nchar(result), nchar(result)), "/") + expect_true(dir.exists(result)) +}) + +test_that(".prepare_result_folder returns empty string for empty input", { + result <- immGLIPH:::.prepare_result_folder("") + expect_equal(result, "") +}) + +test_that(".prepare_result_folder errors on non-character input", { + expect_error( + immGLIPH:::.prepare_result_folder(123), + "character" + ) +}) + +test_that(".prepare_result_folder errors on multiple paths", { + expect_error( + immGLIPH:::.prepare_result_folder(c("a", "b")), + "single path" + ) +}) + +test_that(".prepare_result_folder preserves existing trailing slash", { + tmp_dir <- file.path(tempdir(), paste0("prep_test2_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + + result <- immGLIPH:::.prepare_result_folder(paste0(tmp_dir, "/")) + expect_equal(substr(result, nchar(result), nchar(result)), "/") +}) + +test_that(".prepare_result_folder creates nested directories", { + tmp_dir <- file.path(tempdir(), paste0("prep_nest_", Sys.getpid()), + "sub1", "sub2") + on.exit(unlink(file.path(tempdir(), paste0("prep_nest_", Sys.getpid())), + recursive = TRUE), add = TRUE) + + result <- immGLIPH:::.prepare_result_folder(tmp_dir) + expect_true(dir.exists(result)) +}) + +# ---- .save_parameters ------------------------------------------------------- + +test_that(".save_parameters writes parameter file", { + tmp_dir <- file.path(tempdir(), paste0("save_params_", Sys.getpid())) + on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE) + dir.create(tmp_dir, showWarnings = FALSE) + + params <- list( + method = "gliph2", + sim_depth = 1000, + motif_length = c(2, 3, 4) + ) + + immGLIPH:::.save_parameters(params, paste0(tmp_dir, "/")) + + param_file <- file.path(tmp_dir, "parameter.txt") + expect_true(file.exists(param_file)) + + content <- readLines(param_file) + expect_true(any(grepl("method", content))) + expect_true(any(grepl("gliph2", content))) + expect_true(any(grepl("motif_length", content))) + expect_true(any(grepl("2,3,4", content))) +}) diff --git a/tests/testthat/test-utils-reference.R b/tests/testthat/test-utils-reference.R index 10b6d2b..e26ae5b 100644 --- a/tests/testthat/test-utils-reference.R +++ b/tests/testthat/test-utils-reference.R @@ -163,3 +163,131 @@ test_that(".load_reference applies accept_CF filtering", { ) expect_true(length(result_no_cf$refseqs) >= length(result_cf$refseqs)) }) + +test_that(".load_reference with global_vgene uses TRBV column", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + global_vgene = TRUE, + verbose = FALSE + ) + + expect_type(result, "list") + expect_true(length(result$refseqs) > 0) +}) + +test_that(".load_reference errors when global_vgene but no V-gene column", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + stringsAsFactors = FALSE + ) + + expect_error( + immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + global_vgene = TRUE, + verbose = FALSE + ), + "V-gene" + ) +}) + +test_that(".load_reference filters invalid amino acid sequences", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "C123INVALID456F", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = FALSE, + min_seq_length = 8, + verbose = FALSE + ) + + # The invalid sequence should have been removed + expect_true(length(result$refseqs) == 2) +}) + +test_that(".load_reference errors when all sequences are below min_seq_length", { + ref_df <- data.frame( + CDR3b = c("CASF", "CASF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + expect_error( + immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + verbose = FALSE + ), + "min_seq_length" + ) +}) + +test_that(".load_reference with structboundaries FALSE", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + structboundaries = FALSE, + boundary_size = 3, + verbose = FALSE + ) + + # Without boundary trimming, motif region should equal full sequence + expect_equal(result$refseqs_motif, result$refseqs) +}) + +test_that(".load_reference uses second column as TRBV when not named", { + ref_df <- data.frame( + seq = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + vgene = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + global_vgene = TRUE, + verbose = FALSE + ) + + expect_true(length(result$refseqs) > 0) +}) + +test_that(".load_reference adds empty TRBV when single column provided", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.load_reference( + refdb_beta = ref_df, + accept_CF = TRUE, + min_seq_length = 8, + verbose = FALSE + ) + + expect_s3_class(result$refseqs_df, "data.frame") + expect_true("TRBV" %in% colnames(result$refseqs_df)) +}) diff --git a/tests/testthat/test-utils-validate.R b/tests/testthat/test-utils-validate.R new file mode 100644 index 0000000..b3969b9 --- /dev/null +++ b/tests/testthat/test-utils-validate.R @@ -0,0 +1,337 @@ +# Tests for .validate_params() in utils-validate.R + +# ---- Valid defaults --------------------------------------------------------- + +test_that(".validate_params returns list with all expected elements", { + result <- immGLIPH:::.validate_params() + expect_type(result, "list") + expected_names <- c("refdb_beta", "v_usage_freq", "cdr3_length_freq", + "ref_cluster_size", "sim_depth", "lcminp", "lcminove", + "kmer_mindepth", "accept_CF", "min_seq_length", + "gccutoff", "structboundaries", "boundary_size", + "motif_length", "local_similarities", + "global_similarities", "cluster_min_size", + "hla_cutoff", "n_cores", "motif_distance_cutoff", + "discontinuous_motifs", "all_aa_interchangeable", + "boost_local_significance") + expect_true(all(expected_names %in% names(result))) +}) + +# ---- refdb_beta validation -------------------------------------------------- + +test_that(".validate_params rejects invalid refdb_beta name", { + expect_error( + immGLIPH:::.validate_params(refdb_beta = "invalid_name"), + "refdb_beta" + ) +}) + +test_that(".validate_params rejects non-character, non-data.frame refdb_beta", { + expect_error( + immGLIPH:::.validate_params(refdb_beta = 42), + "refdb_beta" + ) +}) + +test_that(".validate_params accepts valid refdb_beta names", { + result <- immGLIPH:::.validate_params(refdb_beta = "human_v2.0_CD48") + expect_equal(result$refdb_beta, "human_v2.0_CD48") +}) + +test_that(".validate_params accepts data frame refdb_beta", { + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF"), + TRBV = c("TRBV5-1"), + stringsAsFactors = FALSE + ) + result <- immGLIPH:::.validate_params(refdb_beta = ref_df) + expect_s3_class(result$refdb_beta, "data.frame") +}) + +# ---- v_usage_freq validation ------------------------------------------------ + +test_that(".validate_params rejects invalid v_usage_freq", { + expect_error( + immGLIPH:::.validate_params(v_usage_freq = "not_a_df"), + "v_usage_freq" + ) +}) + +test_that(".validate_params rejects v_usage_freq with non-numeric frequencies", { + bad_freq <- data.frame(vgene = "TRBV5-1", freq = "abc", + stringsAsFactors = FALSE) + expect_error( + immGLIPH:::.validate_params(v_usage_freq = bad_freq), + "numeric" + ) +}) + +# ---- cdr3_length_freq validation ------------------------------------------- + +test_that(".validate_params rejects invalid cdr3_length_freq", { + expect_error( + immGLIPH:::.validate_params(cdr3_length_freq = "not_a_df"), + "cdr3_length_freq" + ) +}) + +test_that(".validate_params rejects cdr3_length_freq with non-numeric frequencies", { + bad_freq <- data.frame(length = "10", freq = "abc", + stringsAsFactors = FALSE) + expect_error( + immGLIPH:::.validate_params(cdr3_length_freq = bad_freq), + "numeric" + ) +}) + +# ---- ref_cluster_size validation ------------------------------------------- + +test_that(".validate_params rejects invalid ref_cluster_size", { + expect_error( + immGLIPH:::.validate_params(ref_cluster_size = "invalid"), + "ref_cluster_size" + ) +}) + +# ---- sim_depth validation -------------------------------------------------- + +test_that(".validate_params rejects non-numeric sim_depth", { + expect_error( + immGLIPH:::.validate_params(sim_depth = "abc"), + "sim_depth" + ) +}) + +test_that(".validate_params rejects sim_depth < 1", { + expect_error( + immGLIPH:::.validate_params(sim_depth = 0), + "sim_depth" + ) +}) + +test_that(".validate_params rounds sim_depth", { + result <- immGLIPH:::.validate_params(sim_depth = 5.7) + expect_equal(result$sim_depth, 6) +}) + +# ---- lcminp validation ---------------------------------------------------- + +test_that(".validate_params rejects non-numeric lcminp", { + expect_error( + immGLIPH:::.validate_params(lcminp = "abc"), + "lcminp" + ) +}) + +test_that(".validate_params rejects lcminp <= 0", { + expect_error( + immGLIPH:::.validate_params(lcminp = 0), + "lcminp" + ) +}) + +# ---- lcminove validation --------------------------------------------------- + +test_that(".validate_params rejects non-numeric lcminove", { + expect_error( + immGLIPH:::.validate_params(lcminove = "abc"), + "lcminove" + ) +}) + +test_that(".validate_params rejects lcminove length mismatch", { + expect_error( + immGLIPH:::.validate_params(lcminove = c(10, 20), motif_length = c(2, 3, 4)), + "lcminove" + ) +}) + +test_that(".validate_params rejects lcminove values < 1", { + expect_error( + immGLIPH:::.validate_params(lcminove = c(0.5, 1, 1)), + "lcminove" + ) +}) + +# ---- kmer_mindepth validation ----------------------------------------------- + +test_that(".validate_params rejects non-numeric kmer_mindepth", { + expect_error( + immGLIPH:::.validate_params(kmer_mindepth = "abc"), + "kmer_mindepth" + ) +}) + +test_that(".validate_params rejects kmer_mindepth < 1", { + expect_error( + immGLIPH:::.validate_params(kmer_mindepth = 0), + "kmer_mindepth" + ) +}) + +# ---- accept_CF validation --------------------------------------------------- + +test_that(".validate_params rejects non-logical accept_CF", { + expect_error( + immGLIPH:::.validate_params(accept_CF = "yes"), + "logical" + ) +}) + +# ---- min_seq_length validation ----------------------------------------------- + +test_that(".validate_params rejects non-numeric min_seq_length", { + expect_error( + immGLIPH:::.validate_params(min_seq_length = "abc"), + "min_seq_length" + ) +}) + +test_that(".validate_params adjusts min_seq_length with structboundaries", { + result <- immGLIPH:::.validate_params( + structboundaries = TRUE, + boundary_size = 3, + min_seq_length = 1 + ) + # min_seq_length should be at least 2 * boundary_size + 1 = 7 + expect_true(result$min_seq_length >= 7) +}) + +# ---- structboundaries / boundary_size validation ---------------------------- + +test_that(".validate_params rejects non-logical structboundaries", { + expect_error( + immGLIPH:::.validate_params(structboundaries = "yes"), + "logical" + ) +}) + +test_that(".validate_params rejects negative boundary_size", { + expect_error( + immGLIPH:::.validate_params(boundary_size = -1), + "boundary_size" + ) +}) + +# ---- motif_length validation ------------------------------------------------ + +test_that(".validate_params rejects motif_length with values < 1", { + expect_error( + immGLIPH:::.validate_params(motif_length = c(0, 2)), + "motif_length" + ) +}) + +# ---- similarities validation ------------------------------------------------ + +test_that(".validate_params rejects non-logical local_similarities", { + expect_error( + immGLIPH:::.validate_params(local_similarities = "yes"), + "logical" + ) +}) + +test_that(".validate_params rejects non-logical global_similarities", { + expect_error( + immGLIPH:::.validate_params(global_similarities = "yes"), + "logical" + ) +}) + +test_that(".validate_params rejects both similarities as FALSE", { + expect_error( + immGLIPH:::.validate_params(local_similarities = FALSE, + global_similarities = FALSE), + "At least one" + ) +}) + +# ---- gccutoff validation ---------------------------------------------------- + +test_that(".validate_params rejects invalid gccutoff", { + expect_error( + immGLIPH:::.validate_params(gccutoff = "abc"), + "gccutoff" + ) + expect_error( + immGLIPH:::.validate_params(gccutoff = -1), + "gccutoff" + ) +}) + +test_that(".validate_params accepts NULL gccutoff", { + result <- immGLIPH:::.validate_params(gccutoff = NULL) + expect_null(result$gccutoff) +}) + +# ---- cluster_min_size validation -------------------------------------------- + +test_that(".validate_params rejects cluster_min_size < 1", { + expect_error( + immGLIPH:::.validate_params(cluster_min_size = 0), + "cluster_min_size" + ) +}) + +# ---- hla_cutoff validation -------------------------------------------------- + +test_that(".validate_params rejects hla_cutoff outside [0, 1]", { + expect_error( + immGLIPH:::.validate_params(hla_cutoff = 1.5), + "hla_cutoff" + ) + expect_error( + immGLIPH:::.validate_params(hla_cutoff = -0.1), + "hla_cutoff" + ) +}) + +# ---- n_cores validation ----------------------------------------------------- + +test_that(".validate_params rejects n_cores < 1", { + expect_error( + immGLIPH:::.validate_params(n_cores = 0), + "n_cores" + ) +}) + +test_that(".validate_params accepts NULL n_cores", { + result <- immGLIPH:::.validate_params(n_cores = NULL) + expect_null(result$n_cores) +}) + +# ---- motif_distance_cutoff validation ---------------------------------------- + +test_that(".validate_params rejects non-numeric motif_distance_cutoff", { + expect_error( + immGLIPH:::.validate_params(motif_distance_cutoff = "abc"), + "motif_distance_cutoff" + ) +}) + +# ---- discontinuous_motifs validation ---------------------------------------- + +test_that(".validate_params rejects non-logical discontinuous_motifs", { + expect_error( + immGLIPH:::.validate_params(discontinuous_motifs = "yes"), + "logical" + ) +}) + +# ---- all_aa_interchangeable validation -------------------------------------- + +test_that(".validate_params rejects non-logical all_aa_interchangeable", { + expect_error( + immGLIPH:::.validate_params(all_aa_interchangeable = "yes"), + "logical" + ) +}) + +# ---- boost_local_significance validation ------------------------------------ + +test_that(".validate_params rejects non-logical boost_local_significance", { + expect_error( + immGLIPH:::.validate_params(boost_local_significance = "yes"), + "logical" + ) +})