BorchLab
diff --git a/DESCRIPTION b/DESCRIPTION
Lines changed: 0 additions & 6 deletions
diff --git a/NAMESPACE b/NAMESPACE
Lines changed: 2 additions & 27 deletions
diff --git a/R/calculateAuc.R b/R/calculateAuc.R
Lines changed: 33 additions & 59 deletions
@@ -14,20 +14,14 @@ biocViews: Software, Classification, Annotation, Sequencing
 Depends: 
 	R (>= 4.0)
 Imports: 
-	dplyr,
 	ggplot2,
 	grDevices,
 	readxl,
-	stringr,
-	tidyr,
 	data.table,
 	patchwork,
 	directlabels,
 	keras3,
 	pracma,
-	purrr,
-	magrittr,
-	tibble,
 	treemapify,
 	immReferent
 Suggests: 
 
@@ -24,27 +24,10 @@ importFrom(data.table,fifelse)
 importFrom(data.table,rbindlist)
 importFrom(data.table,set)
 importFrom(data.table,setcolorder)
+importFrom(data.table,setnames)
 importFrom(data.table,setorder)
 importFrom(directlabels,geom_dl)
 importFrom(directlabels,last.points)
-importFrom(dplyr,all_of)
-importFrom(dplyr,arrange)
-importFrom(dplyr,distinct)
-importFrom(dplyr,filter)
-importFrom(dplyr,group_by)
-importFrom(dplyr,if_else)
-importFrom(dplyr,left_join)
-importFrom(dplyr,mutate)
-importFrom(dplyr,n)
-importFrom(dplyr,pull)
-importFrom(dplyr,relocate)
-importFrom(dplyr,rename)
-importFrom(dplyr,row_number)
-importFrom(dplyr,select)
-importFrom(dplyr,slice_max)
-importFrom(dplyr,summarise)
-importFrom(dplyr,summarize)
-importFrom(dplyr,ungroup)
 importFrom(ggplot2,"%+replace%")
 importFrom(ggplot2,aes)
 importFrom(ggplot2,coord_flip)
@@ -68,20 +51,12 @@ importFrom(ggplot2,ylab)
 importFrom(ggplot2,ylim)
 importFrom(grDevices,hcl.colors)
 importFrom(immReferent,getIMGT)
-importFrom(magrittr,"%>%")
 importFrom(patchwork,plot_layout)
 importFrom(pracma,trapz)
-importFrom(purrr,map2_chr)
-importFrom(purrr,map_dfr)
 importFrom(readxl,read_excel)
+importFrom(stats,as.formula)
 importFrom(stats,mad)
 importFrom(stats,median)
-importFrom(stringr,str_c)
-importFrom(stringr,str_extract)
-importFrom(tibble,as_tibble)
-importFrom(tidyr,separate_longer_delim)
-importFrom(tidyr,unnest)
-importFrom(tidyr,unnest_longer)
 importFrom(treemapify,geom_treemap)
 importFrom(treemapify,geom_treemap_subgroup_border)
 importFrom(treemapify,geom_treemap_subgroup_text)
 
@@ -35,10 +35,6 @@
 #'   will contain columns for the feature (`eplet`, `creg`, `serology`), `AUC`,
 #'   `norm_AUC`, `total_count`, and `loci`.
 #'
-#' @importFrom dplyr filter mutate select arrange group_by ungroup summarise rename all_of
-#'   relocate left_join n pull slice_max
-#' @importFrom tidyr unnest_longer separate_longer_delim
-#' @importFrom stringr str_extract str_c
 #' @importFrom ggplot2 ggplot aes geom_line xlim ylim labs scale_color_manual
 #' @importFrom directlabels geom_dl last.points
 #' @importFrom pracma trapz
@@ -65,28 +61,27 @@ calculateAUC <- function(result_file,
   if (tolower(analysis_type) == "eplet") {
     config <- list(
       feature_col = "eplet",
-      data = deepMatchR::deepMatchR_eplets,
+      data = data.table::as.data.table(deepMatchR::deepMatchR_eplets),
       evidence_level = evidence_level,
       top_eplets = top_eplets,
       default_group_by = "eplet"
     )
   } else if (tolower(analysis_type) == "creg") {
     config <- list(
       feature_col = "CREG",
-      data = deepMatchR::deepMatchR_cregs,
+      data = data.table::as.data.table(deepMatchR::deepMatchR_cregs),
       default_group_by = "CREG"
     )
   } else if (tolower(analysis_type) == "serology") {
     config <- list(
       feature_col = "serology",
-      data = deepMatchR::deepMatchR_cregs,
+      data = data.table::as.data.table(deepMatchR::deepMatchR_cregs),
       default_group_by = "serology"
     )
   } else {
     stop("`analysis_type` must be one of 'eplet', 'creg' or 'serology'.")
   }
 
-  # Set default for group_by if not provided
   if (is.null(group_by)) {
     group_by <- config$default_group_by
   }
@@ -98,84 +93,63 @@ calculateAUC <- function(result_file,
     result0 <- result_file
   }
   .checkSAB(result0)
-  result <- .processSAB(result0)
+  result <- data.table::as.data.table(.processSAB(result0))
 
   # --- 3. Create combinations of alleles and MFI cutoffs ---
   cutoffs <- seq(cut_min, cut_max, cut_step)
-  class_alleles <- result %>% dplyr::select(allele, mfi_min)
+  class_alleles <- result[, .(allele, mfi_min)]
 
-  summary_df <- expand.grid(allele = class_alleles$allele, cut = cutoffs) |>
-    as_tibble() |>
-    left_join(class_alleles, by = "allele") |>
-    dplyr::filter(mfi_min > cut) |>
-    dplyr::select(allele, cut)
+  summary_dt <- data.table::CJ(allele = unique(class_alleles$allele), cut = cutoffs)
+  summary_dt <- merge(summary_dt, class_alleles, by = "allele", all.x = TRUE)
+  summary_dt <- summary_dt[mfi_min > cut, .(allele, cut)]
 
   # --- 4. Prepare feature dictionary (Eplet or CREG) ---
-  feature_data <- config$data[config$data$allele %in% class_alleles$allele, ]
+  feature_data <- config$data[allele %in% class_alleles$allele]
 
-  # Handle eplet-specific evidence level filter
   if (analysis_type == "eplet" && !is.null(config$evidence_level)) {
-    feature_data <- feature_data[feature_data[["evidence"]] %in% config$evidence_level, ]
+    feature_data <- feature_data[evidence %in% config$evidence_level]
     if(nrow(feature_data) == 0) {
       stop("`evidence_level` filtering criteria did not produce any results.")
     }
   }
 
-  # Per-feature bookkeeping (count occurrences)
-  feature_data <- feature_data |>
-    group_by(!!sym(config$feature_col), allele) |> mutate(count = n())    |> ungroup() |>
-    group_by(!!sym(config$feature_col))         |> mutate(subtotal = n()) |> ungroup()
+  feature_data[, count := .N, by = c(config[["feature_col"]], "allele")]
+  feature_data[, subtotal := .N, by = c(config[["feature_col"]])]
 
   # --- 5. Calculate proportion positive for each feature × cut-off pair ---
-  analysis_df <- summary_df |>
-    left_join(feature_data, by = "allele", relationship = "many-to-many") |>
-    mutate(loci = sub("\\*.*", "", allele)) |>
-    dplyr::filter(!is.na(cut)) |>
-    group_by(!!sym(config$feature_col), cut) |>
-    mutate(
-      positive_count   = sum(count, na.rm = TRUE),
-      percent_positive = positive_count / subtotal
-    ) |>
-    group_by(!!sym(config$feature_col)) |>
-    mutate(pp_max = max(percent_positive, na.rm = TRUE)) |>
-    arrange(desc(subtotal), desc(percent_positive)) |>
-    ungroup()
+  analysis_dt <- merge(summary_dt, feature_data, by = "allele", allow.cartesian = TRUE)
+  analysis_dt[, loci := sub("\\*.*", "", allele)]
+  analysis_dt <- analysis_dt[!is.na(cut)]
+
+  analysis_dt[, positive_count := sum(count, na.rm = TRUE), by = c(config[["feature_col"]], "cut")]
+  analysis_dt[, percent_positive := positive_count / subtotal]
+  analysis_dt[, pp_max := max(percent_positive, na.rm = TRUE), by = c(config[["feature_col"]])]
+  data.table::setorder(analysis_dt, -subtotal, -percent_positive)
 
   # --- 6. Apply user filters ---
   if (!is.null(feature_filter))
-    analysis_df <- analysis_df |> dplyr::filter(subtotal >= feature_filter)
+    analysis_dt <- analysis_dt[subtotal >= feature_filter]
 
   if (!is.null(percPos_filter))
-    analysis_df <- analysis_df |> dplyr::filter(pp_max >= percPos_filter)
+    analysis_dt <- analysis_dt[pp_max >= percPos_filter]
 
-  # Collapse loci for labelling
-  analysis_df <- analysis_df |>
-    group_by(!!sym(config$feature_col)) |>
-    mutate(loci = paste0(unique(loci), collapse = "; ")) |>
-    ungroup()
+  analysis_dt[, loci := paste0(unique(loci), collapse = "; "), by = c(config[["feature_col"]])]
 
   # --- 7. Calculate AUC ---
-  feature_AUC <- analysis_df |>
-    group_by(!!sym(config$feature_col)) |>
-    summarise(
-      AUC         = trapz(cut, percent_positive),
-      norm_AUC    = AUC / cut_max,
-      total_count = unique(subtotal)[1],
-      loci        = paste0(unique(loci), collapse = "; ")
-    ) |>
-    ungroup()
+  feature_AUC <- analysis_dt[, .(
+    AUC = pracma::trapz(cut, percent_positive),
+    total_count = unique(subtotal)[1],
+    loci = paste0(unique(loci), collapse = "; ")
+  ), by = c(config[["feature_col"]])]
+  feature_AUC[, norm_AUC := AUC / cut_max]
 
-  # --- 8. Generate Plot or Return Tibble ---
+  # --- 8. Generate Plot or Return data.table ---
   if (plot_results) {
-    plot_data <- analysis_df
+    plot_data <- analysis_dt
 
-    # Handle eplet-specific `top_eplets` filter for plotting
     if (analysis_type == "eplet" && !is.null(config$top_eplets)) {
-      top_features_vec <- feature_AUC |>
-        slice_max(order_by = norm_AUC, n = config$top_eplets) |>
-        pull(!!sym(config$feature_col))
-      plot_data <- plot_data |>
-        dplyr::filter(!!sym(config$feature_col) %in% top_features_vec)
+      top_features_vec <- feature_AUC[order(-norm_AUC)][1:config$top_eplets, get(config$feature_col)]
+      plot_data <- plot_data[get(config$feature_col) %in% top_features_vec]
     }
 
     p <- ggplot(plot_data, aes(x = cut, y = percent_positive,