Skip to content

Commit 5db1def

Browse files
Merge pull request #11 from ncborcherding/feature/r-pkg-update
Refactor for efficiency, reduce dependencies, and expand documentation
2 parents e77a5d4 + 52cd3c3 commit 5db1def

File tree

11 files changed

+390
-375
lines changed

11 files changed

+390
-375
lines changed

DESCRIPTION

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,14 @@ biocViews: Software, Classification, Annotation, Sequencing
1414
Depends:
1515
R (>= 4.0)
1616
Imports:
17-
dplyr,
1817
ggplot2,
1918
grDevices,
2019
readxl,
21-
stringr,
22-
tidyr,
2320
data.table,
2421
patchwork,
2522
directlabels,
2623
keras3,
2724
pracma,
28-
purrr,
29-
magrittr,
30-
tibble,
3125
treemapify,
3226
immReferent
3327
Suggests:

NAMESPACE

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -24,27 +24,10 @@ importFrom(data.table,fifelse)
2424
importFrom(data.table,rbindlist)
2525
importFrom(data.table,set)
2626
importFrom(data.table,setcolorder)
27+
importFrom(data.table,setnames)
2728
importFrom(data.table,setorder)
2829
importFrom(directlabels,geom_dl)
2930
importFrom(directlabels,last.points)
30-
importFrom(dplyr,all_of)
31-
importFrom(dplyr,arrange)
32-
importFrom(dplyr,distinct)
33-
importFrom(dplyr,filter)
34-
importFrom(dplyr,group_by)
35-
importFrom(dplyr,if_else)
36-
importFrom(dplyr,left_join)
37-
importFrom(dplyr,mutate)
38-
importFrom(dplyr,n)
39-
importFrom(dplyr,pull)
40-
importFrom(dplyr,relocate)
41-
importFrom(dplyr,rename)
42-
importFrom(dplyr,row_number)
43-
importFrom(dplyr,select)
44-
importFrom(dplyr,slice_max)
45-
importFrom(dplyr,summarise)
46-
importFrom(dplyr,summarize)
47-
importFrom(dplyr,ungroup)
4831
importFrom(ggplot2,"%+replace%")
4932
importFrom(ggplot2,aes)
5033
importFrom(ggplot2,coord_flip)
@@ -68,20 +51,12 @@ importFrom(ggplot2,ylab)
6851
importFrom(ggplot2,ylim)
6952
importFrom(grDevices,hcl.colors)
7053
importFrom(immReferent,getIMGT)
71-
importFrom(magrittr,"%>%")
7254
importFrom(patchwork,plot_layout)
7355
importFrom(pracma,trapz)
74-
importFrom(purrr,map2_chr)
75-
importFrom(purrr,map_dfr)
7656
importFrom(readxl,read_excel)
57+
importFrom(stats,as.formula)
7758
importFrom(stats,mad)
7859
importFrom(stats,median)
79-
importFrom(stringr,str_c)
80-
importFrom(stringr,str_extract)
81-
importFrom(tibble,as_tibble)
82-
importFrom(tidyr,separate_longer_delim)
83-
importFrom(tidyr,unnest)
84-
importFrom(tidyr,unnest_longer)
8560
importFrom(treemapify,geom_treemap)
8661
importFrom(treemapify,geom_treemap_subgroup_border)
8762
importFrom(treemapify,geom_treemap_subgroup_text)

R/calculateAuc.R

Lines changed: 33 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,6 @@
3535
#' will contain columns for the feature (`eplet`, `creg`, `serology`), `AUC`,
3636
#' `norm_AUC`, `total_count`, and `loci`.
3737
#'
38-
#' @importFrom dplyr filter mutate select arrange group_by ungroup summarise rename all_of
39-
#' relocate left_join n pull slice_max
40-
#' @importFrom tidyr unnest_longer separate_longer_delim
41-
#' @importFrom stringr str_extract str_c
4238
#' @importFrom ggplot2 ggplot aes geom_line xlim ylim labs scale_color_manual
4339
#' @importFrom directlabels geom_dl last.points
4440
#' @importFrom pracma trapz
@@ -65,28 +61,27 @@ calculateAUC <- function(result_file,
6561
if (tolower(analysis_type) == "eplet") {
6662
config <- list(
6763
feature_col = "eplet",
68-
data = deepMatchR::deepMatchR_eplets,
64+
data = data.table::as.data.table(deepMatchR::deepMatchR_eplets),
6965
evidence_level = evidence_level,
7066
top_eplets = top_eplets,
7167
default_group_by = "eplet"
7268
)
7369
} else if (tolower(analysis_type) == "creg") {
7470
config <- list(
7571
feature_col = "CREG",
76-
data = deepMatchR::deepMatchR_cregs,
72+
data = data.table::as.data.table(deepMatchR::deepMatchR_cregs),
7773
default_group_by = "CREG"
7874
)
7975
} else if (tolower(analysis_type) == "serology") {
8076
config <- list(
8177
feature_col = "serology",
82-
data = deepMatchR::deepMatchR_cregs,
78+
data = data.table::as.data.table(deepMatchR::deepMatchR_cregs),
8379
default_group_by = "serology"
8480
)
8581
} else {
8682
stop("`analysis_type` must be one of 'eplet', 'creg' or 'serology'.")
8783
}
8884

89-
# Set default for group_by if not provided
9085
if (is.null(group_by)) {
9186
group_by <- config$default_group_by
9287
}
@@ -98,84 +93,63 @@ calculateAUC <- function(result_file,
9893
result0 <- result_file
9994
}
10095
.checkSAB(result0)
101-
result <- .processSAB(result0)
96+
result <- data.table::as.data.table(.processSAB(result0))
10297

10398
# --- 3. Create combinations of alleles and MFI cutoffs ---
10499
cutoffs <- seq(cut_min, cut_max, cut_step)
105-
class_alleles <- result %>% dplyr::select(allele, mfi_min)
100+
class_alleles <- result[, .(allele, mfi_min)]
106101

107-
summary_df <- expand.grid(allele = class_alleles$allele, cut = cutoffs) |>
108-
as_tibble() |>
109-
left_join(class_alleles, by = "allele") |>
110-
dplyr::filter(mfi_min > cut) |>
111-
dplyr::select(allele, cut)
102+
summary_dt <- data.table::CJ(allele = unique(class_alleles$allele), cut = cutoffs)
103+
summary_dt <- merge(summary_dt, class_alleles, by = "allele", all.x = TRUE)
104+
summary_dt <- summary_dt[mfi_min > cut, .(allele, cut)]
112105

113106
# --- 4. Prepare feature dictionary (Eplet or CREG) ---
114-
feature_data <- config$data[config$data$allele %in% class_alleles$allele, ]
107+
feature_data <- config$data[allele %in% class_alleles$allele]
115108

116-
# Handle eplet-specific evidence level filter
117109
if (analysis_type == "eplet" && !is.null(config$evidence_level)) {
118-
feature_data <- feature_data[feature_data[["evidence"]] %in% config$evidence_level, ]
110+
feature_data <- feature_data[evidence %in% config$evidence_level]
119111
if(nrow(feature_data) == 0) {
120112
stop("`evidence_level` filtering criteria did not produce any results.")
121113
}
122114
}
123115

124-
# Per-feature bookkeeping (count occurrences)
125-
feature_data <- feature_data |>
126-
group_by(!!sym(config$feature_col), allele) |> mutate(count = n()) |> ungroup() |>
127-
group_by(!!sym(config$feature_col)) |> mutate(subtotal = n()) |> ungroup()
116+
feature_data[, count := .N, by = c(config[["feature_col"]], "allele")]
117+
feature_data[, subtotal := .N, by = c(config[["feature_col"]])]
128118

129119
# --- 5. Calculate proportion positive for each feature × cut-off pair ---
130-
analysis_df <- summary_df |>
131-
left_join(feature_data, by = "allele", relationship = "many-to-many") |>
132-
mutate(loci = sub("\\*.*", "", allele)) |>
133-
dplyr::filter(!is.na(cut)) |>
134-
group_by(!!sym(config$feature_col), cut) |>
135-
mutate(
136-
positive_count = sum(count, na.rm = TRUE),
137-
percent_positive = positive_count / subtotal
138-
) |>
139-
group_by(!!sym(config$feature_col)) |>
140-
mutate(pp_max = max(percent_positive, na.rm = TRUE)) |>
141-
arrange(desc(subtotal), desc(percent_positive)) |>
142-
ungroup()
120+
analysis_dt <- merge(summary_dt, feature_data, by = "allele", allow.cartesian = TRUE)
121+
analysis_dt[, loci := sub("\\*.*", "", allele)]
122+
analysis_dt <- analysis_dt[!is.na(cut)]
123+
124+
analysis_dt[, positive_count := sum(count, na.rm = TRUE), by = c(config[["feature_col"]], "cut")]
125+
analysis_dt[, percent_positive := positive_count / subtotal]
126+
analysis_dt[, pp_max := max(percent_positive, na.rm = TRUE), by = c(config[["feature_col"]])]
127+
data.table::setorder(analysis_dt, -subtotal, -percent_positive)
143128

144129
# --- 6. Apply user filters ---
145130
if (!is.null(feature_filter))
146-
analysis_df <- analysis_df |> dplyr::filter(subtotal >= feature_filter)
131+
analysis_dt <- analysis_dt[subtotal >= feature_filter]
147132

148133
if (!is.null(percPos_filter))
149-
analysis_df <- analysis_df |> dplyr::filter(pp_max >= percPos_filter)
134+
analysis_dt <- analysis_dt[pp_max >= percPos_filter]
150135

151-
# Collapse loci for labelling
152-
analysis_df <- analysis_df |>
153-
group_by(!!sym(config$feature_col)) |>
154-
mutate(loci = paste0(unique(loci), collapse = "; ")) |>
155-
ungroup()
136+
analysis_dt[, loci := paste0(unique(loci), collapse = "; "), by = c(config[["feature_col"]])]
156137

157138
# --- 7. Calculate AUC ---
158-
feature_AUC <- analysis_df |>
159-
group_by(!!sym(config$feature_col)) |>
160-
summarise(
161-
AUC = trapz(cut, percent_positive),
162-
norm_AUC = AUC / cut_max,
163-
total_count = unique(subtotal)[1],
164-
loci = paste0(unique(loci), collapse = "; ")
165-
) |>
166-
ungroup()
139+
feature_AUC <- analysis_dt[, .(
140+
AUC = pracma::trapz(cut, percent_positive),
141+
total_count = unique(subtotal)[1],
142+
loci = paste0(unique(loci), collapse = "; ")
143+
), by = c(config[["feature_col"]])]
144+
feature_AUC[, norm_AUC := AUC / cut_max]
167145

168-
# --- 8. Generate Plot or Return Tibble ---
146+
# --- 8. Generate Plot or Return data.table ---
169147
if (plot_results) {
170-
plot_data <- analysis_df
148+
plot_data <- analysis_dt
171149

172-
# Handle eplet-specific `top_eplets` filter for plotting
173150
if (analysis_type == "eplet" && !is.null(config$top_eplets)) {
174-
top_features_vec <- feature_AUC |>
175-
slice_max(order_by = norm_AUC, n = config$top_eplets) |>
176-
pull(!!sym(config$feature_col))
177-
plot_data <- plot_data |>
178-
dplyr::filter(!!sym(config$feature_col) %in% top_features_vec)
151+
top_features_vec <- feature_AUC[order(-norm_AUC)][1:config$top_eplets, get(config$feature_col)]
152+
plot_data <- plot_data[get(config$feature_col) %in% top_features_vec]
179153
}
180154

181155
p <- ggplot(plot_data, aes(x = cut, y = percent_positive,

0 commit comments

Comments
 (0)