#' will contain columns for the feature (`eplet`, `creg`, `serology`), `AUC`,
#' `norm_AUC`, `total_count`, and `loci`.
#'
-#' @importFrom dplyr filter mutate select arrange group_by ungroup summarise rename all_of
-#' relocate left_join n pull slice_max
-#' @importFrom tidyr unnest_longer separate_longer_delim
-#' @importFrom stringr str_extract str_c
#' @importFrom ggplot2 ggplot aes geom_line xlim ylim labs scale_color_manual
#' @importFrom directlabels geom_dl last.points
#' @importFrom pracma trapz
@@ -65,28 +61,27 @@ calculateAUC <- function(result_file,
6561 if (tolower(analysis_type ) == " eplet" ) {
6662 config <- list (
6763 feature_col = " eplet" ,
68- data = deepMatchR :: deepMatchR_eplets ,
64+ data = data.table :: as.data.table( deepMatchR :: deepMatchR_eplets ) ,
6965 evidence_level = evidence_level ,
7066 top_eplets = top_eplets ,
7167 default_group_by = " eplet"
7268 )
7369 } else if (tolower(analysis_type ) == " creg" ) {
7470 config <- list (
7571 feature_col = " CREG" ,
76- data = deepMatchR :: deepMatchR_cregs ,
72+ data = data.table :: as.data.table( deepMatchR :: deepMatchR_cregs ) ,
7773 default_group_by = " CREG"
7874 )
7975 } else if (tolower(analysis_type ) == " serology" ) {
8076 config <- list (
8177 feature_col = " serology" ,
82- data = deepMatchR :: deepMatchR_cregs ,
78+ data = data.table :: as.data.table( deepMatchR :: deepMatchR_cregs ) ,
8379 default_group_by = " serology"
8480 )
8581 } else {
8682 stop(" `analysis_type` must be one of 'eplet', 'creg' or 'serology'." )
8783 }
8884
89- # Set default for group_by if not provided
9085 if (is.null(group_by )) {
9186 group_by <- config $ default_group_by
9287 }
@@ -98,84 +93,63 @@ calculateAUC <- function(result_file,
9893 result0 <- result_file
9994 }
10095 .checkSAB(result0 )
101- result <- . processSAB(result0 )
96+ result <- data.table :: as.data.table(. processSAB(result0 ) )
10297
10398 # --- 3. Create combinations of alleles and MFI cutoffs ---
10499 cutoffs <- seq(cut_min , cut_max , cut_step )
105- class_alleles <- result % > % dplyr :: select (allele , mfi_min )
100+ class_alleles <- result [, . (allele , mfi_min )]
106101
107- summary_df <- expand.grid(allele = class_alleles $ allele , cut = cutoffs ) | >
108- as_tibble() | >
109- left_join(class_alleles , by = " allele" ) | >
110- dplyr :: filter(mfi_min > cut ) | >
111- dplyr :: select(allele , cut )
102+ summary_dt <- data.table :: CJ(allele = unique(class_alleles $ allele ), cut = cutoffs )
103+ summary_dt <- merge(summary_dt , class_alleles , by = " allele" , all.x = TRUE )
104+ summary_dt <- summary_dt [mfi_min > cut , .(allele , cut )]
112105
113106 # --- 4. Prepare feature dictionary (Eplet or CREG) ---
114- feature_data <- config $ data [config $ data $ allele %in% class_alleles $ allele , ]
107+ feature_data <- config $ data [allele %in% class_alleles $ allele ]
115108
116- # Handle eplet-specific evidence level filter
117109 if (analysis_type == " eplet" && ! is.null(config $ evidence_level )) {
118- feature_data <- feature_data [feature_data [[ " evidence" ]] %in% config $ evidence_level , ]
110+ feature_data <- feature_data [evidence %in% config $ evidence_level ]
119111 if (nrow(feature_data ) == 0 ) {
120112 stop(" `evidence_level` filtering criteria did not produce any results." )
121113 }
122114 }
123115
124- # Per-feature bookkeeping (count occurrences)
125- feature_data <- feature_data | >
126- group_by(!! sym(config $ feature_col ), allele ) | > mutate(count = n()) | > ungroup() | >
127- group_by(!! sym(config $ feature_col )) | > mutate(subtotal = n()) | > ungroup()
116+ feature_data [, count : = .N , by = c(config [[" feature_col" ]], " allele" )]
117+ feature_data [, subtotal : = .N , by = c(config [[" feature_col" ]])]
128118
129119 # --- 5. Calculate proportion positive for each feature × cut-off pair ---
130- analysis_df <- summary_df | >
131- left_join(feature_data , by = " allele" , relationship = " many-to-many" ) | >
132- mutate(loci = sub(" \\ *.*" , " " , allele )) | >
133- dplyr :: filter(! is.na(cut )) | >
134- group_by(!! sym(config $ feature_col ), cut ) | >
135- mutate(
136- positive_count = sum(count , na.rm = TRUE ),
137- percent_positive = positive_count / subtotal
138- ) | >
139- group_by(!! sym(config $ feature_col )) | >
140- mutate(pp_max = max(percent_positive , na.rm = TRUE )) | >
141- arrange(desc(subtotal ), desc(percent_positive )) | >
142- ungroup()
120+ analysis_dt <- merge(summary_dt , feature_data , by = " allele" , allow.cartesian = TRUE )
121+ analysis_dt [, loci : = sub(" \\ *.*" , " " , allele )]
122+ analysis_dt <- analysis_dt [! is.na(cut )]
123+
124+ analysis_dt [, positive_count : = sum(count , na.rm = TRUE ), by = c(config [[" feature_col" ]], " cut" )]
125+ analysis_dt [, percent_positive : = positive_count / subtotal ]
126+ analysis_dt [, pp_max : = max(percent_positive , na.rm = TRUE ), by = c(config [[" feature_col" ]])]
127+ data.table :: setorder(analysis_dt , - subtotal , - percent_positive )
143128
144129 # --- 6. Apply user filters ---
145130 if (! is.null(feature_filter ))
146- analysis_df <- analysis_df | > dplyr :: filter( subtotal > = feature_filter )
131+ analysis_dt <- analysis_dt [ subtotal > = feature_filter ]
147132
148133 if (! is.null(percPos_filter ))
149- analysis_df <- analysis_df | > dplyr :: filter( pp_max > = percPos_filter )
134+ analysis_dt <- analysis_dt [ pp_max > = percPos_filter ]
150135
151- # Collapse loci for labelling
152- analysis_df <- analysis_df | >
153- group_by(!! sym(config $ feature_col )) | >
154- mutate(loci = paste0(unique(loci ), collapse = " ; " )) | >
155- ungroup()
136+ analysis_dt [, loci : = paste0(unique(loci ), collapse = " ; " ), by = c(config [[" feature_col" ]])]
156137
157138 # --- 7. Calculate AUC ---
158- feature_AUC <- analysis_df | >
159- group_by(!! sym(config $ feature_col )) | >
160- summarise(
161- AUC = trapz(cut , percent_positive ),
162- norm_AUC = AUC / cut_max ,
163- total_count = unique(subtotal )[1 ],
164- loci = paste0(unique(loci ), collapse = " ; " )
165- ) | >
166- ungroup()
139+ feature_AUC <- analysis_dt [, .(
140+ AUC = pracma :: trapz(cut , percent_positive ),
141+ total_count = unique(subtotal )[1 ],
142+ loci = paste0(unique(loci ), collapse = " ; " )
143+ ), by = c(config [[" feature_col" ]])]
144+ feature_AUC [, norm_AUC : = AUC / cut_max ]
167145
168- # --- 8. Generate Plot or Return Tibble ---
146+ # --- 8. Generate Plot or Return data.table ---
169147 if (plot_results ) {
170- plot_data <- analysis_df
148+ plot_data <- analysis_dt
171149
172- # Handle eplet-specific `top_eplets` filter for plotting
173150 if (analysis_type == " eplet" && ! is.null(config $ top_eplets )) {
174- top_features_vec <- feature_AUC | >
175- slice_max(order_by = norm_AUC , n = config $ top_eplets ) | >
176- pull(!! sym(config $ feature_col ))
177- plot_data <- plot_data | >
178- dplyr :: filter(!! sym(config $ feature_col ) %in% top_features_vec )
151+ top_features_vec <- feature_AUC [order(- norm_AUC )][1 : config $ top_eplets , get(config $ feature_col )]
152+ plot_data <- plot_data [get(config $ feature_col ) %in% top_features_vec ]
179153 }
180154
181155 p <- ggplot(plot_data , aes(x = cut , y = percent_positive ,
0 commit comments