-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCRC_real_data_analysis_choose_stopping_k.R
496 lines (401 loc) · 31.6 KB
/
CRC_real_data_analysis_choose_stopping_k.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
source('./src/dependencies.R')
source('./src/estimation_functions.R')
params_save_name <- '2,8,32,128,256,512,1024,2,6,10_1,1,1,1,1,1,1,2,2,2'
load(paste0("params/parameters_CRC_real_data_", params_save_name, ".RData"))
parameters_CRC_real_data$save_name <- params_save_name
parameters_CRC_real_data$num_cores <- 1
with(parameters_CRC_real_data, {
colon_ncdb <- read.csv('./colon_ncdb.csv') %>%
mutate(X = surg_approach_open,
Y = death_90) %>% dplyr::select(-c(surg_approach_open, death_90))
set.seed(202848)
colon_ncdb_split <- split_data (colon_ncdb, 2)
colon_ncdb_1 <- colon_ncdb_split[[1]]
colon_ncdb_2 <- colon_ncdb_split[[2]]
###########
## split 1
###########
basis_1 <- lapply(1:length(K), function(k) {
create_b_spline_basis(
data = colon_ncdb_1,
continuous_vars = c('distance', 'age', 'tumor_size', 'days_from_dx_to_def_surg'),
binary_vars = names(colon_ncdb_1)[!(names(colon_ncdb_1) %in%
c('distance', 'age', 'tumor_size', 'days_from_dx_to_def_surg', 'X', 'Y'))],
knots = knots[[k]], boundary_knots = boundary_knots[[k]],
degree_of_interactions = degree_of_interactions[k],
polynomial_degree = polynomial_degree[k]
)
})
###########
## split 2
###########
basis_2 <- lapply(1:length(K), function(k) {
create_b_spline_basis(
data = colon_ncdb_2,
continuous_vars = c('distance', 'age', 'tumor_size', 'days_from_dx_to_def_surg'),
binary_vars = names(colon_ncdb_2)[!(names(colon_ncdb_2) %in%
c('distance', 'age', 'tumor_size', 'days_from_dx_to_def_surg', 'X', 'Y'))],
knots = knots[[k]], boundary_knots = boundary_knots[[k]],
degree_of_interactions = degree_of_interactions[k],
polynomial_degree = polynomial_degree[k]
)
})
##########################################################
## compute estimates of the nuisance parameters and psi in estimation data
##########################################################
###########
## split 1
###########
prob_outcome1_trt1_stacked_classifier_1 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_outcome_2,
params_random_forest = params_random_forest_outcome_2,
params_knn = params_knn_outcome_2,
params_lasso = params_lasso_outcome_2,
params_glm = params_glm_outcome_2,
meta_model = meta_model_outcome_2$meta_model,
meta_model_formula = meta_model_outcome_2$meta_model_formula,
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_stacked_classifier_1 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_outcome_2,
params_random_forest = params_random_forest_outcome_2,
params_knn = params_knn_outcome_2,
params_lasso = params_lasso_outcome_2,
params_glm = params_glm_outcome_2,
meta_model = meta_model_outcome_2$meta_model,
meta_model_formula = meta_model_outcome_2$meta_model_formula,
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_boosted_tree_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_boosted_tree_outcome_2,
model = "boosted_tree",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_boosted_tree_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_boosted_tree_outcome_2,
model = "boosted_tree",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_random_forest_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_random_forest_outcome_2,
model = "random_forest",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_random_forest_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_random_forest_outcome_2,
model = "random_forest",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_knn_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_knn_outcome_2,
model = "knn",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_knn_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_knn_outcome_2,
model = "knn",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_lasso_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_lasso_outcome_2,
model = "lasso",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_lasso_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_lasso_outcome_2,
model = "lasso",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_glm_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_glm_outcome_2,
model = "glm",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_glm_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_glm_outcome_2,
model = "glm",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
###########
## split 2
###########
prob_outcome1_trt1_stacked_classifier_2 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_outcome_1,
params_random_forest = params_random_forest_outcome_1,
params_knn = params_knn_outcome_1,
params_lasso = params_lasso_outcome_1,
params_glm = params_glm_outcome_1,
meta_model = meta_model_outcome_1$meta_model,
meta_model_formula = meta_model_outcome_1$meta_model_formula,
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_stacked_classifier_2 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_outcome_1,
params_random_forest = params_random_forest_outcome_1,
params_knn = params_knn_outcome_1,
params_lasso = params_lasso_outcome_1,
params_glm = params_glm_outcome_1,
meta_model = meta_model_outcome_1$meta_model,
meta_model_formula = meta_model_outcome_1$meta_model_formula,
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_boosted_tree_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_boosted_tree_outcome_1,
model = "boosted_tree",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_boosted_tree_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_boosted_tree_outcome_1,
model = "boosted_tree",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_random_forest_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_random_forest_outcome_1,
model = "random_forest",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_random_forest_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_random_forest_outcome_1,
model = "random_forest",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_knn_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_knn_outcome_1,
model = "knn",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_knn_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_knn_outcome_1,
model = "knn",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_lasso_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_lasso_outcome_1,
model = "lasso",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_lasso_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_lasso_outcome_1,
model = "lasso",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_outcome1_trt1_glm_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_glm_outcome_1,
model = "glm",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_glm_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_glm_outcome_1,
model = "glm",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
##########################################################
## compute IF22 for choosing the stopping value of k with Gram matrix computed from the estimation sample
##########################################################
# a_resid is multiplied by -1 to correct for the fact that Sigma here is defined as A*t(basis)%*%basis rather than the -A*t(basis)%*%basis
# which is in expected in the functions used to compute IF22 for the treatment effect parameters
###########
## split 1
###########
HOIF_stacked_eff1_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_stacked_classifier_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_stacked_classifier_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff1,
num_cores = 1
)
HOIF_stacked_eff0_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_1$X), outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_stacked_classifier_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_stacked_classifier_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff0,
num_cores = 1
)
HOIF_boosted_tree_eff1_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_boosted_tree_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_boosted_tree_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff1,
num_cores = 1
)
HOIF_boosted_tree_eff0_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_1$X), outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_boosted_tree_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_boosted_tree_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_2_eff0,
num_cores = 1
)
HOIF_random_forest_eff1_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_random_forest_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_random_forest_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff1,
num_cores = 1
)
HOIF_random_forest_eff0_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_1$X), outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_random_forest_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_random_forest_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff0,
num_cores = 1
)
HOIF_knn_eff1_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_knn_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_knn_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff1,
num_cores = 1
)
HOIF_knn_eff0_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_1$X), outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_knn_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_knn_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff0,
num_cores = 1
)
HOIF_lasso_eff1_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_lasso_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_lasso_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff1,
num_cores = 1
)
HOIF_lasso_eff0_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_1$X), outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_lasso_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_lasso_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff0,
num_cores = 1
)
HOIF_glm_eff1_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_glm_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_glm_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff1,
num_cores = 1
)
HOIF_glm_eff0_outcome_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_1$X), outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_glm_1, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_glm_1, type = 'outcome', estimand = 'effect'),
basis = basis_1,
sigma = sigma_1_eff0,
num_cores = 1
)
###########
## split 2
###########
HOIF_stacked_eff1_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_stacked_classifier_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_stacked_classifier_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff1,
num_cores = 1
)
HOIF_stacked_eff0_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_2$X), outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_stacked_classifier_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_stacked_classifier_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff0,
num_cores = 1
)
HOIF_boosted_tree_eff1_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_boosted_tree_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_boosted_tree_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff1,
num_cores = 1
)
HOIF_boosted_tree_eff0_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_2$X), outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_boosted_tree_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_boosted_tree_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff0,
num_cores = 1
)
HOIF_random_forest_eff1_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_random_forest_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_random_forest_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff1,
num_cores = 1
)
HOIF_random_forest_eff0_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_2$X), outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_random_forest_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_random_forest_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff0,
num_cores = 1
)
HOIF_knn_eff1_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_knn_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_knn_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff1,
num_cores = 1
)
HOIF_knn_eff0_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_2$X), outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_knn_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_knn_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff0,
num_cores = 1
)
HOIF_lasso_eff1_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_lasso_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_lasso_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff1,
num_cores = 1
)
HOIF_lasso_eff0_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_2$X), outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_lasso_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_lasso_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff0,
num_cores = 1
)
HOIF_glm_eff1_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_glm_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_glm_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff1,
num_cores = 1
)
HOIF_glm_eff0_outcome_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = -(1-colon_ncdb_2$X), outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_glm_2, type = 'outcome', estimand = 'effect'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_glm_2, type = 'outcome', estimand = 'effect'),
basis = basis_2,
sigma = sigma_2_eff0,
num_cores = 1
)
##########################################################
## save
##########################################################
results <- list(
HOIF_stacked_eff1_outcome_1 = HOIF_stacked_eff1_outcome_1,
HOIF_stacked_eff0_outcome_1 = HOIF_stacked_eff0_outcome_1,
HOIF_boosted_tree_eff1_outcome_1 = HOIF_boosted_tree_eff1_outcome_1,
HOIF_boosted_tree_eff0_outcome_1 = HOIF_boosted_tree_eff0_outcome_1,
HOIF_random_forest_eff1_outcome_1 = HOIF_random_forest_eff1_outcome_1,
HOIF_random_forest_eff0_outcome_1 = HOIF_random_forest_eff0_outcome_1,
HOIF_knn_eff1_outcome_1 = HOIF_knn_eff1_outcome_1,
HOIF_knn_eff0_outcome_1 = HOIF_knn_eff0_outcome_1,
HOIF_lasso_eff1_outcome_1 = HOIF_lasso_eff1_outcome_1,
HOIF_lasso_eff0_outcome_1 = HOIF_lasso_eff0_outcome_1,
HOIF_glm_eff1_outcome_1 = HOIF_glm_eff1_outcome_1,
HOIF_glm_eff0_outcome_1 = HOIF_glm_eff0_outcome_1,
HOIF_stacked_eff1_outcome_2 = HOIF_stacked_eff1_outcome_2,
HOIF_stacked_eff0_outcome_2 = HOIF_stacked_eff0_outcome_2,
HOIF_boosted_tree_eff1_outcome_2 = HOIF_boosted_tree_eff1_outcome_2,
HOIF_boosted_tree_eff0_outcome_2 = HOIF_boosted_tree_eff0_outcome_2,
HOIF_random_forest_eff1_outcome_2 = HOIF_random_forest_eff1_outcome_2,
HOIF_random_forest_eff0_outcome_2 = HOIF_random_forest_eff0_outcome_2,
HOIF_knn_eff1_outcome_2 = HOIF_knn_eff1_outcome_2,
HOIF_knn_eff0_outcome_2 = HOIF_knn_eff0_outcome_2,
HOIF_lasso_eff1_outcome_2 = HOIF_lasso_eff1_outcome_2,
HOIF_lasso_eff0_outcome_2 = HOIF_lasso_eff0_outcome_2,
HOIF_glm_eff1_outcome_2 = HOIF_glm_eff1_outcome_2,
HOIF_glm_eff0_outcome_2 = HOIF_glm_eff0_outcome_2
)
save(results, file=paste0("./output/CRC_real_data_analysis_choose_stopping_k_", save_name, ".RData"))
})