-
Notifications
You must be signed in to change notification settings - Fork 1
/
DRsims.Rmd
130 lines (81 loc) · 5.13 KB
/
DRsims.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
---
title: "simsDR"
output: html_document
date: '2023-07-05'
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
results <- out
results[, coverage := CI_left <= ATE & CI_right >= ATE]
results[, sd := abs(CI_left -CI_right)/1.96/2]
results
results <- unique(results[, .(sd = mean(sd), se = sd(estimate-ATE), bias = abs(mean(estimate - ATE)), coverage = mean(coverage)), by = c( "misp", "estimator", "lrnr")])
results[, rmse := sqrt(bias^2 + se^2)]
results
```
Do table
For each lrnr.
n Treatment Outcome Both
mse (coverage)
```{r}
results[results$lrnr=="xgboost",]
```
```{r}
library(data.table)
n_list <- c( 1000, 2000, 3000, 4000, 5000 )
pos_list <- c(2 )
results <-
rbindlist(unlist(lapply(n_list, function(n){
unlist(lapply(pos_list, function(pos) {
lapply(c(0), function(misp) {
try({
key <- paste0("DR_iter=", "1000", "_n=", n, "_pos=", pos )
data <- fread(paste0("./simResultsDR/sim_results_", key, ".csv"))
return(data)
})
return(data.table())
})
}), recursive = F)
}), recursive = F))
results[, coverage := CI_left <= ATE & CI_right >= ATE]
results[, sd := abs(CI_left -CI_right)/1.96/2]
results <- unique(results[, .(sd = mean(sd), se = sd(estimate-ATE), bias = abs(mean(estimate - ATE)), coverage = mean(coverage)), by = c("pos_const", "n", "misp", "estimator", "lrnr")])
results[, rmse := sqrt(bias^2 + se^2)]
library(ggplot2)
w <- 7
h <- 5
results$misp <- c("Both", "Treatment", "Outcome", "Neither")[match(results$misp, c("1", "2", "3", "4"))]
for(pos_const in unique(results$pos_const)) {
for(lrnr in c("pooled")) {
for(misp in c("Both", "Treatment", "Outcome", "Neither")) {
tmp <- as.data.frame(results)[results$pos_const == pos_const,]
#tmp <- as.data.frame(tmp)[tmp$lrnr == lrnr,]
tmp <- as.data.frame(tmp)[tmp$misp == misp,]
#maxval <- 3*min(tmp$rmse[tmp$n ==500])
maxval <- max(tmp$rmse )
tmp$bias <- pmin(tmp$bias, maxval)
tmp$se <- pmin(tmp$se, maxval)
tmp$mse <- pmin(tmp$rmse, maxval)
limits <- c(0, maxval + .01)
p <- ggplot(tmp, aes(x= n, y = bias, color = lrnr, shape= lrnr)) + geom_point(size = 4) + geom_line( color="grey", linetype = "dashed") + facet_wrap(~estimator, ncol =2) + theme_bw() + theme( text = element_text(size=18), axis.text.x = element_text(size = 14 , hjust = 1, vjust = 0.5), legend.position = "bottom", legend.box = "horizontal"
) + labs(x = "Sample Size (n)", y = "Bias", color = "Estimator", group = "Estimator", shape= "Estimator") + scale_y_continuous( limits = limits)
ggsave( filename = paste0("DR=", pos_const,lrnr,misp, "Bias.pdf") , width = w, height = h)
ggplot(tmp, aes(x= n, y = se, color = lrnr, shape= lrnr)) + geom_point(size = 4) + geom_line( color="grey", linetype = "dashed") + facet_wrap(~estimator, ncol =2) + theme_bw() + theme( text = element_text(size=18), axis.text.x = element_text(size = 14 , hjust = 1, vjust = 0.5), legend.position = "bottom", legend.box = "horizontal"
) + labs(x = "Sample Size (n)", y = "Standard Error", color = "Estimator", group = "Estimator", shape= "Estimator") + scale_y_continuous( limits = limits)
ggsave(filename = paste0("DR=", pos_const,lrnr,misp, "SE.pdf"), width = w, height = h)
ggplot(tmp, aes(x= n, y = mse, color = lrnr, shape= lrnr)) + geom_point(size = 4) + geom_line( color="grey", linetype = "dashed") + facet_wrap(~estimator, ncol =2) + theme_bw() + theme( text = element_text(size=18), axis.text.x = element_text(size = 14 , hjust = 1, vjust = 0.5), legend.position = "bottom", legend.box = "horizontal"
) + labs(x = "Sample Size (n)", y = "Root Mean Square Error", color = "Estimator", group = "Estimator", shape= "Estimator") + scale_y_continuous( limits = limits)
ggsave(filename = paste0("DR=", pos_const,lrnr,misp, "MSE.pdf"), width = w, height = h)
p <- ggplot(tmp, aes(x= n, y = coverage, color = lrnr, shape= lrnr)) + geom_point(size = 4) + geom_line( color="grey", linetype = "dashed") + facet_wrap(~estimator, ncol =2)+ scale_y_continuous(limits = c(min(tmp$coverage), 0.97)) + geom_hline(yintercept = 0.95, color = "grey") + theme_bw() + theme( text = element_text(size=18), axis.text.x = element_text(size = 14 , hjust = 1, vjust = 0.5), legend.position = "bottom", legend.box = "horizontal"
) + labs(x = "Sample Size (n)", y = "CI Coverage", color = "Estimator", group = "Estimator", shape= "Estimator")
ggsave(filename = paste0("DR=", pos_const,lrnr,misp, "CI.pdf"), width = w, height = h)
}
}}
p <- ggplot(tmp, aes(x= n, y = coverage, color = lrnr, shape= lrnr)) + geom_point(size = 4) + facet_wrap(~estimator, ncol =2)+ scale_y_log10() + geom_hline(yintercept = 0.95, color = "grey") + theme_bw() + theme( text = element_text(size=18), axis.text.x = element_text(size = 14 , hjust = 1, vjust = 0.5), legend.position = "bottom", legend.box = "horizontal" , legend.title = element_blank()
) + labs(x = "Sample Size (n)", y = "CI Coverage", color = "estimator", group = "estimator", shape= "estimator")
```
```{r}
results[n >= 1000 & pos == 4e-02]
```