-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathevaluate_pmt_models_train_test.ado
112 lines (88 loc) · 5.22 KB
/
evaluate_pmt_models_train_test.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
program define evaluate_pmt_models_train_test
// 20 July 2013: adding the capability to base the regression coefficients on a reduced set of observations, given by the variable `trainingIndicator'
syntax varname [using/] [if], model(string) startingCutoff(real) Target_coverage(real) poor(varname numeric) Quantilec(varname numeric) trainingIndicator(varname numeric) testingIndicator(varname numeric) [TOLerance(real 0.001) step(real 5) logpline(real 0) append]
version 10.1
tempname cuttoffOptimizeNoDecimal exit_loop
tempvar logpccd_hat
// 9 May 2016
// regresses varname on model, and predict varname_hat. Evaluates the performance on the training set. Need to change to evaluate on testing set
tempname cutoffOptimize
local `cutoffOptimize' = `startingCutoff' // this is for looping over values of the cutoff to optimize the target coverage rate. But I will loop only over the section "combined" below, in order
// to speed things up
// ------------------ Model ------------------------------------------------------------------------------------
qui xi: svy: reg `varlist' `model' if `trainingIndicator'==1 // varlist is varname above
varsformyrelabel // This program relabels vars that are created by xi. The default label is something like "roof_material=3", and this code would change that to "roof_material=slate" for example
qui cap drop `logpccd_hat'
qui predict `logpccd_hat' `if' // predict for all observations, not just those on which the regression coefficients are based (i.e. not just on the training set)
// ------------------------------------------- Performance of Model -------------------------------------------
tempname fraction_covered_loop fraction_covered cutoffsTried n_steps y1 y2 x1 x2 last_word
local `n_steps' = 0
local `exit_loop' = 0
while (``exit_loop'' == 0) {
local `n_steps' = ``n_steps'' + 1
// get rid of decimal in the name of variable
local `cuttoffOptimizeNoDecimal' = subinstr(`"`cutoffOptimize'"', ".", "pp", .)
tempvar eligible_`cuttoffOptimizeNoDecimal'
qui cap drop `eligible_`cuttoffOptimizeNoDecimal''
qui gen `eligible_`cuttoffOptimizeNoDecimal'' = .
qui replace `eligible_`cuttoffOptimizeNoDecimal'' = 1 if `logpccd_hat' < ``cutoffOptimize'' & `logpccd_hat' ~= .
qui replace `eligible_`cuttoffOptimizeNoDecimal'' = 0 if `logpccd_hat' >= ``cutoffOptimize'' & `logpccd_hat' ~= .
qui pmt_eligible_train_test `eligible_`cuttoffOptimizeNoDecimal'', p(`poor') qu(`quantilec') evaluationSubset(`testingIndicator')
local `fraction_covered' = r(fraction_covered)
// calculate next cutoff
if (``n_steps'' ~= 1) {
local `last_word' = ``n_steps'' - 1
local `x2' = ``cutoffOptimize'' // This is the current cutoff being tried
local `x1' : word ``last_word'' of ``cutoffsTried'' // This is the previous cutoff tried
local `y2' = ``fraction_covered'' // The current fraction covered
local `y1' : word ``last_word'' of ``fraction_covered_loop'' // The previous fraction covered
// if the coverage did not change... then do what?
if (``y1''==``y2'') {
if (``y1''==1 | ``y1'' >= `target_coverage') {
local step = -1
}
else {
local step = 1
}
}
else {
local step = ((``x2''-``x1'')/(``y2''-``y1''))*(`target_coverage'-``y2'')
// lets make sure that the step size doesn't get ridiculously large
if (abs(`step') > 30) {
local step = (`step'/abs(`step'))*30
}
}
di `"n_steps=``n_steps'' step=`step' cutoffOptimize=``cutoffOptimize'' fraction_covered=``fraction_covered''"'
}
else {
if (`r(fraction_covered)' > `target_coverage' + `tolerance') {
// di `"n_steps=``step'' step=`step'"'
local step = -1*`step'
// di `"n_steps=``n_steps'' step=`step'"'
}
}
// Conditions under which to write the data to file and exit the loop: coverage hasn't changed; same threshold is being tried; more than 30 iterations; coverage is within tolerance of target
if ((`step'==.) | (`: list `cutoffOptimize' in local `cutoffsTried'') | (``n_steps'' > 30) | ((`r(fraction_covered)' > (`target_coverage' - `tolerance')) & (`r(fraction_covered)' < (`target_coverage' + `tolerance'))) ) {
qui xi: pmt2_train_test `varlist' `model', cutoffs(``cutoffOptimize'') p(`poor') quantilec(`quantilec') logpline(`logpline') graphme(``cutoffOptimize'') trainingIndicator(`trainingIndicator') testingIndicator(`testingIndicator')
varsformyrelabel // again, relabel the categorical variables so that humans can understand them without having to look anything up.
dataout_pmt2_train_test , l("Model (`: word count `model''):`model'") c(``cutoffOptimize'') f("`using'.csv") q(5) `append' // This just outputs the results from the r values
local `exit_loop' = 1
exit
}
local `cutoffsTried' ``cutoffsTried'' ``cutoffOptimize''
local `fraction_covered_loop' ``fraction_covered_loop'' ``fraction_covered''
if ((`r(fraction_covered)' < `target_coverage' - `tolerance') | (`r(fraction_covered)' > `target_coverage' + `tolerance')) {
local `cutoffOptimize' = ``cutoffOptimize'' + `step'
}
else {
stop
local `exit_loop' = 1
}
// di "n_steps = ``n_steps''"
// Lets not do too many loops
if (``n_steps'' > 30) {
stop
local `exit_loop' = 1
}
}
end