-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPSM_analysis.R
120 lines (94 loc) · 3.67 KB
/
PSM_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
############################################################
#
# This performs PSM analysis (Table 3 & 4 with matched samples)
# Use package 'MatchIt'
# Follows Randolph et al. (2014)
############################################################
install.packages('MatchIt')
install.packages('tableone')
library(MatchIt)
library(data.table)
## (0) Read the data:
# NOTE(review): machine-specific absolute path. Every fread()/write.csv() call
# in this script uses a bare file name, so they all depend on this setwd().
setwd("C:\\Users\\Yuriy\\Documents\\_Research\\ ! Project_ Payout Revolution\\Data\\capitaliq")
data <- fread("capital_iq_work_full.csv")
data[1:5,]
# Descriptives for dco: summary statistics, standard deviation and the
# number of non-missing observations.
summary(data$dco)
sd(data$dco, na.rm = TRUE)
sum(!is.na(data$dco))
## (1) Match samples:
# Matching variables: size(t-1), payouts(t-1),
#    cash_ta(t-1), past ownership(t-1), ebit_ta(t-1), hhi(t-1)
# Treatment indicator in this section: crossdummy1.
# NOTE(review): the year-by-year section further down matches on dco_high
# instead -- confirm that the different treatment here is intentional.
colnames(data)

# Keep only rows that are complete in dco_high, the treatment and every
# covariate of the matching model. The treatment (crossdummy1) is included
# because matchit() stops with an error when it contains missing values.
required_vars <- c("dco_high", "crossdummy1", "size_1", "own1_1",
                   "netpo_ta_1", "cash_ta_1", "ebit_ta_1", "hhi_1")
data <- na.omit(data, cols = required_vars)
data[1:7,]

# Standardise (z-score) the three ratio covariates over the retained sample.
data$cash_ta_1 <- (data$cash_ta_1 - mean(data$cash_ta_1)) / sd(data$cash_ta_1)
data$ebit_ta_1 <- (data$ebit_ta_1 - mean(data$ebit_ta_1)) / sd(data$ebit_ta_1)
data$hhi_1 <- (data$hhi_1 - mean(data$hhi_1)) / sd(data$hhi_1)
summary(data$cash_ta_1)
summary(data$hhi_1)
summary(data$ebit_ta_1)

# Model variables plus the identifiers carried through to the matched file.
keep_cols <- c(required_vars, "time", "gvkey", "cusip", "sic3")
mydata <- data[, keep_cols, with = FALSE]
mydata[1:7,]

# 1:1 nearest-neighbour matching on the estimated propensity score.
m.out1 <- matchit(crossdummy1 ~ size_1 + own1_1 + netpo_ta_1 +
                    cash_ta_1 + ebit_ta_1 + hhi_1,
                  data = mydata, method = "nearest", ratio = 1)

## (2) Save matched data:
# NOTE: the saved covariates cash_ta_1, ebit_ta_1 and hhi_1 are on the
# standardised scale (they are not transformed back in this section).
m.data1 <- match.data(m.out1)
write.csv(m.data1, file = "capital_iq_work_matched.csv")
######################################################
######## Matching for each year separately ###########
# For each value of `time` in 2001:2017:
#   1. keep rows complete in the treatment (dco_high) and all covariates,
#   2. z-score cash_ta_1, ebit_ta_1 and hhi_1 within that year,
#   3. run 1:1 nearest-neighbour matching,
#   4. put the three scaled covariates back on their original scale.
# The matched years are then stacked into m.data1 and written to disk.
data <- fread("capital_iq_work_full.csv")

# Columns carried into the matched file (model variables + identifiers).
yby_cols <- c("dco_high", "crossdummy1_1", "size_1", "own1_1", "netpo_ta_1",
              "cash_ta_1", "ebit_ta_1", "hhi_1",
              "time", "gvkey", "cusip", "sic3")
# Treatment and covariates of the matching model; rows missing any are dropped.
yby_model_vars <- c("dco_high", "size_1", "own1_1", "netpo_ta_1",
                    "cash_ta_1", "ebit_ta_1", "hhi_1", "crossdummy1_1")

years <- 2001:2017
# One slot per year; filled inside the loop and bound once afterwards
# instead of growing m.data1 with rbind() on every iteration.
matched_by_year <- vector("list", length(years))

for (i in seq_along(years)) {
  tt <- years[i]
  print(tt)
  data1 <- na.omit(data[data$time == tt, ], cols = yby_model_vars)

  # Matching needs both treatment groups; a year without them (including a
  # year with no usable rows at all) is skipped with a warning rather than
  # aborting the whole run inside matchit().
  if (length(unique(data1$dco_high)) < 2L) {
    warning("Year ", tt, ": fewer than two treatment groups after dropping ",
            "missing values; year skipped.", call. = FALSE)
    next
  }

  # Remember the year-specific location/scale so step 4 can undo the scaling.
  cash_m <- mean(data1$cash_ta_1)
  cash_sd <- sd(data1$cash_ta_1)
  ebit_m <- mean(data1$ebit_ta_1)
  ebit_sd <- sd(data1$ebit_ta_1)
  hhi_m <- mean(data1$hhi_1)
  hhi_sd <- sd(data1$hhi_1)
  data1$cash_ta_1 <- (data1$cash_ta_1 - cash_m) / cash_sd
  data1$ebit_ta_1 <- (data1$ebit_ta_1 - ebit_m) / ebit_sd
  data1$hhi_1 <- (data1$hhi_1 - hhi_m) / hhi_sd

  mydata <- data1[, yby_cols, with = FALSE]
  # m.out1 is overwritten every iteration; after the loop it holds only the
  # fit of the last year that was matched.
  m.out1 <- matchit(dco_high ~ size_1 + own1_1 + netpo_ta_1 +
                      cash_ta_1 + ebit_ta_1 + hhi_1 + crossdummy1_1,
                    data = mydata, method = "nearest", ratio = 1)

  data_new <- match.data(m.out1)
  # Back to the original units for the saved file.
  data_new$cash_ta_1 <- data_new$cash_ta_1 * cash_sd + cash_m
  data_new$ebit_ta_1 <- data_new$ebit_ta_1 * ebit_sd + ebit_m
  data_new$hhi_1 <- data_new$hhi_1 * hhi_sd + hhi_m

  matched_by_year[[i]] <- data_new
}

# Drop the slots of skipped years, then stack everything in one rbind() call.
matched_by_year <- Filter(Negate(is.null), matched_by_year)
if (length(matched_by_year) == 0L) {
  stop("No year in 2001:2017 could be matched.", call. = FALSE)
}
# NOTE(review): pair ids in `subclass` are generated per year; whether they
# are relabelled to stay unique after stacking depends on the rbind() method
# of the installed MatchIt version -- confirm before using `subclass`.
m.data1 <- do.call(rbind, matched_by_year)
write.csv(m.data1, file = "capital_iq_work_matched_yby.csv")
# -------------------------------
# Quick inspection of the stacked matched sample: column names, first rows.
names(m.data1)
m.data1[seq_len(7), ]
# Diagnostics of the matchit fit currently stored in m.out1. The loop above
# reassigns m.out1 on every pass, so this covers a single year only, not the
# pooled year-by-year sample.
a1 <- summary(m.out1)
summary(m.out1[["model"]])
plot(m.out1, type = "hist")