-
Notifications
You must be signed in to change notification settings - Fork 58
/
Copy pathWeek5_worksheet.R
125 lines (95 loc) · 3.7 KB
/
Week5_worksheet.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
####### WEEK 5 WORKSHEET - GLM 2 #######
library(haven)
library(data.table)
library(JWileymisc)
library(ggplot2)
library(ggpubr)
library(visreg)
library(survey)
dcount <- fread("https://stats.oarc.ucla.edu/stat/data/poisson_sim.csv")
### 1. Poisson ###
# Fit a poisson regression predicting the number of awards,
# `num_awards` from `prog` in the `dcount` data. Try to
# interpret the results using IRRs and make a graph.
# Note: "There is no association between prog and num_awards so
# do not be surprised if the graphs etc are not that interesting.")
## Plot the distribution of the outcome num_awards:
plot(???(dcount$???, distr = "???"))
## fit poisson regression num_awards on prog:
m <- ???(??? ~ ???, data = ???, family = ???n())
## Show the results:
???
## calculate IRRs and confidence intervals on the IRR scale:
exp(???(m))
exp(???(m))
## graph and visualize the (very exciting) results.
# First in linear space (predicted log number of awards):
visreg(???, xvar = "???", partial = FALSE, rug = FALSE, gg = TRUE) +
ylab("???") +
theme_pubr()
# then for predicted number of awards
visreg(???, xvar = "???", ???,
partial = FALSE, rug = FALSE, gg = TRUE) +
ylab("???") +
theme_pubr()
## Pretend the result was significant. How would you write
# up the interpretation of the results? Say that 'prog'
# is just something like 'their progress in the class' (I
# actually have no idea)
# First write it up for log awards:
# A poisson regression predicting the number of awards each student
# received based on their progress in the class showed that students
# who had ____ progress were expected to have ____log awards, p = ____.
# Each one unit higher of progress was associated with ____ lower log
# awards, p = ____
# Now write it up for the more interpret-able IRRs:
# A poisson regression predicting the number of awards each student
# received based on their progress in the class showed that students
# who had a 0 progress were expected to have ?? awards,
# [95% CI ____]. Each one unit higher progress was associated
# with having ____ times the number of awards, [95% CI ____],
# p = ____.
### 2. Binary logistic ###
# Run the below code to make a new variable in dcount called 'unicorn'
# which fills it in with binary values to say whether or not someone
# is a unicorn. Note: this will be different for everyone
set.seed(666)
dcount$unicorn <- sample(0:1, size = nrow(dcount), replace = TRUE)
# Now use math scores to predict if someone is a unicorn.
mlog <- ???(??? ~ ???, data = ???, family = ???())
summary(???)
???(coef(mlog))
???(confint(mlog))
# Pretend your result is significant. How would you write it
# up (in odds)?
# Odds:
# For each ____ on math, participants have ____ times
# the ____ of ____ (p = ____)
# Visualise the probability of being a unicorn by math score
# (Note: this might not look very curvy in your data and
# that's ok):
visreg(???, xvar = "???", ???,
partial = FALSE, rug = FALSE, gg = TRUE) +
ylab("???") +
theme_pubr()
# Calculate the average marginal effect for a one higher
# math score predicting the probability of being a
# unicorn:
## pick h value for difference, store that in variable h
???
## make the original dataset
originaldata <- dcount[, .(math)]
## make the increased math dataset (math + h)
increaseddata <- dcount[, .(math = ???)]
## calculate original predicted probabilities
originaldata$Prob <- predict(???, newdata = ???,
type = "???")
## calculate increased predicted probabilities
increaseddata$Prob <- ???
## calculate the difference in probabilities per unit
diffprob <- ???
## calculate the average marginal effect
mean(???)
## OPTIONAL: you can cross check your work using
library(marginaleffects)
avg_slopes(???)