-
Notifications
You must be signed in to change notification settings - Fork 2
/
01-06-Stat-Methods-Experimental-Design.Rmd
190 lines (131 loc) · 6.28 KB
/
01-06-Stat-Methods-Experimental-Design.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
## Experimental Design {-}
### Completely Random Design {-}
The response is the length of odontoblasts (cells responsible for tooth growth) in 60 guinea pigs. Each animal received one of three dose levels of vitamin C (0.5, 1, and 2 mg/day) by one of two delivery methods, orange juice or ascorbic acid (a form of vitamin C and coded as VC).
- Treatment Structure: 2 x 3 Factorial Treatment, both Fixed
- Model: $y_{ijk} = \mu + \alpha_i + \beta_j + \alpha \beta_{ij} + e_{ijk}$
- Treatments: $\alpha_i = \text{supp, } \beta_j = \text{dose}$
- Fixed Effects: $\alpha_1 = \beta_1 = \alpha \beta_{1j} = \alpha \beta_{i1} = 0$
- Random Effects: $e_{ijk} = N(0, \sigma^2_e)$
```{r a13, comment=NA, fig.width=8, fig.height=5, warning=FALSE, message=FALSE}
library(lsmeans)
library(reshape2)
library(car)
library(plyr)
data("ToothGrowth")
## Data is numeric, but we need to force it to be a factor for the model
ToothGrowth$dose.factor = as.factor(ToothGrowth$dose)
summary(ToothGrowth)
## Even observations per treatment group
table(ToothGrowth$supp, ToothGrowth$dose)
## ANOVA model
mdl = lm(len ~ supp * dose.factor, data = ToothGrowth)
anova(mdl)
## Least Squares Means
lsmeans(mdl, specs = c("supp", "dose.factor"))
## Differences in Means
TukeyHSD(aov(mdl))
```
Are the necessary conditions for hypothesis testing present?
- Normality: Residuals appear normally distributed per the residual normal reference plot and shapiro-wilks test
- Equal Variance: Brown-Forsythe-Levene test and residual plot supports equal variance
- Independence: No correlation in the residuals per the Durbin Watson test and plots of variables against residuals
Conditions for hypothesis testing appears to be satisfied
```{r a14, comment=NA, fig.width=8, fig.height=5, warning=FALSE, message=FALSE}
## Normality of Residuals
shapiro.test(mdl$residuals); qqnorm(mdl$residuals); qqline(mdl$residuals)
## Equal Variance of Residuals
leveneTest(mdl)
plot(x = mdl$fitted.values, y = (mdl$residual - mean(mdl$residuals))/sd(mdl$residuals),
xlab = "Fitted Values", ylab = "Standardized Residuals",
main = "Equal Variance Residual Plot")
## Independence of Residuals
par(mfrow = c(1, 2))
plot(ToothGrowth$dose, mdl$residuals, xlab = "Dose", ylab = "Residuals")
plot(ToothGrowth$supp, mdl$residuals, xlab = "Supp", ylab = "Residuals")
## Test for correlation in the residuals
durbinWatsonTest(mdl)
```
- Group doses so that each dose is not statistically different than any other dose in the group:
+ The interaction between dose and supp are significant so we need to assess the differences in dose per each level of supp.
+ OJ: {.5}, {1, 2}
+ VC: {.5}, {1}, {2}
- Group supps so that each supp is not statistically different than any other supp in the group:
+ The interaction is significant so we need to assess the supps at each level of dose
+ .5: {OJ}, {VC}
+ 1: {OJ}, {VC}
+ 2: {OJ, VC}
```{r a15, comment=NA, fig.width=8, fig.height=5, echo=FALSE}
## Calculate the mean and sd for each combination of supp and dose
x = ddply(ToothGrowth, .(supp, dose), summarize, mean = mean(len), sd = sd(len))
## add 95% confidence intervals
x$lwr = with(x, mean - qt(p = .975, df = 9) * sd/sqrt(10))
x$upr = with(x, mean + qt(p = .975, df = 9) * sd/sqrt(10))
x1 = subset(x, supp == "OJ")
x2 = subset(x, supp == "VC")
## Make a blank plot
plot(x = ToothGrowth$dose,
y = ToothGrowth$len,
xlab = "Dose",
ylab = "Tooth Length",
type = "n",
ylim = c(6, 29),
main = "Interaction Plot")
## Add points for the means
points(x = x1$dose, y = x1$mean, pch = 3, lwd = 4, col = "blue", cex = 1.1)
points(x = x2$dose, y = x2$mean, pch = 4, lwd = 4, col = "red", cex = 1.1)
## Add lines for interaction
lines(x = x1$dose, y = x1$mean, lty = 2)
lines(x = x2$dose, y = x2$mean, lty = 2)
## Add segments for confidence intervals
for (i in 1:6) {
segments(x0 = x$dose[i], y0 = x$lwr[i], y1 = x$upr[i])
segments(x0 = x$dose[i]-.05, x1 = x$dose[i]+.05, y0 = x$lwr[i])
segments(x0 = x$dose[i]-.05, x1 = x$dose[i]+.05, y0 = x$upr[i])
}
## Add legend
legend("bottomright", c("Orange Juice", "Vitamin C"),
pch = c(3, 4), bty = "n", cex = 1.1, col = c("blue", "red"))
```
### Random Complete Block Design {-}
An experiment was conducted to compare four different pre-planting treatments for soybeen seeds. A fifth treatment, consisting of not treating the seeds was used as a control. The experimental area consisted of four fields. There are notable differences in the fields. Each field was divided into five plots and one of the treatments was randomly assigned to a plot within each field.
- Treatment Structure: 1 Single Treatment with 5 levels
- Response: The number of plants that failed to emerge out of 100 seeds planted per plot.
- Model: $y_{ij} = \mu + \alpha_i + \beta_j + e_{ij}$
- Treatments: $\alpha_i = \text{Seed, } \beta_j = \text{Field, } \alpha_5 = \text{Control}$
- Fixed Effects: $\alpha_5 = \beta_1 = 0$
- Random Effects: $e_{ij} = N(0, \sigma^2_e)$
---
```{r a16, echo=FALSE, results='asis'}
library(knitr)
library(reshape2)
library(pander)
soy = data.frame(
Treatment = c("Avasan", "Spergon", "Semaesan", "Fermate", "Control"),
Field.1 = c(2, 4, 3, 9, 8),
Field.2 = c(5, 10, 6, 3, 11),
Field.3 = c(7, 9, 9, 5, 12),
Field.4 = c(11, 8, 10, 5, 13)
)
pandoc.table(soy, caption = "Soy Bean Data")
soy = melt(soy, id.vars = "Treatment", variable.name = "Field", value.name = "Count")
```
```{r a17, comment=NA}
## Make the control treatment the default level
soy$Treatment = relevel(soy$Treatment, ref = "Control")
## We only have one rep per treatment so there are not enough DF to measure the interaction
mdl = lm(Count ~ Field + Treatment, data = soy)
anova(mdl)
```
---
#### Comparison of means vs control
Since we have a control variable we want to know if any of the treatment means are significantly lower than the control mean.
```{r a18, comment=NA, message=FALSE, warning=FALSE}
library(multcomp)
Dunnet = glht(mdl, linfct = mcp(Treatment = "Dunnet"), alternative = "less")
summary(Dunnet)
```
We have significant evidence that only Avasan and Fermate are significantly lower than the control. Are they significantly different from each other?
```{r a19, comment=NA, message=FALSE, warning=FALSE}
TukeyHSD(aov(mdl))
```
There is not significant evidence between the difference in means between any of the treatments.