Skip to content

Commit 1e309ad

Browse files
committed
Cleanup for sm changes, remove output, use local imports
1 parent 23ec79c commit 1e309ad

22 files changed

+877
-4371
lines changed

Diff for: contrasts.ipynb

+79-387
Large diffs are not rendered by default.

Diff for: contrasts.py

+28-14
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55

66
# Contrasts Overview
77

8+
# <codecell>
9+
10+
import statsmodels.api as sm
11+
812
# <markdowncell>
913

1014
# This document is based heavily on this excellent resource from UCLA http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm
@@ -24,9 +28,13 @@
2428
# <codecell>
2529

2630
import pandas
27-
url = 'http://www.ats.ucla.edu/stat/R/notes/hsb2_nolabel.csv'
31+
url = 'http://www.ats.ucla.edu/stat/data/hsb2.csv'
2832
hsb2 = pandas.read_table(url, delimiter=",")
2933

34+
# <codecell>
35+
36+
hsb2.head(10)
37+
3038
# <rawcell>
3139

3240
# It will be instructive to look at the mean of the dependent variable, write, for each level of race (1 = Hispanic, 2 = Asian, 3 = African American and 4 = Caucasian).
@@ -56,16 +64,24 @@
5664

5765
# <codecell>
5866

67+
hsb2.race.head(10)
68+
69+
# <codecell>
70+
5971
print contrast.matrix[hsb2.race-1, :][:20]
6072

73+
# <codecell>
74+
75+
sm.categorical(hsb2.race.values)
76+
6177
# <rawcell>
6278

6379
# This is a bit of a trick: the `race` codes (1 through 4) conveniently map to zero-based row indices of the contrast matrix once 1 is subtracted. When the category codes do not line up this way, the conversion from levels to rows happens under the hood, so this direct indexing won't work in general, but it is nonetheless a useful exercise to fix ideas. The below illustrates the output using the three contrasts above.
6480

6581
# <codecell>
6682

6783
from statsmodels.formula.api import ols
68-
mod = ols("write ~ C(race, Treatment)", df=hsb2)
84+
mod = ols("write ~ C(race, Treatment)", data=hsb2)
6985
res = mod.fit()
7086
print res.summary()
7187

@@ -106,12 +122,16 @@ def code_without_intercept(self, levels):
106122

107123
# <codecell>
108124

125+
hsb2.groupby('race')['write'].mean().mean()
126+
127+
# <codecell>
128+
109129
contrast = Simple().code_without_intercept(levels)
110130
print contrast.matrix
111131

112132
# <codecell>
113133

114-
mod = ols("write ~ C(race, Simple)", df=hsb2)
134+
mod = ols("write ~ C(race, Simple)", data=hsb2)
115135
res = mod.fit()
116136
print res.summary()
117137

@@ -131,7 +151,7 @@ def code_without_intercept(self, levels):
131151

132152
# <codecell>
133153

134-
mod = ols("write ~ C(race, Sum)", df=hsb2)
154+
mod = ols("write ~ C(race, Sum)", data=hsb2)
135155
res = mod.fit()
136156
print res.summary()
137157

@@ -159,7 +179,7 @@ def code_without_intercept(self, levels):
159179

160180
# <codecell>
161181

162-
mod = ols("write ~ C(race, Diff)", df=hsb2)
182+
mod = ols("write ~ C(race, Diff)", data=hsb2)
163183
res = mod.fit()
164184
print res.summary()
165185

@@ -187,10 +207,9 @@ def code_without_intercept(self, levels):
187207
contrast = Helmert().code_without_intercept(levels)
188208
print contrast.matrix
189209

190-
191210
# <codecell>
192211

193-
mod = ols("write ~ C(race, Helmert)", df=hsb2)
212+
mod = ols("write ~ C(race, Helmert)", data=hsb2)
194213
res = mod.fit()
195214
print res.summary()
196215

@@ -224,12 +243,7 @@ def code_without_intercept(self, levels):
224243

225244
# <codecell>
226245

227-
_, bins = np.histogram(hsb2.read, 3)
228-
try: # requires numpy master
229-
readcat = np.digitize(hsb2.read, bins, True)
230-
except:
231-
readcat = np.digitize(hsb2.read, bins)
232-
hsb2['readcat'] = readcat
246+
hsb2['readcat'] = pandas.cut(hsb2.read, bins=3)
233247
hsb2.groupby('readcat').mean()['write']
234248

235249
# <codecell>
@@ -241,7 +255,7 @@ def code_without_intercept(self, levels):
241255

242256
# <codecell>
243257

244-
mod = ols("write ~ C(readcat, Poly)", df=hsb2)
258+
mod = ols("write ~ C(readcat, Poly)", data=hsb2)
245259
res = mod.fit()
246260
print res.summary()
247261

0 commit comments

Comments
 (0)