forked from weecology/sad-comparison
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRAD-comparison-graphs.py
413 lines (366 loc) · 15.6 KB
/
RAD-comparison-graphs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
""" Project code for graphing the results of the comparisions for species abundance distribution (SAD) models of the Ulrich and Ollik 2003 RAD data."""
from __future__ import division
import csv
import sys
import multiprocessing
import itertools
import os
import matplotlib.pyplot as plt
import colorsys
import numpy as np
from math import log, exp
from scipy import stats
import sqlite3 as dbapi
from mpl_toolkits.axes_grid.inset_locator import inset_axes
# Set up database capabilities
# Set up ability to query data
con = dbapi.connect('./sad-data/chapter2/UlrichOllik2003.sqlite')
cur = con.cursor()
# Switch con data type to string
con.text_factory = str
'''Summarize the number of wins for each model/dataset'''
# Make histogram
# Set up figure
total_wins_fig= plt.figure()
# Extract number of wins for all datasets combined.
total_wins = cur.execute("""SELECT model_name, COUNT(model_code) AS total_wins FROM ResultsWin
GROUP BY model_code""")
total_wins = cur.fetchall()
# Plot variables for total wins
N = len(total_wins)
x = np.arange(1, N+1)
y = [ num for (s, num) in total_wins ]
labels = [ s for (s, num) in total_wins ]
width = 1
bar1 = plt.bar( x, y, width, color="grey" )
plt.ylabel( 'Number of Wins' )
plt.xticks(x + width/2.0, labels, fontsize = 'small' )
plt.xlabel( 'Species abundance distribution models' )
#Output figure
fileName = "./sad-data/chapter2/total_wins.png"
plt.savefig(fileName, format="png" )
plt.close()
'''AIC_c weight distributions graphs'''
# Make histogram
# Set up figure
AIC_c_weights = plt.figure()
# Extract AICc weights for each model.
#Logseries
logseries = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name == 'Logseries' AND value_type =='AICc weight' AND value IS NOT NULL
ORDER BY value""")
logseries = cur.fetchall()
#Poisson lognormal
pln = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Poisson lognormal' AND value_type =='AICc weight' AND value IS NOT NULL
ORDER BY value""")
pln = cur.fetchall()
#Negative binomial
neg_bin = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Negative binomial' AND value_type =='AICc weight' AND value IS NOT NULL
ORDER BY value""")
neg_bin = cur.fetchall()
#Geometric series
geometric = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Geometric series' AND value_type =='AICc weight' AND value IS NOT NULL
ORDER BY value""")
geometric = cur.fetchall()
#Zipf distribution
zipf = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Zipf distribution' AND value_type =='AICc weight' AND value IS NOT NULL
ORDER BY value""")
zipf = cur.fetchall()
# Plot variables for weights
bins = 50
#Logseries
model0 = [ num for (s, num) in logseries ]
plt.hist(model0, bins, range = (0,1), facecolor = 'magenta', histtype="stepfilled", alpha=1, label = "Logseries")
#Poisson lognormal
model1 = [ num for (s, num) in pln]
plt.hist(model1, bins, range = (0,1), facecolor = 'teal', histtype="stepfilled", alpha=.7, label = "Poisson lognormal")
#Negative binomial
model2 = [ num for (s, num) in neg_bin]
plt.hist(model2, bins, range = (0,1), facecolor = 'gray', histtype="stepfilled", alpha=.7, label = "Negative binomial")
#Geometric series
model3 = [ num for (s, num) in geometric]
plt.hist(model3, bins, range = (0,1), facecolor = 'olivedrab', histtype="stepfilled", alpha=.7, label = "Geometric")
#Zipf distribution
model4 = [ num for (s, num) in zipf]
plt.hist(model4, bins, range = (0,1), facecolor = 'orange', histtype="stepfilled", alpha=.7, label = "Zipf")
plt.legend(loc = 'upper right', fontsize = 11)
plt.xlabel("AICc weights")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/AICc_weights.png"
plt.savefig(fileName, format="png" )
plt.close()
'''Plot weights for each model individually'''
bins = 50
#Logseries
plt.figure()
plt.hist(model0, bins, range = (0,1), facecolor = 'magenta', histtype="stepfilled", alpha=1, label = "Logseries")
plt.xlabel("Logseries AICc weights")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/Logseries_weights.png"
plt.savefig(fileName, format="png" )
plt.close()
#Poisson lognormal
plt.figure()
plt.hist(model1, bins, range = (0,1), facecolor = 'teal', histtype="stepfilled", alpha=.7, label = "Poisson lognormal")
plt.xlabel("Poisson lognormal AICc weights")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/Poisson_lognormal_weights.png"
plt.savefig(fileName, format="png" )
plt.close()
#Negative binomial
plt.figure()
model2 = [ num for (s, num) in neg_bin]
plt.hist(model2, bins, range = (0,1), facecolor = 'gray', histtype="stepfilled", alpha=.7, label = "Negative binomial")
plt.xlabel("Negative binomial AICc weights")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/Negative_binomial_weights.png"
plt.savefig(fileName, format="png" )
plt.close()
#Geometric series
plt.figure()
model3 = [ num for (s, num) in geometric]
plt.hist(model3, bins, range = (0,1), facecolor = 'olivedrab', histtype="stepfilled", alpha=.7, label = "Geometric")
plt.xlabel("Geometric AICc weights")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/Geometric_weights.png"
plt.savefig(fileName, format="png" )
plt.close()
#Zipf distribution
plt.figure()
model4 = [ num for (s, num) in zipf]
plt.hist(model4, bins, range = (0,1), facecolor = 'orange', histtype="stepfilled", alpha=.7, label = "Zipf")
plt.xlabel("Zipf distribution AICc weights")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/Zipf_weights.png"
plt.savefig(fileName, format="png" )
plt.close()
'''Likelihoods graph'''
# Make histogram
# Set up figure
l_likelihood = plt.figure()
# Extract log-likelihoods for each model.
#Logseries
ll_logseries = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name == 'Logseries' AND value_type =='likelihood' AND value IS NOT NUll
ORDER BY value""")
ll_logseries = cur.fetchall()
#Poisson lognormal
ll_pln = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Poisson lognormal' AND value_type =='likelihood' AND value IS NOT NUll
ORDER BY value""")
ll_pln = cur.fetchall()
#Negative binomial
ll_neg_bin = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Negative binomial' AND value_type =='likelihood' AND value IS NOT NUll
ORDER BY value""")
ll_neg_bin = cur.fetchall()
#Geometric series
ll_geometric = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Geometric series' AND value_type =='likelihood' AND value IS NOT NUll
ORDER BY value""")
ll_geometric = cur.fetchall()
#Zipf distribution
ll_zipf = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Zipf distribution' AND value_type =='likelihood' AND value IS NOT NUll
ORDER BY value""")
ll_zipf = cur.fetchall()
# Plot variables for combined likelihoods graph
#Zipf distribution
ll_model5 = [ num for (s, num) in ll_zipf]
plt.hist(ll_model5, bins = range(-750, 0, 10), facecolor = 'orange', histtype="stepfilled", alpha=.7, label = "Zipf distribution")
#Geometric series
ll_model4 = [ num for (s, num) in ll_geometric]
plt.hist(ll_model4, bins = range(-750, 0, 10), facecolor = 'olivedrab', histtype="stepfilled", alpha=.7, label = "Geometric")
#Negative binomial
ll_model3 = [ num for (s, num) in ll_neg_bin]
plt.hist(ll_model3, bins = range(-750, 0, 10), facecolor = 'gray', histtype="stepfilled", alpha=.7, label = "Negative binomial")
#Poisson lognormal
ll_model2 = [ num for (s, num) in ll_pln]
plt.hist(ll_model2, bins = range(-750, 0, 10), facecolor = 'teal', histtype="stepfilled", alpha=.7, label = "Poisson lognormal")
#Logseries
ll_model0 = [ num for (s, num) in ll_logseries ]
plt.hist(ll_model0, bins = range(-750, 0, 10), facecolor = 'magenta', histtype="stepfilled", alpha=.4, label = "Logseries")
plt.legend(loc = 'upper left', fontsize = 11)
plt.xlabel("Log-likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/likelihoods.png"
plt.savefig(fileName, format="png" )
plt.close()
''' Plot likelihoods for each model individually'''
#Logseries
plt.figure()
plt.hist(ll_model0, bins = range(-750, 0, 10), facecolor = 'magenta', histtype="stepfilled", alpha=1, label = "Logseries")
plt.xlabel("Logseries log-likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/logseries_likelihoods.png"
plt.savefig(fileName, format="png" )
plt.close()
#Poisson lognormal
plt.figure()
plt.hist(ll_model2, bins = range(-750, 0, 10), facecolor = 'teal', histtype="stepfilled", alpha=.7, label = "Poisson lognormal")
plt.xlabel("Poisson lognormal log-likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/pln_likelihoods.png"
plt.savefig(fileName, format="png" )
plt.close()
#Negative binomial
plt.figure()
plt.hist(ll_model3, bins = range(-750, 0, 10), facecolor = 'gray', histtype="stepfilled", alpha=.7, label = "Negative binomial")
plt.xlabel("Negative binomial log-likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/neg_bin_likelihoods.png"
plt.savefig(fileName, format="png" )
plt.close()
#Geometric
plt.figure()
plt.hist(ll_model4, bins = range(-750, 0, 10), facecolor = 'olivedrab', histtype="stepfilled", alpha=.7, label = "Geometric")
plt.xlabel("Geometric log-likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/geometric_likelihoods.png"
plt.savefig(fileName, format="png" )
plt.close()
#Zipf distribution
plt.figure()
plt.hist(ll_model5, bins = range(-750, 0, 10), facecolor = 'orange', histtype="stepfilled", alpha=.7, label = "Zipf distribution")
plt.xlabel("Zipf distribution log-likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/Zipf_likelihoods.png"
plt.savefig(fileName, format="png" )
plt.close()
'''Relative likelihoods graph'''
# Make histogram
# Set up figure
relative_likelihood = plt.figure()
# Extract relative likelihoods for each model.
#Logseries
relative_logseries = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name == 'Logseries' AND value_type =='relative likelihood' AND value IS NOT NUll
ORDER BY value""")
relative_logseries = cur.fetchall()
#Poisson lognormal
relative_pln = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Poisson lognormal' AND value_type =='relative likelihood' AND value IS NOT NUll
ORDER BY value""")
relative_pln = cur.fetchall()
#Negative binomial
relative_neg_bin = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Negative binomial' AND value_type =='relative likelihood' AND value IS NOT NUll
ORDER BY value""")
relative_neg_bin = cur.fetchall()
#Geometric series
relative_geometric = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Geometric series' AND value_type =='relative likelihood' AND value IS NOT NUll
ORDER BY value""")
relative_geometric = cur.fetchall()
#Zipf distribution
relative_zipf = cur.execute("""SELECT model_name, value FROM RawResults
WHERE model_name =='Zipf distribution' AND value_type =='relative likelihood' AND value IS NOT NUll
ORDER BY value""")
relative_zipf = cur.fetchall()
# Plot variables for relative likelihoods combined graph
plt.figure()
bins = 50
#Zipf distribution
relative_model5 = [ num for (s, num) in relative_zipf]
plt.hist(relative_model5, bins, range = [0,1], facecolor = 'orange', histtype="stepfilled", alpha=.7, label = "Zipf distribution")
#Geometric series
relative_model4 = [ num for (s, num) in relative_geometric]
plt.hist(relative_model4, bins, range = [0,1], facecolor = 'olivedrab', histtype="stepfilled", alpha=.7, label = "Geometric")
#Negative binomial
relative_model3 = [ num for (s, num) in relative_neg_bin]
plt.hist(relative_model3, bins, range = [0,1], facecolor = 'gray', histtype="stepfilled", alpha=.7, label = "Negative binomial")
#Poisson lognormal
relative_model2 = [ num for (s, num) in relative_pln]
plt.hist(relative_model2, bins, range = [0,1], facecolor = 'teal', histtype="stepfilled", alpha=.7, label = "Poisson lognormal")
#Logseries
relative_model0 = [ num for (s, num) in relative_logseries ]
plt.hist(relative_model0, bins, range = [0,1], facecolor = 'magenta', histtype="stepfilled", alpha=.4, label = "Logseries")
plt.legend(loc = 'upper right', fontsize = 11)
plt.xlabel("Relative likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/relative_likelihoods.png"
plt.savefig(fileName, format="png" )
plt.close()
''' Plot relative likelihoods for each model individually'''
#Logseries
plt.figure()
plt.hist(relative_model0, bins, range = [0,1], facecolor = 'magenta', histtype="stepfilled", alpha=1, label = "Logseries")
plt.xlabel("Logseries relative likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/logseries_relative.png"
plt.savefig(fileName, format="png" )
plt.close()
#Poisson lognormal
plt.figure()
plt.hist(relative_model2, bins, range = [0,1], facecolor = 'teal', histtype="stepfilled", alpha=.7, label = "Poisson lognormal")
plt.xlabel("Poisson lognormal relative likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/pln_relative.png"
plt.savefig(fileName, format="png" )
plt.close()
#Negative binomial
plt.figure()
plt.hist(relative_model3, bins, range = [0,1], facecolor = 'gray', histtype="stepfilled", alpha=.7, label = "Negative binomial")
plt.xlabel("Negative binomial relative likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/neg_bin_relative.png"
plt.savefig(fileName, format="png" )
plt.close()
#Geometric
plt.figure()
plt.hist(relative_model4, bins, range = [0,1], facecolor = 'olivedrab', histtype="stepfilled", alpha=.7, label = "Geometric")
plt.xlabel("Geometric relative likelihoods")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/geometric_relative.png"
plt.savefig(fileName, format="png" )
plt.close()
#Zipf
plt.figure()
plt.hist(relative_model5, bins, range = [0,1], facecolor = 'orange', histtype="stepfilled", alpha=.7, label = "Geometric")
plt.xlabel("Zipf distribution")
plt.ylabel("Frequency")
plt.tight_layout()
#Output figure
fileName = "./sad-data/chapter2/zipf_relative.png"
plt.savefig(fileName, format="png" )
plt.close()
# Close connection
con.close()