-
Notifications
You must be signed in to change notification settings - Fork 34
/
gp.py
109 lines (95 loc) · 3.56 KB
/
gp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import argparse
import numpy as np
from datetime import datetime
import sklearn.gaussian_process as gp
import matplotlib.pyplot as plot
import parser as gcparser
import os
import os.path
import running
def main(indir, outdir):
    """
    Main driver method.

    Parses every file returned by filelisting(indir), orders the runs by
    timestamp, fits a Gaussian process to the per-run average pace and
    displays the fit together with its 95% confidence band.

    indir  -- directory that is scanned for input files.
    outdir -- accepted for the command line interface but not used by this
              function; the figure is only shown via plot.show().
    """
    # Generate a list of all the files.
    listing = filelisting(indir)

    # Parse each file. A file is skipped when the parser hands back None for
    # the timestamp, the distances and the splits all at once.
    timestamps = []
    distances = []
    splits = []
    for f in listing:
        t, d, s = gcparser.GCFileParser(f).parse()
        if t is None and d is None and s is None:
            continue

        # Append the data.
        timestamps.append(t)
        distances.append(d)
        splits.append(s)
        print('.')

    # Nothing usable was parsed: there is no data to fit or to plot.
    if not timestamps:
        print('No runs could be parsed from %s' % indir)
        return

    # Sort the data. The sorted copy of the timestamps is kept so the axis
    # labels further down refer to the same run as the plotted point.
    timestamps = np.array(timestamps)
    numRuns = np.size(timestamps)
    sortInd = np.argsort(timestamps)
    sortedTimestamps = timestamps[sortInd]

    # Loop through the sorted arrays, generating a graph.
    # X is simply the chronological run number: 0, 1, ..., numRuns - 1.
    X = np.atleast_2d(np.linspace(0, numRuns, numRuns, endpoint = False)).T
    y = np.zeros(numRuns)
    dy = np.zeros(numRuns)
    for i, ind in enumerate(sortInd):
        d = running.metersToMiles(distances[ind])
        s = running.secondsToMinutes(splits[ind])
        y[i] = running.averagePace(d, s)
        dy[i] = np.std(s)

    # Small offset so the nugget below is not exactly zero for a run whose
    # splits have no spread.
    dy += 0.01

    # NOTE(review): gp.GaussianProcess is the legacy scikit-learn estimator;
    # newer releases only ship GaussianProcessRegressor -- confirm the
    # installed version before running.
    process = gp.GaussianProcess(corr = 'squared_exponential',
        nugget = (dy / y) ** 2, theta0 = 1e-1, thetaL = 1e-3,
        thetaU = 1, random_start = 100)
    process.fit(X, y)

    # Set up a prediction on a grid ten times denser than the observations.
    x = np.atleast_2d(np.linspace(0, numRuns, numRuns * 10)).T
    y_pred, MSE = process.predict(x, eval_MSE = True)
    sigma = np.sqrt(MSE)

    # Plot the prediction and the 95% confidence interval.
    plot.plot(X.ravel(), y, c = 'r', marker = '+', ls = 'None', markersize = 10, label = 'Runs')
    plot.plot(x, y_pred, 'b-', label = 'Prediction')
    plot.fill(np.concatenate([x, x[::-1]]),
        np.concatenate([y_pred - 1.96 * sigma,
        (y_pred + 1.96 * sigma)[::-1]]),
        alpha = 0.5, fc = 'b', ec = 'None', label = '95% confidence')
    plot.ylabel('Average Pace (minutes)')

    # Replace the numeric tick labels with the date of the matching run.
    # Only ticks inside [0, numRuns) map to a run; a negative tick would
    # otherwise wrap around to the end of the array. Tick locations are
    # floats, so they are truncated to an integer index explicitly.
    locs, _ = plot.xticks()
    locs = np.asarray(locs)
    locs = locs[(locs >= 0) & (locs < numRuns)]
    newlabels = [datetime.fromtimestamp(sortedTimestamps[int(loc)]).strftime("%Y/%m/%d") for loc in locs]
    plot.xticks(locs, newlabels)
    plot.legend(loc = 0)
    plot.show()
def filelisting(directory, suffix = 'tcx'):
    """
    Returns the full paths of the regular files directly inside the
    directory whose names end with the given suffix.
    """
    # Build each candidate path once, keeping only names with the suffix.
    candidates = (os.path.join(directory, name)
                  for name in os.listdir(directory)
                  if name.endswith(suffix))
    # Drop anything that is not a regular file (e.g. sub-directories).
    return [path for path in candidates if os.path.isfile(path)]
if __name__ == "__main__":
    # Banner. Each call prints exactly one string, so the output is the same
    # under the Python 2 print statement and the Python 3 print function.
    # Backslashes are escaped explicitly; the printed text is unchanged.
    print('Guinea pigs, that is!\n')
    print(" , , ")
    print(" \\ | \\ / / / /")
    print(" / o ,) \\")
    print(" C / / \\")
    print(" \\_ ( /")
    print(" mm --- mooo-\n")

    # The hand-written invocation line is passed as `usage` (it replaces the
    # auto-generated usage text) instead of `prog`, which is only the program
    # name. `add_help` is a boolean flag and is left at its default (enabled).
    arg_parser = argparse.ArgumentParser(description = 'Gaussian Processes on GC',
        epilog = 'guinea pig = gp',
        usage = 'python gp.py -i <input dir> -o <output dir>')
    arg_parser.add_argument('-i', '--input', required = True,
        help = 'Input directory, contains lots of .tcx files.')
    arg_parser.add_argument('-o', '--output', required = False,
        default = None, help = 'Output directory.')
    args = vars(arg_parser.parse_args())
    main(args['input'], args['output'])