-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest2.py
50 lines (38 loc) · 1.88 KB
/
test2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
__author__ = 'SmartWombat'
import pandas
import time
from tree_parallel import Tree
import random
from data.data import Data
from util import cross_validate_splits, cross_validate_group
import pickle
# Cross-validated tree-ensemble training script.
#
# For every airport CSV and every class variable, the data frame is split
# into `bin_number` cross-validation groups.  For each group the held-out
# test frame is pickled, then TREES_PER_FOLD trees are grown -- each on a
# random TRAIN_FRACTION sample of the group's training rows -- and every
# grown tree is pickled to its own file.

airports = ['yssy', 'egll', 'zbaa']
class_vars = ['metar_wind_spd', 'metar_press', 'metar_temp']
# Passed unchanged to Data().
# NOTE(review): presumably one entry per CSV column, in column order -- confirm.
var_types = ['linear', 'linear', 'linear', 'circular', 'linear', 'linear',
             'linear', 'linear', 'circular', 'linear', 'time', 'date']
bin_number = 5          # number of cross-validation groups

DATA_DIR = "./web/static/data/"
OUTPUT_DIR = '/Users/monkeybutter/Desktop/'
TREES_PER_FOLD = 50     # trees grown per cross-validation group
TRAIN_FRACTION = .7     # share of the training rows sampled for each tree
BIN_SIZE = 100          # second argument of Tree.tree_grower; also part of the file names

for airport in airports:
    df = pandas.read_csv(DATA_DIR + airport + ".csv")
    # Snap the GFS wind direction to a multiple of 10.
    df['gfs_wind_dir'] = df['gfs_wind_dir'].apply(lambda x: round(x/10) * 10)

    for class_var in class_vars:
        print("{} {}: {}".format(airport, class_var, time.strftime("%c")))
        # The split is recomputed for each class variable.
        cx_val = cross_validate_splits(df, bin_number)
        # Common prefix of every pickle written for this airport/variable pair.
        run_prefix = (OUTPUT_DIR + airport + '_' + class_var
                      + '_bin' + str(BIN_SIZE) + '_cx')

        # Groups are numbered from 1, both for cross_validate_group and in the
        # output file names, so the progress messages use the same numbering.
        for fold in range(1, bin_number + 1):
            train_df, test_df = cross_validate_group(fold, cx_val)

            # pickle writes bytes: the file has to be opened in binary mode.
            with open(run_prefix + str(fold) + 'df' + '.pick', 'wb') as f:
                pickle.dump(test_df, f)
            print("Bin {}: {}".format(fold, time.strftime("%c")))

            sample_size = int(train_df.shape[0] * TRAIN_FRACTION)
            for tree_no in range(1, TREES_PER_FOLD + 1):
                print("Tree {} of {}: {}".format(
                    tree_no, TREES_PER_FOLD, time.strftime("%c")))

                # random.sample needs a real sequence, not a pandas Index.
                rows = random.sample(list(train_df.index), sample_size)
                # NOTE(review): the sampled labels come from train_df.index but
                # are looked up in the full df.  That only stays inside the
                # training group if cross_validate_group keeps df's index
                # labels -- confirm, otherwise switch to train_df.loc[rows].
                tree_df = df.loc[rows]

                data = Data(tree_df, class_var, var_types, True)
                node = Tree().tree_grower(data, BIN_SIZE)
                print(type(node))
                print(node)

                # Persist the grown tree; binary mode for the same reason as above.
                tree_path = (run_prefix + str(fold)
                             + '_rftree' + str(tree_no) + '.pick')
                with open(tree_path, 'wb') as f:
                    pickle.dump(node, f)