-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_datasets.py
70 lines (49 loc) · 2.65 KB
/
get_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
__author__ = 'SmartWombat'
import pandas as pd
import time
import datetime
from util import cross_validate_splits, cross_validate_group
import pickle
def transform_time(a_time):
    """Convert a clock string into the number of minutes since midnight.

    The first two characters are read as the hour and everything from
    index 3 onward as the minute; the character at index 2 is skipped
    (presumably a ':' separator, e.g. "07:45").

    Returns the integer ``hour * 60 + minute``.
    Raises ValueError if either slice is not a valid integer literal.
    """
    hours_text, minutes_text = a_time[:2], a_time[3:]
    # Convert the hour first so a malformed hour is reported before a
    # malformed minute.
    whole_hours = int(hours_text)
    leftover_minutes = int(minutes_text)
    return whole_hours * 60 + leftover_minutes
def transform_date(a_date):
    """Convert a date string into a timestamp for midnight of that day.

    Characters 0-3 are read as the year, 5-6 as the month and 8-9 as the
    day (e.g. "2015-03-01"); anything after index 9 is ignored.

    Returns the float produced by ``time.mktime`` for 00:00 on that date.
    ``time.mktime`` interprets its argument as local time, so the value
    depends on the timezone of the machine running the script.
    Raises ValueError if a slice is not an integer or the date is invalid.
    """
    field_bounds = ((0, 4), (5, 7), (8, 10))
    year, month, day = [int(a_date[start:stop]) for start, stop in field_bounds]
    midnight = datetime.datetime(year, month, day, 0, 0)
    return time.mktime(midnight.timetuple())
# Airport codes; each one names a CSV file under ./web/static/data/.
airports = ['yssy', 'egll', 'zbaa']
# Not referenced by the rest of this script; kept so the name stays defined.
gfs_vars = ['gfs_press', 'gfs_rh', 'gfs_temp', 'gfs_wind_dir', 'gfs_wind_spd', 'time', 'date']
# METAR columns. Each outer pass keeps exactly one of them and drops the rest.
class_vars = ['metar_press', 'metar_rh', 'metar_temp', 'metar_wind_dir', 'metar_wind_spd']

# Directory that receives every pickle written below.
output_dir = '/home/roz016/Dropbox/Data for Tree/Results/cx5_lin_vs_cir/'
# Number of cross-validation groups requested from cross_validate_splits.
cx_bin_number = 5


def _pickle_frame(frame, path):
    """Write *frame* to *path* using pickle."""
    # Pickle streams are bytes, so the file is opened in binary mode; text
    # mode breaks on Python 3 and can mangle the data on Windows.
    with open(path, 'wb') as f:
        pickle.dump(frame, f)


for class_var in class_vars:
    print(class_var)
    # All METAR columns except the one kept for this pass.
    del_metar_vars = [var for var in class_vars if var != class_var]

    for airport in airports:
        print(airport)
        df_master = pd.read_csv("./web/static/data/" + airport + ".csv")
        # Snap the GFS wind direction to a multiple of 10.
        # NOTE(review): under Python 2, `/` on integer values is floor
        # division, so if this column holds integers the expression
        # truncates instead of rounding -- confirm the column is float.
        df_master['gfs_wind_dir'] = df_master['gfs_wind_dir'].apply(lambda x: round(x/10) * 10)
        df_master = df_master.drop(del_metar_vars, axis=1)

        cx_val = cross_validate_splits(df_master, cx_bin_number)

        for i in range(cx_bin_number):
            fold = i + 1  # groups are numbered from 1
            print("Cross Validate: {}".format(fold))
            train_df, test_df = cross_validate_group(fold, cx_val)

            # Shared prefix of the four files written for this fold.
            prefix = output_dir + airport + '_' + class_var + '_cx' + str(fold)

            # "cir" files: frames as returned, with 'time' and 'date'
            # still in their original string form.
            _pickle_frame(test_df, prefix + '_cir_testdf.pick')
            _pickle_frame(train_df, prefix + '_cir_traindf.pick')

            # Replace 'time' with minutes since midnight and 'date' with a
            # timestamp, then save the same frames again as the "lin" files.
            train_df['time'] = train_df['time'].apply(transform_time)
            train_df['date'] = train_df['date'].apply(transform_date)
            test_df['time'] = test_df['time'].apply(transform_time)
            test_df['date'] = test_df['date'].apply(transform_date)

            _pickle_frame(test_df, prefix + '_lin_testdf.pick')
            _pickle_frame(train_df, prefix + '_lin_traindf.pick')