-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathcross_val.py
66 lines (54 loc) · 2.01 KB
/
cross_val.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#-*- coding: utf8
from __future__ import division, print_function

import argparse
import itertools
import os
import time

import numpy as np
import pandas as pd

from prme import dataio
from prme import learn
def main():
    """Grid-search the ``learn`` hyper-parameters and save the best model.

    Command-line arguments:
        trace_fpath: path of the trace file handed to ``learn``.
        num_topics: integer forwarded to ``learn`` as the number of topics.
        model_fpath: path handed to ``dataio.save_model`` for the result.
        --leaveout: float, default 0.3. Values above 1 are clamped to 1.
            When positive it is used as the fraction of the trace's lines
            held out: ``to = int(num_lines - num_lines * leave_out)``.
            When it is 1 (after clamping) a message is printed and the
            function returns without training. When it is not positive,
            ``to`` is ``np.inf``.

    Every combination of the candidate learning rates, regularization
    constants, alphas and taus is passed to ``learn``; the result whose
    ``rv['cost_val'][0]`` is largest is kept, annotated with the elapsed
    wall-clock time under ``'training_time'`` and saved.

    NOTE(review): the search keeps the *largest* ``cost_val``. That is only
    right if ``learn`` reports a score where higher is better (e.g. a
    log-likelihood) -- confirm against ``learn``.

    Raises:
        RuntimeError: if no combination yields a ``cost_val`` greater than
            ``-inf`` (for instance when every run returns NaN), so there is
            no model to save.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('trace_fpath', type=str,
                        help='The trace to learn topics from')
    parser.add_argument('num_topics', type=int,
                        help='The number of topics to learn')
    parser.add_argument('model_fpath', type=str,
                        help='The name of the model file (a h5 file)')
    parser.add_argument('--leaveout', type=float, default=0.3,
                        help='The number of transitions to leave for test')
    args = parser.parse_args()

    started = time.time()

    with open(args.trace_fpath) as trace_file:
        num_lines = sum(1 for _ in trace_file)

    from_ = 0
    if args.leaveout > 0:
        leave_out = min(1, args.leaveout)
        if leave_out == 1:
            print('Leave out is 1 (100%), nothing todo')
            return
        to = int(num_lines - num_lines * leave_out)
    else:
        to = np.inf

    # Candidate values for each hyper-parameter of ``learn``.
    rates = (0.0001, 0.001, 0.01)
    regs = (0.00001, 0.0001, 0.001, 0.01)
    alphas = (0.25, 0.5, 0.75)
    # Written as multiples of 60 * 60; presumably seconds (0h, 1h, 12h, 24h)
    # -- confirm against ``learn``.
    taus = (0, 60 * 60, 12 * 60 * 60, 24 * 60 * 60)

    best_cost = float('-inf')
    best_model = None
    # product() varies the last iterable fastest, which is the same order
    # as nesting the four loops rate -> reg -> alpha -> tau.
    for rate, reg, alpha, tau in itertools.product(rates, regs, alphas, taus):
        rv = learn(args.trace_fpath, args.num_topics, rate, reg, alpha, tau,
                   from_, to)
        cost_val = rv['cost_val'][0]
        # A NaN cost compares False here and is therefore never selected.
        if cost_val > best_cost:
            best_cost = cost_val
            best_model = rv
        print(best_cost)

    if best_model is None:
        # Without this guard the assignment below fails with an opaque
        # "'NoneType' object does not support item assignment".
        raise RuntimeError(
            'No hyper-parameter combination produced a validation cost '
            'greater than -inf; nothing to save')

    ended = time.time()
    best_model['training_time'] = np.array([ended - started])
    dataio.save_model(args.model_fpath, best_model)
    print('Learning took', ended - started, 'seconds')
# Run the grid search only when this file is executed as a script.
if __name__ == '__main__':
    main()