main.py
import argparse
import json
import logging
import os
from datetime import datetime
from pathlib import Path

import yaml

from libmultilabel.common_utils import Timer, AttributeDict


def add_all_arguments(parser):
# path / directory
parser.add_argument('--data_dir', default='./data/rcv1',
help='The directory to load data (default: %(default)s)')
parser.add_argument('--result_dir', default='./runs',
help='The directory to save checkpoints and logs (default: %(default)s)')
# data
parser.add_argument('--data_name', default='rcv1',
help='Dataset name (default: %(default)s)')
parser.add_argument('--train_path',
help='Path to training data (default: [data_dir]/train.txt)')
parser.add_argument('--val_path',
help='Path to validation data (default: [data_dir]/valid.txt)')
parser.add_argument('--test_path',
help='Path to test data (default: [data_dir]/test.txt)')
parser.add_argument('--val_size', type=float, default=0.2,
help='Training-validation split: a ratio in [0, 1] or an integer for the size of the validation set (default: %(default)s).')
parser.add_argument('--min_vocab_freq', type=int, default=1,
help='The minimum frequency needed to include a token in the vocabulary (default: %(default)s)')
parser.add_argument('--max_seq_length', type=int, default=500,
help='The maximum number of tokens of a sample (default: %(default)s)')
parser.add_argument('--lm_weight', type=str,
help='Pretrained model name or path (default: %(default)s)')
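    # Note: argparse's `type=bool` converts any non-empty string to True, so
    # passing `--shuffle False` still enables shuffling; set `shuffle: false`
    # in a YAML config to disable it.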
parser.add_argument('--shuffle', type=bool, default=True,
help='Whether to shuffle training data before each epoch (default: %(default)s)')
parser.add_argument('--merge_train_val', action='store_true',
help='Whether to merge the training and validation data. (default: %(default)s)')
parser.add_argument('--include_test_labels', action='store_true',
help='Whether to include labels in the test dataset. (default: %(default)s)')
parser.add_argument('--remove_no_label_data', action='store_true',
help='Whether to remove training and validation instances that have no labels.')
# train
parser.add_argument('--seed', type=int,
help='Random seed (default: %(default)s)')
parser.add_argument('--epochs', type=int, default=10000,
help='The number of epochs to train (default: %(default)s)')
parser.add_argument('--batch_size', type=int, default=16,
help='Size of training batches (default: %(default)s)')
parser.add_argument('--optimizer', default='adam', choices=['adam', 'adamw', 'adamax', 'sgd'],
help='Optimizer (default: %(default)s)')
parser.add_argument('--learning_rate', type=float, default=0.0001,
help='Learning rate for optimizer (default: %(default)s)')
parser.add_argument('--weight_decay', type=float, default=0,
help='Weight decay factor (default: %(default)s)')
parser.add_argument('--momentum', type=float, default=0.9,
help='Momentum factor for SGD only (default: %(default)s)')
parser.add_argument('--patience', type=int, default=5,
help='The number of epochs to wait for improvement before early stopping (default: %(default)s)')
    parser.add_argument('--normalize_embed', action='store_true',
                        help='Whether to normalize word embeddings to unit vectors (default: %(default)s)')
# model
parser.add_argument('--model_name', default='KimCNN',
help='Model to be used (default: %(default)s)')
parser.add_argument('--init_weight', default='kaiming_uniform',
help='Weight initialization to be used (default: %(default)s)')
# eval
parser.add_argument('--eval_batch_size', type=int, default=256,
help='Size of evaluating batches (default: %(default)s)')
parser.add_argument('--metric_threshold', type=float, default=0.5,
                        help='The decision threshold over which a label is predicted as positive (default: %(default)s)')
parser.add_argument('--monitor_metrics', nargs='+', default=['P@1', 'P@3', 'P@5'],
help='Metrics to monitor while validating (default: %(default)s)')
parser.add_argument('--val_metric', default='P@1',
help='The metric to monitor for early stopping (default: %(default)s)')
# pretrained vocab / embeddings
parser.add_argument('--vocab_file', type=str,
                        help='Path to a file holding vocabularies (default: %(default)s)')
parser.add_argument('--embed_file', type=str,
help='Path to a file holding pre-trained embeddings (default: %(default)s)')
parser.add_argument('--label_file', type=str,
help='Path to a file holding all labels (default: %(default)s)')
# log
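    # With nargs='?', passing `--save_k_predictions` alone saves the top 100
    # predictions (const); `--save_k_predictions 20` saves 20; omitting the
    # flag saves none (default 0).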
parser.add_argument('--save_k_predictions', type=int, nargs='?', const=100, default=0,
help='Save top k predictions on test set. k=%(const)s if not specified. (default: %(default)s)')
parser.add_argument('--predict_out_path',
                        help='Path to an output file holding top k label results (default: %(default)s)')
# auto-test
    parser.add_argument('--limit_train_batches', type=float, default=1.0,
                        help='Fraction of the training dataset to use for auto-testing (default: %(default)s)')
    parser.add_argument('--limit_val_batches', type=float, default=1.0,
                        help='Fraction of the validation dataset to use for auto-testing (default: %(default)s)')
    parser.add_argument('--limit_test_batches', type=float, default=1.0,
                        help='Fraction of the test dataset to use for auto-testing (default: %(default)s)')
# others
parser.add_argument('--cpu', action='store_true',
help='Disable CUDA')
parser.add_argument('--silent', action='store_true',
help='Enable silent mode')
parser.add_argument('--data_workers', type=int, default=4,
                        help='The number of CPU workers for data preprocessing (default: %(default)s)')
parser.add_argument('--embed_cache_dir', type=str,
help='For parameter search only: path to a directory for storing embeddings for multiple runs. (default: %(default)s)')
parser.add_argument('--eval', action='store_true',
help='Only run evaluation on the test set (default: %(default)s)')
parser.add_argument('--checkpoint_path',
help='The checkpoint to warm-up with (default: %(default)s)')
# linear options
parser.add_argument('--linear', action='store_true',
help='Train linear model')
parser.add_argument('--data_format', type=str, default='txt',
help='\'svm\' for SVM format or \'txt\' for LibMultiLabel format (default: %(default)s)')
parser.add_argument('--liblinear_options', type=str,
help='Options passed to liblinear (default: %(default)s)')
parser.add_argument('--linear_technique', type=str, default='1vsrest',
choices=['1vsrest', 'thresholding', 'cost_sensitive', 'cost_sensitive_micro'],
help='Technique for linear classification (default: %(default)s)')
    parser.add_argument('-h', '--help', action='help',
                        help="If you are trying to specify network config such as dropout or activation, use a YAML file instead. "
                             "See example configs in example_config")
# LexGLUE
parser.add_argument('--zero', action='store_true')
parser.add_argument('--multi_class', action='store_true')
parser.add_argument('--add_special_tokens', action='store_true')
parser.add_argument('--enable_ce_loss', action='store_true')
parser.add_argument('--hierarchical', action='store_true')
parser.add_argument('--accumulate_grad_batches', type=int, default=1)
parser.add_argument('--enable_transformer_trainer', action='store_true')


def get_config():
parser = argparse.ArgumentParser(
add_help=False,
description='multi-label learning for text classification')
# load params from config file
parser.add_argument('-c', '--config', help='Path to configuration file')
args, _ = parser.parse_known_args()
config = {}
if args.config:
with open(args.config) as fp:
config = yaml.load(fp, Loader=yaml.SafeLoader)
add_all_arguments(parser)
parser.set_defaults(**config)
args = parser.parse_args()
config = AttributeDict(vars(args))
config.run_name = '{}_{}_{}'.format(
config.data_name,
Path(config.config).stem if config.config else config.model_name,
datetime.now().strftime('%Y%m%d%H%M%S'),
)
config.checkpoint_dir = os.path.join(config.result_dir, config.run_name)
config.log_path = os.path.join(config.checkpoint_dir, 'logs.json')
config.predict_out_path = config.predict_out_path or os.path.join(
config.checkpoint_dir, 'predictions.txt')
config.train_path = config.train_path or os.path.join(
config.data_dir, 'train.txt')
config.val_path = config.val_path or os.path.join(
config.data_dir, 'valid.txt')
config.test_path = config.test_path or os.path.join(
config.data_dir, 'test.txt')
return config
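
# A sketch of the precedence get_config() implements: explicit command-line
# arguments override values from the YAML file passed via --config, which in
# turn override the argparse defaults above. For example, with a config file
# containing `learning_rate: 0.001`, running
#   python main.py -c config.yml --learning_rate 0.01
# resolves config.learning_rate to 0.01.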


def check_config(config):
    """Check the configuration for invalid arguments.

    Args:
        config (AttributeDict): Config of the experiment from `get_config`.
    """
    if config.model_name == 'XMLCNN' and config.seed is not None:
        raise ValueError("nn.AdaptiveMaxPool1d doesn't have a deterministic implementation, but a seed is "
                         "specified. Please do not specify a seed.")
    if config.eval and not os.path.exists(config.test_path):
        raise ValueError('--eval is specified but there is no test dataset')


def main():
# Get config
config = get_config()
check_config(config)
# Set up logger
log_level = logging.WARNING if config.silent else logging.INFO
logging.basicConfig(
level=log_level, format='%(asctime)s %(levelname)s:%(message)s')
logging.info(f'Run name: {config.run_name}')
if config.linear:
from linear_trainer import linear_run
linear_run(config)
else:
from torch_trainer import TorchTrainer
trainer = TorchTrainer(config) # initialize trainer
# train
if not config.eval:
trainer.train()
# test
if 'test' in trainer.datasets:
trainer.test()
return config


def dump_time_to_log(log_path, time):
    """Write the wall time to the log file.

    Args:
        log_path (str): Path to the log file.
        time (int): Wall time in seconds.
    """
assert os.path.isfile(log_path)
    with open(log_path) as fp:
        result = json.load(fp)
if time >= 60 * 60:
h, s = divmod(time, 60 * 60)
formatted_time = f'{h}h {s/60:.0f}m'
elif time >= 60:
m, s = divmod(time, 60)
formatted_time = f'{m}m {s}s'
else:
formatted_time = f'{time}s'
result['time'] = formatted_time
with open(log_path, 'w') as fp:
json.dump(result, fp)
print(f'Wall time: {formatted_time}')
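
# Example (a sketch): dump_time_to_log(config.log_path, 3900) computes
# divmod(3900, 3600) == (1, 300) and records the wall time as '1h 5m'.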


if __name__ == '__main__':
wall_time = Timer()
config = main()
dump_time_to_log(config.log_path, round(wall_time.time()))
# print(f'Wall time: {wall_time.time():.2f} (s)')
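
# Typical invocations (sketches; the config path below is an assumed example,
# see the example_config directory mentioned in the --help text):
#   python main.py -c example_config/rcv1/kimcnn.yml                    # train a neural model
#   python main.py --linear --data_dir ./data/rcv1                      # train a linear model
#   python main.py -c <config> --eval --checkpoint_path <checkpoint>    # evaluate only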