forked from Dmmc123/taim-gan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompute_metrics.py
80 lines (74 loc) · 2.22 KB
/
compute_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""Compute evaluation metrics for the model over a chosen dataset split."""
import argparse
import torch
import nltk
from torch.utils.data import DataLoader
from src.models.compute_metrics import compute_metrics
from src.data.datasets import TextImageDataset
from src.data.collate import custom_collate
from torchvision import transforms
from src.config import config_dict, update_config
from functools import partial
# Fetch the NLTK tokenizer and POS-tagger data used by the text pipeline.
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")
def main(args):
    """Build the dataset/dataloader and run metric computation.

    :param args: Parsed CLI namespace with attributes ``data_dir``, ``split``,
        ``num_capt``, ``batch_size`` and ``num_workers``.
    """
    cfg_dict = config_dict
    # Only resize here; any further image processing happens downstream.
    transform = transforms.Compose(
        [
            transforms.Resize(304),
        ]
    )
    dataset = TextImageDataset(args.data_dir, args.split, int(args.num_capt), transform)
    # Vocabulary info from the dataset is required by the metric pipeline.
    ix2word = dataset.ix_to_word
    vocab_len = len(ix2word)
    batch_size = int(args.batch_size)
    cfg_dict = update_config(
        cfg_dict, ix2word=ix2word, vocab_len=vocab_len, batch_size=batch_size
    )
    dataloader = DataLoader(
        dataset,
        # Use the validated int so the loader agrees with cfg_dict["batch_size"].
        batch_size=batch_size,
        drop_last=True,
        shuffle=True,
        num_workers=args.num_workers,
        # "spawn" avoids CUDA re-initialization problems in forked workers.
        multiprocessing_context="spawn",
        collate_fn=partial(custom_collate, device=cfg_dict["device"]),
    )
    compute_metrics(dataloader, cfg_dict)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Compute metrics for the model.")
    # NOTE: argparse ignores `default` when `required=True`, so required
    # arguments below carry no (dead) defaults.
    parser.add_argument(
        "--data_dir",
        type=str,
        help="Directory containing the dataset",
        required=True,
    )
    parser.add_argument(
        "--split",
        type=str,
        help="Split of dataset to use for training. Can be 'train' or 'test'.",
        required=True,
    )
    parser.add_argument(
        "--num_capt",
        type=int,
        help="Number of captions per image present in the dataset. 5 for COCO, 10 for bird.",
        required=True,
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=32,
        help="Batch size for training",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=4,
        help="Number of workers for dataloader",
    )
    args = parser.parse_args()
    main(args)