-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathbar_l.yaml
More file actions
78 lines (73 loc) · 1.71 KB
/
bar_l.yaml
File metadata and controls
78 lines (73 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Training configuration for the "bar_l" run (class-conditional ImageNet generator).
# NOTE(review): all leading indentation was lost when this file was extracted;
# the nesting below is reconstructed from the conventional
# experiment/model/dataset/optimizer/lr_scheduler/training layout. Confirm
# against the original file before relying on ambiguous levels (flagged below).
experiment:
  project: bar
  name: bar_l
  output_dir: bar_l
  # 1281167 is the ImageNet-1k train-split size.
  max_train_examples: 1281167
  save_every: 50000  # checkpoint interval, in steps (presumably) — confirm units
  # 5000000 > max_train_steps (250000), so periodic eval never fires in this run.
  eval_every: 5000000
  generate_every: 5000
  log_every: 50
  log_grad_norm_every: 1000
  resume: true
  tokenizer_checkpoint: assets/tokenizer/bar_fsq_16bits_ft.bin

model:
  vq_model:
    # 65536 = 2^16, consistent with token_size: 16 (bits per token, presumably FSQ
    # given the tokenizer checkpoint name) — confirm against the tokenizer code.
    codebook_size: 65536
    token_size: 16
    vit_dec_patch_size: 16
  generator:
    hidden_size: 1280
    num_hidden_layers: 32
    num_attention_heads: 16
    class_label_dropout: 0.1
    # 256 tokens per image (16x16 grid at patch size 16, presumably).
    image_seq_len: 256
    condition_num_classes: 1000
    target_codebook_size: 2
    use_checkpoint: false
    # Anneal window: 125000..187500 of the 250000 total steps (50%..75%).
    randomness_anneal_start: 125000
    randomness_anneal_end: 187500
    repeat_class_condition: 32
    dropout: 0.2
    attn_drop: 0.2
    guidance_scale: 5.0
    mbm_head:
      width: 2048
      num_layers: 3
      # NOTE(review): the mangled source does not show whether the two keys
      # below belong here or one level up under generator — confirm before use.
      randomize_temperature: 1.0
      # 4 groups x 4 = 16, matching vq_model.token_size.
      tokens_allocation: [4, 4, 4, 4]

dataset:
  params:
    # Placeholder paths — must be replaced with real dataset locations.
    pretokenization: /path/to/pretokenized_npz
    train_shards_path_or_url: /path/to/imagenet-train-{000000..000320}.tar
    eval_shards_path_or_url: /path/to/imagenet-val-{000000..000049}.tar
    num_workers_per_gpu: 12
  preprocessing:
    resize_shorter_edge: 256
    crop_size: 256
    random_crop: false
    random_flip: true

optimizer:
  name: adamw
  params:
    learning_rate: 0.0004
    beta1: 0.9
    beta2: 0.96
    weight_decay: 0.03

lr_scheduler:
  scheduler: cosine
  params:
    # OmegaConf-style interpolation: reuses optimizer.params.learning_rate.
    learning_rate: ${optimizer.params.learning_rate}
    # 62500 warmup steps = 25% of max_train_steps.
    warmup_steps: 62500
    end_lr: 1.0e-05

training:
  compile_model: true
  num_generated_images: 10
  gradient_accumulation_steps: 1
  per_gpu_batch_size: 64
  mixed_precision: bf16
  enable_tf32: true
  enable_wandb: true
  use_ema: false
  seed: 42
  max_train_steps: 250000
  max_grad_norm: 1.0