Clean up yamls (#58)
* Clean up yamls
Landanjs authored Aug 21, 2023
1 parent bacab36 commit ee8d0b2
Showing 4 changed files with 40 additions and 53 deletions.
yamls/hydra-yamls/SD-2-base-256.yaml (16 changes: 5 additions & 11 deletions)
@@ -1,8 +1,6 @@
 project: # Insert wandb project name
-batch_size: 2048
-seed: 17
-scale_schedule_ratio: 1.0
 name: # Insert wandb run name
+seed: 17
 eval_first: false
 algorithms:
   low_precision_groupnorm:
@@ -22,15 +20,14 @@ model:
   val_guidance_scales: []
   loss_bins: []
 dataset:
-  train_batch_size: ${batch_size}
-  eval_batch_size: 1024 # Should be 8 per device
+  train_batch_size: 2048 # Global training batch size
+  eval_batch_size: 1024 # Global evaluation batch size
   train_dataset:
     _target_: diffusion.datasets.laion.laion.build_streaming_laion_dataloader
     remote:
       # Path to object store bucket(s)
     local:
       # Path to corresponding local dataset(s)
-    batch_size: ${batch_size}
     tokenizer_name_or_path: stabilityai/stable-diffusion-2-base
     caption_drop_prob: 0.1
     resize_size: 256
@@ -46,7 +43,6 @@ dataset:
     _target_: diffusion.datasets.coco.coco_captions.build_streaming_cocoval_dataloader
     remote: # Path to object store bucket
     local: # Path to local dataset cache
-    batch_size: 8
     resize_size: 256
     prefetch_factor: 2
     num_workers: 8
@@ -57,10 +53,9 @@ optimizer:
   lr: 1.0e-4
   weight_decay: 0.01
 scheduler:
-  _target_: composer.optim.MultiStepWithWarmupScheduler
+  _target_: composer.optim.LinearWithWarmupScheduler
   t_warmup: 10000ba
-  milestones:
-  - 200ep
+  alpha_f: 1.0
 logger:
   wandb:
     _target_: composer.loggers.wandb_logger.WandBLogger
@@ -87,7 +82,6 @@ trainer:
   device_train_microbatch_size: 16
   run_name: ${name}
   seed: ${seed}
-  scale_schedule_ratio: ${scale_schedule_ratio}
   save_folder: # Insert path to save folder or bucket
   save_interval: 10000ba
   save_overwrite: true
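In every yaml, the MultiStepWithWarmupScheduler (which dropped the learning rate at a 200ep milestone) is swapped for a LinearWithWarmupScheduler with alpha_f: 1.0, i.e. a constant learning rate once the 10000ba warmup ends. A rough sketch of the two multipliers, assuming Composer's documented semantics (the function names and default gamma here are illustrative, not Composer's code):

# Hypothetical sketch of the LR multipliers before and after this change.
def multistep_with_warmup(t, t_warmup, milestones, gamma=0.1):
    """Old schedule: linear warmup, then a gamma-fold drop at each milestone."""
    if t < t_warmup:
        return t / t_warmup
    return gamma ** sum(1 for m in milestones if t >= m)

def linear_with_warmup(t, t_max, t_warmup, alpha_i=1.0, alpha_f=1.0):
    """New schedule: linear warmup, then linear interpolation from alpha_i to
    alpha_f. With alpha_f: 1.0, as in these yamls, the multiplier stays at 1.0
    after warmup, i.e. a constant learning rate."""
    if t < t_warmup:
        return t / t_warmup
    return alpha_i + (alpha_f - alpha_i) * (t - t_warmup) / (t_max - t_warmup)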
yamls/hydra-yamls/SD-2-base-512.yaml (16 changes: 5 additions & 11 deletions)
@@ -1,8 +1,6 @@
 project: # Insert wandb project name
-batch_size: 2048
-seed: 17
-scale_schedule_ratio: 1.0
 name: # Insert wandb run name
+seed: 17
 eval_first: false
 algorithms:
   ema:
@@ -28,15 +26,14 @@ model:
   val_guidance_scales: []
   loss_bins: []
 dataset:
-  train_batch_size: ${batch_size}
-  eval_batch_size: 1024 # Should be 8 per device
+  train_batch_size: 2048 # Global training batch size
+  eval_batch_size: 1024 # Global evaluation batch size
   train_dataset:
     _target_: diffusion.datasets.laion.laion.build_streaming_laion_dataloader
     remote:
       # Path to object store bucket(s)
     local:
       # Path to corresponding local dataset(s)
-    batch_size: ${batch_size}
     tokenizer_name_or_path: stabilityai/stable-diffusion-2-base
     caption_drop_prob: 0.1
     resize_size: 512
@@ -52,7 +49,6 @@ dataset:
     _target_: diffusion.datasets.coco.coco_captions.build_streaming_cocoval_dataloader
     remote: # Path to object store bucket
     local: # Path to local dataset cache
-    batch_size: 8
     resize_size: 512
     prefetch_factor: 2
     num_workers: 8
@@ -63,10 +59,9 @@ optimizer:
   lr: 1.0e-4
   weight_decay: 0.01
 scheduler:
-  _target_: composer.optim.MultiStepWithWarmupScheduler
+  _target_: composer.optim.LinearWithWarmupScheduler
   t_warmup: 10000ba
-  milestones:
-  - 200ep
+  alpha_f: 1.0
 logger:
   wandb:
     _target_: composer.loggers.wandb_logger.WandBLogger
@@ -93,7 +88,6 @@ trainer:
   device_train_microbatch_size: 16
   run_name: ${name}
   seed: ${seed}
-  scale_schedule_ratio: ${scale_schedule_ratio}
   save_folder: # Insert path to save folder or bucket
   save_interval: 10000ba
   save_overwrite: true
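The batch sizes are now hardcoded and commented as global values, and the per-dataloader batch_size keys are gone. Composer derives per-device batching from the global train_batch_size together with trainer.device_train_microbatch_size; a sketch of that arithmetic under an assumed 8-GPU run (the GPU count is left blank in these yamls):

# Illustrative arithmetic only; Composer performs this split internally.
global_batch = 2048   # dataset.train_batch_size (global across all ranks)
num_gpus = 8          # hypothetical value; insert your own GPU count
microbatch = 16       # trainer.device_train_microbatch_size

per_device_batch = global_batch // num_gpus        # 256 samples per optimizer step
grad_accum_steps = per_device_batch // microbatch  # 16 microbatches per step
print(per_device_batch, grad_accum_steps)          # 256 16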
yamls/mosaic-yamls/SD-2-base-256.yaml (34 changes: 18 additions & 16 deletions)
@@ -1,24 +1,30 @@
 run_name: SD2-base-256
-cluster: # Insert cluster here
-gpu_num: # Insert number of GPUs
 image: mosaicml/pytorch_vision:1.13.1_cu117-python3.10-ubuntu20.04
+compute:
+  gpus: # Number of GPUs to use
+
+## These configurations are optional
+# cluster: TODO # Name of the cluster to use for this run
+# gpu_type: a100_80gb # Type of GPU to use. We use a100_80gb in our experiments
+
 integrations:
 - integration_type: "git_repo"
-  git_repo: mosaicml/diffusion2
+  git_repo: mosaicml/diffusion
   git_branch: main
   pip_install: .[all]
 - integration_type: "wandb"
   project: # Insert wandb project name
   entity: # Insert wandb entity name
+env_variables:
+- key: HYDRA_FULL_ERROR
+  value: '1' # Set to '0' to limit Hydra tracebacks
 command: |
-  cd diffusion2
-  HYDRA_FULL_ERROR=1 composer run.py
+  cd diffusion
+  composer run.py --config-path /mnt/config --config-name parameters
 parameters:
   project: # Insert wandb project name
-  batch_size: 2048
-  seed: 17
-  scale_schedule_ratio: 1.0
   name: # Insert wandb run name
+  seed: 17
   eval_first: false
   algorithms:
     low_precision_groupnorm:
@@ -38,15 +44,14 @@ parameters:
     val_guidance_scales: []
     loss_bins: []
   dataset:
-    train_batch_size: ${batch_size}
-    eval_batch_size: 1024 # Should be 8 per device
+    train_batch_size: 2048 # Global training batch size
+    eval_batch_size: 1024 # Global evaluation batch size
     train_dataset:
       _target_: diffusion.datasets.laion.laion.build_streaming_laion_dataloader
       remote:
         # Path to object store bucket(s)
       local:
         # Path to corresponding local dataset(s)
-      batch_size: ${batch_size}
       tokenizer_name_or_path: stabilityai/stable-diffusion-2-base
       caption_drop_prob: 0.1
       resize_size: 256
@@ -62,7 +67,6 @@ parameters:
       _target_: diffusion.datasets.coco.coco_captions.build_streaming_cocoval_dataloader
       remote: # Path to object store bucket
       local: # Path to local dataset cache
-      batch_size: 8
       resize_size: 256
       prefetch_factor: 2
       num_workers: 8
@@ -73,10 +77,9 @@ parameters:
     lr: 1.0e-4
     weight_decay: 0.01
   scheduler:
-    _target_: composer.optim.MultiStepWithWarmupScheduler
+    _target_: composer.optim.LinearWithWarmupScheduler
     t_warmup: 10000ba
-    milestones:
-    - 200ep
+    alpha_f: 1.0
   logger:
     wandb:
       _target_: composer.loggers.wandb_logger.WandBLogger
@@ -103,7 +106,6 @@ parameters:
     device_train_microbatch_size: 16
     run_name: ${name}
     seed: ${seed}
-    scale_schedule_ratio: ${scale_schedule_ratio}
     save_folder: # Insert path to save folder or bucket
     save_interval: 10000ba
     save_overwrite: true
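The launch command for the mosaic yamls changes too: the repository is now mosaicml/diffusion rather than mosaicml/diffusion2, HYDRA_FULL_ERROR moves from an inline prefix into env_variables, and run.py is pointed at the config the platform materializes from the parameters: block. A minimal sketch of the equivalent load through Hydra's compose API, assuming the block lands at /mnt/config/parameters.yaml as the --config-path/--config-name flags imply:

# Sketch only: loads the run config the same way
# `composer run.py --config-path /mnt/config --config-name parameters` does,
# assuming hydra-core is available in the image.
from hydra import compose, initialize_config_dir

with initialize_config_dir(config_dir="/mnt/config", version_base=None):
    cfg = compose(config_name="parameters")
print(cfg.dataset.train_batch_size)  # 2048 in these yamls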
yamls/mosaic-yamls/SD-2-base-512.yaml (27 changes: 12 additions & 15 deletions)
@@ -1,29 +1,30 @@
 name: SD2-base-512
 image: mosaicml/pytorch_vision:1.13.1_cu117-python3.10-ubuntu20.04
 compute:
-  gpus: 8 # Number of GPUs to use
+  gpus: # Number of GPUs to use
 
 ## These configurations are optional
 # cluster: TODO # Name of the cluster to use for this run
 # gpu_type: a100_80gb # Type of GPU to use. We use a100_80gb in our experiments
 
 integrations:
 - integration_type: "git_repo"
-  git_repo: mosaicml/diffusion2
+  git_repo: mosaicml/diffusion
   git_branch: main
   pip_install: .[all]
 - integration_type: "wandb"
   project: # Insert wandb project name
   entity: # Insert wandb entity name
+env_variables:
+- key: HYDRA_FULL_ERROR
+  value: '1' # Set to '0' to limit Hydra tracebacks
 command: |
-  cd diffusion2
-  HYDRA_FULL_ERROR=1 composer run.py
+  cd diffusion
+  composer run.py --config-path /mnt/config --config-name parameters
 parameters:
   project: # Insert wandb project name
-  batch_size: 2048
-  seed: 17
-  scale_schedule_ratio: 1.0
   name: # Insert wandb run name
+  seed: 17
   eval_first: false
   algorithms:
     ema:
@@ -49,15 +50,14 @@ parameters:
     val_guidance_scales: []
     loss_bins: []
   dataset:
-    train_batch_size: ${batch_size}
-    eval_batch_size: 1024 # Should be 8 per device
+    train_batch_size: 2048 # Global training batch size
+    eval_batch_size: 1024 # Global evaluation batch size
     train_dataset:
       _target_: diffusion.datasets.laion.laion.build_streaming_laion_dataloader
       remote:
         # Path to object store bucket(s)
       local:
         # Path to corresponding local dataset(s)
-      batch_size: ${batch_size}
       tokenizer_name_or_path: stabilityai/stable-diffusion-2-base
       caption_drop_prob: 0.1
       resize_size: 512
@@ -73,7 +73,6 @@ parameters:
      _target_: diffusion.datasets.coco.coco_captions.build_streaming_cocoval_dataloader
       remote: # Path to object store bucket
       local: # Path to local dataset cache
-      batch_size: 8
       resize_size: 512
       prefetch_factor: 2
       num_workers: 8
@@ -84,10 +83,9 @@ parameters:
     lr: 1.0e-4
     weight_decay: 0.01
   scheduler:
-    _target_: composer.optim.MultiStepWithWarmupScheduler
+    _target_: composer.optim.LinearWithWarmupScheduler
     t_warmup: 10000ba
-    milestones:
-    - 200ep
+    alpha_f: 1.0
   logger:
     wandb:
       _target_: composer.loggers.wandb_logger.WandBLogger
@@ -114,7 +112,6 @@ parameters:
     device_train_microbatch_size: 16
     run_name: ${name}
     seed: ${seed}
-    scale_schedule_ratio: ${scale_schedule_ratio}
     save_folder: # Insert path to save folder or bucket
     save_interval: 10000ba
     save_overwrite: true
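The removals in the parameters block follow from the same cleanup: the dataloaders previously took their sizes from a top-level batch_size key via OmegaConf's ${batch_size} interpolation, which the hardcoded train_batch_size: 2048 replaces. A small sketch of how such references resolve (values illustrative):

# Sketch of OmegaConf interpolation, the mechanism behind ${batch_size}.
from omegaconf import OmegaConf

old_style = OmegaConf.create("""
batch_size: 2048
dataset:
  train_batch_size: ${batch_size}
""")
print(old_style.dataset.train_batch_size)  # 2048, resolved against the root key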
