diff --git a/configs/training/finetune_c_3b_lora.yaml b/configs/training/finetune_c_3b_lora.yaml
index b60c518..f686f73 100755
--- a/configs/training/finetune_c_3b_lora.yaml
+++ b/configs/training/finetune_c_3b_lora.yaml
@@ -1,26 +1,31 @@
 # GLOBAL STUFF
 experiment_id: stage_c_3b_lora
-checkpoint_path: /path/to/checkpoint
-output_path: /path/to/output
+checkpoint_path: output
+output_path: output
 model_version: 3.6B
+dtype: bfloat16
 
 # WandB
-wandb_project: StableCascade
-wandb_entity: wandb_username
+# wandb_project: StableCascade
+# wandb_entity: wandb_username
 
 # TRAINING PARAMS
-lr: 1.0e-4
-batch_size: 32
-image_size: 768
-multi_aspect_ratio: [1/1, 1/2, 1/3, 2/3, 3/4, 1/5, 2/5, 3/5, 4/5, 1/6, 5/6, 9/16]
+lr: 5.0e-6
+batch_size: 1
+image_size: 1024
+# multi_aspect_ratio: [1/1, 1/2, 1/3, 2/3, 3/4, 1/5, 2/5, 3/5, 4/5, 1/6, 5/6, 9/16]
 grad_accum_steps: 4
 updates: 10000
 backup_every: 1000
 save_every: 100
 warmup_updates: 1
-# use_fsdp: True -> FSDP doesn't work at the moment for LoRA
+# use_fsdp: False -> FSDP doesn't work at the moment for LoRA
 use_fsdp: False
 
+# OPTIMIZER
+# Options: AdamW, AdamW8bit, Adafactor
+optimizer_type: Adafactor
+
 # GDF
 # adaptive_loss_weight: True
 
@@ -36,9 +41,7 @@ train_tokens:
 # ema_iters: 100
 # ema_beta: 0.9
 
-webdataset_path:
-  - s3://path/to/your/first/dataset/on/s3
-  - s3://path/to/your/second/dataset/on/s3
+webdataset_path: file:input/data.tar
 effnet_checkpoint_path: models/effnet_encoder.safetensors
 previewer_checkpoint_path: models/previewer.safetensors
-generator_checkpoint_path: models/stage_c_bf16.safetensors
\ No newline at end of file
+generator_checkpoint_path: models/stage_c_bf16.safetensors
diff --git a/models/readme.md b/models/readme.md
index 68a4345..dac8d85 100644
--- a/models/readme.md
+++ b/models/readme.md
@@ -29,6 +29,12 @@ bash download_models.sh essential big-big bfloat16
 The last argument is optional as well, and simply determines in which precision you download Stage B & Stage C.
 If you want a faster download, choose _bfloat16_ (if your machine supports it), otherwise use _float32_.
 
+To check bfloat16 support run:
+```python
+import torch
+torch.cuda.is_bf16_supported()
+```
+
 ### Recommendation
 If your GPU allows for it, you should definitely go for the **large** Stage C, which has 3.6 billion parameters.
 It is a lot better and was finetuned a lot more. Also, the ControlNet and Lora examples are only for the large Stage C at the moment,
@@ -37,4 +43,4 @@ but if your GPU is not so powerful, just go for the smaller one.
 
 ### Remark
 Unfortunately, you can not run the models in float16 at the moment. Only bfloat16 or float32 work for now. However,
-with some investigation, it should be possible to fix the overflowing and allow for inference in float16 as well.
\ No newline at end of file
+with some investigation, it should be possible to fix the overflowing and allow for inference in float16 as well.
diff --git a/readme.md b/readme.md
index 8b408f5..2a4e466 100755
--- a/readme.md
+++ b/readme.md
@@ -1,3 +1,20 @@
+```
+git clone https://github.com/Stability-AI/StableCascade
+cd StableCascade
+pip install -r requirements.txt
+```
+
+```
+cd models
+bash download_models.sh essential big-big bfloat16
+```
+
+
+```
+apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
+```
+
+
 # Stable Cascade
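
As a companion to the `webdataset_path: file:input/data.tar` change above, here is a minimal sketch (not part of the patch) of how such a local WebDataset tar could be packed, assuming the training data consists of `.jpg`/`.txt` pairs grouped by key; the `raw_samples` folder name and the jpg/txt field layout are illustrative assumptions.

```python
# Sketch: pack image/caption pairs into input/data.tar so the config's
# webdataset_path: file:input/data.tar points at a valid local archive.
import os
import webdataset as wds

samples_dir = "raw_samples"  # hypothetical folder of 0001.jpg / 0001.txt pairs
os.makedirs("input", exist_ok=True)

with wds.TarWriter("input/data.tar") as sink:
    for name in sorted(os.listdir(samples_dir)):
        if not name.endswith(".jpg"):
            continue
        key, _ = os.path.splitext(name)
        with open(os.path.join(samples_dir, name), "rb") as f:
            image_bytes = f.read()
        with open(os.path.join(samples_dir, key + ".txt"), encoding="utf-8") as f:
            caption = f.read().strip()
        # WebDataset groups entries by __key__; the file extensions become the sample fields.
        sink.write({"__key__": key, "jpg": image_bytes, "txt": caption})
```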