3 changes: 3 additions & 0 deletions docs/diffusers/imgs/README.md
@@ -0,0 +1,3 @@
### Image Credits

The images in this folder are taken from the [Hugging Face Diffusers repository](https://github.com/huggingface/diffusers/tree/main/docs/source/en/imgs) and are subject to the Apache 2.0 license of the Diffusers project.
Binary file added docs/diffusers/imgs/access_request.png
Binary file added docs/diffusers/imgs/diffusers_library.jpg
7 changes: 6 additions & 1 deletion examples/diffusers/cogvideox_factory/README.md
@@ -5,7 +5,8 @@
> Our development and validation are based on Ascend Atlas 800T A2 hardware, with the following environment:
> | mindspore | ascend driver | firmware | cann toolkit/kernel |
> |:----------:|:--------------:|:-----------:|:------------------:|
> | 2.5 | 24.1.RC2 | 7.5.0.1.129 | 8.0.0.beta1 |
> | 2.6.0 | 24.1.RC2 | 7.3.0.1.231 | 8.1.RC1 |
> | 2.7.0 | 24.1.RC2 | 7.3.0.1.231 | 8.2.RC1 |

<table align="center">
<tr>
@@ -409,3 +410,7 @@ NODE_RANK="0"
The current training scripts do not fully support all of the training arguments in the original repository; see `check_args()` in [`args.py`](./scripts/args.py) for details.

One major limitation comes from the fact that the [3D Causal VAE in CogVideoX does not support graph mode](https://gist.github.com/townwish4git/b6cd0d213b396eaedfb69b3abcd742da), which means **VAE training is not supported in graph mode**; in graph mode, data preprocessing must therefore be done in advance to obtain the VAE-latents/text-encoder-embeddings cache (see the caching sketch after this section).


### Note
If `Exception ignored: OSError [Errno 9] Bad file descriptor` appears after training finishes, it is only a message printed during Python shutdown and does not affect the training results; with Python 3.11 this message no longer appears.
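
To make the caching requirement above concrete, here is a minimal sketch of precomputing VAE latents in PyNative mode before a graph-mode training run. It is not the example's own preprocessing script: `AutoencoderKLCogVideoX`, the `(C, F, H, W)` video layout, and the output paths are assumptions that mirror the upstream diffusers API.

```python
# Minimal caching sketch (assumptions: AutoencoderKLCogVideoX is exposed by
# mindone.diffusers as in upstream diffusers; videos arrive as float arrays of
# shape (C, F, H, W) scaled to [-1, 1]; output paths are illustrative).
import numpy as np
import mindspore as ms
from mindone.diffusers import AutoencoderKLCogVideoX

# The 3D causal VAE only runs in PyNative mode, so caching is done eagerly.
ms.set_context(mode=ms.PYNATIVE_MODE)

vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX1.5-5B", subfolder="vae")


def cache_video_latents(video: np.ndarray, out_path: str) -> None:
    """Encode one video and save a sample of its latent distribution to disk."""
    x = ms.Tensor(video[None], dtype=ms.float32)  # add batch dim -> (1, C, F, H, W)
    latent_dist = vae.encode(x)[0]                # diagonal Gaussian over latents
    latents = latent_dist.sample()                # apply the training script's own scaling later
    np.save(out_path, latents.asnumpy())


# Usage: run once over the dataset, then point the graph-mode training script
# at the cached files instead of raw videos, e.g.
# cache_video_latents(video_array, "preprocessed-dataset/latents/000001.npy")
```
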
@@ -31,7 +31,6 @@
from mindone.diffusers.models.layers_compat import pad
from mindone.diffusers.models.modeling_outputs import AutoencoderKLOutput
from mindone.diffusers.models.modeling_utils import ModelMixin
from mindone.diffusers.models.normalization import GroupNorm
from mindone.diffusers.models.upsampling import CogVideoXUpsample3D
from mindone.diffusers.utils import logging

@@ -40,7 +39,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name


class GroupNorm_SP(GroupNorm):
class GroupNorm_SP(mint.nn.GroupNorm):
    def set_frame_group_size(self, frame_group_size):
        self.frame_group_size = frame_group_size

@@ -32,7 +32,7 @@ AMP_LEVEL=O2
DATA_ROOT="preprocessed-dataset"
CAPTION_COLUMN="prompts.txt"
VIDEO_COLUMN="videos.txt"
MODEL_NAME_OR_PATH="THUDM/CogVideoX1.5-5b"
MODEL_NAME_OR_PATH="THUDM/CogVideoX1.5-5B"
H=768
W=1360
F=77
@@ -40,7 +40,7 @@ DEEPSPEED_ZERO_STAGE=3
DATA_ROOT="preprocessed-dataset"
CAPTION_COLUMN="prompts.txt"
VIDEO_COLUMN="videos.txt"
MODEL_NAME_OR_PATH="THUDM/CogVideoX1.5-5b"
MODEL_NAME_OR_PATH="THUDM/CogVideoX1.5-5B"
H=768
W=1360
F=77
2 changes: 1 addition & 1 deletion examples/diffusers/cogview/README.md
@@ -29,7 +29,7 @@ cd mindone
pip install -e .
# NOTE: transformers requires >=4.46.0

cd examples/cogview
cd examples/diffusers/cogview
```


146 changes: 146 additions & 0 deletions examples/diffusers/controlnet/test_controlnet.py
@@ -0,0 +1,146 @@
# coding=utf-8
# Copyright 2025 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import sys
import tempfile

sys.path.append("..")
from examples.diffusers.test_examples_utils import ExamplesTests, run_command # noqa: E402

ExamplesTests._launch_args = ["python"]

logging.basicConfig(level=logging.DEBUG)

logger = logging.getLogger()
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)


class ControlNet(ExamplesTests):
    def test_controlnet_checkpointing_checkpoints_total_limit(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            test_args = f"""
                examples/diffusers/controlnet/train_controlnet.py
                --pretrained_model_name_or_path=hf-internal-testing/tiny-stable-diffusion-pipe
                --revision refs/pr/4
                --dataset_name=hf-internal-testing/fill10
                --output_dir={tmpdir}
                --resolution=64
                --train_batch_size=1
                --gradient_accumulation_steps=1
                --max_train_steps=6
                --checkpoints_total_limit=2
                --checkpointing_steps=2
                --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet
                """.split()

            run_command(self._launch_args + test_args)

            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
                {"checkpoint-4", "checkpoint-6"},
            )

    def test_controlnet_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            test_args = f"""
                examples/diffusers/controlnet/train_controlnet.py
                --pretrained_model_name_or_path=hf-internal-testing/tiny-stable-diffusion-pipe
                --revision refs/pr/4
                --dataset_name=hf-internal-testing/fill10
                --output_dir={tmpdir}
                --resolution=64
                --train_batch_size=1
                --gradient_accumulation_steps=1
                --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet
                --max_train_steps=6
                --checkpointing_steps=2
                """.split()

            run_command(self._launch_args + test_args)

            self.assertEqual(
                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
                {"checkpoint-2", "checkpoint-4", "checkpoint-6"},
            )

            resume_run_args = f"""
                examples/diffusers/controlnet/train_controlnet.py
                --pretrained_model_name_or_path=hf-internal-testing/tiny-stable-diffusion-pipe
                --revision refs/pr/4
                --dataset_name=hf-internal-testing/fill10
                --output_dir={tmpdir}
                --resolution=64
                --train_batch_size=1
                --gradient_accumulation_steps=1
                --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet
                --max_train_steps=8
                --checkpointing_steps=2
                --resume_from_checkpoint=checkpoint-6
                --checkpoints_total_limit=2
                """.split()

            run_command(self._launch_args + resume_run_args)

            self.assertEqual({x for x in os.listdir(tmpdir) if "checkpoint" in x}, {"checkpoint-6", "checkpoint-8"})


class ControlNetSDXL(ExamplesTests):
    def test_controlnet_sdxl(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            test_args = f"""
                examples/diffusers/controlnet/train_controlnet_sdxl.py
                --pretrained_model_name_or_path=hf-internal-testing/tiny-stable-diffusion-xl-pipe
                --revision refs/pr/2
                --dataset_name=hf-internal-testing/fill10
                --output_dir={tmpdir}
                --resolution=64
                --train_batch_size=1
                --gradient_accumulation_steps=1
                --controlnet_model_name_or_path=hf-internal-testing/tiny-controlnet-sdxl
                --max_train_steps=4
                --checkpointing_steps=2
                """.split()

            run_command(self._launch_args + test_args)

            self.assertTrue(os.path.isfile(os.path.join(tmpdir, "diffusion_pytorch_model.safetensors")))


class ControlNetflux(ExamplesTests):
    def test_controlnet_flux(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            test_args = f"""
                examples/diffusers/controlnet/train_controlnet_flux.py
                --pretrained_model_name_or_path=hf-internal-testing/tiny-flux-pipe
                --output_dir={tmpdir}
                --dataset_name=hf-internal-testing/fill10
                --conditioning_image_column=conditioning_image
                --image_column=image
                --caption_column=text
                --resolution=64
                --train_batch_size=1
                --gradient_accumulation_steps=1
                --max_train_steps=4
                --checkpointing_steps=2
                --num_double_layers=1
                --num_single_layers=1
                """.split()

            run_command(self._launch_args + test_args)

            self.assertTrue(os.path.isfile(os.path.join(tmpdir, "diffusion_pytorch_model.safetensors")))
7 changes: 3 additions & 4 deletions examples/diffusers/controlnet/train_controlnet.py
@@ -879,8 +879,8 @@ def __len__(self):
if is_master(args):
logger.info(f"Resuming from checkpoint {path}")
# TODO: load optimizer & grad scaler etc. like accelerator.load_state
input_model_file = os.path.join(args.output_dir, path, "pytorch_model.ckpt")
ms.load_param_into_net(unet, ms.load_checkpoint(input_model_file), strict_load=True)
input_model_file = os.path.join(args.output_dir, path, "unet/diffusion_pytorch_model.safetensors")
ms.load_param_into_net(unet, ms.load_checkpoint(input_model_file, format="safetensors"), strict_load=True)
global_step = int(path.split("-")[1])

initial_global_step = global_step
@@ -939,8 +939,7 @@ def __len__(self):
save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}")
# TODO: save optimizer & grad scaler etc. like accelerator.save_state
os.makedirs(save_path, exist_ok=True)
output_model_file = os.path.join(save_path, "pytorch_model.ckpt")
ms.save_checkpoint(unet, output_model_file)
unet.save_pretrained(os.path.join(save_path, "unet"))
logger.info(f"Saved state to {save_path}")

if args.validation_prompt is not None and global_step % args.validation_steps == 0:
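
For reference, the hunks above replace the single `pytorch_model.ckpt` checkpoint with the diffusers-style `checkpoint-*/unet/diffusion_pytorch_model.safetensors` layout written by `save_pretrained`. A minimal sketch of reading such a checkpoint back is shown below; `UNet2DConditionModel` and the `output/checkpoint-1000` path are illustrative assumptions, while the safetensors loading calls come from the diff itself.

```python
# Minimal sketch of reading back a checkpoint saved as above; the class name
# and paths are assumptions, only the safetensors loading calls come from the diff.
import os

import mindspore as ms
from mindone.diffusers import UNet2DConditionModel

save_path = "output/checkpoint-1000"  # hypothetical --output_dir / checkpoint folder

# Option 1: rebuild the module from the config + weights written by save_pretrained().
unet = UNet2DConditionModel.from_pretrained(os.path.join(save_path, "unet"))

# Option 2: load only the weights into an existing network, mirroring the resume
# logic above (ms.load_checkpoint can read safetensors files directly).
state = ms.load_checkpoint(
    os.path.join(save_path, "unet", "diffusion_pytorch_model.safetensors"),
    format="safetensors",
)
ms.load_param_into_net(unet, state, strict_load=True)
```
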
2 changes: 1 addition & 1 deletion examples/diffusers/controlnet/train_controlnet_flux.py
@@ -35,7 +35,7 @@
from mindspore.dataset import GeneratorDataset, transforms, vision

from mindone.diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel
from mindone.diffusers.models.controlnet_flux import FluxControlNetModel
from mindone.diffusers.models.controlnets.controlnet_flux import FluxControlNetModel
from mindone.diffusers.models.layers_compat import set_amp_strategy
from mindone.diffusers.optimization import get_scheduler
from mindone.diffusers.pipelines.flux.pipeline_flux_controlnet import FluxControlNetPipeline
7 changes: 3 additions & 4 deletions examples/diffusers/controlnet/train_controlnet_sdxl.py
@@ -990,8 +990,8 @@ def __len__(self):
if is_master(args):
logger.info(f"Resuming from checkpoint {path}")
# TODO: load optimizer & grad scaler etc. like accelerator.load_state
input_model_file = os.path.join(args.output_dir, path, "pytorch_model.ckpt")
ms.load_param_into_net(unet, ms.load_checkpoint(input_model_file), strict_load=True)
input_model_file = os.path.join(args.output_dir, path, "unet/diffusion_pytorch_model.safetensors")
ms.load_param_into_net(unet, ms.load_checkpoint(input_model_file, format="safetensors"), strict_load=True)
global_step = int(path.split("-")[1])

initial_global_step = global_step
@@ -1050,8 +1050,7 @@ def __len__(self):
save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}")
# TODO: save optimizer & grad scaler etc. like accelerator.save_state
os.makedirs(save_path, exist_ok=True)
output_model_file = os.path.join(save_path, "pytorch_model.ckpt")
ms.save_checkpoint(unet, output_model_file)
unet.save_pretrained(os.path.join(save_path, "unet"))
logger.info(f"Saved state to {save_path}")

if args.validation_prompt is not None and global_step % args.validation_steps == 0: