thu-ml
diff --git a/‎README.md
+4 b/‎README.md
+4
diff --git a/‎acce.yaml
+10 b/‎acce.yaml
+10
diff --git a/‎configs/nf7_v3_SNR_rd_size_stroke_train.yaml
+90 b/‎configs/nf7_v3_SNR_rd_size_stroke_train.yaml
+90
diff --git a/‎configs/stage2-v2-snr_train.yaml
+79 b/‎configs/stage2-v2-snr_train.yaml
+79
diff --git a/‎launch_train.sh
+41 b/‎launch_train.sh
+41
@@ -61,6 +61,10 @@ It will output the preprocessed image, generated 6-view images and CCMs and a 3D
 **Tips:** (1) If the result is unsatisfatory, please check whether the input image is correctly pre-processed into a grey background. Otherwise the results will be unpredictable.
 (2) Different from the [Huggingface Demo](https://huggingface.co/spaces/Zhengyi/CRM), this official implementation uses UV texture instead of vertex color. It has better texture than the online demo but longer generating time owing to the UV texturing.
 
+## Train
+We provide a training script for multiview generation; for usage and data requirements, see `launch_train.sh`.
+
+
 ## Todo List
 - [x] Release inference code.
 - [x] Release pretrained models.
 
@@ -0,0 +1,10 @@
+# Accelerate launch configuration; launch_train.sh passes it via --config_file.
+compute_environment: LOCAL_MACHINE
+distributed_type: DEEPSPEED
+mixed_precision: fp16
+# NOTE(review): launch_train.sh also passes --num_processes on the command
+# line; confirm which of the two values takes effect.
+num_processes: 8
+deepspeed_config:
+  zero_stage: 2
+  zero3_init_flag: false
+  deepspeed_multinode_launcher: standard
+  # Quoted to make explicit that these are the string "none", not YAML null.
+  offload_optimizer_device: 'none'
+  offload_param_device: 'none'
@@ -0,0 +1,90 @@
+# Stage-1 training configuration; launch_train.sh passes it to train.py
+# through --config and overrides config.batch_size and config.eval_interval
+# on the command line.
+config:
+    # others
+    seed: 1234
+    num_frames: 7
+    mode: pixel
+    offset_noise: true
+
+    # model related
+    models:
+        config: imagedream/configs/sd_v2_base_ipmv_zero_SNR.yaml
+        # NOTE(review): configs/stage2-v2-snr_train.yaml resumes from
+        # release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt; confirm which
+        # directory layout is intended.
+        resume: release_models/sd-v2.1-base-4view-ipmv.pt
+
+    # sampler related
+    sampler:
+        target: libs.sample.ImageDreamDiffusion
+        params:
+            mode: pixel
+            num_frames: 7
+            # 7 entries, matching num_frames; ref_position 6 is the last index.
+            camera_views: [1, 2, 3, 4, 5, 0, 0]
+            ref_position: 6
+            random_background: false
+            offset_noise: true
+            resize_rate: 1.0
+
+    # config datasets
+    # train_data and eval_data share every parameter except `split`.
+    train_data:
+        target: libs.data.DataRelativeStroke
+        params:
+            base_dir: train_examples
+            caption_csv: train_examples/caption.csv
+            image_size: 256
+            repeat: 1
+            camera_views: [1, 2, 3, 4, 5, 0, 0]
+            ref_indexs: [0, 1, 3, 4, 5, 2]
+            ref_position: 6
+            split: train
+            num_frames: 7
+            random_background: true
+            resize_rate: 0.95
+            stroke_p: 0.5
+            eval_size: 100
+            resize_range:
+                - 0.5
+                - 1.0
+    eval_data:
+        target: libs.data.DataRelativeStroke
+        params:
+            base_dir: train_examples
+            caption_csv: train_examples/caption.csv
+            image_size: 256
+            repeat: 1
+            camera_views: [1, 2, 3, 4, 5, 0, 0]  # camera views are relative views
+            ref_indexs: [0, 1, 3, 4, 5, 2]
+            ref_position: 6
+            split: eval
+            num_frames: 7
+            random_background: true
+            resize_rate: 0.95
+            stroke_p: 0.5
+            eval_size: 100
+            resize_range:
+                - 0.5
+                - 1.0
+
+    in_the_wild_images:
+        target: libs.data.InTheWildImages
+        params:
+            base_dirs:
+                - examples
+
+    # optimizer related
+    optimizer:
+        # Written with an explicit decimal point: YAML 1.1 loaders such as
+        # PyYAML resolve a bare `5e-5` as a string, not a float.
+        lr: 5.0e-5
+    gradient_accumulation_steps: 12
+
+    # wandb related parameters
+    project: CRM
+    wandb_run_name: CRM-pixel
+    wandb_mode: offline
+
+
+    # training hyperparameters
+    batch_size: 16
+    dataloader:
+        num_workers: 10
+        shuffle: true
+        drop_last: true
+
+    save_interval: 600000
+    log_interval: 5000
+    eval_interval: 300000
+    max_step: 10000000
@@ -0,0 +1,79 @@
+# Stage-2 training configuration; referenced by the commented-out
+# train_stage2.py command in launch_train.sh.
+config:
+    # others
+    seed: 1234
+    num_frames: 6
+    mode: pixel
+    offset_noise: true
+    gd_type: xyz
+
+    # model related
+    models:
+        config: imagedream/configs/sd_v2_base_ipmv_chin8_zero_snr.yaml
+        resume: release_models/ImageDream/sd-v2.1-base-4view-ipmv.pt
+        resume_unet: null
+
+    # eval related
+    sampler:
+        target: libs.sample.ImageDreamDiffusionStage2
+        params:
+            mode: pixel
+            num_frames: 6
+            # 6 entries, matching num_frames.
+            camera_views: [1, 2, 3, 4, 5, 0]
+            ref_position: null
+            random_background: false
+            offset_noise: true
+            resize_rate: 1.0
+
+    # config datasets
+    # train_data and eval_data share every parameter except `split`.
+    train_data:
+        target: libs.data.DataHQCRelative
+        params:
+            xyz_base: train_examples
+            base_dir: train_examples
+            caption_csv: train_examples/caption.csv
+            image_size: 256
+            repeat: 1
+            camera_views: [1, 2, 3, 4, 5, 0]
+            ref_indexs: [0, 1, 3, 4]
+            ref_position: null
+            split: train
+            num_frames: 6
+            random_background: true
+            resize_rate: 0.95
+    eval_data:
+        target: libs.data.DataHQCRelative
+        params:
+            xyz_base: train_examples
+            base_dir: train_examples
+            caption_csv: train_examples/caption.csv
+            image_size: 256
+            repeat: 1
+            # In pixel mode, the last image will be covered by the ref image.
+            # NOTE(review): ref_position is null in this config; confirm the
+            # remark above still applies here.
+            camera_views: [1, 2, 3, 4, 5, 0]
+            ref_indexs: [0, 1, 3, 4]
+            ref_position: null
+            split: eval
+            num_frames: 6
+            random_background: true
+            resize_rate: 0.95
+
+    # optimizer related
+    optimizer:
+        # Written with an explicit decimal point: YAML 1.1 loaders such as
+        # PyYAML resolve a bare `5e-5` as a string, not a float.
+        lr: 5.0e-5
+    gradient_accumulation_steps: 12
+
+    # wandb related parameters
+    project: CRM
+    wandb_run_name: CRM-xyz
+    wandb_mode: offline
+
+
+    # training hyperparameters
+    batch_size: 16
+    dataloader:
+        num_workers: 10
+        shuffle: true
+        drop_last: true
+
+    save_interval: 400000
+    log_interval: 5000
+    eval_interval: 50000
+    max_step: 100000000
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#
+# Launch training through `accelerate launch` using acce.yaml.
+#
+# Environment variables (each falls back to its default when unset or empty):
+#   ADDR           passed as --main_process_ip    (default: 127.0.0.1)
+#   WORLD_SIZE     passed as --num_machines       (default: 1)
+#   RANK           passed as --machine_rank       (default: 0)
+#   MASTER_PORT    passed as --main_process_port  (default: 29501)
+#   NUM_PROCESSES  passed as --num_processes      (default: 1)
+
+# set default values for the environment variables
+export OMP_NUM_THREADS=8
+export ADDR="${ADDR:-127.0.0.1}"
+export WORLD_SIZE="${WORLD_SIZE:-1}"
+export RANK="${RANK:-0}"
+export MASTER_PORT="${MASTER_PORT:-29501}"
+
+# Previously hard-coded to 1; the default keeps that behaviour.
+NUM_PROCESSES="${NUM_PROCESSES:-1}"
+
+export WANDB_MODE=offline
+
+accelerate_args="--config_file acce.yaml --num_machines $WORLD_SIZE"
+accelerate_args="$accelerate_args --machine_rank $RANK --num_processes $NUM_PROCESSES"
+accelerate_args="$accelerate_args --main_process_port $MASTER_PORT"
+accelerate_args="$accelerate_args --main_process_ip $ADDR"
+echo "$accelerate_args"
+
+# train stage 1
+# $accelerate_args is deliberately unquoted so it splits into separate arguments.
+# shellcheck disable=SC2086
+accelerate launch $accelerate_args train.py --config configs/nf7_v3_SNR_rd_size_stroke_train.yaml \
+    config.batch_size=1 \
+    config.eval_interval=100
+
+
+# train stage 2
+# accelerate launch $accelerate_args train_stage2.py --config configs/stage2-v2-snr_train.yaml \
+#     config.batch_size=1 \
+#     config.eval_interval=100