2 files changed: +5 −0

@@ -127,6 +127,8 @@ jobs:
   args: "--recompute-swiglu --recompute-norm"
 - name: "Offload Opt"
   args: "--offload-opt-m --offload-opt-v --offload-master"
+- name: "Offload Gradient"
+  args: "--shard-gradients --offload-grads"
 # While not strictly a recomputation, chunked attention should be bitwise identical, too
 - name: "Chunked attention"
   args: "--recompute-att --attn-bwd-chunks=2"
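
The new "Offload Gradient" entry adds one more flag combination to this matrix, which, per the comment above, is expected to remain bitwise identical to the baseline run. As a hedged illustration only, assuming a conventional argparse-style CLI (the project's actual parser is not shown in this diff), the two flags would parse to two booleans:

# Hypothetical sketch: how the "Offload Gradient" args might parse.
# The argparse wiring below is an assumption for illustration; only the
# flag names --shard-gradients and --offload-grads come from the diff.
import argparse
import shlex

parser = argparse.ArgumentParser()
parser.add_argument("--shard-gradients", action="store_true")
parser.add_argument("--offload-grads", action="store_true")

args = parser.parse_args(shlex.split("--shard-gradients --offload-grads"))
assert args.shard_gradients and args.offload_grads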
@@ -75,6 +75,9 @@ def _create_options(config: TrainingConfig) -> pyllmq.LLamaOptions:
     options.offload_grads = config.offload_grads
     options.persistent_quants = config.persistent_quants

+    options.shard_gradients = config.shard_gradients
+    options.shard_weights = config.shard_weights
+
     if config.matmul_dtype:
         options.matmul_type = config.matmul_dtype
     if config.gradient_dtype:
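
The two added lines mirror the new sharding fields from the training config onto the pyllmq options object, following the same copy pattern as the surrounding lines. A self-contained sketch of that pattern follows; the stand-in options object and the TrainingConfig definition are assumptions, since pyllmq.LLamaOptions and the full config class are not part of this diff:

# Sketch of the config -> options mirroring pattern used in _create_options.
# SimpleNamespace stands in for pyllmq.LLamaOptions, and TrainingConfig here
# is a hypothetical subset of the real class; field names match the diff.
from dataclasses import dataclass
from types import SimpleNamespace

@dataclass
class TrainingConfig:
    offload_grads: bool = False
    persistent_quants: bool = False
    shard_gradients: bool = False
    shard_weights: bool = False

def _create_options(config: TrainingConfig) -> SimpleNamespace:
    options = SimpleNamespace()  # stand-in for pyllmq.LLamaOptions()
    options.offload_grads = config.offload_grads
    options.persistent_quants = config.persistent_quants
    # New in this change: forward the sharding flags as well.
    options.shard_gradients = config.shard_gradients
    options.shard_weights = config.shard_weights
    return options

opts = _create_options(TrainingConfig(shard_gradients=True, offload_grads=True))
assert opts.shard_gradients and opts.offload_grads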