
Commit d99e066

binary search
1 parent 174a600 commit d99e066

File tree

1 file changed (+0, -5 lines)

src/models/llama_model.cpp

Lines changed: 0 additions & 5 deletions
@@ -384,23 +384,18 @@ void LLamaModel::backward(Tensor inputs, Tensor targets, NCCLCommunicator& comm,
     rmsnorm_backward(rs->DActs[L-1].DResFFN.Value, d_lnf_w, rs->RMSNormScratch, rs->DActs[L - 1].DResFFN.Value, rs->DLNF,
                      rs->get_res_ffn(L-1, main_stream), Parameters->get_lnf(main_stream), rs->LNF_Rstd,
                      quant_abs_max_ptr(rs->DActs[L-1].DResFFN), B, T, C, rs->DeviceProp, main_stream);
-    CUDA_CHECK(cudaDeviceSynchronize());
     rs->release_res_ffn(L-1, main_stream);
 
     Parameters->release_lnf(main_stream);
-    CUDA_CHECK(cudaDeviceSynchronize());
     Grads->notify_lnf_w(main_stream, comm);
-    CUDA_CHECK(cudaDeviceSynchronize());
     rs->fetch_res_ffn(L-2, comm.stream());
-    CUDA_CHECK(cudaDeviceSynchronize());
     Parameters->gather_block(L - 1, comm, *rs);
     // now backward all the layers
     for (int l = L-1; l >= 0; l--) {
         NvtxRange layer_range("Layer", l);
         auto& dw = Grads->get_block_full(l, main_stream, comm, accumulate);
 
         // prefetch previous layer
-        CUDA_CHECK(cudaDeviceSynchronize());
         if(l > 1) {
             rs->fetch_res_ffn(l-2, comm.stream());
         }
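Note on the commit: all five deleted lines are CUDA_CHECK(cudaDeviceSynchronize()) probes, and the commit message "binary search" suggests they were temporary markers used to bisect an asynchronous CUDA failure down to a single launch, then cleaned up here. Below is a minimal, self-contained sketch of that debugging pattern. This is an assumption about the workflow, not code from this repository, and the CUDA_CHECK macro shown is a hypothetical stand-in for the project's own definition.

    // Sketch of binary-search debugging with device syncs (assumed workflow,
    // not from this repo). Kernel launches are asynchronous, so a fault in
    // one kernel normally surfaces at some later, unrelated API call; a
    // cudaDeviceSynchronize() probe pins the error to the launches before it.
    #include <cstdio>
    #include <cstdlib>
    #include <cuda_runtime.h>

    // Hypothetical stand-in for the project's CUDA_CHECK macro.
    #define CUDA_CHECK(call)                                               \
        do {                                                               \
            cudaError_t err_ = (call);                                     \
            if (err_ != cudaSuccess) {                                     \
                fprintf(stderr, "CUDA error: %s at %s:%d\n",               \
                        cudaGetErrorString(err_), __FILE__, __LINE__);     \
                exit(EXIT_FAILURE);                                        \
            }                                                              \
        } while (0)

    __global__ void fill(float* p)  { p[threadIdx.x] = 1.0f; }
    __global__ void scale(float* p) { p[threadIdx.x] *= 2.0f; }

    int main() {
        float* d = nullptr;
        CUDA_CHECK(cudaMalloc(&d, 32 * sizeof(float)));

        fill<<<1, 32>>>(d);
        // Debug sync: if `fill` faulted, the error is reported right here
        // instead of at a later call. Moving such probes earlier or later
        // halves the suspect region each time, hence "binary search".
        CUDA_CHECK(cudaDeviceSynchronize());

        scale<<<1, 32>>>(d);
        CUDA_CHECK(cudaDeviceSynchronize());

        CUDA_CHECK(cudaFree(d));
        return 0;
    }

Once the faulty launch is identified, the probes are removed, as in this diff, since each full-device sync stalls the GPU and defeats the compute/communication overlap that the separate main_stream and comm.stream() appear designed to provide.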
