We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 1532531 + 4638513 commit a5c23c9 Copy full SHA for a5c23c9
src/instructlab/training/main_ds.py
@@ -505,6 +505,8 @@ def train(
505
is_lora=bool(args.lora_r),
506
hf_format=True,
507
)
508
+ base_logger.debug("RANK (%d) waiting at post-save barrier.", local_rank)
509
+ torch.distributed.barrier()
510
511
# if (
512
# args.save_samples_ds is not None
@@ -533,6 +535,8 @@ def train(
533
535
534
536
epoch=epoch,
537
538
539
540
541
if args.save_last:
542
save_hf_format_accelerate(
0 commit comments