Commit

fix: add grad spike detection
percevalw committed Feb 12, 2025
1 parent 9c9a5fe commit cf105f2
Showing 7 changed files with 201 additions and 56 deletions.
changelog.md: 2 additions, 0 deletions
@@ -9,11 +9,13 @@
 - Added a new unit test suite to validate the tuning script.
 - `docs/tutorials/tuning.md`: New tutorial for hyperparameter tuning.
 - Provided a [detailed tutorial](./docs/tutorials/tuning.md) on hyperparameter tuning, covering usage scenarios and configuration options.
+- Added grad spike detection to the `edsnlp.train` script, and per-weight-layer gradient logging.

 ### Fixed

 - Support packaging with poetry 2.0
 - Solve pickling issues with multiprocessing when pytorch is installed
+- Fixed mini-batch accumulation for multi-task training

 # v0.15.0 (2024-12-13)
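The changelog entry above mentions grad spike detection and per-weight-layer gradient logging, but the commit body for `edsnlp/train.py` is not shown on this page. Below is a minimal sketch of what these two features typically look like in a PyTorch training loop, assuming a running-mean heuristic for spikes; the helper names `per_layer_grad_norms` and `is_spike` are hypothetical and are not EDS-NLP's actual API:

```python
import torch

def per_layer_grad_norms(model: torch.nn.Module) -> dict:
    # L2 norm of each parameter's gradient, keyed by parameter name
    # (the kind of per-layer metric one would send to a logger).
    return {
        name: p.grad.norm().item()
        for name, p in model.named_parameters()
        if p.grad is not None
    }

def is_spike(norm, history, window=100, factor=10.0):
    # Flag a spike when the current total norm exceeds `factor` times
    # the running mean of the previously recorded norms.
    recent = history[-window:]
    return bool(recent) and norm > factor * (sum(recent) / len(recent))

model = torch.nn.Linear(8, 1)
optim = torch.optim.SGD(model.parameters(), lr=0.1)
history = []

for step in range(200):
    loss = model(torch.randn(16, 8)).pow(2).mean()
    optim.zero_grad()
    loss.backward()
    layer_norms = per_layer_grad_norms(model)
    total = sum(n ** 2 for n in layer_norms.values()) ** 0.5
    if is_spike(total, history):
        print(f"step {step}: gradient spike, total norm {total:.2f}")
        print({k: round(v, 4) for k, v in layer_norms.items()})
    history.append(total)
    optim.step()
```

Reporting the per-layer norms only when a spike fires keeps logging overhead low while still pointing at the layer responsible for the blow-up.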
docs/tutorials/training.md: 2 additions, 2 deletions
@@ -179,7 +179,7 @@ EDS-NLP supports training models either [from the command line](#from-the-comman
   val_data: ${ val_data }
   max_steps: 2000
   validation_interval: ${ train.max_steps//10 }
-  max_grad_norm: 1.0
+  grad_max_norm: 1.0
   scorer: ${ scorer }
   optimizer: ${ optimizer }
   # Do preprocessing in parallel on 1 worker
@@ -284,7 +284,7 @@ EDS-NLP supports training models either [from the command line](#from-the-comman
     val_data=val_data,
     scorer={"ner": ner_metric},
     optimizer=optimizer,
-    max_grad_norm=1.0,
+    grad_max_norm=1.0,
     output_dir="artifacts",
     # Do preprocessing in parallel on 1 worker
     num_workers=1,
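For context on the lines changed above: `grad_max_norm` (formerly `max_grad_norm`) is the gradient-clipping threshold for the training loop. In plain PyTorch this role is usually played by `torch.nn.utils.clip_grad_norm_`; the sketch below assumes that mapping, and only the parameter rename itself is taken from the diff:

```python
import torch

model = torch.nn.Linear(4, 2)
loss = model(torch.randn(3, 4)).sum()
loss.backward()

# Rescales all gradients in place so their global L2 norm is at most
# grad_max_norm, and returns the total norm measured before clipping.
grad_max_norm = 1.0  # same value as in the diff above
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_max_norm).item()
print(f"total grad norm before clipping: {total_norm:.3f}")
```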
docs/tutorials/tuning.md: 1 addition, 1 deletion
@@ -230,7 +230,7 @@ train:
   val_data: ${ val_data }
   max_steps: 400
   validation_interval: ${ train.max_steps//2 }
-  max_grad_norm: 1.0
+  grad_max_norm: 1.0
   scorer: ${ scorer }
   optimizer: ${ optimizer }
   num_workers: 2
(4 more changed files not loaded)
