From f0b4b757ef5013689b32c4906dc0566d2c79ed4e Mon Sep 17 00:00:00 2001 From: Ethan Reid Date: Tue, 7 Apr 2026 11:11:55 -0700 Subject: [PATCH] change finetune docs default lr to 2e-4 --- docs/finetuning/http-api-reference.mdx | 4 ++-- docs/finetuning/python-sdk.mdx | 4 ++-- docs/finetuning/quickstart.mdx | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/finetuning/http-api-reference.mdx b/docs/finetuning/http-api-reference.mdx index 9d0aa78..1185601 100644 --- a/docs/finetuning/http-api-reference.mdx +++ b/docs/finetuning/http-api-reference.mdx @@ -337,7 +337,7 @@ Apply one training step using RL or SFT. "rewards": [0.8, 0.3, 0.6, 0.5] } ], - "lr": 0.002 + "lr": 2e-4 } ``` @@ -345,7 +345,7 @@ Apply one training step using RL or SFT. |-------|------|----------|-------------| | finetune_id | string | yes | The finetune ID to train | | groups | array | yes | List of training groups | -| lr | number | no | Learning rate (default: 0.002) | +| lr | number | no | Learning rate (default: 2e-4) | **RL group** diff --git a/docs/finetuning/python-sdk.mdx b/docs/finetuning/python-sdk.mdx index 1a79c84..b0451ae 100644 --- a/docs/finetuning/python-sdk.mdx +++ b/docs/finetuning/python-sdk.mdx @@ -148,13 +148,13 @@ Results are in **completion order**, not submission order. Apply one training step. ```python -step = ft.train_step(groups, lr=0.002) +step = ft.train_step(groups, lr=2e-4) ``` | Parameter | Type | Default | Description | |-----------|------|---------|-------------| | groups | list | required | RL and/or SFT group dicts | -| lr | float | 0.002 | Learning rate | +| lr | float | 2e-4 | Learning rate | Returns: diff --git a/docs/finetuning/quickstart.mdx b/docs/finetuning/quickstart.mdx index ec56129..1c9f1f1 100644 --- a/docs/finetuning/quickstart.mdx +++ b/docs/finetuning/quickstart.mdx @@ -79,7 +79,7 @@ for i in range(20): "request": response["request"], "rollouts": response["rollouts"], "rewards": rewards, - }], lr=0.001) + }], lr=2e-4) print(f"step={step['step']} reward={sum(rewards)/len(rewards):.2f}") ``` @@ -115,7 +115,7 @@ for _, (example, response) in zip(range(20), ft.rollout_stream(requests)): "request": response["request"], "rollouts": response["rollouts"], "rewards": rewards, - }], lr=0.001) + }], lr=2e-4) print(f"step={step['step']} reward={sum(rewards)/len(rewards):.2f}") ```