From 636ae06891d5baf7ffcd70434f9c30e730ec1d3a Mon Sep 17 00:00:00 2001
From: gcw_61YBRfIt <chuzhenxing@huawei.com>
Date: Wed, 5 Nov 2025 10:56:10 +0800
Subject: [PATCH 1/8] Added the README and script files for training sql_agent
 on NPU

---
 docs/how-to/ascend_npu.md              |  96 ++++++++++++
 examples/spider/train_sql_agent_npu.py | 200 +++++++++++++++++++++++++
 2 files changed, 296 insertions(+)
 create mode 100644 docs/how-to/ascend_npu.md
 create mode 100644 examples/spider/train_sql_agent_npu.py

diff --git a/docs/how-to/ascend_npu.md b/docs/how-to/ascend_npu.md
new file mode 100644
index 00000000..7f368859
--- /dev/null
+++ b/docs/how-to/ascend_npu.md
@@ -0,0 +1,96 @@
+# agent-lightning x Ascend
+
+We have added support for **Huawei Ascend NPUs** in **agent-lightning**, and provided an example of training a SQL agent based on the **Spider dataset**.
+
+## Hardware Support
+
+- Atlas 200T A2 Box16
+- Atlas 900 A2 PODc
+- Atlas 800T A3
+
+At least **a single 40GB NPU** is required to run the Qwen2.5-Coder-1.5B-Instruct model.
+
+## Environment Setup
+
+### Basic Environment
+
+- Python: 3.11.13
+- CANN: 8.2.RC1
+- torch: 2.7.1+cpu
+- torch_npu: 2.7.1.dev20250724
+
+> For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch).
+
+### Configure Mirror Sources
+
+Before installing dependencies, it is recommended to configure pip mirrors:
+
+```
+pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple
+pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi"
+
+# Mirrors:
+# http://repo.huaweicloud.com/repository/pypi/simple
+# https://download.pytorch.org/whl/cpu/
+# https://mirrors.huaweicloud.com/ascend/repos/pypi
+```
+
+### Install vLLM & vLLM-Ascend
+
+```
+pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com
+pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com
+```
+
+### Install VERL
+
+```
+pip install verl==0.5.0
+```
+
+> ⚠️ Note: To ensure the VERL framework runs correctly on NPU, add the following two lines to the file `verl/utils/vllm_utils.py`:
+
+```
+from vllm_ascend.patch import platform
+from vllm_ascend.patch import worker
+```
+
+### Install agent-lightning
+
+```
+pip install agentlightning==0.2.1
+```
+
+### Install Other Dependencies
+
+```
+pip install autogen-agentchat autogen-ext mcp
+pip install langgraph "langchain[openai]" langchain-community langchain-text-splitters
+pip install sqlparse nltk
+```
+
+## Model
+
+We use the [Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B) model to train the SQL agent. Running requires at least **one 40GB NPU**.
+
+## Dataset
+
+We use the Spider 1.0 dataset, which contains about 8,000 samples, including natural language questions, database schemas, and corresponding standard SQL queries.
+
+Training requires the following three Parquet files:
+
+- `train_spider.parquet`
+- `test_dev_500.parquet`
+- `test_dev.parquet`
+
+## Training Workflow
+
+1. **Prepare the dataset**: Convert the Spider dataset into Parquet format and place it in the `data/` directory.
+
+2. **Configure the environment**: Ensure vLLM-Ascend, VERL, and agent-lightning are correctly installed.
+
+3. **Start training**: Run the following command to begin training the SQL agent:
+
+   ```
+   python train_sql_agent_npu.py qwen
+   ```
diff --git a/examples/spider/train_sql_agent_npu.py b/examples/spider/train_sql_agent_npu.py
new file mode 100644
index 00000000..144d02c1
--- /dev/null
+++ b/examples/spider/train_sql_agent_npu.py
@@ -0,0 +1,200 @@
+"""Train an SQL agent on the Spider dataset using Agent-lightning.
+
+This module provides a training script for SQL agents using different model configurations.
+The script supports three different training configurations:
+
+1. 'fast' - A lightweight configuration optimized for CI testing with reduced epochs
+2. 'qwen' - Standard configuration using Qwen-2.5-Coder-1.5B-Instruct model
+3. 'llama' - Configuration using LLaMA-3.2-1B-Instruct model with JSON formatting
+
+Usage:
+    python train_sql_agent.py fast    # Fast training for CI/testing
+    python train_sql_agent.py qwen    # Standard Qwen model training
+    python train_sql_agent.py llama   # LLaMA model training
+
+The script uses reinforcement learning with VERL framework
+to train agents on the Spider dataset for text-to-SQL generation tasks.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+from copy import deepcopy
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+import pandas as pd
+from sql_agent import LitSQLAgent
+
+import agentlightning as agl
+
+os.environ["VLLM_USE_V1"] = "1"
+
+RL_TRAINING_CONFIG: Dict[str, Any] = {
+    "agentlightning": {
+        "port": 9997
+    },
+    "algorithm": {
+        "adv_estimator": "grpo",
+        "use_kl_in_reward": False,
+    },
+    "data": {
+        "train_files": "data/train_spider.parquet",
+        "val_files": "data/test_dev_500.parquet",
+        "train_batch_size": 32,
+        "max_prompt_length": 4096,
+        "max_response_length": 2048,
+        "truncation": "error",
+    },
+    "actor_rollout_ref": {
+        "rollout": {
+            "tensor_model_parallel_size": 1,
+            "n": 4,
+            "log_prob_micro_batch_size_per_gpu": 4,
+            "multi_turn": {"format": "hermes"},
+            "name": "vllm",
+            "gpu_memory_utilization": 0.8,
+
+        },
+        "actor": {
+            "ppo_mini_batch_size": 32,
+            "ppo_micro_batch_size_per_gpu": 4,
+            "optim": {"lr": 1e-6},
+            "use_torch_compile": False,
+            "use_kl_loss": False,
+            "kl_loss_coef": 0.0,
+            "entropy_coeff": 0,
+            "clip_ratio_low": 0.2,
+            "clip_ratio_high": 0.3,
+            "fsdp_config": {
+                "param_offload": True,
+                "optimizer_offload": True,
+            },
+        },
+        "ref": {
+            "log_prob_micro_batch_size_per_gpu": 8,
+            "fsdp_config": {"param_offload": True},
+        },
+        "model": {
+            "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            "use_remove_padding": True,
+            "enable_gradient_checkpointing": True,
+        },
+    },
+    "trainer": {
+        "n_gpus_per_node": 1,
+        "val_before_train": False,
+        "critic_warmup": 0,
+        "logger": ["console"],
+        "project_name": "AgentLightning",
+        "experiment_name": "spider",
+        "nnodes": 1,
+        "save_freq": 256,
+        "test_freq": 32,
+        "total_epochs": 2,
+        "device": "npu"
+    },
+}
+
+
+def config_train_fast() -> Dict[str, Any]:
+    """A fast training run for CI testing purposes."""
+
+    # `EXPERIMENT_NAME="spider_$(date +%Y%m%d%H%M%S)"`
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+    EXPERIMENT_NAME = f"spider_{timestamp}"
+
+    # `PROJECT_NAME=AgentLightningCI`
+    PROJECT_NAME = "AgentLightningCI"
+
+    # Simulate writing to $GITHUB_OUTPUT if it’s set
+    github_output = os.getenv("GITHUB_OUTPUT")
+    if github_output:
+        with open(github_output, "a") as f:
+            f.write(f"project_name={PROJECT_NAME}\n")
+            f.write(f"run_name={EXPERIMENT_NAME}\n")
+
+    print("Set environment variables:")
+    print(f"PROJECT_NAME={PROJECT_NAME}")
+    print(f"EXPERIMENT_NAME={EXPERIMENT_NAME}")
+
+    config = deepcopy(RL_TRAINING_CONFIG)
+    config["actor_rollout_ref"]["rollout"]["gpu_memory_utilization"] = 0.6
+    config["actor_rollout_ref"]["model"]["path"] = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
+    config["data"]["val_files"] = "data/test_dev.parquet"
+    config["trainer"]["total_epochs"] = 1
+    config["trainer"]["total_training_steps"] = 1
+    config["trainer"]["experiment_name"] = EXPERIMENT_NAME
+    config["trainer"]["project_name"] = PROJECT_NAME
+    config["trainer"]["test_freq"] = 1
+    return config
+
+
+def config_train_qwen() -> Dict[str, Any]:
+    """A configuration for training with Qwen-2.5B."""
+
+    config = deepcopy(RL_TRAINING_CONFIG)
+    return config
+
+
+def config_train_llama() -> Dict[str, Any]:
+    """A configuration for training with LLaMA-3.2-1B-Instruct.
+
+    You will need a `HF_TOKEN` set to run with this config.
+    """
+
+    config = deepcopy(RL_TRAINING_CONFIG)
+    config["actor_rollout_ref"]["rollout"]["multi_turn"]["format"] = "llama3_json"
+    config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["tool_call_parser"] = "llama3_json"
+    config["actor_rollout_ref"]["model"]["path"] = "meta-llama/Llama-3.2-1B-Instruct"
+    return config
+
+
+def train(config: Dict[str, Any], active_agent: Optional[str]) -> None:
+    """Train the SQL agent with the given configuration."""
+
+    agent = LitSQLAgent()
+    algorithm = agl.VERL(config)
+    trainer = agl.Trainer(n_runners=10, algorithm=algorithm, adapter={"agent_match": active_agent})
+    print("Adapter agent match acknowledged:", trainer.adapter.agent_match)  # type: ignore
+
+    train_data = pd.read_parquet(config["data"]["train_files"]).to_dict(orient="records")  # type: ignore
+    val_data = pd.read_parquet(config["data"]["val_files"]).to_dict(orient="records")  # type: ignore
+    trainer.fit(agent, train_dataset=train_data, val_dataset=val_data)  # type: ignore
+
+
+def main() -> None:
+    """Main function to parse arguments and run training."""
+    parser = argparse.ArgumentParser(
+        description="Train an SQL agent on the Spider dataset using different model configurations"
+    )
+
+    parser.add_argument(
+        "config",
+        choices=["fast", "qwen", "llama"],
+        help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B)",
+    )
+
+    parser.add_argument(
+        "--active-agent", type=str, help="Override the active agent name (default: auto-generated based on config)"
+    )
+
+    args = parser.parse_args()
+
+    # Get the appropriate configuration
+    config_functions = {"fast": config_train_fast, "qwen": config_train_qwen, "llama": config_train_llama}
+
+    config = config_functions[args.config]()
+
+    # Set active agent - use provided value or default based on config choice
+    active_agent = args.active_agent
+
+    print(f"Starting training with '{args.config}' configuration...")
+    print(f"Active agent: {active_agent}")
+
+    train(config, active_agent)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 493621adcc44d52878df134774f7d71468b73726 Mon Sep 17 00:00:00 2001
From: gcw_61YBRfIt <chuzhenxing@huawei.com>
Date: Thu, 6 Nov 2025 20:26:30 +0800
Subject: [PATCH 2/8] fix: update according to review comments

---
 docs/how-to/train-sql-agent.md         |  86 +++++++++++
 examples/spider/README.md              |   1 +
 examples/spider/train_sql_agent.py     |  29 ++--
 examples/spider/train_sql_agent_npu.py | 200 -------------------------
 4 files changed, 107 insertions(+), 209 deletions(-)
 delete mode 100644 examples/spider/train_sql_agent_npu.py

diff --git a/docs/how-to/train-sql-agent.md b/docs/how-to/train-sql-agent.md
index 682e441c..e869cfa4 100644
--- a/docs/how-to/train-sql-agent.md
+++ b/docs/how-to/train-sql-agent.md
@@ -320,6 +320,92 @@ For the LLaMA profile, export an `HF_TOKEN` before running so VERL can download
     ```bash
     env RAY_DEBUG=legacy HYDRA_FULL_ERROR=1 VLLM_USE_V1=1 ray start --head --dashboard-host=0.0.0.0
     ```
+### Launch Training with NPUS
+
+We have added support for **Huawei Ascend NPUs** in **agent-lightning** and created a `config_train_npu` function in the training script.
+
+#### Hardware Support
+
+- **Atlas 200T A2 Box16**
+- **Atlas 900 A2 PODc**
+- **Atlas 800T A3**
+
+At least **a single 40GB NPU** is required to run the **Qwen2.5-Coder-1.5B-Instruct** model.
+
+#### Environment Setup
+
+##### Basic Environment
+
+- **Python:** 3.11.13
+- **CANN:** 8.2.RC1
+- **torch:** 2.7.1+cpu
+- **torch_npu:** 2.7.1.dev20250724
+
+> For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch).
+
+##### Configure Mirror Sources
+
+Before installing dependencies, configure the following pip mirrors:
+
+```
+pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple
+pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi"
+```
+
+##### Install vLLM & vLLM-Ascend
+
+```
+pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com
+pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com
+```
+
+##### Install VERL
+
+```
+pip install verl==0.5.0
+```
+
+> ⚠️ **Note:** To ensure the VERL framework runs correctly on NPU, add the following lines to
+>  `verl/utils/vllm_utils.py`:
+
+```
+from vllm_ascend.patch import platform
+from vllm_ascend.patch import worker
+```
+
+##### Install Agent-Lightning
+
+```
+pip install agentlightning==0.2.1
+```
+
+##### Install Other Dependencies
+
+```
+pip install autogen-agentchat autogen-ext mcp
+pip install langgraph "langchain[openai]" langchain-community langchain-text-splitters
+pip install sqlparse nltk
+```
+
+#### Model
+
+We use the [**Qwen2.5-Coder-1.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B) model to train the SQL agent.
+
+#### Dataset
+
+Refer to the method above for obtaining the dataset.
+
+#### Training Workflow
+
+1. **Prepare the dataset**: Convert the Spider dataset into Parquet format and place it in the `data/` directory.
+
+2. **Configure the environment**: Ensure vLLM-Ascend, VERL, and agent-lightning are correctly installed.
+
+3. **Start training**: Run the following command to begin training the SQL agent:
+
+```
+python train_sql_agent_npu.py npu
+```
 
 ### Debugging the Agent without VERL
 
diff --git a/examples/spider/README.md b/examples/spider/README.md
index 511d0c5b..5bc7a4a2 100644
--- a/examples/spider/README.md
+++ b/examples/spider/README.md
@@ -36,6 +36,7 @@ Train a SQL agent using the Qwen2.5-Coder-1.5B-Instruct model with the following
 ```bash
 python train_sql_agent.py qwen
 ```
+If you want to use an NPU for training, please refer to the **Launch Training with NPUS** section in [How to Train a SQL Agent](../../docs/how-to/train-sql-agent.md).
 
 ### Debugging
 
diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py
index ea799d25..84d7b206 100644
--- a/examples/spider/train_sql_agent.py
+++ b/examples/spider/train_sql_agent.py
@@ -78,7 +78,7 @@
             "fsdp_config": {"param_offload": True},
         },
         "model": {
-            "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            "path": "/data/c00940018/Qwen2.5-Coder-1.5B-Instruct",
             "use_remove_padding": True,
             "enable_gradient_checkpointing": True,
         },
@@ -132,11 +132,24 @@ def config_train_fast() -> Dict[str, Any]:
 
 def config_train_qwen() -> Dict[str, Any]:
     """A configuration for training with Qwen-2.5B."""
-
     config = deepcopy(RL_TRAINING_CONFIG)
+    
     return config
 
 
+def config_train_npu() -> Dict[str, Any]:
+    """A configuration for training with NPU."""
+    
+    config = deepcopy(RL_TRAINING_CONFIG)
+    del config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["enable_auto_tool_choice"]
+    del config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["tool_call_parser"]
+    del config["trainer"]["logger"][1]
+    config["actor_rollout_ref"]["actor"]["use_torch_compile"] = False
+    config["trainer"]["val_before_train"] = False
+    config["trainer"]["save_freq"] = 256
+    config["trainer"]["device"] = "npu"
+    return config
+
 def config_train_llama() -> Dict[str, Any]:
     """A configuration for training with LLaMA-3.2-1B-Instruct.
 
@@ -171,21 +184,19 @@ def main() -> None:
 
     parser.add_argument(
         "config",
-        choices=["fast", "qwen", "llama"],
-        help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B)",
+        choices=["fast","qwen","llama", "npu"],
+        help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B),'npu' (Train with NPU)",
     )
 
     parser.add_argument(
         "--active-agent", type=str, help="Override the active agent name (default: auto-generated based on config)"
     )
-
+    
     args = parser.parse_args()
 
     # Get the appropriate configuration
-    config_functions = {"fast": config_train_fast, "qwen": config_train_qwen, "llama": config_train_llama}
-
+    config_functions = {"fast": config_train_fast,"qwen": config_train_qwen,"llama": config_train_llama,"npu": config_train_npu}
     config = config_functions[args.config]()
-
     # Set active agent - use provided value or default based on config choice
     active_agent = args.active_agent
 
@@ -196,4 +207,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/examples/spider/train_sql_agent_npu.py b/examples/spider/train_sql_agent_npu.py
deleted file mode 100644
index 144d02c1..00000000
--- a/examples/spider/train_sql_agent_npu.py
+++ /dev/null
@@ -1,200 +0,0 @@
-"""Train an SQL agent on the Spider dataset using Agent-lightning.
-
-This module provides a training script for SQL agents using different model configurations.
-The script supports three different training configurations:
-
-1. 'fast' - A lightweight configuration optimized for CI testing with reduced epochs
-2. 'qwen' - Standard configuration using Qwen-2.5-Coder-1.5B-Instruct model
-3. 'llama' - Configuration using LLaMA-3.2-1B-Instruct model with JSON formatting
-
-Usage:
-    python train_sql_agent.py fast    # Fast training for CI/testing
-    python train_sql_agent.py qwen    # Standard Qwen model training
-    python train_sql_agent.py llama   # LLaMA model training
-
-The script uses reinforcement learning with VERL framework
-to train agents on the Spider dataset for text-to-SQL generation tasks.
-"""
-
-from __future__ import annotations
-
-import argparse
-import os
-from copy import deepcopy
-from datetime import datetime
-from typing import Any, Dict, Optional
-
-import pandas as pd
-from sql_agent import LitSQLAgent
-
-import agentlightning as agl
-
-os.environ["VLLM_USE_V1"] = "1"
-
-RL_TRAINING_CONFIG: Dict[str, Any] = {
-    "agentlightning": {
-        "port": 9997
-    },
-    "algorithm": {
-        "adv_estimator": "grpo",
-        "use_kl_in_reward": False,
-    },
-    "data": {
-        "train_files": "data/train_spider.parquet",
-        "val_files": "data/test_dev_500.parquet",
-        "train_batch_size": 32,
-        "max_prompt_length": 4096,
-        "max_response_length": 2048,
-        "truncation": "error",
-    },
-    "actor_rollout_ref": {
-        "rollout": {
-            "tensor_model_parallel_size": 1,
-            "n": 4,
-            "log_prob_micro_batch_size_per_gpu": 4,
-            "multi_turn": {"format": "hermes"},
-            "name": "vllm",
-            "gpu_memory_utilization": 0.8,
-
-        },
-        "actor": {
-            "ppo_mini_batch_size": 32,
-            "ppo_micro_batch_size_per_gpu": 4,
-            "optim": {"lr": 1e-6},
-            "use_torch_compile": False,
-            "use_kl_loss": False,
-            "kl_loss_coef": 0.0,
-            "entropy_coeff": 0,
-            "clip_ratio_low": 0.2,
-            "clip_ratio_high": 0.3,
-            "fsdp_config": {
-                "param_offload": True,
-                "optimizer_offload": True,
-            },
-        },
-        "ref": {
-            "log_prob_micro_batch_size_per_gpu": 8,
-            "fsdp_config": {"param_offload": True},
-        },
-        "model": {
-            "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
-            "use_remove_padding": True,
-            "enable_gradient_checkpointing": True,
-        },
-    },
-    "trainer": {
-        "n_gpus_per_node": 1,
-        "val_before_train": False,
-        "critic_warmup": 0,
-        "logger": ["console"],
-        "project_name": "AgentLightning",
-        "experiment_name": "spider",
-        "nnodes": 1,
-        "save_freq": 256,
-        "test_freq": 32,
-        "total_epochs": 2,
-        "device": "npu"
-    },
-}
-
-
-def config_train_fast() -> Dict[str, Any]:
-    """A fast training run for CI testing purposes."""
-
-    # `EXPERIMENT_NAME="spider_$(date +%Y%m%d%H%M%S)"`
-    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-    EXPERIMENT_NAME = f"spider_{timestamp}"
-
-    # `PROJECT_NAME=AgentLightningCI`
-    PROJECT_NAME = "AgentLightningCI"
-
-    # Simulate writing to $GITHUB_OUTPUT if it’s set
-    github_output = os.getenv("GITHUB_OUTPUT")
-    if github_output:
-        with open(github_output, "a") as f:
-            f.write(f"project_name={PROJECT_NAME}\n")
-            f.write(f"run_name={EXPERIMENT_NAME}\n")
-
-    print("Set environment variables:")
-    print(f"PROJECT_NAME={PROJECT_NAME}")
-    print(f"EXPERIMENT_NAME={EXPERIMENT_NAME}")
-
-    config = deepcopy(RL_TRAINING_CONFIG)
-    config["actor_rollout_ref"]["rollout"]["gpu_memory_utilization"] = 0.6
-    config["actor_rollout_ref"]["model"]["path"] = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
-    config["data"]["val_files"] = "data/test_dev.parquet"
-    config["trainer"]["total_epochs"] = 1
-    config["trainer"]["total_training_steps"] = 1
-    config["trainer"]["experiment_name"] = EXPERIMENT_NAME
-    config["trainer"]["project_name"] = PROJECT_NAME
-    config["trainer"]["test_freq"] = 1
-    return config
-
-
-def config_train_qwen() -> Dict[str, Any]:
-    """A configuration for training with Qwen-2.5B."""
-
-    config = deepcopy(RL_TRAINING_CONFIG)
-    return config
-
-
-def config_train_llama() -> Dict[str, Any]:
-    """A configuration for training with LLaMA-3.2-1B-Instruct.
-
-    You will need a `HF_TOKEN` set to run with this config.
-    """
-
-    config = deepcopy(RL_TRAINING_CONFIG)
-    config["actor_rollout_ref"]["rollout"]["multi_turn"]["format"] = "llama3_json"
-    config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["tool_call_parser"] = "llama3_json"
-    config["actor_rollout_ref"]["model"]["path"] = "meta-llama/Llama-3.2-1B-Instruct"
-    return config
-
-
-def train(config: Dict[str, Any], active_agent: Optional[str]) -> None:
-    """Train the SQL agent with the given configuration."""
-
-    agent = LitSQLAgent()
-    algorithm = agl.VERL(config)
-    trainer = agl.Trainer(n_runners=10, algorithm=algorithm, adapter={"agent_match": active_agent})
-    print("Adapter agent match acknowledged:", trainer.adapter.agent_match)  # type: ignore
-
-    train_data = pd.read_parquet(config["data"]["train_files"]).to_dict(orient="records")  # type: ignore
-    val_data = pd.read_parquet(config["data"]["val_files"]).to_dict(orient="records")  # type: ignore
-    trainer.fit(agent, train_dataset=train_data, val_dataset=val_data)  # type: ignore
-
-
-def main() -> None:
-    """Main function to parse arguments and run training."""
-    parser = argparse.ArgumentParser(
-        description="Train an SQL agent on the Spider dataset using different model configurations"
-    )
-
-    parser.add_argument(
-        "config",
-        choices=["fast", "qwen", "llama"],
-        help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B)",
-    )
-
-    parser.add_argument(
-        "--active-agent", type=str, help="Override the active agent name (default: auto-generated based on config)"
-    )
-
-    args = parser.parse_args()
-
-    # Get the appropriate configuration
-    config_functions = {"fast": config_train_fast, "qwen": config_train_qwen, "llama": config_train_llama}
-
-    config = config_functions[args.config]()
-
-    # Set active agent - use provided value or default based on config choice
-    active_agent = args.active_agent
-
-    print(f"Starting training with '{args.config}' configuration...")
-    print(f"Active agent: {active_agent}")
-
-    train(config, active_agent)
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file

From e4258855eaa32503e763f02618efd438051e7c6d Mon Sep 17 00:00:00 2001
From: gcw_61YBRfIt <chuzhenxing@huawei.com>
Date: Thu, 6 Nov 2025 20:32:03 +0800
Subject: [PATCH 3/8] fix: update according to review comments

---
 examples/spider/train_sql_agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py
index 84d7b206..9dc48abc 100644
--- a/examples/spider/train_sql_agent.py
+++ b/examples/spider/train_sql_agent.py
@@ -78,7 +78,7 @@
             "fsdp_config": {"param_offload": True},
         },
         "model": {
-            "path": "/data/c00940018/Qwen2.5-Coder-1.5B-Instruct",
+            "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
             "use_remove_padding": True,
             "enable_gradient_checkpointing": True,
         },
@@ -132,8 +132,8 @@ def config_train_fast() -> Dict[str, Any]:
 
 def config_train_qwen() -> Dict[str, Any]:
     """A configuration for training with Qwen-2.5B."""
-    config = deepcopy(RL_TRAINING_CONFIG)
     
+    config = deepcopy(RL_TRAINING_CONFIG)
     return config
 
 

From 61b4327663ffcbbfe10f1410bf6439a74f8e3cf8 Mon Sep 17 00:00:00 2001
From: gcw_61YBRfIt <chuzhenxing@huawei.com>
Date: Wed, 12 Nov 2025 22:15:33 +0800
Subject: [PATCH 4/8] fix: update according to review comments

---
 .idea/.gitignore                              |  8 ++
 .idea/agent-lightning.iml                     | 15 +++
 .../inspectionProfiles/profiles_settings.xml  |  6 ++
 .idea/modules.xml                             |  8 ++
 .idea/vcs.xml                                 |  6 ++
 docs/how-to/ascend_npu.md                     | 96 -------------------
 docs/how-to/train-sql-agent.md                | 39 ++------
 examples/spider/train_sql_agent.py            | 18 ++--
 8 files changed, 63 insertions(+), 133 deletions(-)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/agent-lightning.iml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 delete mode 100644 docs/how-to/ascend_npu.md

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 00000000..35410cac
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/agent-lightning.iml b/.idea/agent-lightning.iml
new file mode 100644
index 00000000..5fdd65ba
--- /dev/null
+++ b/.idea/agent-lightning.iml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="py.test" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 00000000..105ce2da
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 00000000..2fca1d23
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/agent-lightning.iml" filepath="$PROJECT_DIR$/.idea/agent-lightning.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 00000000..35eb1ddf
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/docs/how-to/ascend_npu.md b/docs/how-to/ascend_npu.md
deleted file mode 100644
index 7f368859..00000000
--- a/docs/how-to/ascend_npu.md
+++ /dev/null
@@ -1,96 +0,0 @@
-# agent-lightning x Ascend
-
-We have added support for **Huawei Ascend NPUs** in **agent-lightning**, and provided an example of training a SQL agent based on the **Spider dataset**.
-
-## Hardware Support
-
-- Atlas 200T A2 Box16
-- Atlas 900 A2 PODc
-- Atlas 800T A3
-
-At least **a single 40GB NPU** is required to run the Qwen2.5-Coder-1.5B-Instruct model.
-
-## Environment Setup
-
-### Basic Environment
-
-- Python: 3.11.13
-- CANN: 8.2.RC1
-- torch: 2.7.1+cpu
-- torch_npu: 2.7.1.dev20250724
-
-> For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch).
-
-### Configure Mirror Sources
-
-Before installing dependencies, it is recommended to configure pip mirrors:
-
-```
-pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple
-pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi"
-
-# Mirrors:
-# http://repo.huaweicloud.com/repository/pypi/simple
-# https://download.pytorch.org/whl/cpu/
-# https://mirrors.huaweicloud.com/ascend/repos/pypi
-```
-
-### Install vLLM & vLLM-Ascend
-
-```
-pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com
-pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com
-```
-
-### Install VERL
-
-```
-pip install verl==0.5.0
-```
-
-> ⚠️ Note: To ensure the VERL framework runs correctly on NPU, add the following two lines to the file `verl/utils/vllm_utils.py`:
-
-```
-from vllm_ascend.patch import platform
-from vllm_ascend.patch import worker
-```
-
-### Install agent-lightning
-
-```
-pip install agentlightning==0.2.1
-```
-
-### Install Other Dependencies
-
-```
-pip install autogen-agentchat autogen-ext mcp
-pip install langgraph "langchain[openai]" langchain-community langchain-text-splitters
-pip install sqlparse nltk
-```
-
-## Model
-
-We use the [Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B) model to train the SQL agent. Running requires at least **one 40GB NPU**.
-
-## Dataset
-
-We use the Spider 1.0 dataset, which contains about 8,000 samples, including natural language questions, database schemas, and corresponding standard SQL queries.
-
-Training requires the following three Parquet files:
-
-- `train_spider.parquet`
-- `test_dev_500.parquet`
-- `test_dev.parquet`
-
-## Training Workflow
-
-1. **Prepare the dataset**: Convert the Spider dataset into Parquet format and place it in the `data/` directory.
-
-2. **Configure the environment**: Ensure vLLM-Ascend, VERL, and agent-lightning are correctly installed.
-
-3. **Start training**: Run the following command to begin training the SQL agent:
-
-   ```
-   python train_sql_agent_npu.py qwen
-   ```
diff --git a/docs/how-to/train-sql-agent.md b/docs/how-to/train-sql-agent.md
index e869cfa4..cb9892a6 100644
--- a/docs/how-to/train-sql-agent.md
+++ b/docs/how-to/train-sql-agent.md
@@ -334,6 +334,8 @@ At least **a single 40GB NPU** is required to run the **Qwen2.5-Coder-1.5B-Instr
 
 #### Environment Setup
 
+In addition to the dependencies originally required by the project, the following dependencies must be installed if you want to run it in an NPU environment.
+
 ##### Basic Environment
 
 - **Python:** 3.11.13
@@ -365,46 +367,21 @@ pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com
 pip install verl==0.5.0
 ```
 
-> ⚠️ **Note:** To ensure the VERL framework runs correctly on NPU, add the following lines to
->  `verl/utils/vllm_utils.py`:
+> Reference: [https://github.com/vllm-project/vllm-ascend/issues/1776](https://github.com/vllm-project/vllm-ascend/issues/1776?utm_source=chatgpt.com)
+> ⚠️**Note:** To ensure the VERL framework runs correctly on NPU, add the following lines to
+> `verl/utils/vllm_utils.py`:
 
 ```
 from vllm_ascend.patch import platform
 from vllm_ascend.patch import worker
 ```
 
-##### Install Agent-Lightning
-
-```
-pip install agentlightning==0.2.1
-```
-
-##### Install Other Dependencies
-
-```
-pip install autogen-agentchat autogen-ext mcp
-pip install langgraph "langchain[openai]" langchain-community langchain-text-splitters
-pip install sqlparse nltk
-```
-
-#### Model
-
-We use the [**Qwen2.5-Coder-1.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B) model to train the SQL agent.
-
-#### Dataset
-
-Refer to the method above for obtaining the dataset.
-
-#### Training Workflow
-
-1. **Prepare the dataset**: Convert the Spider dataset into Parquet format and place it in the `data/` directory.
-
-2. **Configure the environment**: Ensure vLLM-Ascend, VERL, and agent-lightning are correctly installed.
+#### Launch Training
 
-3. **Start training**: Run the following command to begin training the SQL agent:
+After the above dependencies have been installed，from [`examples/spider`]({{ src("examples/spider") }})Run the following script command:
 
 ```
-python train_sql_agent_npu.py npu
+python train_sql_agent.py npu
 ```
 
 ### Debugging the Agent without VERL
diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py
index 9dc48abc..81294f03 100644
--- a/examples/spider/train_sql_agent.py
+++ b/examples/spider/train_sql_agent.py
@@ -132,14 +132,14 @@ def config_train_fast() -> Dict[str, Any]:
 
 def config_train_qwen() -> Dict[str, Any]:
     """A configuration for training with Qwen-2.5B."""
-    
+
     config = deepcopy(RL_TRAINING_CONFIG)
     return config
 
 
 def config_train_npu() -> Dict[str, Any]:
     """A configuration for training with NPU."""
-    
+
     config = deepcopy(RL_TRAINING_CONFIG)
     del config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["enable_auto_tool_choice"]
     del config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["tool_call_parser"]
@@ -150,6 +150,7 @@ def config_train_npu() -> Dict[str, Any]:
     config["trainer"]["device"] = "npu"
     return config
 
+
 def config_train_llama() -> Dict[str, Any]:
     """A configuration for training with LLaMA-3.2-1B-Instruct.
 
@@ -184,18 +185,23 @@ def main() -> None:
 
     parser.add_argument(
         "config",
-        choices=["fast","qwen","llama", "npu"],
+        choices=["fast", "qwen", "llama", "npu"],
         help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B),'npu' (Train with NPU)",
     )
 
     parser.add_argument(
         "--active-agent", type=str, help="Override the active agent name (default: auto-generated based on config)"
     )
-    
+
     args = parser.parse_args()
 
     # Get the appropriate configuration
-    config_functions = {"fast": config_train_fast,"qwen": config_train_qwen,"llama": config_train_llama,"npu": config_train_npu}
+    config_functions = {
+        "fast": config_train_fast,
+        "qwen": config_train_qwen,
+        "llama": config_train_llama,
+        "npu": config_train_npu,
+    }
     config = config_functions[args.config]()
     # Set active agent - use provided value or default based on config choice
     active_agent = args.active_agent
@@ -207,4 +213,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From d9dfb716556e9f7e81190558804144ae5ceeb82b Mon Sep 17 00:00:00 2001
From: gcw_61YBRfIt <chuzhenxing@huawei.com>
Date: Wed, 12 Nov 2025 22:28:49 +0800
Subject: [PATCH 5/8] delete .idea folder

---
 .idea/.gitignore                               |  8 --------
 .idea/agent-lightning.iml                      | 15 ---------------
 .idea/inspectionProfiles/profiles_settings.xml |  6 ------
 .idea/modules.xml                              |  8 --------
 .idea/vcs.xml                                  |  6 ------
 examples/spider/train_sql_agent.py             |  6 +++++-
 6 files changed, 5 insertions(+), 44 deletions(-)
 delete mode 100644 .idea/.gitignore
 delete mode 100644 .idea/agent-lightning.iml
 delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 delete mode 100644 .idea/modules.xml
 delete mode 100644 .idea/vcs.xml

diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 35410cac..00000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# 默认忽略的文件
-/shelf/
-/workspace.xml
-# 基于编辑器的 HTTP 客户端请求
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
diff --git a/.idea/agent-lightning.iml b/.idea/agent-lightning.iml
deleted file mode 100644
index 5fdd65ba..00000000
--- a/.idea/agent-lightning.iml
+++ /dev/null
@@ -1,15 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="PyDocumentationSettings">
-    <option name="format" value="PLAIN" />
-    <option name="myDocStringFormat" value="Plain" />
-  </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="py.test" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2da..00000000
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index 2fca1d23..00000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/agent-lightning.iml" filepath="$PROJECT_DIR$/.idea/agent-lightning.iml" />
-    </modules>
-  </component>
-</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 35eb1ddf..00000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py
index 81294f03..63192656 100644
--- a/examples/spider/train_sql_agent.py
+++ b/examples/spider/train_sql_agent.py
@@ -78,7 +78,7 @@
             "fsdp_config": {"param_offload": True},
         },
         "model": {
-            "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            "path": "/data/c00940018/Qwen2.5-Coder-1.5B-Instruct",
             "use_remove_padding": True,
             "enable_gradient_checkpointing": True,
         },
@@ -132,8 +132,12 @@ def config_train_fast() -> Dict[str, Any]:
 
 def config_train_qwen() -> Dict[str, Any]:
     """A configuration for training with Qwen-2.5B."""
+<<<<<<< HEAD
 
+=======
+>>>>>>> parent of e425885 (fix: update according to review comments)
     config = deepcopy(RL_TRAINING_CONFIG)
+    
     return config
 
 

From b1b353b7cc94e4579ba89d2b5b76b37472076ca0 Mon Sep 17 00:00:00 2001
From: gcw_61YBRfIt <chuzhenxing@huawei.com>
Date: Wed, 12 Nov 2025 22:52:55 +0800
Subject: [PATCH 6/8] fix: update according to review comments

---
 examples/spider/train_sql_agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py
index 63192656..fe62c922 100644
--- a/examples/spider/train_sql_agent.py
+++ b/examples/spider/train_sql_agent.py
@@ -78,7 +78,7 @@
             "fsdp_config": {"param_offload": True},
         },
         "model": {
-            "path": "/data/c00940018/Qwen2.5-Coder-1.5B-Instruct",
+            "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
             "use_remove_padding": True,
             "enable_gradient_checkpointing": True,
         },
@@ -137,7 +137,7 @@ def config_train_qwen() -> Dict[str, Any]:
 =======
 >>>>>>> parent of e425885 (fix: update according to review comments)
     config = deepcopy(RL_TRAINING_CONFIG)
-    
+
     return config
 
 

From 59779a01d5938b904e970a920d7cab2554d3c074 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Thu, 13 Nov 2025 09:54:52 +0800
Subject: [PATCH 7/8] reorganize documentation

---
 docs/how-to/train-sql-agent.md     | 65 ++++++++++--------------------
 examples/spider/train_sql_agent.py |  4 +-
 2 files changed, 22 insertions(+), 47 deletions(-)

diff --git a/docs/how-to/train-sql-agent.md b/docs/how-to/train-sql-agent.md
index cb9892a6..1e96f0bf 100644
--- a/docs/how-to/train-sql-agent.md
+++ b/docs/how-to/train-sql-agent.md
@@ -320,61 +320,38 @@ For the LLaMA profile, export an `HF_TOKEN` before running so VERL can download
     ```bash
     env RAY_DEBUG=legacy HYDRA_FULL_ERROR=1 VLLM_USE_V1=1 ray start --head --dashboard-host=0.0.0.0
     ```
-### Launch Training with NPUS
 
-We have added support for **Huawei Ascend NPUs** in **agent-lightning**, and create a  function `config_train_npu` in the script .
+!!! note "Launching Training with NPUs"
 
-#### Hardware Support
+    The example also supports running on **Huawei Ascend NPUs**. This feature was contributed by [Teams from Huawei](https://github.com/microsoft/agent-lightning/pull/272). To use it, select the `npu` configuration, which is built by the function `config_train_npu` in the script.
 
-- **Atlas 200T A2 Box16**
-- **Atlas 900 A2 PODc**
-- **Atlas 800T A3**
+    **Hardware Supported:** Atlas 200T A2 Box16, Atlas 900 A2 PODc, Atlas 800T A3. At least **a single 40GB NPU** is required to run the **Qwen2.5-Coder-1.5B-Instruct** model.
 
-At least **a single 40GB NPU** is required to run the **Qwen2.5-Coder-1.5B-Instruct** model.
+    **Environment Setup:** Python 3.11.13, CANN 8.2.RC1, torch 2.7.1+cpu, torch_npu 2.7.1.dev20250724. For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch).
 
-#### Environment Setup
+    Before installing dependencies, configure the following pip mirrors:
 
-In addition to the dependencies originally required by the project, the following dependencies must be installed if you want to run it in an NPU environment.
-
-##### Basic Environment
-
-- **Python:** 3.11.13
-- **CANN:** 8.2.RC1
-- **torch:** 2.7.1+cpu
-- **torch_npu:** 2.7.1.dev20250724
-
-> For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch).
-
-##### Configure Mirror Sources
-
-Before installing dependencies, configure the following pip mirrors:
-
-```
-pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple
-pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi"
-```
-
-##### Install vLLM & vLLM-Ascend
+    ```
+    pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple
+    pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi"
+    ```
 
-```
-pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com
-pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com
-```
+    Then install vLLM, vLLM-Ascend and VERL:
 
-##### Install VERL
+    ```
+    pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com
+    pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com
+    pip install verl==0.5.0
+    ```
 
-```
-pip install verl==0.5.0
-```
+    To ensure the VERL framework runs correctly on NPU, add the following lines to `verl/utils/vllm_utils.py`:
 
-> Reference: [https://github.com/vllm-project/vllm-ascend/issues/1776](https://github.com/vllm-project/vllm-ascend/issues/1776?utm_source=chatgpt.com)
-> ⚠️**Note:** To ensure the VERL framework runs correctly on NPU, add the following lines to
-> `verl/utils/vllm_utils.py`:
+    ```python
+    from vllm_ascend.patch import platform
+    from vllm_ascend.patch import worker
+    ```
 
-```
-from vllm_ascend.patch import platform
-from vllm_ascend.patch import worker
-```
+    See the following reference for more details: [https://github.com/vllm-project/vllm-ascend/issues/1776](https://github.com/vllm-project/vllm-ascend/issues/1776).
 
 #### Launch Training
 
diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py
index fe62c922..1ebd5f44 100644
--- a/examples/spider/train_sql_agent.py
+++ b/examples/spider/train_sql_agent.py
@@ -132,10 +132,7 @@ def config_train_fast() -> Dict[str, Any]:
 
 def config_train_qwen() -> Dict[str, Any]:
     """A configuration for training with Qwen-2.5B."""
-<<<<<<< HEAD
 
-=======
->>>>>>> parent of e425885 (fix: update according to review comments)
     config = deepcopy(RL_TRAINING_CONFIG)
 
     return config
@@ -207,6 +204,7 @@ def main() -> None:
         "npu": config_train_npu,
     }
     config = config_functions[args.config]()
+
     # Set active agent - use provided value or default based on config choice
     active_agent = args.active_agent
 

From 9801d980ec332db33714e6614ba77a5502730191 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Thu, 13 Nov 2025 09:57:04 +0800
Subject: [PATCH 8/8] minor fix

---
 docs/how-to/train-sql-agent.md     | 14 ++++++--------
 examples/spider/README.md          |  1 +
 examples/spider/train_sql_agent.py |  1 -
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/docs/how-to/train-sql-agent.md b/docs/how-to/train-sql-agent.md
index 1e96f0bf..b85d173f 100644
--- a/docs/how-to/train-sql-agent.md
+++ b/docs/how-to/train-sql-agent.md
@@ -331,14 +331,14 @@ For the LLaMA profile, export an `HF_TOKEN` before running so VERL can download
 
     Before installing dependencies, configure the following pip mirrors:
 
-    ```
+    ```bash
     pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple
     pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi"
     ```
 
     Then install vLLM, vLLM-Ascend and VERL:
 
-    ```
+    ```bash
     pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com
     pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com
     pip install verl==0.5.0
@@ -353,13 +353,11 @@ For the LLaMA profile, export an `HF_TOKEN` before running so VERL can download
 
     See the following reference for more details: [https://github.com/vllm-project/vllm-ascend/issues/1776](https://github.com/vllm-project/vllm-ascend/issues/1776).
 
-#### Launch Training
-
-After the above dependencies have been installed，from [`examples/spider`]({{ src("examples/spider") }})Run the following script command:
+    After installing the dependencies above, run the following command from [`examples/spider`]({{ src("examples/spider") }}):
 
-```
-python train_sql_agent.py npu
-```
+    ```bash
+    python train_sql_agent.py npu
+    ```
 
 ### Debugging the Agent without VERL
 
diff --git a/examples/spider/README.md b/examples/spider/README.md
index 5bc7a4a2..6b370bcb 100644
--- a/examples/spider/README.md
+++ b/examples/spider/README.md
@@ -36,6 +36,7 @@ Train a SQL agent using the Qwen2.5-Coder-1.5B-Instruct model with the following
 ```bash
 python train_sql_agent.py qwen
 ```
+
 If you want to use an NPU for training, please refer to the **Launch Training with NPUS** section in [How to Train a SQL Agent](../../docs/how-to/train-sql-agent.md).
 
 ### Debugging
diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py
index 1ebd5f44..9602a367 100644
--- a/examples/spider/train_sql_agent.py
+++ b/examples/spider/train_sql_agent.py
@@ -134,7 +134,6 @@ def config_train_qwen() -> Dict[str, Any]:
     """A configuration for training with Qwen-2.5B."""
 
     config = deepcopy(RL_TRAINING_CONFIG)
-
     return config