From 636ae06891d5baf7ffcd70434f9c30e730ec1d3a Mon Sep 17 00:00:00 2001 From: gcw_61YBRfIt Date: Wed, 5 Nov 2025 10:56:10 +0800 Subject: [PATCH 1/8] Added the README and script files for training sql_agent on NPU --- docs/how-to/ascend_npu.md | 96 ++++++++++++ examples/spider/train_sql_agent_npu.py | 200 +++++++++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 docs/how-to/ascend_npu.md create mode 100644 examples/spider/train_sql_agent_npu.py diff --git a/docs/how-to/ascend_npu.md b/docs/how-to/ascend_npu.md new file mode 100644 index 00000000..7f368859 --- /dev/null +++ b/docs/how-to/ascend_npu.md @@ -0,0 +1,96 @@ +# agent-lightning x Ascend + +We have added support for **Huawei Ascend NPUs** in **agent-lightning**, and provided an example of training a SQL agent based on the **Spider dataset**. + +## Hardware Support + +- Atlas 200T A2 Box16 +- Atlas 900 A2 PODc +- Atlas 800T A3 + +At least **a single 40GB NPU** is required to run the Qwen2.5-Coder-1.5B-Instruct model. + +## Environment Setup + +### Basic Environment + +- Python: 3.11.13 +- CANN: 8.2.RC1 +- torch: 2.7.1+cpu +- torch_npu: 2.7.1.dev20250724 + +> For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch). + +### Configure Mirror Sources + +Before installing dependencies, it is recommended to configure pip mirrors: + +``` +pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple +pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi" + +# Mirrors: +# http://repo.huaweicloud.com/repository/pypi/simple +# https://download.pytorch.org/whl/cpu/ +# https://mirrors.huaweicloud.com/ascend/repos/pypi +``` + +### Install vLLM & vLLM-Ascend + +``` +pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com +pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com +``` + +### Install VERL + +``` +pip install verl==0.5.0 +``` + +> ⚠️ Note: To ensure the VERL framework runs correctly on NPU, add the following two lines to the file `verl/utils/vllm_utils.py`: + +``` +from vllm_ascend.patch import platform +from vllm_ascend.patch import worker +``` + +### Install agent-lightning + +``` +pip install agentlightning==0.2.1 +``` + +### Install Other Dependencies + +``` +pip install autogen-agentchat autogen-ext mcp +pip install langgraph "langchain[openai]" langchain-community langchain-text-splitters +pip install sqlparse nltk +``` + +## Model + +We use the [Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B) model to train the SQL agent. Running requires at least **one 40GB NPU**. + +## Dataset + +We use the Spider 1.0 dataset, which contains about 8,000 samples, including natural language questions, database schemas, and corresponding standard SQL queries. + +Training requires the following three Parquet files: + +- `train_spider.parquet` +- `test_dev_500.parquet` +- `test_dev.parquet` + +## Training Workflow + +1. **Prepare the dataset**: Convert the Spider dataset into Parquet format and place it in the `data/` directory. + +2. **Configure the environment**: Ensure vLLM-Ascend, VERL, and agent-lightning are correctly installed. + +3. **Start training**: Run the following command to begin training the SQL agent: + + ``` + python train_sql_agent_npu.py qwen + ``` diff --git a/examples/spider/train_sql_agent_npu.py b/examples/spider/train_sql_agent_npu.py new file mode 100644 index 00000000..144d02c1 --- /dev/null +++ b/examples/spider/train_sql_agent_npu.py @@ -0,0 +1,200 @@ +"""Train an SQL agent on the Spider dataset using Agent-lightning. + +This module provides a training script for SQL agents using different model configurations. +The script supports three different training configurations: + +1. 'fast' - A lightweight configuration optimized for CI testing with reduced epochs +2. 'qwen' - Standard configuration using Qwen-2.5-Coder-1.5B-Instruct model +3. 'llama' - Configuration using LLaMA-3.2-1B-Instruct model with JSON formatting + +Usage: + python train_sql_agent.py fast # Fast training for CI/testing + python train_sql_agent.py qwen # Standard Qwen model training + python train_sql_agent.py llama # LLaMA model training + +The script uses reinforcement learning with VERL framework +to train agents on the Spider dataset for text-to-SQL generation tasks. +""" + +from __future__ import annotations + +import argparse +import os +from copy import deepcopy +from datetime import datetime +from typing import Any, Dict, Optional + +import pandas as pd +from sql_agent import LitSQLAgent + +import agentlightning as agl + +os.environ["VLLM_USE_V1"] = "1" + +RL_TRAINING_CONFIG: Dict[str, Any] = { + "agentlightning": { + "port": 9997 + }, + "algorithm": { + "adv_estimator": "grpo", + "use_kl_in_reward": False, + }, + "data": { + "train_files": "data/train_spider.parquet", + "val_files": "data/test_dev_500.parquet", + "train_batch_size": 32, + "max_prompt_length": 4096, + "max_response_length": 2048, + "truncation": "error", + }, + "actor_rollout_ref": { + "rollout": { + "tensor_model_parallel_size": 1, + "n": 4, + "log_prob_micro_batch_size_per_gpu": 4, + "multi_turn": {"format": "hermes"}, + "name": "vllm", + "gpu_memory_utilization": 0.8, + + }, + "actor": { + "ppo_mini_batch_size": 32, + "ppo_micro_batch_size_per_gpu": 4, + "optim": {"lr": 1e-6}, + "use_torch_compile": False, + "use_kl_loss": False, + "kl_loss_coef": 0.0, + "entropy_coeff": 0, + "clip_ratio_low": 0.2, + "clip_ratio_high": 0.3, + "fsdp_config": { + "param_offload": True, + "optimizer_offload": True, + }, + }, + "ref": { + "log_prob_micro_batch_size_per_gpu": 8, + "fsdp_config": {"param_offload": True}, + }, + "model": { + "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct", + "use_remove_padding": True, + "enable_gradient_checkpointing": True, + }, + }, + "trainer": { + "n_gpus_per_node": 1, + "val_before_train": False, + "critic_warmup": 0, + "logger": ["console"], + "project_name": "AgentLightning", + "experiment_name": "spider", + "nnodes": 1, + "save_freq": 256, + "test_freq": 32, + "total_epochs": 2, + "device": "npu" + }, +} + + +def config_train_fast() -> Dict[str, Any]: + """A fast training run for CI testing purposes.""" + + # `EXPERIMENT_NAME="spider_$(date +%Y%m%d%H%M%S)"` + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + EXPERIMENT_NAME = f"spider_{timestamp}" + + # `PROJECT_NAME=AgentLightningCI` + PROJECT_NAME = "AgentLightningCI" + + # Simulate writing to $GITHUB_OUTPUT if it’s set + github_output = os.getenv("GITHUB_OUTPUT") + if github_output: + with open(github_output, "a") as f: + f.write(f"project_name={PROJECT_NAME}\n") + f.write(f"run_name={EXPERIMENT_NAME}\n") + + print("Set environment variables:") + print(f"PROJECT_NAME={PROJECT_NAME}") + print(f"EXPERIMENT_NAME={EXPERIMENT_NAME}") + + config = deepcopy(RL_TRAINING_CONFIG) + config["actor_rollout_ref"]["rollout"]["gpu_memory_utilization"] = 0.6 + config["actor_rollout_ref"]["model"]["path"] = "Qwen/Qwen2.5-Coder-0.5B-Instruct" + config["data"]["val_files"] = "data/test_dev.parquet" + config["trainer"]["total_epochs"] = 1 + config["trainer"]["total_training_steps"] = 1 + config["trainer"]["experiment_name"] = EXPERIMENT_NAME + config["trainer"]["project_name"] = PROJECT_NAME + config["trainer"]["test_freq"] = 1 + return config + + +def config_train_qwen() -> Dict[str, Any]: + """A configuration for training with Qwen-2.5B.""" + + config = deepcopy(RL_TRAINING_CONFIG) + return config + + +def config_train_llama() -> Dict[str, Any]: + """A configuration for training with LLaMA-3.2-1B-Instruct. + + You will need a `HF_TOKEN` set to run with this config. + """ + + config = deepcopy(RL_TRAINING_CONFIG) + config["actor_rollout_ref"]["rollout"]["multi_turn"]["format"] = "llama3_json" + config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["tool_call_parser"] = "llama3_json" + config["actor_rollout_ref"]["model"]["path"] = "meta-llama/Llama-3.2-1B-Instruct" + return config + + +def train(config: Dict[str, Any], active_agent: Optional[str]) -> None: + """Train the SQL agent with the given configuration.""" + + agent = LitSQLAgent() + algorithm = agl.VERL(config) + trainer = agl.Trainer(n_runners=10, algorithm=algorithm, adapter={"agent_match": active_agent}) + print("Adapter agent match acknowledged:", trainer.adapter.agent_match) # type: ignore + + train_data = pd.read_parquet(config["data"]["train_files"]).to_dict(orient="records") # type: ignore + val_data = pd.read_parquet(config["data"]["val_files"]).to_dict(orient="records") # type: ignore + trainer.fit(agent, train_dataset=train_data, val_dataset=val_data) # type: ignore + + +def main() -> None: + """Main function to parse arguments and run training.""" + parser = argparse.ArgumentParser( + description="Train an SQL agent on the Spider dataset using different model configurations" + ) + + parser.add_argument( + "config", + choices=["fast", "qwen", "llama"], + help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B)", + ) + + parser.add_argument( + "--active-agent", type=str, help="Override the active agent name (default: auto-generated based on config)" + ) + + args = parser.parse_args() + + # Get the appropriate configuration + config_functions = {"fast": config_train_fast, "qwen": config_train_qwen, "llama": config_train_llama} + + config = config_functions[args.config]() + + # Set active agent - use provided value or default based on config choice + active_agent = args.active_agent + + print(f"Starting training with '{args.config}' configuration...") + print(f"Active agent: {active_agent}") + + train(config, active_agent) + + +if __name__ == "__main__": + main() \ No newline at end of file From 493621adcc44d52878df134774f7d71468b73726 Mon Sep 17 00:00:00 2001 From: gcw_61YBRfIt Date: Thu, 6 Nov 2025 20:26:30 +0800 Subject: [PATCH 2/8] fix: update according to review comments --- docs/how-to/train-sql-agent.md | 86 +++++++++++ examples/spider/README.md | 1 + examples/spider/train_sql_agent.py | 29 ++-- examples/spider/train_sql_agent_npu.py | 200 ------------------------- 4 files changed, 107 insertions(+), 209 deletions(-) delete mode 100644 examples/spider/train_sql_agent_npu.py diff --git a/docs/how-to/train-sql-agent.md b/docs/how-to/train-sql-agent.md index 682e441c..e869cfa4 100644 --- a/docs/how-to/train-sql-agent.md +++ b/docs/how-to/train-sql-agent.md @@ -320,6 +320,92 @@ For the LLaMA profile, export an `HF_TOKEN` before running so VERL can download ```bash env RAY_DEBUG=legacy HYDRA_FULL_ERROR=1 VLLM_USE_V1=1 ray start --head --dashboard-host=0.0.0.0 ``` +### Launch Training with NPUS + +We have added support for **Huawei Ascend NPUs** in **agent-lightning**, and create a function `config_train_npu` in the script . + +#### Hardware Support + +- **Atlas 200T A2 Box16** +- **Atlas 900 A2 PODc** +- **Atlas 800T A3** + +At least **a single 40GB NPU** is required to run the **Qwen2.5-Coder-1.5B-Instruct** model. + +#### Environment Setup + +##### Basic Environment + +- **Python:** 3.11.13 +- **CANN:** 8.2.RC1 +- **torch:** 2.7.1+cpu +- **torch_npu:** 2.7.1.dev20250724 + +> For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch). + +##### Configure Mirror Sources + +Before installing dependencies, configure the following pip mirrors: + +``` +pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple +pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi" +``` + +##### Install vLLM & vLLM-Ascend + +``` +pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com +pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com +``` + +##### Install VERL + +``` +pip install verl==0.5.0 +``` + +> ⚠️ **Note:** To ensure the VERL framework runs correctly on NPU, add the following lines to +> `verl/utils/vllm_utils.py`: + +``` +from vllm_ascend.patch import platform +from vllm_ascend.patch import worker +``` + +##### Install Agent-Lightning + +``` +pip install agentlightning==0.2.1 +``` + +##### Install Other Dependencies + +``` +pip install autogen-agentchat autogen-ext mcp +pip install langgraph "langchain[openai]" langchain-community langchain-text-splitters +pip install sqlparse nltk +``` + +#### Model + +We use the [**Qwen2.5-Coder-1.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B) model to train the SQL agent. + +#### Dataset + +Refer to the method above for obtaining the dataset. + +#### Training Workflow + +1. **Prepare the dataset**: Convert the Spider dataset into Parquet format and place it in the `data/` directory. + +2. **Configure the environment**: Ensure vLLM-Ascend, VERL, and agent-lightning are correctly installed. + +3. **Start training**: Run the following command to begin training the SQL agent: + +``` +python train_sql_agent_npu.py npu +``` ### Debugging the Agent without VERL diff --git a/examples/spider/README.md b/examples/spider/README.md index 511d0c5b..5bc7a4a2 100644 --- a/examples/spider/README.md +++ b/examples/spider/README.md @@ -36,6 +36,7 @@ Train a SQL agent using the Qwen2.5-Coder-1.5B-Instruct model with the following ```bash python train_sql_agent.py qwen ``` +If you want to use an NPU for training, please refer to the **Launch Training with NPUS** section in [How to Train a SQL Agent](../../docs/how-to/train-sql-agent.md). ### Debugging diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py index ea799d25..84d7b206 100644 --- a/examples/spider/train_sql_agent.py +++ b/examples/spider/train_sql_agent.py @@ -78,7 +78,7 @@ "fsdp_config": {"param_offload": True}, }, "model": { - "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct", + "path": "/data/c00940018/Qwen2.5-Coder-1.5B-Instruct", "use_remove_padding": True, "enable_gradient_checkpointing": True, }, @@ -132,11 +132,24 @@ def config_train_fast() -> Dict[str, Any]: def config_train_qwen() -> Dict[str, Any]: """A configuration for training with Qwen-2.5B.""" - config = deepcopy(RL_TRAINING_CONFIG) + return config +def config_train_npu() -> Dict[str, Any]: + """A configuration for training with NPU.""" + + config = deepcopy(RL_TRAINING_CONFIG) + del config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["enable_auto_tool_choice"] + del config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["tool_call_parser"] + del config["trainer"]["logger"][1] + config["actor_rollout_ref"]["actor"]["use_torch_compile"] = False + config["trainer"]["val_before_train"] = False + config["trainer"]["save_freq"] = 256 + config["trainer"]["device"] = "npu" + return config + def config_train_llama() -> Dict[str, Any]: """A configuration for training with LLaMA-3.2-1B-Instruct. @@ -171,21 +184,19 @@ def main() -> None: parser.add_argument( "config", - choices=["fast", "qwen", "llama"], - help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B)", + choices=["fast","qwen","llama", "npu"], + help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B),'npu' (Train with NPU)", ) parser.add_argument( "--active-agent", type=str, help="Override the active agent name (default: auto-generated based on config)" ) - + args = parser.parse_args() # Get the appropriate configuration - config_functions = {"fast": config_train_fast, "qwen": config_train_qwen, "llama": config_train_llama} - + config_functions = {"fast": config_train_fast,"qwen": config_train_qwen,"llama": config_train_llama,"npu": config_train_npu} config = config_functions[args.config]() - # Set active agent - use provided value or default based on config choice active_agent = args.active_agent @@ -196,4 +207,4 @@ def main() -> None: if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/examples/spider/train_sql_agent_npu.py b/examples/spider/train_sql_agent_npu.py deleted file mode 100644 index 144d02c1..00000000 --- a/examples/spider/train_sql_agent_npu.py +++ /dev/null @@ -1,200 +0,0 @@ -"""Train an SQL agent on the Spider dataset using Agent-lightning. - -This module provides a training script for SQL agents using different model configurations. -The script supports three different training configurations: - -1. 'fast' - A lightweight configuration optimized for CI testing with reduced epochs -2. 'qwen' - Standard configuration using Qwen-2.5-Coder-1.5B-Instruct model -3. 'llama' - Configuration using LLaMA-3.2-1B-Instruct model with JSON formatting - -Usage: - python train_sql_agent.py fast # Fast training for CI/testing - python train_sql_agent.py qwen # Standard Qwen model training - python train_sql_agent.py llama # LLaMA model training - -The script uses reinforcement learning with VERL framework -to train agents on the Spider dataset for text-to-SQL generation tasks. -""" - -from __future__ import annotations - -import argparse -import os -from copy import deepcopy -from datetime import datetime -from typing import Any, Dict, Optional - -import pandas as pd -from sql_agent import LitSQLAgent - -import agentlightning as agl - -os.environ["VLLM_USE_V1"] = "1" - -RL_TRAINING_CONFIG: Dict[str, Any] = { - "agentlightning": { - "port": 9997 - }, - "algorithm": { - "adv_estimator": "grpo", - "use_kl_in_reward": False, - }, - "data": { - "train_files": "data/train_spider.parquet", - "val_files": "data/test_dev_500.parquet", - "train_batch_size": 32, - "max_prompt_length": 4096, - "max_response_length": 2048, - "truncation": "error", - }, - "actor_rollout_ref": { - "rollout": { - "tensor_model_parallel_size": 1, - "n": 4, - "log_prob_micro_batch_size_per_gpu": 4, - "multi_turn": {"format": "hermes"}, - "name": "vllm", - "gpu_memory_utilization": 0.8, - - }, - "actor": { - "ppo_mini_batch_size": 32, - "ppo_micro_batch_size_per_gpu": 4, - "optim": {"lr": 1e-6}, - "use_torch_compile": False, - "use_kl_loss": False, - "kl_loss_coef": 0.0, - "entropy_coeff": 0, - "clip_ratio_low": 0.2, - "clip_ratio_high": 0.3, - "fsdp_config": { - "param_offload": True, - "optimizer_offload": True, - }, - }, - "ref": { - "log_prob_micro_batch_size_per_gpu": 8, - "fsdp_config": {"param_offload": True}, - }, - "model": { - "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct", - "use_remove_padding": True, - "enable_gradient_checkpointing": True, - }, - }, - "trainer": { - "n_gpus_per_node": 1, - "val_before_train": False, - "critic_warmup": 0, - "logger": ["console"], - "project_name": "AgentLightning", - "experiment_name": "spider", - "nnodes": 1, - "save_freq": 256, - "test_freq": 32, - "total_epochs": 2, - "device": "npu" - }, -} - - -def config_train_fast() -> Dict[str, Any]: - """A fast training run for CI testing purposes.""" - - # `EXPERIMENT_NAME="spider_$(date +%Y%m%d%H%M%S)"` - timestamp = datetime.now().strftime("%Y%m%d%H%M%S") - EXPERIMENT_NAME = f"spider_{timestamp}" - - # `PROJECT_NAME=AgentLightningCI` - PROJECT_NAME = "AgentLightningCI" - - # Simulate writing to $GITHUB_OUTPUT if it’s set - github_output = os.getenv("GITHUB_OUTPUT") - if github_output: - with open(github_output, "a") as f: - f.write(f"project_name={PROJECT_NAME}\n") - f.write(f"run_name={EXPERIMENT_NAME}\n") - - print("Set environment variables:") - print(f"PROJECT_NAME={PROJECT_NAME}") - print(f"EXPERIMENT_NAME={EXPERIMENT_NAME}") - - config = deepcopy(RL_TRAINING_CONFIG) - config["actor_rollout_ref"]["rollout"]["gpu_memory_utilization"] = 0.6 - config["actor_rollout_ref"]["model"]["path"] = "Qwen/Qwen2.5-Coder-0.5B-Instruct" - config["data"]["val_files"] = "data/test_dev.parquet" - config["trainer"]["total_epochs"] = 1 - config["trainer"]["total_training_steps"] = 1 - config["trainer"]["experiment_name"] = EXPERIMENT_NAME - config["trainer"]["project_name"] = PROJECT_NAME - config["trainer"]["test_freq"] = 1 - return config - - -def config_train_qwen() -> Dict[str, Any]: - """A configuration for training with Qwen-2.5B.""" - - config = deepcopy(RL_TRAINING_CONFIG) - return config - - -def config_train_llama() -> Dict[str, Any]: - """A configuration for training with LLaMA-3.2-1B-Instruct. - - You will need a `HF_TOKEN` set to run with this config. - """ - - config = deepcopy(RL_TRAINING_CONFIG) - config["actor_rollout_ref"]["rollout"]["multi_turn"]["format"] = "llama3_json" - config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["tool_call_parser"] = "llama3_json" - config["actor_rollout_ref"]["model"]["path"] = "meta-llama/Llama-3.2-1B-Instruct" - return config - - -def train(config: Dict[str, Any], active_agent: Optional[str]) -> None: - """Train the SQL agent with the given configuration.""" - - agent = LitSQLAgent() - algorithm = agl.VERL(config) - trainer = agl.Trainer(n_runners=10, algorithm=algorithm, adapter={"agent_match": active_agent}) - print("Adapter agent match acknowledged:", trainer.adapter.agent_match) # type: ignore - - train_data = pd.read_parquet(config["data"]["train_files"]).to_dict(orient="records") # type: ignore - val_data = pd.read_parquet(config["data"]["val_files"]).to_dict(orient="records") # type: ignore - trainer.fit(agent, train_dataset=train_data, val_dataset=val_data) # type: ignore - - -def main() -> None: - """Main function to parse arguments and run training.""" - parser = argparse.ArgumentParser( - description="Train an SQL agent on the Spider dataset using different model configurations" - ) - - parser.add_argument( - "config", - choices=["fast", "qwen", "llama"], - help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B)", - ) - - parser.add_argument( - "--active-agent", type=str, help="Override the active agent name (default: auto-generated based on config)" - ) - - args = parser.parse_args() - - # Get the appropriate configuration - config_functions = {"fast": config_train_fast, "qwen": config_train_qwen, "llama": config_train_llama} - - config = config_functions[args.config]() - - # Set active agent - use provided value or default based on config choice - active_agent = args.active_agent - - print(f"Starting training with '{args.config}' configuration...") - print(f"Active agent: {active_agent}") - - train(config, active_agent) - - -if __name__ == "__main__": - main() \ No newline at end of file From e4258855eaa32503e763f02618efd438051e7c6d Mon Sep 17 00:00:00 2001 From: gcw_61YBRfIt Date: Thu, 6 Nov 2025 20:32:03 +0800 Subject: [PATCH 3/8] fix: update according to review comments --- examples/spider/train_sql_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py index 84d7b206..9dc48abc 100644 --- a/examples/spider/train_sql_agent.py +++ b/examples/spider/train_sql_agent.py @@ -78,7 +78,7 @@ "fsdp_config": {"param_offload": True}, }, "model": { - "path": "/data/c00940018/Qwen2.5-Coder-1.5B-Instruct", + "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct", "use_remove_padding": True, "enable_gradient_checkpointing": True, }, @@ -132,8 +132,8 @@ def config_train_fast() -> Dict[str, Any]: def config_train_qwen() -> Dict[str, Any]: """A configuration for training with Qwen-2.5B.""" - config = deepcopy(RL_TRAINING_CONFIG) + config = deepcopy(RL_TRAINING_CONFIG) return config From 61b4327663ffcbbfe10f1410bf6439a74f8e3cf8 Mon Sep 17 00:00:00 2001 From: gcw_61YBRfIt Date: Wed, 12 Nov 2025 22:15:33 +0800 Subject: [PATCH 4/8] fix: update according to review comments --- .idea/.gitignore | 8 ++ .idea/agent-lightning.iml | 15 +++ .../inspectionProfiles/profiles_settings.xml | 6 ++ .idea/modules.xml | 8 ++ .idea/vcs.xml | 6 ++ docs/how-to/ascend_npu.md | 96 ------------------- docs/how-to/train-sql-agent.md | 39 ++------ examples/spider/train_sql_agent.py | 18 ++-- 8 files changed, 63 insertions(+), 133 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/agent-lightning.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml delete mode 100644 docs/how-to/ascend_npu.md diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..35410cac --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/agent-lightning.iml b/.idea/agent-lightning.iml new file mode 100644 index 00000000..5fdd65ba --- /dev/null +++ b/.idea/agent-lightning.iml @@ -0,0 +1,15 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000..105ce2da --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..2fca1d23 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..35eb1ddf --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/docs/how-to/ascend_npu.md b/docs/how-to/ascend_npu.md deleted file mode 100644 index 7f368859..00000000 --- a/docs/how-to/ascend_npu.md +++ /dev/null @@ -1,96 +0,0 @@ -# agent-lightning x Ascend - -We have added support for **Huawei Ascend NPUs** in **agent-lightning**, and provided an example of training a SQL agent based on the **Spider dataset**. - -## Hardware Support - -- Atlas 200T A2 Box16 -- Atlas 900 A2 PODc -- Atlas 800T A3 - -At least **a single 40GB NPU** is required to run the Qwen2.5-Coder-1.5B-Instruct model. - -## Environment Setup - -### Basic Environment - -- Python: 3.11.13 -- CANN: 8.2.RC1 -- torch: 2.7.1+cpu -- torch_npu: 2.7.1.dev20250724 - -> For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch). - -### Configure Mirror Sources - -Before installing dependencies, it is recommended to configure pip mirrors: - -``` -pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple -pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi" - -# Mirrors: -# http://repo.huaweicloud.com/repository/pypi/simple -# https://download.pytorch.org/whl/cpu/ -# https://mirrors.huaweicloud.com/ascend/repos/pypi -``` - -### Install vLLM & vLLM-Ascend - -``` -pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com -pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com -``` - -### Install VERL - -``` -pip install verl==0.5.0 -``` - -> ⚠️ Note: To ensure the VERL framework runs correctly on NPU, add the following two lines to the file `verl/utils/vllm_utils.py`: - -``` -from vllm_ascend.patch import platform -from vllm_ascend.patch import worker -``` - -### Install agent-lightning - -``` -pip install agentlightning==0.2.1 -``` - -### Install Other Dependencies - -``` -pip install autogen-agentchat autogen-ext mcp -pip install langgraph "langchain[openai]" langchain-community langchain-text-splitters -pip install sqlparse nltk -``` - -## Model - -We use the [Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B) model to train the SQL agent. Running requires at least **one 40GB NPU**. - -## Dataset - -We use the Spider 1.0 dataset, which contains about 8,000 samples, including natural language questions, database schemas, and corresponding standard SQL queries. - -Training requires the following three Parquet files: - -- `train_spider.parquet` -- `test_dev_500.parquet` -- `test_dev.parquet` - -## Training Workflow - -1. **Prepare the dataset**: Convert the Spider dataset into Parquet format and place it in the `data/` directory. - -2. **Configure the environment**: Ensure vLLM-Ascend, VERL, and agent-lightning are correctly installed. - -3. **Start training**: Run the following command to begin training the SQL agent: - - ``` - python train_sql_agent_npu.py qwen - ``` diff --git a/docs/how-to/train-sql-agent.md b/docs/how-to/train-sql-agent.md index e869cfa4..cb9892a6 100644 --- a/docs/how-to/train-sql-agent.md +++ b/docs/how-to/train-sql-agent.md @@ -334,6 +334,8 @@ At least **a single 40GB NPU** is required to run the **Qwen2.5-Coder-1.5B-Instr #### Environment Setup +In addition to the dependencies originally required by the project, the following dependencies must be installed if you want to run it in an NPU environment. + ##### Basic Environment - **Python:** 3.11.13 @@ -365,46 +367,21 @@ pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com pip install verl==0.5.0 ``` -> ⚠️ **Note:** To ensure the VERL framework runs correctly on NPU, add the following lines to -> `verl/utils/vllm_utils.py`: +> Reference: [https://github.com/vllm-project/vllm-ascend/issues/1776](https://github.com/vllm-project/vllm-ascend/issues/1776?utm_source=chatgpt.com) +> ⚠️**Note:** To ensure the VERL framework runs correctly on NPU, add the following lines to +> `verl/utils/vllm_utils.py`: ``` from vllm_ascend.patch import platform from vllm_ascend.patch import worker ``` -##### Install Agent-Lightning - -``` -pip install agentlightning==0.2.1 -``` - -##### Install Other Dependencies - -``` -pip install autogen-agentchat autogen-ext mcp -pip install langgraph "langchain[openai]" langchain-community langchain-text-splitters -pip install sqlparse nltk -``` - -#### Model - -We use the [**Qwen2.5-Coder-1.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B) model to train the SQL agent. - -#### Dataset - -Refer to the method above for obtaining the dataset. - -#### Training Workflow - -1. **Prepare the dataset**: Convert the Spider dataset into Parquet format and place it in the `data/` directory. - -2. **Configure the environment**: Ensure vLLM-Ascend, VERL, and agent-lightning are correctly installed. +#### Launch Training -3. **Start training**: Run the following command to begin training the SQL agent: +After the above dependencies have been installed,from [`examples/spider`]({{ src("examples/spider") }})Run the following script command: ``` -python train_sql_agent_npu.py npu +python train_sql_agent.py npu ``` ### Debugging the Agent without VERL diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py index 9dc48abc..81294f03 100644 --- a/examples/spider/train_sql_agent.py +++ b/examples/spider/train_sql_agent.py @@ -132,14 +132,14 @@ def config_train_fast() -> Dict[str, Any]: def config_train_qwen() -> Dict[str, Any]: """A configuration for training with Qwen-2.5B.""" - + config = deepcopy(RL_TRAINING_CONFIG) return config def config_train_npu() -> Dict[str, Any]: """A configuration for training with NPU.""" - + config = deepcopy(RL_TRAINING_CONFIG) del config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["enable_auto_tool_choice"] del config["actor_rollout_ref"]["rollout"]["engine_kwargs"]["vllm"]["tool_call_parser"] @@ -150,6 +150,7 @@ def config_train_npu() -> Dict[str, Any]: config["trainer"]["device"] = "npu" return config + def config_train_llama() -> Dict[str, Any]: """A configuration for training with LLaMA-3.2-1B-Instruct. @@ -184,18 +185,23 @@ def main() -> None: parser.add_argument( "config", - choices=["fast","qwen","llama", "npu"], + choices=["fast", "qwen", "llama", "npu"], help="Training configuration: 'fast' (CI testing), 'qwen' (Qwen-2.5-Coder-1.5B), 'llama' (LLaMA-3.2-3B),'npu' (Train with NPU)", ) parser.add_argument( "--active-agent", type=str, help="Override the active agent name (default: auto-generated based on config)" ) - + args = parser.parse_args() # Get the appropriate configuration - config_functions = {"fast": config_train_fast,"qwen": config_train_qwen,"llama": config_train_llama,"npu": config_train_npu} + config_functions = { + "fast": config_train_fast, + "qwen": config_train_qwen, + "llama": config_train_llama, + "npu": config_train_npu, + } config = config_functions[args.config]() # Set active agent - use provided value or default based on config choice active_agent = args.active_agent @@ -207,4 +213,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() From d9dfb716556e9f7e81190558804144ae5ceeb82b Mon Sep 17 00:00:00 2001 From: gcw_61YBRfIt Date: Wed, 12 Nov 2025 22:28:49 +0800 Subject: [PATCH 5/8] delete .idea folder --- .idea/.gitignore | 8 -------- .idea/agent-lightning.iml | 15 --------------- .idea/inspectionProfiles/profiles_settings.xml | 6 ------ .idea/modules.xml | 8 -------- .idea/vcs.xml | 6 ------ examples/spider/train_sql_agent.py | 6 +++++- 6 files changed, 5 insertions(+), 44 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/agent-lightning.iml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 35410cac..00000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# 默认忽略的文件 -/shelf/ -/workspace.xml -# 基于编辑器的 HTTP 客户端请求 -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/.idea/agent-lightning.iml b/.idea/agent-lightning.iml deleted file mode 100644 index 5fdd65ba..00000000 --- a/.idea/agent-lightning.iml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2da..00000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 2fca1d23..00000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1ddf..00000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py index 81294f03..63192656 100644 --- a/examples/spider/train_sql_agent.py +++ b/examples/spider/train_sql_agent.py @@ -78,7 +78,7 @@ "fsdp_config": {"param_offload": True}, }, "model": { - "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct", + "path": "/data/c00940018/Qwen2.5-Coder-1.5B-Instruct", "use_remove_padding": True, "enable_gradient_checkpointing": True, }, @@ -132,8 +132,12 @@ def config_train_fast() -> Dict[str, Any]: def config_train_qwen() -> Dict[str, Any]: """A configuration for training with Qwen-2.5B.""" +<<<<<<< HEAD +======= +>>>>>>> parent of e425885 (fix: update according to review comments) config = deepcopy(RL_TRAINING_CONFIG) + return config From b1b353b7cc94e4579ba89d2b5b76b37472076ca0 Mon Sep 17 00:00:00 2001 From: gcw_61YBRfIt Date: Wed, 12 Nov 2025 22:52:55 +0800 Subject: [PATCH 6/8] fix: update according to review comments --- examples/spider/train_sql_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py index 63192656..fe62c922 100644 --- a/examples/spider/train_sql_agent.py +++ b/examples/spider/train_sql_agent.py @@ -78,7 +78,7 @@ "fsdp_config": {"param_offload": True}, }, "model": { - "path": "/data/c00940018/Qwen2.5-Coder-1.5B-Instruct", + "path": "Qwen/Qwen2.5-Coder-1.5B-Instruct", "use_remove_padding": True, "enable_gradient_checkpointing": True, }, @@ -137,7 +137,7 @@ def config_train_qwen() -> Dict[str, Any]: ======= >>>>>>> parent of e425885 (fix: update according to review comments) config = deepcopy(RL_TRAINING_CONFIG) - + return config From 59779a01d5938b904e970a920d7cab2554d3c074 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Thu, 13 Nov 2025 09:54:52 +0800 Subject: [PATCH 7/8] reorganize documentation --- docs/how-to/train-sql-agent.md | 65 ++++++++++-------------------- examples/spider/train_sql_agent.py | 4 +- 2 files changed, 22 insertions(+), 47 deletions(-) diff --git a/docs/how-to/train-sql-agent.md b/docs/how-to/train-sql-agent.md index cb9892a6..1e96f0bf 100644 --- a/docs/how-to/train-sql-agent.md +++ b/docs/how-to/train-sql-agent.md @@ -320,61 +320,38 @@ For the LLaMA profile, export an `HF_TOKEN` before running so VERL can download ```bash env RAY_DEBUG=legacy HYDRA_FULL_ERROR=1 VLLM_USE_V1=1 ray start --head --dashboard-host=0.0.0.0 ``` -### Launch Training with NPUS -We have added support for **Huawei Ascend NPUs** in **agent-lightning**, and create a function `config_train_npu` in the script . +!!! note "Launching Training with NPUs" -#### Hardware Support + The example also supports running with **Huawei Ascend NPUs**. This feature is contributed by [Teams from Huawei](https://github.com/microsoft/agent-lightning/pull/272). To use it, resort to the function `config_train_npu` in the script. -- **Atlas 200T A2 Box16** -- **Atlas 900 A2 PODc** -- **Atlas 800T A3** + **Hardware Supported:** Atlas 200T A2 Box16, Atlas 900 A2 PODc, Atlas 800T A3. At least **a single 40GB NPU** is required to run the **Qwen2.5-Coder-1.5B-Instruct** model. -At least **a single 40GB NPU** is required to run the **Qwen2.5-Coder-1.5B-Instruct** model. + **Environment Setup:** Python 3.11.13, CANN 8.2.RC1, torch 2.7.1+cpu, torch_npu 2.7.1.dev20250724. For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch). -#### Environment Setup + Before installing dependencies, configure the following pip mirrors: -In addition to the dependencies originally required by the project, the following dependencies must be installed if you want to run it in an NPU environment. - -##### Basic Environment - -- **Python:** 3.11.13 -- **CANN:** 8.2.RC1 -- **torch:** 2.7.1+cpu -- **torch_npu:** 2.7.1.dev20250724 - -> For basic environment preparation, please refer to this [document](https://gitcode.com/Ascend/pytorch). - -##### Configure Mirror Sources - -Before installing dependencies, configure the following pip mirrors: - -``` -pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple -pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi" -``` - -##### Install vLLM & vLLM-Ascend + ``` + pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple + pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi" + ``` -``` -pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com -pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com -``` + Then install vLLM, vLLM-Ascend and VERL: -##### Install VERL + ``` + pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com + pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com + pip install verl==0.5.0 + ``` -``` -pip install verl==0.5.0 -``` + To ensure the VERL framework runs correctly on NPU, add the following lines to `verl/utils/vllm_utils.py`: -> Reference: [https://github.com/vllm-project/vllm-ascend/issues/1776](https://github.com/vllm-project/vllm-ascend/issues/1776?utm_source=chatgpt.com) -> ⚠️**Note:** To ensure the VERL framework runs correctly on NPU, add the following lines to -> `verl/utils/vllm_utils.py`: + ```python + from vllm_ascend.patch import platform + from vllm_ascend.patch import worker + ``` -``` -from vllm_ascend.patch import platform -from vllm_ascend.patch import worker -``` + See the following reference for more details: [https://github.com/vllm-project/vllm-ascend/issues/1776](https://github.com/vllm-project/vllm-ascend/issues/1776). #### Launch Training diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py index fe62c922..1ebd5f44 100644 --- a/examples/spider/train_sql_agent.py +++ b/examples/spider/train_sql_agent.py @@ -132,10 +132,7 @@ def config_train_fast() -> Dict[str, Any]: def config_train_qwen() -> Dict[str, Any]: """A configuration for training with Qwen-2.5B.""" -<<<<<<< HEAD -======= ->>>>>>> parent of e425885 (fix: update according to review comments) config = deepcopy(RL_TRAINING_CONFIG) return config @@ -207,6 +204,7 @@ def main() -> None: "npu": config_train_npu, } config = config_functions[args.config]() + # Set active agent - use provided value or default based on config choice active_agent = args.active_agent From 9801d980ec332db33714e6614ba77a5502730191 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Thu, 13 Nov 2025 09:57:04 +0800 Subject: [PATCH 8/8] minor fix --- docs/how-to/train-sql-agent.md | 14 ++++++-------- examples/spider/README.md | 1 + examples/spider/train_sql_agent.py | 1 - 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/how-to/train-sql-agent.md b/docs/how-to/train-sql-agent.md index 1e96f0bf..b85d173f 100644 --- a/docs/how-to/train-sql-agent.md +++ b/docs/how-to/train-sql-agent.md @@ -331,14 +331,14 @@ For the LLaMA profile, export an `HF_TOKEN` before running so VERL can download Before installing dependencies, configure the following pip mirrors: - ``` + ```bash pip config set global.index-url http://repo.huaweicloud.com/repository/pypi/simple pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi" ``` Then install vLLM, vLLM-Ascend and VERL: - ``` + ```bash pip install vllm==0.10.0 --trusted-host repo.huaweicloud.com pip install vllm-Ascend==0.10.0rc1 --trusted-host repo.huaweicloud.com pip install verl==0.5.0 @@ -353,13 +353,11 @@ For the LLaMA profile, export an `HF_TOKEN` before running so VERL can download See the following reference for more details: [https://github.com/vllm-project/vllm-ascend/issues/1776](https://github.com/vllm-project/vllm-ascend/issues/1776). -#### Launch Training - -After the above dependencies have been installed,from [`examples/spider`]({{ src("examples/spider") }})Run the following script command: + After the above dependencies have been installed, from [`examples/spider`]({{ src("examples/spider") }}) run the following script command: -``` -python train_sql_agent.py npu -``` + ```bash + python train_sql_agent.py npu + ``` ### Debugging the Agent without VERL diff --git a/examples/spider/README.md b/examples/spider/README.md index 5bc7a4a2..6b370bcb 100644 --- a/examples/spider/README.md +++ b/examples/spider/README.md @@ -36,6 +36,7 @@ Train a SQL agent using the Qwen2.5-Coder-1.5B-Instruct model with the following ```bash python train_sql_agent.py qwen ``` + If you want to use an NPU for training, please refer to the **Launch Training with NPUS** section in [How to Train a SQL Agent](../../docs/how-to/train-sql-agent.md). ### Debugging diff --git a/examples/spider/train_sql_agent.py b/examples/spider/train_sql_agent.py index 1ebd5f44..9602a367 100644 --- a/examples/spider/train_sql_agent.py +++ b/examples/spider/train_sql_agent.py @@ -134,7 +134,6 @@ def config_train_qwen() -> Dict[str, Any]: """A configuration for training with Qwen-2.5B.""" config = deepcopy(RL_TRAINING_CONFIG) - return config