diff --git a/scripts/reinforcement_learning/ray/tuner.py b/scripts/reinforcement_learning/ray/tuner.py
index c9d5d6e20b9..63c72184063 100644
--- a/scripts/reinforcement_learning/ray/tuner.py
+++ b/scripts/reinforcement_learning/ray/tuner.py
@@ -217,17 +217,17 @@ def invoke_tuning_run(cfg: dict, args: argparse.Namespace) -> None:
     print("[WARNING]: Not saving checkpoints, just running experiment...")
     print("[INFO]: Model parameters and metrics will be preserved.")
     print("[WARNING]: For homogeneous cluster resources only...")
+
+    # Initialize Ray
+    util.ray_init(
+        ray_address=args.ray_address,
+        log_to_driver=True,
+    )
+
     # Get available resources
     resources = util.get_gpu_node_resources()
     print(f"[INFO]: Available resources {resources}")
 
-    if not ray.is_initialized():
-        ray.init(
-            address=args.ray_address,
-            log_to_driver=True,
-            num_gpus=len(resources),
-        )
-
     print(f"[INFO]: Using config {cfg}")
 
     # Configure the search algorithm and the repeater
diff --git a/scripts/reinforcement_learning/ray/util.py b/scripts/reinforcement_learning/ray/util.py
index 427c887cdcc..26a52a90aba 100644
--- a/scripts/reinforcement_learning/ray/util.py
+++ b/scripts/reinforcement_learning/ray/util.py
@@ -320,6 +320,8 @@ def ray_init(ray_address: str = "auto", runtime_env: dict[str, Any] | None = Non
             f" runtime_env={runtime_env}"
         )
         ray.init(address=ray_address, runtime_env=runtime_env, log_to_driver=log_to_driver)
+    else:
+        print("[WARNING]: Attempting to initialize Ray but it is already initialized!")
 
 
 def get_gpu_node_resources(
@@ -343,7 +345,7 @@ def get_gpu_node_resources(
     or simply the resource for a single node if requested.
     """
     if not ray.is_initialized():
-        ray_init()
+        raise RuntimeError("Ray must be initialized before calling get_gpu_node_resources().")
     nodes = ray.nodes()
     node_resources = []
     total_cpus = 0