From 4290f4f4ef548e23b658c49b963be9a200e1f600 Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Sun, 19 Oct 2025 16:12:49 -0700 Subject: [PATCH 1/2] add hydra group override feature --- docs/source/features/hydra.rst | 159 ++++++++++++++++++ source/isaaclab_tasks/config/extension.toml | 2 +- source/isaaclab_tasks/docs/CHANGELOG.rst | 9 + .../franka/agents/rl_games_ppo_cfg.yaml | 12 ++ .../config/franka/agents/rsl_rl_ppo_cfg.py | 52 ++++++ .../reach/config/franka/joint_pos_env_cfg.py | 13 ++ .../manipulation/reach/reach_env_cfg.py | 24 +++ .../isaaclab_tasks/utils/hydra.py | 109 +++++++++++- source/isaaclab_tasks/test/test_hydra.py | 32 +++- 9 files changed, 406 insertions(+), 6 deletions(-) diff --git a/docs/source/features/hydra.rst b/docs/source/features/hydra.rst index 47e84fb328c..89809502e7b 100644 --- a/docs/source/features/hydra.rst +++ b/docs/source/features/hydra.rst @@ -127,3 +127,162 @@ the post init update is as follows: Here, when modifying ``env.decimation`` or ``env.sim.dt``, the user needs to give the updated ``env.sim.render_interval``, ``env.scene.height_scanner.update_period``, and ``env.scene.contact_forces.update_period`` as input as well. + + +Group Override +-------------- +Group override lets you swap out entire groups of environment- or agent-level settings in one go. +Instead of overriding individual fields, you select a named preset defined under a ``variants`` mapping +directly inside your config classes. + + +Defining Variants +^^^^^^^^^^^^^^^^^ +Declare alternatives under ``self.variants`` in your environment and agent configs. Each top-level key under +``variants`` becomes a Hydra group (``env.`` or ``agent.``), and each nested key is a selectable option. + +Environment variants example: + +.. 
code-block:: python + + @configclass + class ReachEnvCfg(ManagerBasedRLEnvCfg): + def __post_init__(self): + super().__post_init__() + # Share across all derived envs + self.variants = { + "observations": { + "noise_less": NoiselessObservationsCfg(), + } + } + + @configclass + class FrankaReachEnvCfg(ReachEnvCfg): + def __post_init__(self): + super().__post_init__() + variants = { + "actions.arm_action": { + "joint_position_to_limit": mdp.JointPositionToLimitsActionCfg( + asset_name="robot", joint_names=["panda_joint.*"] + ), + "relative_joint_position": mdp.RelativeJointPositionActionCfg( + asset_name="robot", joint_names=["panda_joint.*"], scale=0.2 + ), + } + } + self.variants.update(variants) + +RSL-RL agent variants example: + +.. code-block:: python + + @configclass + class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + ... + policy = RslRlPpoActorCriticCfg( + ... + ) + algorithm = RslRlPpoAlgorithmCfg( + ... + ) + variants = { + "policy": { + "large_network": RslRlPpoActorCriticCfg( + actor_hidden_dims=[512, 256, 128, 64], critic_hidden_dims=[512, 256, 128, 64], ... + ), + "medium_network": RslRlPpoActorCriticCfg( + actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], ... + ), + }, + "algorithm": { + "small_batch_lr": RslRlPpoAlgorithmCfg(num_mini_batches=16, learning_rate=1.0e-4, ...), + }, + } + + +RL Games agent variants example: + +.. code-block:: yaml + + params: + env: ... + config: ... + network: + ... + mlp: + units: [64, 64] + activation: elu + d2rl: False + + variants: + params.network.mlp: + large_network: + units: [256, 128, 64] + activation: elu + d2rl: False + +The above defines a selectable group at ``agent.params.network.mlp`` with option ``large_network``. + + + + + +Override Syntax +^^^^^^^^^^^^^^^ +Select one preset per group via Hydra-style CLI flags. + +.. tab-set:: + :sync-group: rl-override + + .. tab-item:: rsl_rl + :sync: rsl_rl + + .. 
code-block:: bash + + python scripts/reinforcement_learning/rsl_rl/train.py \ + --task=Isaac-Reach-Franka-v0 \ + --headless \ + env.observations=noise_less \ + env.actions.arm_action=relative_joint_position \ + agent.policy=large_network + + Hydra replaces: + + .. list-table:: + :widths: 30 70 + :header-rows: 1 + + * - CLI key + - Resolved variant node + * - ``env.observations`` + - ``ReachEnvCfg.variants["observations"]["noise_less"]`` + * - ``env.actions.arm_action`` + - ``FrankaReachEnvCfg.variants["actions.arm_action"]["relative_joint_position"]`` + * - ``agent.policy`` + - ``FrankaReachPPORunnerCfg.variants["policy"]["large_network"]`` + + .. tab-item:: rl_games + :sync: rl_games + + .. code-block:: bash + + python scripts/reinforcement_learning/rl_games/train.py \ + --task=Isaac-Reach-Franka-v0 \ + --headless \ + env.observations=noise_less \ + env.actions.arm_action=relative_joint_position \ + agent.params.network.mlp=large_network + + Hydra replaces: + + .. list-table:: + :widths: 35 65 + :header-rows: 1 + + * - CLI key + - Resolved variant node + * - ``agent.params.network.mlp`` + - ``variants["params.network.mlp"]["large_network"]`` (from RL Games YAML) + +These flags let you switch qualitative modes of your experiments with a single option per group. 
diff --git a/source/isaaclab_tasks/config/extension.toml b/source/isaaclab_tasks/config/extension.toml index c1fd2d9226f..66bc9be41ab 100644 --- a/source/isaaclab_tasks/config/extension.toml +++ b/source/isaaclab_tasks/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.11.1" +version = "0.12.0" # Description title = "Isaac Lab Environments" diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst index 97170ffb6d7..d299bbd36ab 100644 --- a/source/isaaclab_tasks/docs/CHANGELOG.rst +++ b/source/isaaclab_tasks/docs/CHANGELOG.rst @@ -1,6 +1,15 @@ Changelog --------- +0.12.0 (2025-10-15) +~~~~~~~~~~~~~~~~~~~~ + +Changed +^^^^^^^ + +* Add a new feature that supports Hydra group config overrides, and provide an example in the Isaac-Reach-Franka-v0 environment + + 0.11.1 (2025-09-24) ~~~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rl_games_ppo_cfg.yaml index 5945fc0b45d..b45cc17a468 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rl_games_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rl_games_ppo_cfg.yaml @@ -81,3 +81,15 @@ params: clip_value: True clip_actions: False bounds_loss_coef: 0.0001 + +variants: + params.network.mlp: + large_network: + units: [256, 128, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py index 24bea7c5ac1..3116d200b34 100644 ---
a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py @@ -37,3 +37,55 @@ class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): desired_kl=0.01, max_grad_norm=1.0, ) + variants = { + "policy": { + "large_network": RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[512, 256, 128, 64], + critic_hidden_dims=[512, 256, 128, 64], + activation="elu", + ), + "medium_network": RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ), + "small_network": RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[128, 64], + critic_hidden_dims=[128, 64], + activation="elu", + ), + }, + "algorithm": { + "large_batch_lr": RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.001, + num_learning_epochs=8, + num_mini_batches=2, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ), + "small_batch_lr": RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.001, + num_learning_epochs=8, + num_mini_batches=16, + learning_rate=1.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ), + }, + } diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py index 2c5d573ff1f..e1e630789b7 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py @@ -43,6 +43,19 @@ def 
__post_init__(self): self.commands.ee_pose.body_name = "panda_hand" self.commands.ee_pose.ranges.pitch = (math.pi, math.pi) + variants = { + "actions.arm_action": { + "joint_position_to_limit": mdp.JointPositionToLimitsActionCfg( + asset_name="robot", joint_names=["panda_joint.*"] + ), + "relative_joint_position": mdp.RelativeJointPositionActionCfg( + asset_name="robot", joint_names=["panda_joint.*"], scale=0.2 + ), + } + } + + self.variants.update(variants) + @configclass class FrankaReachEnvCfg_PLAY(FrankaReachEnvCfg): diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/reach_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/reach_env_cfg.py index 8890010a71b..cd09d8f2501 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/reach_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/reach_env_cfg.py @@ -116,6 +116,28 @@ def __post_init__(self): policy: PolicyCfg = PolicyCfg() +@configclass +class NoiselessObservationsCfg: + """Noise less observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + joint_pos = ObsTerm(func=mdp.joint_pos_rel) + joint_vel = ObsTerm(func=mdp.joint_vel_rel) + pose_command = ObsTerm(func=mdp.generated_commands, params={"command_name": "ee_pose"}) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + @configclass class EventCfg: """Configuration for events.""" @@ -227,3 +249,5 @@ def __post_init__(self): ), }, ) + # variants defined at base env will be shared across all derived robot-specific envs + self.variants = {"observations": {"noise_less": NoiselessObservationsCfg()}} diff --git a/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py 
b/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py index 6e2648aa029..2cd2e46f5cd 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py +++ b/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py @@ -7,11 +7,12 @@ import functools -from collections.abc import Callable +from collections.abc import Callable, Mapping try: import hydra from hydra.core.config_store import ConfigStore + from hydra.core.hydra_config import HydraConfig from omegaconf import DictConfig, OmegaConf except ImportError: raise ImportError("Hydra is not installed. Please install it by running 'pip install hydra-core'.") @@ -55,8 +56,10 @@ def register_task_to_hydra( cfg_dict = {"env": env_cfg_dict, "agent": agent_cfg_dict} # replace slices with strings because OmegaConf does not support slices cfg_dict = replace_slices_with_strings(cfg_dict) + # --- ENV variants → register groups + record defaults + register_hydra_group(cfg_dict) # store the configuration to Hydra - ConfigStore.instance().store(name=task_name, node=cfg_dict) + ConfigStore.instance().store(name=task_name, node=OmegaConf.create(cfg_dict), group=None) return env_cfg, agent_cfg @@ -87,6 +90,9 @@ def hydra_main(hydra_env_cfg: DictConfig, env_cfg=env_cfg, agent_cfg=agent_cfg): hydra_env_cfg = OmegaConf.to_container(hydra_env_cfg, resolve=True) # replace string with slices because OmegaConf does not support slices hydra_env_cfg = replace_strings_with_slices(hydra_env_cfg) + # update the group configs with Hydra command line arguments + runtime_choice = HydraConfig.get().runtime.choices + resolve_hydra_group_runtime_override(env_cfg, agent_cfg, hydra_env_cfg, runtime_choice) # update the configs with the Hydra command line arguments env_cfg.from_dict(hydra_env_cfg["env"]) # replace strings that represent gymnasium spaces because OmegaConf does not support them. 
@@ -106,3 +112,102 @@ def hydra_main(hydra_env_cfg: DictConfig, env_cfg=env_cfg, agent_cfg=agent_cfg): return wrapper return decorator + + +def register_hydra_group(cfg_dict: dict) -> None: + """Register Hydra config groups for variant entries and prime defaults. + + The helper inspects the ``env`` and ``agent`` sections of ``cfg_dict`` for ``variants`` mappings, + registers each group/variant pair with Hydra's :class:`~hydra.core.config_store.ConfigStore`, and + records a ``defaults`` list so Hydra selects the ``default`` variant unless overridden. + + Args: + cfg_dict: Mutable configuration dictionary generated for Hydra consumption. + """ + cs = ConfigStore.instance() + default_groups: list[str] = [] + + for section in ("env", "agent"): + section_dict = cfg_dict.get(section, {}) + if isinstance(section_dict, dict) and "variants" in section_dict: + for root_name, root_dict in section_dict["variants"].items(): + group_path = f"{section}.{root_name}" + default_groups.append(group_path) + # register the default node pointing at cfg_dict[section][root_name] + cs.store(group=group_path, name="default", node=getattr_nested(cfg_dict, group_path)) + # register each variant under that group + for variant_name, variant_node in root_dict.items(): + cs.store(group=group_path, name=variant_name, node=variant_node) + + cfg_dict["defaults"] = ["_self_"] + [{g: "default"} for g in default_groups] + + +def resolve_hydra_group_runtime_override( + env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, + agent_cfg: dict | object, + hydra_cfg: dict, + choices_runtime: dict = {}, +) -> None: + """Resolve runtime Hydra overrides for registered variant groups. + + Hydra tracks user-selected variants under ``HydraConfig.get().runtime.choices``. 
Given the original + environment and agent configuration objects plus the Hydra-parsed dictionary, this function replaces + the default variant nodes with the selected ones (excluding explicit ``default``) so downstream code + consumes the correct configuration objects and dictionaries. + + This function also works in contexts without ``hydra.main`` (e.g., tests using ``hydra.compose``): + it falls back to reading choices from ``hydra_cfg['hydra']['runtime']['choices']`` if + ``HydraConfig.get()`` is not initialized. + + Args: + env_cfg: Environment configuration object, typically a dataclass with optional ``variants`` mapping. + agent_cfg: Agent configuration, either a mutable mapping or object exposing ``variants`` entries. + hydra_cfg: Native dictionary that mirrors the Hydra config tree, including the ``hydra`` section. + """ + # Try to read choices from HydraConfig; fall back to hydra_cfg dict if unavailable. + for sec, cfg in (("env", env_cfg), ("agent", agent_cfg)): + get_variants = lambda c: getattr(c, "variants", None) or (c.get("variants") if isinstance(c, Mapping) else None) + var = get_variants(cfg) + if not var: + continue + pref, cut = f"{sec}.", len(sec) + 1 + is_group_variant = lambda k, v: k.startswith(pref) and k[cut:] in var and v != "default" # noqa: E731 + choices = {k[cut:]: v for k, v in choices_runtime.items() if is_group_variant(k, v)} + for key, choice in choices.items(): + node = var[key][choice] + setattr_nested(cfg, key, node) + setattr_nested(hydra_cfg[sec], key, node.to_dict() if hasattr(node, "to_dict") else node) + delattr_nested(cfg, "variants") + delattr_nested(hydra_cfg, f"{sec}.variants") + + +def setattr_nested(obj: object, attr_path: str, value: object) -> None: + attrs = attr_path.split(".") + for attr in attrs[:-1]: + obj = obj[attr] if isinstance(obj, Mapping) else getattr(obj, attr) + if isinstance(obj, Mapping): + obj[attrs[-1]] = value + else: + setattr(obj, attrs[-1], value) + + +def getattr_nested(obj: object, 
attr_path: str) -> object: + for attr in attr_path.split("."): + obj = obj[attr] if isinstance(obj, Mapping) else getattr(obj, attr) + return obj + + +def delattr_nested(obj: object, attr_path: str) -> None: + """Delete a nested attribute/key strictly (raises on missing path). + + Uses dict indexing and getattr for traversal, mirroring getattr_nested's strictness. + """ + if "." in attr_path: + parent_path, leaf = attr_path.rsplit(".", 1) + parent = getattr_nested(obj, parent_path) # may raise KeyError/AttributeError + else: + parent, leaf = obj, attr_path + if isinstance(parent, Mapping): + del parent[leaf] + else: + delattr(parent, leaf) diff --git a/source/isaaclab_tasks/test/test_hydra.py b/source/isaaclab_tasks/test/test_hydra.py index c3b24fcaf8d..577d6a13a83 100644 --- a/source/isaaclab_tasks/test/test_hydra.py +++ b/source/isaaclab_tasks/test/test_hydra.py @@ -26,7 +26,7 @@ from isaaclab.utils import replace_strings_with_slices import isaaclab_tasks # noqa: F401 -from isaaclab_tasks.utils.hydra import register_task_to_hydra +from isaaclab_tasks.utils.hydra import register_task_to_hydra, resolve_hydra_group_runtime_override def hydra_task_config_test(task_name: str, agent_cfg_entry_point: str) -> Callable: @@ -42,11 +42,13 @@ def wrapper(*args, **kwargs): # replace hydra.main with initialize and compose with initialize(config_path=None, version_base="1.3"): - hydra_env_cfg = compose(config_name=task_name, overrides=sys.argv[1:]) - # convert to a native dictionary + hydra_env_cfg = compose(config_name=task_name, overrides=sys.argv[1:], return_hydra_config=True) + hydra_env_cfg["hydra"] = hydra_env_cfg["hydra"]["runtime"]["choices"] hydra_env_cfg = OmegaConf.to_container(hydra_env_cfg, resolve=True) # replace string with slices because OmegaConf does not support slices hydra_env_cfg = replace_strings_with_slices(hydra_env_cfg) + # apply group overrides to mutate cfg objects before from_dict + resolve_hydra_group_runtime_override(env_cfg, agent_cfg, 
hydra_env_cfg, hydra_env_cfg["hydra"]) # update the configs with the Hydra command line arguments env_cfg.from_dict(hydra_env_cfg["env"]) if isinstance(agent_cfg, dict): @@ -103,3 +105,27 @@ def main(env_cfg, agent_cfg): # clean up sys.argv = [sys.argv[0]] hydra.core.global_hydra.GlobalHydra.instance().clear() + + +def test_hydra_group_override(): + """Test the hydra configuration system for group overriding behavior""" + + # set hardcoded command line arguments + sys.argv = [ + sys.argv[0], + "env.observations=noise_less", + "env.actions.arm_action=relative_joint_position", + "agent.policy=large_network", + ] + + @hydra_task_config_test("Isaac-Reach-Franka-v0", "rsl_rl_cfg_entry_point") + def main(env_cfg, agent_cfg): + # env + assert env_cfg.observations.policy.joint_pos.noise is None + assert not env_cfg.observations.policy.enable_corruption + assert agent_cfg.policy.actor_hidden_dims == [512, 256, 128, 64] + + main() + # clean up + sys.argv = [sys.argv[0]] + hydra.core.global_hydra.GlobalHydra.instance().clear() From 3513828a897a48f6345108aed4df8b3451aa8379 Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Tue, 21 Oct 2025 16:32:58 -0700 Subject: [PATCH 2/2] fix lambda in loop issue --- source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py b/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py index 2cd2e46f5cd..292ab230ace 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py +++ b/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py @@ -165,19 +165,20 @@ def resolve_hydra_group_runtime_override( hydra_cfg: Native dictionary that mirrors the Hydra config tree, including the ``hydra`` section. """ # Try to read choices from HydraConfig; fall back to hydra_cfg dict if unavailable. 
+ vrnt = "variants" + get_variants = lambda c: getattr(c, vrnt, None) or (c.get(vrnt) if isinstance(c, Mapping) else None) # noqa: E731 + is_group_variant = lambda k, v: k.startswith(pref) and k[cut:] in var and v != "default" # noqa: E731 for sec, cfg in (("env", env_cfg), ("agent", agent_cfg)): - get_variants = lambda c: getattr(c, "variants", None) or (c.get("variants") if isinstance(c, Mapping) else None) var = get_variants(cfg) if not var: continue pref, cut = f"{sec}.", len(sec) + 1 - is_group_variant = lambda k, v: k.startswith(pref) and k[cut:] in var and v != "default" # noqa: E731 choices = {k[cut:]: v for k, v in choices_runtime.items() if is_group_variant(k, v)} for key, choice in choices.items(): node = var[key][choice] setattr_nested(cfg, key, node) setattr_nested(hydra_cfg[sec], key, node.to_dict() if hasattr(node, "to_dict") else node) - delattr_nested(cfg, "variants") + delattr_nested(cfg, vrnt) delattr_nested(hydra_cfg, f"{sec}.variants")