From f2fb46479cc00ac9c31c0e41be810dd5f278253d Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Tue, 16 Sep 2025 12:00:28 -0700 Subject: [PATCH 01/10] initial cleanup of single gear insertion --- source/isaaclab/isaaclab/envs/mdp/rewards.py | 6 + .../isaaclab/isaaclab/utils/noise/__init__.py | 4 +- .../isaaclab/utils/noise/noise_cfg.py | 12 + .../isaaclab/utils/noise/noise_model.py | 68 +++ source/isaaclab_rl/setup.py | 4 +- .../deploy/gear_assembly/__init__.py | 6 + .../deploy/gear_assembly/config/__init__.py | 9 + .../gear_assembly/config/ur_10e/__init__.py | 34 ++ .../config/ur_10e/agents/__init__.py | 4 + .../config/ur_10e/agents/rsl_rl_ppo_cfg.py | 83 ++++ .../config/ur_10e/joint_pos_env_cfg.py | 271 +++++++++++ .../gear_assembly/gear_assembly_env_cfg.py | 347 ++++++++++++++ .../manipulation/deploy/mdp/__init__.py | 2 + .../manipulation/deploy/mdp/events.py | 451 ++++++++++++++++++ .../manipulation/deploy/mdp/observations.py | 191 ++++++++ .../manipulation/deploy/mdp/rewards.py | 287 +++++++++++ 16 files changed, 1775 insertions(+), 4 deletions(-) create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py diff --git a/source/isaaclab/isaaclab/envs/mdp/rewards.py b/source/isaaclab/isaaclab/envs/mdp/rewards.py index 20df2a557eb..4232c0a8895 100644 --- a/source/isaaclab/isaaclab/envs/mdp/rewards.py +++ b/source/isaaclab/isaaclab/envs/mdp/rewards.py @@ -241,11 +241,17 @@ def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = Sc ) return torch.sum(out_of_limits, dim=1) +def action_rate(env: ManagerBasedRLEnv) -> torch.Tensor: + """Penalize the rate of change of the actions using L2 norm.""" + return torch.norm(env.action_manager.action - env.action_manager.prev_action, p=2, dim=-1) def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor: """Penalize the rate of change of the actions using L2 squared kernel.""" return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1) +def action(env: ManagerBasedRLEnv) -> torch.Tensor: + """Penalize the actions using L2 squared kernel (summed for each environment).""" + return torch.sum(env.action_manager.action**2, dim=-1) def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor: """Penalize the actions using L2 squared kernel.""" diff --git a/source/isaaclab/isaaclab/utils/noise/__init__.py b/source/isaaclab/isaaclab/utils/noise/__init__.py index d2f703758b0..ef2cea78582 100644 --- a/source/isaaclab/isaaclab/utils/noise/__init__.py +++ b/source/isaaclab/isaaclab/utils/noise/__init__.py @@ -26,8 +26,8 @@ """ from .noise_cfg import NoiseCfg # noqa: F401 -from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg -from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, constant_noise, gaussian_noise, uniform_noise +from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg, ResetSampledNoiseModelCfg +from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, ResetSampledNoiseModel, constant_noise, gaussian_noise, uniform_noise # Backward compatibility ConstantBiasNoiseCfg = ConstantNoiseCfg diff --git a/source/isaaclab/isaaclab/utils/noise/noise_cfg.py b/source/isaaclab/isaaclab/utils/noise/noise_cfg.py index 0c49828b3ff..3de46bc89f1 100644 --- a/source/isaaclab/isaaclab/utils/noise/noise_cfg.py +++ b/source/isaaclab/isaaclab/utils/noise/noise_cfg.py @@ -109,3 +109,15 @@ class NoiseModelWithAdditiveBiasCfg(NoiseModelCfg): Defaults to True. """ + +@configclass +class ResetSampledNoiseModelCfg(NoiseModelCfg): + """Configuration for a noise model that samples noise ONLY during reset.""" + + class_type: type = noise_model.ResetSampledNoiseModel + + noise_cfg: NoiseCfg = MISSING + """The noise configuration for the noise. + + Based on this configuration, the noise is sampled at every reset of the noise model. + """ diff --git a/source/isaaclab/isaaclab/utils/noise/noise_model.py b/source/isaaclab/isaaclab/utils/noise/noise_model.py index dae36b55c72..d780d916067 100644 --- a/source/isaaclab/isaaclab/utils/noise/noise_model.py +++ b/source/isaaclab/isaaclab/utils/noise/noise_model.py @@ -189,3 +189,71 @@ def __call__(self, data: torch.Tensor) -> torch.Tensor: # now re-sample that expanded bias in-place self.reset() return super().__call__(data) + self._bias + +class ResetSampledNoiseModel(NoiseModel): + """Noise model that samples noise ONLY during reset and applies it consistently. + + The noise is sampled from the configured distribution ONLY during reset and applied consistently + until the next reset. Unlike regular noise that generates new random values every step, + this model maintains the same noise values throughout an episode. + """ + + def __init__(self, noise_model_cfg: noise_cfg.NoiseModelCfg, num_envs: int, device: str): + # initialize parent class + super().__init__(noise_model_cfg, num_envs, device) + # store the noise configuration + self._noise_cfg = noise_model_cfg.noise_cfg + self._sampled_noise = torch.zeros((num_envs, 1), device=self._device) + self._num_components: int | None = None + + def reset(self, env_ids: Sequence[int] | None = None): + """Reset the noise model by sampling NEW noise values. + + This method samples new noise for the specified environments using the configured noise function. + The sampled noise will remain constant until the next reset. + + Args: + env_ids: The environment ids to reset the noise model for. Defaults to None, + in which case all environments are considered. + """ + # resolve the environment ids + if env_ids is None: + env_ids = slice(None) + + # Use the existing noise function to sample new noise + # Create dummy data to sample from the noise function + dummy_data = torch.zeros((env_ids.stop - env_ids.start if isinstance(env_ids, slice) else len(env_ids), 1), + device=self._device) + + # Sample noise using the configured noise function + sampled_noise = self._noise_model_cfg.noise_cfg.func(dummy_data, self._noise_model_cfg.noise_cfg) + + self._sampled_noise[env_ids] = sampled_noise + + def __call__(self, data: torch.Tensor) -> torch.Tensor: + """Apply the pre-sampled noise to the data. + + This method applies the noise that was sampled during the last reset. + No new noise is generated - the same values are used consistently. + + Args: + data: The data to apply the noise to. Shape is (num_envs, ...). + + Returns: + The data with the noise applied. Shape is the same as the input data. + """ + # on first apply, expand noise to match last dim of data + if self._num_components is None: + *_, self._num_components = data.shape + # expand noise from (num_envs,1) to (num_envs, num_components) + self._sampled_noise = self._sampled_noise.repeat(1, self._num_components) + + # apply the noise based on operation + if self._noise_cfg.operation == "add": + return data + self._sampled_noise + elif self._noise_cfg.operation == "scale": + return data * self._sampled_noise + elif self._noise_cfg.operation == "abs": + return self._sampled_noise + else: + raise ValueError(f"Unknown operation in noise: {self._noise_cfg.operation}") diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index f9ddcdb0fa5..a3675ac84e7 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -46,11 +46,11 @@ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", ], # rl-games still needs gym :( - "rsl-rl": ["rsl-rl-lib==3.0.1"], + # "rsl-rl": ["rsl-rl-lib==3.0.1"], } # Add the names with hyphens as aliases for convenience EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"] -EXTRAS_REQUIRE["rsl_rl"] = EXTRAS_REQUIRE["rsl-rl"] +# EXTRAS_REQUIRE["rsl_rl"] = EXTRAS_REQUIRE["rsl-rl"] # Cumulation of all extra-requires EXTRAS_REQUIRE["all"] = list(itertools.chain.from_iterable(EXTRAS_REQUIRE.values())) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py new file mode 100644 index 00000000000..525f8c7d27a --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Assemble 3 gears into a base.""" diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py new file mode 100644 index 00000000000..257dea87052 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configurations for arm-based gear assembly environments.""" + +# We leave this file empty since we don't want to expose any configs in this package directly. +# We still need this file to import the "config" module in the parent package. diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py new file mode 100644 index 00000000000..0f3d90bbcad --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents +## +# Register Gym environments. +## + +gym.register( + id="Isaac-GearAssembly-UR10e-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10eGearAssemblyEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg", + # "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyPPORunnerCfg", + }, +) + +gym.register( + id="Isaac-GearAssembly-UR10e-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10eGearAssemblyEnvCfg_PLAY", + # "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + # "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg", + # "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py new file mode 100644 index 00000000000..bcc238c84a9 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..636717b9b55 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,83 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg + + +@configclass +class UpdatedRslRlPpoActorCriticRecurrentCfg(RslRlPpoActorCriticRecurrentCfg): + fixed_sigma = False + share_weights = False + +@configclass +class UpdatedRslRlPpoAlgorithmCfg(RslRlPpoAlgorithmCfg): + bounds_loss_coef = 0.0 + +@configclass +class UR10GearAssemblyPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 512 + max_iterations = 1500 + save_interval = 10 + experiment_name = "gear_assembly_ur10e" + empirical_normalization = True + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.0, + num_learning_epochs=8, + num_mini_batches=8, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.008, + max_grad_norm=1.0, + ) + + +@configclass +class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg): + seed = 7858 + num_steps_per_env = 512 + max_iterations = 1500 + save_interval = 50 + experiment_name = "gear_assembly_ur10e" + empirical_normalization = True + clip_actions = 1.0 + resume = False + policy = UpdatedRslRlPpoActorCriticRecurrentCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + noise_std_type="log", + activation="elu", + rnn_type="lstm", + rnn_hidden_dim=256, + rnn_num_layers=2, + ) + algorithm = UpdatedRslRlPpoAlgorithmCfg( + bounds_loss_coef=0.0001, + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.0, + num_learning_epochs=8, + num_mini_batches=16, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.008, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py new file mode 100644 index 00000000000..6899beb0512 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py @@ -0,0 +1,271 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math +import os +import torch + +from isaaclab.utils import configclass +import isaaclab.sim as sim_utils +from isaaclab.assets.articulation import ArticulationCfg + +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import SceneEntityCfg + + +import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp +from isaaclab_tasks.manager_based.manipulation.deploy.gear_assembly.gear_assembly_env_cfg import GearAssemblyEnvCfg +from isaaclab.sensors import FrameTransformerCfg, OffsetCfg +from isaaclab.markers.config import FRAME_MARKER_CFG + +import isaaclab_tasks.manager_based.manipulation.deploy.mdp.events as gear_assembly_events + + +## +# Pre-defined configs +## +from isaaclab_assets.robots.universal_robots import UR10e_ROBOTIQ_GRIPPER_CFG # isort: skip + + +## +# Environment configuration +## + +@configclass +class EventCfg: + """Configuration for events.""" + + robot_joint_stiffness_and_damping = EventTerm( + func=mdp.randomize_actuator_gains, + # min_step_count_between_reset=720, + # min_step_count_between_reset=200, + mode="reset", + params={ + # "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), + "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]), # only the arm joints are randomized + "stiffness_distribution_params": (0.75, 1.5), + "damping_distribution_params": (0.3, 3.0), + # "stiffness_distribution_params": (1.0, 1.0), + # "damping_distribution_params": (1.0, 1.0), + "operation": "scale", + "distribution": "log_uniform", + }, + ) + + joint_friction = EventTerm( + func=mdp.randomize_joint_parameters, + # min_step_count_between_reset=720, + # min_step_count_between_reset=200, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]), + "friction_distribution_params": (0.3, 0.7), + "operation": "add", + "distribution": "uniform", + }, + ) + + medium_gear_physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("factory_gear_medium", body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + gear_base_physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("factory_gear_base", body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + + robot_physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + + randomize_gear_type = EventTerm( + func=gear_assembly_events.randomize_gear_type, + mode="reset", + params={ + # "gear_types": ["gear_small", "gear_medium", "gear_large"] + "gear_types": ["gear_medium"] + }, + ) + + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + + randomize_gears_and_base_pose = EventTerm( + func=gear_assembly_events.randomize_gears_and_base_pose, + mode="reset", + params={ + "pose_range": { + # "x": [0.0, 0.0], + # "y": [0.0, 0.0], + # "z": [0.0, 0.0], + # "roll": [-0.0, 0.0], # 0 degree + # "pitch": [-0.0, 0.0], # 0 degree + # "yaw": [-0.0, 0.0], # 0 degree + "x": [-0.1, 0.1], + "y": [-0.25, 0.25], + "z": [-0.1, 0.1], + "roll": [-math.pi/90, math.pi/90], # 2 degree + "pitch": [-math.pi/90, math.pi/90], # 2 degree + "yaw": [-math.pi/6, math.pi/6], # 2 degree + }, + "gear_pos_range": { + "x": [-0.02, 0.02], + "y": [-0.02, 0.02], + "z": [0.0575, 0.0775], # 0.045 + 0.0225 + # "x": [-0.0, 0.0], + # "y": [-0.0, 0.0], + # "z": [0.0675, 0.0675], + }, + "rot_randomization_range": { + "roll": [-math.pi/36, math.pi/36], # 5 degree + "pitch": [-math.pi/36, math.pi/36], # 5 degree + "yaw": [-math.pi/36, math.pi/36], # 5 degree + }, + "velocity_range": {}, + }, + ) + + set_robot_to_grasp_pose = EventTerm( + func=gear_assembly_events.set_robot_to_grasp_pose, + mode="reset", + params={ + "robot_asset_cfg": SceneEntityCfg("robot"), + # "pos_offset": [-0.030375, 0.0, -0.255], # Offset from wrist_3_link to gripper position + # "pos_offset": [0.0, 0.030375, -0.26], + "rot_offset": [0.0, math.sqrt(2)/2, math.sqrt(2)/2, 0.0], + "pos_randomization_range": { + "x": [-0.0, 0.0], + "y": [-0.005, 0.005], + "z": [-0.003, 0.003] + }, + # "pos_randomization_range": None + }, + ) + + + + +@configclass +class UR10eGearAssemblyEnvCfg(GearAssemblyEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # switch robot to ur10 with local asset path + self.scene.robot = UR10e_ROBOTIQ_GRIPPER_CFG.replace( + prim_path="{ENV_REGEX_NS}/Robot", + spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace( + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1 + ), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "shoulder_pan_joint": 2.7228e+00, + "shoulder_lift_joint": -8.3962e-01, + "elbow_joint": 1.3684e+00, + "wrist_1_joint": -2.1048e+00, + "wrist_2_joint": -1.5691e+00, + "wrist_3_joint": -1.9896e+00, + "finger_joint": 0.0, + }, + pos=(0.0, 0.0, 0.0), + rot=(0.0, 0.0, 0.0, 1.0), + ), + ) + + + self.observations.policy.joint_pos.params["asset_cfg"].joint_names = ["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"] + self.observations.policy.joint_vel.params["asset_cfg"].joint_names = ["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"] + + # override events + self.events = EventCfg() + + # overriderride command generator body + self.joint_action_scale = 0.025 + self.action_scale_joint_space = [self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale] + self.actions.arm_action = mdp.RelativeJointPositionActionCfg( + asset_name="robot", joint_names=["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"], scale=self.joint_action_scale, use_zero_offset=True + ) + + self.obs_order = ["arm_dof_pos", "arm_dof_vel", "shaft_pos", "shaft_quat"] + self.policy_action_space = "joint" + self.arm_joint_names = [ + "shoulder_pan_joint", + "shoulder_lift_joint", + "elbow_joint", + "wrist_1_joint", + "wrist_2_joint", + "wrist_3_joint", + ] + self.policy_action_space = "joint" + self.action_space = 6 + self.state_space = 0 + self.observation_space = 19 + + # Set joint_action_scale from the existing arm_action.scale + self.joint_action_scale = self.actions.arm_action.scale + + self.action_scale_joint_space = [ + self.joint_action_scale, + self.joint_action_scale, + self.joint_action_scale, + self.joint_action_scale, + self.joint_action_scale, + self.joint_action_scale, + ] + self.fixed_asset_init_pos_center = (1.0200, -0.2100, -0.1) + self.fixed_asset_init_pos_range = (0.1, 0.25, 0.1) + self.fixed_asset_init_orn_deg = (180.0, 0.0, 90.0) + self.fixed_asset_init_orn_deg_range = (5.0, 5.0, 30.0) + + +@configclass +class UR10eGearAssemblyEnvCfg_PLAY(UR10eGearAssemblyEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py new file mode 100644 index 00000000000..88a3b89009f --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py @@ -0,0 +1,347 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import MISSING +import os + +import torch +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.sensors import FrameTransformerCfg +from isaaclab.managers import ActionTermCfg as ActionTerm +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.managers import EventTermCfg as EventTerm + +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.sim.simulation_cfg import PhysxCfg, SimulationCfg +from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.utils.noise import ResetSampledNoiseModelCfg, UniformNoiseCfg +import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp +from isaaclab_tasks.manager_based.manipulation.deploy.reach.reach_env_cfg import ReachEnvCfg + +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +# Get the directory where this configuration file is located +CONFIG_DIR = os.path.dirname(os.path.abspath(__file__)) +ASSETS_DIR = os.path.join(CONFIG_DIR, "assets") + +## +# Environment configuration +## + +@configclass +class GearAssemblySceneCfg(InteractiveSceneCfg): + """Configuration for the scene with a robotic arm.""" + + # Disable scene replication to allow USD-level randomization + replicate_physics = False + + # world + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(), + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)), + ) + + factory_gear_base = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/FactoryGearBase", + # TODO: change to common isaac sim directory + spawn=sim_utils.UsdFileCfg( + usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_base.usd"), + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_base.usd", + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=196, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=None), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd", + # rigid_props=sim_utils.RigidBodyPropertiesCfg(kinematic_enabled=True), + ), + init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)), + ) + + # factory_gear_small = RigidObjectCfg( + # prim_path="{ENV_REGEX_NS}/FactoryGearSmall", + # # TODO: change to common isaac sim directory + # spawn=sim_utils.UsdFileCfg( + # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_small.usd"), + # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_small.usd", + # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/small_gear_scale_1p5_usd/small_gear_scale_1p5.usd", + # activate_contact_sensors=True, + # rigid_props=sim_utils.RigidBodyPropertiesCfg( + # disable_gravity=False, + # kinematic_enabled=False, + # max_depenetration_velocity=5.0, + # linear_damping=0.0, + # angular_damping=0.0, + # max_linear_velocity=1000.0, + # max_angular_velocity=3666.0, + # enable_gyroscopic_forces=True, + # solver_position_iteration_count=196, + # solver_velocity_iteration_count=1, + # max_contact_impulse=1e32, + # ), + # mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + # collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + # ), + # init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)), + # ) + + factory_gear_medium = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/FactoryGearMedium", + # TODO: change to common isaac sim directory + spawn=sim_utils.UsdFileCfg( + usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_medium.usd"), + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_medium.usd", + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/medium_gear_scale_1p5_usd/medium_gear_scale_1p5.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + kinematic_enabled=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=196, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)), + ) + + # factory_gear_large = RigidObjectCfg( + # prim_path="{ENV_REGEX_NS}/FactoryGearLarge", + # # TODO: change to common isaac sim directory + # spawn=sim_utils.UsdFileCfg( + # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_large.usd"), + # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_large.usd", + # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/large_gear_scale_1p5_usd/large_gear_scale_1p5.usd", + # activate_contact_sensors=True, + # rigid_props=sim_utils.RigidBodyPropertiesCfg( + # disable_gravity=False, + # kinematic_enabled=False, + # max_depenetration_velocity=5.0, + # linear_damping=0.0, + # angular_damping=0.0, + # max_linear_velocity=1000.0, + # max_angular_velocity=3666.0, + # enable_gyroscopic_forces=True, + # solver_position_iteration_count=196, + # solver_velocity_iteration_count=1, + # max_contact_impulse=1e32, + # ), + # mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + # collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + # ), + # init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)), + # ) + + + # robots + robot: ArticulationCfg = MISSING + + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0), + ) + +# @configclass +# class CommandsCfg: +# """Command terms for the MDP.""" + +# ee_pose = mdp.UniformPoseCommandCfg( +# asset_name="robot", +# body_name=MISSING, +# resampling_time_range=(4.0, 4.0), +# debug_vis=True, +# ranges=mdp.UniformPoseCommandCfg.Ranges( +# pos_x=(0.35, 0.65), +# pos_y=(-0.2, 0.2), +# pos_z=(0.15, 0.5), +# roll=(0.0, 0.0), +# pitch=MISSING, # depends on end-effector axis +# yaw=(-3.14, 3.14), +# ), +# ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + arm_action: ActionTerm = MISSING + gripper_action: ActionTerm | None = None + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + joint_pos = ObsTerm(func=mdp.joint_pos, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + joint_vel = ObsTerm(func=mdp.joint_vel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=ResetSampledNoiseModelCfg( + noise_cfg=UniformNoiseCfg(n_min=-0.005, n_max=0.005, operation="add") + )) + gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # @configclass + # class CriticCfg(ObsGroup): + # """Observations for policy group.""" + + # # observation terms (order preserved) + # joint_pos = ObsTerm(func=mdp.joint_pos, + # params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + # joint_vel = ObsTerm(func=mdp.joint_vel, + # params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, + # # noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025)) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add")) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) + # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) + # gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w) + + # gear_pos = ObsTerm(func=mdp.gear_pos_w) + # gear_quat = ObsTerm(func=mdp.gear_quat_w) + + # observation groups + policy: PolicyCfg = PolicyCfg() + # critic: CriticCfg = CriticCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + reset_gear = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "pose_range": { + "x": [-0.05, 0.05], + "y": [-0.05, 0.05], + "z": [0.1, 0.15], + }, + "velocity_range": {}, + "asset_cfg": SceneEntityCfg("factory_gear_small"), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + end_effector_gear_keypoint_tracking = RewTerm( + func=mdp.keypoint_entity_error, + weight=-1.5, + params={ + "asset_cfg_1": SceneEntityCfg("factory_gear_base"), + "keypoint_scale": 0.15, + }, + ) + + end_effector_gear_keypoint_tracking_exp = RewTerm( + func=mdp.keypoint_entity_error_exp, + weight=1.5, + params={ + "asset_cfg_1": SceneEntityCfg("factory_gear_base"), + "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)], + "kp_use_sum_of_exps": False, + "keypoint_scale": 0.15, + }, + ) + + action_rate = RewTerm(func=mdp.action_rate, weight=-5.0e-06) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + +@configclass +class GearAssemblyEnvCfg(ManagerBasedRLEnvCfg): + # Scene settings + scene: GearAssemblySceneCfg = GearAssemblySceneCfg(num_envs=4096, env_spacing=2.5) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + # commands: CommandsCfg = CommandsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + sim: SimulationCfg = SimulationCfg( + + physx=PhysxCfg( + gpu_collision_stack_size=2**31, # Important to prevent collisionStackSize buffer overflow in contact-rich environments. + gpu_max_rigid_contact_count=2**23, + gpu_max_rigid_patch_count=2**23 + ), + ) + + def __post_init__(self): + """Post initialization.""" + # general settings + self.episode_length_s = 6.66 + self.viewer.eye = (3.5, 3.5, 3.5) + # simulation settings + self.decimation = 2 + self.sim.render_interval = self.decimation + self.sim.dt = 1.0 / 120.0 + + self.hand_close_pos = 0.7 + + self.gear_offsets = {'gear_small': [0.076125, 0.0, 0.0], + 'gear_medium': [0.030375, 0.0, 0.0], + 'gear_large': [-0.045375, 0.0, 0.0]} + + self.gear_offsets_grasp = {'gear_small': [0.0, 0.076125, -0.26], + 'gear_medium': [0.0, 0.030375, -0.26], + 'gear_large': [0.0, -0.045375, -0.26]} + + self.hand_grasp_pos = {"gear_small": 0.67, + "gear_medium": 0.54, + "gear_large": 0.52} diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py index 6686f9f5276..57059c62dac 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py @@ -8,3 +8,5 @@ from isaaclab.envs.mdp import * # noqa: F401, F403 from .rewards import * # noqa: F401, F403 +from .events import * # noqa: F401, F403 +from .observations import * # noqa: F401, F403 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py new file mode 100644 index 00000000000..1e70454510e --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py @@ -0,0 +1,451 @@ +# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Functions specific to the in-hand dexterous manipulation environments.""" + + +from __future__ import annotations + +import carb +import time +import torch +from typing import TYPE_CHECKING, Literal, Optional, List + +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg +from isaaclab.utils.math import sample_uniform +import isaaclab.utils.math as math_utils +import isaaclab.sim as sim_utils + +from isaaclab_tasks.direct.automate import factory_control as fc +import random + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedEnv + + +def set_default_joint_pose( + env: ManagerBasedEnv, + env_ids: torch.Tensor, + default_pose: torch.Tensor, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +): + # Set the default pose for robots in all envs + asset = env.scene[asset_cfg.name] + asset.data.default_joint_pos = torch.tensor(default_pose, device=env.device).repeat(env.num_envs, 1) + +def randomize_gear_type( + env: ManagerBasedEnv, + env_ids: torch.Tensor, + gear_types: List[str] = ["gear_small", "gear_medium", "gear_large"] +): + """Randomize the gear type being used for the specified environments. + + Args: + env: The environment containing the assets + env_ids: Environment IDs to randomize + gear_types: List of available gear types to choose from + """ + # Randomly select gear type for each environment + selected_gears = [random.choice(gear_types) for _ in range(len(env_ids))] + + # Store the selected gear type in the environment instance + # This will be used by the observation functions + if not hasattr(env, '_current_gear_type'): + env._current_gear_type = ["gear_medium"] * env.num_envs + + for i, env_id in enumerate(env_ids): + env._current_gear_type[env_id] = selected_gears[i] + # print(f"env._current_gear_type: {env._current_gear_type}") + +def set_robot_to_grasp_pose( + env: ManagerBasedEnv, + env_ids: torch.Tensor, + robot_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + pos_threshold: float = 1e-6, + rot_threshold: float = 1e-6, + max_iterations: int = 10, + rot_offset: Optional[list[float]] = None, + pos_randomization_range: Optional[dict] = None, + num_arm_joints: int = 6 +): + # Convert rotation offset to tensor if provided + if rot_offset is not None: + rot_offset_tensor = torch.tensor(rot_offset, device=env.device).unsqueeze(0).expand(len(env_ids), -1) + else: + rot_offset_tensor = None + + # Check if environment has gear type selection + if not hasattr(env, '_current_gear_type'): + raise ValueError("Environment does not have '_current_gear_type' attribute. Ensure randomize_gear_type event is configured.") + + i = 0 + robot_asset: Articulation = env.scene[robot_asset_cfg.name] + joint_pos = robot_asset.data.joint_pos[env_ids].clone() + joint_vel = robot_asset.data.joint_vel[env_ids].clone() + + while i < max_iterations: + robot_asset: Articulation = env.scene[robot_asset_cfg.name] + # Add gaussian noise to joint states + joint_pos = robot_asset.data.joint_pos[env_ids].clone() + joint_vel = robot_asset.data.joint_vel[env_ids].clone() + + # Get gear positions and orientations for each environment based on selected gear type + env_ids_list = env_ids.tolist() if isinstance(env_ids, torch.Tensor) else list(env_ids) + grasp_object_pos_world = torch.zeros(len(env_ids), 3, device=env.device) + grasp_object_quat = torch.zeros(len(env_ids), 4, device=env.device) + + for row_idx, env_id in enumerate(env_ids_list): + # Get the gear type for this environment + gear_key = env._current_gear_type[env_id] + selected_asset_name = f"factory_{gear_key}" + + # Get the gear asset for this environment + gear_asset: RigidObject = env.scene[selected_asset_name] + grasp_object_pos_world[row_idx] = gear_asset.data.root_link_pos_w[env_id].clone() + grasp_object_quat[row_idx] = gear_asset.data.root_link_quat_w[env_id].clone() + + + # First apply rotation offset to get the object's orientation + if rot_offset_tensor is not None: + # Apply rotation offset by quaternion multiplication + # rot_offset is assumed to be in quaternion format (w, x, y, z) + grasp_object_quat = math_utils.quat_mul(grasp_object_quat, rot_offset_tensor) + # print(f"Applied rot_offset: {rot_offset}") + # print(f"grasp_object_quat after offset: {grasp_object_quat}") + + if pos_randomization_range is not None: + pos_keys = ["x", "y", "z"] + range_list_pos = [pos_randomization_range.get(key, (0.0, 0.0)) for key in pos_keys] + ranges_pos = torch.tensor(range_list_pos, device=env.device) + rand_pos_offsets = math_utils.sample_uniform(ranges_pos[:, 0], ranges_pos[:, 1], (len(env_ids), 3), device=env.device) + # Apply gear-specific grasp offsets to each environment + for row_idx, env_id in enumerate(env_ids_list): + gear_key = env._current_gear_type[env_id] + gear_grasp_offset = env.cfg.gear_offsets_grasp[gear_key] + grasp_offset_tensor = torch.tensor(gear_grasp_offset, device=env.device, dtype=grasp_object_pos_world.dtype) + + if pos_randomization_range is not None: + grasp_offset_tensor += rand_pos_offsets[row_idx] + + # Transform position offset from rotated object frame to world frame + grasp_object_pos_world[row_idx] = grasp_object_pos_world[row_idx] + math_utils.quat_apply( + grasp_object_quat[row_idx:row_idx+1], grasp_offset_tensor.unsqueeze(0) + )[0] + + # Convert to environment-relative coordinates by subtracting environment origins + grasp_object_pos = grasp_object_pos_world + + # Get end effector pose of the robot + # Get the specific wrist_3_link pose + try: + # Find the index of the wrist_3_link body + wrist_3_indices, wrist_3_names = robot_asset.find_bodies(["wrist_3_link"]) + wrist_3_idx = wrist_3_indices[0] + + if len(wrist_3_indices) == 1: + wrist_3_idx = wrist_3_indices[0] # Get the first (and should be only) index + + # Get the specific wrist_3_link pose + eef_pos_world = robot_asset.data.body_link_pos_w[env_ids, wrist_3_idx] # Shape: (len(env_ids), 3) + eef_quat = robot_asset.data.body_link_quat_w[env_ids, wrist_3_idx] # Shape: (len(env_ids), 4) + + # Convert to environment-relative coordinates + eef_pos = eef_pos_world + + + # You can also get the full pose as [pos, quat] (7-dimensional) + eef_pos = robot_asset.data.body_pos_w[env_ids, wrist_3_idx] + eef_quat = robot_asset.data.body_quat_w[env_ids, wrist_3_idx] + # print(f"eef_pos: {eef_pos}") + # print(f"eef_quat: {eef_quat}") + elif len(wrist_3_indices) > 1: + print("wrist_3_link found multiple times in robot body names") + print(f"Available body names: {robot_asset.body_names}") + else: + print("wrist_3_link not found in robot body names") + print(f"Available body names: {robot_asset.body_names}") + + except Exception as e: + print(f"Could not get end effector pose: {e}") + print("You may need to adjust the body name or access method based on your robot configuration") + + + # Compute error to target using wrist_3_link as current and grasp_object as target + if len(wrist_3_indices) > 0: + # Get current end effector pose (wrist_3_link) + current_eef_pos = eef_pos # wrist_3_link position + current_eef_quat = eef_quat # wrist_3_link orientation + + # Get target pose (grasp object) + target_eef_pos = grasp_object_pos # grasp object position + target_eef_quat = grasp_object_quat # grasp object orientation + + # print(f"current_eef_pos: {current_eef_pos[0]}") + # print(f"current_eef_quat: {current_eef_quat[0]}") + # print(f"target_eef_pos: {target_eef_pos[0]}") + # print(f"target_eef_quat: {target_eef_quat[0]}") + + # Compute pose error + pos_error, axis_angle_error = fc.get_pose_error( + fingertip_midpoint_pos=current_eef_pos, + fingertip_midpoint_quat=current_eef_quat, + ctrl_target_fingertip_midpoint_pos=target_eef_pos, + ctrl_target_fingertip_midpoint_quat=target_eef_quat, + jacobian_type='geometric', + rot_error_type='axis_angle') + delta_hand_pose = torch.cat((pos_error, axis_angle_error), dim=-1) + # Check if pose error is within threshold + pos_error_norm = torch.norm(pos_error, dim=-1) + rot_error_norm = torch.norm(axis_angle_error, dim=-1) + + # print(f"Pose error - position: {pos_error[0]}") + # print(f"Pose error - orientation: {axis_angle_error[0]}") + # print(f"Position error norm: {pos_error_norm[0]}") + # print(f"Rotation error norm: {rot_error_norm[0]}") + + # Check if all environments have converged + if torch.all(pos_error_norm < pos_threshold) and torch.all(rot_error_norm < rot_threshold): + # print(f"Converged after {i} iterations!") + # print(f"pos_error_norm: {pos_error_norm}") + # print(f"rot_error_norm: {rot_error_norm}") + # print(f"pos_error: {pos_error}") + # print(f"axis_angle_error: {axis_angle_error}") + break + + # Solve DLS problem for inverse kinematics + # Get jacobian for the wrist_3_link using the same approach as task_space_actions.py + try: + # Get all jacobians from the robot + jacobians = robot_asset.root_physx_view.get_jacobians().clone() + + # Select the jacobian for the wrist_3_link body + # For fixed-base robots, Jacobian excludes the base body (index 0) + # So if wrist_3_idx is 6, the Jacobian index should be 5 + jacobi_body_idx = wrist_3_idx - 1 + + + jacobian = jacobians[env_ids, jacobi_body_idx, :, :] # Only first 6 joints (arm, not gripper) + + delta_dof_pos = fc._get_delta_dof_pos( + delta_pose=delta_hand_pose, + ik_method="dls", + jacobian=jacobian, + device=env.device, + ) + + # Update joint positions - only update arm joints (first 6) + joint_pos += delta_dof_pos + joint_vel = torch.zeros_like(joint_pos) + + # Set into the physics simulation + + + except Exception as e: + print(f"Error in IK computation: {e}") + print("Note: You may need to implement proper jacobian computation for your robot") + + # Set into the physics simulation + robot_asset.set_joint_position_target(joint_pos, env_ids=env_ids) + robot_asset.set_joint_velocity_target(joint_vel, env_ids=env_ids) + robot_asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + + + # print(f"delta_dof_pos.abs().max(): {delta_dof_pos.abs().max()}") + + # if delta_dof_pos.abs().max() < 1e-5: + # break + + # print(f"Step {i}") + + i += 1 + # if i >= max_iterations: + # print(f"IK did not converge after {max_iterations} iterations") + # # print(f"pos_error_norm: {pos_error_norm}") + # # print(f"rot_error_norm: {rot_error_norm}") + # # print(f"pos_error: {pos_error}") + # # print(f"axis_angle_error: {axis_angle_error}") + # # raise RuntimeError(f"IK did not converge after {max_iterations} iterations") + + # # print(f"delta_dof_pos.abs().max(): {delta_dof_pos.abs().max()}") + # for i in range(10): + # env.sim.render() + # input("Going to set object position...") + + # TODO: Resttting the gear based on teh IK solution is not working as expected. The gear is not being placed in the correct position. + # # Set the grasp object's pose to the current end-effector pose in world coordinates (with optional offsets) + # # The offsets should be applied in reverse to get from wrist_3_link to gear pose + # gear_pos_world = current_eef_pos + # gear_quat_world = current_eef_quat + + # # Apply position offset (subtract to get from wrist_3_link to gear position) + # if pos_offset_tensor is not None: + # gear_pos_world = gear_pos_world - pos_offset_tensor + + # # Apply rotation offset (inverse to get from wrist_3_link to gear orientation) + # if rot_offset_tensor is not None: + # # Apply inverse rotation offset + # gear_quat_world = math_utils.quat_mul(current_eef_quat, math_utils.quat_conjugate(rot_offset_tensor)) + + # for i in range(10): + # env.sim.render() + # input("Press Enter to continue 1...") + + # # Set the grasp object's pose for each environment based on selected gear type + # for row_idx, env_id in enumerate(env_ids_list): + # gear_key = env._current_gear_type[env_id] + # selected_asset_name = f"factory_{gear_key}" + # gear_asset: RigidObject = env.scene[selected_asset_name] + # gear_asset.write_root_pose_to_sim( + # torch.cat([gear_pos_world[row_idx:row_idx+1], gear_quat_world[row_idx:row_idx+1]], dim=-1), + # env_ids=torch.tensor([env_id], device=env.device) + # ) + + # Close the gripper by setting the finger_joint based on gear type + all_joints, all_joints_names = robot_asset.find_joints([".*"]) + + joint_pos = robot_asset.data.joint_pos[env_ids].clone() + + # @ireti: We need to change this so it workes with teh public 2f140 gripper that uses mimic joints + finger_joints = all_joints[num_arm_joints:] + finger_joint_names = all_joints_names[num_arm_joints:] + + # for i in range(10): + # env.sim.render() + # input("Press Enter to continue 2...") + + hand_grasp_pos = env.cfg.hand_grasp_pos[gear_key] + set_finger_joint_pos_2f_140(joint_pos, env_ids_list, finger_joints, hand_grasp_pos) + + robot_asset.set_joint_position_target(joint_pos, joint_ids=all_joints, env_ids=env_ids) + robot_asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + + # for i in range(10): + # env.sim.render() + # input("Press Enter to continue 3...") + + # Set finger joints based on gear type for each environment + hand_close_pos = env.cfg.hand_close_pos + set_finger_joint_pos_2f_140(joint_pos, env_ids_list, finger_joints, hand_close_pos) + + robot_asset.set_joint_position_target(joint_pos, joint_ids=all_joints, env_ids=env_ids) + + # for i in range(10): + # env.sim.render() + # input("Press Enter to continue 4...") + +def randomize_gears_and_base_pose( + env: ManagerBasedEnv, + env_ids: torch.Tensor, + pose_range: dict = {}, + velocity_range: dict = {}, + gear_pos_range: dict = {}, + rot_randomization_range: dict = {}, + +): + """Randomize both the gear base pose and the poses of all gear types with the same value, + then apply the per-env `gear_pos_range` only to the gear selected by `randomize_gear_type`. + """ + if not hasattr(env, '_current_gear_type'): + raise ValueError("Environment does not have '_current_gear_type' attribute. Ensure randomize_gear_type event is configured.") + + device = env.device + + # Shared pose samples for all assets (base and all gears) + pose_keys = ["x", "y", "z", "roll", "pitch", "yaw"] + range_list_pose = [pose_range.get(key, (0.0, 0.0)) for key in pose_keys] + ranges_pose = torch.tensor(range_list_pose, device=device) + rand_pose_samples = math_utils.sample_uniform(ranges_pose[:, 0], ranges_pose[:, 1], (len(env_ids), 6), device=device) + + orientations_delta = math_utils.quat_from_euler_xyz( + rand_pose_samples[:, 3], rand_pose_samples[:, 4], rand_pose_samples[:, 5] + ) + + # Shared velocity samples for all assets (base and all gears) + range_list_vel = [velocity_range.get(key, (0.0, 0.0)) for key in pose_keys] + ranges_vel = torch.tensor(range_list_vel, device=device) + rand_vel_samples = math_utils.sample_uniform(ranges_vel[:, 0], ranges_vel[:, 1], (len(env_ids), 6), device=device) + + # Prepare assets: base + all possible gears (only those present in scene will be processed) + base_asset_name = "factory_gear_base" + possible_gear_assets = [ + # "factory_gear_small", + "factory_gear_medium", + # "factory_gear_large", + # "table", + ] + + + positions_by_asset = {} + orientations_by_asset = {} + velocities_by_asset = {} + + # Combine base and gear assets into one pass to avoid duplication + asset_names_to_process = [base_asset_name] + possible_gear_assets + for asset_name in asset_names_to_process: + asset: RigidObject | Articulation = env.scene[asset_name] + root_states = asset.data.default_root_state[env_ids].clone() + positions = root_states[:, 0:3] + env.scene.env_origins[env_ids] + rand_pose_samples[:, 0:3] + orientations = math_utils.quat_mul(root_states[:, 3:7], orientations_delta) + velocities = root_states[:, 7:13] + rand_vel_samples + positions_by_asset[asset_name] = positions + orientations_by_asset[asset_name] = orientations + velocities_by_asset[asset_name] = velocities + + # Per-env gear offset (gear_pos_range) applied only to the selected gear + range_list_gear = [gear_pos_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z"]] + ranges_gear = torch.tensor(range_list_gear, device=device) + rand_gear_offsets = math_utils.sample_uniform(ranges_gear[:, 0], ranges_gear[:, 1], (len(env_ids), 3), device=device) + + # Per-env gear orientation offset (rot_randomization_range) applied only to the selected gear + range_list_rot = [rot_randomization_range.get(key, (0.0, 0.0)) for key in ["roll", "pitch", "yaw"]] + ranges_rot = torch.tensor(range_list_rot, device=device) + rand_rot_offsets = math_utils.sample_uniform(ranges_rot[:, 0], ranges_rot[:, 1], (len(env_ids), 3), device=device) + rand_rot_quats = math_utils.quat_from_euler_xyz(rand_rot_offsets[:, 0], rand_rot_offsets[:, 1], rand_rot_offsets[:, 2]) + + env_ids_list = env_ids.tolist() if isinstance(env_ids, torch.Tensor) else list(env_ids) + for row_idx, env_id in enumerate(env_ids_list): + # env._current_gear_type[env_id] is expected to be one of: "gear_small", "gear_medium", "gear_large" + gear_key = env._current_gear_type[env_id] + selected_asset_name = f"factory_{gear_key}" + if selected_asset_name in positions_by_asset: + positions_by_asset[selected_asset_name][row_idx] = ( + positions_by_asset[selected_asset_name][row_idx] + rand_gear_offsets[row_idx] + ) + # Apply additional orientation randomization to the selected gear + orientations_by_asset[selected_asset_name][row_idx] = math_utils.quat_mul( + orientations_by_asset[selected_asset_name][row_idx], rand_rot_quats[row_idx] + ) + + # Write back to sim for all prepared assets + for asset_name in positions_by_asset.keys(): + asset = env.scene[asset_name] + positions = positions_by_asset[asset_name] + orientations = orientations_by_asset[asset_name] + velocities = velocities_by_asset[asset_name] + asset.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids) + asset.write_root_velocity_to_sim(velocities, env_ids=env_ids) + +def set_finger_joint_pos_2f_140( + joint_pos: torch.Tensor, + env_ids_list: list[int], + finger_joints: list[int], + finger_joint_position: float, +): + # Get hand close positions for each environment based on gear type + for row_idx, env_id in enumerate(env_ids_list): + + joint_pos[row_idx, finger_joints[0]] = finger_joint_position + joint_pos[row_idx, finger_joints[1]] = finger_joint_position + + # outer finger joints + joint_pos[row_idx, finger_joints[2]] = 0 + joint_pos[row_idx, finger_joints[3]] = 0 + + joint_pos[row_idx, finger_joints[4]] = -finger_joint_position + joint_pos[row_idx, finger_joints[5]] = -finger_joint_position + + joint_pos[row_idx, finger_joints[6]] = finger_joint_position + joint_pos[row_idx, finger_joints[7]] = finger_joint_position \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py new file mode 100644 index 00000000000..a434018bea0 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py @@ -0,0 +1,191 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +import isaaclab.utils.math as math_utils +from isaaclab.assets import RigidObject +from isaaclab.managers import SceneEntityCfg +from isaacsim.core.utils.torch.transformations import tf_combine + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def gear_shaft_pos_w( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("factory_gear_base") +) -> torch.Tensor: + """Gear shaft position in world frame with offset applied. + + Args: + env: The environment containing the assets + asset_cfg: Configuration of the gear base asset + + Returns: + Gear shaft position tensor of shape (num_envs, 3) + """ + # extract the asset (to enable type hinting) + asset: RigidObject = env.scene[asset_cfg.name] + + # get base gear position and orientation + base_pos = asset.data.root_pos_w + base_quat = asset.data.root_quat_w + + + # get current gear type from environment, use default if not set + if not hasattr(env, '_current_gear_type'): + print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.") + default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium') + current_gear_type = [default_gear_type] * env.num_envs + else: + current_gear_type = env._current_gear_type # type: ignore + + # get offset for the specific gear type + gear_offsets = getattr(env.cfg, 'gear_offsets', {}) + + # Initialize shaft positions + shaft_pos = torch.zeros_like(base_pos) + + # Apply offsets for each environment based on their gear type + for i in range(env.num_envs): + # gear_type = current_gear_type[i] if i < len(current_gear_type) else "gear_medium" + gear_type = current_gear_type[i] + + offset = torch.tensor(gear_offsets[gear_type], device=base_pos.device, dtype=base_pos.dtype) + offset_pos = offset.unsqueeze(0) + shaft_pos[i] = base_pos[i] + offset_pos[0] + # Apply position and rotation offsets if provided + # create identity quaternion + rot_offset_tensor = torch.tensor([1.0, 0.0, 0.0, 0.0], device=base_pos.device, dtype=base_pos.dtype).unsqueeze(0) + _, shaft_pos[i] = tf_combine( + base_quat[i:i+1], base_pos[i:i+1], + rot_offset_tensor, offset_pos + ) + + return shaft_pos - env.scene.env_origins + + +def gear_shaft_quat_w( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("factory_gear_base"), +) -> torch.Tensor: + """Gear shaft orientation in world frame. + + Args: + env: The environment containing the assets + asset_cfg: Configuration of the gear base asset + gear_type: Type of gear ('gear_small', 'gear_medium', 'gear_large') + + Returns: + Gear shaft orientation tensor of shape (num_envs, 4) + """ + # extract the asset (to enable type hinting) + asset: RigidObject = env.scene[asset_cfg.name] + + + # get base quaternion + base_quat = asset.data.root_quat_w + + # ensure w component is positive for each environment + # if w is negative, negate the entire quaternion to maintain same orientation + w_negative = base_quat[:, 0] < 0 + positive_quat = base_quat.clone() + positive_quat[w_negative] = -base_quat[w_negative] + + return positive_quat + +def gear_pos_w( + env: ManagerBasedRLEnv, +) -> torch.Tensor: + """Gear position in world frame. + + Args: + env: The environment containing the assets + + Returns: + Gear position tensor of shape (num_envs, 3) + """ + # get current gear type from environment, use default if not set + if not hasattr(env, '_current_gear_type'): + print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.") + default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium') + current_gear_type = [default_gear_type] * env.num_envs + else: + current_gear_type = env._current_gear_type # type: ignore + + # Create a mapping from gear type to asset name + gear_to_asset_mapping = { + 'gear_small': 'factory_gear_small', + 'gear_medium': 'factory_gear_medium', + 'gear_large': 'factory_gear_large' + } + + # Initialize positions array + gear_positions = torch.zeros((env.num_envs, 3), device=env.device) + + # Get positions for each environment based on their gear type + for i in range(env.num_envs): + gear_type = current_gear_type[i] + asset_name = gear_to_asset_mapping.get(gear_type) + + asset: RigidObject = env.scene[asset_name] + + # Get position for this specific environment + gear_positions[i] = asset.data.root_pos_w[i] + + return gear_positions + +def gear_quat_w( + env: ManagerBasedRLEnv, +) -> torch.Tensor: + """Gear orientation in world frame. + + Args: + env: The environment containing the assets + + Returns: + Gear orientation tensor of shape (num_envs, 4) + """ + # get current gear type from environment, use default if not set + if not hasattr(env, '_current_gear_type'): + print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.") + default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium') + current_gear_type = [default_gear_type] * env.num_envs + else: + current_gear_type = env._current_gear_type # type: ignore + + # Create a mapping from gear type to asset name + gear_to_asset_mapping = { + 'gear_small': 'factory_gear_small', + 'gear_medium': 'factory_gear_medium', + 'gear_large': 'factory_gear_large' + } + + # Initialize quaternions array + gear_positive_quat = torch.zeros((env.num_envs, 4), device=env.device) + + # Get quaternions for each environment based on their gear type + for i in range(env.num_envs): + gear_type = current_gear_type[i] + asset_name = gear_to_asset_mapping.get(gear_type) + + # Get the asset for this specific gear type + asset: RigidObject = env.scene[asset_name] + + # Get quaternion for this specific environment + gear_quat = asset.data.root_quat_w[i] + + # ensure w component is positive for each environment + # if w is negative, negate the entire quaternion to maintain same orientation + if gear_quat[0] < 0: + gear_quat = -gear_quat + + gear_positive_quat[i] = gear_quat + + return gear_positive_quat \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py index 0d14620e225..8e44e65f416 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py @@ -12,6 +12,7 @@ from isaaclab.managers import SceneEntityCfg from isaaclab.sensors.frame_transformer.frame_transformer import FrameTransformer +from isaaclab.utils.math import quat_mul if TYPE_CHECKING: from isaaclab.envs import ManagerBasedRLEnv @@ -229,3 +230,289 @@ def keypoint_command_error_exp( keypoint_reward_exp += 1.0 / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist)) return keypoint_reward_exp + +def keypoint_entity_error( + env: ManagerBasedRLEnv, + asset_cfg_1: SceneEntityCfg, + keypoint_scale: float = 1.0, + add_cube_center_kp: bool = True, + quat_offset: torch.Tensor | None = None, + pos_offset: torch.Tensor | None = None +) -> torch.Tensor: + """Compute keypoint distance between a RigidObject and the dynamically selected gear. + + The function computes the keypoint distance between the poses of two entities. + Keypoints are created by applying offsets to both poses and the distance is + computed as the L2-norm between corresponding keypoints. + + Args: + env: The environment containing the assets + asset_cfg_1: Configuration of the first asset (RigidObject) + keypoint_scale: Scale factor for keypoint offsets + add_cube_center_kp: Whether to include the center keypoint (0, 0, 0) + quat_offset: Optional quaternion offset to apply to asset_1's orientation (w, x, y, z) + pos_offset: Optional position offset to apply to asset_1's position (x, y, z) + + Returns: + Keypoint distance tensor of shape (num_envs,) where each element + is the mean L2 norm distance between corresponding keypoints + """ + # extract the assets (to enable type hinting) + asset_1 = env.scene[asset_cfg_1.name] # RigidObject + + # get current poses in world frame for asset_1 + # asset_1 is RigidObject - use body_pos_w and body_quat_w + curr_pos_1 = asset_1.data.body_pos_w[:, 0] # type: ignore + curr_quat_1 = asset_1.data.body_quat_w[:, 0] # type: ignore + + # Apply position offset to asset_1 if provided + if pos_offset is not None: + # Convert list to tensor if needed + if isinstance(pos_offset, list): + pos_offset = torch.tensor(pos_offset, device=curr_pos_1.device, dtype=curr_pos_1.dtype) + # Ensure pos_offset has the right shape (num_envs, 3) + if pos_offset.dim() == 1: + pos_offset = pos_offset.unsqueeze(0).expand(curr_pos_1.shape[0], -1) + # Apply the offset by adding to position + curr_pos_1 = curr_pos_1 + pos_offset + + # Apply quaternion offset to asset_1 if provided + if quat_offset is not None: + # Convert list to tensor if needed + if isinstance(quat_offset, list): + quat_offset = torch.tensor(quat_offset, device=curr_quat_1.device, dtype=curr_quat_1.dtype) + # Ensure quat_offset has the right shape (num_envs, 4) + if quat_offset.dim() == 1: + quat_offset = quat_offset.unsqueeze(0).expand(curr_quat_1.shape[0], -1) + # Apply the offset by quaternion multiplication + # TODO: check if this is correct or if its shoudl be the inverse + curr_quat_1 = quat_mul(quat_offset, curr_quat_1) + + # Handle per-environment gear type selection + if hasattr(env, '_current_gear_type'): + # Get the current gear type for each environment + current_gear_type = env._current_gear_type # type: ignore + + # Create a mapping from gear type to asset name + gear_to_asset_mapping = { + 'gear_small': 'factory_gear_small', + 'gear_medium': 'factory_gear_medium', + 'gear_large': 'factory_gear_large' + } + + # Create arrays to store poses for all environments + curr_pos_2_all = torch.zeros_like(curr_pos_1) + curr_quat_2_all = torch.zeros_like(curr_quat_1) + + # Get poses for each environment based on their gear type + for env_id in range(env.num_envs): + # Get the gear type for this specific environment + if env_id < len(current_gear_type): + selected_gear_type = current_gear_type[env_id] + else: + print(f"ERROR: env_id {env_id} is out of bounds for current_gear_type (length: {len(current_gear_type)}), using 'gear_medium' as fallback") + selected_gear_type = 'gear_medium' + selected_asset_name = gear_to_asset_mapping.get(selected_gear_type, 'factory_gear_medium') + + # Get the asset for this specific gear type + try: + asset_2 = env.scene[selected_asset_name] + except KeyError: + # Fallback to factory_gear_medium if the asset doesn't exist + asset_2 = env.scene['factory_gear_medium'] + + # Get poses for this specific environment + curr_pos_2_all[env_id] = asset_2.data.body_pos_w[env_id, 0] # type: ignore + curr_quat_2_all[env_id] = asset_2.data.body_quat_w[env_id, 0] # type: ignore + + # Compute keypoint distance for all environments at once + keypoint_dist_sep = compute_keypoint_distance( + current_pos=curr_pos_1, + current_quat=curr_quat_1, + target_pos=curr_pos_2_all, + target_quat=curr_quat_2_all, + keypoint_scale=keypoint_scale, + add_cube_center_kp=add_cube_center_kp, + device=curr_pos_1.device + ) + else: + # Fallback to factory_gear_medium if _current_gear_type is not available + print("WARNING: env._current_gear_type is not available, using factory_gear_medium as fallback") + asset_2 = env.scene['factory_gear_medium'] + + # get current poses in world frame for asset_2 + curr_pos_2 = asset_2.data.body_pos_w[:, 0] # type: ignore + curr_quat_2 = asset_2.data.body_quat_w[:, 0] # type: ignore + + # compute keypoint distance + keypoint_dist_sep = compute_keypoint_distance( + current_pos=curr_pos_1, + current_quat=curr_quat_1, + target_pos=curr_pos_2, + target_quat=curr_quat_2, + keypoint_scale=keypoint_scale, + add_cube_center_kp=add_cube_center_kp, + device=curr_pos_1.device + ) + + # Return mean distance across keypoints to match expected reward shape (num_envs,) + return keypoint_dist_sep.mean(-1) + + +def keypoint_entity_error_exp( + env: ManagerBasedRLEnv, + asset_cfg_1: SceneEntityCfg, + kp_exp_coeffs: list[tuple[float, float]] = [(1.0, 0.1)], + kp_use_sum_of_exps: bool = True, + keypoint_scale: float = 1.0, + add_cube_center_kp: bool = True, + quat_offset: torch.Tensor | None = None, + pos_offset: torch.Tensor | None = None +) -> torch.Tensor: + """Compute exponential keypoint reward between a RigidObject and the dynamically selected gear. + + The function computes the keypoint distance between the poses of two entities, + then applies an exponential reward function. The reward is computed using the + formula: 1 / (exp(a * distance) + b + exp(-a * distance)) where a and b are coefficients. + + Args: + env: The environment containing the assets + asset_cfg_1: Configuration of the first asset (RigidObject) + kp_exp_coeffs: List of (a, b) coefficient pairs for exponential reward + kp_use_sum_of_exps: Whether to use sum of exponentials (True) or single exponential (False) + keypoint_scale: Scale factor for keypoint offsets + add_cube_center_kp: Whether to include the center keypoint (0, 0, 0) + + Returns: + Exponential keypoint reward tensor of shape (num_envs,) where each element + is the exponential reward value + """ + # extract the assets (to enable type hinting) + asset_1 = env.scene[asset_cfg_1.name] # RigidObject + + # get current poses in world frame for asset_1 + # asset_1 is RigidObject - use body_pos_w and body_quat_w + curr_pos_1 = asset_1.data.body_pos_w[:, 0] # type: ignore + curr_quat_1 = asset_1.data.body_quat_w[:, 0] # type: ignore + + # Apply position offset to asset_1 if provided + if pos_offset is not None: + # Convert list to tensor if needed + if isinstance(pos_offset, list): + pos_offset = torch.tensor(pos_offset, device=curr_pos_1.device, dtype=curr_pos_1.dtype) + # Ensure pos_offset has the right shape (num_envs, 3) + if pos_offset.dim() == 1: + pos_offset = pos_offset.unsqueeze(0).expand(curr_pos_1.shape[0], -1) + # Apply the offset by adding to position + curr_pos_1 = curr_pos_1 + pos_offset + + # Apply quaternion offset to asset_1 if provided + if quat_offset is not None: + # Convert list to tensor if needed + if isinstance(quat_offset, list): + quat_offset = torch.tensor(quat_offset, device=curr_quat_1.device, dtype=curr_quat_1.dtype) + # Ensure quat_offset has the right shape (num_envs, 4) + if quat_offset.dim() == 1: + quat_offset = quat_offset.unsqueeze(0).expand(curr_quat_1.shape[0], -1) + # Apply the offset by quaternion multiplication + # TODO: check if this is correct or if its shoudl be the inverse + curr_quat_1 = quat_mul(quat_offset, curr_quat_1) + + # Handle per-environment gear type selection + if hasattr(env, '_current_gear_type'): + # Get the current gear type for each environment + current_gear_type = env._current_gear_type # type: ignore + + # Create a mapping from gear type to asset name + gear_to_asset_mapping = { + 'gear_small': 'factory_gear_small', + 'gear_medium': 'factory_gear_medium', + 'gear_large': 'factory_gear_large' + } + + # Create arrays to store poses for all environments + curr_pos_2_all = torch.zeros_like(curr_pos_1) + curr_quat_2_all = torch.zeros_like(curr_quat_1) + + # Get poses for each environment based on their gear type + for env_id in range(env.num_envs): + # Get the gear type for this specific environment + if env_id < len(current_gear_type): + selected_gear_type = current_gear_type[env_id] + else: + print(f"ERROR: env_id {env_id} is out of bounds for current_gear_type (length: {len(current_gear_type)}), using 'gear_medium' as fallback") + selected_gear_type = 'gear_medium' + selected_asset_name = gear_to_asset_mapping.get(selected_gear_type, 'factory_gear_medium') + + # Get the asset for this specific gear type + try: + asset_2 = env.scene[selected_asset_name] + except KeyError: + # Fallback to factory_gear_medium if the asset doesn't exist + asset_2 = env.scene['factory_gear_medium'] + + # Get poses for this specific environment + curr_pos_2_all[env_id] = asset_2.data.body_pos_w[env_id, 0] # type: ignore + curr_quat_2_all[env_id] = asset_2.data.body_quat_w[env_id, 0] # type: ignore + + # Compute keypoint distance for all environments at once + keypoint_dist_sep = compute_keypoint_distance( + current_pos=curr_pos_1, + current_quat=curr_quat_1, + target_pos=curr_pos_2_all, + target_quat=curr_quat_2_all, + keypoint_scale=keypoint_scale, + add_cube_center_kp=add_cube_center_kp, + device=curr_pos_1.device + ) + + # compute exponential reward + keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0]) + + if kp_use_sum_of_exps: + # Use sum of exponentials: average across keypoints for each coefficient + for coeff in kp_exp_coeffs: + a, b = coeff + keypoint_reward_exp += (1. / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))).mean(-1) + else: + # Use single exponential: average keypoint distance first, then apply exponential + keypoint_dist = keypoint_dist_sep.mean(-1) # shape: (num_envs,) + for coeff in kp_exp_coeffs: + a, b = coeff + keypoint_reward_exp += 1. / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist)) + else: + # Fallback to factory_gear_medium if _current_gear_type is not available + print("WARNING: env._current_gear_type is not available, using factory_gear_medium as fallback") + asset_2 = env.scene['factory_gear_medium'] + + # get current poses in world frame for asset_2 + curr_pos_2 = asset_2.data.body_pos_w[:, 0] # type: ignore + curr_quat_2 = asset_2.data.body_quat_w[:, 0] # type: ignore + + # compute keypoint distance + keypoint_dist_sep = compute_keypoint_distance( + current_pos=curr_pos_1, + current_quat=curr_quat_1, + target_pos=curr_pos_2, + target_quat=curr_quat_2, + keypoint_scale=keypoint_scale, + add_cube_center_kp=add_cube_center_kp, + device=curr_pos_1.device + ) + + # compute exponential reward + keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0]) + + if kp_use_sum_of_exps: + # Use sum of exponentials: average across keypoints for each coefficient + for coeff in kp_exp_coeffs: + a, b = coeff + keypoint_reward_exp += (1. / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))).mean(-1) + else: + # Use single exponential: average keypoint distance first, then apply exponential + keypoint_dist = keypoint_dist_sep.mean(-1) # shape: (num_envs,) + for coeff in kp_exp_coeffs: + a, b = coeff + keypoint_reward_exp += 1. / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist)) + + return keypoint_reward_exp From 0fc0098b84fd2d4ed9bb2f4c5bde900e8a92a019 Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Tue, 16 Sep 2025 15:09:51 -0700 Subject: [PATCH 02/10] revert rsl_rl change --- scripts/reinforcement_learning/rsl_rl/play.py | 42 +++++-------- .../reinforcement_learning/rsl_rl/train.py | 21 ++++++- .../isaaclab_rl/rsl_rl/vecenv_wrapper.py | 60 +++++++++++++------ .../gear_assembly/gear_assembly_env_cfg.py | 38 ++++++------ 4 files changed, 93 insertions(+), 68 deletions(-) diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py index 11ef7399462..150bbd03492 100644 --- a/scripts/reinforcement_learning/rsl_rl/play.py +++ b/scripts/reinforcement_learning/rsl_rl/play.py @@ -58,7 +58,7 @@ import time import torch -from rsl_rl.runners import DistillationRunner, OnPolicyRunner +from rsl_rl.runners import OnPolicyRunner from isaaclab.envs import ( DirectMARLEnv, @@ -71,7 +71,7 @@ from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx import isaaclab_tasks # noqa: F401 from isaaclab_tasks.utils import get_checkpoint_path @@ -81,14 +81,14 @@ @hydra_task_config(args_cli.task, args_cli.agent) -def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): """Play with RSL-RL agent.""" # grab task name for checkpoint path task_name = args_cli.task.split(":")[-1] train_task_name = task_name.replace("-Play", "") # override configurations with non-hydra CLI arguments - agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs # set the environment seed @@ -112,9 +112,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen log_dir = os.path.dirname(resume_path) - # set the log directory for the environment (works for all environment types) - env_cfg.log_dir = log_dir - # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) @@ -139,43 +136,32 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print(f"[INFO]: Loading model checkpoint from: {resume_path}") # load previously trained model - if agent_cfg.class_name == "OnPolicyRunner": - runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - elif agent_cfg.class_name == "DistillationRunner": - runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - else: - raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") - runner.load(resume_path) + ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + ppo_runner.load(resume_path) # obtain the trained policy for inference - policy = runner.get_inference_policy(device=env.unwrapped.device) + policy = ppo_runner.get_inference_policy(device=env.unwrapped.device) # extract the neural network module # we do this in a try-except to maintain backwards compatibility. try: # version 2.3 onwards - policy_nn = runner.alg.policy + policy_nn = ppo_runner.alg.policy except AttributeError: # version 2.2 and below - policy_nn = runner.alg.actor_critic - - # extract the normalizer - if hasattr(policy_nn, "actor_obs_normalizer"): - normalizer = policy_nn.actor_obs_normalizer - elif hasattr(policy_nn, "student_obs_normalizer"): - normalizer = policy_nn.student_obs_normalizer - else: - normalizer = None + policy_nn = ppo_runner.alg.actor_critic # export policy to onnx/jit export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") - export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") - export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") + export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt") + export_policy_as_onnx( + policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx" + ) dt = env.unwrapped.step_dt # reset environment - obs = env.get_observations() + obs, _ = env.get_observations() timestep = 0 # simulate environment while simulation_app.is_running(): diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index b2056551f0c..0b1d698d2b3 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -10,6 +10,8 @@ import argparse import sys +import rsl_rl + from isaaclab.app import AppLauncher # local imports @@ -56,8 +58,9 @@ from packaging import version # check minimum supported rsl-rl version -RSL_RL_VERSION = "3.0.1" +RSL_RL_VERSION = "2.3.1" installed_version = metadata.version("rsl-rl-lib") +print("installed_version", installed_version) if version.parse(installed_version) < version.parse(RSL_RL_VERSION): if platform.system() == "Windows": cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] @@ -78,7 +81,7 @@ from datetime import datetime import omni -from rsl_rl.runners import DistillationRunner, OnPolicyRunner +from rsl_rl.runners import OnPolicyRunner from isaaclab.envs import ( DirectMARLEnv, @@ -199,6 +202,20 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg) dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg) + import rsl_rl + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + print("[INFO] rsl_rl library location:", rsl_rl.__file__) + # run training runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index 73ceae04693..d909bf2d912 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -5,7 +5,6 @@ import gymnasium as gym import torch -from tensordict import TensorDict from rsl_rl.env import VecEnv @@ -13,9 +12,16 @@ class RslRlVecEnvWrapper(VecEnv): - """Wraps around Isaac Lab environment for the RSL-RL library + """Wraps around Isaac Lab environment for RSL-RL library + + To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_privileged_obs` (int). + This is used by the learning agent to allocate buffers in the trajectory memory. Additionally, the returned + observations should have the key "critic" which corresponds to the privileged observations. Since this is + optional for some environments, the wrapper checks if these attributes exist. If they don't then the wrapper + defaults to zero as number of privileged observations. .. caution:: + This class must be the last wrapper in the wrapper chain. This is because the wrapper does not follow the :class:`gym.Wrapper` interface. Any subsequent wrappers will need to be modified to work with this wrapper. @@ -37,14 +43,12 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N Raises: ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`. """ - # check that input is valid if not isinstance(env.unwrapped, ManagerBasedRLEnv) and not isinstance(env.unwrapped, DirectRLEnv): raise ValueError( "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:" f" {type(env)}" ) - # initialize the wrapper self.env = env self.clip_actions = clip_actions @@ -59,6 +63,20 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N self.num_actions = self.unwrapped.action_manager.total_action_dim else: self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space) + if hasattr(self.unwrapped, "observation_manager"): + self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0] + else: + self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"]) + # -- privileged observations + if ( + hasattr(self.unwrapped, "observation_manager") + and "critic" in self.unwrapped.observation_manager.group_obs_dim + ): + self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0] + elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space: + self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"]) + else: + self.num_privileged_obs = 0 # modify the action space to the clip range self._modify_action_space() @@ -115,6 +133,14 @@ def unwrapped(self) -> ManagerBasedRLEnv | DirectRLEnv: Properties """ + def get_observations(self) -> tuple[torch.Tensor, dict]: + """Returns the current observations of the environment.""" + if hasattr(self.unwrapped, "observation_manager"): + obs_dict = self.unwrapped.observation_manager.compute() + else: + obs_dict = self.unwrapped._get_observations() + return obs_dict["policy"], {"observations": obs_dict} + @property def episode_length_buf(self) -> torch.Tensor: """The episode length buffer.""" @@ -136,20 +162,13 @@ def episode_length_buf(self, value: torch.Tensor): def seed(self, seed: int = -1) -> int: # noqa: D102 return self.unwrapped.seed(seed) - def reset(self) -> tuple[TensorDict, dict]: # noqa: D102 + def reset(self) -> tuple[torch.Tensor, dict]: # noqa: D102 # reset the environment - obs_dict, extras = self.env.reset() - return TensorDict(obs_dict, batch_size=[self.num_envs]), extras - - def get_observations(self) -> TensorDict: - """Returns the current observations of the environment.""" - if hasattr(self.unwrapped, "observation_manager"): - obs_dict = self.unwrapped.observation_manager.compute() - else: - obs_dict = self.unwrapped._get_observations() - return TensorDict(obs_dict, batch_size=[self.num_envs]) + obs_dict, _ = self.env.reset() + # return observations + return obs_dict["policy"], {"observations": obs_dict} - def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]: + def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: # clip actions if self.clip_actions is not None: actions = torch.clamp(actions, -self.clip_actions, self.clip_actions) @@ -157,12 +176,16 @@ def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.T obs_dict, rew, terminated, truncated, extras = self.env.step(actions) # compute dones for compatibility with RSL-RL dones = (terminated | truncated).to(dtype=torch.long) + # move extra observations to the extras dict + obs = obs_dict["policy"] + extras["observations"] = obs_dict # move time out information to the extras dict # this is only needed for infinite horizon tasks if not self.unwrapped.cfg.is_finite_horizon: extras["time_outs"] = truncated + # return the step information - return TensorDict(obs_dict, batch_size=[self.num_envs]), rew, dones, extras + return obs, rew, dones, extras def close(self): # noqa: D102 return self.env.close() @@ -177,8 +200,7 @@ def _modify_action_space(self): return # modify the action space to the clip range - # note: this is only possible for the box action space. we need to change it in the future for other - # action spaces. + # note: this is only possible for the box action space. we need to change it in the future for other action spaces. self.env.unwrapped.single_action_space = gym.spaces.Box( low=-self.clip_actions, high=self.clip_actions, shape=(self.num_actions,) ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py index 88a3b89009f..bfb5ceedee2 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py @@ -223,28 +223,28 @@ def __post_init__(self): self.enable_corruption = True self.concatenate_terms = True - # @configclass - # class CriticCfg(ObsGroup): - # """Observations for policy group.""" - - # # observation terms (order preserved) - # joint_pos = ObsTerm(func=mdp.joint_pos, - # params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) - # joint_vel = ObsTerm(func=mdp.joint_vel, - # params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) - # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, - # # noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025)) - # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add")) - # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) - # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) - # gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w) - - # gear_pos = ObsTerm(func=mdp.gear_pos_w) - # gear_quat = ObsTerm(func=mdp.gear_quat_w) + @configclass + class CriticCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + joint_pos = ObsTerm(func=mdp.joint_pos, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + joint_vel = ObsTerm(func=mdp.joint_vel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, + # noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025)) + # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add")) + # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) + gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) + gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w) + + gear_pos = ObsTerm(func=mdp.gear_pos_w) + gear_quat = ObsTerm(func=mdp.gear_quat_w) # observation groups policy: PolicyCfg = PolicyCfg() - # critic: CriticCfg = CriticCfg() + critic: CriticCfg = CriticCfg() @configclass From 3cda66bac766e7a5a1018aef3587bab50b8794d2 Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Tue, 16 Sep 2025 15:10:30 -0700 Subject: [PATCH 03/10] fixed_sigma=False, --- .../deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py index 636717b9b55..b322f1a6132 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py @@ -57,6 +57,7 @@ class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg): clip_actions = 1.0 resume = False policy = UpdatedRslRlPpoActorCriticRecurrentCfg( + fixed_sigma=False, init_noise_std=1.0, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], From f5a0c248a5b7f481ccdd31c4ac413f3a0d5ae8a9 Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Tue, 16 Sep 2025 15:44:58 -0700 Subject: [PATCH 04/10] cleanup 2 --- .../config/ur_10e/joint_pos_env_cfg.py | 9 -------- .../gear_assembly/gear_assembly_env_cfg.py | 21 ------------------- 2 files changed, 30 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py index 6899beb0512..a95a44c2a67 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py @@ -39,16 +39,11 @@ class EventCfg: robot_joint_stiffness_and_damping = EventTerm( func=mdp.randomize_actuator_gains, - # min_step_count_between_reset=720, - # min_step_count_between_reset=200, mode="reset", params={ - # "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]), # only the arm joints are randomized "stiffness_distribution_params": (0.75, 1.5), "damping_distribution_params": (0.3, 3.0), - # "stiffness_distribution_params": (1.0, 1.0), - # "damping_distribution_params": (1.0, 1.0), "operation": "scale", "distribution": "log_uniform", }, @@ -56,8 +51,6 @@ class EventCfg: joint_friction = EventTerm( func=mdp.randomize_joint_parameters, - # min_step_count_between_reset=720, - # min_step_count_between_reset=200, mode="reset", params={ "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]), @@ -155,8 +148,6 @@ class EventCfg: mode="reset", params={ "robot_asset_cfg": SceneEntityCfg("robot"), - # "pos_offset": [-0.030375, 0.0, -0.255], # Offset from wrist_3_link to gripper position - # "pos_offset": [0.0, 0.030375, -0.26], "rot_offset": [0.0, math.sqrt(2)/2, math.sqrt(2)/2, 0.0], "pos_randomization_range": { "x": [-0.0, 0.0], diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py index bfb5ceedee2..7cd3e2f4867 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py @@ -173,26 +173,6 @@ class GearAssemblySceneCfg(InteractiveSceneCfg): spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0), ) -# @configclass -# class CommandsCfg: -# """Command terms for the MDP.""" - -# ee_pose = mdp.UniformPoseCommandCfg( -# asset_name="robot", -# body_name=MISSING, -# resampling_time_range=(4.0, 4.0), -# debug_vis=True, -# ranges=mdp.UniformPoseCommandCfg.Ranges( -# pos_x=(0.35, 0.65), -# pos_y=(-0.2, 0.2), -# pos_z=(0.15, 0.5), -# roll=(0.0, 0.0), -# pitch=MISSING, # depends on end-effector axis -# yaw=(-3.14, 3.14), -# ), -# ) - - @configclass class ActionsCfg: """Action specifications for the MDP.""" @@ -308,7 +288,6 @@ class GearAssemblyEnvCfg(ManagerBasedRLEnvCfg): # Basic settings observations: ObservationsCfg = ObservationsCfg() actions: ActionsCfg = ActionsCfg() - # commands: CommandsCfg = CommandsCfg() # MDP settings rewards: RewardsCfg = RewardsCfg() terminations: TerminationsCfg = TerminationsCfg() From b175571b55a97d2524eafb31a534ca108020569c Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Wed, 17 Sep 2025 09:44:29 -0700 Subject: [PATCH 05/10] remove some cleanup --- .../robots/universal_robots.py | 180 +++++++++++++++--- .../config/ur_10e/joint_pos_env_cfg.py | 35 +--- .../gear_assembly/gear_assembly_env_cfg.py | 38 ++-- 3 files changed, 171 insertions(+), 82 deletions(-) diff --git a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py index 4433b824235..209ebfc107a 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py @@ -55,47 +55,63 @@ UR10e_CFG = ArticulationCfg( spawn=sim_utils.UsdFileCfg( usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/UniversalRobots/ur10e/ur10e.usd", + activate_contact_sensors=True, rigid_props=sim_utils.RigidBodyPropertiesCfg( disable_gravity=True, max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, ), articulation_props=sim_utils.ArticulationRootPropertiesCfg( - enabled_self_collisions=False, solver_position_iteration_count=16, solver_velocity_iteration_count=1 + enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1 ), - activate_contact_sensors=False, + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), ), init_state=ArticulationCfg.InitialStateCfg( joint_pos={ - "shoulder_pan_joint": 3.141592653589793, - "shoulder_lift_joint": -1.5707963267948966, - "elbow_joint": 1.5707963267948966, - "wrist_1_joint": -1.5707963267948966, - "wrist_2_joint": -1.5707963267948966, - "wrist_3_joint": 0.0, + "shoulder_pan_joint": 2.7228e+00, + "shoulder_lift_joint": -8.3962e-01, + "elbow_joint": 1.3684e+00, + "wrist_1_joint": -2.1048e+00, + "wrist_2_joint": -1.5691e+00, + "wrist_3_joint": -1.9896e+00, + "finger_joint": 0.0, }, pos=(0.0, 0.0, 0.0), - rot=(1.0, 0.0, 0.0, 0.0), + rot=(0.0, 0.0, 0.0, 1.0), ), actuators={ # 'shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint' "shoulder": ImplicitActuatorCfg( joint_names_expr=["shoulder_.*"], + effort_limit=330.0, + velocity_limit=2.175, stiffness=1320.0, - damping=72.6636085, + damping=72.0, friction=0.0, armature=0.0, ), "elbow": ImplicitActuatorCfg( joint_names_expr=["elbow_joint"], + effort_limit=150.0, + velocity_limit=2.175, stiffness=600.0, - damping=34.64101615, + damping=50.0, friction=0.0, armature=0.0, ), "wrist": ImplicitActuatorCfg( joint_names_expr=["wrist_.*"], + effort_limit=56.0, + velocity_limit=2.175, stiffness=216.0, - damping=29.39387691, + damping=30.0, friction=0.0, armature=0.0, ), @@ -132,34 +148,138 @@ UR10e_ROBOTIQ_GRIPPER_CFG.init_state.joint_pos[".*_inner_finger_pad_joint"] = 0.0 UR10e_ROBOTIQ_GRIPPER_CFG.init_state.joint_pos[".*_outer_.*_joint"] = 0.0 # the major actuator joint for gripper -UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_drive"] = ImplicitActuatorCfg( +UR10e_ROBOTIQ_GRIPPER_CFG.actuators["finger_joint"] = ImplicitActuatorCfg( joint_names_expr=["finger_joint"], - effort_limit_sim=10.0, - velocity_limit_sim=1.0, - stiffness=11.25, - damping=0.1, + effort_limit_sim=1.0, + velocity_limit=2.175, + stiffness=20.0, + damping=8.94, friction=0.0, - armature=0.0, + armature=0.0, # 0.57 ) # the auxiliary actuator joint for gripper -UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_finger"] = ImplicitActuatorCfg( - joint_names_expr=[".*_inner_finger_joint"], +UR10e_ROBOTIQ_GRIPPER_CFG.actuators["others_1"] = ImplicitActuatorCfg( + joint_names_expr=['right_outer_knuckle_joint', 'left_outer_finger_joint'], effort_limit_sim=1.0, - velocity_limit_sim=1.0, - stiffness=0.2, - damping=0.001, + velocity_limit=2.175, + stiffness=0.0, + damping=0.0, friction=0.0, - armature=0.0, + armature=0.0, # 0.57 ) # the passive joints for gripper -UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_passive"] = ImplicitActuatorCfg( - joint_names_expr=[".*_inner_finger_pad_joint", ".*_outer_finger_joint", "right_outer_knuckle_joint"], +UR10e_ROBOTIQ_GRIPPER_CFG.actuators["others_2"] = ImplicitActuatorCfg( + joint_names_expr=['right_outer_finger_joint', 'left_inner_finger_joint', 'right_inner_finger_joint', 'left_inner_finger_pad_joint', 'right_inner_finger_pad_joint'], effort_limit_sim=1.0, - velocity_limit_sim=1.0, - stiffness=0.0, - damping=0.0, + velocity_limit=2.175, + stiffness=400.0, + damping=20.0, friction=0.0, - armature=0.0, + armature=0.0, # 0.57 +) + +UR10e_2f140_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/IsaacARM/Assets/UR10/iakinola/ur10e_robotiq_140_variant.usd", + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Isaac/Samples/Rigging/Manipulator/import_manipulator/ur10e/ur/ur_gripper.usd", + # rigid_props=sim_utils.RigidBodyPropertiesCfg( + # disable_gravity=True, + # max_depenetration_velocity=5.0, + # ), + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1 + ), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + # articulation_props=sim_utils.ArticulationRootPropertiesCfg( + # enabled_self_collisions=False, solver_position_iteration_count=12, solver_velocity_iteration_count=1 + # ), + # activate_contact_sensors=False, + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "shoulder_pan_joint": 2.7228e+00, + "shoulder_lift_joint": -8.3962e-01, + "elbow_joint": 1.3684e+00, + "wrist_1_joint": -2.1048e+00, + "wrist_2_joint": -1.5691e+00, + "wrist_3_joint": -1.9896e+00, + "finger_joint": 0.0, + }, + pos=(0.0, 0.0, 0.0), + # @ireti: Figure out why this rotaion is requried. + # Otherwise the IK during initialization is does not converge. + rot=(0.0, 0.0, 0.0, 1.0), + ), + actuators={ + # @ireti: These values were obtained from 2025-05-30_20-35-06/params/env.yaml + "shoulder": ImplicitActuatorCfg( + joint_names_expr=["shoulder_.*"], + effort_limit=330.0, + velocity_limit=2.175, + stiffness=1320.0, + damping=72.0, + friction=0.0, + armature=0.0, + ), + "elbow": ImplicitActuatorCfg( + joint_names_expr=["elbow_joint"], + effort_limit=150.0, + velocity_limit=2.175, + stiffness=600.0, + damping=50.0, + friction=0.0, + armature=0.0, + ), + "wrist": ImplicitActuatorCfg( + joint_names_expr=["wrist_.*"], + effort_limit=56.0, + velocity_limit=2.175, + stiffness=216.0, + damping=30.0, + friction=0.0, + armature=0.0, + ), + "finger_joint": ImplicitActuatorCfg( + joint_names_expr=["finger_joint"], + effort_limit_sim=1.0, + velocity_limit=2.175, + stiffness=20.0, + damping=8.94, + friction=0.0, + armature=0.0, # 0.57 + ), + "others_1": ImplicitActuatorCfg( + joint_names_expr=['right_outer_knuckle_joint', 'left_outer_finger_joint'], + effort_limit_sim=1.0, + velocity_limit=2.175, + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, # 0.57 + ), + "others_2": ImplicitActuatorCfg( + joint_names_expr=['right_outer_finger_joint', 'left_inner_finger_joint', 'right_inner_finger_joint', 'left_inner_finger_pad_joint', 'right_inner_finger_pad_joint'], + effort_limit_sim=1.0, + velocity_limit=2.175, + stiffness=400.0, + damping=20.0, + friction=0.0, + armature=0.0, # 0.57 + ), + } ) """Configuration of UR-10E arm with Robotiq_2f_140 gripper.""" diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py index a95a44c2a67..8391bb9eb24 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py @@ -8,8 +8,6 @@ import torch from isaaclab.utils import configclass -import isaaclab.sim as sim_utils -from isaaclab.assets.articulation import ArticulationCfg from isaaclab.managers import EventTermCfg as EventTerm from isaaclab.managers import SceneEntityCfg @@ -171,37 +169,8 @@ def __post_init__(self): self.scene.robot = UR10e_ROBOTIQ_GRIPPER_CFG.replace( prim_path="{ENV_REGEX_NS}/Robot", spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace( - activate_contact_sensors=True, - rigid_props=sim_utils.RigidBodyPropertiesCfg( - disable_gravity=True, - max_depenetration_velocity=5.0, - linear_damping=0.0, - angular_damping=0.0, - max_linear_velocity=1000.0, - max_angular_velocity=3666.0, - enable_gyroscopic_forces=True, - solver_position_iteration_count=192, - solver_velocity_iteration_count=1, - max_contact_impulse=1e32, - ), - articulation_props=sim_utils.ArticulationRootPropertiesCfg( - enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1 - ), - collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), - ), - init_state=ArticulationCfg.InitialStateCfg( - joint_pos={ - "shoulder_pan_joint": 2.7228e+00, - "shoulder_lift_joint": -8.3962e-01, - "elbow_joint": 1.3684e+00, - "wrist_1_joint": -2.1048e+00, - "wrist_2_joint": -1.5691e+00, - "wrist_3_joint": -1.9896e+00, - "finger_joint": 0.0, - }, - pos=(0.0, 0.0, 0.0), - rot=(0.0, 0.0, 0.0, 1.0), - ), + usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd") + ) ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py index 7cd3e2f4867..ca2ede09e40 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py @@ -203,28 +203,28 @@ def __post_init__(self): self.enable_corruption = True self.concatenate_terms = True - @configclass - class CriticCfg(ObsGroup): - """Observations for policy group.""" - - # observation terms (order preserved) - joint_pos = ObsTerm(func=mdp.joint_pos, - params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) - joint_vel = ObsTerm(func=mdp.joint_vel, - params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) - # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, - # noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025)) - # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add")) - # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) - gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) - gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w) - - gear_pos = ObsTerm(func=mdp.gear_pos_w) - gear_quat = ObsTerm(func=mdp.gear_quat_w) + # @configclass + # class CriticCfg(ObsGroup): + # """Observations for policy group.""" + + # # observation terms (order preserved) + # joint_pos = ObsTerm(func=mdp.joint_pos, + # params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + # joint_vel = ObsTerm(func=mdp.joint_vel, + # params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])}) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, + # # noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025)) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add")) + # # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) + # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w) + # gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w) + + # gear_pos = ObsTerm(func=mdp.gear_pos_w) + # gear_quat = ObsTerm(func=mdp.gear_quat_w) # observation groups policy: PolicyCfg = PolicyCfg() - critic: CriticCfg = CriticCfg() + # critic: CriticCfg = CriticCfg() @configclass From 1a2858a277eee00931664ce9de25d0a9af4592ae Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Wed, 17 Sep 2025 15:21:10 -0700 Subject: [PATCH 06/10] use omniverse assets --- .../gear_assembly/config/ur_10e/joint_pos_env_cfg.py | 6 +++--- .../deploy/gear_assembly/gear_assembly_env_cfg.py | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py index 8391bb9eb24..f44059c4bd9 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py @@ -168,9 +168,9 @@ def __post_init__(self): # switch robot to ur10 with local asset path self.scene.robot = UR10e_ROBOTIQ_GRIPPER_CFG.replace( prim_path="{ENV_REGEX_NS}/Robot", - spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace( - usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd") - ) + # spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace( + # usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd") + # ) ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py index ca2ede09e40..4e438ac979b 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py @@ -56,7 +56,8 @@ class GearAssemblySceneCfg(InteractiveSceneCfg): prim_path="{ENV_REGEX_NS}/FactoryGearBase", # TODO: change to common isaac sim directory spawn=sim_utils.UsdFileCfg( - usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_base.usd"), + usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_base.usd", + # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_base.usd"), # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_base.usd", # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd", activate_contact_sensors=True, @@ -85,7 +86,8 @@ class GearAssemblySceneCfg(InteractiveSceneCfg): # prim_path="{ENV_REGEX_NS}/FactoryGearSmall", # # TODO: change to common isaac sim directory # spawn=sim_utils.UsdFileCfg( - # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_small.usd"), + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_small.usd", + # # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_small.usd"), # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_small.usd", # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/small_gear_scale_1p5_usd/small_gear_scale_1p5.usd", # activate_contact_sensors=True, @@ -112,7 +114,8 @@ class GearAssemblySceneCfg(InteractiveSceneCfg): prim_path="{ENV_REGEX_NS}/FactoryGearMedium", # TODO: change to common isaac sim directory spawn=sim_utils.UsdFileCfg( - usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_medium.usd"), + usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_medium.usd", + # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_medium.usd"), # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_medium.usd", # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/medium_gear_scale_1p5_usd/medium_gear_scale_1p5.usd", activate_contact_sensors=True, @@ -139,7 +142,7 @@ class GearAssemblySceneCfg(InteractiveSceneCfg): # prim_path="{ENV_REGEX_NS}/FactoryGearLarge", # # TODO: change to common isaac sim directory # spawn=sim_utils.UsdFileCfg( - # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_large.usd"), + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_large.usd", # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_large.usd", # # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/large_gear_scale_1p5_usd/large_gear_scale_1p5.usd", # activate_contact_sensors=True, From 037c30b0689249f584bef2ded3494415eb852263 Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Wed, 17 Sep 2025 16:19:27 -0700 Subject: [PATCH 07/10] use UR10e_2f140_CFG --- .../deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py index f44059c4bd9..2b48e109d1e 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py @@ -24,7 +24,7 @@ ## # Pre-defined configs ## -from isaaclab_assets.robots.universal_robots import UR10e_ROBOTIQ_GRIPPER_CFG # isort: skip +from isaaclab_assets.robots.universal_robots import UR10e_2f140_CFG # isort: skip ## @@ -166,9 +166,9 @@ def __post_init__(self): super().__post_init__() # switch robot to ur10 with local asset path - self.scene.robot = UR10e_ROBOTIQ_GRIPPER_CFG.replace( + self.scene.robot = UR10e_2f140_CFG.replace( prim_path="{ENV_REGEX_NS}/Robot", - # spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace( + # spawn=UR10e_2f140_CFG.spawn.replace( # usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd") # ) ) From 86fc486b1b4342079e7813a91aacf0e4809d9816 Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Thu, 18 Sep 2025 13:15:33 -0700 Subject: [PATCH 08/10] use new usd path --- .../isaaclab_assets/isaaclab_assets/robots/universal_robots.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py index 209ebfc107a..053f0ac49ef 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py @@ -180,7 +180,8 @@ UR10e_2f140_CFG = ArticulationCfg( spawn=sim_utils.UsdFileCfg( - usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/IsaacARM/Assets/UR10/iakinola/ur10e_robotiq_140_variant.usd", + usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/ur10e_robotiq_140_variant.usd", + # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/IsaacARM/Assets/UR10/iakinola/ur10e_robotiq_140_variant.usd", # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Isaac/Samples/Rigging/Manipulator/import_manipulator/ur10e/ur/ur_gripper.usd", # rigid_props=sim_utils.RigidBodyPropertiesCfg( # disable_gravity=True, From c4d8324b4bfe7042fe486545d611ae0d92eaee57 Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Mon, 29 Sep 2025 11:51:22 -0700 Subject: [PATCH 09/10] revert rls_rl changes --- scripts/reinforcement_learning/rsl_rl/play.py | 42 ++++++++----- .../reinforcement_learning/rsl_rl/train.py | 21 +------ .../isaaclab_rl/rsl_rl/vecenv_wrapper.py | 60 ++++++------------- source/isaaclab_rl/setup.py | 3 +- .../config/ur_10e/agents/rsl_rl_ppo_cfg.py | 7 +-- 5 files changed, 53 insertions(+), 80 deletions(-) diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py index 150bbd03492..11ef7399462 100644 --- a/scripts/reinforcement_learning/rsl_rl/play.py +++ b/scripts/reinforcement_learning/rsl_rl/play.py @@ -58,7 +58,7 @@ import time import torch -from rsl_rl.runners import OnPolicyRunner +from rsl_rl.runners import DistillationRunner, OnPolicyRunner from isaaclab.envs import ( DirectMARLEnv, @@ -71,7 +71,7 @@ from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx import isaaclab_tasks # noqa: F401 from isaaclab_tasks.utils import get_checkpoint_path @@ -81,14 +81,14 @@ @hydra_task_config(args_cli.task, args_cli.agent) -def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): """Play with RSL-RL agent.""" # grab task name for checkpoint path task_name = args_cli.task.split(":")[-1] train_task_name = task_name.replace("-Play", "") # override configurations with non-hydra CLI arguments - agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs # set the environment seed @@ -112,6 +112,9 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen log_dir = os.path.dirname(resume_path) + # set the log directory for the environment (works for all environment types) + env_cfg.log_dir = log_dir + # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) @@ -136,32 +139,43 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print(f"[INFO]: Loading model checkpoint from: {resume_path}") # load previously trained model - ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - ppo_runner.load(resume_path) + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + runner.load(resume_path) # obtain the trained policy for inference - policy = ppo_runner.get_inference_policy(device=env.unwrapped.device) + policy = runner.get_inference_policy(device=env.unwrapped.device) # extract the neural network module # we do this in a try-except to maintain backwards compatibility. try: # version 2.3 onwards - policy_nn = ppo_runner.alg.policy + policy_nn = runner.alg.policy except AttributeError: # version 2.2 and below - policy_nn = ppo_runner.alg.actor_critic + policy_nn = runner.alg.actor_critic + + # extract the normalizer + if hasattr(policy_nn, "actor_obs_normalizer"): + normalizer = policy_nn.actor_obs_normalizer + elif hasattr(policy_nn, "student_obs_normalizer"): + normalizer = policy_nn.student_obs_normalizer + else: + normalizer = None # export policy to onnx/jit export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") - export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt") - export_policy_as_onnx( - policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx" - ) + export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") + export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") dt = env.unwrapped.step_dt # reset environment - obs, _ = env.get_observations() + obs = env.get_observations() timestep = 0 # simulate environment while simulation_app.is_running(): diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index 0b1d698d2b3..a73c3b2881b 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -10,8 +10,6 @@ import argparse import sys -import rsl_rl - from isaaclab.app import AppLauncher # local imports @@ -58,9 +56,8 @@ from packaging import version # check minimum supported rsl-rl version -RSL_RL_VERSION = "2.3.1" +RSL_RL_VERSION = "3.1.0" installed_version = metadata.version("rsl-rl-lib") -print("installed_version", installed_version) if version.parse(installed_version) < version.parse(RSL_RL_VERSION): if platform.system() == "Windows": cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] @@ -81,7 +78,7 @@ from datetime import datetime import omni -from rsl_rl.runners import OnPolicyRunner +from rsl_rl.runners import DistillationRunner, OnPolicyRunner from isaaclab.envs import ( DirectMARLEnv, @@ -202,20 +199,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg) dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg) - import rsl_rl - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - print("[INFO] rsl_rl library location:", rsl_rl.__file__) - # run training runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index d909bf2d912..73ceae04693 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -5,6 +5,7 @@ import gymnasium as gym import torch +from tensordict import TensorDict from rsl_rl.env import VecEnv @@ -12,16 +13,9 @@ class RslRlVecEnvWrapper(VecEnv): - """Wraps around Isaac Lab environment for RSL-RL library - - To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_privileged_obs` (int). - This is used by the learning agent to allocate buffers in the trajectory memory. Additionally, the returned - observations should have the key "critic" which corresponds to the privileged observations. Since this is - optional for some environments, the wrapper checks if these attributes exist. If they don't then the wrapper - defaults to zero as number of privileged observations. + """Wraps around Isaac Lab environment for the RSL-RL library .. caution:: - This class must be the last wrapper in the wrapper chain. This is because the wrapper does not follow the :class:`gym.Wrapper` interface. Any subsequent wrappers will need to be modified to work with this wrapper. @@ -43,12 +37,14 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N Raises: ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`. """ + # check that input is valid if not isinstance(env.unwrapped, ManagerBasedRLEnv) and not isinstance(env.unwrapped, DirectRLEnv): raise ValueError( "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:" f" {type(env)}" ) + # initialize the wrapper self.env = env self.clip_actions = clip_actions @@ -63,20 +59,6 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N self.num_actions = self.unwrapped.action_manager.total_action_dim else: self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space) - if hasattr(self.unwrapped, "observation_manager"): - self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0] - else: - self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"]) - # -- privileged observations - if ( - hasattr(self.unwrapped, "observation_manager") - and "critic" in self.unwrapped.observation_manager.group_obs_dim - ): - self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0] - elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space: - self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"]) - else: - self.num_privileged_obs = 0 # modify the action space to the clip range self._modify_action_space() @@ -133,14 +115,6 @@ def unwrapped(self) -> ManagerBasedRLEnv | DirectRLEnv: Properties """ - def get_observations(self) -> tuple[torch.Tensor, dict]: - """Returns the current observations of the environment.""" - if hasattr(self.unwrapped, "observation_manager"): - obs_dict = self.unwrapped.observation_manager.compute() - else: - obs_dict = self.unwrapped._get_observations() - return obs_dict["policy"], {"observations": obs_dict} - @property def episode_length_buf(self) -> torch.Tensor: """The episode length buffer.""" @@ -162,13 +136,20 @@ def episode_length_buf(self, value: torch.Tensor): def seed(self, seed: int = -1) -> int: # noqa: D102 return self.unwrapped.seed(seed) - def reset(self) -> tuple[torch.Tensor, dict]: # noqa: D102 + def reset(self) -> tuple[TensorDict, dict]: # noqa: D102 # reset the environment - obs_dict, _ = self.env.reset() - # return observations - return obs_dict["policy"], {"observations": obs_dict} + obs_dict, extras = self.env.reset() + return TensorDict(obs_dict, batch_size=[self.num_envs]), extras + + def get_observations(self) -> TensorDict: + """Returns the current observations of the environment.""" + if hasattr(self.unwrapped, "observation_manager"): + obs_dict = self.unwrapped.observation_manager.compute() + else: + obs_dict = self.unwrapped._get_observations() + return TensorDict(obs_dict, batch_size=[self.num_envs]) - def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: + def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]: # clip actions if self.clip_actions is not None: actions = torch.clamp(actions, -self.clip_actions, self.clip_actions) @@ -176,16 +157,12 @@ def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch obs_dict, rew, terminated, truncated, extras = self.env.step(actions) # compute dones for compatibility with RSL-RL dones = (terminated | truncated).to(dtype=torch.long) - # move extra observations to the extras dict - obs = obs_dict["policy"] - extras["observations"] = obs_dict # move time out information to the extras dict # this is only needed for infinite horizon tasks if not self.unwrapped.cfg.is_finite_horizon: extras["time_outs"] = truncated - # return the step information - return obs, rew, dones, extras + return TensorDict(obs_dict, batch_size=[self.num_envs]), rew, dones, extras def close(self): # noqa: D102 return self.env.close() @@ -200,7 +177,8 @@ def _modify_action_space(self): return # modify the action space to the clip range - # note: this is only possible for the box action space. we need to change it in the future for other action spaces. + # note: this is only possible for the box action space. we need to change it in the future for other + # action spaces. self.env.unwrapped.single_action_space = gym.spaces.Box( low=-self.clip_actions, high=self.clip_actions, shape=(self.num_actions,) ) diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index a3675ac84e7..1d69ba74a4f 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -46,11 +46,10 @@ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", ], # rl-games still needs gym :( - # "rsl-rl": ["rsl-rl-lib==3.0.1"], + "rsl-rl": ["rsl-rl-lib==3.1.0"], } # Add the names with hyphens as aliases for convenience EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"] -# EXTRAS_REQUIRE["rsl_rl"] = EXTRAS_REQUIRE["rsl-rl"] # Cumulation of all extra-requires EXTRAS_REQUIRE["all"] = list(itertools.chain.from_iterable(EXTRAS_REQUIRE.values())) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py index b322f1a6132..7d84f620e29 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py @@ -10,7 +10,7 @@ @configclass class UpdatedRslRlPpoActorCriticRecurrentCfg(RslRlPpoActorCriticRecurrentCfg): - fixed_sigma = False + state_dependent_std = False share_weights = False @configclass @@ -57,7 +57,7 @@ class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg): clip_actions = 1.0 resume = False policy = UpdatedRslRlPpoActorCriticRecurrentCfg( - fixed_sigma=False, + state_dependent_std=False, init_noise_std=1.0, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], @@ -67,8 +67,7 @@ class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg): rnn_hidden_dim=256, rnn_num_layers=2, ) - algorithm = UpdatedRslRlPpoAlgorithmCfg( - bounds_loss_coef=0.0001, + algorithm = RslRlPpoAlgorithmCfg( value_loss_coef=1.0, use_clipped_value_loss=True, clip_param=0.2, From f805083e78298cedd9d963d8057fa624a98c3940 Mon Sep 17 00:00:00 2001 From: Ashwin Varghese Kuruttukulam Date: Mon, 29 Sep 2025 16:50:59 -0700 Subject: [PATCH 10/10] enable state_dependent_std --- source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py | 3 +++ .../config/ur_10e/agents/rsl_rl_ppo_cfg.py | 13 ++----------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py index 5b03a7c639b..bfc212f4006 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py @@ -31,6 +31,9 @@ class RslRlPpoActorCriticCfg: noise_std_type: Literal["scalar", "log"] = "scalar" """The type of noise standard deviation for the policy. Default is scalar.""" + state_dependent_std: bool = False + """Whether to use state-dependent standard deviation for the policy. Default is False.""" + actor_obs_normalization: bool = MISSING """Whether to normalize the observation for the actor network.""" diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py index 7d84f620e29..7d74c5e7d8e 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py @@ -8,15 +8,6 @@ from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg -@configclass -class UpdatedRslRlPpoActorCriticRecurrentCfg(RslRlPpoActorCriticRecurrentCfg): - state_dependent_std = False - share_weights = False - -@configclass -class UpdatedRslRlPpoAlgorithmCfg(RslRlPpoAlgorithmCfg): - bounds_loss_coef = 0.0 - @configclass class UR10GearAssemblyPPORunnerCfg(RslRlOnPolicyRunnerCfg): num_steps_per_env = 512 @@ -56,8 +47,8 @@ class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg): empirical_normalization = True clip_actions = 1.0 resume = False - policy = UpdatedRslRlPpoActorCriticRecurrentCfg( - state_dependent_std=False, + policy = RslRlPpoActorCriticRecurrentCfg( + state_dependent_std=True, init_noise_std=1.0, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64],