From f2fb46479cc00ac9c31c0e41be810dd5f278253d Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Tue, 16 Sep 2025 12:00:28 -0700
Subject: [PATCH 01/10] initial cleanup of single gear insertion

---
 source/isaaclab/isaaclab/envs/mdp/rewards.py  |   6 +
 .../isaaclab/isaaclab/utils/noise/__init__.py |   4 +-
 .../isaaclab/utils/noise/noise_cfg.py         |  12 +
 .../isaaclab/utils/noise/noise_model.py       |  68 +++
 source/isaaclab_rl/setup.py                   |   4 +-
 .../deploy/gear_assembly/__init__.py          |   6 +
 .../deploy/gear_assembly/config/__init__.py   |   9 +
 .../gear_assembly/config/ur_10e/__init__.py   |  34 ++
 .../config/ur_10e/agents/__init__.py          |   4 +
 .../config/ur_10e/agents/rsl_rl_ppo_cfg.py    |  83 ++++
 .../config/ur_10e/joint_pos_env_cfg.py        | 271 +++++++++++
 .../gear_assembly/gear_assembly_env_cfg.py    | 347 ++++++++++++++
 .../manipulation/deploy/mdp/__init__.py       |   2 +
 .../manipulation/deploy/mdp/events.py         | 451 ++++++++++++++++++
 .../manipulation/deploy/mdp/observations.py   | 191 ++++++++
 .../manipulation/deploy/mdp/rewards.py        | 287 +++++++++++
 16 files changed, 1775 insertions(+), 4 deletions(-)
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py

diff --git a/source/isaaclab/isaaclab/envs/mdp/rewards.py b/source/isaaclab/isaaclab/envs/mdp/rewards.py
index 20df2a557eb..4232c0a8895 100644
--- a/source/isaaclab/isaaclab/envs/mdp/rewards.py
+++ b/source/isaaclab/isaaclab/envs/mdp/rewards.py
@@ -241,11 +241,17 @@ def applied_torque_limits(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = Sc
     )
     return torch.sum(out_of_limits, dim=1)
 
+def action_rate(env: ManagerBasedRLEnv) -> torch.Tensor:
+    """Penalize the rate of change of the actions using L2 norm."""
+    return torch.norm(env.action_manager.action - env.action_manager.prev_action, p=2, dim=-1)
 
 def action_rate_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
     """Penalize the rate of change of the actions using L2 squared kernel."""
     return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1)
 
+def action(env: ManagerBasedRLEnv) -> torch.Tensor:
+    """Penalize the actions using L2 squared kernel (summed for each environment)."""
+    return torch.sum(env.action_manager.action**2, dim=-1)
 
 def action_l2(env: ManagerBasedRLEnv) -> torch.Tensor:
     """Penalize the actions using L2 squared kernel."""
diff --git a/source/isaaclab/isaaclab/utils/noise/__init__.py b/source/isaaclab/isaaclab/utils/noise/__init__.py
index d2f703758b0..ef2cea78582 100644
--- a/source/isaaclab/isaaclab/utils/noise/__init__.py
+++ b/source/isaaclab/isaaclab/utils/noise/__init__.py
@@ -26,8 +26,8 @@
 
 """
 from .noise_cfg import NoiseCfg  # noqa: F401
-from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg
-from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, constant_noise, gaussian_noise, uniform_noise
+from .noise_cfg import ConstantNoiseCfg, GaussianNoiseCfg, NoiseModelCfg, NoiseModelWithAdditiveBiasCfg, UniformNoiseCfg, ResetSampledNoiseModelCfg
+from .noise_model import NoiseModel, NoiseModelWithAdditiveBias, ResetSampledNoiseModel, constant_noise, gaussian_noise, uniform_noise
 
 # Backward compatibility
 ConstantBiasNoiseCfg = ConstantNoiseCfg
diff --git a/source/isaaclab/isaaclab/utils/noise/noise_cfg.py b/source/isaaclab/isaaclab/utils/noise/noise_cfg.py
index 0c49828b3ff..3de46bc89f1 100644
--- a/source/isaaclab/isaaclab/utils/noise/noise_cfg.py
+++ b/source/isaaclab/isaaclab/utils/noise/noise_cfg.py
@@ -109,3 +109,15 @@ class NoiseModelWithAdditiveBiasCfg(NoiseModelCfg):
 
     Defaults to True.
     """
+
+@configclass
+class ResetSampledNoiseModelCfg(NoiseModelCfg):
+    """Configuration for a noise model that samples noise ONLY during reset."""
+
+    class_type: type = noise_model.ResetSampledNoiseModel
+
+    noise_cfg: NoiseCfg = MISSING
+    """The noise configuration for the noise.
+
+    Based on this configuration, the noise is sampled at every reset of the noise model.
+    """
diff --git a/source/isaaclab/isaaclab/utils/noise/noise_model.py b/source/isaaclab/isaaclab/utils/noise/noise_model.py
index dae36b55c72..d780d916067 100644
--- a/source/isaaclab/isaaclab/utils/noise/noise_model.py
+++ b/source/isaaclab/isaaclab/utils/noise/noise_model.py
@@ -189,3 +189,71 @@ def __call__(self, data: torch.Tensor) -> torch.Tensor:
             # now re-sample that expanded bias in-place
             self.reset()
         return super().__call__(data) + self._bias
+
+class ResetSampledNoiseModel(NoiseModel):
+    """Noise model that samples noise ONLY during reset and applies it consistently.
+
+    The noise is sampled from the configured distribution ONLY during reset and applied consistently
+    until the next reset. Unlike regular noise that generates new random values every step,
+    this model maintains the same noise values throughout an episode.
+    """
+
+    def __init__(self, noise_model_cfg: noise_cfg.NoiseModelCfg, num_envs: int, device: str):
+        # initialize parent class
+        super().__init__(noise_model_cfg, num_envs, device)
+        # store the noise configuration
+        self._noise_cfg = noise_model_cfg.noise_cfg
+        self._sampled_noise = torch.zeros((num_envs, 1), device=self._device)
+        self._num_components: int | None = None
+
+    def reset(self, env_ids: Sequence[int] | None = None):
+        """Reset the noise model by sampling NEW noise values.
+
+        This method samples new noise for the specified environments using the configured noise function.
+        The sampled noise will remain constant until the next reset.
+
+        Args:
+            env_ids: The environment ids to reset the noise model for. Defaults to None,
+                in which case all environments are considered.
+        """
+        # resolve the environment ids
+        if env_ids is None:
+            env_ids = slice(None)
+        
+        # Use the existing noise function to sample new noise
+        # Create dummy data to sample from the noise function
+        dummy_data = torch.zeros((env_ids.stop - env_ids.start if isinstance(env_ids, slice) else len(env_ids), 1), 
+                                device=self._device)
+        
+        # Sample noise using the configured noise function
+        sampled_noise = self._noise_model_cfg.noise_cfg.func(dummy_data, self._noise_model_cfg.noise_cfg)
+        
+        self._sampled_noise[env_ids] = sampled_noise
+
+    def __call__(self, data: torch.Tensor) -> torch.Tensor:
+        """Apply the pre-sampled noise to the data.
+
+        This method applies the noise that was sampled during the last reset.
+        No new noise is generated - the same values are used consistently.
+
+        Args:
+            data: The data to apply the noise to. Shape is (num_envs, ...).
+
+        Returns:
+            The data with the noise applied. Shape is the same as the input data.
+        """
+        # on first apply, expand noise to match last dim of data
+        if self._num_components is None:
+            *_, self._num_components = data.shape
+            # expand noise from (num_envs,1) to (num_envs, num_components)
+            self._sampled_noise = self._sampled_noise.repeat(1, self._num_components)
+        
+        # apply the noise based on operation
+        if self._noise_cfg.operation == "add":
+            return data + self._sampled_noise
+        elif self._noise_cfg.operation == "scale":
+            return data * self._sampled_noise
+        elif self._noise_cfg.operation == "abs":
+            return self._sampled_noise
+        else:
+            raise ValueError(f"Unknown operation in noise: {self._noise_cfg.operation}")
diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py
index f9ddcdb0fa5..a3675ac84e7 100644
--- a/source/isaaclab_rl/setup.py
+++ b/source/isaaclab_rl/setup.py
@@ -46,11 +46,11 @@
         "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11",
         "gym",
     ],  # rl-games still needs gym :(
-    "rsl-rl": ["rsl-rl-lib==3.0.1"],
+    # "rsl-rl": ["rsl-rl-lib==3.0.1"],
 }
 # Add the names with hyphens as aliases for convenience
 EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"]
-EXTRAS_REQUIRE["rsl_rl"] = EXTRAS_REQUIRE["rsl-rl"]
+# EXTRAS_REQUIRE["rsl_rl"] = EXTRAS_REQUIRE["rsl-rl"]
 
 # Cumulation of all extra-requires
 EXTRAS_REQUIRE["all"] = list(itertools.chain.from_iterable(EXTRAS_REQUIRE.values()))
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py
new file mode 100644
index 00000000000..525f8c7d27a
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Assemble 3 gears into a base."""
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py
new file mode 100644
index 00000000000..257dea87052
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Configurations for arm-based gear assembly environments."""
+
+# We leave this file empty since we don't want to expose any configs in this package directly.
+# We still need this file to import the "config" module in the parent package.
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py
new file mode 100644
index 00000000000..0f3d90bbcad
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/__init__.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import gymnasium as gym
+
+from . import agents
+##
+# Register Gym environments.
+##
+
+gym.register(
+    id="Isaac-GearAssembly-UR10e-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10eGearAssemblyEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyRNNPPORunnerCfg",
+        # "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10GearAssemblyPPORunnerCfg",
+    },
+)
+
+gym.register(
+    id="Isaac-GearAssembly-UR10e-Play-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10eGearAssemblyEnvCfg_PLAY",
+        # "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        # "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg",
+        # "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+    },
+)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py
new file mode 100644
index 00000000000..bcc238c84a9
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
new file mode 100644
index 00000000000..636717b9b55
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab.utils import configclass
+
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg
+
+
+@configclass
+class UpdatedRslRlPpoActorCriticRecurrentCfg(RslRlPpoActorCriticRecurrentCfg):
+    fixed_sigma = False
+    share_weights = False
+
+@configclass
+class UpdatedRslRlPpoAlgorithmCfg(RslRlPpoAlgorithmCfg):
+    bounds_loss_coef = 0.0
+
+@configclass
+class UR10GearAssemblyPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 512
+    max_iterations = 1500
+    save_interval = 10
+    experiment_name = "gear_assembly_ur10e"
+    empirical_normalization = True
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.0,
+        num_learning_epochs=8,
+        num_mini_batches=8,
+        learning_rate=5.0e-4,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.008,
+        max_grad_norm=1.0,
+    )
+
+
+@configclass
+class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    seed = 7858
+    num_steps_per_env = 512
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "gear_assembly_ur10e"
+    empirical_normalization = True
+    clip_actions = 1.0
+    resume = False
+    policy = UpdatedRslRlPpoActorCriticRecurrentCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        noise_std_type="log",
+        activation="elu",
+        rnn_type="lstm",
+        rnn_hidden_dim=256,
+        rnn_num_layers=2,
+    )
+    algorithm = UpdatedRslRlPpoAlgorithmCfg(
+        bounds_loss_coef=0.0001,
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.0,
+        num_learning_epochs=8,
+        num_mini_batches=16,
+        learning_rate=5.0e-4,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.008,
+        max_grad_norm=1.0,
+    )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
new file mode 100644
index 00000000000..6899beb0512
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
@@ -0,0 +1,271 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import math
+import os
+import torch
+
+from isaaclab.utils import configclass
+import isaaclab.sim as sim_utils
+from isaaclab.assets.articulation import ArticulationCfg
+
+from isaaclab.managers import EventTermCfg as EventTerm
+from isaaclab.managers import SceneEntityCfg
+
+
+import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp
+from isaaclab_tasks.manager_based.manipulation.deploy.gear_assembly.gear_assembly_env_cfg import GearAssemblyEnvCfg
+from isaaclab.sensors import FrameTransformerCfg, OffsetCfg
+from isaaclab.markers.config import FRAME_MARKER_CFG
+
+import isaaclab_tasks.manager_based.manipulation.deploy.mdp.events as gear_assembly_events
+
+
+##
+# Pre-defined configs
+##
+from isaaclab_assets.robots.universal_robots import UR10e_ROBOTIQ_GRIPPER_CFG  # isort: skip
+
+
+##
+# Environment configuration
+##
+
+@configclass
+class EventCfg:
+    """Configuration for events."""
+
+    robot_joint_stiffness_and_damping = EventTerm(
+        func=mdp.randomize_actuator_gains,
+        # min_step_count_between_reset=720,
+        # min_step_count_between_reset=200,
+        mode="reset",
+        params={
+            # "asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]),  # only the arm joints are randomized
+            "stiffness_distribution_params": (0.75, 1.5),
+            "damping_distribution_params": (0.3, 3.0),
+            # "stiffness_distribution_params": (1.0, 1.0),
+            # "damping_distribution_params": (1.0, 1.0),
+            "operation": "scale",
+            "distribution": "log_uniform",
+        },
+    )
+
+    joint_friction = EventTerm(
+        func=mdp.randomize_joint_parameters,
+        # min_step_count_between_reset=720,
+        # min_step_count_between_reset=200,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]),
+            "friction_distribution_params": (0.3, 0.7),
+            "operation": "add",
+            "distribution": "uniform",
+        },
+    )
+
+    medium_gear_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("factory_gear_medium", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+    gear_base_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("factory_gear_base", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    robot_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    randomize_gear_type = EventTerm(
+        func=gear_assembly_events.randomize_gear_type,
+        mode="reset",
+        params={
+            # "gear_types": ["gear_small", "gear_medium", "gear_large"]
+            "gear_types": ["gear_medium"]
+        },
+    )
+
+
+    reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")
+
+
+    randomize_gears_and_base_pose = EventTerm(
+        func=gear_assembly_events.randomize_gears_and_base_pose,
+        mode="reset",
+        params={
+            "pose_range": {
+                # "x": [0.0, 0.0],
+                # "y": [0.0, 0.0],
+                # "z": [0.0, 0.0],
+                # "roll": [-0.0, 0.0], # 0 degree
+                # "pitch": [-0.0, 0.0], # 0 degree
+                # "yaw": [-0.0, 0.0], # 0 degree
+                "x": [-0.1, 0.1],
+                "y": [-0.25, 0.25],
+                "z": [-0.1,  0.1],
+                "roll": [-math.pi/90, math.pi/90], # 2 degree
+                "pitch": [-math.pi/90, math.pi/90], # 2 degree
+                "yaw": [-math.pi/6, math.pi/6], # 2 degree
+            },
+            "gear_pos_range": {
+                "x": [-0.02, 0.02],
+                "y": [-0.02, 0.02],
+                "z": [0.0575, 0.0775], # 0.045 + 0.0225
+                # "x": [-0.0, 0.0],
+                # "y": [-0.0, 0.0],
+                # "z": [0.0675, 0.0675],
+            },
+            "rot_randomization_range": {
+                "roll": [-math.pi/36, math.pi/36], # 5 degree
+                "pitch": [-math.pi/36, math.pi/36], # 5 degree
+                "yaw": [-math.pi/36, math.pi/36], # 5 degree
+            },
+            "velocity_range": {},
+        },
+    )
+
+    set_robot_to_grasp_pose = EventTerm(
+        func=gear_assembly_events.set_robot_to_grasp_pose,
+        mode="reset",
+        params={
+            "robot_asset_cfg": SceneEntityCfg("robot"),
+            # "pos_offset": [-0.030375, 0.0, -0.255],  # Offset from wrist_3_link to gripper position
+            # "pos_offset": [0.0, 0.030375, -0.26],
+            "rot_offset": [0.0, math.sqrt(2)/2, math.sqrt(2)/2, 0.0],
+            "pos_randomization_range": {
+                "x": [-0.0, 0.0],
+                "y": [-0.005, 0.005],
+                "z": [-0.003, 0.003]
+            },
+            # "pos_randomization_range": None
+        },
+    )
+
+
+
+
+@configclass
+class UR10eGearAssemblyEnvCfg(GearAssemblyEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        # switch robot to ur10 with local asset path
+        self.scene.robot = UR10e_ROBOTIQ_GRIPPER_CFG.replace(
+            prim_path="{ENV_REGEX_NS}/Robot",
+            spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace(
+                activate_contact_sensors=True,
+                rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                    disable_gravity=True,
+                    max_depenetration_velocity=5.0,
+                    linear_damping=0.0,
+                    angular_damping=0.0,
+                    max_linear_velocity=1000.0,
+                    max_angular_velocity=3666.0,
+                    enable_gyroscopic_forces=True,
+                    solver_position_iteration_count=192,
+                    solver_velocity_iteration_count=1,
+                    max_contact_impulse=1e32,
+                ),
+                articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+                    enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1
+                ),
+                collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+            ),
+            init_state=ArticulationCfg.InitialStateCfg(
+                joint_pos={
+                    "shoulder_pan_joint": 2.7228e+00,
+                    "shoulder_lift_joint": -8.3962e-01,
+                    "elbow_joint": 1.3684e+00,
+                    "wrist_1_joint": -2.1048e+00,
+                    "wrist_2_joint": -1.5691e+00,
+                    "wrist_3_joint": -1.9896e+00,
+                    "finger_joint": 0.0,
+                },
+                pos=(0.0, 0.0, 0.0),
+                rot=(0.0, 0.0, 0.0, 1.0),
+            ),
+        )
+
+
+        self.observations.policy.joint_pos.params["asset_cfg"].joint_names = ["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"]
+        self.observations.policy.joint_vel.params["asset_cfg"].joint_names = ["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"]
+
+        # override events
+        self.events = EventCfg()
+
+        # overriderride command generator body
+        self.joint_action_scale = 0.025
+        self.action_scale_joint_space = [self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale, self.joint_action_scale]
+        self.actions.arm_action = mdp.RelativeJointPositionActionCfg(
+            asset_name="robot", joint_names=["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"], scale=self.joint_action_scale, use_zero_offset=True
+        )
+
+        self.obs_order = ["arm_dof_pos", "arm_dof_vel", "shaft_pos", "shaft_quat"]
+        self.policy_action_space = "joint"
+        self.arm_joint_names = [
+            "shoulder_pan_joint",
+            "shoulder_lift_joint",
+            "elbow_joint",
+            "wrist_1_joint",
+            "wrist_2_joint",
+            "wrist_3_joint",
+        ]
+        self.policy_action_space = "joint"
+        self.action_space = 6
+        self.state_space = 0
+        self.observation_space = 19
+
+        # Set joint_action_scale from the existing arm_action.scale
+        self.joint_action_scale = self.actions.arm_action.scale
+
+        self.action_scale_joint_space = [
+            self.joint_action_scale,
+            self.joint_action_scale,
+            self.joint_action_scale,
+            self.joint_action_scale,
+            self.joint_action_scale,
+            self.joint_action_scale,
+        ]
+        self.fixed_asset_init_pos_center = (1.0200, -0.2100, -0.1)
+        self.fixed_asset_init_pos_range = (0.1, 0.25, 0.1)
+        self.fixed_asset_init_orn_deg = (180.0, 0.0, 90.0)
+        self.fixed_asset_init_orn_deg_range = (5.0, 5.0, 30.0)
+
+
+@configclass
+class UR10eGearAssemblyEnvCfg_PLAY(UR10eGearAssemblyEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
new file mode 100644
index 00000000000..88a3b89009f
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -0,0 +1,347 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from dataclasses import MISSING
+import os
+
+import torch
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
+from isaaclab.envs import ManagerBasedRLEnvCfg
+from isaaclab.sensors import FrameTransformerCfg
+from isaaclab.managers import ActionTermCfg as ActionTerm
+from isaaclab.managers import CurriculumTermCfg as CurrTerm
+from isaaclab.managers import EventTermCfg as EventTerm
+
+from isaaclab.managers import ObservationGroupCfg as ObsGroup
+from isaaclab.managers import ObservationTermCfg as ObsTerm
+from isaaclab.managers import RewardTermCfg as RewTerm
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.managers import TerminationTermCfg as DoneTerm
+from isaaclab.sim.simulation_cfg import PhysxCfg, SimulationCfg
+from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.utils import configclass
+from isaaclab.utils.noise import ResetSampledNoiseModelCfg, UniformNoiseCfg
+import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp
+from isaaclab_tasks.manager_based.manipulation.deploy.reach.reach_env_cfg import ReachEnvCfg
+
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
+
+# Get the directory where this configuration file is located
+CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
+ASSETS_DIR = os.path.join(CONFIG_DIR, "assets")
+
+##
+# Environment configuration
+##
+
+@configclass
+class GearAssemblySceneCfg(InteractiveSceneCfg):
+    """Configuration for the scene with a robotic arm."""
+    
+    # Disable scene replication to allow USD-level randomization
+    replicate_physics = False
+
+    # world
+    ground = AssetBaseCfg(
+        prim_path="/World/ground",
+        spawn=sim_utils.GroundPlaneCfg(),
+        init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
+    )
+
+    factory_gear_base = RigidObjectCfg(
+        prim_path="{ENV_REGEX_NS}/FactoryGearBase",
+        # TODO: change to common isaac sim directory
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_base.usd"),
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_base.usd",
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd",
+            activate_contact_sensors=True,
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                disable_gravity=False,
+                kinematic_enabled=True,
+                max_depenetration_velocity=5.0,
+                linear_damping=0.0,
+                angular_damping=0.0,
+                max_linear_velocity=1000.0,
+                max_angular_velocity=3666.0,
+                enable_gyroscopic_forces=True,
+                solver_position_iteration_count=196,
+                solver_velocity_iteration_count=1,
+                max_contact_impulse=1e32,
+            ),
+            mass_props=sim_utils.MassPropertiesCfg(mass=None),
+            collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd",
+            # rigid_props=sim_utils.RigidBodyPropertiesCfg(kinematic_enabled=True),
+        ),
+        init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)),
+    )
+
+    # factory_gear_small = RigidObjectCfg(
+    #     prim_path="{ENV_REGEX_NS}/FactoryGearSmall",
+    #     # TODO: change to common isaac sim directory
+    #     spawn=sim_utils.UsdFileCfg(
+    #         usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_small.usd"),
+    #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_small.usd",
+    #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/small_gear_scale_1p5_usd/small_gear_scale_1p5.usd",
+    #         activate_contact_sensors=True,
+    #         rigid_props=sim_utils.RigidBodyPropertiesCfg(
+    #             disable_gravity=False,
+    #             kinematic_enabled=False,
+    #             max_depenetration_velocity=5.0,
+    #             linear_damping=0.0,
+    #             angular_damping=0.0,
+    #             max_linear_velocity=1000.0,
+    #             max_angular_velocity=3666.0,
+    #             enable_gyroscopic_forces=True,
+    #             solver_position_iteration_count=196,
+    #             solver_velocity_iteration_count=1,
+    #             max_contact_impulse=1e32,
+    #         ),
+    #         mass_props=sim_utils.MassPropertiesCfg(mass=0.019),
+    #         collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+    #     ),
+    #     init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)),
+    # )
+
+    factory_gear_medium = RigidObjectCfg(
+        prim_path="{ENV_REGEX_NS}/FactoryGearMedium",
+        # TODO: change to common isaac sim directory
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_medium.usd"),
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_medium.usd",
+            # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/medium_gear_scale_1p5_usd/medium_gear_scale_1p5.usd",
+            activate_contact_sensors=True,
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                disable_gravity=False,
+                kinematic_enabled=False,
+                max_depenetration_velocity=5.0,
+                linear_damping=0.0,
+                angular_damping=0.0,
+                max_linear_velocity=1000.0,
+                max_angular_velocity=3666.0,
+                enable_gyroscopic_forces=True,
+                solver_position_iteration_count=196,
+                solver_velocity_iteration_count=1,
+                max_contact_impulse=1e32,
+            ),
+            mass_props=sim_utils.MassPropertiesCfg(mass=0.019),
+            collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+        ),
+        init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)),
+    )
+
+    # factory_gear_large = RigidObjectCfg(
+    #     prim_path="{ENV_REGEX_NS}/FactoryGearLarge",
+    #     # TODO: change to common isaac sim directory
+    #     spawn=sim_utils.UsdFileCfg(
+    #         usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_large.usd"),
+    #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_large.usd",
+    #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/large_gear_scale_1p5_usd/large_gear_scale_1p5.usd",
+    #         activate_contact_sensors=True,
+    #         rigid_props=sim_utils.RigidBodyPropertiesCfg(
+    #             disable_gravity=False,
+    #             kinematic_enabled=False,
+    #             max_depenetration_velocity=5.0,
+    #             linear_damping=0.0,
+    #             angular_damping=0.0,
+    #             max_linear_velocity=1000.0,
+    #             max_angular_velocity=3666.0,
+    #             enable_gyroscopic_forces=True,
+    #             solver_position_iteration_count=196,
+    #             solver_velocity_iteration_count=1,
+    #             max_contact_impulse=1e32,
+    #         ),
+    #         mass_props=sim_utils.MassPropertiesCfg(mass=0.019),
+    #         collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+    #     ),
+    #     init_state=RigidObjectCfg.InitialStateCfg(pos=(1.0200, -0.2100, -0.1), rot=(-0.70711, 0.0, 0.0, 0.70711)),
+    # )
+    
+
+    # robots
+    robot: ArticulationCfg = MISSING
+
+
+    # lights
+    light = AssetBaseCfg(
+        prim_path="/World/light",
+        spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0),
+    )
+
+# @configclass
+# class CommandsCfg:
+#     """Command terms for the MDP."""
+
+#     ee_pose = mdp.UniformPoseCommandCfg(
+#         asset_name="robot",
+#         body_name=MISSING,
+#         resampling_time_range=(4.0, 4.0),
+#         debug_vis=True,
+#         ranges=mdp.UniformPoseCommandCfg.Ranges(
+#             pos_x=(0.35, 0.65),
+#             pos_y=(-0.2, 0.2),
+#             pos_z=(0.15, 0.5),
+#             roll=(0.0, 0.0),
+#             pitch=MISSING,  # depends on end-effector axis
+#             yaw=(-3.14, 3.14),
+#         ),
+#     )
+
+
+@configclass
+class ActionsCfg:
+    """Action specifications for the MDP."""
+
+    arm_action: ActionTerm = MISSING
+    gripper_action: ActionTerm | None = None
+
+
+@configclass
+class ObservationsCfg:
+    """Observation specifications for the MDP."""
+
+    @configclass
+    class PolicyCfg(ObsGroup):
+        """Observations for policy group."""
+
+        # observation terms (order preserved)
+        joint_pos = ObsTerm(func=mdp.joint_pos,
+                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+        joint_vel = ObsTerm(func=mdp.joint_vel,
+                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+        gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=ResetSampledNoiseModelCfg(
+            noise_cfg=UniformNoiseCfg(n_min=-0.005, n_max=0.005, operation="add")
+        ))
+        gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
+
+        def __post_init__(self):
+            self.enable_corruption = True
+            self.concatenate_terms = True
+    
+    # @configclass
+    # class CriticCfg(ObsGroup):
+    #     """Observations for policy group."""
+
+    #     # observation terms (order preserved)
+    #     joint_pos = ObsTerm(func=mdp.joint_pos,
+    #                         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+    #     joint_vel = ObsTerm(func=mdp.joint_vel,
+    #                         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w,
+    #     #                          noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025))
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add"))
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
+    #     gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
+    #     gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
+
+    #     gear_pos = ObsTerm(func=mdp.gear_pos_w)
+    #     gear_quat = ObsTerm(func=mdp.gear_quat_w)
+
+    # observation groups
+    policy: PolicyCfg = PolicyCfg()
+    # critic: CriticCfg = CriticCfg()
+
+
+@configclass
+class EventCfg:
+    """Configuration for events."""
+
+    reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")
+
+    reset_gear = EventTerm(
+        func=mdp.reset_root_state_uniform,
+        mode="reset",
+        params={
+            "pose_range": {
+                "x": [-0.05, 0.05],
+                "y": [-0.05, 0.05],
+                "z": [0.1, 0.15],
+            },
+            "velocity_range": {},
+            "asset_cfg": SceneEntityCfg("factory_gear_small"),
+        },
+    )
+
+
+@configclass
+class RewardsCfg:
+    """Reward terms for the MDP."""
+
+    end_effector_gear_keypoint_tracking = RewTerm(
+        func=mdp.keypoint_entity_error,
+        weight=-1.5,
+        params={
+            "asset_cfg_1": SceneEntityCfg("factory_gear_base"), 
+            "keypoint_scale": 0.15,
+        },
+    )
+
+    end_effector_gear_keypoint_tracking_exp = RewTerm(
+        func=mdp.keypoint_entity_error_exp,
+        weight=1.5,
+        params={
+            "asset_cfg_1": SceneEntityCfg("factory_gear_base"), 
+            "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
+            "kp_use_sum_of_exps": False,
+            "keypoint_scale": 0.15,
+        },
+    )
+
+    action_rate = RewTerm(func=mdp.action_rate, weight=-5.0e-06)
+
+
+@configclass
+class TerminationsCfg:
+    """Termination terms for the MDP."""
+
+    time_out = DoneTerm(func=mdp.time_out, time_out=True)
+
+@configclass
+class GearAssemblyEnvCfg(ManagerBasedRLEnvCfg):
+    # Scene settings
+    scene: GearAssemblySceneCfg = GearAssemblySceneCfg(num_envs=4096, env_spacing=2.5)
+    # Basic settings
+    observations: ObservationsCfg = ObservationsCfg()
+    actions: ActionsCfg = ActionsCfg()
+    # commands: CommandsCfg = CommandsCfg()
+    # MDP settings
+    rewards: RewardsCfg = RewardsCfg()
+    terminations: TerminationsCfg = TerminationsCfg()
+    events: EventCfg = EventCfg()
+    sim: SimulationCfg = SimulationCfg(
+
+        physx=PhysxCfg(
+            gpu_collision_stack_size=2**31,  # Important to prevent collisionStackSize buffer overflow in contact-rich environments.
+            gpu_max_rigid_contact_count=2**23,
+            gpu_max_rigid_patch_count=2**23
+        ),
+    )
+    
+    def __post_init__(self):
+        """Post initialization."""
+        # general settings
+        self.episode_length_s = 6.66
+        self.viewer.eye = (3.5, 3.5, 3.5)
+        # simulation settings
+        self.decimation = 2
+        self.sim.render_interval = self.decimation
+        self.sim.dt = 1.0 / 120.0
+
+        self.hand_close_pos = 0.7
+
+        self.gear_offsets = {'gear_small': [0.076125, 0.0, 0.0],
+                            'gear_medium': [0.030375, 0.0, 0.0],
+                            'gear_large': [-0.045375, 0.0, 0.0]}
+        
+        self.gear_offsets_grasp = {'gear_small': [0.0, 0.076125, -0.26],
+                            'gear_medium': [0.0, 0.030375, -0.26],
+                            'gear_large': [0.0, -0.045375, -0.26]}
+
+        self.hand_grasp_pos = {"gear_small": 0.67,
+                               "gear_medium": 0.54,
+                               "gear_large": 0.52}
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py
index 6686f9f5276..57059c62dac 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.py
@@ -8,3 +8,5 @@
 from isaaclab.envs.mdp import *  # noqa: F401, F403
 
 from .rewards import *  # noqa: F401, F403
+from .events import *  # noqa: F401, F403
+from .observations import *  # noqa: F401, F403
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
new file mode 100644
index 00000000000..1e70454510e
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
@@ -0,0 +1,451 @@
+# Copyright (c) 2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Functions specific to the in-hand dexterous manipulation environments."""
+
+
+from __future__ import annotations
+
+import carb
+import time
+import torch
+from typing import TYPE_CHECKING, Literal, Optional, List
+
+from isaaclab.assets import Articulation, RigidObject
+from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg
+from isaaclab.utils.math import sample_uniform
+import isaaclab.utils.math as math_utils
+import isaaclab.sim as sim_utils
+
+from isaaclab_tasks.direct.automate import factory_control as fc
+import random
+
+if TYPE_CHECKING:
+    from isaaclab.envs import ManagerBasedEnv
+
+
+def set_default_joint_pose(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    default_pose: torch.Tensor,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+):
+    # Set the default pose for robots in all envs
+    asset = env.scene[asset_cfg.name]
+    asset.data.default_joint_pos = torch.tensor(default_pose, device=env.device).repeat(env.num_envs, 1)
+
+def randomize_gear_type(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    gear_types: List[str] = ["gear_small", "gear_medium", "gear_large"]
+):
+    """Randomize the gear type being used for the specified environments.
+    
+    Args:
+        env: The environment containing the assets
+        env_ids: Environment IDs to randomize
+        gear_types: List of available gear types to choose from
+    """
+    # Randomly select gear type for each environment
+    selected_gears = [random.choice(gear_types) for _ in range(len(env_ids))]
+    
+    # Store the selected gear type in the environment instance
+    # This will be used by the observation functions
+    if not hasattr(env, '_current_gear_type'):
+        env._current_gear_type = ["gear_medium"] * env.num_envs
+    
+    for i, env_id in enumerate(env_ids):
+        env._current_gear_type[env_id] = selected_gears[i]
+    # print(f"env._current_gear_type: {env._current_gear_type}")
+
+def set_robot_to_grasp_pose(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    robot_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    pos_threshold: float = 1e-6,
+    rot_threshold: float = 1e-6,
+    max_iterations: int = 10,
+    rot_offset: Optional[list[float]] = None,
+    pos_randomization_range: Optional[dict] = None,
+    num_arm_joints: int = 6
+):
+    # Convert rotation offset to tensor if provided
+    if rot_offset is not None:
+        rot_offset_tensor = torch.tensor(rot_offset, device=env.device).unsqueeze(0).expand(len(env_ids), -1)
+    else:
+        rot_offset_tensor = None
+    
+    # Check if environment has gear type selection
+    if not hasattr(env, '_current_gear_type'):
+        raise ValueError("Environment does not have '_current_gear_type' attribute. Ensure randomize_gear_type event is configured.")
+
+    i = 0
+    robot_asset: Articulation = env.scene[robot_asset_cfg.name]
+    joint_pos = robot_asset.data.joint_pos[env_ids].clone()
+    joint_vel = robot_asset.data.joint_vel[env_ids].clone()
+
+    while i < max_iterations:
+        robot_asset: Articulation = env.scene[robot_asset_cfg.name]
+        # Add gaussian noise to joint states
+        joint_pos = robot_asset.data.joint_pos[env_ids].clone()
+        joint_vel = robot_asset.data.joint_vel[env_ids].clone()
+
+        # Get gear positions and orientations for each environment based on selected gear type
+        env_ids_list = env_ids.tolist() if isinstance(env_ids, torch.Tensor) else list(env_ids)
+        grasp_object_pos_world = torch.zeros(len(env_ids), 3, device=env.device)
+        grasp_object_quat = torch.zeros(len(env_ids), 4, device=env.device)
+        
+        for row_idx, env_id in enumerate(env_ids_list):
+            # Get the gear type for this environment
+            gear_key = env._current_gear_type[env_id]
+            selected_asset_name = f"factory_{gear_key}"
+            
+            # Get the gear asset for this environment
+            gear_asset: RigidObject = env.scene[selected_asset_name]
+            grasp_object_pos_world[row_idx] = gear_asset.data.root_link_pos_w[env_id].clone()
+            grasp_object_quat[row_idx] = gear_asset.data.root_link_quat_w[env_id].clone()
+
+        
+        # First apply rotation offset to get the object's orientation
+        if rot_offset_tensor is not None:
+            # Apply rotation offset by quaternion multiplication
+            # rot_offset is assumed to be in quaternion format (w, x, y, z)
+            grasp_object_quat = math_utils.quat_mul(grasp_object_quat, rot_offset_tensor)
+            # print(f"Applied rot_offset: {rot_offset}")
+            # print(f"grasp_object_quat after offset: {grasp_object_quat}")
+        
+        if pos_randomization_range is not None:
+            pos_keys = ["x", "y", "z"]
+            range_list_pos = [pos_randomization_range.get(key, (0.0, 0.0)) for key in pos_keys]
+            ranges_pos = torch.tensor(range_list_pos, device=env.device)
+            rand_pos_offsets = math_utils.sample_uniform(ranges_pos[:, 0], ranges_pos[:, 1], (len(env_ids), 3), device=env.device)
+        # Apply gear-specific grasp offsets to each environment
+        for row_idx, env_id in enumerate(env_ids_list):
+            gear_key = env._current_gear_type[env_id]
+            gear_grasp_offset = env.cfg.gear_offsets_grasp[gear_key]
+            grasp_offset_tensor = torch.tensor(gear_grasp_offset, device=env.device, dtype=grasp_object_pos_world.dtype)
+
+            if pos_randomization_range is not None:
+                grasp_offset_tensor += rand_pos_offsets[row_idx]
+            
+            # Transform position offset from rotated object frame to world frame
+            grasp_object_pos_world[row_idx] = grasp_object_pos_world[row_idx] + math_utils.quat_apply(
+                grasp_object_quat[row_idx:row_idx+1], grasp_offset_tensor.unsqueeze(0)
+            )[0]
+        
+        # Convert to environment-relative coordinates by subtracting environment origins
+        grasp_object_pos = grasp_object_pos_world
+        
+        # Get end effector pose of the robot
+        # Get the specific wrist_3_link pose
+        try:
+            # Find the index of the wrist_3_link body
+            wrist_3_indices, wrist_3_names = robot_asset.find_bodies(["wrist_3_link"])
+            wrist_3_idx = wrist_3_indices[0]
+            
+            if len(wrist_3_indices) == 1:
+                wrist_3_idx = wrist_3_indices[0]  # Get the first (and should be only) index
+                
+                # Get the specific wrist_3_link pose
+                eef_pos_world = robot_asset.data.body_link_pos_w[env_ids, wrist_3_idx]  # Shape: (len(env_ids), 3)
+                eef_quat = robot_asset.data.body_link_quat_w[env_ids, wrist_3_idx]  # Shape: (len(env_ids), 4)
+                
+                # Convert to environment-relative coordinates
+                eef_pos = eef_pos_world
+
+
+                # You can also get the full pose as [pos, quat] (7-dimensional)
+                eef_pos = robot_asset.data.body_pos_w[env_ids, wrist_3_idx]
+                eef_quat = robot_asset.data.body_quat_w[env_ids, wrist_3_idx]
+                # print(f"eef_pos: {eef_pos}")
+                # print(f"eef_quat: {eef_quat}")
+            elif len(wrist_3_indices) > 1:
+                print("wrist_3_link found multiple times in robot body names")
+                print(f"Available body names: {robot_asset.body_names}")
+            else:
+                print("wrist_3_link not found in robot body names")
+                print(f"Available body names: {robot_asset.body_names}")
+                
+        except Exception as e:
+            print(f"Could not get end effector pose: {e}")
+            print("You may need to adjust the body name or access method based on your robot configuration")
+        
+
+        # Compute error to target using wrist_3_link as current and grasp_object as target
+        if len(wrist_3_indices) > 0:
+            # Get current end effector pose (wrist_3_link)
+            current_eef_pos = eef_pos  # wrist_3_link position
+            current_eef_quat = eef_quat  # wrist_3_link orientation
+            
+            # Get target pose (grasp object)
+            target_eef_pos = grasp_object_pos  # grasp object position
+            target_eef_quat = grasp_object_quat  # grasp object orientation
+
+            # print(f"current_eef_pos: {current_eef_pos[0]}")
+            # print(f"current_eef_quat: {current_eef_quat[0]}")
+            # print(f"target_eef_pos: {target_eef_pos[0]}")
+            # print(f"target_eef_quat: {target_eef_quat[0]}")
+
+            # Compute pose error
+            pos_error, axis_angle_error = fc.get_pose_error(
+                fingertip_midpoint_pos=current_eef_pos,
+                fingertip_midpoint_quat=current_eef_quat,
+                ctrl_target_fingertip_midpoint_pos=target_eef_pos,
+                ctrl_target_fingertip_midpoint_quat=target_eef_quat,
+                jacobian_type='geometric',
+                rot_error_type='axis_angle')
+            delta_hand_pose = torch.cat((pos_error, axis_angle_error), dim=-1)
+            # Check if pose error is within threshold
+            pos_error_norm = torch.norm(pos_error, dim=-1)
+            rot_error_norm = torch.norm(axis_angle_error, dim=-1)
+            
+            # print(f"Pose error - position: {pos_error[0]}")
+            # print(f"Pose error - orientation: {axis_angle_error[0]}")
+            # print(f"Position error norm: {pos_error_norm[0]}")
+            # print(f"Rotation error norm: {rot_error_norm[0]}")
+            
+            # Check if all environments have converged
+            if torch.all(pos_error_norm < pos_threshold) and torch.all(rot_error_norm < rot_threshold):
+                # print(f"Converged after {i} iterations!")
+                # print(f"pos_error_norm: {pos_error_norm}")
+                # print(f"rot_error_norm: {rot_error_norm}")
+                # print(f"pos_error: {pos_error}")
+                # print(f"axis_angle_error: {axis_angle_error}")
+                break
+
+            # Solve DLS problem for inverse kinematics
+            # Get jacobian for the wrist_3_link using the same approach as task_space_actions.py
+            try:
+                # Get all jacobians from the robot
+                jacobians = robot_asset.root_physx_view.get_jacobians().clone()
+				
+                # Select the jacobian for the wrist_3_link body
+                # For fixed-base robots, Jacobian excludes the base body (index 0)
+                # So if wrist_3_idx is 6, the Jacobian index should be 5
+                jacobi_body_idx = wrist_3_idx - 1
+                
+                
+                jacobian = jacobians[env_ids, jacobi_body_idx, :, :]  # Only first 6 joints (arm, not gripper)
+                
+                delta_dof_pos = fc._get_delta_dof_pos(
+                    delta_pose=delta_hand_pose,
+                    ik_method="dls",
+                    jacobian=jacobian,
+                    device=env.device,
+                )
+                
+                # Update joint positions - only update arm joints (first 6)
+                joint_pos += delta_dof_pos
+                joint_vel = torch.zeros_like(joint_pos)
+
+                # Set into the physics simulation
+
+                
+            except Exception as e:
+                print(f"Error in IK computation: {e}")
+                print("Note: You may need to implement proper jacobian computation for your robot")
+
+        # Set into the physics simulation
+        robot_asset.set_joint_position_target(joint_pos, env_ids=env_ids)
+        robot_asset.set_joint_velocity_target(joint_vel, env_ids=env_ids)
+        robot_asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
+
+
+        # print(f"delta_dof_pos.abs().max(): {delta_dof_pos.abs().max()}")
+
+        # if delta_dof_pos.abs().max() < 1e-5:
+        #     break
+
+        # print(f"Step {i}")
+        
+        i += 1
+    # if i >= max_iterations:
+    #     print(f"IK did not converge after {max_iterations} iterations")
+    #     # print(f"pos_error_norm: {pos_error_norm}")
+    #     # print(f"rot_error_norm: {rot_error_norm}")
+    #     # print(f"pos_error: {pos_error}")
+    #     # print(f"axis_angle_error: {axis_angle_error}")
+    #     # raise RuntimeError(f"IK did not converge after {max_iterations} iterations")
+
+    # # print(f"delta_dof_pos.abs().max(): {delta_dof_pos.abs().max()}")
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Going to set object position...")
+    
+    # TODO: Resttting the gear based on teh IK solution is not working as expected. The gear is not being placed in the correct position.
+    # # Set the grasp object's pose to the current end-effector pose in world coordinates (with optional offsets)
+    # # The offsets should be applied in reverse to get from wrist_3_link to gear pose
+    # gear_pos_world = current_eef_pos
+    # gear_quat_world = current_eef_quat
+    
+    # # Apply position offset (subtract to get from wrist_3_link to gear position)
+    # if pos_offset_tensor is not None:
+    #     gear_pos_world = gear_pos_world - pos_offset_tensor
+    
+    # # Apply rotation offset (inverse to get from wrist_3_link to gear orientation)
+    # if rot_offset_tensor is not None:
+    #     # Apply inverse rotation offset
+    #     gear_quat_world = math_utils.quat_mul(current_eef_quat, math_utils.quat_conjugate(rot_offset_tensor))
+    
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Press Enter to continue 1...")
+    
+    # # Set the grasp object's pose for each environment based on selected gear type
+    # for row_idx, env_id in enumerate(env_ids_list):
+    #     gear_key = env._current_gear_type[env_id]
+    #     selected_asset_name = f"factory_{gear_key}"
+    #     gear_asset: RigidObject = env.scene[selected_asset_name]
+    #     gear_asset.write_root_pose_to_sim(
+    #         torch.cat([gear_pos_world[row_idx:row_idx+1], gear_quat_world[row_idx:row_idx+1]], dim=-1), 
+    #         env_ids=torch.tensor([env_id], device=env.device)
+    #     )
+
+    # Close the gripper by setting the finger_joint based on gear type
+    all_joints, all_joints_names = robot_asset.find_joints([".*"])
+    
+    joint_pos = robot_asset.data.joint_pos[env_ids].clone()
+    
+    # @ireti: We need to change this so it workes with teh public 2f140 gripper that uses mimic joints
+    finger_joints = all_joints[num_arm_joints:]
+    finger_joint_names = all_joints_names[num_arm_joints:]
+
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Press Enter to continue 2...")
+
+    hand_grasp_pos = env.cfg.hand_grasp_pos[gear_key]
+    set_finger_joint_pos_2f_140(joint_pos, env_ids_list, finger_joints, hand_grasp_pos)
+
+    robot_asset.set_joint_position_target(joint_pos, joint_ids=all_joints, env_ids=env_ids)
+    robot_asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
+
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Press Enter to continue 3...")
+
+    # Set finger joints based on gear type for each environment
+    hand_close_pos = env.cfg.hand_close_pos
+    set_finger_joint_pos_2f_140(joint_pos, env_ids_list, finger_joints, hand_close_pos)
+
+    robot_asset.set_joint_position_target(joint_pos, joint_ids=all_joints, env_ids=env_ids)
+
+    # for i in range(10):
+    #     env.sim.render()
+    #     input("Press Enter to continue 4...")
+
+def randomize_gears_and_base_pose(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    pose_range: dict = {},
+    velocity_range: dict = {},
+    gear_pos_range: dict = {},
+    rot_randomization_range: dict = {},
+
+):
+    """Randomize both the gear base pose and the poses of all gear types with the same value,
+    then apply the per-env `gear_pos_range` only to the gear selected by `randomize_gear_type`.
+    """
+    if not hasattr(env, '_current_gear_type'):
+        raise ValueError("Environment does not have '_current_gear_type' attribute. Ensure randomize_gear_type event is configured.")
+
+    device = env.device
+
+    # Shared pose samples for all assets (base and all gears)
+    pose_keys = ["x", "y", "z", "roll", "pitch", "yaw"]
+    range_list_pose = [pose_range.get(key, (0.0, 0.0)) for key in pose_keys]
+    ranges_pose = torch.tensor(range_list_pose, device=device)
+    rand_pose_samples = math_utils.sample_uniform(ranges_pose[:, 0], ranges_pose[:, 1], (len(env_ids), 6), device=device)
+
+    orientations_delta = math_utils.quat_from_euler_xyz(
+        rand_pose_samples[:, 3], rand_pose_samples[:, 4], rand_pose_samples[:, 5]
+    )
+
+    # Shared velocity samples for all assets (base and all gears)
+    range_list_vel = [velocity_range.get(key, (0.0, 0.0)) for key in pose_keys]
+    ranges_vel = torch.tensor(range_list_vel, device=device)
+    rand_vel_samples = math_utils.sample_uniform(ranges_vel[:, 0], ranges_vel[:, 1], (len(env_ids), 6), device=device)
+
+    # Prepare assets: base + all possible gears (only those present in scene will be processed)
+    base_asset_name = "factory_gear_base"
+    possible_gear_assets = [
+        # "factory_gear_small",
+        "factory_gear_medium",
+        # "factory_gear_large",
+        # "table",
+    ]
+
+
+    positions_by_asset = {}
+    orientations_by_asset = {}
+    velocities_by_asset = {}
+
+    # Combine base and gear assets into one pass to avoid duplication
+    asset_names_to_process = [base_asset_name] + possible_gear_assets
+    for asset_name in asset_names_to_process:
+        asset: RigidObject | Articulation = env.scene[asset_name]
+        root_states = asset.data.default_root_state[env_ids].clone()
+        positions = root_states[:, 0:3] + env.scene.env_origins[env_ids] + rand_pose_samples[:, 0:3]
+        orientations = math_utils.quat_mul(root_states[:, 3:7], orientations_delta)
+        velocities = root_states[:, 7:13] + rand_vel_samples
+        positions_by_asset[asset_name] = positions
+        orientations_by_asset[asset_name] = orientations
+        velocities_by_asset[asset_name] = velocities
+
+    # Per-env gear offset (gear_pos_range) applied only to the selected gear
+    range_list_gear = [gear_pos_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z"]]
+    ranges_gear = torch.tensor(range_list_gear, device=device)
+    rand_gear_offsets = math_utils.sample_uniform(ranges_gear[:, 0], ranges_gear[:, 1], (len(env_ids), 3), device=device)
+
+    # Per-env gear orientation offset (rot_randomization_range) applied only to the selected gear
+    range_list_rot = [rot_randomization_range.get(key, (0.0, 0.0)) for key in ["roll", "pitch", "yaw"]]
+    ranges_rot = torch.tensor(range_list_rot, device=device)
+    rand_rot_offsets = math_utils.sample_uniform(ranges_rot[:, 0], ranges_rot[:, 1], (len(env_ids), 3), device=device)
+    rand_rot_quats = math_utils.quat_from_euler_xyz(rand_rot_offsets[:, 0], rand_rot_offsets[:, 1], rand_rot_offsets[:, 2])
+
+    env_ids_list = env_ids.tolist() if isinstance(env_ids, torch.Tensor) else list(env_ids)
+    for row_idx, env_id in enumerate(env_ids_list):
+        # env._current_gear_type[env_id] is expected to be one of: "gear_small", "gear_medium", "gear_large"
+        gear_key = env._current_gear_type[env_id]
+        selected_asset_name = f"factory_{gear_key}"
+        if selected_asset_name in positions_by_asset:
+            positions_by_asset[selected_asset_name][row_idx] = (
+                positions_by_asset[selected_asset_name][row_idx] + rand_gear_offsets[row_idx]
+            )
+            # Apply additional orientation randomization to the selected gear
+            orientations_by_asset[selected_asset_name][row_idx] = math_utils.quat_mul(
+                orientations_by_asset[selected_asset_name][row_idx], rand_rot_quats[row_idx]
+            )
+
+    # Write back to sim for all prepared assets
+    for asset_name in positions_by_asset.keys():
+        asset = env.scene[asset_name]
+        positions = positions_by_asset[asset_name]
+        orientations = orientations_by_asset[asset_name]
+        velocities = velocities_by_asset[asset_name]
+        asset.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids)
+        asset.write_root_velocity_to_sim(velocities, env_ids=env_ids)
+
+def set_finger_joint_pos_2f_140(
+    joint_pos: torch.Tensor,
+    env_ids_list: list[int],
+    finger_joints: list[int],
+    finger_joint_position: float,
+):
+    # Get hand close positions for each environment based on gear type
+    for row_idx, env_id in enumerate(env_ids_list):
+        
+        joint_pos[row_idx, finger_joints[0]] = finger_joint_position
+        joint_pos[row_idx, finger_joints[1]] = finger_joint_position
+        
+        # outer finger joints
+        joint_pos[row_idx, finger_joints[2]] = 0
+        joint_pos[row_idx, finger_joints[3]] = 0
+
+        joint_pos[row_idx, finger_joints[4]] = -finger_joint_position
+        joint_pos[row_idx, finger_joints[5]] = -finger_joint_position
+
+        joint_pos[row_idx, finger_joints[6]] = finger_joint_position
+        joint_pos[row_idx, finger_joints[7]] = finger_joint_position
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py
new file mode 100644
index 00000000000..a434018bea0
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py
@@ -0,0 +1,191 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+import torch
+from typing import TYPE_CHECKING
+
+import isaaclab.utils.math as math_utils
+from isaaclab.assets import RigidObject
+from isaaclab.managers import SceneEntityCfg
+from isaacsim.core.utils.torch.transformations import tf_combine
+
+if TYPE_CHECKING:
+    from isaaclab.envs import ManagerBasedRLEnv
+
+
+def gear_shaft_pos_w(
+    env: ManagerBasedRLEnv, 
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("factory_gear_base")
+) -> torch.Tensor:
+    """Gear shaft position in world frame with offset applied.
+    
+    Args:
+        env: The environment containing the assets
+        asset_cfg: Configuration of the gear base asset
+        
+    Returns:
+        Gear shaft position tensor of shape (num_envs, 3)
+    """
+    # extract the asset (to enable type hinting)
+    asset: RigidObject = env.scene[asset_cfg.name]
+    
+    # get base gear position and orientation
+    base_pos = asset.data.root_pos_w
+    base_quat = asset.data.root_quat_w
+    
+
+    # get current gear type from environment, use default if not set
+    if not hasattr(env, '_current_gear_type'):
+        print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.")
+        default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium')
+        current_gear_type = [default_gear_type] * env.num_envs
+    else:
+        current_gear_type = env._current_gear_type  # type: ignore
+    
+    # get offset for the specific gear type
+    gear_offsets = getattr(env.cfg, 'gear_offsets', {})
+    
+    # Initialize shaft positions
+    shaft_pos = torch.zeros_like(base_pos)
+    
+    # Apply offsets for each environment based on their gear type
+    for i in range(env.num_envs):
+        # gear_type = current_gear_type[i] if i < len(current_gear_type) else "gear_medium"
+        gear_type = current_gear_type[i]
+        
+        offset = torch.tensor(gear_offsets[gear_type], device=base_pos.device, dtype=base_pos.dtype)
+        offset_pos = offset.unsqueeze(0)
+        shaft_pos[i] = base_pos[i] + offset_pos[0]
+        # Apply position and rotation offsets if provided
+        # create identity quaternion
+        rot_offset_tensor = torch.tensor([1.0, 0.0, 0.0, 0.0], device=base_pos.device, dtype=base_pos.dtype).unsqueeze(0)
+        _, shaft_pos[i] = tf_combine(
+            base_quat[i:i+1], base_pos[i:i+1],
+            rot_offset_tensor, offset_pos
+        )
+
+    return shaft_pos - env.scene.env_origins
+
+
+def gear_shaft_quat_w(
+    env: ManagerBasedRLEnv, 
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("factory_gear_base"),
+) -> torch.Tensor:
+    """Gear shaft orientation in world frame.
+    
+    Args:
+        env: The environment containing the assets
+        asset_cfg: Configuration of the gear base asset
+        gear_type: Type of gear ('gear_small', 'gear_medium', 'gear_large')
+        
+    Returns:
+        Gear shaft orientation tensor of shape (num_envs, 4)
+    """
+    # extract the asset (to enable type hinting)
+    asset: RigidObject = env.scene[asset_cfg.name]
+
+    
+    # get base quaternion
+    base_quat = asset.data.root_quat_w
+    
+    # ensure w component is positive for each environment
+    # if w is negative, negate the entire quaternion to maintain same orientation
+    w_negative = base_quat[:, 0] < 0
+    positive_quat = base_quat.clone()
+    positive_quat[w_negative] = -base_quat[w_negative]
+    
+    return positive_quat
+
+def gear_pos_w(
+    env: ManagerBasedRLEnv, 
+) -> torch.Tensor:
+    """Gear position in world frame.
+    
+    Args:
+        env: The environment containing the assets
+
+    Returns:
+        Gear position tensor of shape (num_envs, 3)
+    """
+    # get current gear type from environment, use default if not set
+    if not hasattr(env, '_current_gear_type'):
+        print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.")
+        default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium')
+        current_gear_type = [default_gear_type] * env.num_envs
+    else:
+        current_gear_type = env._current_gear_type  # type: ignore
+    
+    # Create a mapping from gear type to asset name
+    gear_to_asset_mapping = {
+        'gear_small': 'factory_gear_small',
+        'gear_medium': 'factory_gear_medium', 
+        'gear_large': 'factory_gear_large'
+    }
+    
+    # Initialize positions array
+    gear_positions = torch.zeros((env.num_envs, 3), device=env.device)
+    
+    # Get positions for each environment based on their gear type
+    for i in range(env.num_envs):
+        gear_type = current_gear_type[i]
+        asset_name = gear_to_asset_mapping.get(gear_type)
+        
+        asset: RigidObject = env.scene[asset_name]
+        
+        # Get position for this specific environment
+        gear_positions[i] = asset.data.root_pos_w[i]
+    
+    return gear_positions
+
+def gear_quat_w(
+    env: ManagerBasedRLEnv, 
+) -> torch.Tensor:
+    """Gear orientation in world frame.
+    
+    Args:
+        env: The environment containing the assets
+        
+    Returns:
+        Gear orientation tensor of shape (num_envs, 4)
+    """
+    # get current gear type from environment, use default if not set
+    if not hasattr(env, '_current_gear_type'):
+        print("Environment does not have attribute '_current_gear_type'. Using default_gear_type from configuration.")
+        default_gear_type = getattr(env.cfg, 'default_gear_type', 'gear_medium')
+        current_gear_type = [default_gear_type] * env.num_envs
+    else:
+        current_gear_type = env._current_gear_type  # type: ignore
+    
+    # Create a mapping from gear type to asset name
+    gear_to_asset_mapping = {
+        'gear_small': 'factory_gear_small',
+        'gear_medium': 'factory_gear_medium', 
+        'gear_large': 'factory_gear_large'
+    }
+    
+    # Initialize quaternions array
+    gear_positive_quat = torch.zeros((env.num_envs, 4), device=env.device)
+    
+    # Get quaternions for each environment based on their gear type
+    for i in range(env.num_envs):
+        gear_type = current_gear_type[i]
+        asset_name = gear_to_asset_mapping.get(gear_type)
+        
+        # Get the asset for this specific gear type
+        asset: RigidObject = env.scene[asset_name]
+        
+        # Get quaternion for this specific environment
+        gear_quat = asset.data.root_quat_w[i]
+        
+        # ensure w component is positive for each environment
+        # if w is negative, negate the entire quaternion to maintain same orientation
+        if gear_quat[0] < 0:
+            gear_quat = -gear_quat
+        
+        gear_positive_quat[i] = gear_quat
+    
+    return gear_positive_quat
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
index 0d14620e225..8e44e65f416 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
@@ -12,6 +12,7 @@
 
 from isaaclab.managers import SceneEntityCfg
 from isaaclab.sensors.frame_transformer.frame_transformer import FrameTransformer
+from isaaclab.utils.math import quat_mul
 
 if TYPE_CHECKING:
     from isaaclab.envs import ManagerBasedRLEnv
@@ -229,3 +230,289 @@ def keypoint_command_error_exp(
             keypoint_reward_exp += 1.0 / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
 
     return keypoint_reward_exp
+
+def keypoint_entity_error(
+    env: ManagerBasedRLEnv, 
+    asset_cfg_1: SceneEntityCfg,
+    keypoint_scale: float = 1.0,
+    add_cube_center_kp: bool = True,
+    quat_offset: torch.Tensor | None = None,
+    pos_offset: torch.Tensor | None = None
+) -> torch.Tensor:
+    """Compute keypoint distance between a RigidObject and the dynamically selected gear.
+
+    The function computes the keypoint distance between the poses of two entities.
+    Keypoints are created by applying offsets to both poses and the distance is 
+    computed as the L2-norm between corresponding keypoints.
+
+    Args:
+        env: The environment containing the assets
+        asset_cfg_1: Configuration of the first asset (RigidObject)
+        keypoint_scale: Scale factor for keypoint offsets
+        add_cube_center_kp: Whether to include the center keypoint (0, 0, 0)
+        quat_offset: Optional quaternion offset to apply to asset_1's orientation (w, x, y, z)
+        pos_offset: Optional position offset to apply to asset_1's position (x, y, z)
+
+    Returns:
+        Keypoint distance tensor of shape (num_envs,) where each element
+        is the mean L2 norm distance between corresponding keypoints
+    """
+    # extract the assets (to enable type hinting)
+    asset_1 = env.scene[asset_cfg_1.name]  # RigidObject
+    
+    # get current poses in world frame for asset_1
+    # asset_1 is RigidObject - use body_pos_w and body_quat_w
+    curr_pos_1 = asset_1.data.body_pos_w[:, 0]  # type: ignore
+    curr_quat_1 = asset_1.data.body_quat_w[:, 0]  # type: ignore
+    
+    # Apply position offset to asset_1 if provided
+    if pos_offset is not None:
+        # Convert list to tensor if needed
+        if isinstance(pos_offset, list):
+            pos_offset = torch.tensor(pos_offset, device=curr_pos_1.device, dtype=curr_pos_1.dtype)
+        # Ensure pos_offset has the right shape (num_envs, 3)
+        if pos_offset.dim() == 1:
+            pos_offset = pos_offset.unsqueeze(0).expand(curr_pos_1.shape[0], -1)
+        # Apply the offset by adding to position
+        curr_pos_1 = curr_pos_1 + pos_offset
+    
+    # Apply quaternion offset to asset_1 if provided
+    if quat_offset is not None:
+        # Convert list to tensor if needed
+        if isinstance(quat_offset, list):
+            quat_offset = torch.tensor(quat_offset, device=curr_quat_1.device, dtype=curr_quat_1.dtype)
+        # Ensure quat_offset has the right shape (num_envs, 4)
+        if quat_offset.dim() == 1:
+            quat_offset = quat_offset.unsqueeze(0).expand(curr_quat_1.shape[0], -1)
+        # Apply the offset by quaternion multiplication
+        # TODO: check if this is correct or if its shoudl be the inverse
+        curr_quat_1 = quat_mul(quat_offset, curr_quat_1)
+
+    # Handle per-environment gear type selection
+    if hasattr(env, '_current_gear_type'):
+        # Get the current gear type for each environment
+        current_gear_type = env._current_gear_type  # type: ignore
+        
+        # Create a mapping from gear type to asset name
+        gear_to_asset_mapping = {
+            'gear_small': 'factory_gear_small',
+            'gear_medium': 'factory_gear_medium', 
+            'gear_large': 'factory_gear_large'
+        }
+        
+        # Create arrays to store poses for all environments
+        curr_pos_2_all = torch.zeros_like(curr_pos_1)
+        curr_quat_2_all = torch.zeros_like(curr_quat_1)
+        
+        # Get poses for each environment based on their gear type
+        for env_id in range(env.num_envs):
+            # Get the gear type for this specific environment
+            if env_id < len(current_gear_type):
+                selected_gear_type = current_gear_type[env_id]
+            else:
+                print(f"ERROR: env_id {env_id} is out of bounds for current_gear_type (length: {len(current_gear_type)}), using 'gear_medium' as fallback")
+                selected_gear_type = 'gear_medium'
+            selected_asset_name = gear_to_asset_mapping.get(selected_gear_type, 'factory_gear_medium')
+            
+            # Get the asset for this specific gear type
+            try:
+                asset_2 = env.scene[selected_asset_name]
+            except KeyError:
+                # Fallback to factory_gear_medium if the asset doesn't exist
+                asset_2 = env.scene['factory_gear_medium']
+            
+            # Get poses for this specific environment
+            curr_pos_2_all[env_id] = asset_2.data.body_pos_w[env_id, 0]  # type: ignore
+            curr_quat_2_all[env_id] = asset_2.data.body_quat_w[env_id, 0]  # type: ignore
+        
+        # Compute keypoint distance for all environments at once
+        keypoint_dist_sep = compute_keypoint_distance(
+            current_pos=curr_pos_1,
+            current_quat=curr_quat_1,
+            target_pos=curr_pos_2_all,
+            target_quat=curr_quat_2_all,
+            keypoint_scale=keypoint_scale,
+            add_cube_center_kp=add_cube_center_kp,
+            device=curr_pos_1.device
+        )
+    else:
+        # Fallback to factory_gear_medium if _current_gear_type is not available
+        print("WARNING: env._current_gear_type is not available, using factory_gear_medium as fallback")
+        asset_2 = env.scene['factory_gear_medium']
+        
+        # get current poses in world frame for asset_2
+        curr_pos_2 = asset_2.data.body_pos_w[:, 0]  # type: ignore
+        curr_quat_2 = asset_2.data.body_quat_w[:, 0]  # type: ignore
+        
+        # compute keypoint distance
+        keypoint_dist_sep = compute_keypoint_distance(
+            current_pos=curr_pos_1,
+            current_quat=curr_quat_1,
+            target_pos=curr_pos_2,
+            target_quat=curr_quat_2,
+            keypoint_scale=keypoint_scale,
+            add_cube_center_kp=add_cube_center_kp,
+            device=curr_pos_1.device
+        )
+    
+    # Return mean distance across keypoints to match expected reward shape (num_envs,)
+    return keypoint_dist_sep.mean(-1)
+
+
+def keypoint_entity_error_exp(
+    env: ManagerBasedRLEnv, 
+    asset_cfg_1: SceneEntityCfg,
+    kp_exp_coeffs: list[tuple[float, float]] = [(1.0, 0.1)],
+    kp_use_sum_of_exps: bool = True,
+    keypoint_scale: float = 1.0,
+    add_cube_center_kp: bool = True,
+    quat_offset: torch.Tensor | None = None,
+    pos_offset: torch.Tensor | None = None
+) -> torch.Tensor:
+    """Compute exponential keypoint reward between a RigidObject and the dynamically selected gear.
+
+    The function computes the keypoint distance between the poses of two entities,
+    then applies an exponential reward function. The reward is computed using the 
+    formula: 1 / (exp(a * distance) + b + exp(-a * distance)) where a and b are coefficients.
+
+    Args:
+        env: The environment containing the assets
+        asset_cfg_1: Configuration of the first asset (RigidObject)
+        kp_exp_coeffs: List of (a, b) coefficient pairs for exponential reward
+        kp_use_sum_of_exps: Whether to use sum of exponentials (True) or single exponential (False)
+        keypoint_scale: Scale factor for keypoint offsets
+        add_cube_center_kp: Whether to include the center keypoint (0, 0, 0)
+
+    Returns:
+        Exponential keypoint reward tensor of shape (num_envs,) where each element
+        is the exponential reward value
+    """
+    # extract the assets (to enable type hinting)
+    asset_1 = env.scene[asset_cfg_1.name]  # RigidObject
+    
+    # get current poses in world frame for asset_1
+    # asset_1 is RigidObject - use body_pos_w and body_quat_w
+    curr_pos_1 = asset_1.data.body_pos_w[:, 0]  # type: ignore
+    curr_quat_1 = asset_1.data.body_quat_w[:, 0]  # type: ignore
+    
+    # Apply position offset to asset_1 if provided
+    if pos_offset is not None:
+        # Convert list to tensor if needed
+        if isinstance(pos_offset, list):
+            pos_offset = torch.tensor(pos_offset, device=curr_pos_1.device, dtype=curr_pos_1.dtype)
+        # Ensure pos_offset has the right shape (num_envs, 3)
+        if pos_offset.dim() == 1:
+            pos_offset = pos_offset.unsqueeze(0).expand(curr_pos_1.shape[0], -1)
+        # Apply the offset by adding to position
+        curr_pos_1 = curr_pos_1 + pos_offset
+    
+    # Apply quaternion offset to asset_1 if provided
+    if quat_offset is not None:
+        # Convert list to tensor if needed
+        if isinstance(quat_offset, list):
+            quat_offset = torch.tensor(quat_offset, device=curr_quat_1.device, dtype=curr_quat_1.dtype)
+        # Ensure quat_offset has the right shape (num_envs, 4)
+        if quat_offset.dim() == 1:
+            quat_offset = quat_offset.unsqueeze(0).expand(curr_quat_1.shape[0], -1)
+        # Apply the offset by quaternion multiplication
+        # TODO: check if this is correct or if its shoudl be the inverse
+        curr_quat_1 = quat_mul(quat_offset, curr_quat_1)
+
+    # Handle per-environment gear type selection
+    if hasattr(env, '_current_gear_type'):
+        # Get the current gear type for each environment
+        current_gear_type = env._current_gear_type  # type: ignore
+        
+        # Create a mapping from gear type to asset name
+        gear_to_asset_mapping = {
+            'gear_small': 'factory_gear_small',
+            'gear_medium': 'factory_gear_medium', 
+            'gear_large': 'factory_gear_large'
+        }
+        
+        # Create arrays to store poses for all environments
+        curr_pos_2_all = torch.zeros_like(curr_pos_1)
+        curr_quat_2_all = torch.zeros_like(curr_quat_1)
+        
+        # Get poses for each environment based on their gear type
+        for env_id in range(env.num_envs):
+            # Get the gear type for this specific environment
+            if env_id < len(current_gear_type):
+                selected_gear_type = current_gear_type[env_id]
+            else:
+                print(f"ERROR: env_id {env_id} is out of bounds for current_gear_type (length: {len(current_gear_type)}), using 'gear_medium' as fallback")
+                selected_gear_type = 'gear_medium'
+            selected_asset_name = gear_to_asset_mapping.get(selected_gear_type, 'factory_gear_medium')
+            
+            # Get the asset for this specific gear type
+            try:
+                asset_2 = env.scene[selected_asset_name]
+            except KeyError:
+                # Fallback to factory_gear_medium if the asset doesn't exist
+                asset_2 = env.scene['factory_gear_medium']
+            
+            # Get poses for this specific environment
+            curr_pos_2_all[env_id] = asset_2.data.body_pos_w[env_id, 0]  # type: ignore
+            curr_quat_2_all[env_id] = asset_2.data.body_quat_w[env_id, 0]  # type: ignore
+        
+        # Compute keypoint distance for all environments at once
+        keypoint_dist_sep = compute_keypoint_distance(
+            current_pos=curr_pos_1,
+            current_quat=curr_quat_1,
+            target_pos=curr_pos_2_all,
+            target_quat=curr_quat_2_all,
+            keypoint_scale=keypoint_scale,
+            add_cube_center_kp=add_cube_center_kp,
+            device=curr_pos_1.device
+        )
+        
+        # compute exponential reward
+        keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0])
+        
+        if kp_use_sum_of_exps:
+            # Use sum of exponentials: average across keypoints for each coefficient
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += (1. / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))).mean(-1)
+        else:
+            # Use single exponential: average keypoint distance first, then apply exponential
+            keypoint_dist = keypoint_dist_sep.mean(-1)  # shape: (num_envs,)
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += 1. / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
+    else:
+        # Fallback to factory_gear_medium if _current_gear_type is not available
+        print("WARNING: env._current_gear_type is not available, using factory_gear_medium as fallback")
+        asset_2 = env.scene['factory_gear_medium']
+        
+        # get current poses in world frame for asset_2
+        curr_pos_2 = asset_2.data.body_pos_w[:, 0]  # type: ignore
+        curr_quat_2 = asset_2.data.body_quat_w[:, 0]  # type: ignore
+        
+        # compute keypoint distance
+        keypoint_dist_sep = compute_keypoint_distance(
+            current_pos=curr_pos_1,
+            current_quat=curr_quat_1,
+            target_pos=curr_pos_2,
+            target_quat=curr_quat_2,
+            keypoint_scale=keypoint_scale,
+            add_cube_center_kp=add_cube_center_kp,
+            device=curr_pos_1.device
+        )
+        
+        # compute exponential reward
+        keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0])
+        
+        if kp_use_sum_of_exps:
+            # Use sum of exponentials: average across keypoints for each coefficient
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += (1. / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))).mean(-1)
+        else:
+            # Use single exponential: average keypoint distance first, then apply exponential
+            keypoint_dist = keypoint_dist_sep.mean(-1)  # shape: (num_envs,)
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += 1. / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
+    
+    return keypoint_reward_exp

From 0fc0098b84fd2d4ed9bb2f4c5bde900e8a92a019 Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Tue, 16 Sep 2025 15:09:51 -0700
Subject: [PATCH 02/10] revert rsl_rl change

---
 scripts/reinforcement_learning/rsl_rl/play.py | 42 +++++--------
 .../reinforcement_learning/rsl_rl/train.py    | 21 ++++++-
 .../isaaclab_rl/rsl_rl/vecenv_wrapper.py      | 60 +++++++++++++------
 .../gear_assembly/gear_assembly_env_cfg.py    | 38 ++++++------
 4 files changed, 93 insertions(+), 68 deletions(-)

diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py
index 11ef7399462..150bbd03492 100644
--- a/scripts/reinforcement_learning/rsl_rl/play.py
+++ b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -58,7 +58,7 @@
 import time
 import torch
 
-from rsl_rl.runners import DistillationRunner, OnPolicyRunner
+from rsl_rl.runners import OnPolicyRunner
 
 from isaaclab.envs import (
     DirectMARLEnv,
@@ -71,7 +71,7 @@
 from isaaclab.utils.dict import print_dict
 from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
 
-from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
 
 import isaaclab_tasks  # noqa: F401
 from isaaclab_tasks.utils import get_checkpoint_path
@@ -81,14 +81,14 @@
 
 
 @hydra_task_config(args_cli.task, args_cli.agent)
-def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
+def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg):
     """Play with RSL-RL agent."""
     # grab task name for checkpoint path
     task_name = args_cli.task.split(":")[-1]
     train_task_name = task_name.replace("-Play", "")
 
     # override configurations with non-hydra CLI arguments
-    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
+    agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
     env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
 
     # set the environment seed
@@ -112,9 +112,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     log_dir = os.path.dirname(resume_path)
 
-    # set the log directory for the environment (works for all environment types)
-    env_cfg.log_dir = log_dir
-
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
 
@@ -139,43 +136,32 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     print(f"[INFO]: Loading model checkpoint from: {resume_path}")
     # load previously trained model
-    if agent_cfg.class_name == "OnPolicyRunner":
-        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
-    elif agent_cfg.class_name == "DistillationRunner":
-        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
-    else:
-        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
-    runner.load(resume_path)
+    ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    ppo_runner.load(resume_path)
 
     # obtain the trained policy for inference
-    policy = runner.get_inference_policy(device=env.unwrapped.device)
+    policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)
 
     # extract the neural network module
     # we do this in a try-except to maintain backwards compatibility.
     try:
         # version 2.3 onwards
-        policy_nn = runner.alg.policy
+        policy_nn = ppo_runner.alg.policy
     except AttributeError:
         # version 2.2 and below
-        policy_nn = runner.alg.actor_critic
-
-    # extract the normalizer
-    if hasattr(policy_nn, "actor_obs_normalizer"):
-        normalizer = policy_nn.actor_obs_normalizer
-    elif hasattr(policy_nn, "student_obs_normalizer"):
-        normalizer = policy_nn.student_obs_normalizer
-    else:
-        normalizer = None
+        policy_nn = ppo_runner.alg.actor_critic
 
     # export policy to onnx/jit
     export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
-    export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
-    export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
+    export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
+    export_policy_as_onnx(
+        policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
+    )
 
     dt = env.unwrapped.step_dt
 
     # reset environment
-    obs = env.get_observations()
+    obs, _ = env.get_observations()
     timestep = 0
     # simulate environment
     while simulation_app.is_running():
diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py
index b2056551f0c..0b1d698d2b3 100644
--- a/scripts/reinforcement_learning/rsl_rl/train.py
+++ b/scripts/reinforcement_learning/rsl_rl/train.py
@@ -10,6 +10,8 @@
 import argparse
 import sys
 
+import rsl_rl
+
 from isaaclab.app import AppLauncher
 
 # local imports
@@ -56,8 +58,9 @@
 from packaging import version
 
 # check minimum supported rsl-rl version
-RSL_RL_VERSION = "3.0.1"
+RSL_RL_VERSION = "2.3.1"
 installed_version = metadata.version("rsl-rl-lib")
+print("installed_version", installed_version)
 if version.parse(installed_version) < version.parse(RSL_RL_VERSION):
     if platform.system() == "Windows":
         cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
@@ -78,7 +81,7 @@
 from datetime import datetime
 
 import omni
-from rsl_rl.runners import DistillationRunner, OnPolicyRunner
+from rsl_rl.runners import OnPolicyRunner
 
 from isaaclab.envs import (
     DirectMARLEnv,
@@ -199,6 +202,20 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
     dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)
 
+    import rsl_rl
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
+
     # run training
     runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
 
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
index 73ceae04693..d909bf2d912 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
@@ -5,7 +5,6 @@
 
 import gymnasium as gym
 import torch
-from tensordict import TensorDict
 
 from rsl_rl.env import VecEnv
 
@@ -13,9 +12,16 @@
 
 
 class RslRlVecEnvWrapper(VecEnv):
-    """Wraps around Isaac Lab environment for the RSL-RL library
+    """Wraps around Isaac Lab environment for RSL-RL library
+
+    To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_privileged_obs` (int).
+    This is used by the learning agent to allocate buffers in the trajectory memory. Additionally, the returned
+    observations should have the key "critic" which corresponds to the privileged observations. Since this is
+    optional for some environments, the wrapper checks if these attributes exist. If they don't then the wrapper
+    defaults to zero as number of privileged observations.
 
     .. caution::
+
         This class must be the last wrapper in the wrapper chain. This is because the wrapper does not follow
         the :class:`gym.Wrapper` interface. Any subsequent wrappers will need to be modified to work with this
         wrapper.
@@ -37,14 +43,12 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N
         Raises:
             ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`.
         """
-
         # check that input is valid
         if not isinstance(env.unwrapped, ManagerBasedRLEnv) and not isinstance(env.unwrapped, DirectRLEnv):
             raise ValueError(
                 "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:"
                 f" {type(env)}"
             )
-
         # initialize the wrapper
         self.env = env
         self.clip_actions = clip_actions
@@ -59,6 +63,20 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N
             self.num_actions = self.unwrapped.action_manager.total_action_dim
         else:
             self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space)
+        if hasattr(self.unwrapped, "observation_manager"):
+            self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0]
+        else:
+            self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"])
+        # -- privileged observations
+        if (
+            hasattr(self.unwrapped, "observation_manager")
+            and "critic" in self.unwrapped.observation_manager.group_obs_dim
+        ):
+            self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0]
+        elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space:
+            self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
+        else:
+            self.num_privileged_obs = 0
 
         # modify the action space to the clip range
         self._modify_action_space()
@@ -115,6 +133,14 @@ def unwrapped(self) -> ManagerBasedRLEnv | DirectRLEnv:
     Properties
     """
 
+    def get_observations(self) -> tuple[torch.Tensor, dict]:
+        """Returns the current observations of the environment."""
+        if hasattr(self.unwrapped, "observation_manager"):
+            obs_dict = self.unwrapped.observation_manager.compute()
+        else:
+            obs_dict = self.unwrapped._get_observations()
+        return obs_dict["policy"], {"observations": obs_dict}
+
     @property
     def episode_length_buf(self) -> torch.Tensor:
         """The episode length buffer."""
@@ -136,20 +162,13 @@ def episode_length_buf(self, value: torch.Tensor):
     def seed(self, seed: int = -1) -> int:  # noqa: D102
         return self.unwrapped.seed(seed)
 
-    def reset(self) -> tuple[TensorDict, dict]:  # noqa: D102
+    def reset(self) -> tuple[torch.Tensor, dict]:  # noqa: D102
         # reset the environment
-        obs_dict, extras = self.env.reset()
-        return TensorDict(obs_dict, batch_size=[self.num_envs]), extras
-
-    def get_observations(self) -> TensorDict:
-        """Returns the current observations of the environment."""
-        if hasattr(self.unwrapped, "observation_manager"):
-            obs_dict = self.unwrapped.observation_manager.compute()
-        else:
-            obs_dict = self.unwrapped._get_observations()
-        return TensorDict(obs_dict, batch_size=[self.num_envs])
+        obs_dict, _ = self.env.reset()
+        # return observations
+        return obs_dict["policy"], {"observations": obs_dict}
 
-    def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]:
+    def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
         # clip actions
         if self.clip_actions is not None:
             actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
@@ -157,12 +176,16 @@ def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.T
         obs_dict, rew, terminated, truncated, extras = self.env.step(actions)
         # compute dones for compatibility with RSL-RL
         dones = (terminated | truncated).to(dtype=torch.long)
+        # move extra observations to the extras dict
+        obs = obs_dict["policy"]
+        extras["observations"] = obs_dict
         # move time out information to the extras dict
         # this is only needed for infinite horizon tasks
         if not self.unwrapped.cfg.is_finite_horizon:
             extras["time_outs"] = truncated
+
         # return the step information
-        return TensorDict(obs_dict, batch_size=[self.num_envs]), rew, dones, extras
+        return obs, rew, dones, extras
 
     def close(self):  # noqa: D102
         return self.env.close()
@@ -177,8 +200,7 @@ def _modify_action_space(self):
             return
 
         # modify the action space to the clip range
-        # note: this is only possible for the box action space. we need to change it in the future for other
-        #   action spaces.
+        # note: this is only possible for the box action space. we need to change it in the future for other action spaces.
         self.env.unwrapped.single_action_space = gym.spaces.Box(
             low=-self.clip_actions, high=self.clip_actions, shape=(self.num_actions,)
         )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 88a3b89009f..bfb5ceedee2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -223,28 +223,28 @@ def __post_init__(self):
             self.enable_corruption = True
             self.concatenate_terms = True
     
-    # @configclass
-    # class CriticCfg(ObsGroup):
-    #     """Observations for policy group."""
-
-    #     # observation terms (order preserved)
-    #     joint_pos = ObsTerm(func=mdp.joint_pos,
-    #                         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
-    #     joint_vel = ObsTerm(func=mdp.joint_vel,
-    #                         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
-    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w,
-    #     #                          noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025))
-    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add"))
-    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
-    #     gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
-    #     gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
-
-    #     gear_pos = ObsTerm(func=mdp.gear_pos_w)
-    #     gear_quat = ObsTerm(func=mdp.gear_quat_w)
+    @configclass
+    class CriticCfg(ObsGroup):
+        """Observations for policy group."""
+
+        # observation terms (order preserved)
+        joint_pos = ObsTerm(func=mdp.joint_pos,
+                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+        joint_vel = ObsTerm(func=mdp.joint_vel,
+                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+        # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w,
+        #                          noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025))
+        # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add"))
+        # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
+        gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
+        gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
+
+        gear_pos = ObsTerm(func=mdp.gear_pos_w)
+        gear_quat = ObsTerm(func=mdp.gear_quat_w)
 
     # observation groups
     policy: PolicyCfg = PolicyCfg()
-    # critic: CriticCfg = CriticCfg()
+    critic: CriticCfg = CriticCfg()
 
 
 @configclass

From 3cda66bac766e7a5a1018aef3587bab50b8794d2 Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Tue, 16 Sep 2025 15:10:30 -0700
Subject: [PATCH 03/10] fixed_sigma=False,

---
 .../deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
index 636717b9b55..b322f1a6132 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
@@ -57,6 +57,7 @@ class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
     clip_actions = 1.0
     resume = False
     policy = UpdatedRslRlPpoActorCriticRecurrentCfg(
+        fixed_sigma=False,
         init_noise_std=1.0,
         actor_hidden_dims=[256, 128, 64],
         critic_hidden_dims=[256, 128, 64],

From f5a0c248a5b7f481ccdd31c4ac413f3a0d5ae8a9 Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Tue, 16 Sep 2025 15:44:58 -0700
Subject: [PATCH 04/10] cleanup 2

---
 .../config/ur_10e/joint_pos_env_cfg.py        |  9 --------
 .../gear_assembly/gear_assembly_env_cfg.py    | 21 -------------------
 2 files changed, 30 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
index 6899beb0512..a95a44c2a67 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
@@ -39,16 +39,11 @@ class EventCfg:
 
     robot_joint_stiffness_and_damping = EventTerm(
         func=mdp.randomize_actuator_gains,
-        # min_step_count_between_reset=720,
-        # min_step_count_between_reset=200,
         mode="reset",
         params={
-            # "asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
             "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]),  # only the arm joints are randomized
             "stiffness_distribution_params": (0.75, 1.5),
             "damping_distribution_params": (0.3, 3.0),
-            # "stiffness_distribution_params": (1.0, 1.0),
-            # "damping_distribution_params": (1.0, 1.0),
             "operation": "scale",
             "distribution": "log_uniform",
         },
@@ -56,8 +51,6 @@ class EventCfg:
 
     joint_friction = EventTerm(
         func=mdp.randomize_joint_parameters,
-        # min_step_count_between_reset=720,
-        # min_step_count_between_reset=200,
         mode="reset",
         params={
             "asset_cfg": SceneEntityCfg("robot", joint_names=["shoulder_.*", "elbow_.*", "wrist_.*"]),
@@ -155,8 +148,6 @@ class EventCfg:
         mode="reset",
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
-            # "pos_offset": [-0.030375, 0.0, -0.255],  # Offset from wrist_3_link to gripper position
-            # "pos_offset": [0.0, 0.030375, -0.26],
             "rot_offset": [0.0, math.sqrt(2)/2, math.sqrt(2)/2, 0.0],
             "pos_randomization_range": {
                 "x": [-0.0, 0.0],
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index bfb5ceedee2..7cd3e2f4867 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -173,26 +173,6 @@ class GearAssemblySceneCfg(InteractiveSceneCfg):
         spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0),
     )
 
-# @configclass
-# class CommandsCfg:
-#     """Command terms for the MDP."""
-
-#     ee_pose = mdp.UniformPoseCommandCfg(
-#         asset_name="robot",
-#         body_name=MISSING,
-#         resampling_time_range=(4.0, 4.0),
-#         debug_vis=True,
-#         ranges=mdp.UniformPoseCommandCfg.Ranges(
-#             pos_x=(0.35, 0.65),
-#             pos_y=(-0.2, 0.2),
-#             pos_z=(0.15, 0.5),
-#             roll=(0.0, 0.0),
-#             pitch=MISSING,  # depends on end-effector axis
-#             yaw=(-3.14, 3.14),
-#         ),
-#     )
-
-
 @configclass
 class ActionsCfg:
     """Action specifications for the MDP."""
@@ -308,7 +288,6 @@ class GearAssemblyEnvCfg(ManagerBasedRLEnvCfg):
     # Basic settings
     observations: ObservationsCfg = ObservationsCfg()
     actions: ActionsCfg = ActionsCfg()
-    # commands: CommandsCfg = CommandsCfg()
     # MDP settings
     rewards: RewardsCfg = RewardsCfg()
     terminations: TerminationsCfg = TerminationsCfg()

From b175571b55a97d2524eafb31a534ca108020569c Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Wed, 17 Sep 2025 09:44:29 -0700
Subject: [PATCH 05/10] remove some cleanup

---
 .../robots/universal_robots.py                | 180 +++++++++++++++---
 .../config/ur_10e/joint_pos_env_cfg.py        |  35 +---
 .../gear_assembly/gear_assembly_env_cfg.py    |  38 ++--
 3 files changed, 171 insertions(+), 82 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
index 4433b824235..209ebfc107a 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
@@ -55,47 +55,63 @@
 UR10e_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
         usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/UniversalRobots/ur10e/ur10e.usd",
+        activate_contact_sensors=True,
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
             disable_gravity=True,
             max_depenetration_velocity=5.0,
+            linear_damping=0.0,
+            angular_damping=0.0,
+            max_linear_velocity=1000.0,
+            max_angular_velocity=3666.0,
+            enable_gyroscopic_forces=True,
+            solver_position_iteration_count=192,
+            solver_velocity_iteration_count=1,
+            max_contact_impulse=1e32,
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-            enabled_self_collisions=False, solver_position_iteration_count=16, solver_velocity_iteration_count=1
+            enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1
         ),
-        activate_contact_sensors=False,
+        collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
     ),
     init_state=ArticulationCfg.InitialStateCfg(
         joint_pos={
-            "shoulder_pan_joint": 3.141592653589793,
-            "shoulder_lift_joint": -1.5707963267948966,
-            "elbow_joint": 1.5707963267948966,
-            "wrist_1_joint": -1.5707963267948966,
-            "wrist_2_joint": -1.5707963267948966,
-            "wrist_3_joint": 0.0,
+            "shoulder_pan_joint": 2.7228e+00,
+            "shoulder_lift_joint": -8.3962e-01,
+            "elbow_joint": 1.3684e+00,
+            "wrist_1_joint": -2.1048e+00,
+            "wrist_2_joint": -1.5691e+00,
+            "wrist_3_joint": -1.9896e+00,
+            "finger_joint": 0.0,
         },
         pos=(0.0, 0.0, 0.0),
-        rot=(1.0, 0.0, 0.0, 0.0),
+        rot=(0.0, 0.0, 0.0, 1.0),
     ),
     actuators={
         # 'shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_joint', 'wrist_1_joint', 'wrist_2_joint', 'wrist_3_joint'
         "shoulder": ImplicitActuatorCfg(
             joint_names_expr=["shoulder_.*"],
+            effort_limit=330.0,
+            velocity_limit=2.175,
             stiffness=1320.0,
-            damping=72.6636085,
+            damping=72.0,
             friction=0.0,
             armature=0.0,
         ),
         "elbow": ImplicitActuatorCfg(
             joint_names_expr=["elbow_joint"],
+            effort_limit=150.0,
+            velocity_limit=2.175,
             stiffness=600.0,
-            damping=34.64101615,
+            damping=50.0,
             friction=0.0,
             armature=0.0,
         ),
         "wrist": ImplicitActuatorCfg(
             joint_names_expr=["wrist_.*"],
+            effort_limit=56.0,
+            velocity_limit=2.175,
             stiffness=216.0,
-            damping=29.39387691,
+            damping=30.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -132,34 +148,138 @@
 UR10e_ROBOTIQ_GRIPPER_CFG.init_state.joint_pos[".*_inner_finger_pad_joint"] = 0.0
 UR10e_ROBOTIQ_GRIPPER_CFG.init_state.joint_pos[".*_outer_.*_joint"] = 0.0
 # the major actuator joint for gripper
-UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_drive"] = ImplicitActuatorCfg(
+UR10e_ROBOTIQ_GRIPPER_CFG.actuators["finger_joint"] = ImplicitActuatorCfg(
     joint_names_expr=["finger_joint"],
-    effort_limit_sim=10.0,
-    velocity_limit_sim=1.0,
-    stiffness=11.25,
-    damping=0.1,
+    effort_limit_sim=1.0,
+    velocity_limit=2.175,
+    stiffness=20.0,
+    damping=8.94,
     friction=0.0,
-    armature=0.0,
+    armature=0.0,  # 0.57
 )
 # the auxiliary actuator joint for gripper
-UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_finger"] = ImplicitActuatorCfg(
-    joint_names_expr=[".*_inner_finger_joint"],
+UR10e_ROBOTIQ_GRIPPER_CFG.actuators["others_1"] = ImplicitActuatorCfg(
+    joint_names_expr=['right_outer_knuckle_joint', 'left_outer_finger_joint'],
     effort_limit_sim=1.0,
-    velocity_limit_sim=1.0,
-    stiffness=0.2,
-    damping=0.001,
+    velocity_limit=2.175,
+    stiffness=0.0,
+    damping=0.0,
     friction=0.0,
-    armature=0.0,
+    armature=0.0,  # 0.57
 )
 # the passive joints for gripper
-UR10e_ROBOTIQ_GRIPPER_CFG.actuators["gripper_passive"] = ImplicitActuatorCfg(
-    joint_names_expr=[".*_inner_finger_pad_joint", ".*_outer_finger_joint", "right_outer_knuckle_joint"],
+UR10e_ROBOTIQ_GRIPPER_CFG.actuators["others_2"] = ImplicitActuatorCfg(
+    joint_names_expr=['right_outer_finger_joint', 'left_inner_finger_joint', 'right_inner_finger_joint', 'left_inner_finger_pad_joint', 'right_inner_finger_pad_joint'],
     effort_limit_sim=1.0,
-    velocity_limit_sim=1.0,
-    stiffness=0.0,
-    damping=0.0,
+    velocity_limit=2.175,
+    stiffness=400.0,
+    damping=20.0,
     friction=0.0,
-    armature=0.0,
+    armature=0.0,  # 0.57
+)
+
+UR10e_2f140_CFG = ArticulationCfg(
+    spawn=sim_utils.UsdFileCfg(
+        usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/IsaacARM/Assets/UR10/iakinola/ur10e_robotiq_140_variant.usd",
+        # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Isaac/Samples/Rigging/Manipulator/import_manipulator/ur10e/ur/ur_gripper.usd",
+        # rigid_props=sim_utils.RigidBodyPropertiesCfg(
+        #     disable_gravity=True,
+        #     max_depenetration_velocity=5.0,
+        # ),
+        activate_contact_sensors=True,
+        rigid_props=sim_utils.RigidBodyPropertiesCfg(
+            disable_gravity=True,
+            max_depenetration_velocity=5.0,
+            linear_damping=0.0,
+            angular_damping=0.0,
+            max_linear_velocity=1000.0,
+            max_angular_velocity=3666.0,
+            enable_gyroscopic_forces=True,
+            solver_position_iteration_count=192,
+            solver_velocity_iteration_count=1,
+            max_contact_impulse=1e32,
+        ),
+        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+            enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1
+        ),
+        collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+        # articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+        #         enabled_self_collisions=False, solver_position_iteration_count=12, solver_velocity_iteration_count=1
+        #     ),
+        # activate_contact_sensors=False,
+    ),    
+    init_state=ArticulationCfg.InitialStateCfg(
+            joint_pos={
+                "shoulder_pan_joint": 2.7228e+00,
+                "shoulder_lift_joint": -8.3962e-01,
+                "elbow_joint": 1.3684e+00,
+                "wrist_1_joint": -2.1048e+00,
+                "wrist_2_joint": -1.5691e+00,
+                "wrist_3_joint": -1.9896e+00,
+                "finger_joint": 0.0,
+            },
+            pos=(0.0, 0.0, 0.0),
+            # @ireti: Figure out why this rotaion is requried.
+            # Otherwise the IK during initialization is does not converge.
+            rot=(0.0, 0.0, 0.0, 1.0),
+        ),
+    actuators={
+            # @ireti: These values were obtained from 2025-05-30_20-35-06/params/env.yaml
+            "shoulder": ImplicitActuatorCfg(
+                joint_names_expr=["shoulder_.*"],
+                effort_limit=330.0,
+                velocity_limit=2.175,
+                stiffness=1320.0,
+                damping=72.0,
+                friction=0.0,
+                armature=0.0,
+            ),
+            "elbow": ImplicitActuatorCfg(
+                joint_names_expr=["elbow_joint"],
+                effort_limit=150.0,
+                velocity_limit=2.175,
+                stiffness=600.0,
+                damping=50.0,
+                friction=0.0,
+                armature=0.0,
+            ),
+            "wrist": ImplicitActuatorCfg(
+                joint_names_expr=["wrist_.*"],
+                effort_limit=56.0,
+                velocity_limit=2.175,
+                stiffness=216.0,
+                damping=30.0,
+                friction=0.0,
+                armature=0.0,
+            ),
+            "finger_joint": ImplicitActuatorCfg(
+                joint_names_expr=["finger_joint"],
+                effort_limit_sim=1.0,
+                velocity_limit=2.175,
+                stiffness=20.0,
+                damping=8.94,
+                friction=0.0,
+                armature=0.0,  # 0.57
+            ),
+            "others_1": ImplicitActuatorCfg(
+                joint_names_expr=['right_outer_knuckle_joint', 'left_outer_finger_joint'],
+                effort_limit_sim=1.0,
+                velocity_limit=2.175,
+                stiffness=0.0,
+                damping=0.0,
+                friction=0.0,
+                armature=0.0,  # 0.57
+            ),
+            "others_2": ImplicitActuatorCfg(
+                joint_names_expr=['right_outer_finger_joint', 'left_inner_finger_joint', 'right_inner_finger_joint', 'left_inner_finger_pad_joint', 'right_inner_finger_pad_joint'],
+                effort_limit_sim=1.0,
+                velocity_limit=2.175,
+                stiffness=400.0,
+                damping=20.0,
+                friction=0.0,
+                armature=0.0,  # 0.57
+            ),
+    }
 )
 
 """Configuration of UR-10E arm with Robotiq_2f_140 gripper."""
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
index a95a44c2a67..8391bb9eb24 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
@@ -8,8 +8,6 @@
 import torch
 
 from isaaclab.utils import configclass
-import isaaclab.sim as sim_utils
-from isaaclab.assets.articulation import ArticulationCfg
 
 from isaaclab.managers import EventTermCfg as EventTerm
 from isaaclab.managers import SceneEntityCfg
@@ -171,37 +169,8 @@ def __post_init__(self):
         self.scene.robot = UR10e_ROBOTIQ_GRIPPER_CFG.replace(
             prim_path="{ENV_REGEX_NS}/Robot",
             spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace(
-                activate_contact_sensors=True,
-                rigid_props=sim_utils.RigidBodyPropertiesCfg(
-                    disable_gravity=True,
-                    max_depenetration_velocity=5.0,
-                    linear_damping=0.0,
-                    angular_damping=0.0,
-                    max_linear_velocity=1000.0,
-                    max_angular_velocity=3666.0,
-                    enable_gyroscopic_forces=True,
-                    solver_position_iteration_count=192,
-                    solver_velocity_iteration_count=1,
-                    max_contact_impulse=1e32,
-                ),
-                articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-                    enabled_self_collisions=False, solver_position_iteration_count=192, solver_velocity_iteration_count=1
-                ),
-                collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
-            ),
-            init_state=ArticulationCfg.InitialStateCfg(
-                joint_pos={
-                    "shoulder_pan_joint": 2.7228e+00,
-                    "shoulder_lift_joint": -8.3962e-01,
-                    "elbow_joint": 1.3684e+00,
-                    "wrist_1_joint": -2.1048e+00,
-                    "wrist_2_joint": -1.5691e+00,
-                    "wrist_3_joint": -1.9896e+00,
-                    "finger_joint": 0.0,
-                },
-                pos=(0.0, 0.0, 0.0),
-                rot=(0.0, 0.0, 0.0, 1.0),
-            ),
+                usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd")
+            )
         )
 
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 7cd3e2f4867..ca2ede09e40 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -203,28 +203,28 @@ def __post_init__(self):
             self.enable_corruption = True
             self.concatenate_terms = True
     
-    @configclass
-    class CriticCfg(ObsGroup):
-        """Observations for policy group."""
-
-        # observation terms (order preserved)
-        joint_pos = ObsTerm(func=mdp.joint_pos,
-                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
-        joint_vel = ObsTerm(func=mdp.joint_vel,
-                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
-        # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w,
-        #                          noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025))
-        # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add"))
-        # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
-        gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
-        gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
-
-        gear_pos = ObsTerm(func=mdp.gear_pos_w)
-        gear_quat = ObsTerm(func=mdp.gear_quat_w)
+    # @configclass
+    # class CriticCfg(ObsGroup):
+    #     """Observations for policy group."""
+
+    #     # observation terms (order preserved)
+    #     joint_pos = ObsTerm(func=mdp.joint_pos,
+    #                         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+    #     joint_vel = ObsTerm(func=mdp.joint_vel,
+    #                         params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*"])})
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w,
+    #     #                          noise=AdditiveUniformNoiseModelCfg(n_min=-0.0025, n_max=0.0025))
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w, noise=Unoise(n_min=-0.005, n_max=0.005, operation="add"))
+    #     # gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
+    #     gear_shaft_pos = ObsTerm(func=mdp.gear_shaft_pos_w)
+    #     gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
+
+    #     gear_pos = ObsTerm(func=mdp.gear_pos_w)
+    #     gear_quat = ObsTerm(func=mdp.gear_quat_w)
 
     # observation groups
     policy: PolicyCfg = PolicyCfg()
-    critic: CriticCfg = CriticCfg()
+    # critic: CriticCfg = CriticCfg()
 
 
 @configclass

From 1a2858a277eee00931664ce9de25d0a9af4592ae Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Wed, 17 Sep 2025 15:21:10 -0700
Subject: [PATCH 06/10] use omniverse assets

---
 .../gear_assembly/config/ur_10e/joint_pos_env_cfg.py  |  6 +++---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py     | 11 +++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
index 8391bb9eb24..f44059c4bd9 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
@@ -168,9 +168,9 @@ def __post_init__(self):
         # switch robot to ur10 with local asset path
         self.scene.robot = UR10e_ROBOTIQ_GRIPPER_CFG.replace(
             prim_path="{ENV_REGEX_NS}/Robot",
-            spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace(
-                usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd")
-            )
+            # spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace(
+            #     usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd")
+            # )
         )
 
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index ca2ede09e40..4e438ac979b 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -56,7 +56,8 @@ class GearAssemblySceneCfg(InteractiveSceneCfg):
         prim_path="{ENV_REGEX_NS}/FactoryGearBase",
         # TODO: change to common isaac sim directory
         spawn=sim_utils.UsdFileCfg(
-            usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_base.usd"),
+            usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_base.usd",
+            # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_base.usd"),
             # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_base.usd",
             # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/gear_base_scale_1.5_usd/gear_base_scale_1.5.usd",
             activate_contact_sensors=True,
@@ -85,7 +86,8 @@ class GearAssemblySceneCfg(InteractiveSceneCfg):
     #     prim_path="{ENV_REGEX_NS}/FactoryGearSmall",
     #     # TODO: change to common isaac sim directory
     #     spawn=sim_utils.UsdFileCfg(
-    #         usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_small.usd"),
+    #         usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_small.usd",
+    #         # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_small.usd"),
     #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_small.usd",
     #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/small_gear_scale_1p5_usd/small_gear_scale_1p5.usd",
     #         activate_contact_sensors=True,
@@ -112,7 +114,8 @@ class GearAssemblySceneCfg(InteractiveSceneCfg):
         prim_path="{ENV_REGEX_NS}/FactoryGearMedium",
         # TODO: change to common isaac sim directory
         spawn=sim_utils.UsdFileCfg(
-            usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_medium.usd"),
+            usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_medium.usd",
+            # usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_medium.usd"),
             # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_medium.usd",
             # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/medium_gear_scale_1p5_usd/medium_gear_scale_1p5.usd",
             activate_contact_sensors=True,
@@ -139,7 +142,7 @@ class GearAssemblySceneCfg(InteractiveSceneCfg):
     #     prim_path="{ENV_REGEX_NS}/FactoryGearLarge",
     #     # TODO: change to common isaac sim directory
     #     spawn=sim_utils.UsdFileCfg(
-    #         usd_path=os.path.join(ASSETS_DIR, "Factory/Gears_1.5x/factory_gear_large.usd"),
+    #         usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/Factory/Gears_1.5x/factory_gear_large.usd",
     #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Users/ashwinvk@nvidia.com/props/factory_gear_large.usd",
     #         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaacsim/Props/gear_assembly/large_gear_scale_1p5_usd/large_gear_scale_1p5.usd",
     #         activate_contact_sensors=True,

From 037c30b0689249f584bef2ded3494415eb852263 Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Wed, 17 Sep 2025 16:19:27 -0700
Subject: [PATCH 07/10] use UR10e_2f140_CFG

---
 .../deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
index f44059c4bd9..2b48e109d1e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
@@ -24,7 +24,7 @@
 ##
 # Pre-defined configs
 ##
-from isaaclab_assets.robots.universal_robots import UR10e_ROBOTIQ_GRIPPER_CFG  # isort: skip
+from isaaclab_assets.robots.universal_robots import UR10e_2f140_CFG  # isort: skip
 
 
 ##
@@ -166,9 +166,9 @@ def __post_init__(self):
         super().__post_init__()
 
         # switch robot to ur10 with local asset path
-        self.scene.robot = UR10e_ROBOTIQ_GRIPPER_CFG.replace(
+        self.scene.robot = UR10e_2f140_CFG.replace(
             prim_path="{ENV_REGEX_NS}/Robot",
-            # spawn=UR10e_ROBOTIQ_GRIPPER_CFG.spawn.replace(
+            # spawn=UR10e_2f140_CFG.spawn.replace(
             #     usd_path=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "assets/ur10e_robotiq_140_variant.usd")
             # )
         )

From 86fc486b1b4342079e7813a91aacf0e4809d9816 Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Thu, 18 Sep 2025 13:15:33 -0700
Subject: [PATCH 08/10] use new usd path

---
 .../isaaclab_assets/isaaclab_assets/robots/universal_robots.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
index 209ebfc107a..053f0ac49ef 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py
@@ -180,7 +180,8 @@
 
 UR10e_2f140_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
-        usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/IsaacARM/Assets/UR10/iakinola/ur10e_robotiq_140_variant.usd",
+        usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/isaac_ros_gear_insertion/ur10e_robotiq_140_variant.usd",
+        # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Projects/IsaacARM/Assets/UR10/iakinola/ur10e_robotiq_140_variant.usd",
         # usd_path=f"omniverse://isaac-dev.ov.nvidia.com/Isaac/Samples/Rigging/Manipulator/import_manipulator/ur10e/ur/ur_gripper.usd",
         # rigid_props=sim_utils.RigidBodyPropertiesCfg(
         #     disable_gravity=True,

From c4d8324b4bfe7042fe486545d611ae0d92eaee57 Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Mon, 29 Sep 2025 11:51:22 -0700
Subject: [PATCH 09/10] revert rls_rl changes

---
 scripts/reinforcement_learning/rsl_rl/play.py | 42 ++++++++-----
 .../reinforcement_learning/rsl_rl/train.py    | 21 +------
 .../isaaclab_rl/rsl_rl/vecenv_wrapper.py      | 60 ++++++-------------
 source/isaaclab_rl/setup.py                   |  3 +-
 .../config/ur_10e/agents/rsl_rl_ppo_cfg.py    |  7 +--
 5 files changed, 53 insertions(+), 80 deletions(-)

diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py
index 150bbd03492..11ef7399462 100644
--- a/scripts/reinforcement_learning/rsl_rl/play.py
+++ b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -58,7 +58,7 @@
 import time
 import torch
 
-from rsl_rl.runners import OnPolicyRunner
+from rsl_rl.runners import DistillationRunner, OnPolicyRunner
 
 from isaaclab.envs import (
     DirectMARLEnv,
@@ -71,7 +71,7 @@
 from isaaclab.utils.dict import print_dict
 from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
 
-from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
+from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx
 
 import isaaclab_tasks  # noqa: F401
 from isaaclab_tasks.utils import get_checkpoint_path
@@ -81,14 +81,14 @@
 
 
 @hydra_task_config(args_cli.task, args_cli.agent)
-def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg):
+def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
     """Play with RSL-RL agent."""
     # grab task name for checkpoint path
     task_name = args_cli.task.split(":")[-1]
     train_task_name = task_name.replace("-Play", "")
 
     # override configurations with non-hydra CLI arguments
-    agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
+    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
     env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
 
     # set the environment seed
@@ -112,6 +112,9 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     log_dir = os.path.dirname(resume_path)
 
+    # set the log directory for the environment (works for all environment types)
+    env_cfg.log_dir = log_dir
+
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
 
@@ -136,32 +139,43 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     print(f"[INFO]: Loading model checkpoint from: {resume_path}")
     # load previously trained model
-    ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
-    ppo_runner.load(resume_path)
+    if agent_cfg.class_name == "OnPolicyRunner":
+        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    elif agent_cfg.class_name == "DistillationRunner":
+        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    else:
+        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
+    runner.load(resume_path)
 
     # obtain the trained policy for inference
-    policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)
+    policy = runner.get_inference_policy(device=env.unwrapped.device)
 
     # extract the neural network module
     # we do this in a try-except to maintain backwards compatibility.
     try:
         # version 2.3 onwards
-        policy_nn = ppo_runner.alg.policy
+        policy_nn = runner.alg.policy
     except AttributeError:
         # version 2.2 and below
-        policy_nn = ppo_runner.alg.actor_critic
+        policy_nn = runner.alg.actor_critic
+
+    # extract the normalizer
+    if hasattr(policy_nn, "actor_obs_normalizer"):
+        normalizer = policy_nn.actor_obs_normalizer
+    elif hasattr(policy_nn, "student_obs_normalizer"):
+        normalizer = policy_nn.student_obs_normalizer
+    else:
+        normalizer = None
 
     # export policy to onnx/jit
     export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
-    export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
-    export_policy_as_onnx(
-        policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
-    )
+    export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt")
+    export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx")
 
     dt = env.unwrapped.step_dt
 
     # reset environment
-    obs, _ = env.get_observations()
+    obs = env.get_observations()
     timestep = 0
     # simulate environment
     while simulation_app.is_running():
diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py
index 0b1d698d2b3..a73c3b2881b 100644
--- a/scripts/reinforcement_learning/rsl_rl/train.py
+++ b/scripts/reinforcement_learning/rsl_rl/train.py
@@ -10,8 +10,6 @@
 import argparse
 import sys
 
-import rsl_rl
-
 from isaaclab.app import AppLauncher
 
 # local imports
@@ -58,9 +56,8 @@
 from packaging import version
 
 # check minimum supported rsl-rl version
-RSL_RL_VERSION = "2.3.1"
+RSL_RL_VERSION = "3.1.0"
 installed_version = metadata.version("rsl-rl-lib")
-print("installed_version", installed_version)
 if version.parse(installed_version) < version.parse(RSL_RL_VERSION):
     if platform.system() == "Windows":
         cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"]
@@ -81,7 +78,7 @@
 from datetime import datetime
 
 import omni
-from rsl_rl.runners import OnPolicyRunner
+from rsl_rl.runners import DistillationRunner, OnPolicyRunner
 
 from isaaclab.envs import (
     DirectMARLEnv,
@@ -202,20 +199,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
     dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)
 
-    import rsl_rl
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-    print("[INFO] rsl_rl library location:", rsl_rl.__file__)
-
     # run training
     runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
 
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
index d909bf2d912..73ceae04693 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
@@ -5,6 +5,7 @@
 
 import gymnasium as gym
 import torch
+from tensordict import TensorDict
 
 from rsl_rl.env import VecEnv
 
@@ -12,16 +13,9 @@
 
 
 class RslRlVecEnvWrapper(VecEnv):
-    """Wraps around Isaac Lab environment for RSL-RL library
-
-    To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_privileged_obs` (int).
-    This is used by the learning agent to allocate buffers in the trajectory memory. Additionally, the returned
-    observations should have the key "critic" which corresponds to the privileged observations. Since this is
-    optional for some environments, the wrapper checks if these attributes exist. If they don't then the wrapper
-    defaults to zero as number of privileged observations.
+    """Wraps around Isaac Lab environment for the RSL-RL library
 
     .. caution::
-
         This class must be the last wrapper in the wrapper chain. This is because the wrapper does not follow
         the :class:`gym.Wrapper` interface. Any subsequent wrappers will need to be modified to work with this
         wrapper.
@@ -43,12 +37,14 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N
         Raises:
             ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`.
         """
+
         # check that input is valid
         if not isinstance(env.unwrapped, ManagerBasedRLEnv) and not isinstance(env.unwrapped, DirectRLEnv):
             raise ValueError(
                 "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:"
                 f" {type(env)}"
             )
+
         # initialize the wrapper
         self.env = env
         self.clip_actions = clip_actions
@@ -63,20 +59,6 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N
             self.num_actions = self.unwrapped.action_manager.total_action_dim
         else:
             self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space)
-        if hasattr(self.unwrapped, "observation_manager"):
-            self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0]
-        else:
-            self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"])
-        # -- privileged observations
-        if (
-            hasattr(self.unwrapped, "observation_manager")
-            and "critic" in self.unwrapped.observation_manager.group_obs_dim
-        ):
-            self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0]
-        elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space:
-            self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
-        else:
-            self.num_privileged_obs = 0
 
         # modify the action space to the clip range
         self._modify_action_space()
@@ -133,14 +115,6 @@ def unwrapped(self) -> ManagerBasedRLEnv | DirectRLEnv:
     Properties
     """
 
-    def get_observations(self) -> tuple[torch.Tensor, dict]:
-        """Returns the current observations of the environment."""
-        if hasattr(self.unwrapped, "observation_manager"):
-            obs_dict = self.unwrapped.observation_manager.compute()
-        else:
-            obs_dict = self.unwrapped._get_observations()
-        return obs_dict["policy"], {"observations": obs_dict}
-
     @property
     def episode_length_buf(self) -> torch.Tensor:
         """The episode length buffer."""
@@ -162,13 +136,20 @@ def episode_length_buf(self, value: torch.Tensor):
     def seed(self, seed: int = -1) -> int:  # noqa: D102
         return self.unwrapped.seed(seed)
 
-    def reset(self) -> tuple[torch.Tensor, dict]:  # noqa: D102
+    def reset(self) -> tuple[TensorDict, dict]:  # noqa: D102
         # reset the environment
-        obs_dict, _ = self.env.reset()
-        # return observations
-        return obs_dict["policy"], {"observations": obs_dict}
+        obs_dict, extras = self.env.reset()
+        return TensorDict(obs_dict, batch_size=[self.num_envs]), extras
+
+    def get_observations(self) -> TensorDict:
+        """Returns the current observations of the environment."""
+        if hasattr(self.unwrapped, "observation_manager"):
+            obs_dict = self.unwrapped.observation_manager.compute()
+        else:
+            obs_dict = self.unwrapped._get_observations()
+        return TensorDict(obs_dict, batch_size=[self.num_envs])
 
-    def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
+    def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]:
         # clip actions
         if self.clip_actions is not None:
             actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
@@ -176,16 +157,12 @@ def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch
         obs_dict, rew, terminated, truncated, extras = self.env.step(actions)
         # compute dones for compatibility with RSL-RL
         dones = (terminated | truncated).to(dtype=torch.long)
-        # move extra observations to the extras dict
-        obs = obs_dict["policy"]
-        extras["observations"] = obs_dict
         # move time out information to the extras dict
         # this is only needed for infinite horizon tasks
         if not self.unwrapped.cfg.is_finite_horizon:
             extras["time_outs"] = truncated
-
         # return the step information
-        return obs, rew, dones, extras
+        return TensorDict(obs_dict, batch_size=[self.num_envs]), rew, dones, extras
 
     def close(self):  # noqa: D102
         return self.env.close()
@@ -200,7 +177,8 @@ def _modify_action_space(self):
             return
 
         # modify the action space to the clip range
-        # note: this is only possible for the box action space. we need to change it in the future for other action spaces.
+        # note: this is only possible for the box action space. we need to change it in the future for other
+        #   action spaces.
         self.env.unwrapped.single_action_space = gym.spaces.Box(
             low=-self.clip_actions, high=self.clip_actions, shape=(self.num_actions,)
         )
diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py
index a3675ac84e7..1d69ba74a4f 100644
--- a/source/isaaclab_rl/setup.py
+++ b/source/isaaclab_rl/setup.py
@@ -46,11 +46,10 @@
         "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11",
         "gym",
     ],  # rl-games still needs gym :(
-    # "rsl-rl": ["rsl-rl-lib==3.0.1"],
+    "rsl-rl": ["rsl-rl-lib==3.1.0"],
 }
 # Add the names with hyphens as aliases for convenience
 EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"]
-# EXTRAS_REQUIRE["rsl_rl"] = EXTRAS_REQUIRE["rsl-rl"]
 
 # Cumulation of all extra-requires
 EXTRAS_REQUIRE["all"] = list(itertools.chain.from_iterable(EXTRAS_REQUIRE.values()))
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
index b322f1a6132..7d84f620e29 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
@@ -10,7 +10,7 @@
 
 @configclass
 class UpdatedRslRlPpoActorCriticRecurrentCfg(RslRlPpoActorCriticRecurrentCfg):
-    fixed_sigma = False
+    state_dependent_std = False
     share_weights = False
 
 @configclass
@@ -57,7 +57,7 @@ class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
     clip_actions = 1.0
     resume = False
     policy = UpdatedRslRlPpoActorCriticRecurrentCfg(
-        fixed_sigma=False,
+        state_dependent_std=False,
         init_noise_std=1.0,
         actor_hidden_dims=[256, 128, 64],
         critic_hidden_dims=[256, 128, 64],
@@ -67,8 +67,7 @@ class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
         rnn_hidden_dim=256,
         rnn_num_layers=2,
     )
-    algorithm = UpdatedRslRlPpoAlgorithmCfg(
-        bounds_loss_coef=0.0001,
+    algorithm = RslRlPpoAlgorithmCfg(
         value_loss_coef=1.0,
         use_clipped_value_loss=True,
         clip_param=0.2,

From f805083e78298cedd9d963d8057fa624a98c3940 Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Mon, 29 Sep 2025 16:50:59 -0700
Subject: [PATCH 10/10] enable state_dependent_std

---
 source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py     |  3 +++
 .../config/ur_10e/agents/rsl_rl_ppo_cfg.py          | 13 ++-----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
index 5b03a7c639b..bfc212f4006 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
@@ -31,6 +31,9 @@ class RslRlPpoActorCriticCfg:
     noise_std_type: Literal["scalar", "log"] = "scalar"
     """The type of noise standard deviation for the policy. Default is scalar."""
 
+    state_dependent_std: bool = False
+    """Whether to use state-dependent standard deviation for the policy. Default is False."""
+
     actor_obs_normalization: bool = MISSING
     """Whether to normalize the observation for the actor network."""
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
index 7d84f620e29..7d74c5e7d8e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/agents/rsl_rl_ppo_cfg.py
@@ -8,15 +8,6 @@
 from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg
 
 
-@configclass
-class UpdatedRslRlPpoActorCriticRecurrentCfg(RslRlPpoActorCriticRecurrentCfg):
-    state_dependent_std = False
-    share_weights = False
-
-@configclass
-class UpdatedRslRlPpoAlgorithmCfg(RslRlPpoAlgorithmCfg):
-    bounds_loss_coef = 0.0
-
 @configclass
 class UR10GearAssemblyPPORunnerCfg(RslRlOnPolicyRunnerCfg):
     num_steps_per_env = 512
@@ -56,8 +47,8 @@ class UR10GearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
     empirical_normalization = True
     clip_actions = 1.0
     resume = False
-    policy = UpdatedRslRlPpoActorCriticRecurrentCfg(
-        state_dependent_std=False,
+    policy = RslRlPpoActorCriticRecurrentCfg(
+        state_dependent_std=True,
         init_noise_std=1.0,
         actor_hidden_dims=[256, 128, 64],
         critic_hidden_dims=[256, 128, 64],