From f800cc5f1cdeeb44e5a7cba9677aa2bbd2cec81a Mon Sep 17 00:00:00 2001
From: lilkm
Date: Mon, 25 Nov 2024 00:12:55 +0100
Subject: [PATCH] fix: correct missing observation in 'image' observation mode

---
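Note (below the --- fold, so `git am` ignores it): in the 'image'
observation mode, the dict returned by get_observation() does not carry
the "cube_pos" key (or the per-cube keys in the stacking env), so the
reward and success checks in step() would raise a KeyError. The fix
queries the poses directly from the MuJoCo state instead of the
observation dict. A minimal sketch of that pattern, assuming only a
model with a body named "cube" (the inline scene below is illustrative,
not the envs' actual MJCF):

    import mujoco

    model = mujoco.MjModel.from_xml_string("""
    <mujoco>
      <worldbody>
        <body name="cube" pos="0 0 0.1">
          <geom type="box" size="0.015 0.015 0.015"/>
        </body>
      </worldbody>
    </mujoco>
    """)
    data = mujoco.MjData(model)
    mujoco.mj_forward(model, data)  # populate data.xpos from qpos

    # Read the body pose from the simulation state, independent of the
    # observation mode; copy() so later mj_step calls cannot mutate it.
    cube_id = model.body("cube").id
    cube_pos = data.body(cube_id).xpos.copy()  # world-frame (x, y, z)

This decouples the reward computation from whatever keys the selected
observation mode happens to expose.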
 gym_lowcostrobot/envs/lift_cube_env.py       |  6 ++++--
 gym_lowcostrobot/envs/pick_place_cube_env.py |  8 ++++++--
 gym_lowcostrobot/envs/push_cube_env.py       |  8 ++++++--
 gym_lowcostrobot/envs/reach_cube_env.py      |  8 ++++++--
 gym_lowcostrobot/envs/stack_two_cubes_env.py | 13 ++++++++++---
 5 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/gym_lowcostrobot/envs/lift_cube_env.py b/gym_lowcostrobot/envs/lift_cube_env.py
index 40a9b15..da78828 100644
--- a/gym_lowcostrobot/envs/lift_cube_env.py
+++ b/gym_lowcostrobot/envs/lift_cube_env.py
@@ -327,9 +327,11 @@ def step(self, action):
         observation = self.get_observation()
 
         # Get the position of the cube and the distance between the end effector and the cube
+        cube_id = self.model.body("cube").id
+        cube_pos = self.data.body(cube_id).xpos.copy()
         ee_id = self.model.site("end_effector_site").id
         ee_pos = self.data.site(ee_id).xpos.copy()
-        cube_z = observation["cube_pos"][2]
+        cube_z = cube_pos[2]
 
         # info = {"is_success": self.is_success(ee_pos, observation["cube_pos"])}
         info = {}
@@ -339,7 +341,7 @@
 
         # Compute the reward (dense reward)
         reward_height = cube_z - self.height_threshold
-        reward_distance = np.linalg.norm(ee_pos - observation["cube_pos"])
+        reward_distance = np.linalg.norm(ee_pos - cube_pos)
         reward = reward_height + reward_distance
 
         return observation, reward, terminated, truncated, info
diff --git a/gym_lowcostrobot/envs/pick_place_cube_env.py b/gym_lowcostrobot/envs/pick_place_cube_env.py
index f81db48..4c80e30 100644
--- a/gym_lowcostrobot/envs/pick_place_cube_env.py
+++ b/gym_lowcostrobot/envs/pick_place_cube_env.py
@@ -342,11 +342,15 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
-        info = {"is_success": self.is_success(observation["cube_pos"], observation["target_pos"])}
+        # Get the position of the cube
+        cube_id = self.model.body("cube").id
+        cube_pos = self.data.body(cube_id).xpos.copy()
+
+        info = {"is_success": self.is_success(cube_pos, observation["target_pos"])}
         terminated = info["is_success"]
         truncated = False
 
-        reward = self.compute_reward(observation["cube_pos"], observation["target_pos"])
+        reward = self.compute_reward(cube_pos, observation["target_pos"])
         return observation, reward, terminated, truncated, info
 
     def goal_distance(self, goal_a, goal_b):
diff --git a/gym_lowcostrobot/envs/push_cube_env.py b/gym_lowcostrobot/envs/push_cube_env.py
index 1640382..1ee9119 100644
--- a/gym_lowcostrobot/envs/push_cube_env.py
+++ b/gym_lowcostrobot/envs/push_cube_env.py
@@ -334,11 +334,15 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
-        info = {"is_success": self.is_success(observation["cube_pos"], observation["target_pos"])}
+        # Get the position of the cube
+        cube_id = self.model.body("cube").id
+        cube_pos = self.data.body(cube_id).xpos.copy()
+
+        info = {"is_success": self.is_success(cube_pos, observation["target_pos"])}
         terminated = info["is_success"]
         truncated = False
 
-        reward = self.compute_reward(observation["cube_pos"], observation["target_pos"])
+        reward = self.compute_reward(cube_pos, observation["target_pos"])
         return observation, reward, terminated, truncated, info
 
     def goal_distance(self, goal_a, goal_b):
diff --git a/gym_lowcostrobot/envs/reach_cube_env.py b/gym_lowcostrobot/envs/reach_cube_env.py
index 4064eee..b8a65a3 100644
--- a/gym_lowcostrobot/envs/reach_cube_env.py
+++ b/gym_lowcostrobot/envs/reach_cube_env.py
@@ -317,15 +317,19 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
+        # Get the position of the cube
+        cube_id = self.model.body("cube").id
+        cube_pos = self.data.body(cube_id).xpos.copy()
+
         # Get the position of the cube and the distance between the end effector and the cube
         ee_id = self.model.site("end_effector_site").id
         ee_pos = self.data.site(ee_id).xpos.copy()
 
-        info = {"is_success": self.is_success(ee_pos, observation["cube_pos"])}
+        info = {"is_success": self.is_success(ee_pos, cube_pos)}
         terminated = info["is_success"]
         truncated = False
 
-        reward = self.compute_reward(ee_pos, observation["cube_pos"])
+        reward = self.compute_reward(ee_pos, cube_pos)
         return observation, reward, terminated, truncated, info
 
     def goal_distance(self, goal_a, goal_b):
diff --git a/gym_lowcostrobot/envs/stack_two_cubes_env.py b/gym_lowcostrobot/envs/stack_two_cubes_env.py
index ffb492f..b3eafe1 100644
--- a/gym_lowcostrobot/envs/stack_two_cubes_env.py
+++ b/gym_lowcostrobot/envs/stack_two_cubes_env.py
@@ -330,14 +330,21 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
+        # Get the position of the blue cube and the red cube
+        cube_blue_id = self.model.body("cube_blue").id
+        cube_blue_pos = self.data.body(cube_blue_id).xpos.copy()
+
+        cube_red_id = self.model.body("cube_red").id
+        cube_red_pos = self.data.body(cube_red_id).xpos.copy()
+
         # The target position is the position of the red cube plus translated in the z-axis by the height of the red cube
-        target_pos = observation["cube_red_pos"] + np.array([0.0, 0.0, 0.03])
+        target_pos = cube_red_pos + np.array([0.0, 0.0, 0.03])
 
-        info = {"is_success": self.is_success(observation["cube_blue_pos"], target_pos)}
+        info = {"is_success": self.is_success(cube_blue_pos, target_pos)}
         terminated = info["is_success"]
         truncated = False
 
-        reward = self.compute_reward(observation["cube_blue_pos"], target_pos)
+        reward = self.compute_reward(cube_blue_pos, target_pos)
         return observation, reward, terminated, truncated, info
 
     def goal_distance(self, goal_a, goal_b):
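
A quick smoke test for the touched envs (a sketch, not part of the
patch: the env IDs and the observation_mode kwarg are assumptions
inferred from the files changed here, and gymnasium must be installed):

    import gymnasium as gym
    import gym_lowcostrobot  # assumed to register the envs on import

    for env_id in ["LiftCube-v0", "PickPlaceCube-v0", "PushCube-v0",
                   "ReachCube-v0", "StackTwoCubes-v0"]:
        env = gym.make(env_id, observation_mode="image")
        env.reset(seed=0)
        # Pre-patch, step() read cube positions out of the observation
        # dict and raised KeyError in "image" mode; this exercises it.
        env.step(env.action_space.sample())
        env.close()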