From f800cc5f1cdeeb44e5a7cba9677aa2bbd2cec81a Mon Sep 17 00:00:00 2001
From: lilkm
Date: Mon, 25 Nov 2024 00:12:55 +0100
Subject: [PATCH] fix: correct missing observation in 'image' observation mode

---
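Note (below the --- fold, so `git am` ignores it): in the 'image'
observation mode, the dict returned by get_observation() does not carry
the "cube_pos" key (or the per-cube keys in the stacking env), so the
reward and success checks in step() would raise a KeyError. The fix
queries the poses directly from the MuJoCo state instead of the
observation dict. A minimal sketch of that pattern, assuming only a
model with a body named "cube" (the inline scene below is illustrative,
not the envs' actual MJCF):

    import mujoco

    model = mujoco.MjModel.from_xml_string("""
    <mujoco>
      <worldbody>
        <body name="cube" pos="0 0 0.1">
          <geom type="box" size="0.015 0.015 0.015"/>
        </body>
      </worldbody>
    </mujoco>
    """)
    data = mujoco.MjData(model)
    mujoco.mj_forward(model, data)  # populate data.xpos from qpos

    # Read the body pose from the simulation state, independent of the
    # observation mode; copy() so later mj_step calls cannot mutate it.
    cube_id = model.body("cube").id
    cube_pos = data.body(cube_id).xpos.copy()  # world-frame (x, y, z)

This decouples the reward computation from whatever keys the selected
observation mode happens to expose.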
 gym_lowcostrobot/envs/lift_cube_env.py       |  6 ++++--
 gym_lowcostrobot/envs/pick_place_cube_env.py |  8 ++++++--
 gym_lowcostrobot/envs/push_cube_env.py       |  8 ++++++--
 gym_lowcostrobot/envs/reach_cube_env.py      |  8 ++++++--
 gym_lowcostrobot/envs/stack_two_cubes_env.py | 13 ++++++++++---
 5 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/gym_lowcostrobot/envs/lift_cube_env.py b/gym_lowcostrobot/envs/lift_cube_env.py
index 40a9b15..da78828 100644
--- a/gym_lowcostrobot/envs/lift_cube_env.py
+++ b/gym_lowcostrobot/envs/lift_cube_env.py
@@ -327,9 +327,11 @@ def step(self, action):
         observation = self.get_observation()
 
         # Get the position of the cube and the distance between the end effector and the cube
+        cube_id = self.model.body("cube").id
+        cube_pos = self.data.body(cube_id).xpos.copy()
         ee_id = self.model.site("end_effector_site").id
         ee_pos = self.data.site(ee_id).xpos.copy()
-        cube_z = observation["cube_pos"][2]
+        cube_z = cube_pos[2]
 
         # info = {"is_success": self.is_success(ee_pos, observation["cube_pos"])}
         info = {}
@@ -339,7 +341,7 @@
 
         # Compute the reward (dense reward)
         reward_height = cube_z - self.height_threshold
-        reward_distance = np.linalg.norm(ee_pos - observation["cube_pos"])
+        reward_distance = np.linalg.norm(ee_pos - cube_pos)
         reward = reward_height + reward_distance
 
         return observation, reward, terminated, truncated, info
diff --git a/gym_lowcostrobot/envs/pick_place_cube_env.py b/gym_lowcostrobot/envs/pick_place_cube_env.py
index f81db48..4c80e30 100644
--- a/gym_lowcostrobot/envs/pick_place_cube_env.py
+++ b/gym_lowcostrobot/envs/pick_place_cube_env.py
@@ -342,11 +342,15 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
-        info = {"is_success": self.is_success(observation["cube_pos"], observation["target_pos"])}
+        # Get the position of the cube
+        cube_id = self.model.body("cube").id
+        cube_pos = self.data.body(cube_id).xpos.copy()
+
+        info = {"is_success": self.is_success(cube_pos, observation["target_pos"])}
         terminated = info["is_success"]
         truncated = False
 
-        reward = self.compute_reward(observation["cube_pos"], observation["target_pos"])
+        reward = self.compute_reward(cube_pos, observation["target_pos"])
         return observation, reward, terminated, truncated, info
 
     def goal_distance(self, goal_a, goal_b):
diff --git a/gym_lowcostrobot/envs/push_cube_env.py b/gym_lowcostrobot/envs/push_cube_env.py
index 1640382..1ee9119 100644
--- a/gym_lowcostrobot/envs/push_cube_env.py
+++ b/gym_lowcostrobot/envs/push_cube_env.py
@@ -334,11 +334,15 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
-        info = {"is_success": self.is_success(observation["cube_pos"], observation["target_pos"])}
+        # Get the position of the cube
+        cube_id = self.model.body("cube").id
+        cube_pos = self.data.body(cube_id).xpos.copy()
+
+        info = {"is_success": self.is_success(cube_pos, observation["target_pos"])}
         terminated = info["is_success"]
         truncated = False
 
-        reward = self.compute_reward(observation["cube_pos"], observation["target_pos"])
+        reward = self.compute_reward(cube_pos, observation["target_pos"])
         return observation, reward, terminated, truncated, info
 
     def goal_distance(self, goal_a, goal_b):
diff --git a/gym_lowcostrobot/envs/reach_cube_env.py b/gym_lowcostrobot/envs/reach_cube_env.py
index 4064eee..b8a65a3 100644
--- a/gym_lowcostrobot/envs/reach_cube_env.py
+++ b/gym_lowcostrobot/envs/reach_cube_env.py
@@ -317,15 +317,19 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
+        # Get the position of the cube
+        cube_id = self.model.body("cube").id
+        cube_pos = self.data.body(cube_id).xpos.copy()
+
         # Get the position of the cube and the distance between the end effector and the cube
         ee_id = self.model.site("end_effector_site").id
         ee_pos = self.data.site(ee_id).xpos.copy()
 
-        info = {"is_success": self.is_success(ee_pos, observation["cube_pos"])}
+        info = {"is_success": self.is_success(ee_pos, cube_pos)}
         terminated = info["is_success"]
         truncated = False
 
-        reward = self.compute_reward(ee_pos, observation["cube_pos"])
+        reward = self.compute_reward(ee_pos, cube_pos)
         return observation, reward, terminated, truncated, info
 
     def goal_distance(self, goal_a, goal_b):
diff --git a/gym_lowcostrobot/envs/stack_two_cubes_env.py b/gym_lowcostrobot/envs/stack_two_cubes_env.py
index ffb492f..b3eafe1 100644
--- a/gym_lowcostrobot/envs/stack_two_cubes_env.py
+++ b/gym_lowcostrobot/envs/stack_two_cubes_env.py
@@ -330,14 +330,21 @@ def step(self, action):
         # Get the new observation
         observation = self.get_observation()
 
+        # Get the position of the blue cube and the red cube
+        cube_blue_id = self.model.body("cube_blue").id
+        cube_blue_pos = self.data.body(cube_blue_id).xpos.copy()
+
+        cube_red_id = self.model.body("cube_red").id
+        cube_red_pos = self.data.body(cube_red_id).xpos.copy()
+
         # The target position is the position of the red cube plus translated in the z-axis by the height of the red cube
-        target_pos = observation["cube_red_pos"] + np.array([0.0, 0.0, 0.03])
+        target_pos = cube_red_pos + np.array([0.0, 0.0, 0.03])
 
-        info = {"is_success": self.is_success(observation["cube_blue_pos"], target_pos)}
+        info = {"is_success": self.is_success(cube_blue_pos, target_pos)}
         terminated = info["is_success"]
         truncated = False
 
-        reward = self.compute_reward(observation["cube_blue_pos"], target_pos)
+        reward = self.compute_reward(cube_blue_pos, target_pos)
         return observation, reward, terminated, truncated, info
 
     def goal_distance(self, goal_a, goal_b):
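
A quick smoke test for the touched envs (a sketch, not part of the
patch: the env IDs and the observation_mode kwarg are assumptions
inferred from the files changed here, and gymnasium must be installed):

    import gymnasium as gym
    import gym_lowcostrobot  # assumed to register the envs on import

    for env_id in ["LiftCube-v0", "PickPlaceCube-v0", "PushCube-v0",
                   "ReachCube-v0", "StackTwoCubes-v0"]:
        env = gym.make(env_id, observation_mode="image")
        env.reset(seed=0)
        # Pre-patch, step() read cube positions out of the observation
        # dict and raised KeyError in "image" mode; this exercises it.
        env.step(env.action_space.sample())
        env.close()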