
Commit a414817
Message: Update
Parent: 9845ded

7 files changed (+222, -42 lines)


CMakeLists.txt (+1)

@@ -17,3 +17,4 @@ endif (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
 find_package(pybind11 REQUIRED)
 
 pybind11_add_module(inverse_sensor_model src/inverse_sensor_model.cpp)
+pybind11_add_module(astar src/astar.cpp)
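The new pybind11_add_module line compiles src/astar.cpp into a second Python extension alongside the existing inverse_sensor_model. A minimal hedged sketch of how the scripts pick it up, mirroring the import pattern robot_simulation.py already uses (nothing new is introduced here beyond the expected build/ layout, so this only runs after the project has been built):

import sys
sys.path.append(sys.path[0] + '/..')       # as in scripts/robot_simulation.py
from build.inverse_sensor_model import *   # existing extension module
from build.astar import *                  # new extension built by this commit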

README.md (+2, -1)

@@ -81,4 +81,5 @@ Please cite [our paper](https://www.researchgate.net/profile/Fanfei_Chen/publica
 ## Reference
 - [DeepRL-Agents](https://github.com/awjuliani/DeepRL-Agents)
 - [DeepLearningFlappyBird](https://github.com/yenchenlin/DeepLearningFlappyBird)
-- [Random Dungeon Generator](http://perplexingtech.weebly.com/random-dungeon-demo.html)
+- [Random Dungeon Generator](http://perplexingtech.weebly.com/random-dungeon-demo.html)
+- [PyAstar](https://github.com/hjweide/pyastar)

doc/policy.gif (binary file changed, -2.59 KB)

scripts/robot_simulation.py (+57, -23)
@@ -9,6 +9,7 @@
 
 sys.path.append(sys.path[0] + '/..')
 from build.inverse_sensor_model import *
+from build.astar import *
 from random import shuffle
 import os
 
@@ -29,6 +30,7 @@ def __init__(self, index_map, train, plot):
         self.global_map, self.robot_position = self.map_setup(self.map_dir + '/' + self.map_list[self.li_map])
         self.op_map = np.ones(self.global_map.shape) * 127
         self.map_size = np.shape(self.global_map)
+        self.finish_percent = 0.985
         self.resolution = 1
         self.sensor_range = 80
         self.old_position = np.zeros([2])
@@ -40,17 +42,17 @@ def __init__(self, index_map, train, plot):
         self.robot_size = 6
         self.local_size = 40
         if self.plot:
-            self.xPoint = []
-            self.yPoint = []
+            self.xPoint = np.array([self.robot_position[0]])
+            self.yPoint = np.array([self.robot_position[1]])
+            self.x2frontier = np.empty([0])
+            self.y2frontier = np.empty([0])
 
     def begin(self):
         self.op_map = self.inverse_sensor(self.robot_position, self.sensor_range, self.op_map, self.global_map)
         step_map = self.robot_model(self.robot_position, self.robot_size, self.t, self.op_map)
         map_local = self.local_map(self.robot_position, step_map, self.map_size, self.sensor_range + self.local_size)
         if self.plot:
-            self.xPoint.append(self.robot_position[0])
-            self.yPoint.append(self.robot_position[1])
-            self.gui()
+            self.plot_env()
         return map_local
 
     def step(self, action_index):
@@ -92,21 +94,21 @@ def step(self, action_index):
             new_location = True
             terminal = True
             if self.plot and self.mode:
-                self.xPoint.append(self.robot_position[0])
-                self.yPoint.append(self.robot_position[1])
-                self.gui()
+                self.xPoint = ma.append(self.xPoint, self.robot_position[0])
+                self.yPoint = ma.append(self.yPoint, self.robot_position[1])
+                self.plot_env()
             self.robot_position = self.old_position.copy()
             self.op_map = self.old_op_map.copy()
             if self.plot and self.mode:
-                self.xPoint.pop()
-                self.yPoint.pop()
+                self.xPoint[self.xPoint.size-1] = ma.masked
+                self.yPoint[self.yPoint.size-1] = ma.masked
         else:
             if self.plot:
-                self.xPoint.append(self.robot_position[0])
-                self.yPoint.append(self.robot_position[1])
-                self.gui()
+                self.xPoint = ma.append(self.xPoint, self.robot_position[0])
+                self.yPoint = ma.append(self.yPoint, self.robot_position[1])
+                self.plot_env()
 
-        if np.size(np.where(self.global_map == 255)) - np.size(np.where(self.op_map == 255)) < 500:
+        if np.size(np.where(self.op_map == 255))/np.size(np.where(self.global_map == 255)) > self.finish_percent:
            self.li_map += 1
            if self.li_map == self.map_number:
                self.li_map = 0
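Two changes land in this hunk. First, the plotted trail moves from Python lists to numpy masked arrays: an invalid move is now undone by masking the last point rather than popping it, and matplotlib leaves a gap in the plotted line at masked entries. Second, the termination test flips from an absolute count (fewer than 500 free cells left undiscovered) to a relative one (more than finish_percent of the free cells discovered); np.size(np.where(...)) counts both the row and column index arrays and so doubles each cell count, but the factor of two cancels in the ratio. A minimal standalone sketch of the masked-trail idea, with invented coordinates:

import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

x = np.array([0.0])
y = np.array([0.0])

# Append points as the robot moves; ma.append accepts plain or masked arrays.
for px, py in [(1, 0), (2, 1), (3, 1)]:
    x = ma.append(x, px)
    y = ma.append(y, py)

# "Undo" the last move by masking instead of popping: array sizes stay
# consistent and plt.plot simply breaks the trail at the masked point.
x[x.size - 1] = ma.masked
y[y.size - 1] = ma.masked

plt.plot(x, y, 'b', linewidth=2)
plt.show()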
@@ -121,20 +123,25 @@ def step(self, action_index):
     def rescuer(self):
         complete = False
         all_map = False
-
+        pre_position = self.robot_position.copy()
         self.robot_position = self.frontier(self.op_map, self.map_size, self.t)
         self.op_map = self.inverse_sensor(self.robot_position, self.sensor_range, self.op_map, self.global_map)
         step_map = self.robot_model(self.robot_position, self.robot_size, self.t, self.op_map)
         map_local = self.local_map(self.robot_position, step_map, self.map_size, self.sensor_range + self.local_size)
 
         if self.plot:
-            self.xPoint.append(ma.masked)
-            self.yPoint.append(ma.masked)
-            self.xPoint.append(self.robot_position[0])
-            self.yPoint.append(self.robot_position[1])
-            self.gui()
-
-        if np.size(np.where(self.global_map == 255)) - np.size(np.where(self.op_map == 255)) < 500:
+            path = self.astar_path(self.op_map, pre_position.tolist(), self.robot_position.tolist())
+            self.x2frontier = ma.append(self.x2frontier, ma.masked)
+            self.y2frontier = ma.append(self.y2frontier, ma.masked)
+            self.x2frontier = ma.append(self.x2frontier, path[1, :])
+            self.y2frontier = ma.append(self.y2frontier, path[0, :])
+            self.xPoint = ma.append(self.xPoint, ma.masked)
+            self.yPoint = ma.append(self.yPoint, ma.masked)
+            self.xPoint = ma.append(self.xPoint, self.robot_position[0])
+            self.yPoint = ma.append(self.yPoint, self.robot_position[1])
+            self.plot_env()
+
+        if np.size(np.where(self.op_map == 255))/np.size(np.where(self.global_map == 255)) > self.finish_percent:
             self.li_map += 1
             if self.li_map == self.map_number:
                 self.li_map = 0
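rescuer() now also records the A* path from the pre-jump position to the chosen frontier in the separate x2frontier/y2frontier arrays, prepending ma.masked before each segment. The masked sentinel is what lets the single red plt.plot call in plot_env() draw many disjoint frontier paths without joining the end of one to the start of the next. A standalone sketch of that separator trick (both segments are invented for illustration):

import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

xs = np.empty([0])
ys = np.empty([0])
for seg_x, seg_y in [([0, 1, 2], [0, 0, 1]), ([5, 6], [3, 4])]:
    xs = ma.append(xs, ma.masked)   # break the line before each new segment
    ys = ma.append(ys, ma.masked)
    xs = ma.append(xs, seg_x)
    ys = ma.append(ys, seg_y)

plt.plot(xs, ys, 'r', linewidth=2)  # one call, two visually separate paths
plt.show()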
@@ -297,11 +304,38 @@ def unique_rows(self, a):
         result = result[~np.isnan(result).any(axis=1)]
         return result
 
-    def gui(self):
+    def astar_path(self, weights, start, goal, allow_diagonal=True):
+        temp_start = [start[1], start[0]]
+        temp_goal = [goal[1], goal[0]]
+        temp_weight = (weights < 150) * 254 + 1
+        # For the heuristic to be valid, each move must cost at least 1.
+        if temp_weight.min(axis=None) < 1.:
+            raise ValueError("Minimum cost to move must be 1, but got %f" % (
+                temp_weight.min(axis=None)))
+        # Ensure start is within bounds.
+        if (temp_start[0] < 0 or temp_start[0] >= temp_weight.shape[0] or
+                temp_start[1] < 0 or temp_start[1] >= temp_weight.shape[1]):
+            raise ValueError("Start lies outside grid.")
+        # Ensure goal is within bounds.
+        if (temp_goal[0] < 0 or temp_goal[0] >= temp_weight.shape[0] or
+                temp_goal[1] < 0 or temp_goal[1] >= temp_weight.shape[1]):
+            raise ValueError("Goal lies outside grid.")
+
+        height, width = temp_weight.shape
+        start_idx = np.ravel_multi_index(temp_start, (height, width))
+        goal_idx = np.ravel_multi_index(temp_goal, (height, width))
+
+        path = astar(
+            temp_weight.flatten(), height, width, start_idx, goal_idx, allow_diagonal,
+        )
+        return path
+
+    def plot_env(self):
         plt.cla()
         plt.imshow(self.op_map, cmap='gray')
         plt.axis((0, self.map_size[1], self.map_size[0], 0))
         plt.plot(self.xPoint, self.yPoint, 'b', linewidth=2)
+        plt.plot(self.x2frontier, self.y2frontier, 'r', linewidth=2)
         plt.plot(self.robot_position[0], self.robot_position[1], 'mo', markersize=8)
         plt.plot(self.xPoint[0], self.yPoint[0], 'co', markersize=8)
         plt.pause(0.05)
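astar_path() reconciles three conventions at once: callers pass (x, y) positions while the grid is indexed (row, col), hence the [start[1], start[0]] swap; the compiled astar() takes flat indices into the flattened cost array, hence np.ravel_multi_index; and the thresholding (weights < 150) * 254 + 1 turns op_map values into costs, mapping free cells (255) to cost 1 and occupied or unknown cells (below 150) to cost 255. A tiny worked example of those conversions, on an invented 3x4 grid:

import numpy as np

height, width = 3, 4
# (row, col) = (2, 1) flattens to 2 * width + 1 = 9 in row-major order.
flat = np.ravel_multi_index((2, 1), (height, width))
assert flat == 9

# Cost construction as in astar_path(): free (255) -> 1, everything else -> 255.
op_map = np.array([[255, 127, 0, 255]])
costs = (op_map < 150) * 254 + 1
print(costs)  # [[  1 255 255   1]]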

scripts/tf_policy_cnn.py (+10, -10)
@@ -1,6 +1,5 @@
-from __future__ import print_function
-import tensorflow as tf
 import os
+import tensorflow as tf
 from skimage.transform import resize
 import random
 import numpy as np
@@ -11,26 +10,27 @@
 import robot_simulation as robot
 
 # select mode
-TRAIN = True
-PLOT = False
+TRAIN = False
+PLOT = True
 
 # training environment parameters
 ACTIONS = 50 # number of valid actions
 GAMMA = 0.99 # decay rate of past observations
 OBSERVE = 1e4 # timesteps to observe before training
 EXPLORE = 2e6 # frames over which to anneal epsilon
-REPLAY_MEMORY = 10000 # number of previous transitions to remember
+REPLAY_MEMORY = 1e4 # number of previous transitions to remember
 BATCH = 64 # size of minibatch
 FINAL_RATE = 0 # final value of dropout rate
 INITIAL_RATE = 0.9 # initial value of dropout rate
-TARGET_UPDATE = 5e4
+TARGET_UPDATE = 2e4
 
 network_dir = "../saved_networks/" + "cnn_" + str(ACTIONS)
-log_dir = "../log/" + "cnn_" + str(ACTIONS)
 if not os.path.exists(network_dir):
     os.makedirs(network_dir)
-if not os.path.exists(log_dir):
-    os.makedirs(log_dir)
+if TRAIN:
+    log_dir = "../log/" + "cnn_" + str(ACTIONS)
+    if not os.path.exists(log_dir):
+        os.makedirs(log_dir)
 
 
 def copy_weights(sess):
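One side effect worth flagging: REPLAY_MEMORY changes from the int literal 10000 to 1e4, and scientific-notation literals are floats in Python. Anywhere such a constant feeds an integer-only API it needs an explicit cast; a hedged illustration (the deque is hypothetical, not part of this diff):

from collections import deque

REPLAY_MEMORY = 1e4                        # float, as in this commit
# deque(maxlen=REPLAY_MEMORY) would raise TypeError: an integer is required
replay = deque(maxlen=int(REPLAY_MEMORY))
print(replay.maxlen)                       # 10000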
@@ -143,7 +143,7 @@ def start():
         total_reward = np.append(total_reward, r_t)
 
         # save progress
-        if step_t % 2e4 == 0 or step_t % 2e5 == 0 or step_t % 2e6 == 0:
+        if step_t == 2e4 or step_t == 2e5 or step_t == 2e6:
             saver.save(sess, network_dir + '/cnn', global_step=step_t)
 
         print("TIMESTEP", step_t, "/ DROPOUT", drop_rate, "/ ACTION", action_index, "/ REWARD", r_t,

scripts/tf_policy_rnn.py (+9, -8)
@@ -9,28 +9,29 @@
 import robot_simulation as robot
 
 # select mode
-TRAIN = True
-PLOT = False
+TRAIN = False
+PLOT = True
 
 # training environment parameters
 ACTIONS = 50 # number of valid actions
 GAMMA = 0.99 # decay rate of past observations
 OBSERVE = 1e4 # timesteps to observe before training
 EXPLORE = 2e6 # frames over which to anneal epsilon
-REPLAY_MEMORY = 1000 # number of previous transitions to remember
+REPLAY_MEMORY = 1e3 # number of previous transitions to remember
 BATCH = 8 # size of minibatch
 h_size = 512 # size of hidden cells of LSTM
 trace_length = 8 # memory length
 FINAL_RATE = 0 # final value of dropout rate
 INITIAL_RATE = 0.9 # initial value of dropout rate
-TARGET_UPDATE = 5e4
+TARGET_UPDATE = 2e4
 
 network_dir = "../saved_networks/" + "rnn_" + str(ACTIONS)
-log_dir = "../log/" + "rnn_" + str(ACTIONS)
 if not os.path.exists(network_dir):
     os.makedirs(network_dir)
-if not os.path.exists(log_dir):
-    os.makedirs(log_dir)
+if TRAIN:
+    log_dir = "../log/" + "rnn_" + str(ACTIONS)
+    if not os.path.exists(log_dir):
+        os.makedirs(log_dir)
 
 
 class experience_buffer():
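Both policy scripts now create log_dir only in training mode, so evaluation runs no longer leave empty log directories behind. The flip side is that log_dir is undefined when TRAIN is False, so any later consumer must sit behind the same guard; a hedged sketch (the TF1-style summary writer is illustrative and not shown in this diff):

import os
import tensorflow as tf

TRAIN = True
ACTIONS = 50

if TRAIN:
    log_dir = "../log/" + "rnn_" + str(ACTIONS)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # Hypothetical consumer: referencing log_dir outside this block would
    # raise NameError whenever TRAIN is False.
    writer = tf.summary.FileWriter(log_dir)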
@@ -188,7 +189,7 @@ def start():
         total_reward = np.append(total_reward, r_t)
 
         # save progress
-        if step_t % 2e4 == 0 or step_t % 2e5 == 0 or step_t % 2e6 == 0:
+        if step_t == 2e4 or step_t == 2e5 or step_t == 2e6:
             saver.save(sess, network_dir + '/rnn', global_step=step_t)
 
         print("TIMESTEP", step_t, "/ DROPOUT", drop_rate, "/ ACTION", action_index, "/ REWARD", r_t,
