
Commit a414817
Message: Update
Parent: 9845ded

7 files changed (+222, -42 lines)


CMakeLists.txt (+1)

@@ -17,3 +17,4 @@ endif (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
 find_package(pybind11 REQUIRED)
 
 pybind11_add_module(inverse_sensor_model src/inverse_sensor_model.cpp)
+pybind11_add_module(astar src/astar.cpp)
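The new pybind11_add_module line compiles src/astar.cpp into a second Python extension alongside the existing inverse_sensor_model. A minimal hedged sketch of how the scripts pick it up, mirroring the import pattern robot_simulation.py already uses (nothing new is introduced here beyond the expected build/ layout, so this only runs after the project has been built):

import sys
sys.path.append(sys.path[0] + '/..')       # as in scripts/robot_simulation.py
from build.inverse_sensor_model import *   # existing extension module
from build.astar import *                  # new extension built by this commit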

README.md (+2, -1)

@@ -81,4 +81,5 @@ Please cite [our paper](https://www.researchgate.net/profile/Fanfei_Chen/publica
 ## Reference
 - [DeepRL-Agents](https://github.com/awjuliani/DeepRL-Agents)
 - [DeepLearningFlappyBird](https://github.com/yenchenlin/DeepLearningFlappyBird)
-- [Random Dungeon Generator](http://perplexingtech.weebly.com/random-dungeon-demo.html)
+- [Random Dungeon Generator](http://perplexingtech.weebly.com/random-dungeon-demo.html)
+- [PyAstar](https://github.com/hjweide/pyastar)

doc/policy.gif (binary file changed, -2.59 KB)

scripts/robot_simulation.py (+57, -23)
@@ -9,6 +9,7 @@
 
 sys.path.append(sys.path[0] + '/..')
 from build.inverse_sensor_model import *
+from build.astar import *
 from random import shuffle
 import os
 
@@ -29,6 +30,7 @@ def __init__(self, index_map, train, plot):
         self.global_map, self.robot_position = self.map_setup(self.map_dir + '/' + self.map_list[self.li_map])
         self.op_map = np.ones(self.global_map.shape) * 127
         self.map_size = np.shape(self.global_map)
+        self.finish_percent = 0.985
         self.resolution = 1
         self.sensor_range = 80
         self.old_position = np.zeros([2])
@@ -40,17 +42,17 @@ def __init__(self, index_map, train, plot):
         self.robot_size = 6
         self.local_size = 40
         if self.plot:
-            self.xPoint = []
-            self.yPoint = []
+            self.xPoint = np.array([self.robot_position[0]])
+            self.yPoint = np.array([self.robot_position[1]])
+            self.x2frontier = np.empty([0])
+            self.y2frontier = np.empty([0])
 
     def begin(self):
         self.op_map = self.inverse_sensor(self.robot_position, self.sensor_range, self.op_map, self.global_map)
         step_map = self.robot_model(self.robot_position, self.robot_size, self.t, self.op_map)
         map_local = self.local_map(self.robot_position, step_map, self.map_size, self.sensor_range + self.local_size)
         if self.plot:
-            self.xPoint.append(self.robot_position[0])
-            self.yPoint.append(self.robot_position[1])
-            self.gui()
+            self.plot_env()
         return map_local
 
     def step(self, action_index):
@@ -92,21 +94,21 @@ def step(self, action_index):
             new_location = True
             terminal = True
             if self.plot and self.mode:
-                self.xPoint.append(self.robot_position[0])
-                self.yPoint.append(self.robot_position[1])
-                self.gui()
+                self.xPoint = ma.append(self.xPoint, self.robot_position[0])
+                self.yPoint = ma.append(self.yPoint, self.robot_position[1])
+                self.plot_env()
             self.robot_position = self.old_position.copy()
             self.op_map = self.old_op_map.copy()
             if self.plot and self.mode:
-                self.xPoint.pop()
-                self.yPoint.pop()
+                self.xPoint[self.xPoint.size-1] = ma.masked
+                self.yPoint[self.yPoint.size-1] = ma.masked
         else:
             if self.plot:
-                self.xPoint.append(self.robot_position[0])
-                self.yPoint.append(self.robot_position[1])
-                self.gui()
+                self.xPoint = ma.append(self.xPoint, self.robot_position[0])
+                self.yPoint = ma.append(self.yPoint, self.robot_position[1])
+                self.plot_env()
 
-        if np.size(np.where(self.global_map == 255)) - np.size(np.where(self.op_map == 255)) < 500:
+        if np.size(np.where(self.op_map == 255))/np.size(np.where(self.global_map == 255)) > self.finish_percent:
            self.li_map += 1
            if self.li_map == self.map_number:
                self.li_map = 0
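Two changes land in this hunk. First, the plotted trail moves from Python lists to numpy masked arrays: an invalid move is now undone by masking the last point rather than popping it, and matplotlib leaves a gap in the plotted line at masked entries. Second, the termination test flips from an absolute count (fewer than 500 free cells left undiscovered) to a relative one (more than finish_percent of the free cells discovered); np.size(np.where(...)) counts both the row and column index arrays and so doubles each cell count, but the factor of two cancels in the ratio. A minimal standalone sketch of the masked-trail idea, with invented coordinates:

import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

x = np.array([0.0])
y = np.array([0.0])

# Append points as the robot moves; ma.append accepts plain or masked arrays.
for px, py in [(1, 0), (2, 1), (3, 1)]:
    x = ma.append(x, px)
    y = ma.append(y, py)

# "Undo" the last move by masking instead of popping: array sizes stay
# consistent and plt.plot simply breaks the trail at the masked point.
x[x.size - 1] = ma.masked
y[y.size - 1] = ma.masked

plt.plot(x, y, 'b', linewidth=2)
plt.show()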
@@ -121,20 +123,25 @@ def step(self, action_index):
     def rescuer(self):
         complete = False
         all_map = False
-
+        pre_position = self.robot_position.copy()
         self.robot_position = self.frontier(self.op_map, self.map_size, self.t)
         self.op_map = self.inverse_sensor(self.robot_position, self.sensor_range, self.op_map, self.global_map)
         step_map = self.robot_model(self.robot_position, self.robot_size, self.t, self.op_map)
         map_local = self.local_map(self.robot_position, step_map, self.map_size, self.sensor_range + self.local_size)
 
         if self.plot:
-            self.xPoint.append(ma.masked)
-            self.yPoint.append(ma.masked)
-            self.xPoint.append(self.robot_position[0])
-            self.yPoint.append(self.robot_position[1])
-            self.gui()
-
-        if np.size(np.where(self.global_map == 255)) - np.size(np.where(self.op_map == 255)) < 500:
+            path = self.astar_path(self.op_map, pre_position.tolist(), self.robot_position.tolist())
+            self.x2frontier = ma.append(self.x2frontier, ma.masked)
+            self.y2frontier = ma.append(self.y2frontier, ma.masked)
+            self.x2frontier = ma.append(self.x2frontier, path[1, :])
+            self.y2frontier = ma.append(self.y2frontier, path[0, :])
+            self.xPoint = ma.append(self.xPoint, ma.masked)
+            self.yPoint = ma.append(self.yPoint, ma.masked)
+            self.xPoint = ma.append(self.xPoint, self.robot_position[0])
+            self.yPoint = ma.append(self.yPoint, self.robot_position[1])
+            self.plot_env()
+
+        if np.size(np.where(self.op_map == 255))/np.size(np.where(self.global_map == 255)) > self.finish_percent:
             self.li_map += 1
             if self.li_map == self.map_number:
                 self.li_map = 0
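rescuer() now also records the A* path from the pre-jump position to the chosen frontier in the separate x2frontier/y2frontier arrays, prepending ma.masked before each segment. The masked sentinel is what lets the single red plt.plot call in plot_env() draw many disjoint frontier paths without joining the end of one to the start of the next. A standalone sketch of that separator trick (both segments are invented for illustration):

import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

xs = np.empty([0])
ys = np.empty([0])
for seg_x, seg_y in [([0, 1, 2], [0, 0, 1]), ([5, 6], [3, 4])]:
    xs = ma.append(xs, ma.masked)   # break the line before each new segment
    ys = ma.append(ys, ma.masked)
    xs = ma.append(xs, seg_x)
    ys = ma.append(ys, seg_y)

plt.plot(xs, ys, 'r', linewidth=2)  # one call, two visually separate paths
plt.show()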
@@ -297,11 +304,38 @@ def unique_rows(self, a):
         result = result[~np.isnan(result).any(axis=1)]
         return result
 
-    def gui(self):
+    def astar_path(self, weights, start, goal, allow_diagonal=True):
+        temp_start = [start[1], start[0]]
+        temp_goal = [goal[1], goal[0]]
+        temp_weight = (weights < 150) * 254 + 1
+        # For the heuristic to be valid, each move must cost at least 1.
+        if temp_weight.min(axis=None) < 1.:
+            raise ValueError("Minimum cost to move must be 1, but got %f" % (
+                temp_weight.min(axis=None)))
+        # Ensure start is within bounds.
+        if (temp_start[0] < 0 or temp_start[0] >= temp_weight.shape[0] or
+                temp_start[1] < 0 or temp_start[1] >= temp_weight.shape[1]):
+            raise ValueError("Start lies outside grid.")
+        # Ensure goal is within bounds.
+        if (temp_goal[0] < 0 or temp_goal[0] >= temp_weight.shape[0] or
+                temp_goal[1] < 0 or temp_goal[1] >= temp_weight.shape[1]):
+            raise ValueError("Goal lies outside grid.")
+
+        height, width = temp_weight.shape
+        start_idx = np.ravel_multi_index(temp_start, (height, width))
+        goal_idx = np.ravel_multi_index(temp_goal, (height, width))
+
+        path = astar(
+            temp_weight.flatten(), height, width, start_idx, goal_idx, allow_diagonal,
+        )
+        return path
+
+    def plot_env(self):
         plt.cla()
         plt.imshow(self.op_map, cmap='gray')
         plt.axis((0, self.map_size[1], self.map_size[0], 0))
         plt.plot(self.xPoint, self.yPoint, 'b', linewidth=2)
+        plt.plot(self.x2frontier, self.y2frontier, 'r', linewidth=2)
         plt.plot(self.robot_position[0], self.robot_position[1], 'mo', markersize=8)
         plt.plot(self.xPoint[0], self.yPoint[0], 'co', markersize=8)
         plt.pause(0.05)
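astar_path() reconciles three conventions at once: callers pass (x, y) positions while the grid is indexed (row, col), hence the [start[1], start[0]] swap; the compiled astar() takes flat indices into the flattened cost array, hence np.ravel_multi_index; and the thresholding (weights < 150) * 254 + 1 turns op_map values into costs, mapping free cells (255) to cost 1 and occupied or unknown cells (below 150) to cost 255. A tiny worked example of those conversions, on an invented 3x4 grid:

import numpy as np

height, width = 3, 4
# (row, col) = (2, 1) flattens to 2 * width + 1 = 9 in row-major order.
flat = np.ravel_multi_index((2, 1), (height, width))
assert flat == 9

# Cost construction as in astar_path(): free (255) -> 1, everything else -> 255.
op_map = np.array([[255, 127, 0, 255]])
costs = (op_map < 150) * 254 + 1
print(costs)  # [[  1 255 255   1]]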

scripts/tf_policy_cnn.py (+10, -10)
@@ -1,6 +1,5 @@
-from __future__ import print_function
-import tensorflow as tf
 import os
+import tensorflow as tf
 from skimage.transform import resize
 import random
 import numpy as np
@@ -11,26 +10,27 @@
 import robot_simulation as robot
 
 # select mode
-TRAIN = True
-PLOT = False
+TRAIN = False
+PLOT = True
 
 # training environment parameters
 ACTIONS = 50 # number of valid actions
 GAMMA = 0.99 # decay rate of past observations
 OBSERVE = 1e4 # timesteps to observe before training
 EXPLORE = 2e6 # frames over which to anneal epsilon
-REPLAY_MEMORY = 10000 # number of previous transitions to remember
+REPLAY_MEMORY = 1e4 # number of previous transitions to remember
 BATCH = 64 # size of minibatch
 FINAL_RATE = 0 # final value of dropout rate
 INITIAL_RATE = 0.9 # initial value of dropout rate
-TARGET_UPDATE = 5e4
+TARGET_UPDATE = 2e4
 
 network_dir = "../saved_networks/" + "cnn_" + str(ACTIONS)
-log_dir = "../log/" + "cnn_" + str(ACTIONS)
 if not os.path.exists(network_dir):
     os.makedirs(network_dir)
-if not os.path.exists(log_dir):
-    os.makedirs(log_dir)
+if TRAIN:
+    log_dir = "../log/" + "cnn_" + str(ACTIONS)
+    if not os.path.exists(log_dir):
+        os.makedirs(log_dir)
 
 
 def copy_weights(sess):
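One side effect worth flagging: REPLAY_MEMORY changes from the int literal 10000 to 1e4, and scientific-notation literals are floats in Python. Anywhere such a constant feeds an integer-only API it needs an explicit cast; a hedged illustration (the deque is hypothetical, not part of this diff):

from collections import deque

REPLAY_MEMORY = 1e4                        # float, as in this commit
# deque(maxlen=REPLAY_MEMORY) would raise TypeError: an integer is required
replay = deque(maxlen=int(REPLAY_MEMORY))
print(replay.maxlen)                       # 10000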
@@ -143,7 +143,7 @@ def start():
         total_reward = np.append(total_reward, r_t)
 
         # save progress
-        if step_t % 2e4 == 0 or step_t % 2e5 == 0 or step_t % 2e6 == 0:
+        if step_t == 2e4 or step_t == 2e5 or step_t == 2e6:
             saver.save(sess, network_dir + '/cnn', global_step=step_t)
 
         print("TIMESTEP", step_t, "/ DROPOUT", drop_rate, "/ ACTION", action_index, "/ REWARD", r_t,

scripts/tf_policy_rnn.py (+9, -8)
@@ -9,28 +9,29 @@
 import robot_simulation as robot
 
 # select mode
-TRAIN = True
-PLOT = False
+TRAIN = False
+PLOT = True
 
 # training environment parameters
 ACTIONS = 50 # number of valid actions
 GAMMA = 0.99 # decay rate of past observations
 OBSERVE = 1e4 # timesteps to observe before training
 EXPLORE = 2e6 # frames over which to anneal epsilon
-REPLAY_MEMORY = 1000 # number of previous transitions to remember
+REPLAY_MEMORY = 1e3 # number of previous transitions to remember
 BATCH = 8 # size of minibatch
 h_size = 512 # size of hidden cells of LSTM
 trace_length = 8 # memory length
 FINAL_RATE = 0 # final value of dropout rate
 INITIAL_RATE = 0.9 # initial value of dropout rate
-TARGET_UPDATE = 5e4
+TARGET_UPDATE = 2e4
 
 network_dir = "../saved_networks/" + "rnn_" + str(ACTIONS)
-log_dir = "../log/" + "rnn_" + str(ACTIONS)
 if not os.path.exists(network_dir):
     os.makedirs(network_dir)
-if not os.path.exists(log_dir):
-    os.makedirs(log_dir)
+if TRAIN:
+    log_dir = "../log/" + "rnn_" + str(ACTIONS)
+    if not os.path.exists(log_dir):
+        os.makedirs(log_dir)
 
 
 class experience_buffer():
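Both policy scripts now create log_dir only in training mode, so evaluation runs no longer leave empty log directories behind. The flip side is that log_dir is undefined when TRAIN is False, so any later consumer must sit behind the same guard; a hedged sketch (the TF1-style summary writer is illustrative and not shown in this diff):

import os
import tensorflow as tf

TRAIN = True
ACTIONS = 50

if TRAIN:
    log_dir = "../log/" + "rnn_" + str(ACTIONS)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # Hypothetical consumer: referencing log_dir outside this block would
    # raise NameError whenever TRAIN is False.
    writer = tf.summary.FileWriter(log_dir)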
@@ -188,7 +189,7 @@ def start():
         total_reward = np.append(total_reward, r_t)
 
         # save progress
-        if step_t % 2e4 == 0 or step_t % 2e5 == 0 or step_t % 2e6 == 0:
+        if step_t == 2e4 or step_t == 2e5 or step_t == 2e6:
             saver.save(sess, network_dir + '/rnn', global_step=step_t)
 
         print("TIMESTEP", step_t, "/ DROPOUT", drop_rate, "/ ACTION", action_index, "/ REWARD", r_t,
