Commit 9845ded

Update

1 parent 30cff1d

5 files changed: +37 -23 lines


README.md

+17 -4

@@ -1,5 +1,5 @@
 # Robot Exploration with Deep Reinforcement Learning
-This repository contains code for robot exploration training with Deep Reinforcement Learning (DRL). The agent utilize the local structure of the environment to predict robot’s optimal sensing action. A demonstration can be found here -> www.youtube.com/watch?v=2gNF6efv12s
+This repository contains code for robot exploration training with Deep Reinforcement Learning (DRL). The agent utilizes the local structure of the environment to predict the robot’s optimal sensing action. A demonstration video can be found here -> www.youtube.com/watch?v=2gNF6efv12s
 
 <p align='center'>
 <img src="/doc/exploration.png" alt="drawing" width="1000"/>
@@ -12,7 +12,13 @@ This repository contains code for robot exploration training with Deep Reinforce
 ## Dependency
 - Python 3
 - [scikit-image](https://scikit-image.org/)
+```
+pip3 install scikit-image
+```
 - [tensorboardX](https://github.com/lanpa/tensorboardX)
+```
+pip3 install tensorboardX
+```
 - [TensorFlow](https://www.tensorflow.org/install) (code is written under TF1.x but it is modified to be compatible with TF2)
 - [pybind11](https://github.com/pybind/pybind11) (pybind11 — Seamless operability between C++11 and Python)
 ```
@@ -51,8 +57,14 @@ make
 TRAIN = True
 PLOT = False
 ```
-Set ``TRAIN=False`` to run saved policy. You can train your own policy by set ``TRAIN=True``. Set `` PLOT=True `` to turn on visualization plots.
-
+Set ``TRAIN=False`` to run the saved policy. You can train your own policy by setting ``TRAIN=True``. Set ``PLOT=True`` to turn on visualization plots.
+
+- To show the average reward during training:
+```
+cd DRL_robot_exploration
+tensorboard --logdir=log
+```
+
 ## Cite
 
 Please cite [our paper](https://www.researchgate.net/profile/Fanfei_Chen/publication/330200308_Self-Learning_Exploration_and_Mapping_for_Mobile_Robots_via_Deep_Reinforcement_Learning/links/5d6e7ad4a6fdccf93d381d2e/Self-Learning-Exploration-and-Mapping-for-Mobile-Robots-via-Deep-Reinforcement-Learning.pdf) if you use any of this code:
@@ -68,4 +80,5 @@ Please cite [our paper](https://www.researchgate.net/profile/Fanfei_Chen/publica
 
 ## Reference
 - [DeepRL-Agents](https://github.com/awjuliani/DeepRL-Agents)
-- [DeepLearningFlappyBird](https://github.com/yenchenlin/DeepLearningFlappyBird)
+- [DeepLearningFlappyBird](https://github.com/yenchenlin/DeepLearningFlappyBird)
+- [Random Dungeon Generator](http://perplexingtech.weebly.com/random-dungeon-demo.html)

scripts/robot_simulation.py

+2 -1

@@ -23,7 +23,8 @@ def __init__(self, index_map, train, plot):
         self.map_dir = '../DungeonMaps/test'
         self.map_list = os.listdir(self.map_dir)
         self.map_number = np.size(self.map_list)
-        shuffle(self.map_list)
+        if self.mode:
+            shuffle(self.map_list)
         self.li_map = index_map
         self.global_map, self.robot_position = self.map_setup(self.map_dir + '/' + self.map_list[self.li_map])
         self.op_map = np.ones(self.global_map.shape) * 127
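
The change above makes the map order deterministic outside of training: the dungeon map list is shuffled only when `self.mode` (which appears to hold the `train` flag passed to `__init__`) is true, so evaluation visits the test maps in a fixed order. A minimal standalone sketch of the same pattern; the `MapLoader` class name and the sorted baseline order are illustrative, not taken from the repository:

```python
import os
from random import shuffle

class MapLoader:
    """Sketch: randomize map order for training, keep a reproducible order for evaluation."""

    def __init__(self, map_dir, train):
        self.mode = train                             # True -> training, False -> evaluation
        self.map_list = sorted(os.listdir(map_dir))   # deterministic baseline ordering
        if self.mode:
            shuffle(self.map_list)                    # shuffle in place only during training

# e.g. MapLoader('../DungeonMaps/test', train=False) visits maps in the same order every run
```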

scripts/tf_networks.py

+6 -6

@@ -39,13 +39,13 @@ def create_CNN(num_action):
     h_conv3_flat = tf.compat.v1.layers.flatten(h_conv3)
 
     h_fc1 = tf.compat.v1.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1)
-    keep_per = tf.compat.v1.placeholder(shape=None, dtype=tf.float32)
-    hidden = tf.compat.v1.nn.dropout(h_fc1, keep_per)
+    keep_rate = tf.compat.v1.placeholder(shape=None, dtype=tf.float32)
+    hidden = tf.compat.v1.nn.dropout(h_fc1, keep_rate)
 
     # readout layer
     readout = tf.matmul(hidden, W_fc2) + b_fc2
 
-    return s, readout, keep_per
+    return s, readout, keep_rate
 
 
 def create_LSTM(num_action, num_cell, scope):
@@ -84,10 +84,10 @@ def create_LSTM(num_action, num_cell, scope):
         inputs=convFlat, cell=rnn_cell, dtype=tf.float32, initial_state=state_in, scope=scope)
     rnn = tf.reshape(rnn, shape=[-1, num_cell])
 
-    keep_per = tf.compat.v1.placeholder(shape=None, dtype=tf.float32)
-    hidden = tf.compat.v1.nn.dropout(rnn, keep_per)
+    keep_rate = tf.compat.v1.placeholder(shape=None, dtype=tf.float32)
+    hidden = tf.compat.v1.nn.dropout(rnn, keep_rate)
 
     # readout layer
     readout = tf.matmul(hidden, W_fc2) + b_fc2
 
-    return s, readout, keep_per, trainLength, batch_size, state_in, rnn_state
+    return s, readout, keep_rate, trainLength, batch_size, state_in, rnn_state
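
Both network builders expose the renamed `keep_rate` placeholder so the caller can set the dropout keep probability per `sess.run`: 1.0 for deterministic evaluation, a smaller value for the stochastic forward passes used to choose actions "by uncertainty". A minimal sketch of that mechanism, assuming the scripts run under TF2 with the v1 compatibility layer and eager execution disabled; the 4-unit input is illustrative:

```python
import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()   # the repository's tf.compat.v1 graph code expects graph mode under TF2

# one scalar placeholder controls dropout for the whole graph and is set per call via feed_dict
x = tf.compat.v1.placeholder(tf.float32, shape=[None, 4])
keep_rate = tf.compat.v1.placeholder(dtype=tf.float32, shape=None)
hidden = tf.compat.v1.nn.dropout(x, keep_rate)        # keep-probability style argument, as in create_CNN/create_LSTM

with tf.compat.v1.Session() as sess:
    batch = np.ones((2, 4), dtype=np.float32)
    # keep_rate = 1.0: dropout disabled, deterministic output (used when running the saved policy)
    print(sess.run(hidden, feed_dict={x: batch, keep_rate: 1.0}))
    # keep_rate < 1.0: random units are zeroed on each pass (used for uncertainty-driven action selection)
    print(sess.run(hidden, feed_dict={x: batch, keep_rate: 0.5}))
```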

scripts/tf_policy_cnn.py

+6 -6

@@ -44,8 +44,8 @@ def start():
     config = tf.compat.v1.ConfigProto()
     config.gpu_options.allow_growth = True
     sess = tf.compat.v1.InteractiveSession(config=config)
-    s, readout, keep_per = create_CNN(ACTIONS)
-    s_target, readout_target, keep_per_target = create_CNN(ACTIONS)
+    s, readout, keep_rate = create_CNN(ACTIONS)
+    s_target, readout_target, keep_rate_target = create_CNN(ACTIONS)
 
     # define the cost function
     a = tf.compat.v1.placeholder("float", [None, ACTIONS])
@@ -93,7 +93,7 @@ def start():
         drop_rate -= (INITIAL_RATE - FINAL_RATE) / EXPLORE
 
         # choose an action by uncertainty
-        readout_t = readout.eval(feed_dict={s: s_t, keep_per: 1-drop_rate})[0]
+        readout_t = readout.eval(feed_dict={s: s_t, keep_rate: 1-drop_rate})[0]
         readout_t[a_t_coll] = None
         a_t = np.zeros([ACTIONS])
         action_index = np.nanargmax(readout_t)
@@ -125,7 +125,7 @@ def start():
             r_batch = np.vstack(minibatch[:, 2]).flatten()
             s_j1_batch = np.vstack(minibatch[:, 3])
 
-            readout_j1_batch = readout_target.eval(feed_dict={s_target: s_j1_batch, keep_per_target: 0.2})
+            readout_j1_batch = readout_target.eval(feed_dict={s_target: s_j1_batch, keep_rate_target: 0.2})
             end_multiplier = -(np.vstack(minibatch[:, 4]).flatten() - 1)
             y_batch = r_batch + GAMMA * np.max(readout_j1_batch) * end_multiplier
 
@@ -134,7 +134,7 @@ def start():
                 y: y_batch,
                 a: a_batch,
                 s: s_j_batch,
-                keep_per: 0.2}
+                keep_rate: 0.2}
             )
             new_average_reward = np.average(total_reward[len(total_reward) - 10000:])
             writer.add_scalar('average reward', new_average_reward, step_t)
@@ -170,7 +170,7 @@ def start():
 
     while not TRAIN and not finish_all_map:
         # choose an action by policy
-        readout_t = readout.eval(feed_dict={s: s_t, keep_per: 1})[0]
+        readout_t = readout.eval(feed_dict={s: s_t, keep_rate: 1})[0]
        readout_t[a_t_coll] = None
        a_t = np.zeros([ACTIONS])
        action_index = np.nanargmax(readout_t)
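
For context, the `y_batch` computed just above the renamed feed entries is the standard DQN bootstrap target: the target network's value of the next state is discounted by `GAMMA` and zeroed for terminal transitions via `end_multiplier` (the flag in column 4 of each replay transition is assumed to be 1 when the episode ended, as the `-(flag - 1)` trick suggests). A small NumPy sketch with illustrative numbers; it takes the per-row maximum with `axis=1`, the textbook form, whereas the repository calls `np.max` over the whole batch:

```python
import numpy as np

GAMMA = 0.99                                  # illustrative discount factor

# one transition per row: reward, terminal flag, target-network Q-values for the next state
r_batch  = np.array([0.5, -1.0, 0.2])
terminal = np.array([0.0, 1.0, 0.0])          # 1 = episode ended on this transition
readout_j1_batch = np.array([[0.1, 0.7],
                             [0.3, 0.2],
                             [0.9, 0.4]])

end_multiplier = -(terminal - 1)              # 1 for non-terminal rows, 0 for terminal rows
q_next = np.max(readout_j1_batch, axis=1)     # greedy value of the next state, per row
y_batch = r_batch + GAMMA * q_next * end_multiplier

print(y_batch)                                # [1.193, -1.0, 1.091]; the terminal row keeps only its reward
```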

scripts/tf_policy_rnn.py

+6 -6

@@ -76,8 +76,8 @@ def start():
     config = tf.compat.v1.ConfigProto()
     config.gpu_options.allow_growth = True
     sess = tf.compat.v1.InteractiveSession(config=config)
-    s, readout, keep_per, tl, bs, si, rnn_state = create_LSTM(ACTIONS, h_size, 'policy')
-    s_target, readout_target, keep_per_target, \
+    s, readout, keep_rate, tl, bs, si, rnn_state = create_LSTM(ACTIONS, h_size, 'policy')
+    s_target, readout_target, keep_rate_target, \
         tl_target, bs_target, si_target, rnn_state_target = create_LSTM(ACTIONS, h_size, 'target')
 
     # define the cost function
@@ -130,7 +130,7 @@ def start():
 
         # choose an action by uncertainty
         readout_t, state1 = sess.run([readout, rnn_state],
-                                     feed_dict={s: s_t, keep_per: 1 - drop_rate, tl: 1, bs: 1, si: state})
+                                     feed_dict={s: s_t, keep_rate: 1 - drop_rate, tl: 1, bs: 1, si: state})
         readout_t = readout_t[0]
         readout_t[a_t_coll] = None
         a_t = np.zeros([ACTIONS])
@@ -165,7 +165,7 @@ def start():
             r_batch = np.vstack(trainBatch[:, 2]).flatten()
             s_j1_batch = np.vstack(trainBatch[:, 3])
 
-            readout_j1_batch = readout_target.eval(feed_dict={s_target: s_j1_batch, keep_per_target: 0.2,
+            readout_j1_batch = readout_target.eval(feed_dict={s_target: s_j1_batch, keep_rate_target: 0.2,
                                                               tl_target: trace_length, bs_target: BATCH,
                                                               si_target: state_train})[0]
             end_multiplier = -(np.vstack(trainBatch[:, 4]).flatten() - 1)
@@ -176,7 +176,7 @@ def start():
                 y: y_batch,
                 a: a_batch,
                 s: s_j_batch,
-                keep_per: 0.2,
+                keep_rate: 0.2,
                 tl: trace_length,
                 bs: BATCH,
                 si: state_train}
@@ -222,7 +222,7 @@ def start():
     while not TRAIN and not finish_all_map:
         # choose an action by uncertainty
         readout_t, state1 = sess.run([readout, rnn_state],
-                                     feed_dict={s: s_t, keep_per: 1, tl: 1, bs: 1, si: state})
+                                     feed_dict={s: s_t, keep_rate: 1, tl: 1, bs: 1, si: state})
         readout_t = readout_t[0]
         readout_t[a_t_coll] = None
         a_t = np.zeros([ACTIONS])
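
Both policy scripts mask previously colliding actions before the greedy pick: assigning `None` into the float Q-value array stores NaN, and `np.nanargmax` then ignores those entries. A small standalone sketch of that trick; the Q-values and collision indices are hypothetical:

```python
import numpy as np

readout_t = np.array([0.42, 0.91, 0.10, 0.77])   # Q-value estimates for the available actions
a_t_coll = [1, 3]                                # actions that led to collisions (hypothetical indices)

readout_t[a_t_coll] = None                       # None becomes NaN in a float array, masking those actions
action_index = np.nanargmax(readout_t)           # index of the best remaining action -> 0

a_t = np.zeros(len(readout_t))
a_t[action_index] = 1                            # one-hot action vector, as in the policy scripts
print(action_index, a_t)                         # 0 [1. 0. 0. 0.]
```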
