Skip to content

Commit

Permalink
Setting up the basic lunar lander to soar python and soar files
Browse files Browse the repository at this point in the history
  • Loading branch information
Tim Saucer committed May 1, 2019
1 parent b4656c0 commit 1b1533f
Show file tree
Hide file tree
Showing 6 changed files with 483 additions and 107 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.project
.pydevproject
**/ngs-temp.txt

3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "soar/new-goal-system-4"]
path = soar/new-goal-system-4
url = https://github.com/soartech/new-goal-system-4.git
198 changes: 91 additions & 107 deletions SoarLunarLander.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,48 +79,6 @@ def parse_output_commands(agent, structure):
def register_print_callback(kernel, agent, function, user_data=None):
    """Register ``function`` as the agent's handler for ``sml.smlEVENT_PRINT``.

    ``kernel`` is accepted but not used here. ``user_data`` is passed
    through unchanged to the registration call.
    """
    print_event = sml.smlEVENT_PRINT
    agent.RegisterForPrintEvent(print_event, function, user_data)

def register_output_callback(kernel, agent, function, user_data=None):
    """Register ``function`` as the agent's run-event handler for
    ``sml.smlEVENT_AFTER_OUTPUT_PHASE``.

    ``kernel`` is accepted but not used here. ``user_data`` is passed
    through unchanged to the registration call.
    """
    run_event = sml.smlEVENT_AFTER_OUTPUT_PHASE
    agent.RegisterForRunEvent(run_event, function, user_data)

def register_output_change_callback(kernel, agent, function, user_data=None):
    """Register ``function`` as the kernel's update-event handler for
    ``sml.smlEVENT_AFTER_ALL_GENERATED_OUTPUT``.

    ``agent`` is accepted but not used here; the registration is made on
    ``kernel``. ``user_data`` is passed through unchanged.
    """
    update_event = sml.smlEVENT_AFTER_ALL_GENERATED_OUTPUT
    kernel.RegisterForUpdateEvent(update_event, function, user_data)

# callback functions

def callback_output_cycle(event_id, user_data, kernel, flags):
    """Apply the agent's ``move-cart`` output command to the gym environment.

    ``user_data`` must be the 7-tuple
    ``(gym_env, agent, cart_pos, cart_vel, pole_pos, pole_vel, reward_il)``.
    When the agent has commands, the environment is rendered and stepped
    once, the four observation WMEs and the reward WME are updated, and the
    command is marked complete. When the step reports ``done`` the
    environment is reset and the agent is interrupted and re-initialised.
    ``event_id``, ``kernel`` and ``flags`` are not used.
    """
    gym_env, agent, cart_pos, cart_vel, pole_pos, pole_vel, reward_il = user_data

    # Nothing to do unless the agent reports commands.
    if not agent.Commands():
        return

    wanted_commands = {'move-cart': ['direction']}
    commands, mapping = parse_output_commands(agent, wanted_commands)
    gym_env.render()

    # 'left' maps to action 0; any other direction maps to action 1.
    direction = commands['move-cart']['direction']
    action = 0 if direction == 'left' else 1

    observation, reward, done, _info = gym_env.step(action)

    # The first four observation entries feed the four state WMEs, in order.
    state_wmes = (cart_pos, cart_vel, pole_pos, pole_vel)
    for index, wme in enumerate(state_wmes):
        wme.Update(observation[index])
    reward_il.Update(reward)

    mapping['move-cart'].CreateStringWME('status', 'complete')

    if done:
        gym_env.reset()
        for soar_command in ("interrupt", "init-soar"):
            print(agent.ExecuteCommandLine(soar_command))

def get_move_command(agent):
output_command_list = { 'move-cart': ['direction'] }

Expand All @@ -142,7 +100,34 @@ def get_move_command(agent):
def callback_print_message(mid, user_data, agent, message):
    """Print ``message`` with leading and trailing whitespace removed.

    ``mid``, ``user_data`` and ``agent`` are accepted but not used.
    """
    trimmed = message.strip()
    print(trimmed)

# soar code management
def create_input_wmes(agent):
    """Create the ``gym`` structure on the agent's input link.

    Six float WMEs (position, velocity, orientation) are created with an
    initial value of ``0.`` and two string WMEs (pad contact) with an
    initial value of ``'*NO*'``.

    Returns:
        The created WMEs as the tuple ``(x_pos, y_pos, x_vel, y_vel,
        ang_pos, ang_vel, left_lander, right_lander)``, the same order
        ``update_input_wmes`` unpacks.
    """
    gym_id = agent.GetInputLink().CreateIdWME('gym')

    x_pos = gym_id.CreateFloatWME('x-position', 0.)
    y_pos = gym_id.CreateFloatWME('y-position', 0.)
    x_vel = gym_id.CreateFloatWME('x-velocity', 0.)
    y_vel = gym_id.CreateFloatWME('y-velocity', 0.)
    ang_pos = gym_id.CreateFloatWME('orientation-angle', 0.)
    ang_vel = gym_id.CreateFloatWME('orientation-angular-velocity', 0.)

    # Start with the same "no contact" value that update_input_wmes writes
    # ('*NO*'), so the WME never holds a value outside '*YES*'/'*NO*'.
    left_lander = gym_id.CreateStringWME('left-pad-contact', '*NO*')
    right_lander = gym_id.CreateStringWME('right-pad-contact', '*NO*')

    return (x_pos, y_pos, x_vel, y_vel, ang_pos, ang_vel, left_lander, right_lander)

def has_contact(pad_value):
    """Return whether ``pad_value`` is strictly greater than 0.5."""
    contact_threshold = 0.5
    return pad_value > contact_threshold

def update_input_wmes(observation):
    """Copy ``observation`` into the module-level ``input_wmes``.

    Entries 0-5 are converted with ``float`` and written to the six float
    WMEs in order. Entries 6 and 7 are passed through ``has_contact`` and
    written to the left and right pad WMEs as ``'*YES*'`` or ``'*NO*'``.
    """
    (x_pos, y_pos, x_vel, y_vel,
     ang_pos, ang_vel, left_lander, right_lander) = input_wmes

    float_wmes = (x_pos, y_pos, x_vel, y_vel, ang_pos, ang_vel)
    for index, wme in enumerate(float_wmes):
        wme.Update(float(observation[index]))

    for wme, index in ((left_lander, 6), (right_lander, 7)):
        if has_contact(observation[index]):
            wme.Update('*YES*')
        else:
            wme.Update('*NO*')

if __name__ == "__main__":
# Create the user input thread and queue for return commands
Expand All @@ -155,75 +140,74 @@ def callback_print_message(mid, user_data, agent, message):
agent = create_agent(kernel, "agent")
register_print_callback(kernel, agent, callback_print_message, None)

# Cannot just execute this in the source file because the library doesn't load fast enough.
# We might even need to put in a delay or verify we got a response that it was loaded.

print(agent.ExecuteCommandLine("rl --set learning on"))
print(agent.ExecuteCommandLine("indifferent-selection --epsilon-greedy"))

print(agent.ExecuteCommandLine("soar tcl on"))

input_wmes = create_input_wmes(agent)

# Create the gym environment
gym_env = gym.make('LunarLander-v2')
observation = gym_env.reset()
update_input_wmes(observation)

print(observation)
step_num = 0
print('Step, x-pos, y-pos, x-vel, y-vel, ang, ang-vel, left-pad, right-pad')
# Log the initial observation as one CSV row. Entries 6 and 7 feed the
# left-pad and right-pad columns (the right-pad column previously read entry 0).
print('{}, {}, {}, {}, {}, {}, {}, {}, {}'.format(step_num, observation[0], observation[1], observation[2], observation[3], observation[4], observation[5], has_contact(observation[6]), has_contact(observation[7])))

# gym_id = agent.GetInputLink().CreateIdWME('gym')
# cart_pos = gym_id.CreateFloatWME('cart-position', observation[0])
# cart_vel = gym_id.CreateFloatWME('cart-velocity', observation[1])
# pole_pos = gym_id.CreateFloatWME('pole-angle', observation[2])
# pole_vel = gym_id.CreateFloatWME('pole-tip-velocity', observation[3])
# reward_il = gym_id.CreateFloatWME('current-reward', 0.)
#
# step_num = 0
#
# print(agent.ExecuteCommandLine("source soar/load.soar"))
#
# while True:
# gym_env.render()
#
# try:
# user_cmd = queue_user_cmds.get(False)
# except queue.Empty:
# pass
# else:
# if user_cmd in ("exit", "quit"):
# break
# elif user_cmd == "pause":
# is_paused = True
# elif user_cmd == "continue":
# is_paused = False
# else:
# print(agent.ExecuteCommandLine(user_cmd).strip())
#
# if is_paused:
# continue
#
# kernel.RunAllAgents(1)
# move_cmd = get_move_command(agent)
#
# if move_cmd is not None:
# observation, reward, done, info = gym_env.step(move_cmd)
#
# step_num = step_num + 1
#
# cart_pos.Update(observation[0])
# cart_vel.Update(observation[1])
# pole_pos.Update(observation[2])
# pole_vel.Update(observation[3])
# reward_il.Update(reward)
#
# # print(step_num, observation)
#
# if done:
# if step_num >= 195:
# if last_run_passed:
# num_consecutive_passes = num_consecutive_passes + 1
# else:
# last_run_passed = True
# num_consecutive_passes = 1
# else:
# last_run_passed = False
#
# print('Episode: ', episode_num, "Number of steps: ", step_num, 'Number of consecutive passes: ', num_consecutive_passes)
# episode_num = episode_num + 1
#
# step_num = 0
# gym_env.reset()
# agent.ExecuteCommandLine("init-soar")
print(agent.ExecuteCommandLine("source soar/lunar-lander.soar"))

while True:
gym_env.render()

try:
user_cmd = queue_user_cmds.get(False)
except queue.Empty:
pass
else:
if user_cmd in ("exit", "quit"):
break
elif user_cmd == "pause":
is_paused = True
elif user_cmd == "continue":
is_paused = False
else:
print(agent.ExecuteCommandLine(user_cmd).strip())
is_paused = True
if is_paused:
continue

kernel.RunAllAgents(1)
move_cmd = get_move_command(agent)

if move_cmd is not None:
observation, reward, done, info = gym_env.step(move_cmd)

step_num = step_num + 1
update_input_wmes(observation)

# Log this step's observation as one CSV row. Entries 6 and 7 feed the
# left-pad and right-pad columns (the right-pad column previously read entry 0).
print('{}, {}, {}, {}, {}, {}, {}, {}, {}'.format(step_num, observation[0], observation[1], observation[2], observation[3], observation[4], observation[5], has_contact(observation[6]), has_contact(observation[7])))

if done:
if step_num >= 195:
if last_run_passed:
num_consecutive_passes = num_consecutive_passes + 1
else:
last_run_passed = True
num_consecutive_passes = 1
else:
last_run_passed = False

print('Episode: ', episode_num, "Number of steps: ", step_num, 'Number of consecutive passes: ', num_consecutive_passes)
episode_num = episode_num + 1

step_num = 0
gym_env.reset()
agent.ExecuteCommandLine("init-soar")


gym_env.close()
Expand Down
Loading

0 comments on commit 1b1533f

Please sign in to comment.