Setting up the basic lunar lander to soar python and soar files

timsaucer · May 1, 2019 · 1b1533f · 1b1533f
1 parent b4656c0
commit 1b1533f
Show file tree

Hide file tree

Showing 6 changed files with 483 additions and 107 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 .project
 .pydevproject
+**/ngs-temp.txt
+
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "soar/new-goal-system-4"]
+	path = soar/new-goal-system-4
+	url = https://github.com/soartech/new-goal-system-4.git
diff --git a/SoarLunarLander.py b/SoarLunarLander.py
@@ -79,48 +79,6 @@ def parse_output_commands(agent, structure):
 def register_print_callback(kernel, agent, function, user_data=None):
     agent.RegisterForPrintEvent(sml.smlEVENT_PRINT, function, user_data)
 
-def register_output_callback(kernel, agent, function, user_data=None):
-    agent.RegisterForRunEvent(sml.smlEVENT_AFTER_OUTPUT_PHASE, function, user_data)
-
-def register_output_change_callback(kernel, agent, function, user_data=None):
-    kernel.RegisterForUpdateEvent(sml.smlEVENT_AFTER_ALL_GENERATED_OUTPUT, function, user_data)
-
-# callback functions
-
-def callback_output_cycle(event_id, user_data, kernel, flags):
-    (gym_env, agent, cart_pos, cart_vel, pole_pos, pole_vel, reward_il) = user_data
-
-    output_command_list = {
-        'move-cart': ['direction']
-    }
-
-    if agent.Commands():
-        (commands, mapping) = parse_output_commands(agent, output_command_list)
-        gym_env.render()
-
-        move_cart_cmd = commands['move-cart']
-        direction = move_cart_cmd['direction']
-
-        if direction == 'left':
-            action = 0
-        else:
-            action = 1
-
-        observation, reward, done, info = gym_env.step(action)
-        cart_pos.Update(observation[0])
-        cart_vel.Update(observation[1])
-        pole_pos.Update(observation[2])
-        pole_vel.Update(observation[3])
-        reward_il.Update(reward)
-
-        mapping['move-cart'].CreateStringWME('status', 'complete')
-
-#        print(observation)
-        if done:
-            gym_env.reset()
-            print(agent.ExecuteCommandLine("interrupt"))
-            print(agent.ExecuteCommandLine("init-soar"))
-
 def get_move_command(agent):
     output_command_list = { 'move-cart': ['direction'] }
 
@@ -142,7 +100,34 @@ def get_move_command(agent):
 def callback_print_message(mid, user_data, agent, message):
     print(message.strip())
 
-# soar code management
+def create_input_wmes(agent):
+    """Create the 'gym' input-link structure; return its eight value WMEs in observation order."""
+    gym_id = agent.GetInputLink().CreateIdWME('gym')
+    x_pos = gym_id.CreateFloatWME('x-position', 0.)
+    y_pos = gym_id.CreateFloatWME('y-position', 0.)
+    x_vel = gym_id.CreateFloatWME('x-velocity', 0.)
+    y_vel = gym_id.CreateFloatWME('y-velocity', 0.)
+    ang_pos = gym_id.CreateFloatWME('orientation-angle', 0.)
+    ang_vel = gym_id.CreateFloatWME('orientation-angular-velocity', 0.)
+    left_lander = gym_id.CreateStringWME('left-pad-contact', '*NO*')  # same token update_input_wmes() writes
+    right_lander = gym_id.CreateStringWME('right-pad-contact', '*NO*')
+    return (x_pos, y_pos, x_vel, y_vel, ang_pos, ang_vel, left_lander, right_lander)
+
+def has_contact(pad_value):
+    return pad_value > 0.5  # NOTE(review): presumably the pad readings are 0.0/1.0 flags - confirm
+
+def update_input_wmes(observation):
+    """Copy one gym observation onto the input-link WMEs held in the module-level input_wmes."""
+    float_wmes, (left_pad, right_pad) = input_wmes[:6], input_wmes[6:]
+
+    # Slots 0-5 pair up one-to-one with the six float WMEs, in the order
+    # they were created: x/y position, x/y velocity, angle, angular velocity.
+    for wme, reading in zip(float_wmes, observation):
+        wme.Update(float(reading))
+
+    # Slots 6 and 7 are the pad readings, handed to the agent as string tokens.
+    left_pad.Update('*YES*' if has_contact(observation[6]) else '*NO*')
+    right_pad.Update('*YES*' if has_contact(observation[7]) else '*NO*')
 
 if __name__ == "__main__":
     # Create the user input thread and queue for return commands
@@ -155,75 +140,74 @@ def callback_print_message(mid, user_data, agent, message):
     agent = create_agent(kernel, "agent")
     register_print_callback(kernel, agent, callback_print_message, None)
 
+    # Cannot just execute this in the source file because the library doesn't load fast enough.
+    # We might even need to put in a delay or verify we got a response that it was loaded.
+    # NOTE(review): "this" presumably refers to the "soar tcl on" command below - confirm.
+    print(agent.ExecuteCommandLine("rl --set learning on"))
+    print(agent.ExecuteCommandLine("indifferent-selection --epsilon-greedy"))
+    # Issued from Python here, before soar/lunar-lander.soar is sourced further down.
+    print(agent.ExecuteCommandLine("soar tcl on"))
+    # The WMEs are created once; update_input_wmes() reads this module-level tuple on every call.
+    input_wmes = create_input_wmes(agent)
+
     # Create the gym environment
     gym_env = gym.make('LunarLander-v2')
     observation = gym_env.reset()
+    update_input_wmes(observation)
 
-    print(observation)
+    step_num = 0  # steps taken in the current episode; reset to 0 when an episode ends
+    print('Step, x-pos, y-pos, x-vel, y-vel, ang, ang-vel, left-pad, right-pad')
+    print('{}, {}, {}, {}, {}, {}, {}, {}, {}'.format(step_num, observation[0], observation[1], observation[2], observation[3], observation[4], observation[5], has_contact(observation[6]), has_contact(observation[7])))
 
-#     gym_id = agent.GetInputLink().CreateIdWME('gym')
-#     cart_pos = gym_id.CreateFloatWME('cart-position', observation[0])
-#     cart_vel = gym_id.CreateFloatWME('cart-velocity', observation[1])
-#     pole_pos = gym_id.CreateFloatWME('pole-angle', observation[2])
-#     pole_vel = gym_id.CreateFloatWME('pole-tip-velocity', observation[3])
-#     reward_il = gym_id.CreateFloatWME('current-reward', 0.)
-#     
-#     step_num = 0
-#     
-#     print(agent.ExecuteCommandLine("source soar/load.soar"))
-#     
-#     while True:
-#         gym_env.render()
-#         
-#         try:
-#             user_cmd = queue_user_cmds.get(False)
-#         except queue.Empty:
-#             pass
-#         else:
-#             if user_cmd in ("exit", "quit"):
-#                 break
-#             elif user_cmd == "pause":
-#                 is_paused = True
-#             elif user_cmd == "continue":
-#                 is_paused = False
-#             else:
-#                 print(agent.ExecuteCommandLine(user_cmd).strip())
-#         
-#         if is_paused:
-#             continue
-#         
-#         kernel.RunAllAgents(1)
-#         move_cmd = get_move_command(agent)
-#         
-#         if move_cmd is not None:
-#             observation, reward, done, info = gym_env.step(move_cmd)
-#             
-#             step_num = step_num + 1
-#             
-#             cart_pos.Update(observation[0])
-#             cart_vel.Update(observation[1])
-#             pole_pos.Update(observation[2])
-#             pole_vel.Update(observation[3])
-#             reward_il.Update(reward)
-#             
-# #            print(step_num, observation)
-#             
-#             if done:
-#                 if step_num >= 195:
-#                     if last_run_passed:
-#                         num_consecutive_passes = num_consecutive_passes + 1
-#                     else:
-#                         last_run_passed = True
-#                         num_consecutive_passes = 1
-#                 else:
-#                     last_run_passed = False
-#                     
-#                 print('Episode: ', episode_num, "Number of steps: ", step_num, 'Number of consecutive passes: ', num_consecutive_passes)
-#                 episode_num = episode_num + 1
-#                 
-#                 step_num = 0
-#                 gym_env.reset()
-#                 agent.ExecuteCommandLine("init-soar")
+    print(agent.ExecuteCommandLine("source soar/lunar-lander.soar"))
+
+    while True:  # each pass: render, poll one console command, then run the agent one step
+        gym_env.render()
+        # Non-blocking read: handle at most one queued user command per pass.
+        try:
+            user_cmd = queue_user_cmds.get(False)
+        except queue.Empty:
+            pass
+        else:
+            if user_cmd in ("exit", "quit"):
+                break
+            elif user_cmd == "pause":
+                is_paused = True
+            elif user_cmd == "continue":
+                is_paused = False
+            else:
+                print(agent.ExecuteCommandLine(user_cmd).strip())
+        # Pause state comes only from the commands above; is_paused is assumed set before the loop.
+        if is_paused:
+            continue
+
+        kernel.RunAllAgents(1)
+        move_cmd = get_move_command(agent)
+
+        if move_cmd is not None:
+            observation, reward, done, info = gym_env.step(move_cmd)
+
+            step_num = step_num + 1
+            update_input_wmes(observation)
+
+            print('{}, {}, {}, {}, {}, {}, {}, {}, {}'.format(step_num, observation[0], observation[1], observation[2], observation[3], observation[4], observation[5], has_contact(observation[6]), has_contact(observation[7])))
+            # NOTE(review): the 195-step pass mark looks carried over from the cart-pole script - confirm.
+            if done:
+                if step_num >= 195:
+                    if last_run_passed:
+                        num_consecutive_passes = num_consecutive_passes + 1
+                    else:
+                        last_run_passed = True
+                        num_consecutive_passes = 1
+                else:
+                    last_run_passed = False
+
+                print('Episode: ', episode_num, "Number of steps: ", step_num, 'Number of consecutive passes: ', num_consecutive_passes)
+                episode_num = episode_num + 1
+                # New episode: push the fresh observation so the input link no longer shows the terminal state.
+                step_num = 0
+                update_input_wmes(gym_env.reset())
+                agent.ExecuteCommandLine("init-soar")
 
 
     gym_env.close()