Merge pull request #4 from harvitronix/sonar-with-cat

Sonar with cat
harvitronix · Mar 5, 2016 · 279dbbc · 279dbbc
2 parents 678172b + 42cc630
commit 279dbbc
Show file tree

Hide file tree

Showing 260 changed files with 2,322,040 additions and 8,106,140 deletions.
diff --git a/README.md b/README.md
@@ -2,7 +2,13 @@
 
 This is a hobby project I created to learn the basics of reinforcement learning. It uses Python3, Pygame, Pymunk, Keras and Theanos. It employes a Q-learning (unsupervised) algorithm to learn how to move an object around a screen (drive itself) without running into obstacles.
 
-Full writeup can be found here:
+The purpose of this project is to eventually use the learnings from the game to operate a real-life remote-control car, using distance sensors.
+
+This version of the code attempts to simulate the use of sensors to get us a step closer to being able to use this in the real world.
+
+**If you're coming here from parts 1 or 2 of the Medium posts, you want to visit the releases section and check out version 1.0.0, as the code has evolved passed that.**
+
+Full writeups that pertain to version 1.0.0 can be found here:
 
 *Part 1:* https://medium.com/@harvitronix/using-reinforcement-learning-in-python-to-teach-a-virtual-car-to-avoid-obstacles-6e782cc7d4c6
 
@@ -27,7 +33,7 @@ First, you need to train a model. This will save weights to the `saved-models` f
 
 `python3 learning.py`
 
-On my MBP with four cores, it takes ~~2-3~~ 8-12 hours to train a model. However, it will spit out weights whenever it has a "best" run, so you can move on to the next step in just 5-10 minutes while it continues to train.
+It can take anywhere from an hour to 36 hours to train a model, depending on the complexity of the network and the size of your sample. However, it will spit out weights every 25,000 frames, so you can move on to the next step in much less time.
 
 ### Playing
 

diff --git a/flat_game/carmunk.py b/flat_game/carmunk.py
@@ -15,14 +15,13 @@
 pygame.init()
 screen = pygame.display.set_mode((width, height))
 clock = pygame.time.Clock()
-running = True
-speed_multiplier = 0.02
 
 # Turn off alpha since we don't use it.
 screen.set_alpha(None)
 
-# Showing sensors slows things down.
+# Showing sensors and redrawing slows things down.
 show_sensors = False
+draw_screen = False
 
 
 class GameState:
@@ -33,21 +32,11 @@ def __init__(self):
         # Physics stuff.
         self.space = pymunk.Space()
         self.space.gravity = pymunk.Vec2d(0., 0.)
-        self.space.add_collision_handler(1, 1, post_solve=self.car_crashed)
-
-        # Create a car at a random corner.
-        corner = random.randint(0, 2)
-        if corner == 0:
-            # Bottom left.
-            self.create_car(100, 100, 0.5)
-        elif corner == 1:
-            # Top left.
-            self.create_car(100, height-100, 5)
-        elif corner == 2:
-            # Top right.
-            self.create_car(width-100, height-100, 3.5)
-
-        # To increase speed.
+
+        # Create the car.
+        self.create_car(100, 100, 0.5)
+
+        # Record steps.
         self.num_steps = 0
 
         # Create walls.
@@ -73,25 +62,34 @@ def __init__(self):
         self.space.add(static)
 
         # Create some obstacles, semi-randomly.
-        if random.random() > 0.5:
-            self.create_obstacle(450, 350, random.randint(75, 125))
-        else:
-            self.create_obstacle(300, 350, random.randint(125, 150))
-        if random.random() > 0.5:
-            self.create_obstacle(750, 200, random.randint(75, 125))
-        else:
-            self.create_obstacle(750, 350, random.randint(50, 100))
-        if random.random() > 0.5:
-            self.create_obstacle(600, 600, random.randint(25, 50))
+        # We'll create three and they'll move around to prevent over-fitting.
+        self.obstacles = []
+        self.obstacles.append(self.create_obstacle(200, 350, 100))
+        self.obstacles.append(self.create_obstacle(700, 200, 125))
+        self.obstacles.append(self.create_obstacle(600, 600, 35))
+
+        # Create a cat.
+        self.create_cat()
 
     def create_obstacle(self, x, y, r):
         c_body = pymunk.Body(pymunk.inf, pymunk.inf)
         c_shape = pymunk.Circle(c_body, r)
-        c_shape.collision_type = 1
         c_shape.elasticity = 1.0
         c_body.position = x, y
         c_shape.color = THECOLORS["blue"]
         self.space.add(c_body, c_shape)
+        return c_body
+
+    def create_cat(self):
+        inertia = pymunk.moment_for_circle(1, 0, 14, (0, 0))
+        self.cat_body = pymunk.Body(1, inertia)
+        self.cat_body.position = 50, height - 100
+        self.cat_shape = pymunk.Circle(self.cat_body, 30)
+        self.cat_shape.color = THECOLORS["orange"]
+        self.cat_shape.elasticity = 1.0
+        self.cat_shape.angle = 0.5
+        direction = Vec2d(1, 0).rotated(self.cat_body.angle)
+        self.space.add(self.cat_body, self.cat_shape)
 
     def create_car(self, x, y, r):
         inertia = pymunk.moment_for_circle(1, 0, 14, (0, 0))
@@ -101,118 +99,161 @@ def create_car(self, x, y, r):
         self.car_shape.color = THECOLORS["green"]
         self.car_shape.elasticity = 1.0
         self.car_body.angle = r
-        self.car_shape.collision_type = 1
         driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
         self.car_body.apply_impulse(driving_direction)
         self.space.add(self.car_body, self.car_shape)
 
-    def car_crashed(self, space, arbiter):
-        if arbiter.is_first_contact:
-            for contact in arbiter.contacts:
-                self.crashed = True
-
     def frame_step(self, action):
         if action == 0:  # Turn left.
             self.car_body.angle -= .2
         elif action == 1:  # Turn right.
             self.car_body.angle += .2
 
-        driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
-
-        # Make it get faster over time.
-        # self.car_body.velocity = (100 + self.num_steps * speed_multiplier) \
-        #    * driving_direction
-        self.car_body.velocity = 100 * driving_direction
+        # Move obstacles.
+        if self.num_steps % 100 == 0:
+            self.move_obstacles()
 
-        # Get the current location and the readings there.
-        x, y = self.car_body.position
-        readings = self.get_sensor_readings(x, y, self.car_body.angle)
-        state = np.array([readings])
+        # Move cat.
+        if self.num_steps % 5 == 0:
+            self.move_cat()
 
-        # Breadcrumbs.
-        # if self.num_steps % 10 == 0:
-        # self.drop_crumb(x, y)
+        driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
+        self.car_body.velocity = 100 * driving_direction
 
         # Update the screen and stuff.
         screen.fill(THECOLORS["black"])
         draw(screen, self.space)
         self.space.step(1./10)
-        #pygame.display.flip()
+        if draw_screen:
+            pygame.display.flip()
         clock.tick()
 
+        # Get the current location and the readings there.
+        x, y = self.car_body.position
+        readings = self.get_sonar_readings(x, y, self.car_body.angle)
+        state = np.array([readings])
+
         # Set the reward.
-        if self.crashed:
+        # Car crashed when any reading == 1
+        if self.car_is_crashed(readings):
+            self.crashed = True
             reward = -500
+            self.recover_from_crash(driving_direction)
         else:
-            reward = 75 - self.sum_readings(readings)
-            # reward = 1
+            # Higher readings are better, so return the sum.
+            reward = -5 + int(self.sum_readings(readings) / 10)
         self.num_steps += 1
 
         return reward, state
 
-    def drop_crumb(self, x, y):
-        crumb_body = pymunk.Body(pymunk.inf, pymunk.inf)
-        crumb_shape = pymunk.Circle(crumb_body, 2)
-        crumb_body.position = x, y
-        crumb_shape.color = THECOLORS["white"]
-        self.space.add(crumb_body, crumb_shape)
-        # screen.set_at((int(x), int(y)), THECOLORS["white"])
+    def move_obstacles(self):
+        # Randomly move obstacles around.
+        for obstacle in self.obstacles:
+            speed = random.randint(1, 5)
+            direction = Vec2d(1, 0).rotated(self.car_body.angle + random.randint(-2, 2))
+            obstacle.velocity = speed * direction
+
+    def move_cat(self):
+        speed = random.randint(20, 200)
+        self.cat_body.angle -= random.randint(-1, 1)
+        direction = Vec2d(1, 0).rotated(self.cat_body.angle)
+        self.cat_body.velocity = speed * direction
+
+    def car_is_crashed(self, readings):
+        if readings[0] == 1 or readings[1] == 1 or readings[2] == 1:
+            return True
+        else:
+            return False
+
+    def recover_from_crash(self, driving_direction):
+        """
+        We hit something, so recover.
+        """
+        while self.crashed:
+            # Go backwards.
+            self.car_body.velocity = -100 * driving_direction
+            self.crashed = False
+            for i in range(10):
+                self.car_body.angle += .2  # Turn a little.
+                screen.fill(THECOLORS["red"])  # Red is scary!
+                draw(screen, self.space)
+                self.space.step(1./10)
+                if draw_screen:
+                    pygame.display.flip()
+                clock.tick()
 
     def sum_readings(self, readings):
         """Sum the number of non-zero readings."""
         tot = 0
         for i in readings:
-            if i > 0:
-                tot += 1  # Reduce wall reading (2) to 1.
+            tot += i
         return tot
 
-    def get_sensor_readings(self, x, y, angle):
-        # Set a default distance.
-        distance = 15
-
-        # Get the points, as if the angle is 0.
-        # We use a list because it retains order.
-        sens_points = []
-
-        # Let's try making it a big grid.
-        for j in ([-8, 8, -7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0]):
-            for i in ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]):
-                if (i == 0 and (j == 0 or j == 1 or j == -1)) or \
-                        (i == 1 and j == 0):
-                    continue  # Skip the dots on top of the car.
-                sens_points.append((x+(distance*j), y+(i*distance)))
+    def get_sonar_readings(self, x, y, angle):
+        readings = []
         """
-        # Use far fewer sensors.
-        for i in ([0, 1, 2, 3]):
-            for j in ([-2, 2, -1, 1, 0]):
-                if i == 0 and j == 0:
-                    continue  # Skip the dot on top of the car.
-                sens_points.append((x+(distance*j), y+(i*distance)))
+        Instead of using a grid of boolean(ish) sensors, sonar readings
+        simply return N "distance" readings, one for each sonar
+        we're simulating. The distance is a count of the first non-zero
+        reading starting at the object. For instance, if the fifth sensor
+        in a sonar "arm" is non-zero, then that arm returns a distance of 5.
         """
+        # Make our arms.
+        arm_left = self.make_sonar_arm(x, y)
+        arm_middle = arm_left
+        arm_right = arm_left
+
+        # Rotate them and get readings.
+        readings.append(self.get_arm_distance(arm_left, x, y, angle, 0.75))
+        readings.append(self.get_arm_distance(arm_middle, x, y, angle, 0))
+        readings.append(self.get_arm_distance(arm_right, x, y, angle, -0.75))
+
+        if show_sensors:
+            pygame.display.update()
+
+        return readings
+
+    def get_arm_distance(self, arm, x, y, angle, offset):
+        # Used to count the distance.
+        i = 0
 
-        # Now rotate those to make it in the front of the car.
-        # And get the observations.
-        sensor_obs = []
-        for point in sens_points:
-            # Get the point location.
-            rotated_p = self.get_rotated_point(x, y, point[0], point[1], angle)
-            # Get the color there.
+        # Look at each point and see if we've hit something.
+        for point in arm:
+            i += 1
+
+            # Move the point to the right spot.
+            rotated_p = self.get_rotated_point(
+                x, y, point[0], point[1], angle + offset
+            )
+
+            # Check if we've hit something. Return the current i (distance)
+            # if we did.
             if rotated_p[0] <= 0 or rotated_p[1] <= 0 \
                     or rotated_p[0] >= width or rotated_p[1] >= height:
-                sensor_obs.append(2)  # Sensor is off the screen.
+                return i  # Sensor is off the screen.
             else:
                 obs = screen.get_at(rotated_p)
-                sensor_obs.append(self.get_track_or_not(obs))
-            # Now that we have the color, draw so we can see.
+                if self.get_track_or_not(obs) != 0:
+                    return i
+
             if show_sensors:
                 pygame.draw.circle(screen, (255, 255, 255), (rotated_p), 2)
-        if show_sensors:
-            pygame.display.update()
 
-        return sensor_obs
+        # Return the distance for the arm.
+        return i
+
+    def make_sonar_arm(self, x, y):
+        spread = 10  # Default spread.
+        distance = 20  # Gap before first sensor.
+        arm_points = []
+        # Make an arm. We build it flat because we'll rotate it about the
+        # center later.
+        for i in range(1, 40):
+            arm_points.append((distance + x + (spread * i), y))
+
+        return arm_points
 
     def get_rotated_point(self, x_1, y_1, x_2, y_2, radians):
-        radians += 1.5  # I have no idea why I have to do this.
         # Rotate x_2, y_2 around x_1, y_1 by angle.
         x_change = (x_2 - x_1) * math.cos(radians) + \
             (y_2 - y_1) * math.sin(radians)
@@ -223,13 +264,12 @@ def get_rotated_point(self, x_1, y_1, x_2, y_2, radians):
         return int(new_x), int(new_y)
 
     def get_track_or_not(self, reading):
-        # Check the colors returned and convert to a 1 or a 0.
-        # Reading[0] is 255 when it's red.
-        # Reading[2] is 255 when it's blue.
-        # Reading[1] is 255 when it's green.
-        if reading == THECOLORS['blue']:
-            return 1  # Sensor is on a ball.
-        elif reading == THECOLORS['red']:
-            return 2  # Sensor is on a wall.
-        else:
+        if reading == THECOLORS['black']:
             return 0
+        else:
+            return 1
+
+if __name__ == "__main__":
+    game_state = GameState()
+    while True:
+        game_state.frame_step((random.randint(0, 2)))