Skip to content

Commit

Permalink
Merge pull request #4 from harvitronix/sonar-with-cat
Browse files Browse the repository at this point in the history
Sonar with cat
  • Loading branch information
harvitronix committed Mar 5, 2016
2 parents 678172b + 42cc630 commit 279dbbc
Show file tree
Hide file tree
Showing 260 changed files with 2,322,040 additions and 8,106,140 deletions.
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@

This is a hobby project I created to learn the basics of reinforcement learning. It uses Python 3, Pygame, Pymunk, Keras and Theano. It employs a Q-learning algorithm (a reinforcement-learning method that needs no labeled training data) to learn how to move an object around a screen (drive itself) without running into obstacles.

Full writeup can be found here:
The purpose of this project is to eventually use the learnings from the game to operate a real-life remote-control car, using distance sensors.

This version of the code attempts to simulate the use of sensors to get us a step closer to being able to use this in the real world.

**If you're coming here from parts 1 or 2 of the Medium posts, you want to visit the releases section and check out version 1.0.0, as the code has evolved past that.**

Full writeups that pertain to version 1.0.0 can be found here:

*Part 1:* https://medium.com/@harvitronix/using-reinforcement-learning-in-python-to-teach-a-virtual-car-to-avoid-obstacles-6e782cc7d4c6

Expand All @@ -27,7 +33,7 @@ First, you need to train a model. This will save weights to the `saved-models` f

`python3 learning.py`

On my MBP with four cores, it takes ~~2-3~~ 8-12 hours to train a model. However, it will spit out weights whenever it has a "best" run, so you can move on to the next step in just 5-10 minutes while it continues to train.
It can take anywhere from an hour to 36 hours to train a model, depending on the complexity of the network and the size of your sample. However, it will spit out weights every 25,000 frames, so you can move on to the next step in much less time.

### Playing

Expand Down
250 changes: 145 additions & 105 deletions flat_game/carmunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@
pygame.init()
screen = pygame.display.set_mode((width, height))
clock = pygame.time.Clock()
running = True
speed_multiplier = 0.02

# Turn off alpha since we don't use it.
screen.set_alpha(None)

# Showing sensors slows things down.
# Showing sensors and redrawing slows things down.
show_sensors = False
draw_screen = False


class GameState:
Expand All @@ -33,21 +32,11 @@ def __init__(self):
# Physics stuff.
self.space = pymunk.Space()
self.space.gravity = pymunk.Vec2d(0., 0.)
self.space.add_collision_handler(1, 1, post_solve=self.car_crashed)

# Create a car at a random corner.
corner = random.randint(0, 2)
if corner == 0:
# Bottom left.
self.create_car(100, 100, 0.5)
elif corner == 1:
# Top left.
self.create_car(100, height-100, 5)
elif corner == 2:
# Top right.
self.create_car(width-100, height-100, 3.5)

# To increase speed.

# Create the car.
self.create_car(100, 100, 0.5)

# Record steps.
self.num_steps = 0

# Create walls.
Expand All @@ -73,25 +62,34 @@ def __init__(self):
self.space.add(static)

# Create some obstacles, semi-randomly.
if random.random() > 0.5:
self.create_obstacle(450, 350, random.randint(75, 125))
else:
self.create_obstacle(300, 350, random.randint(125, 150))
if random.random() > 0.5:
self.create_obstacle(750, 200, random.randint(75, 125))
else:
self.create_obstacle(750, 350, random.randint(50, 100))
if random.random() > 0.5:
self.create_obstacle(600, 600, random.randint(25, 50))
# We'll create three and they'll move around to prevent over-fitting.
self.obstacles = []
self.obstacles.append(self.create_obstacle(200, 350, 100))
self.obstacles.append(self.create_obstacle(700, 200, 125))
self.obstacles.append(self.create_obstacle(600, 600, 35))

# Create a cat.
self.create_cat()

def create_obstacle(self, x, y, r):
c_body = pymunk.Body(pymunk.inf, pymunk.inf)
c_shape = pymunk.Circle(c_body, r)
c_shape.collision_type = 1
c_shape.elasticity = 1.0
c_body.position = x, y
c_shape.color = THECOLORS["blue"]
self.space.add(c_body, c_shape)
return c_body

def create_cat(self):
    """Create the cat and add it to the physics space.

    The cat is a mass-1 body carrying an orange circle shape of radius 30,
    placed at ``(50, height - 100)``. The body and shape are stored on
    ``self.cat_body`` and ``self.cat_shape``.
    """
    # NOTE(review): the moment is computed for an outer radius of 14 while
    # the shape below has radius 30 -- looks copied from create_car. Left
    # unchanged so the collision response stays as it was; confirm intent.
    inertia = pymunk.moment_for_circle(1, 0, 14, (0, 0))
    self.cat_body = pymunk.Body(1, inertia)
    self.cat_body.position = 50, height - 100
    # The heading belongs to the body (move_cat reads cat_body.angle).
    # The original assigned it to the shape, where it had no effect.
    self.cat_body.angle = 0.5
    self.cat_shape = pymunk.Circle(self.cat_body, 30)
    self.cat_shape.color = THECOLORS["orange"]
    self.cat_shape.elasticity = 1.0
    self.space.add(self.cat_body, self.cat_shape)

def create_car(self, x, y, r):
inertia = pymunk.moment_for_circle(1, 0, 14, (0, 0))
Expand All @@ -101,118 +99,161 @@ def create_car(self, x, y, r):
self.car_shape.color = THECOLORS["green"]
self.car_shape.elasticity = 1.0
self.car_body.angle = r
self.car_shape.collision_type = 1
driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
self.car_body.apply_impulse(driving_direction)
self.space.add(self.car_body, self.car_shape)

def car_crashed(self, space, arbiter):
if arbiter.is_first_contact:
for contact in arbiter.contacts:
self.crashed = True

def frame_step(self, action):
if action == 0: # Turn left.
self.car_body.angle -= .2
elif action == 1: # Turn right.
self.car_body.angle += .2

driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)

# Make it get faster over time.
# self.car_body.velocity = (100 + self.num_steps * speed_multiplier) \
# * driving_direction
self.car_body.velocity = 100 * driving_direction
# Move obstacles.
if self.num_steps % 100 == 0:
self.move_obstacles()

# Get the current location and the readings there.
x, y = self.car_body.position
readings = self.get_sensor_readings(x, y, self.car_body.angle)
state = np.array([readings])
# Move cat.
if self.num_steps % 5 == 0:
self.move_cat()

# Breadcrumbs.
# if self.num_steps % 10 == 0:
# self.drop_crumb(x, y)
driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
self.car_body.velocity = 100 * driving_direction

# Update the screen and stuff.
screen.fill(THECOLORS["black"])
draw(screen, self.space)
self.space.step(1./10)
#pygame.display.flip()
if draw_screen:
pygame.display.flip()
clock.tick()

# Get the current location and the readings there.
x, y = self.car_body.position
readings = self.get_sonar_readings(x, y, self.car_body.angle)
state = np.array([readings])

# Set the reward.
if self.crashed:
# Car crashed when any reading == 1
if self.car_is_crashed(readings):
self.crashed = True
reward = -500
self.recover_from_crash(driving_direction)
else:
reward = 75 - self.sum_readings(readings)
# reward = 1
# Higher readings are better, so return the sum.
reward = -5 + int(self.sum_readings(readings) / 10)
self.num_steps += 1

return reward, state

def drop_crumb(self, x, y):
crumb_body = pymunk.Body(pymunk.inf, pymunk.inf)
crumb_shape = pymunk.Circle(crumb_body, 2)
crumb_body.position = x, y
crumb_shape.color = THECOLORS["white"]
self.space.add(crumb_body, crumb_shape)
# screen.set_at((int(x), int(y)), THECOLORS["white"])
def move_obstacles(self):
    """Assign every obstacle in ``self.obstacles`` a new random velocity."""
    for body in self.obstacles:
        # Roll the speed first, then the heading, in that order.
        pace = random.randint(1, 5)
        # Heading is the car's current angle shifted by a whole number
        # of radians in [-2, 2].
        heading = self.car_body.angle + random.randint(-2, 2)
        body.velocity = pace * Vec2d(1, 0).rotated(heading)

def move_cat(self):
    """Turn the cat by a random whole-radian step and set a random speed."""
    cat = self.cat_body
    # Speed is rolled before the turn, matching the order of random draws.
    pace = random.randint(20, 200)
    turn = random.randint(-1, 1)
    cat.angle = cat.angle - turn
    # Velocity points along the cat's new heading.
    cat.velocity = pace * Vec2d(1, 0).rotated(cat.angle)

def car_is_crashed(self, readings):
    """Return True when any sonar reading equals 1.

    A reading of 1 means the very first point of a sonar arm already hit
    something, which is treated as a crash.

    Args:
        readings: iterable of sonar distances, one per arm. Any number of
            arms is accepted; an empty iterable yields False.

    Returns:
        bool: True if at least one reading is exactly 1, else False.
    """
    return any(distance == 1 for distance in readings)

def recover_from_crash(self, driving_direction):
    """Back the car away from whatever it hit.

    While ``self.crashed`` is set, the car is sent backwards along
    ``driving_direction``, the flag is cleared, and ten physics steps are
    run while the car turns 0.2 rad per step on a red background.
    """
    reverse_velocity = -100 * driving_direction
    while self.crashed:
        # Go backwards and clear the flag; the loop condition is
        # re-checked only after the ten recovery steps below.
        self.car_body.velocity = reverse_velocity
        self.crashed = False
        for _ in range(10):
            self.car_body.angle += .2  # Turn a little.
            screen.fill(THECOLORS["red"])  # Red is scary!
            draw(screen, self.space)
            self.space.step(1./10)
            if draw_screen:
                pygame.display.flip()
            clock.tick()

def sum_readings(self, readings):
"""Sum the number of non-zero readings."""
tot = 0
for i in readings:
if i > 0:
tot += 1 # Reduce wall reading (2) to 1.
tot += i
return tot

def get_sensor_readings(self, x, y, angle):
# Set a default distance.
distance = 15

# Get the points, as if the angle is 0.
# We use a list because it retains order.
sens_points = []

# Let's try making it a big grid.
for j in ([-8, 8, -7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0]):
for i in ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]):
if (i == 0 and (j == 0 or j == 1 or j == -1)) or \
(i == 1 and j == 0):
continue # Skip the dots on top of the car.
sens_points.append((x+(distance*j), y+(i*distance)))
def get_sonar_readings(self, x, y, angle):
    """Return the distance readings of the three simulated sonars.

    Instead of using a grid of boolean(ish) sensors, sonar readings
    simply return N "distance" readings, one for each sonar
    we're simulating. The distance is a count of the first non-zero
    reading starting at the object. For instance, if the fifth sensor
    in a sonar "arm" is non-zero, then that arm returns a distance of 5.

    Args:
        x: x coordinate the arms start from (the car's position).
        y: y coordinate the arms start from.
        angle: the car's heading in radians; each arm is offset from it.

    Returns:
        list: ``[left, middle, right]`` distances, for arm offsets of
        0.75, 0 and -0.75 radians respectively.
    """
    # One flat arm serves all three sonars: the arm is never mutated, and
    # get_arm_distance applies the per-sonar rotation offset itself.
    arm = self.make_sonar_arm(x, y)
    readings = [
        self.get_arm_distance(arm, x, y, angle, offset)
        for offset in (0.75, 0, -0.75)
    ]

    if show_sensors:
        pygame.display.update()

    return readings

def get_arm_distance(self, arm, x, y, angle, offset):
# Used to count the distance.
i = 0

# Now rotate those to make it in the front of the car.
# And get the observations.
sensor_obs = []
for point in sens_points:
# Get the point location.
rotated_p = self.get_rotated_point(x, y, point[0], point[1], angle)
# Get the color there.
# Look at each point and see if we've hit something.
for point in arm:
i += 1

# Move the point to the right spot.
rotated_p = self.get_rotated_point(
x, y, point[0], point[1], angle + offset
)

# Check if we've hit something. Return the current i (distance)
# if we did.
if rotated_p[0] <= 0 or rotated_p[1] <= 0 \
or rotated_p[0] >= width or rotated_p[1] >= height:
sensor_obs.append(2) # Sensor is off the screen.
return i # Sensor is off the screen.
else:
obs = screen.get_at(rotated_p)
sensor_obs.append(self.get_track_or_not(obs))
# Now that we have the color, draw so we can see.
if self.get_track_or_not(obs) != 0:
return i

if show_sensors:
pygame.draw.circle(screen, (255, 255, 255), (rotated_p), 2)
if show_sensors:
pygame.display.update()

return sensor_obs
# Return the distance for the arm.
return i

def make_sonar_arm(self, x, y, spread=10, distance=20, num_points=39):
    """Build the points of one sonar arm, laid out flat along +x.

    The arm is built unrotated because it is rotated about the centre
    ``(x, y)`` later.

    Args:
        x: x coordinate of the centre the arm extends from.
        y: y coordinate shared by every point of the flat arm.
        spread: gap between consecutive sensor points (default 10).
        distance: extra gap before the first sensor point (default 20).
        num_points: number of sensor points on the arm (default 39, the
            count produced by the original ``range(1, 40)``).

    Returns:
        list: ``(px, py)`` tuples ordered from nearest to farthest.
    """
    return [
        (distance + x + (spread * step), y)
        for step in range(1, num_points + 1)
    ]

def get_rotated_point(self, x_1, y_1, x_2, y_2, radians):
radians += 1.5 # I have no idea why I have to do this.
# Rotate x_2, y_2 around x_1, y_1 by angle.
x_change = (x_2 - x_1) * math.cos(radians) + \
(y_2 - y_1) * math.sin(radians)
Expand All @@ -223,13 +264,12 @@ def get_rotated_point(self, x_1, y_1, x_2, y_2, radians):
return int(new_x), int(new_y)

def get_track_or_not(self, reading):
# Check the colors returned and convert to a 1 or a 0.
# Reading[0] is 255 when it's red.
# Reading[2] is 255 when it's blue.
# Reading[1] is 255 when it's green.
if reading == THECOLORS['blue']:
return 1 # Sensor is on a ball.
elif reading == THECOLORS['red']:
return 2 # Sensor is on a wall.
else:
if reading == THECOLORS['black']:
return 0
else:
return 1

if __name__ == "__main__":
    # Run the simulation forever with uniformly random actions.
    # frame_step treats 0 as turn left, 1 as turn right; 2 changes nothing.
    game_state = GameState()
    while True:
        action = random.randint(0, 2)
        game_state.frame_step(action)
Loading

0 comments on commit 279dbbc

Please sign in to comment.