Skip to content

Commit

Permalink
style(ai_trainer): use pep8 style guide for trainer
Browse files Browse the repository at this point in the history
  • Loading branch information
Flecart committed Jul 31, 2022
1 parent 69325ad commit 1028285
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 83 deletions.
92 changes: 52 additions & 40 deletions ai_trainer/board.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,34 @@
HUNTER = 1
BEAR = 2


class Board:
"""
Board abstraction
"""
adjacent = [[1,2,3], #0
[0,3,4],
[0,3,6], #2
[0,1,2,5],
[1,7,8], #4
[3,9,10,11],
[2,12,13], #6
[4,8,14],
[7,4,14,9], #8
[8, 10,5,15],
[5,9,11,15],#10
[5,10,15,12],
[11,6,16,13],#12
[6,12,16],
[7,8,18],#14
[9,10,11,17],
[12,13,19], #16
[15,18,19,20],
[14,17,20], #18
adjacent = [[1, 2, 3], # 0
[0, 3, 4],
[0, 3, 6], # 2
[0, 1, 2, 5],
[1, 7, 8], # 4
[3, 9, 10, 11],
[2, 12, 13], # 6
[4, 8, 14],
[7, 4, 14, 9], # 8
[8, 10, 5, 15],
[5, 9, 11, 15], # 10
[5, 10, 15, 12],
[11, 6, 16, 13], # 12
[6, 12, 16],
[7, 8, 18], # 14
[9, 10, 11, 17],
[12, 13, 19], # 16
[15, 18, 19, 20],
[14, 17, 20], # 18
[16, 17, 20],
[18, 17, 19]]

def __init__(self, size: int, default_char = '_'):
def __init__(self, size: int, default_char='_'):
self._default_char = default_char
self._cells = [default_char] * size
self._last_action = None
Expand All @@ -51,7 +52,7 @@ def get_hash(self) -> str:
def get_cells(self) -> list[str]:
""" get cells """
return self._cells

def set_cells(self, cells: list[str]) -> None:
""" set cells """
self._cells = cells
Expand All @@ -64,22 +65,23 @@ def get_default_char(self) -> str:
""" get default character """
return self._default_char


class Game:
"""
Game abstraction
"""
end_states = [ '2111_________________',
'1_21__1______________',
'__1___2_____11_______',
'______1_____12__1____',
'____________11__2__1_',
'________________11_21',
'_________________1112',
'______________1__12_1',
'_______11_____2___1__',
'____1__21_____1______',
'_1__2__11____________',
'12_11________________']
end_states = ['2111_________________',
'1_21__1______________',
'__1___2_____11_______',
'______1_____12__1____',
'____________11__2__1_',
'________________11_21',
'_________________1112',
'______________1__12_1',
'_______11_____2___1__',
'____1__21_____1______',
'_1__2__11____________',
'12_11________________']

def __init__(self, board: Board, player1, player2, max_turns: int = 300):
# pylint: disable=locally-disabled, import-outside-toplevel
Expand All @@ -93,8 +95,6 @@ def __init__(self, board: Board, player1, player2, max_turns: int = 300):
self._max_turns: int = max_turns
self._winner: 0 | 1 | 2 = None



def has_ended(self) -> bool:
"""
Check if the game has ended
Expand All @@ -118,7 +118,7 @@ def play(self) -> int:
"""
Play the game
"""

while True:
self._player_1.move(self._board)
self._player_1.add_state(self._board.get_hash())
Expand Down Expand Up @@ -147,16 +147,28 @@ def train(self, n_times: int = 100) -> None:
self.apply_reward()
self.reset()

self._player_1.save_policy(n_times, self._player_2.get_state_info(n_times))
self._player_2.save_policy(n_times, self._player_1.get_state_info(n_times))
self._player_1.save_policy(
n_times,
self._player_2.get_state_info(n_times)
)
self._player_2.save_policy(
n_times,
self._player_1.get_state_info(n_times)
)
print("Saved policy")

def apply_reward(self) -> None:
"""
Apply reward to the players
"""
bear = self._player_1 if self._player_1.get_symbol() == '2' else self._player_2
hunter = self._player_2 if self._player_1.get_symbol() == '2' else self._player_1
bear = (
self._player_1 if self._player_1.get_symbol() == '2' else
self._player_2
)
hunter = (
self._player_2 if self._player_1.get_symbol() == '2' else
self._player_1
)
if self._winner == HUNTER:
bear.feed_reward(has_won=False)
hunter.feed_reward(has_won=True)
Expand Down
22 changes: 18 additions & 4 deletions ai_trainer/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@
This module handles all the logic to start the game
"""
import argparse
import random
from board import Board, Game
from player import AIPlayer
import random


DEFAULT_NO_PLAYER = 'random'
DEFAULT_HUNTER_POSITION = [0, 1, 2]
DEFAULT_BEAR_POSITION = [20]


def inizialize_board() -> Board:
"""
Initialize the board
"""

board = Board(21)
board[0] = '1'
board[1] = '1'
Expand All @@ -30,6 +34,7 @@ def _parse_arguments():
parser.add_argument('--seed', type=int, default=None)
return parser.parse_args()


if __name__ == '__main__':
args = _parse_arguments()
hunter_player_file = args.hunter_player
Expand All @@ -39,9 +44,18 @@ def _parse_arguments():
if args.seed is not None:
random.seed(args.seed)

hunter_player = AIPlayer(DEFAULT_HUNTER_POSITION, 'hunter', '1', loss_reward=-5, win_reward=100)
bear_player = AIPlayer(DEFAULT_BEAR_POSITION, 'bear', '2', loss_reward=-5, win_reward=1)

hunter_player = AIPlayer(
DEFAULT_HUNTER_POSITION,
'hunter',
'1',
loss_reward=-5, win_reward=100
)
bear_player = AIPlayer(
DEFAULT_BEAR_POSITION,
'bear',
'2',
loss_reward=-5, win_reward=1
)

if hunter_player_file != DEFAULT_NO_PLAYER:
hunter_player.load_policy(hunter_player_file)
Expand Down
45 changes: 29 additions & 16 deletions ai_trainer/dump_pickle.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,41 @@
import argparse
import argparse
import pickle

parser = argparse.ArgumentParser()

parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='input file', required=True)
parser.add_argument('-d', '--dump', type=bool, help='dump the state', default=False)
parser.add_argument('--reverse', type=bool, help='reverse the input file', default=False)
parser.add_argument('--state', help='display valueof the state', default = '')
parser.add_argument(
'-d', '--dump',
type=bool,
help='dump the state', default=False
)
parser.add_argument(
'--reverse',
type=bool,
help='reverse the input file',
default=False
)
parser.add_argument('--state', help='display valueof the state', default='')

args = parser.parse_args()
input = args.input
state = args.state
with open(input, 'rb') as f:
input = args.input
state = args.state
with open(input, 'rb') as f:
data: dict[str, float] = pickle.load(f)
# sort my data dict by value


states = data['states_value'].copy()
data['states_value'] = None
data['states_value'] = None

print(data)
print(data)
if state == '' and args.dump:
states = {k: v for k, v in sorted(states.items(), key=lambda item: item[1], reverse = args.reverse)}
for key, value in states.items():
states = {
k: v for k, v in sorted(
states.items(), key=lambda item: item[1],
reverse=args.reverse
)
}
for key, value in states.items():
print(key, value)
else:
else:
print("the value of the state is:")
print(f"{state}: {data[state]}")
print(f"{state}: {data[state]}")
Loading

0 comments on commit 1028285

Please sign in to comment.