-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy patheasy21.py
More file actions
86 lines (69 loc) · 2.51 KB
/
easy21.py
File metadata and controls
86 lines (69 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 11 15:21:25 2018
@author: Pan Zhao
"""
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 7 10:12:25 2018
@author: Boran Zhao
This code was written with reference to https://github.com/analog-rl/Easy21
"""
#%%
import random
import matplotlib.pyplot as plt
import copy
from enum import Enum
class Action(Enum):
    """The two moves a player can make: draw another card or stop."""

    hit = 0
    stick = 1

    @staticmethod
    def to_action(n):
        """Convert an integer code to an Action.

        0 maps to ``Action.hit``; every other value maps to ``Action.stick``.
        """
        if n == 0:
            return Action.hit
        return Action.stick

    @staticmethod
    def as_int(a):
        """Convert an Action to its integer code.

        ``Action.hit`` maps to 0; anything else maps to 1.
        """
        if a == Action.hit:
            return 0
        return 1
class Card:
    """A single randomly drawn card.

    ``value`` is an integer whose magnitude is uniform on 1..10. Black cards
    carry a positive value and red cards a negative one. ``is_black`` records
    the colour.
    """

    def __init__(self, force_black=False):
        """Draw a card; pass ``force_black=True`` to guarantee a black card."""
        magnitude = random.randint(1, 10)
        # The colour is only sampled when black is not forced: one outcome in
        # three (a draw of 3) yields a red card.
        drew_red = not force_black and random.randint(1, 3) == 3
        self.is_black = not drew_red
        self.value = -magnitude if drew_red else magnitude
class State:
    """Snapshot of a game: both running sums plus a terminal flag."""

    def __init__(self, dealer, player, is_terminal=False):
        """Store the dealer's sum, the player's sum and whether the game is over."""
        # dealer: the dealer's running sum
        # player: the player's running sum
        # is_terminal: True once the state is terminal
        self.dealer, self.player, self.is_terminal = dealer, player, is_terminal
class Environment:
    """Easy21 game dynamics: start-state generation and one-step transitions."""

    def __init__(self):
        """Record the sizes of the dealer, player and action dimensions."""
        self.dealer_value_count = 10  # [1:10], a black card is enforced at the start
        self.player_value_count = 21  # [1:21]
        self.action_count = 2  # hit and stick

    def gen_start_state(self):
        """Return a fresh non-terminal State where each side holds one black card."""
        return State(Card(True).value, Card(True).value)

    def step(self, state, action):
        """Apply ``action`` to ``state`` and return ``(new_state, reward)``.

        ``state`` itself is never modified; a shallow copy is updated and
        returned instead.

        * ``Action.hit``: the player draws one card. A player sum above 21 or
          below 1 is a bust: the new state is terminal and the reward is -1.
          Otherwise the state stays non-terminal and the reward is 0.
        * ``Action.stick``: the dealer draws cards until its sum is 17 or
          more, or it goes bust (sum above 21 or below 1). A dealer bust
          gives reward +1; otherwise the larger sum wins (+1 if the player's
          sum is larger, -1 if the dealer's is) and equal sums give 0. If
          ``state`` is already terminal the dealer draws nothing and the
          reward is 0.

        Any other ``action`` returns the unchanged copy with reward 0.
        """
        # A plain assignment would alias `state`, so work on a copy.
        new_state = copy.copy(state)
        reward = 0

        if action == Action.hit:
            new_state.player += Card().value
            if new_state.player > 21 or new_state.player < 1:
                new_state.is_terminal = True
                reward = -1
        elif action == Action.stick:
            while not new_state.is_terminal:
                new_state.dealer += Card().value
                if new_state.dealer > 21 or new_state.dealer < 1:
                    # Dealer bust: the player wins.
                    new_state.is_terminal = True
                    reward = 1
                elif new_state.dealer >= 17:
                    # Easy21 rule: the dealer sticks on any sum of 17 or
                    # greater, so a sum of exactly 17 must end the turn too.
                    new_state.is_terminal = True
                    if new_state.player > new_state.dealer:
                        reward = 1
                    elif new_state.player < new_state.dealer:
                        reward = -1

        return new_state, reward