Update pypop7/benchmarks/gymnasium.py and its test functions #379

Closed
wants to merge 3 commits into from
192 changes: 192 additions & 0 deletions pypop7/benchmarks/gymnasium.py
@@ -37,3 +37,195 @@ def __call__(self, x):
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Ant(object):  # linear neural network
    """Control of Ant via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Ant-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Ant via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class HalfCheetah(object):  # linear neural network
    """Control of HalfCheetah via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('HalfCheetah-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of HalfCheetah via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Hopper(object):  # linear neural network
    """Control of Hopper via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Hopper-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Hopper via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Humanoid(object):  # linear neural network
    """Control of Humanoid via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Humanoid-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Humanoid via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Swimmer(object):  # linear neural network
    """Control of Swimmer via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Swimmer-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Swimmer via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Walker2d(object):  # linear neural network
    """Control of Walker2d via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Walker2d-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Walker2d via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)
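
A minimal usage sketch (not part of this diff): each of the six wrappers above reshapes the flat input x into an (action_dim, observation_dim) weight matrix, so the search dimensionality handed to an optimizer is len(env.observation) * env.action_dim. The snippet below, which assumes gymnasium and its MuJoCo environments are installed, evaluates one random linear policy on Ant (a hypothetical choice; any of the other wrappers works the same way).

import numpy as np

from pypop7.benchmarks.gymnasium import Ant

env = Ant()
ndim = len(env.observation) * env.action_dim  # one weight per (action, observation) pair
x = 0.1 * np.random.default_rng(0).standard_normal(ndim)  # a random flat weight vector
fitness = env(x)  # negative cumulative reward over at most 1000 steps
print(fitness)
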
86 changes: 85 additions & 1 deletion pypop7/benchmarks/test_gymnasium.py
@@ -1,6 +1,6 @@
import numpy as np

from pypop7.benchmarks.gymnasium import Cartpole
from pypop7.benchmarks.gymnasium import Cartpole, Ant, HalfCheetah, Hopper, Humanoid, Swimmer, Walker2d
from pypop7.optimizers.es.maes import MAES as Controller


@@ -16,3 +16,87 @@ def testCartpole():
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testAnt():
    env = Ant()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testHalfCheetah():
    env = HalfCheetah()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testHopper():
    env = Hopper()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testHumanoid():
    env = Humanoid()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testSwimmer():
    env = Swimmer()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testWalker2d():
    env = Walker2d()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())
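
For completeness, a hypothetical convenience entry point (not part of the diff) that runs every new smoke test in sequence; each test caps MAES at 7 fitness evaluations, so the run only checks that the environments build and a single optimization step completes.

if __name__ == '__main__':
    testAnt()
    testHalfCheetah()
    testHopper()
    testHumanoid()
    testSwimmer()
    testWalker2d()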
