Update pypop7/benchmarks/gymnasium.py and its test functions #379

Closed
wants to merge 3 commits into from
192 changes: 192 additions & 0 deletions pypop7/benchmarks/gymnasium.py
@@ -37,3 +37,195 @@ def __call__(self, x):
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Ant(object):  # linear neural network
    """Control of Ant via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Ant-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Ant via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class HalfCheetah(object):  # linear neural network
    """Control of HalfCheetah via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('HalfCheetah-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of HalfCheetah via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Hopper(object):  # linear neural network
    """Control of Hopper via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Hopper-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Hopper via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Humanoid(object):  # linear neural network
    """Control of Humanoid via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Humanoid-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Humanoid via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Swimmer(object):  # linear neural network
    """Control of Swimmer via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Swimmer-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Swimmer via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)


class Walker2d(object):  # linear neural network
    """Control of Walker2d via a linear neural network.
    """
    def __init__(self):
        self.env = gym.make('Walker2d-v2')
        self.observation, _ = self.env.reset()
        self.action_dim = np.prod(self.env.action_space.shape)  # dimensionality of the (continuous) action space

    def __call__(self, x):
        """Control of Walker2d via a linear neural network.

        Parameters
        ----------
        x : ndarray
            input vector (flattened weights of the linear neural network).

        Returns
        -------
        fitness : float
            negative cumulative reward (for minimization rather than maximization).
        """
        fitness = 0
        self.observation, _ = self.env.reset()
        for i in range(1000):
            action = np.matmul(x.reshape(self.action_dim, -1), self.observation[:, np.newaxis])
            self.observation, reward, terminated, truncated, _ = self.env.step(action)
            fitness -= reward
            if terminated or truncated:
                return fitness  # for minimization (rather than maximization)
        return fitness  # for minimization (rather than maximization)
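
A minimal usage sketch (not part of this diff): each of the six wrappers above reshapes the flat input x into an (action_dim, observation_dim) weight matrix, so the search dimensionality handed to an optimizer is len(env.observation) * env.action_dim. The snippet below, which assumes gymnasium and its MuJoCo environments are installed, evaluates one random linear policy on Ant (a hypothetical choice; any of the other wrappers works the same way).

import numpy as np

from pypop7.benchmarks.gymnasium import Ant

env = Ant()
ndim = len(env.observation) * env.action_dim  # one weight per (action, observation) pair
x = 0.1 * np.random.default_rng(0).standard_normal(ndim)  # a random flat weight vector
fitness = env(x)  # negative cumulative reward over at most 1000 steps
print(fitness)
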
86 changes: 85 additions & 1 deletion pypop7/benchmarks/test_gymnasium.py
@@ -1,6 +1,6 @@
import numpy as np

from pypop7.benchmarks.gymnasium import Cartpole
from pypop7.benchmarks.gymnasium import Cartpole, Ant, HalfCheetah, Hopper, Humanoid, Swimmer, Walker2d
from pypop7.optimizers.es.maes import MAES as Controller


@@ -16,3 +16,87 @@ def testCartpole():
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testAnt():
    env = Ant()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testHalfCheetah():
    env = HalfCheetah()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testHopper():
    env = Hopper()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testHumanoid():
    env = Humanoid()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testSwimmer():
    env = Swimmer()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())


def testWalker2d():
    env = Walker2d()
    pro = {'fitness_function': env,
           'ndim_problem': len(env.observation) * env.action_dim,
           'lower_boundary': -10 * np.ones((len(env.observation) * env.action_dim,)),
           'upper_boundary': 10 * np.ones((len(env.observation) * env.action_dim,))}
    opt = {'max_function_evaluations': 7,
           'seed_rng': 0,
           'sigma': 3.0,
           'verbose': 1}
    controller = Controller(pro, opt)
    print(controller.optimize())
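
For completeness, a hypothetical convenience entry point (not part of the diff) that runs every new smoke test in sequence; each test caps MAES at 7 fitness evaluations, so the run only checks that the environments build and a single optimization step completes.

if __name__ == '__main__':
    testAnt()
    testHalfCheetah()
    testHopper()
    testHumanoid()
    testSwimmer()
    testWalker2d()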
