diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..70eb1d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,118 @@ +# Pycharm +.idea + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/gym_example.py b/gym_example.py index 3b2df68..46a2c1a 100644 --- a/gym_example.py +++ b/gym_example.py @@ -1,88 +1,34 @@ -from rover_domain_core_gym import RoverDomainCoreGym -from mods import * -import datetime -from code.world_setup import * # Rover Domain Construction -from code.agent_domain_2 import * # Rover Domain Dynamic -from code.trajectory_history import * # Agent Position Trajectory History -from code.reward_2 import * # Agent Reward -from code.reward_history import * # Performance Recording -from code.ccea_2 import * # CCEA -from code.save_to_pickle import * # Save data as pickle file -import random +""" +An example using the rover domain gym-style interface and the standard, included CCEA learning algorithms. +This is a minimal example, showing the minimal Gym interface. 
+""" +from rover_domain_core_gym import RoverDomainGym +import code.ccea_2 as ccea +import code.agent_domain_2 as domain +episodeCount = 1000 # Number of learning episodes -stepCount = 5 -trainCountXEpisode = 3 -testCountXEpisode = 1 -episodeCount = 20 +sim = RoverDomainGym() -# NOTE: Add the mod functions (variables) to run to modCol here: -modCol = [ - globalRewardMod, - differenceRewardMod, - dppRewardMod -] +ccea.initCcea(input_shape=8, num_outputs=2, num_units=32)(sim.data) -i = 0 -while True: - print("Run %i"%(i)) - random.shuffle(modCol) - for mod in modCol: - sim = RoverDomainCoreGym() - mod(sim) - - #Trial Begins - createRewardHistory(sim.data) - initCcea(input_shape= 8, num_outputs=2, num_units = 32)(sim.data) - sim.data["Steps"] = stepCount - - for episodeIndex in range(episodeCount): - sim.data["Episode Index"] = episodeIndex - - # Training Phase - - obs = sim.reset('Train', True) - - for worldIndex in range(trainCountXEpisode): - sim.data["World Index"] = worldIndex - obs = sim.reset('Train', False) - assignCceaPolicies(sim.data) - - done = False - stepCount = 0 - while not done: - doAgentProcess(sim.data) - jointAction = sim.data["Agent Actions"] - obs, reward, done, info = sim.step(jointAction) - stepCount += 1 - - rewardCceaPolicies(sim.data) - - - # Testing Phase - - obs = sim.reset('Test', True) - assignBestCceaPolicies(sim.data) - - for worldIndex in range(testCountXEpisode): - sim.data["World Index"] = worldIndex - obs = sim.reset('Test', False) - - done = False - stepCount = 0 - while not done: - doAgentProcess(sim.data) - jointAction = sim.data["Agent Actions"] - obs, reward, done, info = sim.step(jointAction) - stepCount += 1 - - evolveCceaPolicies(sim.data) - updateRewardHistory(sim.data) - - # Trial End - saveRewardHistory(sim.data) - saveTrajectoryHistories(sim.data) - - - - i += 1 \ No newline at end of file +for episodeIndex in range(episodeCount): + sim.data["Episode Index"] = episodeIndex + + obs = sim.reset() + ccea.assignCceaPolicies(sim.data) + + done = False + stepCount = 0 + while not done: + # Select actions and create the joint action from the simulation data + # Note that this specific function extracts "obs" from the data structure directly, which is why obs is not + # directly used in this example. + domain.doAgentProcess(sim.data) + jointAction = sim.data["Agent Actions"] + obs, reward, done, info = sim.step(jointAction) + stepCount += 1 + + ccea.rewardCceaPolicies(sim.data) + ccea.evolveCceaPolicies(sim.data) + print(sim.data["Global Reward"]) diff --git a/rover_domain_core_gym.py b/rover_domain_core_gym.py index 8af5890..125fb38 100644 --- a/rover_domain_core_gym.py +++ b/rover_domain_core_gym.py @@ -1,36 +1,36 @@ -# Dependencies: numpy, cython - -import datetime -from core import SimulationCore -import pyximport; pyximport.install() # For cython(pyx) code -from code.world_setup import * # Rover Domain Construction -from code.agent_domain_2 import * # Rover Domain Dynamic -from code.reward_2 import * # Agent Reward and Performance Recording -from code.trajectory_history import * # Record trajectory of agents for calculating rewards - - """ -Provides Open AI gym wrapper for rover domain selfulation core with some extra - gym-specific functionality. This is the gym equivalent to 'getSim()' in +Provides Open AI gym wrapper for rover domain simulation core with some extra + gym-specific functionality. This is the gym equivalent to 'getSim()' in the specific.py file. - + Get a default rover domain simulation with some default functionality. 
Users are encouraged to modify this function and save copies of it for each trial to use as a parameter reference. - + Set data["Reward Function"] to define the reward function callback Set data["Evaluation Function"] to define the evaluation function callback -Set data["Observation Function"] to define the observation funciton callback +Set data["Observation Function"] to define the observation function callback -Note: step function returns result of either the reward or evaluation function +Note: step function returns result of either the reward or evaluation function depending mode ("Train" vs "Test" respectively) -RoverDomainCoreGym should be mods +RoverDomainGym should be mods """ -class RoverDomainCoreGym(SimulationCore): + +from core import SimulationCore +import pyximport +import code.world_setup as world_setup # Rover Domain Construction +import code.agent_domain_2 as rover_domain # Rover Domain Dynamic +import code.reward_2 as rewards # Agent Reward and Performance Recording +from code.trajectory_history import * # Record trajectory of agents for calculating rewards + +pyximport.install() # For cython(pyx) code + + +class RoverDomainGym(SimulationCore): def __init__(self): SimulationCore.__init__(self) - + self.data["Number of Agents"] = 30 self.data["Number of POIs"] = 8 self.data["Minimum Distance"] = 1.0 @@ -40,31 +40,31 @@ def __init__(self): self.data["Number of Episodes"] = 5000 self.data["Specifics Name"] = "test" self.data["Mod Name"] = "global" - + self.data["World Index"] = 0 + # Add Rover Domain Construction Functionality # Note: reset() will generate random world based on seed self.data["World Width"] = 50 self.data["World Length"] = 50 - self.data['Poi Static Values'] = np.array([1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0]) + self.data['Poi Static Values'] = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) self.data['Poi Relative Static Positions'] = np.array([ - [0.0, 0.0], - [0.0, 1.0], - [1.0, 0.0], - [1.0, 1.0], - [1.0, 0.5], - [0.5, 1.0], + [0.0, 0.0], + [0.0, 1.0], + [1.0, 0.0], + [1.0, 1.0], + [1.0, 0.5], + [0.5, 1.0], [0.0, 5.0], [0.5, 0.0] ]) self.data['Agent Initialization Size'] = 0.1 - self.trainBeginFuncCol.append(blueprintStatic) - self.trainBeginFuncCol.append(blueprintAgentInitSize) - self.worldTrainBeginFuncCol.append(initWorld) - self.testBeginFuncCol.append(blueprintStatic) - self.testBeginFuncCol.append(blueprintAgentInitSize) - self.worldTestBeginFuncCol.append(initWorld) - - + self.trainBeginFuncCol.append(world_setup.blueprintStatic) + self.trainBeginFuncCol.append(world_setup.blueprintAgentInitSize) + self.worldTrainBeginFuncCol.append(world_setup.initWorld) + self.testBeginFuncCol.append(world_setup.blueprintStatic) + self.testBeginFuncCol.append(world_setup.blueprintAgentInitSize) + self.worldTestBeginFuncCol.append(world_setup.initWorld) + # Add Rover Domain Dynamic Functionality """ step() parameter [action] (2d numpy array with double precision): @@ -81,12 +81,10 @@ def __init__(self): called automatically by this object, no need to call it in a function collection """ - self.data["Observation Function"] = doAgentSense - self.worldTrainStepFuncCol.append(doAgentMove) - self.worldTestStepFuncCol.append(doAgentMove) + self.data["Observation Function"] = rover_domain.doAgentSense + self.worldTrainStepFuncCol.append(rover_domain.doAgentMove) + self.worldTestStepFuncCol.append(rover_domain.doAgentMove) - - # Add Agent Training Reward and Evaluation Functionality """ Training Mode: @@ -98,16 +96,18 @@ def __init__(self): step() return [reward] (double): 
Performance defined by data["Evaluation Function"] """ - self.data["Coupling"] = 6 + self.data["Coupling"] = 1 self.data["Observation Radius"] = 4.0 - self.data["Reward Function"] = assignGlobalReward - self.data["Evaluation Function"] = assignGlobalReward - + self.data["Reward Function"] = rewards.assignGlobalReward + self.data["Evaluation Function"] = rewards.assignGlobalReward + self.worldTrainBeginFuncCol.append(createTrajectoryHistories) self.worldTrainStepFuncCol.append(updateTrajectoryHistories) self.worldTestBeginFuncCol.append(createTrajectoryHistories) self.worldTestStepFuncCol.append(updateTrajectoryHistories) - + + # TODO make these be hidden class attributes, no reason to have them be lambdas + # TODO for what should be a fixed-environment scenario self.worldTrainBeginFuncCol.append( lambda data: data.update({"Gym Reward": np.zeros(data['Number of Agents'])}) ) @@ -124,12 +124,12 @@ def __init__(self): lambda data: data["Evaluation Function"](data) ) self.worldTestEndFuncCol.append( - lambda data: data.update({"Gym Reward": data["Global Reward"]}) - ) - + lambda data: data.update({"Gym Reward": data["Global Reward"]}) + ) + # Setup world for first time - self.reset(newMode = "Train", fullyResetting = True) - + self.reset(new_mode="Train", fully_resetting=True) + def step(self, action): """ Proceed 1 time step in world if world is not done @@ -150,10 +150,9 @@ def step(self, action): # Store Action for other functions to use self.data["Agent Actions"] = action - # If not done, do step functionality if self.data["Step Index"] < self.data["Steps"]: - + # Do Step Functionality self.data["Agent Actions"] = action if self.data["Mode"] == "Train": @@ -166,10 +165,10 @@ def step(self, action): raise Exception( 'data["Mode"] should be set to "Train" or "Test"' ) - + # Increment step index for future step() calls self.data["Step Index"] += 1 - + # Check is world is done; if so, do ending functions if self.data["Step Index"] >= self.data["Steps"]: if self.data["Mode"] == "Train": @@ -182,19 +181,18 @@ def step(self, action): raise Exception( 'data["Mode"] should be set to "Train" or "Test"' ) - + # Observe state, store result in self.data self.data["Observation Function"](self.data) - + # Check if simulation is done done = False if self.data["Step Index"] >= self.data["Steps"]: done = True - - return self.data["Agent Observations"], self.data["Gym Reward"], \ - done, self.data - - def reset(self, newMode = None, fullyResetting = False): + + return self.data["Agent Observations"], self.data["Gym Reward"], done, self.data + + def reset(self, new_mode=None, fully_resetting=False): """ Reset the world @@ -203,7 +201,7 @@ def reset(self, newMode = None, fullyResetting = False): training mode. Set to "Test" to enable functions associated with testing mode instead. If None, does not change current simulation mode. - fullyResetting (boolean): If true, do addition functions + fully_resetting (boolean): If true, do addition functions (self.trainBeginFuncCol) when setting up world. Typically used for resetting the world for a different episode and/or different training/testing simulation mode. 
@@ -214,31 +212,28 @@ def reset(self, newMode = None, fullyResetting = False): """ # Zero step index for future step() calls self.data["Step Index"] = 0 - + # Set mode if not None - if newMode != None: - self.data["Mode"] = newMode - + if new_mode is not None: + self.data["Mode"] = new_mode + # Execute setting functionality if self.data["Mode"] == "Train": - if fullyResetting: + if fully_resetting: for func in self.trainBeginFuncCol: func(self.data) for func in self.worldTrainBeginFuncCol: func(self.data) elif self.data["Mode"] == "Test": - if fullyResetting: + if fully_resetting: for func in self.testBeginFuncCol: func(self.data) for func in self.worldTestBeginFuncCol: func(self.data) else: raise Exception('data["Mode"] should be set to "Train" or "Test"') - + # Observe state, store result in self.data self.data["Observation Function"](self.data) - + return self.data["Agent Observations"] - -def assign(data, key, value): - data[key] = value \ No newline at end of file
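
The new gym_example.py only exercises the training mode. A testing-mode rollout, using the reset(new_mode="Test", fully_resetting=True) path described in the wrapper docstring, would look roughly like the sketch below. This is a sketch only, not part of the patch: it assumes it is appended after the training loop in gym_example.py and that ccea.assignBestCceaPolicies is still available, as it was in the previous version of gym_example.py.

    # Hypothetical evaluation pass, appended after the training loop in gym_example.py.
    # Assumes ccea.initCcea(...) has already been run on sim.data and that
    # ccea.assignBestCceaPolicies exists, as in the previous gym_example.py.
    obs = sim.reset(new_mode="Test", fully_resetting=True)  # "Test": step() reports the evaluation function
    ccea.assignBestCceaPolicies(sim.data)                    # deploy the best policy found for each agent

    done = False
    while not done:
        # doAgentProcess reads "Agent Observations" from sim.data and writes "Agent Actions"
        domain.doAgentProcess(sim.data)
        jointAction = sim.data["Agent Actions"]
        obs, reward, done, info = sim.step(jointAction)

    # At episode end the testing hooks run the evaluation function and copy the
    # global reward into "Gym Reward", so this reports the team's performance.
    print("Best-policy global reward:", sim.data["Global Reward"])

The callbacks named in the wrapper docstring can be reconfigured the same way the example configures the CCEA: assign a different callable to sim.data["Reward Function"], sim.data["Evaluation Function"], or sim.data["Observation Function"] before training, without subclassing RoverDomainGym.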