118 changes: 118 additions & 0 deletions .gitignore
@@ -0,0 +1,118 @@
# Pycharm
.idea

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
114 changes: 30 additions & 84 deletions gym_example.py
@@ -1,88 +1,34 @@
-from rover_domain_core_gym import RoverDomainCoreGym
-from mods import *
-import datetime
-from code.world_setup import * # Rover Domain Construction
-from code.agent_domain_2 import * # Rover Domain Dynamic
-from code.trajectory_history import * # Agent Position Trajectory History
-from code.reward_2 import * # Agent Reward
-from code.reward_history import * # Performance Recording
-from code.ccea_2 import * # CCEA
-from code.save_to_pickle import * # Save data as pickle file
-import random
+"""
+An example using the rover domain gym-style interface and the standard, included CCEA learning algorithms.
+This is a minimal example, showing the minimal Gym interface.
+"""
+from rover_domain_core_gym import RoverDomainGym
+import code.ccea_2 as ccea
+import code.agent_domain_2 as domain

-episodeCount = 1000 # Number of learning episodes

-stepCount = 5
-trainCountXEpisode = 3
-testCountXEpisode = 1
+episodeCount = 20
+sim = RoverDomainGym()

-# NOTE: Add the mod functions (variables) to run to modCol here:
-modCol = [
-    globalRewardMod,
-    differenceRewardMod,
-    dppRewardMod
-]
+ccea.initCcea(input_shape=8, num_outputs=2, num_units=32)(sim.data)

-i = 0
-while True:
-    print("Run %i"%(i))
-    random.shuffle(modCol)
-    for mod in modCol:
-        sim = RoverDomainCoreGym()
-        mod(sim)

-        #Trial Begins
-        createRewardHistory(sim.data)
-        initCcea(input_shape= 8, num_outputs=2, num_units = 32)(sim.data)
-        sim.data["Steps"] = stepCount

-        for episodeIndex in range(episodeCount):
-            sim.data["Episode Index"] = episodeIndex

-            # Training Phase

-            obs = sim.reset('Train', True)

-            for worldIndex in range(trainCountXEpisode):
-                sim.data["World Index"] = worldIndex
-                obs = sim.reset('Train', False)
-                assignCceaPolicies(sim.data)

-                done = False
-                stepCount = 0
-                while not done:
-                    doAgentProcess(sim.data)
-                    jointAction = sim.data["Agent Actions"]
-                    obs, reward, done, info = sim.step(jointAction)
-                    stepCount += 1

-                rewardCceaPolicies(sim.data)


-            # Testing Phase

-            obs = sim.reset('Test', True)
-            assignBestCceaPolicies(sim.data)

-            for worldIndex in range(testCountXEpisode):
-                sim.data["World Index"] = worldIndex
-                obs = sim.reset('Test', False)

-                done = False
-                stepCount = 0
-                while not done:
-                    doAgentProcess(sim.data)
-                    jointAction = sim.data["Agent Actions"]
-                    obs, reward, done, info = sim.step(jointAction)
-                    stepCount += 1

-            evolveCceaPolicies(sim.data)
-            updateRewardHistory(sim.data)

-        # Trial End
-        saveRewardHistory(sim.data)
-        saveTrajectoryHistories(sim.data)



-    i += 1
+for episodeIndex in range(episodeCount):
+    sim.data["Episode Index"] = episodeIndex

+    obs = sim.reset()
+    ccea.assignCceaPolicies(sim.data)

+    done = False
+    stepCount = 0
+    while not done:
+        # Select actions and create the joint action from the simulation data
+        # Note that this specific function extracts "obs" from the data structure directly, which is why obs is not
+        # directly used in this example.
+        domain.doAgentProcess(sim.data)
+        jointAction = sim.data["Agent Actions"]
+        obs, reward, done, info = sim.step(jointAction)
+        stepCount += 1

+    ccea.rewardCceaPolicies(sim.data)
+    ccea.evolveCceaPolicies(sim.data)
+    print(sim.data["Global Reward"])
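
The new minimal loop prints the global reward every episode but, unlike the replaced script, no longer keeps a reward history. As a rough sketch of how that bookkeeping could be layered back on top of the same interface, using only the modules and sim.data keys shown in the diff above (the rewardLog list and the output filename are illustrative names, not part of this PR):

# Sketch: record the per-episode global reward from the minimal gym-style loop.
# Uses only the modules and sim.data keys shown in the diff above; "rewardLog"
# and "global_reward_log.txt" are illustrative names, not part of this PR.
from rover_domain_core_gym import RoverDomainGym
import code.ccea_2 as ccea
import code.agent_domain_2 as domain

episodeCount = 20
sim = RoverDomainGym()
ccea.initCcea(input_shape=8, num_outputs=2, num_units=32)(sim.data)

rewardLog = []  # one global-reward value per episode
for episodeIndex in range(episodeCount):
    sim.data["Episode Index"] = episodeIndex
    obs = sim.reset()
    ccea.assignCceaPolicies(sim.data)

    done = False
    while not done:
        domain.doAgentProcess(sim.data)  # writes "Agent Actions" into sim.data
        obs, reward, done, info = sim.step(sim.data["Agent Actions"])

    ccea.rewardCceaPolicies(sim.data)
    ccea.evolveCceaPolicies(sim.data)
    rewardLog.append(sim.data["Global Reward"])  # record instead of only printing

# Illustrative: dump the learning curve for later inspection.
with open("global_reward_log.txt", "w") as f:
    for r in rewardLog:
        f.write(str(r) + "\n")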