diff --git a/code/agent_domain.py b/code/agent_domain.py
deleted file mode 100644
index 02a5e32..0000000
--- a/code/agent_domain.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# WARNING: Deprecated until this is like agent_domain_2
-# from numpy import matmul, array, dot, zeros, linalg, sin
-#
-#
-#
-# def doAgentSense(data):
-#     """
-#     Sensor model is
-#     Where a means (other) agent, p means poi, and the rest are the quadrants
-#     """
-#     number_agents = data['Number of Agents']
-#     number_pois = data['Number of POIs']
-#     poiValueCol = data['Poi Values']
-#     agentPositionCol = data["Agent Positions"]
-#     poiPositionCol = data["Poi Positions"]
-#     orientationCol = data["Agent Orientations"]
-#     minDistanceSqr = data["Minimum Distance"] ** 2
-#     observationCol = [None] * number_agents
-#
-#     for agentIndex in range(number_agents):
-#
-#         # recover agent position and orientation
-#         agentPosition = agentPositionCol[agentIndex]
-#         agentOrientation = orientationCol[agentIndex]
-#         c = agentOrientation[0]; s = agentOrientation[1]
-#
-#         # initialize observation to zero
-#         observation = zeros(8)
-#
-#         # calculate observation values due to other agents
-#         for otherAgentIndex in range(number_agents):
-#
-#             # agents do not sense self (ergo skip self comparison)
-#             if agentIndex == otherAgentIndex:
-#                 continue
-#
-#             # Get global separation vector between the two agents
-#             otherPosition = agentPositionCol[otherAgentIndex]
-#             globalFrameSeparation = otherPosition - agentPosition
-#
-#             # Translate separation to agent frame using inverse rotation matrix
-#             agentFrameSeparation = matmul(array([[c, s], [-s, c]]), globalFrameSeparation)
-#             distanceSqr = dot(agentFrameSeparation, agentFrameSeparation)
-#
-#             # By bounding distance value we implicitly bound sensor values
-#             if distanceSqr < minDistanceSqr:
-#                 distanceSqr = minDistanceSqr
-#
-#
-#             # other is east of agent
-#             if agentFrameSeparation[0] > 0:
-#                 # other is north-east of agent
-#                 if agentFrameSeparation[1] > 0:
-#                     observation[0] += 1.0 / distanceSqr
-#                 else: # other is south-east of agent
-#                     observation[3] += 1.0 / distanceSqr
-#             else: # other is west of agent
-#                 # other is north-west of agent
-#                 if agentFrameSeparation[1] > 0:
-#                     observation[1] += 1.0 / distanceSqr
-#                 else: # other is south-west of agent
-#                     observation[2] += 1.0 / distanceSqr
-#
-#         # calculate observation values due to pois
-#         for poiIndex in range(number_pois):
-#
-#             # Get global separation vector between the two agents
-#             poiPosition = poiPositionCol[poiIndex]
-#             globalFrameSeparation = poiPosition - agentPosition
-#
-#             # Translate separation to agent frame using inverse rotation matrix
-#             agentFrameSeparation = matmul(array([[c, s], [-s, c]]), globalFrameSeparation)
-#             distanceSqr = dot(agentFrameSeparation, agentFrameSeparation)
-#
-#             # By bounding distance value we implicitly bound sensor values
-#             if distanceSqr < minDistanceSqr:
-#                 distanceSqr = minDistanceSqr
-#
-#
-#             # poi is east of agent
-#             if agentFrameSeparation[0] > 0:
-#                 # poi is north-east of agent
-#                 if agentFrameSeparation[1] > 0:
-#                     observation[4] += poiValueCol[poiIndex] / distanceSqr
-#                 else: # poi is south-east of agent
-#                     observation[7] += poiValueCol[poiIndex] / distanceSqr
-#             else: # poi is west of agent
-#                 # poi is north-west of agent
-#                 if agentFrameSeparation[1] > 0:
-#                     observation[5] += poiValueCol[poiIndex] / distanceSqr
-#                 else: # poi is south-west of agent
-#                     observation[6] += poiValueCol[poiIndex] / distanceSqr
-#
-#         observationCol[agentIndex] = observation
-#     data["Agent Observations"] = observationCol
-#
-#
-# def doAgentProcess(data):
-#     number_agents = data['Number of Agents']
-#     actionCol = [None] * number_agents
-#     policyCol = data["Agent Policies"]
-#     observationCol = data["Agent Observations"]
-#     for agentIndex in range(number_agents):
-#         actionCol[agentIndex] = policyCol[agentIndex].get_action(observationCol[agentIndex])
-#     data["Agent Actions"] = actionCol
-#
-#
-# def doAgentMove(data):
-#     worldWidth = data["World Width"]
-#     worldLength = data["World Length"]
-#     number_agents = data['Number of Agents']
-#     actionCol = data["Agent Actions"]
-#     positionCol = data["Agent Positions"]
-#     orientationCol = data["Agent Orientations"]
-#
-#     # move all agents
-#     for agentIndex in range(number_agents):
-#         # recover agent position and orientation
-#         agentPosition = positionCol[agentIndex]
-#         agentOrientation = orientationCol[agentIndex]
-#         c = orientationCol[agentIndex][0]; s = orientationCol[agentIndex][1]
-#
-#         # turn action into global frame motion
-#         agentFrameMotion = actionCol[agentIndex]
-#         globalFrameMotion = matmul(array([[c, -s], [s, c]]), agentFrameMotion)
-#
-#         # globally move and reorient agent
-#         positionCol[agentIndex] += globalFrameMotion
-#         if (globalFrameMotion == zeros(2)).all():
-#             orientationCol[agentIndex] = array([1.0,0.0])
-#         else:
-#             orientationCol[agentIndex] = globalFrameMotion / linalg.norm(globalFrameMotion)
-#
-#         # # Check if action moves agent within the world bounds
-#         # if positionCol[agentIndex][0] > worldWidth:
-#         #     positionCol[agentIndex][0] = worldWidth
-#         # elif positionCol[agentIndex][0] < 0.0:
-#         #     positionCol[agentIndex][0] = 0.0
-#         #
-#         # if positionCol[agentIndex][1] > worldLength:
-#         #     positionCol[agentIndex][1] = worldLength
-#         # elif positionCol[agentIndex][1] < 0.0:
-#         #     positionCol[agentIndex][1] = 0.0
-#
-#     data["Agent Positions"] = positionCol
-#     data["Agent Orientations"] = orientationCol
\ No newline at end of file
diff --git a/code/agent_domain_2.pyx b/code/agent_domain.pyx
similarity index 100%
rename from code/agent_domain_2.pyx
rename to code/agent_domain.pyx
diff --git a/code/ccea.py b/code/ccea.py
deleted file mode 100644
index 31104e0..0000000
--- a/code/ccea.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# WARNING: Deprecated until this updated to be like ccea_2
-# import numpy as np
-# import math
-# import torch
-# import torch.nn as nn
-# import torch.nn.functional as F
-# import random
-# from numba import jit
-# from torch.autograd import Variable
-# from torch import Tensor
-#
-#
-#
-# class RandomPolicy:
-#     def __init__(self, output_shape, low=-1, high=1):
-#         self.output_shape = output_shape
-#         self.low = low
-#         self.high = high
-#
-#     def get_next(self,state):
-#         return np.random.uniform(self.low, self.high, self.output_shape)
-#
-# class Evo_MLP(nn.Module):
-#     def __init__(self, input_shape, num_outputs, num_units=16):
-#         super(Evo_MLP, self).__init__()
-#
-#         self.input_shape = input_shape
-#         self.num_outputs = num_outputs
-#         self.num_units = num_units
-#         self.fitness = float("-inf")
-#
-#         self.fc1 = nn.Linear(input_shape, num_units)
-#         self.fc2 = nn.Linear(num_units, num_outputs)
-#
-#         for param in self.parameters():
-#             param.requires_grad = False
-#
-#
-#     def get_action(self, state):
-#         x = Variable(torch.FloatTensor(state))
-#         x = self.fc1(x).tanh()
-#         y = self.fc2(x).tanh()
-#         return y.numpy()
-#
-#     def init_weights(m):
-#         #Not implemented
-#         if type(m) == nn.Linear:
-#             m.weight.data.normal_(0, 2)
-#
-#     def mutate(self):
-#         m = 10
-#         mr = 0.01
-#         random_w1 = np.random.normal(0, m, list(self.fc1.weight.size()))
-#         random_w1 *= (np.random.uniform(size = list(self.fc1.weight.size())) < mr).astype(float)
-#         random_w2 = np.random.normal(0, m, list(self.fc2.weight.size()))
-#         random_w2 *= (np.random.uniform(size = list(self.fc2.weight.size())) < mr).astype(float)
-#         self.fc1.weight += Tensor(random_w1)
-#         self.fc2.weight += Tensor(random_w2)
-#
-#     def copy(self):
-#         newMlp = Evo_MLP(self.input_shape, self.num_outputs, self.num_units)
-#         newMlp.load_state_dict(self.state_dict())
-#         return newMlp
-#
-#
-#
-# def initCcea(input_shape, num_outputs, num_units=16):
-#     def initCceaGo(data):
-#         number_agents = data['Number of Agents']
-#         populationCol = [[Evo_MLP(input_shape,num_outputs,num_units) for i in range(data['Trains per Episode'])] for j in range(number_agents)]
-#         data['Agent Populations'] = populationCol
-#     return initCceaGo
-#
-# def assignCceaPolicies(data):
-#     number_agents = data['Number of Agents']
-#     populationCol = data['Agent Populations']
-#     worldIndex = data["World Index"]
-#     policyCol = [None] * number_agents
-#     for agentIndex in range(number_agents):
-#         policyCol[agentIndex] = populationCol[agentIndex][worldIndex]
-#     data["Agent Policies"] = policyCol
-#
-# def assignBestCceaPolicies(data):
-#     number_agents = data['Number of Agents']
-#     populationCol = data['Agent Populations']
-#     policyCol = [None] * number_agents
-#     for agentIndex in range(number_agents):
-#         policyCol[agentIndex] = max(populationCol[agentIndex], key = lambda policy: policy.fitness)
-#         #policyCol[agentIndex] = populationCol[agentIndex][0]
-#     data["Agent Policies"] = policyCol
-#
-# def rewardCceaPolicies(data):
-#     policyCol = data["Agent Policies"]
-#     number_agents = data['Number of Agents']
-#     rewardCol = data["Agent Rewards"]
-#     for agentIndex in range(number_agents):
-#         policyCol[agentIndex].fitness = rewardCol[agentIndex]
-#
-# def evolveCceaPolicies(data):
-#     number_agents = data['Number of Agents']
-#     populationCol = data['Agent Populations']
-#     for agentIndex in range(number_agents):
-#         population = populationCol[agentIndex]
-#         newPopulation = [None] * len(population)
-#
-#         # Binary Tournament
-#         newPolicyIndex = 0
-#         for matchIndex in range(len(population)//2):
-#             if population[2 * matchIndex].fitness > population[2 * matchIndex + 1].fitness:
-#                 newPopulation[matchIndex] = population[2 * matchIndex]
-#             else:
-#                 newPopulation[matchIndex] = population[2 * matchIndex + 1]
-#             newPolicyIndex += 1
-#
-#         # Random fill with mutation
-#         elite = newPopulation[:newPolicyIndex]
-#         while newPolicyIndex < len(population):
-#             newPolicy = random.choice(elite).copy()
-#             newPolicy.mutate()
-#             newPopulation[newPolicyIndex] = newPolicy
-#             newPolicyIndex += 1
-#
-#         random.shuffle(newPopulation)
-#         data['Agent Populations'][agentIndex] = newPopulation
-#
\ No newline at end of file
diff --git a/code/ccea_2.pyx b/code/ccea.pyx
similarity index 100%
rename from code/ccea_2.pyx
rename to code/ccea.pyx
diff --git a/code/reward.py b/code/reward.py
deleted file mode 100644
index 7d9269d..0000000
--- a/code/reward.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# WARNING: DEPRECATED, use reward_2, this will be updated later
-# from numpy import dot
-#
-# def assignGlobalReward(data):
-#     agentPositionHistory = data["Agent Position History"]
-#     number_agents = data['Number of Agents']
-#     number_pois = data['Number of POIs']
-#     poiValueCol = data['Poi Values']
-#     poiPositionCol = data["Poi Positions"]
-#     minDistanceSqr = data["Minimum Distance"] ** 2
-#     historyStepCount = data["Steps"] + 1
-#     coupling = data["Coupling"]
-#     observationRadiusSqr = data["Observation Radius"] ** 2
-#
-#     globalReward = 0.0
-#
-#     for poiIndex in range(number_pois):
-#         poiPosition = poiPositionCol[poiIndex]
-#         closestObsDistanceSqr = float("inf")
-#         for stepIndex in range(historyStepCount):
-#             # Count how many agents observe poi, update closest distance if necessary
-#             observerCount = 0
-#             stepClosestObsDistanceSqr = float("inf")
-#             for agentIndex in range(number_agents):
-#                 # Calculate separation distance between poi and agent
-#                 agentPosition = agentPositionHistory[stepIndex, agentIndex]
-#                 separation = poiPosition - agentPosition
-#                 distanceSqr = dot(separation, separation)
-#
-#                 # Check if agent observes poi, update closest step distance
-#                 if distanceSqr < observationRadiusSqr:
-#                     observerCount += 1
-#                     if distanceSqr < stepClosestObsDistanceSqr:
-#                         stepClosestObsDistanceSqr = distanceSqr
-#
-#
-#             # update closest distance only if poi is observed
-#             if observerCount >= coupling:
-#                 if stepClosestObsDistanceSqr < closestObsDistanceSqr:
-#                     closestObsDistanceSqr = stepClosestObsDistanceSqr
-#
-#         # add to global reward if poi is observed
-#         if closestObsDistanceSqr < observationRadiusSqr:
-#             if closestObsDistanceSqr < minDistanceSqr:
-#                 closestObsDistanceSqr = minDistanceSqr
-#             globalReward += poiValueCol[poiIndex] / closestObsDistanceSqr
-#
-#     data["Global Reward"] = globalReward
-#     data["Agent Rewards"] = [globalReward] * number_agents
-#
-# def assignStepGlobalReward(data):
-#     agentPositionHistory = data["Agent Position History"]
-#     number_agents = data['Number of Agents']
-#     number_pois = data['Number of POIs']
-#     poiValueCol = data['Poi Values']
-#     poiPositionCol = data["Poi Positions"]
-#     minDistanceSqr = data["Minimum Distance"] ** 2
-#     historyStepCount = data["Steps"] + 1
-#     coupling = data["Coupling"]
-#     observationRadiusSqr = data["Observation Radius"] ** 2
-#
-#     globalReward = 0.0
-#
-#     for poiIndex in range(number_pois):
-#         poiPosition = poiPositionCol[poiIndex]
-#         for stepIndex in range(historyStepCount):
-#             # Count how many agents observe poi, update closest distance if necessary
-#             observerCount = 0
-#             stepClosestObsDistanceSqr = float("inf")
-#             for agentIndex in range(number_agents):
-#                 # Calculate separation distance between poi and agent
-#                 agentPosition = agentPositionHistory[stepIndex, agentIndex]
-#                 separation = poiPosition - agentPosition
-#                 distanceSqr = dot(separation, separation)
-#
-#                 # Check if agent observes poi, update closest step distance
-#                 if distanceSqr < observationRadiusSqr:
-#                     observerCount += 1
-#                     if distanceSqr < stepClosestObsDistanceSqr:
-#                         stepClosestObsDistanceSqr = distanceSqr
-#
-#
-#             # update closest distance only if poi is observed
-#             if observerCount >= coupling:
-#                 # add to global reward if poi is observed
-#                 if stepClosestObsDistanceSqr < observationRadiusSqr:
-#                     if stepClosestObsDistanceSqr < minDistanceSqr:
-#                         stepClosestObsDistanceSqr = minDistanceSqr
-#                     globalReward += poiValueCol[poiIndex] / stepClosestObsDistanceSqr
-#
-#     data["Global Reward"] = globalReward
-#     data["Agent Rewards"] = [globalReward] * number_agents
-#
-# def assignDifferenceReward(data):
-#     agentPositionHistory = data["Agent Position History"]
-#     number_agents = data['Number of Agents']
-#     number_pois = data['Number of POIs']
-#     poiValueCol = data['Poi Values']
-#     poiPositionCol = data["Poi Positions"]
-#     minDistanceSqr = data["Minimum Distance"] ** 2
-#     historyStepCount = data["Steps"] + 1
-#     coupling = data["Coupling"]
-#     observationRadiusSqr = data["Observation Radius"] ** 2
-#
-#     globalReward = 0.0
-#
-#     for poiIndex in range(number_pois):
-#         poiPosition = poiPositionCol[poiIndex]
-#         closestObsDistanceSqr = float("inf")
-#         for stepIndex in range(historyStepCount):
-#             # Count how many agents observe poi, update closest distance if necessary
-#             observerCount = 0
-#             stepClosestObsDistanceSqr = float("inf")
-#             for agentIndex in range(number_agents):
-#                 # Calculate separation distance between poi and agent
-#                 agentPosition = agentPositionHistory[stepIndex, agentIndex]
-#                 separation = poiPosition - agentPosition
-#                 distanceSqr = dot(separation, separation)
-#
-#                 # Check if agent observes poi, update closest step distance
-#                 if distanceSqr < observationRadiusSqr:
-#                     observerCount += 1
-#                     if distanceSqr < stepClosestObsDistanceSqr:
-#                         stepClosestObsDistanceSqr = distanceSqr
-#
-#
-#             # update closest distance only if poi is observed
-#             if observerCount >= coupling:
-#                 if stepClosestObsDistanceSqr < closestObsDistanceSqr:
-#                     closestObsDistanceSqr = stepClosestObsDistanceSqr
-#
-#         # add to global reward if poi is observed
-#         if closestObsDistanceSqr < observationRadiusSqr:
-#             if closestObsDistanceSqr < minDistanceSqr:
-#                 closestObsDistanceSqr = minDistanceSqr
-#             globalReward += poiValueCol[poiIndex] / closestObsDistanceSqr
-#
-#     differenceRewards = [0] * number_agents
-#
-#     for agentIndex in range(number_agents):
-#         globalWithoutReward = 0
-#         for poiIndex in range(number_pois):
-#             poiPosition = poiPositionCol[poiIndex]
-#             closestObsDistanceSqr = float("inf")
-#             for stepIndex in range(historyStepCount):
-#                 # Count how many agents observe poi, update closest distance if necessary
-#                 observerCount = 0
-#                 stepClosestObsDistanceSqr = float("inf")
-#                 for otherAgentIndex in range(number_agents):
-#                     if agentIndex != otherAgentIndex:
-#                         # Calculate separation distance between poi and agent
-#                         agentPosition = agentPositionHistory[stepIndex, otherAgentIndex]
-#                         separation = poiPosition - agentPosition
-#                         distanceSqr = dot(separation, separation)
-#
-#                         # Check if agent observes poi, update closest step distance
-#                         if distanceSqr < observationRadiusSqr:
-#                             observerCount += 1
-#                             if distanceSqr < stepClosestObsDistanceSqr:
-#                                 stepClosestObsDistanceSqr = distanceSqr
-#
-#
-#                 # update closest distance only if poi is observed
-#                 if observerCount >= coupling:
-#                     if stepClosestObsDistanceSqr < closestObsDistanceSqr:
-#                         closestObsDistanceSqr = stepClosestObsDistanceSqr
-#
-#             # add to global reward if poi is observed
-#             if closestObsDistanceSqr < observationRadiusSqr:
-#                 if closestObsDistanceSqr < minDistanceSqr:
-#                     closestObsDistanceSqr = minDistanceSqr
-#                 globalWithoutReward += poiValueCol[poiIndex] / closestObsDistanceSqr
-#         differenceRewards[agentIndex] = globalReward - globalWithoutReward
-#
-#     data["Agent Rewards"] = differenceRewards
-#     data["Global Reward"] = globalReward
-#
-#
-#
-#
\ No newline at end of file
diff --git a/code/reward_2.pyx b/code/reward.pyx
similarity index 100%
rename from code/reward_2.pyx
rename to code/reward.pyx
diff --git a/code/setup.py b/code/setup.py
index f72b280..aaa2283 100644
--- a/code/setup.py
+++ b/code/setup.py
@@ -3,6 +3,6 @@ import numpy
 
 setup(
-    ext_modules = cythonize("agent_domain_2.pyx", annotate=True),
+    ext_modules = cythonize("agent_domain.pyx", annotate=True),
     include_dirs=[numpy.get_include()]
 )
\ No newline at end of file
diff --git a/gym_example.py b/gym_example.py
index 3b2df68..72c8f2b 100644
--- a/gym_example.py
+++ b/gym_example.py
@@ -2,11 +2,11 @@ from mods import *
 import datetime
 
 from code.world_setup import * # Rover Domain Construction
-from code.agent_domain_2 import * # Rover Domain Dynamic
+from code.agent_domain import * # Rover Domain Dynamic
 from code.trajectory_history import * # Agent Position Trajectory History
-from code.reward_2 import * # Agent Reward
+from code.reward import * # Agent Reward
 from code.reward_history import * # Performance Recording
-from code.ccea_2 import * # CCEA
+from code.ccea import * # CCEA
 from code.save_to_pickle import * # Save data as pickle file
 
 import random
diff --git a/mods.py b/mods.py
index e92e5f4..9f08ac7 100644
--- a/mods.py
+++ b/mods.py
@@ -1,5 +1,5 @@
 import datetime
-from code.reward_2 import * # Agent Reward
+from code.reward import * # Agent Reward
 from code.curriculum import * # Agent Curriculum
 
diff --git a/rover_domain_core_gym.py b/rover_domain_core_gym.py
index 8af5890..19b0bbe 100644
--- a/rover_domain_core_gym.py
+++ b/rover_domain_core_gym.py
@@ -4,8 +4,8 @@ from core import SimulationCore
 import pyximport; pyximport.install() # For cython(pyx) code
 
 from code.world_setup import * # Rover Domain Construction
-from code.agent_domain_2 import * # Rover Domain Dynamic
-from code.reward_2 import * # Agent Reward and Performance Recording
+from code.agent_domain import * # Rover Domain Dynamic
+from code.reward import * # Agent Reward and Performance Recording
 from code.trajectory_history import * # Record trajectory of agents for calculating rewards
diff --git a/specifics.py b/specifics.py
index 1963fe7..a57abd8 100644
--- a/specifics.py
+++ b/specifics.py
@@ -2,11 +2,11 @@ from core import SimulationCore
 import pyximport; pyximport.install() # For cython(pyx) code
 
 from code.world_setup import * # Rover Domain Construction
-from code.agent_domain_2 import * # Rover Domain Dynamic
+from code.agent_domain import * # Rover Domain Dynamic
 from code.trajectory_history import * # Agent Position Trajectory History
-from code.reward_2 import * # Agent Reward
+from code.reward import * # Agent Reward
 from code.reward_history import * # Performance Recording
-from code.ccea_2 import * # CCEA
+from code.ccea import * # CCEA
 from code.save_to_pickle import * # Save data as pickle file
 
 # from code.experience_replay import *