diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ad_hoc.py b/ad_hoc.py
new file mode 100644
index 0000000..586c7b2
--- /dev/null
+++ b/ad_hoc.py
@@ -0,0 +1,97 @@
+"""
+An example using the rover domain gym-style interface and the standard, included CCEA learning algorithms.
+This is a minimal example, showing the minimal Gym interface.
+"""
+import numpy as np
+import sys
+import multiprocessing as mp
+
+
+
+from rover_domain_core_gym import RoverDomainGym
+import code.ccea_2 as ccea
+import code.agent_domain_2 as domain
+import mods
+from teaming.learner import learner
+from sys import argv
+import pickle
+import tensorflow as tf
+
+
+def make_env(progress):
+
+    nagents=1
+
+    sim = RoverDomainGym(nagents,100)
+    #mods.recipePoi(sim)
+    obs=sim.reset()
+    #print(len(obs[0]))
+    #for i in range(sim.data["Recipe Size" ]):
+    #    sim.data["Item Held"][0][i]=progress[i]
+
+    #obs=reduce_state(obs)
+
+    sim.data["Coupling"]=1
+    sim.data['Number of Agents']=nagents
+    return sim
+
+def test1(trial):
+    env=make_env([1,1,1,1])
+    team=[0,0,1,1,2,2,3,3]
+    team=[0]
+    with tf.Session() as sess:
+
+        controller = learner(team,sess)
+        init=tf.global_variables_initializer()
+        sess.run(init)
+
+
+        for i in range(10001):
+            r=controller.run(env,i,0)  # i%100 == -10
+            print(i,max(r))
+            if i%1000==0 and 0:
+                controller.test(env)
+            if i%1000==0:
+                controller.save("logs/"+str(trial)+"t.pkl")
+            #print(r)
+'''
+test1(0)
+for i in range(0):
+    p=mp.Process(target=test1,args=(i,))
+    p.start()
+    #p.join()
+'''
+env=make_env(None)
+
+from time import sleep
+
+s=env.reset()
+s=s[:,4:][0]
+for i in range(100):
+
+    idx=1
+    loc=env.data["Poi Positions"][idx]
+    ang=env.data["Agent Orientations"][0]
+    pos=env.data["Agent Positions"][0]
+
+    heading=[loc[0]-pos[0],loc[1]-pos[1]]
+
+    # angle between the agent's orientation and the bearing to the POI
+    trn=np.arccos( (heading[0]*ang[0]+heading[1]*ang[1])/( np.sqrt(heading[0]**2+heading[1]**2)*np.sqrt(ang[0]**2+ang[1]**2) ) )
+    trn/=4
+    spd=1.0
+
+    a=[spd,trn]
+
+    s,r,_,_=env.step([a])
+    s=s[:,4:][0]
+    print(i,r,trn,spd)
+    env.render()
+    sleep(0.033)
+
+
+
+
+
+
+
+
diff --git a/ad_hoc2.py b/ad_hoc2.py
new file mode 100644
index 0000000..aa8b248
--- /dev/null
+++ b/ad_hoc2.py
@@ -0,0 +1,234 @@
+"""
+An example using the rover domain gym-style interface and the standard, included CCEA learning algorithms.
+This is a minimal example, showing the minimal Gym interface.
+"""
+import numpy as np
+import sys
+import multiprocessing as mp
+
+
+
+from rover_domain_core_gym import RoverDomainGym
+import code.ccea_2 as ccea
+import code.agent_domain_2 as domain
+import mods
+from teaming.learner3 import learner
+from sys import argv
+import pickle
+import tensorflow as tf
+
+def rand_loc(n):
+    x,y=np.random.random(2)
+    pos=[[x,y]]
+    while len(pos)<6:
+        X,Y=np.random.random(2)
+        for x,y in pos:
+            dist=((X-x)**2.0+(Y-y)**2.0)**0.5
+            if dist<0.2:
+                X=None
+                break
+        if X is not None:
+            pos.append([X,Y])
+
+    return np.array(pos)
+
+
+#print(vals)
+def make_env(team):
+    vals=np.array([0.1, 0.1, 0.5, 0.3, 0.0, 0.0])
+
+    pos=np.array([
+        [0.0, 0.0],
+        [1.0, 1.0],
+        [0.0, 1.0],
+        [1.0, 0.5],
+        [0.0, 0.5],
+        [1.0, 0.0]
+    ])
+
+    #pos=rand_loc(6)  #np.random.random((6,2))
+    #vals=np.random.random(6)/2.0
+    print(vals)
+    nagents=len(team)
+
+    sim = RoverDomainGym(nagents,100,pos,vals)
+    #mods.recipePoi(sim)
+    obs=sim.reset()
+    #print(len(obs[0]))
+    #for i in range(sim.data["Recipe Size" ]):
+    #    sim.data["Item Held"][0][i]=progress[i]
+
+    #obs=reduce_state(obs)
+
+    sim.data["Coupling"]=2
+    sim.data['Number of Agents']=nagents
+    return sim
+
+
+import time
+
+def test1(trial,frq):
+    frq=1
+    #print(np.random.get_state())[1]
+    np.random.seed(int(time.time()*100000)%100000)
+    team=[0,0,1,1,2,2,3,3]
+    team=[0,1,2,2,1]
+
+    #team=[0,1,2,2,1,2,0,0]
+    #team=team+team
+    env=make_env(team)
+    with tf.compat.v1.Session() as sess:
+
+        controller = learner(team,sess,env)
+        init=tf.compat.v1.global_variables_initializer()
+        sess.run(init)
+
+
+        for i in range(10001):
+
+            if i%int(frq)==0:
+                controller.randomize()
+
+            r=controller.run(env,i,0)  # i%100 == -10
+            if i%10==0:
+                print(i,r[-1],controller.team)
+            if i%50==0 and 1:
+                controller.test(env)
+
+            if i%50==0:
+                #controller.save("tests/q"+str(frq)+"-"+str(trial)+".pkl")
+                #controller.save("logs/"+str(trial)+"r"+str(16)+".pkl")
+                #controller.save("tests/jj"+str(121)+"-"+str(trial)+".pkl")
+                controller.save("tests/evo"+str(121)+"-"+str(trial)+".pkl")
+            #print(r)
+        #print(r)
+
+
+def test2(trial,f):
+    #print(np.random.get_state())[1]
+    np.random.seed(int(time.time()*100000)%100000)
+    team=[i for i in range(16)]
+    #team=[0,1,2,3,4]
+    env=make_env(team)
+    with tf.compat.v1.Session() as sess:
+
+        controller = learner(team,sess)
+        init=tf.compat.v1.global_variables_initializer()
+        sess.run(init)
+
+
+        for i in range(10001):
+            r=controller.run(env,i,0)  # i%100 == -10
+            if i%10==0:
+                print(i,r[-1],controller.team)
+            if i%100==0 and 1:
+                controller.test(env)
+            if i%1000==0:
+                controller.save("logs/"+str(trial)+"v16.pkl")
+            #print(r)
+
+
+def test3(trial,f):
+    #print(np.random.get_state())[1]
+    np.random.seed(int(time.time()*100000)%100000)
+    team=[0,1,2,0,0,0,0,0]
+    team=team+team
+    #team=[0,1,2,0,0]
+    env=make_env(team)
+    with tf.compat.v1.Session() as sess:
+
+        controller = learner(team,sess)
+        init=tf.compat.v1.global_variables_initializer()
+        sess.run(init)
+
+        controller.randomize()
+        for i in range(10001):
+            r=controller.run(env,i,0)  # i%100 == -10
+            if i%10==0:
+                print(i,r[-1],controller.team)
+            if i%100==0 and 1:
+                controller.test(env)
+            if i%1000==0:
+                controller.save("logs/"+str(trial)+"r8.pkl")
+            #print(r)
+
+def test4(trial,frq):
+    #print(np.random.get_state())[1]
+    np.random.seed(int(time.time()*100000)%100000)
+    team=[0,0,1,1,2,2,3,3]
+    team=[0,1,2,2,1,1,0,0]
+    team=team+team
+    team=np.array([i%int(frq) for i in range(8)])
+    env=make_env(team)
+    with tf.compat.v1.Session() as sess:
+
+        controller = learner(team,sess)
+        init=tf.compat.v1.global_variables_initializer()
+        sess.run(init)
+
+
+        for i in range(10001):
+
+            if i%1==0:
+                controller.randomize()
+
+            r=controller.run(env,i,0)  # i%100 == -10
+            if i%10==0:
+                print(i,r[-1],controller.team)
+            if i%100==0 and 1:
+                controller.test(env)
+            if i%1000==0:
+                controller.save("tests/qq"+str(frq)+"-"+str(trial)+".pkl")
+
+            #print(r)
+def test5(trial,frq):
+    #print(np.random.get_state())[1]
+    np.random.seed(int(time.time()*100000)%100000)
+    team=[0,0,1,1,2,2,3,3]
+    team=[0,1,2,2,1,1,0,0]
+    #team=team+team
+    #team=np.array([i%int(frq) for i in range(16)])
+    env=make_env(team)
+    with tf.compat.v1.Session() as sess:
+
+        controller = learner(team,sess)
+        init=tf.compat.v1.global_variables_initializer()
+        sess.run(init)
+        controller.put("poi vals",vals)
+
+        for i in range(10001):
+
+            if i%1==0:
+                controller.randomize()
+
+            r=controller.run(env,i,0)  # i%100 == -10
+            if i%10==0:
+                print(i,r[-1],controller.team)
+            if i%100==0 and 1:
+                controller.test(env)
+            if i%1000==0:
+                controller.save("tests/c"+str(frq)+"-"+str(trial)+".pkl")
+
+if 0:
+
+    test1(20)
+else:
+    f=sys.argv[1]
+    print(f)
+    f=int(f)
+    for i in range(4):
+        p=mp.Process(target=test1,args=(i+(8*f),f))
+        p.start()
+        time.sleep(0.01)
+        #p.join()
+
+#env=make_env(None)
+
+
+
+
+
+
+
+
+
diff --git a/code/agent_domain_2.html b/code/agent_domain_2.html
new file mode 100644
index 0000000..4ef583b
--- /dev/null
+++ b/code/agent_domain_2.html
@@ -0,0 +1,1255 @@
+
+
+
+
+
+Cython: agent_domain_2.pyx
+
+
+
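The two driver scripts above (ad_hoc.py and ad_hoc2.py) exercise the rover domain purely through the gym-style calls named in their docstrings: build the environment, reset(), then repeatedly step() with one [speed, turn] action per agent. The condensed sketch below shows just that loop. It only assumes the calls and data keys that already appear in ad_hoc.py (RoverDomainGym(nagents, steps), env.data["Poi Positions"], env.step returning a 4-tuple, actions clipped to [-1, 1] by the domain); steer_to_poi is a hypothetical name for the inline turn computation in that script.

# Minimal sketch of the gym-style loop driven in ad_hoc.py (illustrative, not a new module).
import numpy as np
from time import sleep

from rover_domain_core_gym import RoverDomainGym


def steer_to_poi(env, agent_idx=0, poi_idx=1):
    """Turn command toward one POI, mirroring the inline math in ad_hoc.py."""
    loc = env.data["Poi Positions"][poi_idx]
    ang = env.data["Agent Orientations"][agent_idx]
    pos = env.data["Agent Positions"][agent_idx]
    heading = np.array([loc[0] - pos[0], loc[1] - pos[1]])
    # cosine of the angle between the agent's orientation and the bearing to the POI
    cos_theta = heading.dot(ang) / (np.linalg.norm(heading) * np.linalg.norm(ang))
    return np.arccos(np.clip(cos_theta, -1.0, 1.0)) / 4.0


def run_episode(steps=100):
    env = RoverDomainGym(1, steps)      # one agent, 100-step episode, as in make_env
    env.data["Coupling"] = 1
    env.data["Number of Agents"] = 1
    env.reset()
    reward = None
    for _ in range(steps):
        action = [1.0, steer_to_poi(env)]               # [speed, turn], as in ad_hoc.py
        state, reward, done, info = env.step([action])  # one action per agent
        env.render()
        sleep(0.033)
    return reward


if __name__ == "__main__":
    print(run_episode())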

+Generated by Cython 0.29.21
+
+Yellow lines hint at Python interaction.
+Click on a line that starts with a "+" to see the C code that Cython generated for it.
+
+Raw output: agent_domain_2.c
+
 001: 
+
+002: import numpy as np
+
  __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+/* … */
+  __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
 003: cimport cython
+
 004: 
+
 005: cdef extern from "math.h":
+
 006:     double sqrt(double m)
+
 007: 
+
 008: @cython.boundscheck(False)  # Deactivate bounds checking
+
 009: @cython.wraparound(False)   # Deactivate negative indexing.
+
+010: cpdef doAgentSense(data):
+
static PyObject *__pyx_pw_4code_14agent_domain_2_1doAgentSense(PyObject *__pyx_self, PyObject *__pyx_v_data); /*proto*/
+static PyObject *__pyx_f_4code_14agent_domain_2_doAgentSense(PyObject *__pyx_v_data, CYTHON_UNUSED int __pyx_skip_dispatch) {
+  int __pyx_v_number_agents;
+  int __pyx_v_number_pois;
+  double __pyx_v_minDistanceSqr;
+  __Pyx_memviewslice __pyx_v_agentPositionCol = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_poiValueCol = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_poiPositionCol = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_orientationCol = { 0, 0, { 0 }, { 0 }, { 0 } };
+  PyObject *__pyx_v_npObservationCol = NULL;
+  __Pyx_memviewslice __pyx_v_observationCol = { 0, 0, { 0 }, { 0 }, { 0 } };
+  int __pyx_v_agentIndex;
+  int __pyx_v_otherAgentIndex;
+  int __pyx_v_poiIndex;
+  double __pyx_v_globalFrameSeparation0;
+  double __pyx_v_globalFrameSeparation1;
+  double __pyx_v_agentFrameSeparation0;
+  double __pyx_v_agentFrameSeparation1;
+  double __pyx_v_distanceSqr;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentSense", 0);
+/* … */
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_3);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_5, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_XDECREF(__pyx_t_9);
+  __Pyx_AddTraceback("code.agent_domain_2.doAgentSense", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __PYX_XDEC_MEMVIEW(&__pyx_v_agentPositionCol, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_poiValueCol, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_poiPositionCol, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_orientationCol, 1);
+  __Pyx_XDECREF(__pyx_v_npObservationCol);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_observationCol, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_4code_14agent_domain_2_1doAgentSense(PyObject *__pyx_self, PyObject *__pyx_v_data); /*proto*/
+static char __pyx_doc_4code_14agent_domain_2_doAgentSense[] = "\n     Sensor model is <aNE, aNW, aSW, aSE, pNE, pNW, pSW, pSE>\n     Where a means (other) agent, p means poi, and the rest are the quadrants\n    ";
+static PyObject *__pyx_pw_4code_14agent_domain_2_1doAgentSense(PyObject *__pyx_self, PyObject *__pyx_v_data) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentSense (wrapper)", 0);
+  __pyx_r = __pyx_pf_4code_14agent_domain_2_doAgentSense(__pyx_self, ((PyObject *)__pyx_v_data));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_4code_14agent_domain_2_doAgentSense(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_data) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentSense", 0);
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __pyx_f_4code_14agent_domain_2_doAgentSense(__pyx_v_data, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("code.agent_domain_2.doAgentSense", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
 011:     """
+
 012:      Sensor model is <aNE, aNW, aSW, aSE, pNE, pNW, pSW, pSE>
+
 013:      Where a means (other) agent, p means poi, and the rest are the quadrants
+
 014:     """
+
+015:     cdef int number_agents = data['Number of Agents']
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Number_of_Agents); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_number_agents = __pyx_t_2;
+
+016:     cdef int number_pois = data['Number of POIs']
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Number_of_POIs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 16, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 16, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_number_pois = __pyx_t_2;
+
+017:     cdef double minDistanceSqr = data["Minimum Distance"] ** 2
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Minimum_Distance); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 17, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_3 = PyNumber_Power(__pyx_t_1, __pyx_int_2, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 17, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_4 = __pyx_PyFloat_AsDouble(__pyx_t_3); if (unlikely((__pyx_t_4 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 17, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_v_minDistanceSqr = __pyx_t_4;
+
+018:     cdef double[:, :] agentPositionCol = data["Agent Positions"]
+
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Agent_Positions); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 18, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_5 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_3, PyBUF_WRITABLE); if (unlikely(!__pyx_t_5.memview)) __PYX_ERR(0, 18, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_v_agentPositionCol = __pyx_t_5;
+  __pyx_t_5.memview = NULL;
+  __pyx_t_5.data = NULL;
+
+019:     cdef double[:] poiValueCol = data['Poi Values']
+
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Poi_Values); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 19, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_ds_double(__pyx_t_3, PyBUF_WRITABLE); if (unlikely(!__pyx_t_6.memview)) __PYX_ERR(0, 19, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_v_poiValueCol = __pyx_t_6;
+  __pyx_t_6.memview = NULL;
+  __pyx_t_6.data = NULL;
+
+020:     cdef double[:, :] poiPositionCol = data["Poi Positions"]
+
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Poi_Positions); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 20, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_5 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_3, PyBUF_WRITABLE); if (unlikely(!__pyx_t_5.memview)) __PYX_ERR(0, 20, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_v_poiPositionCol = __pyx_t_5;
+  __pyx_t_5.memview = NULL;
+  __pyx_t_5.data = NULL;
+
+021:     cdef double[:, :] orientationCol = data["Agent Orientations"]
+
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Agent_Orientations); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 21, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_5 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_3, PyBUF_WRITABLE); if (unlikely(!__pyx_t_5.memview)) __PYX_ERR(0, 21, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_v_orientationCol = __pyx_t_5;
+  __pyx_t_5.memview = NULL;
+  __pyx_t_5.data = NULL;
+
+022:     npObservationCol = np.zeros((number_agents, 8), dtype = np.float64)
+
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_zeros); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_number_agents); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_7);
+  __Pyx_GIVEREF(__pyx_t_3);
+  PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_3);
+  __Pyx_INCREF(__pyx_int_8);
+  __Pyx_GIVEREF(__pyx_int_8);
+  PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_int_8);
+  __pyx_t_3 = 0;
+  __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_GIVEREF(__pyx_t_7);
+  PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_7);
+  __pyx_t_7 = 0;
+  __pyx_t_7 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_7);
+  __Pyx_GetModuleGlobalName(__pyx_t_8, __pyx_n_s_np); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_float64); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_9);
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  if (PyDict_SetItem(__pyx_t_7, __pyx_n_s_dtype, __pyx_t_9) < 0) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+  __pyx_t_9 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_3, __pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 22, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_9);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+  __pyx_v_npObservationCol = __pyx_t_9;
+  __pyx_t_9 = 0;
+
+023:     cdef double[:, :] observationCol = npObservationCol
+
  __pyx_t_5 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_v_npObservationCol, PyBUF_WRITABLE); if (unlikely(!__pyx_t_5.memview)) __PYX_ERR(0, 23, __pyx_L1_error)
+  __pyx_v_observationCol = __pyx_t_5;
+  __pyx_t_5.memview = NULL;
+  __pyx_t_5.data = NULL;
+
 024: 
+
 025:     cdef int agentIndex, otherAgentIndex, poiIndex, obsIndex
+
 026:     cdef double globalFrameSeparation0, globalFrameSeparation1
+
 027:     cdef double agentFrameSeparation0, agentFrameSeparation1
+
 028: 
+
 029:     cdef double distanceSqr
+
 030: 
+
 031: 
+
+032:     for agentIndex in range(number_agents):
+
  __pyx_t_2 = __pyx_v_number_agents;
+  __pyx_t_10 = __pyx_t_2;
+  for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_10; __pyx_t_11+=1) {
+    __pyx_v_agentIndex = __pyx_t_11;
+
 033: 
+
 034:         # calculate observation values due to other agents
+
+035:         for otherAgentIndex in range(number_agents):
+
    __pyx_t_12 = __pyx_v_number_agents;
+    __pyx_t_13 = __pyx_t_12;
+    for (__pyx_t_14 = 0; __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) {
+      __pyx_v_otherAgentIndex = __pyx_t_14;
+
 036: 
+
 037:             # agents do not sense self (ergo skip self comparison)
+
+038:             if agentIndex == otherAgentIndex:
+
      __pyx_t_15 = ((__pyx_v_agentIndex == __pyx_v_otherAgentIndex) != 0);
+      if (__pyx_t_15) {
+/* … */
+      }
+
+039:                 continue
+
        goto __pyx_L5_continue;
+
 040: 
+
 041:             # Get global separation vector between the two agents
+
+042:             globalFrameSeparation0 = agentPositionCol[otherAgentIndex,0] - agentPositionCol[agentIndex,0]
+
      __pyx_t_16 = __pyx_v_otherAgentIndex;
+      __pyx_t_17 = 0;
+      __pyx_t_18 = __pyx_v_agentIndex;
+      __pyx_t_19 = 0;
+      __pyx_v_globalFrameSeparation0 = ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_agentPositionCol.data + __pyx_t_16 * __pyx_v_agentPositionCol.strides[0]) ) + __pyx_t_17 * __pyx_v_agentPositionCol.strides[1]) ))) - (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_agentPositionCol.data + __pyx_t_18 * __pyx_v_agentPositionCol.strides[0]) ) + __pyx_t_19 * __pyx_v_agentPositionCol.strides[1]) ))));
+
+043:             globalFrameSeparation1 = agentPositionCol[otherAgentIndex,1] - agentPositionCol[agentIndex,1]
+
      __pyx_t_19 = __pyx_v_otherAgentIndex;
+      __pyx_t_18 = 1;
+      __pyx_t_17 = __pyx_v_agentIndex;
+      __pyx_t_16 = 1;
+      __pyx_v_globalFrameSeparation1 = ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_agentPositionCol.data + __pyx_t_19 * __pyx_v_agentPositionCol.strides[0]) ) + __pyx_t_18 * __pyx_v_agentPositionCol.strides[1]) ))) - (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_agentPositionCol.data + __pyx_t_17 * __pyx_v_agentPositionCol.strides[0]) ) + __pyx_t_16 * __pyx_v_agentPositionCol.strides[1]) ))));
+
 044: 
+
 045:             # Translate separation to agent frame using inverse rotation matrix
+
+046:             agentFrameSeparation0 = orientationCol[agentIndex, 0] * globalFrameSeparation0 + orientationCol[agentIndex, 1] * globalFrameSeparation1
+
      __pyx_t_16 = __pyx_v_agentIndex;
+      __pyx_t_17 = 0;
+      __pyx_t_18 = __pyx_v_agentIndex;
+      __pyx_t_19 = 1;
+      __pyx_v_agentFrameSeparation0 = (((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_16 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_17 * __pyx_v_orientationCol.strides[1]) ))) * __pyx_v_globalFrameSeparation0) + ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_18 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_19 * __pyx_v_orientationCol.strides[1]) ))) * __pyx_v_globalFrameSeparation1));
+
+047:             agentFrameSeparation1 = orientationCol[agentIndex, 0] * globalFrameSeparation1 - orientationCol[agentIndex, 1] * globalFrameSeparation0
+
      __pyx_t_19 = __pyx_v_agentIndex;
+      __pyx_t_18 = 0;
+      __pyx_t_17 = __pyx_v_agentIndex;
+      __pyx_t_16 = 1;
+      __pyx_v_agentFrameSeparation1 = (((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_19 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_18 * __pyx_v_orientationCol.strides[1]) ))) * __pyx_v_globalFrameSeparation1) - ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_17 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_16 * __pyx_v_orientationCol.strides[1]) ))) * __pyx_v_globalFrameSeparation0));
+
+048:             distanceSqr = agentFrameSeparation0 * agentFrameSeparation0 + agentFrameSeparation1 * agentFrameSeparation1
+
      __pyx_v_distanceSqr = ((__pyx_v_agentFrameSeparation0 * __pyx_v_agentFrameSeparation0) + (__pyx_v_agentFrameSeparation1 * __pyx_v_agentFrameSeparation1));
+
 049: 
+
 050:             # By bounding distance value we implicitly bound sensor values
+
+051:             if distanceSqr < minDistanceSqr:
+
      __pyx_t_15 = ((__pyx_v_distanceSqr < __pyx_v_minDistanceSqr) != 0);
+      if (__pyx_t_15) {
+/* … */
+      }
+
+052:                 distanceSqr = minDistanceSqr
+
        __pyx_v_distanceSqr = __pyx_v_minDistanceSqr;
+
 053: 
+
 054: 
+
 055:             # other is east of agent
+
+056:             if agentFrameSeparation0 > 0:
+
      __pyx_t_15 = ((__pyx_v_agentFrameSeparation0 > 0.0) != 0);
+      if (__pyx_t_15) {
+/* … */
+        goto __pyx_L9;
+      }
+
 057:                 # other is north-east of agent
+
+058:                 if agentFrameSeparation1 > 0:
+
        __pyx_t_15 = ((__pyx_v_agentFrameSeparation1 > 0.0) != 0);
+        if (__pyx_t_15) {
+/* … */
+          goto __pyx_L10;
+        }
+
+059:                     observationCol[agentIndex,0] += 1.0 / distanceSqr
+
          if (unlikely(__pyx_v_distanceSqr == 0)) {
+            PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+            __PYX_ERR(0, 59, __pyx_L1_error)
+          }
+          __pyx_t_16 = __pyx_v_agentIndex;
+          __pyx_t_17 = 0;
+          *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_observationCol.data + __pyx_t_16 * __pyx_v_observationCol.strides[0]) ) + __pyx_t_17 * __pyx_v_observationCol.strides[1]) )) += (1.0 / __pyx_v_distanceSqr);
+
 060:                 else: # other is south-east of agent
+
+061:                     observationCol[agentIndex,3] += 1.0  / distanceSqr
+
        /*else*/ {
+          if (unlikely(__pyx_v_distanceSqr == 0)) {
+            PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+            __PYX_ERR(0, 61, __pyx_L1_error)
+          }
+          __pyx_t_17 = __pyx_v_agentIndex;
+          __pyx_t_16 = 3;
+          *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_observationCol.data + __pyx_t_17 * __pyx_v_observationCol.strides[0]) ) + __pyx_t_16 * __pyx_v_observationCol.strides[1]) )) += (1.0 / __pyx_v_distanceSqr);
+        }
+        __pyx_L10:;
+
 062:             else:  # other is west of agent
+
 063:                 # other is north-west of agent
+
+064:                 if agentFrameSeparation1 > 0:
+
      /*else*/ {
+        __pyx_t_15 = ((__pyx_v_agentFrameSeparation1 > 0.0) != 0);
+        if (__pyx_t_15) {
+/* … */
+          goto __pyx_L11;
+        }
+
+065:                     observationCol[agentIndex,1] += 1.0  / distanceSqr
+
          if (unlikely(__pyx_v_distanceSqr == 0)) {
+            PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+            __PYX_ERR(0, 65, __pyx_L1_error)
+          }
+          __pyx_t_16 = __pyx_v_agentIndex;
+          __pyx_t_17 = 1;
+          *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_observationCol.data + __pyx_t_16 * __pyx_v_observationCol.strides[0]) ) + __pyx_t_17 * __pyx_v_observationCol.strides[1]) )) += (1.0 / __pyx_v_distanceSqr);
+
 066:                 else:  # other is south-west of agent
+
+067:                     observationCol[agentIndex,2] += 1.0  / distanceSqr
+
        /*else*/ {
+          if (unlikely(__pyx_v_distanceSqr == 0)) {
+            PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+            __PYX_ERR(0, 67, __pyx_L1_error)
+          }
+          __pyx_t_17 = __pyx_v_agentIndex;
+          __pyx_t_16 = 2;
+          *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_observationCol.data + __pyx_t_17 * __pyx_v_observationCol.strides[0]) ) + __pyx_t_16 * __pyx_v_observationCol.strides[1]) )) += (1.0 / __pyx_v_distanceSqr);
+        }
+        __pyx_L11:;
+      }
+      __pyx_L9:;
+      __pyx_L5_continue:;
+    }
+
 068: 
+
 069: 
+
 070: 
+
 071:         # calculate observation values due to pois
+
+072:         for poiIndex in range(number_pois):
+
    __pyx_t_12 = __pyx_v_number_pois;
+    __pyx_t_13 = __pyx_t_12;
+    for (__pyx_t_14 = 0; __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) {
+      __pyx_v_poiIndex = __pyx_t_14;
+
 073: 
+
 074:             # Get global separation vector between the two agents
+
+075:             globalFrameSeparation0 = poiPositionCol[poiIndex,0] - agentPositionCol[agentIndex,0]
+
      __pyx_t_16 = __pyx_v_poiIndex;
+      __pyx_t_17 = 0;
+      __pyx_t_18 = __pyx_v_agentIndex;
+      __pyx_t_19 = 0;
+      __pyx_v_globalFrameSeparation0 = ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_poiPositionCol.data + __pyx_t_16 * __pyx_v_poiPositionCol.strides[0]) ) + __pyx_t_17 * __pyx_v_poiPositionCol.strides[1]) ))) - (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_agentPositionCol.data + __pyx_t_18 * __pyx_v_agentPositionCol.strides[0]) ) + __pyx_t_19 * __pyx_v_agentPositionCol.strides[1]) ))));
+
+076:             globalFrameSeparation1 = poiPositionCol[poiIndex,1] - agentPositionCol[agentIndex,1]
+
      __pyx_t_19 = __pyx_v_poiIndex;
+      __pyx_t_18 = 1;
+      __pyx_t_17 = __pyx_v_agentIndex;
+      __pyx_t_16 = 1;
+      __pyx_v_globalFrameSeparation1 = ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_poiPositionCol.data + __pyx_t_19 * __pyx_v_poiPositionCol.strides[0]) ) + __pyx_t_18 * __pyx_v_poiPositionCol.strides[1]) ))) - (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_agentPositionCol.data + __pyx_t_17 * __pyx_v_agentPositionCol.strides[0]) ) + __pyx_t_16 * __pyx_v_agentPositionCol.strides[1]) ))));
+
 077: 
+
 078:             # Translate separation to agent frame using inverse rotation matrix
+
+079:             agentFrameSeparation0 = orientationCol[agentIndex, 0] * globalFrameSeparation0 + orientationCol[agentIndex, 1] * globalFrameSeparation1
+
      __pyx_t_16 = __pyx_v_agentIndex;
+      __pyx_t_17 = 0;
+      __pyx_t_18 = __pyx_v_agentIndex;
+      __pyx_t_19 = 1;
+      __pyx_v_agentFrameSeparation0 = (((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_16 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_17 * __pyx_v_orientationCol.strides[1]) ))) * __pyx_v_globalFrameSeparation0) + ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_18 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_19 * __pyx_v_orientationCol.strides[1]) ))) * __pyx_v_globalFrameSeparation1));
+
+080:             agentFrameSeparation1 = orientationCol[agentIndex, 0] * globalFrameSeparation1 - orientationCol[agentIndex, 1] * globalFrameSeparation0
+
      __pyx_t_19 = __pyx_v_agentIndex;
+      __pyx_t_18 = 0;
+      __pyx_t_17 = __pyx_v_agentIndex;
+      __pyx_t_16 = 1;
+      __pyx_v_agentFrameSeparation1 = (((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_19 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_18 * __pyx_v_orientationCol.strides[1]) ))) * __pyx_v_globalFrameSeparation1) - ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_17 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_16 * __pyx_v_orientationCol.strides[1]) ))) * __pyx_v_globalFrameSeparation0));
+
+081:             distanceSqr = agentFrameSeparation0 * agentFrameSeparation0 + agentFrameSeparation1 * agentFrameSeparation1
+
      __pyx_v_distanceSqr = ((__pyx_v_agentFrameSeparation0 * __pyx_v_agentFrameSeparation0) + (__pyx_v_agentFrameSeparation1 * __pyx_v_agentFrameSeparation1));
+
 082: 
+
 083:             # By bounding distance value we implicitly bound sensor values
+
+084:             if distanceSqr < minDistanceSqr:
+
      __pyx_t_15 = ((__pyx_v_distanceSqr < __pyx_v_minDistanceSqr) != 0);
+      if (__pyx_t_15) {
+/* … */
+      }
+
+085:                 distanceSqr = minDistanceSqr
+
        __pyx_v_distanceSqr = __pyx_v_minDistanceSqr;
+
 086: 
+
 087:             # poi is east of agent
+
+088:             if agentFrameSeparation0> 0:
+
      __pyx_t_15 = ((__pyx_v_agentFrameSeparation0 > 0.0) != 0);
+      if (__pyx_t_15) {
+/* … */
+        goto __pyx_L15;
+      }
+
 089:                 # poi is north-east of agent
+
+090:                 if agentFrameSeparation1 > 0:
+
        __pyx_t_15 = ((__pyx_v_agentFrameSeparation1 > 0.0) != 0);
+        if (__pyx_t_15) {
+/* … */
+          goto __pyx_L16;
+        }
+
+091:                     observationCol[agentIndex,4] += poiValueCol[poiIndex]  / distanceSqr
+
          __pyx_t_16 = __pyx_v_poiIndex;
+          __pyx_t_4 = (*((double *) ( /* dim=0 */ (__pyx_v_poiValueCol.data + __pyx_t_16 * __pyx_v_poiValueCol.strides[0]) )));
+          if (unlikely(__pyx_v_distanceSqr == 0)) {
+            PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+            __PYX_ERR(0, 91, __pyx_L1_error)
+          }
+          __pyx_t_16 = __pyx_v_agentIndex;
+          __pyx_t_17 = 4;
+          *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_observationCol.data + __pyx_t_16 * __pyx_v_observationCol.strides[0]) ) + __pyx_t_17 * __pyx_v_observationCol.strides[1]) )) += (__pyx_t_4 / __pyx_v_distanceSqr);
+
 092:                 else: # poi is south-east of agent
+
+093:                     observationCol[agentIndex,7] += poiValueCol[poiIndex]  / distanceSqr
+
        /*else*/ {
+          __pyx_t_17 = __pyx_v_poiIndex;
+          __pyx_t_4 = (*((double *) ( /* dim=0 */ (__pyx_v_poiValueCol.data + __pyx_t_17 * __pyx_v_poiValueCol.strides[0]) )));
+          if (unlikely(__pyx_v_distanceSqr == 0)) {
+            PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+            __PYX_ERR(0, 93, __pyx_L1_error)
+          }
+          __pyx_t_17 = __pyx_v_agentIndex;
+          __pyx_t_16 = 7;
+          *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_observationCol.data + __pyx_t_17 * __pyx_v_observationCol.strides[0]) ) + __pyx_t_16 * __pyx_v_observationCol.strides[1]) )) += (__pyx_t_4 / __pyx_v_distanceSqr);
+        }
+        __pyx_L16:;
+
 094:             else:  # poi is west of agent
+
 095:                 # poi is north-west of agent
+
+096:                 if agentFrameSeparation1 > 0:
+
      /*else*/ {
+        __pyx_t_15 = ((__pyx_v_agentFrameSeparation1 > 0.0) != 0);
+        if (__pyx_t_15) {
+/* … */
+          goto __pyx_L17;
+        }
+
+097:                     observationCol[agentIndex,5] += poiValueCol[poiIndex]  / distanceSqr
+
          __pyx_t_16 = __pyx_v_poiIndex;
+          __pyx_t_4 = (*((double *) ( /* dim=0 */ (__pyx_v_poiValueCol.data + __pyx_t_16 * __pyx_v_poiValueCol.strides[0]) )));
+          if (unlikely(__pyx_v_distanceSqr == 0)) {
+            PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+            __PYX_ERR(0, 97, __pyx_L1_error)
+          }
+          __pyx_t_16 = __pyx_v_agentIndex;
+          __pyx_t_17 = 5;
+          *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_observationCol.data + __pyx_t_16 * __pyx_v_observationCol.strides[0]) ) + __pyx_t_17 * __pyx_v_observationCol.strides[1]) )) += (__pyx_t_4 / __pyx_v_distanceSqr);
+
 098:                 else:  # poi is south-west of agent
+
+099:                     observationCol[agentIndex,6] += poiValueCol[poiIndex]  / distanceSqr
+
        /*else*/ {
+          __pyx_t_17 = __pyx_v_poiIndex;
+          __pyx_t_4 = (*((double *) ( /* dim=0 */ (__pyx_v_poiValueCol.data + __pyx_t_17 * __pyx_v_poiValueCol.strides[0]) )));
+          if (unlikely(__pyx_v_distanceSqr == 0)) {
+            PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+            __PYX_ERR(0, 99, __pyx_L1_error)
+          }
+          __pyx_t_17 = __pyx_v_agentIndex;
+          __pyx_t_16 = 6;
+          *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_observationCol.data + __pyx_t_17 * __pyx_v_observationCol.strides[0]) ) + __pyx_t_16 * __pyx_v_observationCol.strides[1]) )) += (__pyx_t_4 / __pyx_v_distanceSqr);
+        }
+        __pyx_L17:;
+      }
+      __pyx_L15:;
+    }
+  }
+
 100: 
+
+101:     data["Agent Observations"] = npObservationCol
+
  if (unlikely(PyObject_SetItem(__pyx_v_data, __pyx_kp_s_Agent_Observations, __pyx_v_npObservationCol) < 0)) __PYX_ERR(0, 101, __pyx_L1_error)
+
 102: 
+
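The doAgentSense listing above interleaves the Cython source with its generated C. For readers who only need the behaviour, the same quadrant bookkeeping reduces to the short pure-NumPy sketch below; it is a readability aid mirroring source lines 010-101 shown above, not the compiled implementation.

import numpy as np

def do_agent_sense_reference(data):
    """Pure-Python mirror of doAgentSense: per-agent observation is
    <aNE, aNW, aSW, aSE, pNE, pNW, pSW, pSE>."""
    n_agents = data['Number of Agents']
    n_pois = data['Number of POIs']
    min_dist_sqr = data["Minimum Distance"] ** 2
    agent_pos = data["Agent Positions"]
    poi_vals = data['Poi Values']
    poi_pos = data["Poi Positions"]
    orient = data["Agent Orientations"]
    obs = np.zeros((n_agents, 8))

    def accumulate(agent, sep_global, value, base):
        # rotate the global separation into the agent frame (inverse rotation)
        dx = orient[agent, 0] * sep_global[0] + orient[agent, 1] * sep_global[1]
        dy = orient[agent, 0] * sep_global[1] - orient[agent, 1] * sep_global[0]
        # bounding the squared distance implicitly bounds the sensor value
        dist_sqr = max(dx * dx + dy * dy, min_dist_sqr)
        # quadrant order within each 4-slot block: NE, NW, SW, SE
        quad = (0 if dy > 0 else 3) if dx > 0 else (1 if dy > 0 else 2)
        obs[agent, base + quad] += value / dist_sqr

    for i in range(n_agents):
        for j in range(n_agents):
            if i != j:                       # agents do not sense themselves
                accumulate(i, agent_pos[j] - agent_pos[i], 1.0, base=0)
        for p in range(n_pois):
            accumulate(i, poi_pos[p] - agent_pos[i], poi_vals[p], base=4)

    data["Agent Observations"] = obs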
 103: @cython.boundscheck(False)  # Deactivate bounds checking
+
 104: @cython.wraparound(False)   # Deactivate negative indexing.
+
+105: cpdef doAgentProcess(data):
+
static PyObject *__pyx_pw_4code_14agent_domain_2_3doAgentProcess(PyObject *__pyx_self, PyObject *__pyx_v_data); /*proto*/
+static PyObject *__pyx_f_4code_14agent_domain_2_doAgentProcess(PyObject *__pyx_v_data, CYTHON_UNUSED int __pyx_skip_dispatch) {
+  int __pyx_v_number_agents;
+  PyObject *__pyx_v_actionCol = NULL;
+  PyObject *__pyx_v_policyCol = NULL;
+  PyObject *__pyx_v_observationCol = NULL;
+  int __pyx_v_agentIndex;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentProcess", 0);
+/* … */
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_AddTraceback("code.agent_domain_2.doAgentProcess", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XDECREF(__pyx_v_actionCol);
+  __Pyx_XDECREF(__pyx_v_policyCol);
+  __Pyx_XDECREF(__pyx_v_observationCol);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_4code_14agent_domain_2_3doAgentProcess(PyObject *__pyx_self, PyObject *__pyx_v_data); /*proto*/
+static PyObject *__pyx_pw_4code_14agent_domain_2_3doAgentProcess(PyObject *__pyx_self, PyObject *__pyx_v_data) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentProcess (wrapper)", 0);
+  __pyx_r = __pyx_pf_4code_14agent_domain_2_2doAgentProcess(__pyx_self, ((PyObject *)__pyx_v_data));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_4code_14agent_domain_2_2doAgentProcess(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_data) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentProcess", 0);
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __pyx_f_4code_14agent_domain_2_doAgentProcess(__pyx_v_data, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 105, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("code.agent_domain_2.doAgentProcess", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+106:     cdef int number_agents = data['Number of Agents']
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Number_of_Agents); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 106, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 106, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_number_agents = __pyx_t_2;
+
+107:     actionCol = np.zeros((number_agents, 2), dtype = np.float_)
+
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_3);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_number_agents); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_4);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1);
+  __Pyx_INCREF(__pyx_int_2);
+  __Pyx_GIVEREF(__pyx_int_2);
+  PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_int_2);
+  __pyx_t_1 = 0;
+  __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_4);
+  PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4);
+  __pyx_t_4 = 0;
+  __pyx_t_4 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_4);
+  __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_np); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_5);
+  __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_float); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_dtype, __pyx_t_6) < 0) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+  __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 107, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+  __pyx_v_actionCol = __pyx_t_6;
+  __pyx_t_6 = 0;
+
+108:     policyCol = data["Agent Policies"]
+
  __pyx_t_6 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Agent_Policies); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 108, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_v_policyCol = __pyx_t_6;
+  __pyx_t_6 = 0;
+
+109:     observationCol = data["Agent Observations"]
+
  __pyx_t_6 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Agent_Observations); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 109, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_v_observationCol = __pyx_t_6;
+  __pyx_t_6 = 0;
+
 110:     cdef int agentIndex
+
+111:     for agentIndex in range(number_agents):
+
  __pyx_t_2 = __pyx_v_number_agents;
+  __pyx_t_7 = __pyx_t_2;
+  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
+    __pyx_v_agentIndex = __pyx_t_8;
+
+112:         actionCol[agentIndex] = policyCol[agentIndex].get_action(observationCol[agentIndex])
+
    __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_policyCol, __pyx_v_agentIndex, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 112, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_get_action); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 112, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_observationCol, __pyx_v_agentIndex, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 112, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_3 = NULL;
+    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_1))) {
+      __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1);
+      if (likely(__pyx_t_3)) {
+        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
+        __Pyx_INCREF(__pyx_t_3);
+        __Pyx_INCREF(function);
+        __Pyx_DECREF_SET(__pyx_t_1, function);
+      }
+    }
+    __pyx_t_6 = (__pyx_t_3) ? __Pyx_PyObject_Call2Args(__pyx_t_1, __pyx_t_3, __pyx_t_4) : __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_4);
+    __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 112, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_6);
+    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+    if (unlikely(__Pyx_SetItemInt(__pyx_v_actionCol, __pyx_v_agentIndex, __pyx_t_6, int, 1, __Pyx_PyInt_From_int, 0, 0, 0) < 0)) __PYX_ERR(0, 112, __pyx_L1_error)
+    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+  }
+
+113:     data["Agent Actions"] = actionCol
+
  if (unlikely(PyObject_SetItem(__pyx_v_data, __pyx_kp_s_Agent_Actions, __pyx_v_actionCol) < 0)) __PYX_ERR(0, 113, __pyx_L1_error)
+
 114: 
+
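doAgentProcess above assumes only that each entry of data["Agent Policies"] exposes a get_action(observation) method returning a length-2 agent-frame action. The toy policy below is a hypothetical stand-in that satisfies that contract (it is not a class from this repository); it steers toward the strongest POI quadrant of the 8-element observation built by doAgentSense.

import numpy as np

class GreedyPoiPolicy:
    """Hypothetical stand-in matching the interface used by doAgentProcess."""
    def get_action(self, observation):
        # observation layout: <aNE, aNW, aSW, aSE, pNE, pNW, pSW, pSE>
        poi = observation[4:8]
        # agent-frame unit steps toward each quadrant (x = east, y = north), in NE, NW, SW, SE order
        directions = np.array([[1, 1], [-1, 1], [-1, -1], [1, -1]], dtype=float)
        step = directions[int(np.argmax(poi))] / np.sqrt(2.0)
        return step  # length-2, already inside the [-1, 1] clip applied in doAgentMove

# Example wiring:
# data["Agent Policies"] = [GreedyPoiPolicy() for _ in range(data['Number of Agents'])]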
 115: @cython.boundscheck(False)  # Deactivate bounds checking
+
 116: @cython.wraparound(False)   # Deactivate negative indexing.
+
+117: cpdef doAgentMove(data):
+
static PyObject *__pyx_pw_4code_14agent_domain_2_5doAgentMove(PyObject *__pyx_self, PyObject *__pyx_v_data); /*proto*/
+static PyObject *__pyx_f_4code_14agent_domain_2_doAgentMove(PyObject *__pyx_v_data, CYTHON_UNUSED int __pyx_skip_dispatch) {
+  CYTHON_UNUSED float __pyx_v_worldWidth;
+  CYTHON_UNUSED float __pyx_v_worldLength;
+  int __pyx_v_number_agents;
+  __Pyx_memviewslice __pyx_v_agentPositionCol = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_orientationCol = { 0, 0, { 0 }, { 0 }, { 0 } };
+  PyObject *__pyx_v_npActionCol = NULL;
+  __Pyx_memviewslice __pyx_v_actionCol = { 0, 0, { 0 }, { 0 }, { 0 } };
+  int __pyx_v_agentIndex;
+  double __pyx_v_globalFrameMotion0;
+  double __pyx_v_globalFrameMotion1;
+  double __pyx_v_norm;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentMove", 0);
+/* … */
+  /* function exit code */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_4, 1);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("code.agent_domain_2.doAgentMove", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __PYX_XDEC_MEMVIEW(&__pyx_v_agentPositionCol, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_orientationCol, 1);
+  __Pyx_XDECREF(__pyx_v_npActionCol);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_actionCol, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_4code_14agent_domain_2_5doAgentMove(PyObject *__pyx_self, PyObject *__pyx_v_data); /*proto*/
+static PyObject *__pyx_pw_4code_14agent_domain_2_5doAgentMove(PyObject *__pyx_self, PyObject *__pyx_v_data) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentMove (wrapper)", 0);
+  __pyx_r = __pyx_pf_4code_14agent_domain_2_4doAgentMove(__pyx_self, ((PyObject *)__pyx_v_data));
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyObject *__pyx_pf_4code_14agent_domain_2_4doAgentMove(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_data) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("doAgentMove", 0);
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __pyx_f_4code_14agent_domain_2_doAgentMove(__pyx_v_data, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 117, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("code.agent_domain_2.doAgentMove", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+118:     cdef float worldWidth = data["World Width"]
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_World_Width); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 118, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __pyx_PyFloat_AsFloat(__pyx_t_1); if (unlikely((__pyx_t_2 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 118, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_worldWidth = __pyx_t_2;
+
+119:     cdef float worldLength = data["World Length"]
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_World_Length); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 119, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __pyx_PyFloat_AsFloat(__pyx_t_1); if (unlikely((__pyx_t_2 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 119, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_worldLength = __pyx_t_2;
+
+120:     cdef int number_agents = data['Number of Agents']
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Number_of_Agents); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 120, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_3 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_3 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 120, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_number_agents = __pyx_t_3;
+
+121:     cdef double[:, :] agentPositionCol = data["Agent Positions"]
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Agent_Positions); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 121, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1, PyBUF_WRITABLE); if (unlikely(!__pyx_t_4.memview)) __PYX_ERR(0, 121, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_agentPositionCol = __pyx_t_4;
+  __pyx_t_4.memview = NULL;
+  __pyx_t_4.data = NULL;
+
+122:     cdef double[:, :] orientationCol = data["Agent Orientations"]
+
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Agent_Orientations); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 122, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1, PyBUF_WRITABLE); if (unlikely(!__pyx_t_4.memview)) __PYX_ERR(0, 122, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_orientationCol = __pyx_t_4;
+  __pyx_t_4.memview = NULL;
+  __pyx_t_4.data = NULL;
+
+123:     npActionCol = np.array(data["Agent Actions"]).astype(np.float_)
+
  __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_np); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 123, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_array); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 123, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_7);
+  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+  __pyx_t_6 = __Pyx_PyObject_Dict_GetItem(__pyx_v_data, __pyx_kp_s_Agent_Actions); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 123, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_t_8 = NULL;
+  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_7))) {
+    __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_7);
+    if (likely(__pyx_t_8)) {
+      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
+      __Pyx_INCREF(__pyx_t_8);
+      __Pyx_INCREF(function);
+      __Pyx_DECREF_SET(__pyx_t_7, function);
+    }
+  }
+  __pyx_t_5 = (__pyx_t_8) ? __Pyx_PyObject_Call2Args(__pyx_t_7, __pyx_t_8, __pyx_t_6) : __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+  if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 123, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_5);
+  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_astype); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 123, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_7);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_np); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 123, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_5);
+  __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_float); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 123, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  __pyx_t_5 = NULL;
+  if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_7))) {
+    __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_7);
+    if (likely(__pyx_t_5)) {
+      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
+      __Pyx_INCREF(__pyx_t_5);
+      __Pyx_INCREF(function);
+      __Pyx_DECREF_SET(__pyx_t_7, function);
+    }
+  }
+  __pyx_t_1 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_7, __pyx_t_5, __pyx_t_6) : __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 123, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+  __pyx_v_npActionCol = __pyx_t_1;
+  __pyx_t_1 = 0;
+
+124:     npActionCol = np.clip(npActionCol, -1, 1)
+
  __Pyx_GetModuleGlobalName(__pyx_t_7, __pyx_n_s_np); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 124, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_7);
+  __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_clip); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 124, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_6);
+  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+  __pyx_t_7 = NULL;
+  __pyx_t_3 = 0;
+  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_6))) {
+    __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_6);
+    if (likely(__pyx_t_7)) {
+      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6);
+      __Pyx_INCREF(__pyx_t_7);
+      __Pyx_INCREF(function);
+      __Pyx_DECREF_SET(__pyx_t_6, function);
+      __pyx_t_3 = 1;
+    }
+  }
+  #if CYTHON_FAST_PYCALL
+  if (PyFunction_Check(__pyx_t_6)) {
+    PyObject *__pyx_temp[4] = {__pyx_t_7, __pyx_v_npActionCol, __pyx_int_neg_1, __pyx_int_1};
+    __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_3, 3+__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 124, __pyx_L1_error)
+    __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+    __Pyx_GOTREF(__pyx_t_1);
+  } else
+  #endif
+  #if CYTHON_FAST_PYCCALL
+  if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) {
+    PyObject *__pyx_temp[4] = {__pyx_t_7, __pyx_v_npActionCol, __pyx_int_neg_1, __pyx_int_1};
+    __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_3, 3+__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 124, __pyx_L1_error)
+    __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+    __Pyx_GOTREF(__pyx_t_1);
+  } else
+  #endif
+  {
+    __pyx_t_5 = PyTuple_New(3+__pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 124, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_5);
+    if (__pyx_t_7) {
+      __Pyx_GIVEREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_7); __pyx_t_7 = NULL;
+    }
+    __Pyx_INCREF(__pyx_v_npActionCol);
+    __Pyx_GIVEREF(__pyx_v_npActionCol);
+    PyTuple_SET_ITEM(__pyx_t_5, 0+__pyx_t_3, __pyx_v_npActionCol);
+    __Pyx_INCREF(__pyx_int_neg_1);
+    __Pyx_GIVEREF(__pyx_int_neg_1);
+    PyTuple_SET_ITEM(__pyx_t_5, 1+__pyx_t_3, __pyx_int_neg_1);
+    __Pyx_INCREF(__pyx_int_1);
+    __Pyx_GIVEREF(__pyx_int_1);
+    PyTuple_SET_ITEM(__pyx_t_5, 2+__pyx_t_3, __pyx_int_1);
+    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 124, __pyx_L1_error)
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  }
+  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+  __Pyx_DECREF_SET(__pyx_v_npActionCol, __pyx_t_1);
+  __pyx_t_1 = 0;
+
+125:     cdef double[:, :] actionCol = npActionCol
+
  __pyx_t_4 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_v_npActionCol, PyBUF_WRITABLE); if (unlikely(!__pyx_t_4.memview)) __PYX_ERR(0, 125, __pyx_L1_error)
+  __pyx_v_actionCol = __pyx_t_4;
+  __pyx_t_4.memview = NULL;
+  __pyx_t_4.data = NULL;
+
 126: 
+
 127:     cdef int agentIndex
+
 128: 
+
 129:     cdef double globalFrameMotion0, globalFrameMotion1, norm
+
 130: 
+
 131:     # move all agents
+
+132:     for agentIndex in range(number_agents):
+
  __pyx_t_3 = __pyx_v_number_agents;
+  __pyx_t_9 = __pyx_t_3;
+  for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
+    __pyx_v_agentIndex = __pyx_t_10;
+
 133: 
+
 134:         # turn action into global frame motion
+
+135:         globalFrameMotion0 = orientationCol[agentIndex, 0] * actionCol[agentIndex, 0] - orientationCol[agentIndex, 1] * actionCol[agentIndex, 1]
+
    __pyx_t_11 = __pyx_v_agentIndex;
+    __pyx_t_12 = 0;
+    __pyx_t_13 = __pyx_v_agentIndex;
+    __pyx_t_14 = 0;
+    __pyx_t_15 = __pyx_v_agentIndex;
+    __pyx_t_16 = 1;
+    __pyx_t_17 = __pyx_v_agentIndex;
+    __pyx_t_18 = 1;
+    __pyx_v_globalFrameMotion0 = (((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_11 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_12 * __pyx_v_orientationCol.strides[1]) ))) * (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_actionCol.data + __pyx_t_13 * __pyx_v_actionCol.strides[0]) ) + __pyx_t_14 * __pyx_v_actionCol.strides[1]) )))) - ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_15 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_16 * __pyx_v_orientationCol.strides[1]) ))) * (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_actionCol.data + __pyx_t_17 * __pyx_v_actionCol.strides[0]) ) + __pyx_t_18 * __pyx_v_actionCol.strides[1]) )))));
+
+136:         globalFrameMotion1 = orientationCol[agentIndex, 0] * actionCol[agentIndex, 1] + orientationCol[agentIndex, 1] * actionCol[agentIndex, 0]
+
    __pyx_t_18 = __pyx_v_agentIndex;
+    __pyx_t_17 = 0;
+    __pyx_t_16 = __pyx_v_agentIndex;
+    __pyx_t_15 = 1;
+    __pyx_t_14 = __pyx_v_agentIndex;
+    __pyx_t_13 = 1;
+    __pyx_t_12 = __pyx_v_agentIndex;
+    __pyx_t_11 = 0;
+    __pyx_v_globalFrameMotion1 = (((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_18 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_17 * __pyx_v_orientationCol.strides[1]) ))) * (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_actionCol.data + __pyx_t_16 * __pyx_v_actionCol.strides[0]) ) + __pyx_t_15 * __pyx_v_actionCol.strides[1]) )))) + ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_14 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_13 * __pyx_v_orientationCol.strides[1]) ))) * (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_actionCol.data + __pyx_t_12 * __pyx_v_actionCol.strides[0]) ) + __pyx_t_11 * __pyx_v_actionCol.strides[1]) )))));
+
 137: 
+
 138: 
+
 139:         # globally move and reorient agent
+
+140:         agentPositionCol[agentIndex, 0] += globalFrameMotion0
+
    __pyx_t_11 = __pyx_v_agentIndex;
+    __pyx_t_12 = 0;
+    *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_agentPositionCol.data + __pyx_t_11 * __pyx_v_agentPositionCol.strides[0]) ) + __pyx_t_12 * __pyx_v_agentPositionCol.strides[1]) )) += __pyx_v_globalFrameMotion0;
+
+141:         agentPositionCol[agentIndex, 1] += globalFrameMotion1
+
    __pyx_t_12 = __pyx_v_agentIndex;
+    __pyx_t_11 = 1;
+    *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_agentPositionCol.data + __pyx_t_12 * __pyx_v_agentPositionCol.strides[0]) ) + __pyx_t_11 * __pyx_v_agentPositionCol.strides[1]) )) += __pyx_v_globalFrameMotion1;
+
 142: 
+
+143:         if globalFrameMotion0 == 0.0 and globalFrameMotion1 == 0.0:
+
    __pyx_t_20 = ((__pyx_v_globalFrameMotion0 == 0.0) != 0);
+    if (__pyx_t_20) {
+    } else {
+      __pyx_t_19 = __pyx_t_20;
+      goto __pyx_L6_bool_binop_done;
+    }
+    __pyx_t_20 = ((__pyx_v_globalFrameMotion1 == 0.0) != 0);
+    __pyx_t_19 = __pyx_t_20;
+    __pyx_L6_bool_binop_done:;
+    if (__pyx_t_19) {
+/* … */
+      goto __pyx_L5;
+    }
+
+144:             orientationCol[agentIndex,0] = 1.0
+
      __pyx_t_11 = __pyx_v_agentIndex;
+      __pyx_t_12 = 0;
+      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_11 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_12 * __pyx_v_orientationCol.strides[1]) )) = 1.0;
+
+145:             orientationCol[agentIndex,1] = 0.0
+
      __pyx_t_12 = __pyx_v_agentIndex;
+      __pyx_t_11 = 1;
+      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_12 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_11 * __pyx_v_orientationCol.strides[1]) )) = 0.0;
+
 146:         else:
+
+147:             norm = sqrt(globalFrameMotion0**2 +  globalFrameMotion1 **2)
+
    /*else*/ {
+      __pyx_v_norm = sqrt((pow(__pyx_v_globalFrameMotion0, 2.0) + pow(__pyx_v_globalFrameMotion1, 2.0)));
+
+148:             orientationCol[agentIndex,0] = globalFrameMotion0 /norm
+
      if (unlikely(__pyx_v_norm == 0)) {
+        PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+        __PYX_ERR(0, 148, __pyx_L1_error)
+      }
+      __pyx_t_11 = __pyx_v_agentIndex;
+      __pyx_t_12 = 0;
+      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_11 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_12 * __pyx_v_orientationCol.strides[1]) )) = (__pyx_v_globalFrameMotion0 / __pyx_v_norm);
+
+149:             orientationCol[agentIndex,1] = globalFrameMotion1 /norm
+
      if (unlikely(__pyx_v_norm == 0)) {
+        PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+        __PYX_ERR(0, 149, __pyx_L1_error)
+      }
+      __pyx_t_12 = __pyx_v_agentIndex;
+      __pyx_t_11 = 1;
+      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_orientationCol.data + __pyx_t_12 * __pyx_v_orientationCol.strides[0]) ) + __pyx_t_11 * __pyx_v_orientationCol.strides[1]) )) = (__pyx_v_globalFrameMotion1 / __pyx_v_norm);
+    }
+    __pyx_L5:;
+  }
+
 150: 
+
 151:         # # Check if action moves agent within the world bounds
+
 152:         # if agentPositionCol[agentIndex,0] > worldWidth:
+
 153:         #     agentPositionCol[agentIndex,0] = worldWidth
+
 154:         # elif agentPositionCol[agentIndex,0] < 0.0:
+
 155:         #     agentPositionCol[agentIndex,0] = 0.0
+
 156:         #
+
 157:         # if agentPositionCol[agentIndex,1] > worldLength:
+
 158:         #     agentPositionCol[agentIndex,1] = worldLength
+
 159:         # elif agentPositionCol[agentIndex,1] < 0.0:
+
 160:         #     agentPositionCol[agentIndex,1] = 0.0
+
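The commented-out block at lines 151-160 is a currently disabled world-bounds clamp. If it were re-enabled, a vectorized sketch of the same behavior is below; worldWidth and worldLength are assumed to be the bounds used elsewhere in this file, and the helper name clamp_to_world is ours.

import numpy as np

def clamp_to_world(agent_positions, world_width, world_length):
    # Keep every agent inside the [0, world_width] x [0, world_length] rectangle,
    # mirroring the per-coordinate if/elif checks in the disabled block above.
    agent_positions[:, 0] = np.clip(agent_positions[:, 0], 0.0, world_width)
    agent_positions[:, 1] = np.clip(agent_positions[:, 1], 0.0, world_length)
    return agent_positions
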
 161: 
+
 162: 
+
+163:     data["Agent Positions"]  = agentPositionCol
+
  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_agentPositionCol, 2, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 163, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (unlikely(PyObject_SetItem(__pyx_v_data, __pyx_kp_s_Agent_Positions, __pyx_t_1) < 0)) __PYX_ERR(0, 163, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+164:     data["Agent Orientations"] = orientationCol
+
  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_orientationCol, 2, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 164, __pyx_L1_error)
+  __Pyx_GOTREF(__pyx_t_1);
+  if (unlikely(PyObject_SetItem(__pyx_v_data, __pyx_kp_s_Agent_Orientations, __pyx_t_1) < 0)) __PYX_ERR(0, 164, __pyx_L1_error)
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
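Taken together, lines 125-164 implement one movement step: rotate each agent's action into the global frame, translate the agent, and re-orient it along its motion direction (defaulting to the +x axis when it did not move). Below is a self-contained NumPy sketch of that update for readability; it is illustrative only, not a drop-in replacement for the compiled extension, and the function name movement_step_sketch is ours.

import numpy as np

def movement_step_sketch(positions, orientations, actions):
    """positions, orientations, actions: float arrays of shape (number_agents, 2);
    each orientation row is a unit vector (cos(theta), sin(theta))."""
    for i in range(positions.shape[0]):
        ox, oy = orientations[i]
        ax, ay = actions[i]
        # agent-frame action -> global-frame motion (lines 135-136)
        gx = ox * ax - oy * ay
        gy = ox * ay + oy * ax
        # translate the agent (lines 140-141)
        positions[i, 0] += gx
        positions[i, 1] += gy
        # re-orient along the motion direction (lines 143-149)
        if gx == 0.0 and gy == 0.0:
            orientations[i, 0], orientations[i, 1] = 1.0, 0.0
        else:
            norm = np.sqrt(gx * gx + gy * gy)
            orientations[i, 0], orientations[i, 1] = gx / norm, gy / norm
    return positions, orientations

Note that the ZeroDivisionError guards in the generated C for lines 148-149 should never fire for finite inputs, since the zero-motion case is already handled by the branch at line 143 before the division.
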
diff --git a/code/makefile b/code/makefile new file mode 100644 index 0000000..f7a5596 --- /dev/null +++ b/code/makefile @@ -0,0 +1,24 @@ +CC = gcc +PYVERSION=3.8 +FLAGS = -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing -I/usr/include/python${PYVERSION} -o + +default: part1 part2 part3 clean + +part1: + cython agent_domain_2.pyx + ${CC} ${FLAGS} agent_domain_2.so agent_domain_2.c + +part2: + cython ccea_2.pyx + ${CC} ${FLAGS} ccea_2.so ccea_2.c + +part3: + cython reward_2.pyx + ${CC} ${FLAGS} reward_2.so reward_2.c + +clean: + rm *.c + +clear: + rm *.c + rm *.so diff --git a/code/reward_2.pyx b/code/reward_2.pyx index 4c73cf3..ddd8a9a 100644 --- a/code/reward_2.pyx +++ b/code/reward_2.pyx @@ -14,9 +14,9 @@ def assignGlobalReward(data): cdef double[:, :, :] agentPositionHistory = data["Agent Position History"] cdef double[:] poiValueCol = data['Poi Values'] cdef double[:, :] poiPositionCol = data["Poi Positions"] - + cdef stepIndex = data["Step Index"] - cdef int poiIndex, stepIndex, agentIndex, observerCount + cdef int poiIndex, agentIndex, observerCount cdef double separation0, separation1, closestObsDistanceSqr, distanceSqr, stepClosestObsDistanceSqr cdef double Inf = float("inf") @@ -25,33 +25,35 @@ def assignGlobalReward(data): for poiIndex in range(number_pois): closestObsDistanceSqr = Inf - for stepIndex in range(historyStepCount): - # Count how many agents observe poi, update closest distance if necessary - observerCount = 0 - stepClosestObsDistanceSqr = Inf - for agentIndex in range(number_agents): - # Calculate separation distance between poi and agent - separation0 = poiPositionCol[poiIndex, 0] - agentPositionHistory[stepIndex, agentIndex, 0] - separation1 = poiPositionCol[poiIndex, 1] - agentPositionHistory[stepIndex, agentIndex, 1] - distanceSqr = separation0 * separation0 + separation1 * separation1 - - # Check if agent observes poi, update closest step distance - if distanceSqr < observationRadiusSqr: - observerCount += 1 - if distanceSqr < stepClosestObsDistanceSqr: - stepClosestObsDistanceSqr = distanceSqr - - - # update closest distance only if poi is observed - if observerCount >= coupling: - if stepClosestObsDistanceSqr < closestObsDistanceSqr: - closestObsDistanceSqr = stepClosestObsDistanceSqr + #for stepIndex in range(historyStepCount): + # Count how many agents observe poi, update closest distance if necessary + observerCount = 0 + stepClosestObsDistanceSqr = 0 + for agentIndex in range(number_agents): + # Calculate separation distance between poi and agent + separation0 = poiPositionCol[poiIndex, 0] - agentPositionHistory[stepIndex, agentIndex, 0] + separation1 = poiPositionCol[poiIndex, 1] - agentPositionHistory[stepIndex, agentIndex, 1] + distanceSqr = separation0 * separation0 + separation1 * separation1 + + # Check if agent observes poi, update closest step distance + if distanceSqr < observationRadiusSqr: + observerCount += 1 + #if distanceSqr > stepClosestObsDistanceSqr: + stepClosestObsDistanceSqr += distanceSqr + + stepClosestObsDistanceSqr /= (float(observerCount)+0.001) + + # update closest distance only if poi is observed + if observerCount >= coupling: + if stepClosestObsDistanceSqr < closestObsDistanceSqr: + closestObsDistanceSqr = stepClosestObsDistanceSqr # add to global reward if poi is observed if closestObsDistanceSqr < observationRadiusSqr: if closestObsDistanceSqr < minDistanceSqr: closestObsDistanceSqr = minDistanceSqr - globalReward += poiValueCol[poiIndex] / closestObsDistanceSqr + 
closestObsDistanceSqr=(observationRadiusSqr-closestObsDistanceSqr)/(observationRadiusSqr-minDistanceSqr) + globalReward += poiValueCol[poiIndex] * closestObsDistanceSqr data["Global Reward"] = globalReward data["Agent Rewards"] = np.ones(number_agents) * globalReward @@ -83,7 +85,7 @@ def assignDifferenceReward(data): for poiIndex in range(number_pois): closestObsDistanceSqr = Inf - for stepIndex in range(historyStepCount): + for stepIndex in range(historyStepCount-1,historyStepCount): # Count how many agents observe poi, update closest distance if necessary observerCount = 0 stepClosestObsDistanceSqr = Inf @@ -109,14 +111,14 @@ def assignDifferenceReward(data): if closestObsDistanceSqr < observationRadiusSqr: if closestObsDistanceSqr < minDistanceSqr: closestObsDistanceSqr = minDistanceSqr - globalReward += poiValueCol[poiIndex] / closestObsDistanceSqr + globalReward += poiValueCol[poiIndex] #/ closestObsDistanceSqr for agentIndex in range(number_agents): globalWithoutReward = 0 for poiIndex in range(number_pois): closestObsDistanceSqr = Inf - for stepIndex in range(historyStepCount): + for stepIndex in range(historyStepCount-1,historyStepCount): # Count how many agents observe poi, update closest distance if necessary observerCount = 0 stepClosestObsDistanceSqr = Inf @@ -143,7 +145,7 @@ def assignDifferenceReward(data): if closestObsDistanceSqr < observationRadiusSqr: if closestObsDistanceSqr < minDistanceSqr: closestObsDistanceSqr = minDistanceSqr - globalWithoutReward += poiValueCol[poiIndex] / closestObsDistanceSqr + globalWithoutReward += poiValueCol[poiIndex] #/ closestObsDistanceSqr differenceRewardCol[agentIndex] = globalReward - globalWithoutReward data["Agent Rewards"] = npDifferenceRewardCol @@ -181,7 +183,7 @@ def assignDppReward(data): for stepIndex in range(historyStepCount): # Count how many agents observe poi, update closest distance if necessary observerCount = 0 - stepClosestObsDistanceSqr = Inf + stepClosestObsDistanceSqr = 0 for agentIndex in range(number_agents): # Calculate separation distance between poi and agent separation0 = poiPositionCol[poiIndex, 0] - agentPositionHistory[stepIndex, agentIndex, 0] @@ -191,7 +193,7 @@ def assignDppReward(data): # Check if agent observes poi, update closest step distance if distanceSqr < observationRadiusSqr: observerCount += 1 - if distanceSqr < stepClosestObsDistanceSqr: + if distanceSqr > stepClosestObsDistanceSqr: stepClosestObsDistanceSqr = distanceSqr diff --git a/figs/Figure_166.png b/figs/Figure_166.png new file mode 100644 index 0000000..aaeedfa Binary files /dev/null and b/figs/Figure_166.png differ diff --git a/figs/Figure_52.png b/figs/Figure_52.png new file mode 100644 index 0000000..66afafb Binary files /dev/null and b/figs/Figure_52.png differ diff --git a/figs/Figure_83.png b/figs/Figure_83.png new file mode 100644 index 0000000..dd74b08 Binary files /dev/null and b/figs/Figure_83.png differ diff --git a/figs/Figure_freq.png b/figs/Figure_freq.png new file mode 100644 index 0000000..6c559db Binary files /dev/null and b/figs/Figure_freq.png differ diff --git a/figs/Figure_pos.png b/figs/Figure_pos.png new file mode 100644 index 0000000..1949aa3 Binary files /dev/null and b/figs/Figure_pos.png differ diff --git a/figs/Figure_ternary.png b/figs/Figure_ternary.png new file mode 100644 index 0000000..0ddf113 Binary files /dev/null and b/figs/Figure_ternary.png differ diff --git a/figs/Figure_types.png b/figs/Figure_types.png new file mode 100644 index 0000000..4fa6c55 Binary files /dev/null and b/figs/Figure_types.png 
differ diff --git a/figs/Figure_v2.png b/figs/Figure_v2.png new file mode 100644 index 0000000..b86d25c Binary files /dev/null and b/figs/Figure_v2.png differ diff --git a/figs/Figure_valsa.png b/figs/Figure_valsa.png new file mode 100644 index 0000000..8e55b8e Binary files /dev/null and b/figs/Figure_valsa.png differ diff --git a/figs/Figure_valsb.png b/figs/Figure_valsb.png new file mode 100644 index 0000000..db5c0e4 Binary files /dev/null and b/figs/Figure_valsb.png differ diff --git a/figs/figs summary.odt b/figs/figs summary.odt new file mode 100644 index 0000000..5ca58b1 Binary files /dev/null and b/figs/figs summary.odt differ diff --git a/figs/mars1.gif b/figs/mars1.gif new file mode 100644 index 0000000..6127055 Binary files /dev/null and b/figs/mars1.gif differ diff --git a/figs/mars1.odg b/figs/mars1.odg new file mode 100644 index 0000000..5c94248 Binary files /dev/null and b/figs/mars1.odg differ diff --git a/figs/mars2.gif b/figs/mars2.gif new file mode 100644 index 0000000..e8d1dcd Binary files /dev/null and b/figs/mars2.gif differ diff --git a/figs/mars2.odg b/figs/mars2.odg new file mode 100644 index 0000000..f20b148 Binary files /dev/null and b/figs/mars2.odg differ diff --git a/figsv2/Figure_1.png b/figsv2/Figure_1.png new file mode 100644 index 0000000..ba5603b Binary files /dev/null and b/figsv2/Figure_1.png differ diff --git a/figsv2/Figure_2.png b/figsv2/Figure_2.png new file mode 100644 index 0000000..de07c1f Binary files /dev/null and b/figsv2/Figure_2.png differ diff --git a/figsv2/Figure_3.png b/figsv2/Figure_3.png new file mode 100644 index 0000000..c86ce11 Binary files /dev/null and b/figsv2/Figure_3.png differ diff --git a/figsv2/Figure_4.png b/figsv2/Figure_4.png new file mode 100644 index 0000000..86c7e14 Binary files /dev/null and b/figsv2/Figure_4.png differ diff --git a/figsv2/Figure_5.png b/figsv2/Figure_5.png new file mode 100644 index 0000000..46198f6 Binary files /dev/null and b/figsv2/Figure_5.png differ diff --git a/figsv2/Figure_6.png b/figsv2/Figure_6.png new file mode 100644 index 0000000..71c62bb Binary files /dev/null and b/figsv2/Figure_6.png differ diff --git a/figsv2/Figure_7.png b/figsv2/Figure_7.png new file mode 100644 index 0000000..3755eb0 Binary files /dev/null and b/figsv2/Figure_7.png differ diff --git a/figsv2/Figure_fixed.png b/figsv2/Figure_fixed.png new file mode 100644 index 0000000..e425faf Binary files /dev/null and b/figsv2/Figure_fixed.png differ diff --git a/figsv2/Figure_random.png b/figsv2/Figure_random.png new file mode 100644 index 0000000..a5c9b92 Binary files /dev/null and b/figsv2/Figure_random.png differ diff --git a/gym_example.py b/gym_example.py index 3b2df68..6525029 100644 --- a/gym_example.py +++ b/gym_example.py @@ -1,88 +1,66 @@ -from rover_domain_core_gym import RoverDomainCoreGym -from mods import * -import datetime -from code.world_setup import * # Rover Domain Construction -from code.agent_domain_2 import * # Rover Domain Dynamic -from code.trajectory_history import * # Agent Position Trajectory History -from code.reward_2 import * # Agent Reward -from code.reward_history import * # Performance Recording -from code.ccea_2 import * # CCEA -from code.save_to_pickle import * # Save data as pickle file -import random +""" +An example using the rover domain gym-style interface and the standard, included CCEA learning algorithms. +This is a minimal example, showing the minimal Gym interface. 
+""" +from rover_domain_core_gym import RoverDomainGym +import code.ccea_2 as ccea +import code.agent_domain_2 as domain +import mods -stepCount = 5 -trainCountXEpisode = 3 -testCountXEpisode = 1 -episodeCount = 20 +episodeCount = 1000 # Number of learning episodes -# NOTE: Add the mod functions (variables) to run to modCol here: -modCol = [ - globalRewardMod, - differenceRewardMod, - dppRewardMod -] +sim = RoverDomainGym() +#mods.sequentialPoi(sim) +mods.recipePoi(sim) +#mods.lowVisibility(sim) +obs=sim.reset() -i = 0 -while True: - print("Run %i"%(i)) - random.shuffle(modCol) - for mod in modCol: - sim = RoverDomainCoreGym() - mod(sim) +sim.data["Coupling"]=3 + +obs_size=len(obs[0]) + +print(obs_size) +ccea.initCcea(input_shape=obs_size, num_outputs=2, num_units=32)(sim.data) + +for episodeIndex in range(episodeCount): + sim.data["Episode Index"] = episodeIndex + populationSize=len(sim.data['Agent Populations'][0]) + GlobalRewards=[0.0] + + for worldIndex in range(populationSize): + sim.data["World Index"]=worldIndex + + obs = sim.reset() + + #ccea.assignCceaPolicies(sim.data) + mods.assignHomogeneousPolicy(sim) + + done = False + stepCount = 0 - #Trial Begins - createRewardHistory(sim.data) - initCcea(input_shape= 8, num_outputs=2, num_units = 32)(sim.data) - sim.data["Steps"] = stepCount + while not done: + + #mods.poiVelocity(sim) - for episodeIndex in range(episodeCount): - sim.data["Episode Index"] = episodeIndex + # Select actions and create the joint action from the simulation data + # Note that this specific function extracts "obs" from the data structure directly, which is why obs is not + # directly used in this example. - # Training Phase + domain.doAgentProcess(sim.data) + #mods.abilityVariation(sim) - obs = sim.reset('Train', True) + jointAction = sim.data["Agent Actions"] + + obs, reward, done, info = sim.step(jointAction) - for worldIndex in range(trainCountXEpisode): - sim.data["World Index"] = worldIndex - obs = sim.reset('Train', False) - assignCceaPolicies(sim.data) + stepCount += 1 + if ( episodeIndex%50==49 and worldIndex==0): + sim.render() - done = False - stepCount = 0 - while not done: - doAgentProcess(sim.data) - jointAction = sim.data["Agent Actions"] - obs, reward, done, info = sim.step(jointAction) - stepCount += 1 - - rewardCceaPolicies(sim.data) - - - # Testing Phase - - obs = sim.reset('Test', True) - assignBestCceaPolicies(sim.data) - - for worldIndex in range(testCountXEpisode): - sim.data["World Index"] = worldIndex - obs = sim.reset('Test', False) - done = False - stepCount = 0 - while not done: - doAgentProcess(sim.data) - jointAction = sim.data["Agent Actions"] - obs, reward, done, info = sim.step(jointAction) - stepCount += 1 - - evolveCceaPolicies(sim.data) - updateRewardHistory(sim.data) - - # Trial End - saveRewardHistory(sim.data) - saveTrajectoryHistories(sim.data) - - + GlobalRewards.append(sim.data["Global Reward"]) + ccea.rewardCceaPolicies(sim.data) - i += 1 \ No newline at end of file + ccea.evolveCceaPolicies(sim.data) + print(episodeIndex,max(GlobalRewards)) diff --git a/gym_example2.py b/gym_example2.py new file mode 100644 index 0000000..c3f8ae2 --- /dev/null +++ b/gym_example2.py @@ -0,0 +1,70 @@ +""" +An example using the rover domain gym-style interface and the standard, included CCEA learning algorithms. +This is a minimal example, showing the minimal Gym interface. 
+""" +from rover_domain_core_gym import RoverDomainGym +import code.ccea_2 as ccea +import code.agent_domain_2 as domain +import mods +from sys import argv +import numpy as np + +episodeCount = 1000 # Number of learning episodes +nagents=8 +sim = RoverDomainGym(nagents,250) +mods.recipePoi(sim) +obs=sim.reset() + +DATA = str(nagents)+"agent/data"+argv[1]+"_0.txt" + +sim.data["Coupling"]=3 + +obs_size=len(obs[0]) + +print(obs_size) +ccea.initCcea(input_shape=obs_size, num_outputs=2, num_units=32)(sim.data) + +for episodeIndex in range(episodeCount): + sim.data["Episode Index"] = episodeIndex + populationSize=len(sim.data['Agent Populations'][0]) + GlobalRewards=[0.0] + + for worldIndex in range(populationSize): + sim.data["World Index"]=worldIndex + + obs = sim.reset() + + ccea.assignCceaPolicies(sim.data) + #mods.assignHomogeneousPolicy(sim) + + done = False + stepCount = 0 + + while not done: + + #mods.poiVelocity(sim) + + # Select actions and create the joint action from the simulation data + # Note that this specific function extracts "obs" from the data structure directly, which is why obs is not + # directly used in this example. + + domain.doAgentProcess(sim.data) + #mods.abilityVariation(sim) + + jointAction = sim.data["Agent Actions"] + + obs, reward, done, info = sim.step(jointAction) + + stepCount += 1 + #if ( episodeIndex%50==49 and worldIndex==0): + # sim.render() + + + GlobalRewards.append(sim.data["Global Reward"]) + ccea.rewardCceaPolicies(sim.data) + tr=np.sum(obs[:,-4:],axis=0) + ccea.evolveCceaPolicies(sim.data) + print(tr,episodeIndex,max(GlobalRewards)) + with open(DATA, "a") as myfile: + myfile.write( ",".join([str(f) for f in [episodeIndex,float(max(GlobalRewards))/float(nagents*6)]])) + myfile.write('\n') diff --git a/makefile b/makefile new file mode 100644 index 0000000..3c145c6 --- /dev/null +++ b/makefile @@ -0,0 +1,17 @@ +CC = gcc +PYVERSION=3.8 +FLAGS = -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing -I/usr/include/python${PYVERSION} -o + +default: compile clean + +compile: + cython mod_funcs.pyx + ${CC} ${FLAGS} mod_funcs.so mod_funcs.c + +clean: + rm mod_funcs.c + +video: + ffmpeg -r 12 -i ims/test%d.png -c:v libx264 -vf fps=12 -pix_fmt yuv420p out.mp4 + + diff --git a/mod_funcs.pyx b/mod_funcs.pyx new file mode 100644 index 0000000..bf8957d --- /dev/null +++ b/mod_funcs.pyx @@ -0,0 +1,570 @@ +import numpy as np +cimport cython + +cdef extern from "math.h": + double sqrt(double m) + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. 
+ +cpdef assignGlobalRewardMod(data): + + cdef int[:] itemHeld=data["Item Held"] + cdef int number_agents = data['Number of Agents'] + cdef int number_pois = data['Number of POIs'] + cdef double minDistanceSqr = data["Minimum Distance"] ** 2 + cdef int historyStepCount = data["Steps"] + 1 + cdef int coupling = data["Coupling"] + cdef double observationRadiusSqr = data["Observation Radius"] ** 2 + cdef double[:, :, :] agentPositionHistory = data["Agent Position History"] + cdef double[:] poiValueCol = data['Poi Values'] + cdef double[:, :] poiPositionCol = data["Poi Positions"] + + + cdef int poiIndex, stepIndex, agentIndex, observerCount + cdef double separation0, separation1, closestObsDistanceSqr, distanceSqr, stepClosestObsDistanceSqr + cdef double Inf = float("inf") + + cdef double globalReward = 0.0 + + + for poiIndex in range(number_pois//2): + closestObsDistanceSqr = Inf + for stepIndex in range(historyStepCount): + # Count how many agents observe poi, update closest distance if necessary + observerCount = 0 + stepClosestObsDistanceSqr = Inf + for agentIndex in range(number_agents): + # Calculate separation distance between poi and agent + separation0 = poiPositionCol[poiIndex, 0] - agentPositionHistory[stepIndex, agentIndex, 0] + separation1 = poiPositionCol[poiIndex, 1] - agentPositionHistory[stepIndex, agentIndex, 1] + distanceSqr = separation0 * separation0 + separation1 * separation1 + + # Check if agent observes poi, update closest step distance + if distanceSqr < observationRadiusSqr and itemHeld[agentIndex]: + observerCount += 1 + if distanceSqr < stepClosestObsDistanceSqr: + stepClosestObsDistanceSqr = distanceSqr + + + # update closest distance only if poi is observed + if observerCount >= coupling: + if stepClosestObsDistanceSqr < closestObsDistanceSqr: + closestObsDistanceSqr = stepClosestObsDistanceSqr + + # add to global reward if poi is observed + if closestObsDistanceSqr < observationRadiusSqr: + if closestObsDistanceSqr < minDistanceSqr: + closestObsDistanceSqr = minDistanceSqr + globalReward += poiValueCol[poiIndex] / closestObsDistanceSqr + + data["Global Reward"] = globalReward + data["Agent Rewards"] = np.ones(number_agents) * globalReward + + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. 
+cpdef doAgentSenseMod(data): + """ + Sensor model is + Where a means (other) agent, p means poi, and the rest are the quadrants + """ + cdef double obsRadius=data["Observation Radius"] ** 2 + cdef double viewDistance = data['View Distance'] ** 2 + + cdef int number_agents = data['Number of Agents'] + cdef int number_pois = data['Number of POIs'] + cdef double minDistanceSqr = data["Minimum Distance"] ** 2 + cdef double[:, :] agentPositionCol = data["Agent Positions"] + cdef double[:] poiValueCol = data['Poi Values'] + cdef double[:, :] poiPositionCol = data["Poi Positions"] + cdef double[:, :] orientationCol = data["Agent Orientations"] + npObservationCol = np.zeros((number_agents, 8), dtype = np.float64) + + + + + cdef int[:] itemHeld + + if data["Sequential"]: + itemHeld=data["Item Held"] + npObservationCol = np.zeros((number_agents, 13), dtype = np.float64) + else: + npObservationCol = np.zeros((number_agents, 8), dtype = np.float64) + cdef double[:, :] observationCol = npObservationCol + + + cdef int agentIndex, otherAgentIndex, poiIndex, obsIndex, shift + cdef double globalFrameSeparation0, globalFrameSeparation1 + cdef double agentFrameSeparation0, agentFrameSeparation1 + + cdef double distanceSqr + + + for agentIndex in range(number_agents): + + # calculate observation values due to other agents + for otherAgentIndex in range(number_agents): + + # agents do not sense self (ergo skip self comparison) + if agentIndex == otherAgentIndex: + continue + + # Get global separation vector between the two agents + globalFrameSeparation0 = agentPositionCol[otherAgentIndex,0] - agentPositionCol[agentIndex,0] + globalFrameSeparation1 = agentPositionCol[otherAgentIndex,1] - agentPositionCol[agentIndex,1] + + # Translate separation to agent frame using inverse rotation matrix + agentFrameSeparation0 = orientationCol[agentIndex, 0] * globalFrameSeparation0 + orientationCol[agentIndex, 1] * globalFrameSeparation1 + agentFrameSeparation1 = orientationCol[agentIndex, 0] * globalFrameSeparation1 - orientationCol[agentIndex, 1] * globalFrameSeparation0 + distanceSqr = agentFrameSeparation0 * agentFrameSeparation0 + agentFrameSeparation1 * agentFrameSeparation1 + + if viewDistance > 0 and distanceSqr > viewDistance : + continue + + # By bounding distance value we implicitly bound sensor values + if distanceSqr < minDistanceSqr: + distanceSqr = minDistanceSqr + + + # other is east of agent + if agentFrameSeparation0 > 0: + # other is north-east of agent + if agentFrameSeparation1 > 0: + observationCol[agentIndex,0] += 1.0 / distanceSqr + else: # other is south-east of agent + observationCol[agentIndex,3] += 1.0 / distanceSqr + else: # other is west of agent + # other is north-west of agent + if agentFrameSeparation1 > 0: + observationCol[agentIndex,1] += 1.0 / distanceSqr + else: # other is south-west of agent + observationCol[agentIndex,2] += 1.0 / distanceSqr + + + + # calculate observation values due to pois + for poiIndex in range(number_pois): + + # Get global separation vector between the two agents + globalFrameSeparation0 = poiPositionCol[poiIndex,0] - agentPositionCol[agentIndex,0] + globalFrameSeparation1 = poiPositionCol[poiIndex,1] - agentPositionCol[agentIndex,1] + + # Translate separation to agent frame unp.sing inverse rotation matrix + agentFrameSeparation0 = orientationCol[agentIndex, 0] * globalFrameSeparation0 + orientationCol[agentIndex, 1] * globalFrameSeparation1 + agentFrameSeparation1 = orientationCol[agentIndex, 0] * globalFrameSeparation1 - orientationCol[agentIndex, 1] * 
globalFrameSeparation0 + distanceSqr = agentFrameSeparation0 * agentFrameSeparation0 + agentFrameSeparation1 * agentFrameSeparation1 + + if viewDistance > 0 and distanceSqr > viewDistance: + continue + + # By bounding distance value we implicitly bound sensor values + if distanceSqr < minDistanceSqr: + distanceSqr = minDistanceSqr + + # half of poi give a "key" and the other half are a "lock" which need multiple agents to unlock + shift=0 + + if (poiIndex < number_pois//2 and data["Sequential"]): + shift = 4 + + if (obsRadius > distanceSqr): + itemHeld[agentIndex]=1 + + + if ( itemHeld[agentIndex] ) : + observationCol[12]=1 + + # poi is east of agent + if agentFrameSeparation0> 0: + # poi is north-east of agent + if agentFrameSeparation1 > 0: + observationCol[agentIndex,4+shift] += poiValueCol[poiIndex] / distanceSqr + else: # poi is south-east of agent + observationCol[agentIndex,7+shift] += poiValueCol[poiIndex] / distanceSqr + else: # poi is west of agent + # poi is north-west of agent + if agentFrameSeparation1 > 0: + observationCol[agentIndex,5+shift] += poiValueCol[poiIndex] / distanceSqr + else: # poi is south-west of agent + observationCol[agentIndex,6+shift] += poiValueCol[poiIndex] / distanceSqr + + data["Agent Observations"] = npObservationCol + + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. + +cpdef assignGlobalRewardRecipe(data): + + cdef int[:,:] itemHeld=data["Item Held"] + cdef int number_agents = data['Number of Agents'] + cdef int number_pois = data['Number of POIs'] + cdef double minDistanceSqr = data["Minimum Distance"] ** 2 + cdef int historyStepCount = data["Steps"] + 1 + cdef int coupling = data["Coupling"] + cdef double observationRadiusSqr = data["Observation Radius"] ** 2 + cdef double[:, :, :] agentPositionHistory = data["Agent Position History"] + cdef double[:] poiValueCol = data['Poi Values'] + cdef double[:, :] poiPositionCol = data["Poi Positions"] + + + cdef int[:] recipe = data["Recipe"] + cdef int recipeSize = data["Recipe Size"] + cdef int nPoiTypes = data["Number of POI Types"] + cdef int ordered = data["Ordered"] + + cdef int poiIndex, stepIndex, agentIndex, observerCount, poiType + cdef double separation0, separation1, closestObsDistanceSqr, distanceSqr, stepClosestObsDistanceSqr + cdef double Inf = float("inf") + + + cdef double globalReward = 0.0 + + + for poiIndex in range(number_pois): + poiType = poiIndex % nPoiTypes + + + closestObsDistanceSqr = Inf + for stepIndex in range(historyStepCount): + # Count how many agents observe poi, update closest distance if necessary + observerCount = 0 + stepClosestObsDistanceSqr = Inf + for agentIndex in range(number_agents): + # Calculate separation distance between poi and agent + separation0 = poiPositionCol[poiIndex, 0] - agentPositionHistory[stepIndex, agentIndex, 0] + separation1 = poiPositionCol[poiIndex, 1] - agentPositionHistory[stepIndex, agentIndex, 1] + distanceSqr = separation0 * separation0 + separation1 * separation1 + + # Check if agent observes poi, update closest step distance + if distanceSqr < observationRadiusSqr: + observerCount += 1 + if distanceSqr < stepClosestObsDistanceSqr: + stepClosestObsDistanceSqr = distanceSqr + + + # update closest distance only if poi is observed + if observerCount >= coupling: + if stepClosestObsDistanceSqr < closestObsDistanceSqr: + closestObsDistanceSqr = stepClosestObsDistanceSqr + + # add to global reward if poi is observed + if closestObsDistanceSqr < observationRadiusSqr: + if 
closestObsDistanceSqr < minDistanceSqr: + closestObsDistanceSqr = minDistanceSqr + globalReward += poiValueCol[poiIndex] / closestObsDistanceSqr + + data["Global Reward"] = globalReward + data["Agent Rewards"] = np.ones(number_agents) * globalReward + +#@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. + +cpdef assignGlobalRewardSimple(data): + + cdef int[:,:] itemHeld=data["Item Held"] + cdef int number_agents = data['Number of Agents'] + + + + cdef int[:] recipe = data["Recipe"] + cdef int recipeSize = data["Recipe Size"] + cdef int ordered = data["Ordered"] + + cdef int agentIndex, observerCount, poiType, recipeIndex + + + + cdef double globalReward = 0.0 + + + for agentIndex in range(number_agents): + for recipeIndex in range(recipeSize): + if ( itemHeld[agentIndex][recipeIndex] == 0): + break + if (recipeIndex == recipeSize-1): + globalReward+=1.0 + + + data["Global Reward"] = globalReward + data["Agent Rewards"] = np.ones(number_agents) * globalReward + + + +#@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +cpdef giveKey(data): + cdef double obsRadius=data["Observation Radius"] ** 2 + cdef int number_agents = data['Number of Agents'] + cdef int number_pois = data['Number of POIs'] + cdef int coupling=data["Coupling"] + cdef int couplingLimit=data["Coupling Limit"] + + cdef int[:,:] itemHeld = data["Item Held"] + cdef double[:, :] agentPositionCol = data["Agent Positions"] + cdef double[:, :] poiPositionCol = data["Poi Positions"] + + + cdef int[:] viewCount=np.zeros(number_pois,dtype=np.int32) + cdef int[:] indexes=np.zeros(number_agents,dtype=np.int32) + cdef double[:] dists=np.zeros(number_agents,dtype=np.float64) + + cdef int[:] recipe = data["Recipe"] + cdef int recipeSize = data["Recipe Size"] + cdef int nPoiTypes = data["Number of POI Types"] + cdef int ordered = data["Ordered"] + cdef int globe= data["Global Recipe"] + + + cdef int agentIndex, poiIndex, closestIndex, recipeIndex, poiType + cdef double distanceSqr,closestDist + + #determine closest poi to each agent and if each poi is fully viewed + for agentIndex in range(number_agents): + closestIndex=-1 + closestDist =1e9 + + for poiIndex in range(number_pois): + distanceSqr= (poiPositionCol[poiIndex, 0]-agentPositionCol[agentIndex,0])**2 + distanceSqr+=(poiPositionCol[poiIndex, 1]-agentPositionCol[agentIndex,1])**2 + + if (distanceSqr=coupling): + poiType=closestIndex%nPoiTypes + + #loop through recipt + for recipeIndex in range(recipeSize): + + #if order doesnt matter and poi in recipe, grab key from poi + if not ordered: + if recipe[recipeIndex]==poiType: + itemHeld[0][recipeIndex]=1 + + #if order matters, + else: + #check to see if previous parts of recipe fulfilled + if globe: + IDX=0 + else: + IDX=agentIndex + if itemHeld[IDX][recipeIndex] == 1 or recipe[recipeIndex]==poiType: + itemHeld[IDX][recipeIndex] = 1 + + #if not, break the loop ans stop checking + else: + break + + + +#@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. 
+cpdef doAgentSenseRecipe(data): + """ + Sensor model is + Where a means (other) agent, p means poi, and the rest are the quadrants + """ + cdef double obsRadius=data["Observation Radius"] ** 2 + + + cdef int number_agents = data['Number of Agents'] + cdef int number_pois = data['Number of POIs'] + cdef double minDistanceSqr = data["Minimum Distance"] ** 2 + cdef double[:, :] agentPositionCol = data["Agent Positions"] + cdef double[:] poiValueCol = data['Poi Values'] + cdef double[:, :] poiPositionCol = data["Poi Positions"] + cdef double[:, :] orientationCol = data["Agent Orientations"] + + + + cdef int [:] recipe = data["Recipe"] + cdef int recipeSize = data["Recipe Size"] + cdef int nPoiTypes = data["Number of POI Types"] + cdef int ordered = data["Ordered"] + cdef int globe = data["Global Recipe"] + cdef int[:,:] itemHeld = data["Item Held"] + + # agent view + poi view + recipe seen+ items grabbed from recipe + cdef int obsSize = 4 + 4*nPoiTypes + recipeSize + recipeSize + + cdef double[:,:] observationCol = np.zeros((number_agents, obsSize), dtype = np.float64) + + + + cdef int agentIndex, otherAgentIndex, poiIndex, obsIndex, recipeIndex, poiType, shift + cdef double globalFrameSeparation0, globalFrameSeparation1 + cdef double agentFrameSeparation0, agentFrameSeparation1 + + cdef double distanceSqr + + + for agentIndex in range(number_agents): + + # calculate observation values due to other agents + for otherAgentIndex in range(number_agents): + + # agents do not sense self (ergo skip self comparison) + if agentIndex == otherAgentIndex: + continue + + # Get global separation vector between the two agents + globalFrameSeparation0 = agentPositionCol[otherAgentIndex,0] - agentPositionCol[agentIndex,0] + globalFrameSeparation1 = agentPositionCol[otherAgentIndex,1] - agentPositionCol[agentIndex,1] + + # Translate separation to agent frame using inverse rotation matrix + agentFrameSeparation0 = orientationCol[agentIndex, 0] * globalFrameSeparation0 + orientationCol[agentIndex, 1] * globalFrameSeparation1 + agentFrameSeparation1 = orientationCol[agentIndex, 0] * globalFrameSeparation1 - orientationCol[agentIndex, 1] * globalFrameSeparation0 + distanceSqr = agentFrameSeparation0 * agentFrameSeparation0 + agentFrameSeparation1 * agentFrameSeparation1 + + + + # By bounding distance value we implicitly bound sensor values + if distanceSqr < minDistanceSqr: + distanceSqr = minDistanceSqr + + + # other is east of agent + if agentFrameSeparation0 > 0: + # other is north-east of agent + if agentFrameSeparation1 > 0: + observationCol[agentIndex,0] += 1.0 / distanceSqr + else: # other is south-east of agent + observationCol[agentIndex,3] += 1.0 / distanceSqr + else: # other is west of agent + # other is north-west of agent + if agentFrameSeparation1 > 0: + observationCol[agentIndex,1] += 1.0 / distanceSqr + else: # other is south-west of agent + observationCol[agentIndex,2] += 1.0 / distanceSqr + + + + # calculate observation values due to pois + for poiIndex in range(number_pois): + + # Get global separation vector between the two agents + globalFrameSeparation0 = poiPositionCol[poiIndex,0] - agentPositionCol[agentIndex,0] + globalFrameSeparation1 = poiPositionCol[poiIndex,1] - agentPositionCol[agentIndex,1] + + # Translate separation to agent frame unp.sing inverse rotation matrix + agentFrameSeparation0 = orientationCol[agentIndex, 0] * globalFrameSeparation0 + orientationCol[agentIndex, 1] * globalFrameSeparation1 + agentFrameSeparation1 = orientationCol[agentIndex, 0] * globalFrameSeparation1 - 
orientationCol[agentIndex, 1] * globalFrameSeparation0 + distanceSqr = agentFrameSeparation0 * agentFrameSeparation0 + agentFrameSeparation1 * agentFrameSeparation1 + + + + # By bounding distance value we implicitly bound sensor values + if distanceSqr < minDistanceSqr: + distanceSqr = minDistanceSqr + + + poiType=poiIndex % nPoiTypes + + shift=poiType*4 + + + # poi is east of agent + if agentFrameSeparation0> 0: + # poi is north-east of agent + if agentFrameSeparation1 > 0: + observationCol[agentIndex,4+shift] += poiValueCol[poiIndex] / distanceSqr + else: # poi is south-east of agent + observationCol[agentIndex,7+shift] += poiValueCol[poiIndex] / distanceSqr + else: # poi is west of agent + # poi is north-west of agent + if agentFrameSeparation1 > 0: + observationCol[agentIndex,5+shift] += poiValueCol[poiIndex] / distanceSqr + else: # poi is south-west of agent + observationCol[agentIndex,6+shift] += poiValueCol[poiIndex] / distanceSqr + + for recipeIndex in range(recipeSize): + shift=4+4*nPoiTypes + + #recipe requested + observationCol[agentIndex,shift+recipeIndex] = recipe[recipeIndex] + + #keys obtained + if globe: + IDX=0 + else: + IDX=agentIndex + observationCol[agentIndex,shift+recipeIndex+recipeSize] = itemHeld[IDX,recipeIndex] + + + data["Agent Observations"] = observationCol + + +def doAgentSenseRecipe2(data): + giveKey(data) + doAgentSenseRecipe(data) + + + + + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. + +cpdef IndivReward(data): + + cdef int number_agents = data['Number of Agents'] + cdef int number_pois = data['Number of POIs'] + cdef double minDistanceSqr = data["Minimum Distance"] ** 2 + cdef int historyStepCount = data["Steps"] + 1 + cdef int coupling = data["Coupling"] + cdef double observationRadiusSqr = data["Observation Radius"] ** 2 + cdef double[:, :] agentPositionHistory = data["Agent Positions"] + cdef double[:] poiValueCol = data['Poi Values'] + cdef double[:, :] poiPositionCol = data["Poi Positions"] + + + cdef int poiIndex, stepIndex, agentIndex, observerCount + cdef double separation0, separation1, closestObsDistanceSqr, distanceSqr, stepClosestObsDistanceSqr + cdef double Inf = float("inf") + + cdef double globalReward = 0.0 + + + for poiIndex in range(number_pois): + closestObsDistanceSqr = Inf + + + for agentIndex in range(number_agents): + # Calculate separation distance between poi and agent + separation0 = poiPositionCol[poiIndex, 0] - agentPositionHistory[agentIndex, 0] + separation1 = poiPositionCol[poiIndex, 1] - agentPositionHistory[agentIndex, 1] + distanceSqr = separation0 * separation0 + separation1 * separation1 + + # Check if agent observes poi, update closest step distance + if distanceSqr < observationRadiusSqr: + globalReward += poiValueCol[poiIndex] + + + data["Global Reward"] = globalReward + data["Agent Rewards"] = np.ones(number_agents) * globalReward + \ No newline at end of file diff --git a/mods.py b/mods.py index e92e5f4..be2d563 100644 --- a/mods.py +++ b/mods.py @@ -1,7 +1,8 @@ import datetime from code.reward_2 import * # Agent Reward from code.curriculum import * # Agent Curriculum - +from mod_funcs import * +from math import sqrt def globalRewardMod(sim): sim.data["Mod Name"] = "global" @@ -455,4 +456,245 @@ def differenceRewardCoupCurrMod5(sim): (sim.data["Specifics Name"], sim.data["Mod Name"], dateTimeString) sim.data["Pickle Save File Name"] = "log/%s/%s/pickle/data %s.pickle"%\ - (sim.data["Specifics Name"], sim.data["Mod Name"], dateTimeString) \ No 
newline at end of file + (sim.data["Specifics Name"], sim.data["Mod Name"], dateTimeString) + + + +''' +:param sim: provides a simulation with the global data structure +:returns: none +:pre: policies have been assigned to each agent +:post: one of the existing policies is reassigned to each agent +:note: call function after sim.reset. data["World Index"] is used to determine which population to use and must also be set +''' + +def assignHomogeneousPolicy(sim): + data=sim.data + number_agents = data['Number of Agents'] + populationCol = data['Agent Populations'] + worldIndex = data["World Index"] + policyCol = [None] * number_agents + for agentIndex in range(number_agents): + policyCol[agentIndex] = populationCol[0][worldIndex] + data["Agent Policies"] = policyCol + + +''' +:param sim: provides a simulation with the global data structure +:returns: none +:pre: none +:post:poi move with a seeded random velocity +:note: call function after sim.step +''' +def poiVelocity(sim): + data=sim.data + + if not "Poi Velocity" in data: + state=np.random.get_state() + np.random.seed(123) + data["Poi Velocity"]=np.random.random(data["Poi Positions"].shape)-.5 + np.random.set_state(state) + data["Poi Positions"]+=data["Poi Velocity"]*0.5 + + +''' +:param sim: provides a simulation with the global data structure +:returns: none +:pre: An action has been determined +:post: agents have varying max speeds from 50% to 100% +:note: call function after actions are determined +''' + +def abilityVariation(sim): + data=sim.data + + variation=np.linspace(0.5,1.0, sim.data["Number of Agents"]) + + for n in range(sim.data["Number of Agents"]): + sim.data["Agent Actions"][n,:] *= variation[n] + +''' +:param data: global data structure +:returns: none +:pre: an array holds whether or not an agent has found a "key" +:post: array is cleared, indicating that no agents are holding "keys" +:note: none +''' + +def clearItemHeld(data): + nAgents=sim.data["Number of Agents"] + sim.data["Item Held"] =np.zeros((nAgents), dtype = np.int32) + + +''' +:param sim: Provides a simulation with the global data structure +:returns: None +:pre: None +:post: Agents must go to poi type-a to recieve a "key" and then group at poi type-b to open the "lock" and recieve a reward. Poi[0:n/2] = Type B and Poi[n/2:n] = Type A +:note: Call function after sim is created +''' + +def sequentialPoi(sim): + + sim.data["Sequential"]=True + sim.data["Observation Function"]=doAgentSenseMod + + sim.data["Reward Function"]=assignGlobalRewardMod + + sim.worldTrainBeginFuncCol.append( clearItemHeld ) + + if not "View Distance" in sim.data: sim.data["View Distance"]= -1 + + + +''' +:param sim: Provides a simulation with the global data structure +:returns: None +:pre: A visibility range is given to each agent +:post: Agents can only perceive items in the visibility range +:note: Call function after sim is created +''' + +def lowVisibility(sim): + if not "Sequential" in sim.data: sim.data["Sequential"]= False + + sim.data["View Distance"]=15 + + sim.data["Observation Function"]=doAgentSenseMod + +''' +:param data: Global data structure +:returns: None +:pre: One step of the simulation has passed +:post: Rewards are assigned based on the number of total parts of the recipe completed for each agent. 
The max reward is: Number_of_Agents * Size_of_Recipe +:note: None +''' + +def simpleReward(data): + number_agents=data["Number of Agents"] + + #globalReward=np.sum(data["Item Held"]) + d=data["Item Held"] + globalReward=sum(d[0])/len(d[0]) + #d=np.sum(d[:,-4:],axis=0) + #if data["Global Recipe"]: + # d*=number_agents + #globalReward=d[0]*1.0+d[1]*1.33+d[2]*1.66+d[3]*2.0 + data["Global Reward"] = globalReward + data["Agent Rewards"] = np.ones(number_agents) * globalReward + +''' +:param data: Global data structure +:returns: None +:pre: An array holds whether or not an agent has complete a part of the recipe +:post: Array is cleared, indicating that no agents have completed any part of the recipe +:note: None +''' + +def resetItemHeld(data): + nAgents = data["Number of Agents"] + recipeSize = data["Recipe Size" ] + data["Item Held"] = np.zeros(( nAgents,recipeSize), dtype = np.int32) + + +''' +:param sim: Provides a simulation with the global data structure +:returns: None +:pre: A recipe of POI types is given to the agent. +:post: The agents must go to each poi on the list to recieve a reward. The global reward is determined by the number of agents which complete the recipe +:note: Call function after sim is created. Recipe completion can be ordered or unordered +''' + +def recipePoi(sim): + + + sim.data["Observation Function"]=doAgentSenseRecipe2 + #sim.data["Reward Function"]=assignGlobalRewardSimple #reward for each recipe completed + sim.data["Reward Function"]=simpleReward #reward for each step of recipe completed + + sim.data["Recipe"] = np.array([0,1,2,3],dtype=np.int32) #recipe, each item is a POI type from 0 to (N-Poi Types)-1 + sim.data["Recipe Size"]=len(sim.data["Recipe"]) + sim.data["Ordered"] = False #flag for whether order matters + sim.data["Number of POI Types"] = 4 + sim.data["Coupling Limit"]=15 #max number of agents which can see view a poi at a time + sim.data["Global Recipe"]=True + sim.worldTrainBeginFuncCol.append( resetItemHeld ) + +def multiReward(sim): + + data=sim.data + number_agents = data['Number of Agents'] + number_pois = data['Number of POIs'] + + historyStepCount = data["Steps"] + #coupling = data["Coupling"] + observationRadiusSqr = data["Observation Radius"] ** 2 + agentPositionHistory = data["Agent Position History"] + poiValueCol = data['Poi Values'] + poiPositionCol = data["Poi Positions"] + + + #recipe = data["Recipe"] + #recipeSize = data["Recipe Size"] + nPoiTypes = data["Number of POI Types"] + #ordered = data["Ordered"] + + Inf = float("inf") + + + rewards=[0.0 for i in range(nPoiTypes + 1)] + + + for poiIndex in range(number_pois): + poiType = poiIndex % nPoiTypes + + + + stepIndex = historyStepCount + + + for agentIndex in range(number_agents): + # Calculate separation distance between poi and agent + separation0 = poiPositionCol[poiIndex, 0] - agentPositionHistory[stepIndex, agentIndex, 0] + separation1 = poiPositionCol[poiIndex, 1] - agentPositionHistory[stepIndex, agentIndex, 1] + distanceSqr = separation0 * separation0 + separation1 * separation1 + + if distanceSqr < observationRadiusSqr: + rewards[poiType]+= 1.0/float(number_agents) + #rewards[poiType]+=-sqrt(distanceSqr) + dist=0.0 + if poiIndex == 0: + min_dist=1e9 + + for otherIndex in range(number_agents): + # Calculate separation distance between poi and agent + separation0 = agentPositionHistory[stepIndex, otherIndex, 0] - agentPositionHistory[stepIndex, agentIndex, 0] + separation1 = agentPositionHistory[stepIndex, otherIndex, 1] - agentPositionHistory[stepIndex, agentIndex, 1] + 
distanceSqr = separation0 * separation0 + separation1 * separation1 + + dist=sqrt( distanceSqr ) + if dist0: + min_dist=dist + + rewards[-1]+= -min_dist + + return rewards + + +def posInit(data,mu,sig): + number_agents = data['Number of Agents'] + world_width = data['World Width'] + world_length = data['World Length'] + agentInitSize = sig + + worldSize = np.array([world_width, world_length]) + + # Initialize all agents in the np.randomly in world + positionCol = np.random.rand(number_agents, 2)-0.5 + positionCol *= agentInitSize + positionCol +=0.5 + (np.random.rand(2)-0.5) * mu + + positionCol *= worldSize + data['Agent Positions BluePrint'] = positionCol + angleCol = np.random.uniform(-np.pi, np.pi, number_agents) + data['Agent Orientations BluePrint'] = np.vstack((np.cos(angleCol), np.sin(angleCol))).T diff --git a/multiq/__init__.py b/multiq/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/multiq/genagent.py b/multiq/genagent.py new file mode 100644 index 0000000..2c2a124 --- /dev/null +++ b/multiq/genagent.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- + +import numpy as np +from random import randint,gauss,shuffle,random,seed + + + + +#np.random.seed(123) +#seed(123) + +class net: + def __init__(self,s): + self.shape=s + self.depth=len(s)-1 + + + self.shuffle() + self.e=0.0 + + + + def shuffle(self): + s=self.shape + self.w=[np.random.normal(size=[s[i],s[i+1]]) for i in range(len(s)-1)] + self.b=[np.random.normal(size=[1,s[i+1]]) for i in range(len(s)-1)] + + + + def cross(self,p,p1,p2): + for i in range(len(p)): + P=np.random.random(p[i].shape)<.5 + nP=np.logical_not(P) + p[i][P]=p1[i][P] + p[i][nP]=p2[i][nP] + def copy(self,p): + for i in range(len(self.w)): + self.w[i]=p.w[i].copy() + self.b[i]=p.b[i].copy() + + + def crossover(self,p1,p2): + self.cross(self.w,p1.w,p2.w) + self.cross(self.b,p1.b,p2.b) + + + def mut(self,p,m,rad): + for i in range(len(p)): + P=np.random.random(p[i].shape) > m + if(self.bloom<0.95): + d=np.random.normal(0,rad,p[i].shape) + else: + d=np.random.normal(0,1.0/rad,p[i].shape) + d[P]=0 + p[i]+=d + + def mutate(self,mut,rad): + self.bloom=random() + self.mut(self.w,mut,rad) + self.mut(self.b,mut,rad) + + + def s(self,x): + return 1.0/(1.0+np.exp(-x)) + + def h(self,x): + return np.tanh(x) + + def l(self,x): + return x + + + + def feed(self,x): + + for w,b in zip(self.w,self.b): + x=self.h(np.matmul(x,w)+b) + return x + + def error(self,x,y): + Y=self.feed(x) + self.e=np.sum((Y-y)**2) + return self.e + + + + +class agent: + def __init__(self,s,n,env=None,N=.1,L=.1,MUT=.1,RAD=.2): + + + + self.POP=n + self.NEXT=int(N*n) # elite + self.LUCK=int(L*n) + self.CHILDREN=self.POP-self.NEXT-self.LUCK + self.MUT=MUT + self.RAD=RAD + self.environ=env + + + + self.pop=[net(s) for i in range(self.POP)] + + self.best=self.pop[0] + + def policies(self): + return [p.feed for p in self.pop] + + def train(self,rewards=None,tournament=True): + + + if self.environ != None: + for p in self.pop: + p.e=self.environ(p.feed) + else: + for p,r in zip(self.pop,rewards): + p.e=r + + self.pop=sorted(self.pop,key=lambda x: x.e,reverse=True) + + self.best=self.pop[0] + + if tournament: + mid=self.POP//2 + shuffle(self.pop) + new=[] + + old1=self.pop[:mid] + old2=self.pop[mid:] + + for p1,p2 in zip(old1,old2): + if p1.e > p2.e: + best = p1 + worst= p2 + else: + best = p2 + worst= p1 + worst.copy(best) + worst.mutate(self.MUT,self.RAD) + + new.append(best) + new.append(worst) + + + else: + new=self.pop[:self.NEXT] + old=self.pop[self.NEXT:] + + 
shuffle(old) + new=new+old[:self.LUCK] + old=old[self.LUCK:] + + + + for i in range(len(old)): + p1,p2=randint(0,self.NEXT+self.LUCK-1),randint(0,self.NEXT+self.LUCK-1) + Net=old.pop() + Net.copy(new[p1]) + #Net.crossover(new[p1],new[p1]) + Net.mutate(self.MUT,self.RAD) + new.append(Net) + + self.pop=new + + return self.best + + def policy(self,index=-1): + if index<0: + return self.best + else: + return self.pop[index] + diff --git a/multiq/learner.py b/multiq/learner.py new file mode 100644 index 0000000..c989542 --- /dev/null +++ b/multiq/learner.py @@ -0,0 +1,311 @@ +import tensorflow as tf +import numpy as np +from collections import deque +from random import seed, sample, random, randint +import pickle +import heapq + +from .qnet import net +from .genagent import agent + + + + +class learner: + + def __init__(self,nagents,nstate,naction,npolicies,npop,minr=0.05): + self.nagents=nagents + self.nstate =nstate + self.naction=naction + self.npolicies=npolicies + self.histories=[deque(maxlen=100000) for i in range(nagents)] + self.zero_hist=[deque(maxlen=100000) for i in range(nagents)] + self.heap=[[]for i in range(nagents)] + self.sess=tf.InteractiveSession() + + + self.rand_prob=1.0 + self.step=0 + self.idxs=None + self.LR=0.001 + self.SPLIT_TRAIN=0 + self.minr=minr + self.priority=False + self.counter=0 + + #qsize=[naction+nstate, int( (naction+nstate) * 2),4,1] + #qsize=[nstate, 2*int( npolicies ),npolicies] + + qsize=[9,int( npolicies )*3,npolicies] + + psize=[nstate, int( (naction+nstate)/2 ),naction] + + self.qnets = [net(self.sess,qsize,self.LR) for i in range(nagents)] + + self.sess.run(tf.global_variables_initializer()) + + self.saver = tf.train.Saver() + + self.policies=[ agent(psize,npop) for i in range(self.npolicies) ] + self.bestpolicies=[best.policy() for best in self.policies] + def qstate(self,s,st): + r=[] + for i in range(5): + r.append(max(s[i*4:(i+1)*4])) + return np.hstack([r,s[-4:]]) + + def save(self,fname,idx): + fname=fname.split('.') + fname.insert(1,str(idx)+'.') + fname=''.join(fname) + + with open(fname, 'wb') as handle: + pickle.dump(self.policies[idx], handle, protocol=pickle.HIGHEST_PROTOCOL) + + + def load(self,fname,idx): + fname=fname.split('.') + fname.insert(1,str(idx)+'.') + fname=''.join(fname) + + with open(fname, 'rb') as handle: + self.policies[idx] = pickle.load(handle) + self.bestpolicies=[best.policy() for best in self.policies] + + + def saveq(self,fname): + self.saver.save(self.sess, fname) + + + def loadq(self,fname): + self.saver.restore(self.sess, fname) + + def statemod(self,s,pindex): + s=np.asarray(s).copy() + mask=np.ones(s.shape,dtype=bool) + mask[pindex*4+4:pindex*4+8]=False + s[mask]=0.0 + return s + + def policy_action(self,states,pindex,popindex): + policy = self.policies[pindex].policy(popindex) + actions=[] + for s in states: + s=self.statemod(s,pindex) + actions.append(policy.feed(s)[0]) + actions=np.array(actions) + return actions + + + def policy_train(self,rewards,pindex): + policy=self.policies[pindex] + best=policy.train(rewards) + self.bestpolicies[pindex]=best + + + def action(self,state): + Action=[] + + self.rand_prob*=.99999995 + if self.rand_prob<.15: self.rand_prob=.15 + if self.step == 0: self.idxs=[] + for agentindex in range(self.nagents): + s=state[agentindex] + actions=[best.feed(s)[0] for best in self.bestpolicies] + qnet=self.qnets[agentindex] + + if self.step == 0: + exprewards=[ qnet.feed( np.array([ np.hstack([s,a]) ]) ) for a in actions] + #print(exprewards) + index=np.argmax(exprewards) + 
#print(index) + + + if self.rand_prob > random(): + index=randint( 0, len(exprewards)-1 ) + #index=0 + + if self.step!=0: + index=self.idxs[agentindex] + else: + self.idxs.append(index) + + Action.append(actions[index]) + + + self.step+=1 + if self.step>=10: + self.step=0 + + #print(index) + return np.array(Action),self.idxs + + + def train(self,states,actions,rewards,train_steps=1): + err=0.0 + for agentindex in range(self.nagents): + + for r,s,a in zip(rewards[agentindex],states[agentindex],actions[agentindex]): + + if r[0]==0.0 and self.SPLIT_TRAIN: + self.zero_hist[agentindex].append([r,np.hstack([s,a])]) + else: + self.histories[agentindex].append([r,np.hstack([s,a])]) + + qnet=self.qnets[agentindex] + + for i in range(train_steps): + if not self.SPLIT_TRAIN or (random()>.5 and len(self.histories[agentindex]) > 100): + hist=sample(self.histories[agentindex],64) + + elif (len(self.zero_hist[agentindex])>100): + hist=sample(self.zero_hist[agentindex],64) + + else: + continue + + SA,R=[],[] + for h in hist: + r,sa=h + SA.append(sa) + R.append(r) + + SA=np.array(SA) + R=np.array(R) + + err+=qnet.train(SA,R) + + return err/float(train_steps)/float(self.nagents) + + + def action2(self,state,st): + Action=[] + + self.rand_prob*=.999995 + if self.rand_prob random(): + index=randint( 0, len(exprewards)-1 ) + #print(index) + #index=0 + + if self.step!=0: + index=self.idxs[agentindex] + else: + self.idxs.append(index) + + best = self.bestpolicies[index] + s2=self.statemod(s,index) + actions=best.feed(s2)[0] + + Action.append(actions) + + + self.step+=1 + if self.step>=10: + self.step=0 + #if exprewards is not None: print(exprewards) + #print(index) + return np.array(Action),self.idxs + + def store(self,data,idx): + r=data[0] + + if self.priority == True: + data=(data[0][0],self.counter,data[1],data[2]) + self.counter+=1 + #print(data) + + heapq.heappush(self.heap[idx],data) + if len(self.heap[idx])>100000: + heapq.heappop(self.heap[idx]) + return + + if r[0]==0.0 and self.SPLIT_TRAIN: + self.zero_hist[idx].append(data) + else: + self.histories[idx].append(data) + + def sample(self,idx): + + if self.priority == True: + return sample(self.heap[idx],32) + + if not self.SPLIT_TRAIN or (random()>.5 and len(self.histories[idx]) > 100): + return sample(self.histories[idx],32) + + elif (len(self.zero_hist[idx])>100): + return sample(self.zero_hist[idx],32) + + else: + return None + + def prent(self): + print(self.heap[0][0][0]) + + def train2(self,states,actions,rewards,train_steps=1,train_idx=-1): + err=0.0 + trainpop=range(self.nagents) + if train_idx>=0: + trainpop=[train_idx] + for agentindex in trainpop: + + for r,s,a in zip(rewards[agentindex],states[agentindex],actions[agentindex]): + idx=a + data=[r,s,idx] + self.store(data,agentindex) + + + qnet=self.qnets[agentindex] + + for i in range(train_steps): + hist=self.sample(agentindex) + if hist == None: + continue + + S,R,IDX=[],[],[] + for h in hist: + if self.priority == True: + r,c,s,idx=h + R.append([r]) + else: + r,s,idx=h + R.append(r) + s,st=s + s=self.qstate(s,st) + + S.append(s) + + IDX.append(idx) + + S=np.array(S) + + R_=qnet.feed(S) + #R_*=1.2 + + for j in range(len(R_)): + + idx=IDX[j] + r=R[j][0] + #print(r-R_[j][idx]) + R_[j][idx]=r + + err+=qnet.train(S,R_) + + return err/float(train_steps)/float(self.nagents) \ No newline at end of file diff --git a/multiq/qnet.py b/multiq/qnet.py new file mode 100644 index 0000000..a87a817 --- /dev/null +++ b/multiq/qnet.py @@ -0,0 +1,59 @@ +import tensorflow as tf +import numpy as np +from 
matplotlib import pyplot as plt + +class net: + + def __init__(self,sess, size,lr=0.001): + + self.nlayers=len(size) + self.size=size + self.sess=sess + + self.x = tf.placeholder(tf.float32, [None, size[0]]) + + y=self.x + for i in range(self.nlayers-1): + W = tf.Variable(tf.random_normal([ size[i], size[i+1] ],stddev=0.1)) + b = tf.Variable(tf.random_normal([ size[i+1] ],stddev=0.1)) + y=tf.matmul(y, W) + b + #if ir_max: + r[-i][0]=r_max + return np.clip(r,-0.95,0.95) + + +episodeCount = 15000 # Number of learning episodes +STEPS=100 + +nagents=2 +RENDER=0 +TEST=0 +minr=1.0 +#TRAIN_POLICY_IDX=4 + +sim = RoverDomainGym(nagents,STEPS) +#mods.recipePoi(sim) +obs=sim.reset() +#obs=reduce_state(obs) + +sim.data["Coupling"]=1 +sim.data['Number of Agents']=nagents +obs_size=len(obs[0]) +act_size=2 +print(obs_size) + + +controller = multi(nagents,2,8) + +DATA = "save/0.txt" +DATA2= "save/0.pkl" +#DATA3= str(nagents)+"agent/"+argv[1]+".ckpt" + + +open(DATA, 'w').close() +max_score=-0.1 + +for episodeIndex in range(episodeCount): + if episodeIndex % 2 == 0: + controller.NOISE_RATIO=1.0 + else: + controller.NOISE_RATIO=0.0 + rewards=[] + actions=[[] for i in range(nagents)] + states =[[] for i in range(nagents)] + states_p=[[] for i in range(nagents)] +# for worldIndex in range(populationSize): + obs = sim.reset() + err=0.0 + R_=[] + done = False + stepCount = 0 + while not done: + # obs=reduce_state(obs) + obs=np.asarray(obs) + obs[:,:4]=0.0 + action=controller.act(obs) + + obs2, reward, done, info = sim.step(action) + #print(np.max(obs2[:,:4]),np.max(obs2[:,4:20])) + IndivReward(sim.data) + reward=sim.data["Agent Rewards"] + gr=sim.data["Global Reward"] + #if gr>0: print(gr,stepCount) + rewards.append([reward[0]]) + for a in range(nagents): + states[a].append( obs[a] ) + actions[a].append(action[a]) + states_p[a].append(obs2[a]) + + old_obs=obs + obs=obs2 + + stepCount += 1 + if (RENDER == True) and episodeIndex%100==0: + sim.render() + + if stepCount%5==0: + err_,r_=controller.learn() + err+=err_ + R_.append(r_) + + + score=max(rewards)[0] + rewards=np.array(rewards)/(float(nagents)) + rewards=discount(rewards,0.99) + m_r=max(rewards)[0] + + rewards=[rewards for i in range(nagents)] + + controller.store(states,actions,rewards,states_p)#,np.random.randint(0,nagents)) + + + #print(tr,episodeIndex, score,prob,err) + #controller.prent() + if score>max_score: + max_score=score + with open(DATA2, 'wb') as handle: + pickle.dump(np.asarray(sim.data["Agent Position History"]), handle, protocol=pickle.HIGHEST_PROTOCOL) + #pickle.dump(sim.data, handle, protocol=pickle.HIGHEST_PROTOCOL) + + with open(DATA, "a") as myfile: + log=[episodeIndex, score,err,m_r,max(R_)] + controller.idxs + myfile.write( ",".join([str(f) for f in log])) + print(log) + myfile.write('\n') + + + + diff --git a/renderer.py b/renderer.py new file mode 100644 index 0000000..4de6d28 --- /dev/null +++ b/renderer.py @@ -0,0 +1,66 @@ + + +import numpy as np +import matplotlib +import pickle +from rover_domain_core_gym import RoverDomainGym + +import matplotlib.pyplot as plt + +imageIndex=0 + +nagents=15 +sim = RoverDomainGym(nagents,100) + +def render(data): + global imageIndex + imageIndex+=1 + scale=.5 + + plt.ion() + + plt.clf() + + plt.title("Episode Index "+str(data["Episode Index"])) + + plt.xlim(-data["World Width"]*scale,data["World Width"]*(1.0+scale)) + plt.ylim(-data["World Length"]*scale,data["World Length"]*(1.0+scale)) + + plt.scatter(data["Agent Positions"][:,0],data["Agent Positions"][:,1]) + + + if ("Number of POI Types" in 
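The episode loop above keeps its bookkeeping simple: truncate a comma-separated log file at the start of the run, append one row of [episode, score, error, ...] per episode, and pickle the episode's "Agent Position History" whenever the running best score improves. A small sketch of that pattern with hypothetical file names (example_log.txt, example_best.pkl) instead of the save/0.* paths used above:

import pickle

LOG_PATH = "example_log.txt"      # hypothetical paths for this sketch
BEST_PATH = "example_best.pkl"

open(LOG_PATH, "w").close()       # truncate once at the start of a run
max_score = float("-inf")

def log_episode(episode, score, err, extras=()):
    row = [episode, score, err, *extras]
    with open(LOG_PATH, "a") as f:
        f.write(",".join(str(x) for x in row) + "\n")

def checkpoint_if_best(score, trajectory):
    # Pickle the best episode's trajectory whenever the score improves.
    global max_score
    if score > max_score:
        max_score = score
        with open(BEST_PATH, "wb") as f:
            pickle.dump(trajectory, f, protocol=pickle.HIGHEST_PROTOCOL)

log_episode(0, 0.5, 0.01, extras=[3])
checkpoint_if_best(0.5, [[0.0, 0.0], [1.0, 1.0]])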
data): + + ntypes=data["Number of POI Types"] + xpoints=[[] for i in range(ntypes)] + ypoints=[[] for i in range(ntypes)] + for i in range(len(data["Poi Positions"])): + xpoints[i%ntypes].append(data["Poi Positions"][i,0]) + ypoints[i%ntypes].append(data["Poi Positions"][i,1]) + for i in range(ntypes): + plt.scatter(xpoints[i],ypoints[i],label=str(i)) + plt.legend() + + else: + print("Single") + + + #plt.savefig("ims/test"+str(imageIndex)+".png") + plt.draw() + plt.pause(1.0/30.0) + + +with open("save/0.pkl" , 'rb') as f: + data=pickle.load(f) + +for i in range(100): + plt.ion() + plt.clf() + plt.scatter(data[i,:,0],data[i,:,1]) + pois=sim.data["Poi Positions"] + npois=sim.data["Number of POIs"] + plt.scatter(pois[:,0],pois[:,1]) + plt.show() + plt.xlim((0,50)) + plt.ylim((0,50)) + plt.pause(1.0/30.0) diff --git a/rover_domain_core_gym.py b/rover_domain_core_gym.py index 8af5890..6f8c516 100644 --- a/rover_domain_core_gym.py +++ b/rover_domain_core_gym.py @@ -1,70 +1,83 @@ -# Dependencies: numpy, cython - -import datetime -from core import SimulationCore -import pyximport; pyximport.install() # For cython(pyx) code -from code.world_setup import * # Rover Domain Construction -from code.agent_domain_2 import * # Rover Domain Dynamic -from code.reward_2 import * # Agent Reward and Performance Recording -from code.trajectory_history import * # Record trajectory of agents for calculating rewards - - """ -Provides Open AI gym wrapper for rover domain selfulation core with some extra - gym-specific functionality. This is the gym equivalent to 'getSim()' in +Provides Open AI gym wrapper for rover domain simulation core with some extra + gym-specific functionality. This is the gym equivalent to 'getSim()' in the specific.py file. - + Get a default rover domain simulation with some default functionality. Users are encouraged to modify this function and save copies of it for each trial to use as a parameter reference. 
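renderer.py above replays a pickled position history (an array shaped [steps, agents, 2]) frame by frame with matplotlib in interactive mode, with the POI positions scattered on top. The sketch below reproduces only that playback loop, using randomly generated stand-in data so it runs without save/0.pkl or a simulator instance.

import numpy as np
import matplotlib.pyplot as plt

# Stand-in data: 100 frames of 5 agents drifting inside a 50x50 world.
history = np.cumsum(np.random.randn(100, 5, 2), axis=0) + 25.0
pois = np.random.uniform(0.0, 50.0, size=(6, 2))

plt.ion()
for frame in history:
    plt.clf()
    plt.scatter(frame[:, 0], frame[:, 1], label="agents")
    plt.scatter(pois[:, 0], pois[:, 1], label="POIs")
    plt.xlim(0, 50)
    plt.ylim(0, 50)
    plt.legend(loc="upper right")
    plt.draw()
    plt.pause(1.0 / 30.0)   # roughly 30 frames per second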
- + Set data["Reward Function"] to define the reward function callback Set data["Evaluation Function"] to define the evaluation function callback -Set data["Observation Function"] to define the observation funciton callback +Set data["Observation Function"] to define the observation function callback -Note: step function returns result of either the reward or evaluation function +Note: step function returns result of either the reward or evaluation function depending mode ("Train" vs "Test" respectively) -RoverDomainCoreGym should be mods +RoverDomainGym should be mods """ -class RoverDomainCoreGym(SimulationCore): - def __init__(self): + +from core import SimulationCore +import pyximport +import code.world_setup as world_setup # Rover Domain Construction +import code.agent_domain_2 as rover_domain # Rover Domain Dynamic +import code.reward_2 as rewards # Agent Reward and Performance Recording +from code.trajectory_history import * # Record trajectory of agents for calculating rewards + +pyximport.install() # For cython(pyx) code + +import matplotlib.pyplot as plt + +class RoverDomainGym(SimulationCore): + def __init__(self,nagent,nsteps,Pos,Vals): SimulationCore.__init__(self) - - self.data["Number of Agents"] = 30 - self.data["Number of POIs"] = 8 - self.data["Minimum Distance"] = 1.0 - self.data["Steps"] = 100 - self.data["Trains per Episode"] = 50 + + self.data["Number of Agents"] = nagent + self.data["Number of POIs"] = 6 + self.data["Minimum Distance"] = 4.0 + self.data["Steps"] = nsteps + self.data["Trains per Episode"] = 100 self.data["Tests per Episode"] = 1 self.data["Number of Episodes"] = 5000 self.data["Specifics Name"] = "test" self.data["Mod Name"] = "global" - + self.data["World Index"] = 0 + + self.data["Coupling"] = 3 + self.data["Observation Radius"] = 4.1 + # Add Rover Domain Construction Functionality # Note: reset() will generate random world based on seed - self.data["World Width"] = 50 - self.data["World Length"] = 50 - self.data['Poi Static Values'] = np.array([1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0]) - self.data['Poi Relative Static Positions'] = np.array([ - [0.0, 0.0], - [0.0, 1.0], - [1.0, 0.0], - [1.0, 1.0], - [1.0, 0.5], - [0.5, 1.0], - [0.0, 5.0], + self.data["World Width"] = 30 + self.data["World Length"] = 30 + self.data['Poi Static Values'] =Vals# np.array([0.1, 0.1, 0.5,0.3, 0.0, 0.0])#, 6.0, 7.0, 8.0]) + self.data['Poi Relative Static Positions'] = Pos + ''' + np.array([ + [0.0, 0.0], + [1.0, 1.0], + [0.0, 1.0], + [1.0, 0.5], + [0.0, 0.5], + [1.0, 0.0] + ]) + ''' + ''' + [1.0, 1.0], + [1.0, 0.5], + [0.5, 1.0], + [0.0, 0.5], [0.5, 0.0] ]) - self.data['Agent Initialization Size'] = 0.1 - self.trainBeginFuncCol.append(blueprintStatic) - self.trainBeginFuncCol.append(blueprintAgentInitSize) - self.worldTrainBeginFuncCol.append(initWorld) - self.testBeginFuncCol.append(blueprintStatic) - self.testBeginFuncCol.append(blueprintAgentInitSize) - self.worldTestBeginFuncCol.append(initWorld) - - + ''' + self.data['Agent Initialization Size'] = 0.4 + self.trainBeginFuncCol.append(world_setup.blueprintStatic) + self.trainBeginFuncCol.append(world_setup.blueprintAgentInitSize) + self.worldTrainBeginFuncCol.append(world_setup.initWorld) + self.testBeginFuncCol.append(world_setup.blueprintStatic) + self.testBeginFuncCol.append(world_setup.blueprintAgentInitSize) + self.worldTestBeginFuncCol.append(world_setup.initWorld) + # Add Rover Domain Dynamic Functionality """ step() parameter [action] (2d numpy array with double precision): @@ -81,12 +94,10 @@ def __init__(self): 
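With the constructor change above, RoverDomainGym takes the agent count, episode length, relative POI positions, and per-POI static values directly. The sketch below shows construction only, assuming the repository is importable; the six positions and values are the ones left in the commented-out defaults above, and the scaling of relative positions into the 30x30 world happens inside world_setup.blueprintStatic, which is not shown here.

import numpy as np
from rover_domain_core_gym import RoverDomainGym

poi_pos = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0],
                    [1.0, 0.5], [0.0, 0.5], [1.0, 0.0]])
poi_vals = np.array([0.1, 0.1, 0.5, 0.3, 0.0, 0.0])

env = RoverDomainGym(5, 100, poi_pos, poi_vals)   # 5 rovers, 100 steps per episode
env.data["Coupling"] = 2                          # override the default of 3 if desired
obs = env.reset()
print(len(obs), len(obs[0]))                      # one observation vector per rover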
called automatically by this object, no need to call it in a function collection """ - self.data["Observation Function"] = doAgentSense - self.worldTrainStepFuncCol.append(doAgentMove) - self.worldTestStepFuncCol.append(doAgentMove) + self.data["Observation Function"] = rover_domain.doAgentSense + self.worldTrainStepFuncCol.append(rover_domain.doAgentMove) + self.worldTestStepFuncCol.append(rover_domain.doAgentMove) - - # Add Agent Training Reward and Evaluation Functionality """ Training Mode: @@ -98,18 +109,25 @@ def __init__(self): step() return [reward] (double): Performance defined by data["Evaluation Function"] """ - self.data["Coupling"] = 6 - self.data["Observation Radius"] = 4.0 - self.data["Reward Function"] = assignGlobalReward - self.data["Evaluation Function"] = assignGlobalReward + #self.data["Reward Function"] = rewards.assignDifferenceReward + #self.data["Evaluation Function"] = rewards.assignDifferenceReward + + self.data["Reward Function"] = rewards.assignGlobalReward + self.data["Evaluation Function"] = rewards.assignGlobalReward + self.worldTrainBeginFuncCol.append(createTrajectoryHistories) self.worldTrainStepFuncCol.append(updateTrajectoryHistories) self.worldTestBeginFuncCol.append(createTrajectoryHistories) self.worldTestStepFuncCol.append(updateTrajectoryHistories) - - self.worldTrainBeginFuncCol.append( - lambda data: data.update({"Gym Reward": np.zeros(data['Number of Agents'])}) + + # TODO make these be hidden class attributes, no reason to have them be lambdas + # TODO for what should be a fixed-environment scenario + self.worldTrainStepFuncCol.append( + lambda data: data["Reward Function"](data) + ) + self.worldTrainStepFuncCol.append( + lambda data: data.update({"Gym Reward": data["Agent Rewards"]}) ) self.worldTestBeginFuncCol.append( lambda data: data.update({"Gym Reward": 0}) @@ -124,12 +142,12 @@ def __init__(self): lambda data: data["Evaluation Function"](data) ) self.worldTestEndFuncCol.append( - lambda data: data.update({"Gym Reward": data["Global Reward"]}) - ) - + lambda data: data.update({"Gym Reward": data["Global Reward"]}) + ) + # Setup world for first time - self.reset(newMode = "Train", fullyResetting = True) - + self.reset(new_mode="Train", fully_resetting=True) + def step(self, action): """ Proceed 1 time step in world if world is not done @@ -150,10 +168,9 @@ def step(self, action): # Store Action for other functions to use self.data["Agent Actions"] = action - # If not done, do step functionality if self.data["Step Index"] < self.data["Steps"]: - + # Do Step Functionality self.data["Agent Actions"] = action if self.data["Mode"] == "Train": @@ -166,10 +183,12 @@ def step(self, action): raise Exception( 'data["Mode"] should be set to "Train" or "Test"' ) - + # Increment step index for future step() calls self.data["Step Index"] += 1 + + # Check is world is done; if so, do ending functions if self.data["Step Index"] >= self.data["Steps"]: if self.data["Mode"] == "Train": @@ -185,16 +204,16 @@ def step(self, action): # Observe state, store result in self.data self.data["Observation Function"](self.data) - + + # Check if simulation is done done = False if self.data["Step Index"] >= self.data["Steps"]: done = True - - return self.data["Agent Observations"], self.data["Gym Reward"], \ - done, self.data - - def reset(self, newMode = None, fullyResetting = False): + + return self.data["Agent Observations"], self.data["Gym Reward"], done, self.data + + def reset(self, new_mode=None, fully_resetting=False): """ Reset the world @@ -203,7 +222,7 @@ def 
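Given the step/reset changes above (the reward function now runs on every training step and "Gym Reward" carries "Agent Rewards", a test episode only reports "Global Reward" once it ends, and reset takes new_mode / fully_resetting), a minimal interaction loop looks like the sketch below. The zero actions are placeholders in the spirit of the quick() helper later in this diff, not a meaningful policy.

import numpy as np

def run_episode(env, mode="Train"):
    # Train mode: step() returns the per-agent reward vector each step.
    # Test mode: the meaningful signal is data["Global Reward"] at episode end.
    obs = env.reset(new_mode=mode, fully_resetting=True)
    done, reward = False, None
    while not done:
        nagents = env.data["Number of Agents"]
        actions = np.zeros((nagents, 2))       # placeholder actions, shape (agents, 2)
        obs, reward, done, info = env.step(actions)
    return reward, env.data["Global Reward"]

Calling run_episode(env, "Test") after training yields the evaluation score that the scripts elsewhere in this diff log as g.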
reset(self, newMode = None, fullyResetting = False): training mode. Set to "Test" to enable functions associated with testing mode instead. If None, does not change current simulation mode. - fullyResetting (boolean): If true, do addition functions + fully_resetting (boolean): If true, do addition functions (self.trainBeginFuncCol) when setting up world. Typically used for resetting the world for a different episode and/or different training/testing simulation mode. @@ -214,31 +233,60 @@ def reset(self, newMode = None, fullyResetting = False): """ # Zero step index for future step() calls self.data["Step Index"] = 0 - + # Set mode if not None - if newMode != None: - self.data["Mode"] = newMode - + if new_mode is not None: + self.data["Mode"] = new_mode + # Execute setting functionality if self.data["Mode"] == "Train": - if fullyResetting: + if fully_resetting: for func in self.trainBeginFuncCol: func(self.data) for func in self.worldTrainBeginFuncCol: func(self.data) elif self.data["Mode"] == "Test": - if fullyResetting: + if fully_resetting: for func in self.testBeginFuncCol: func(self.data) for func in self.worldTestBeginFuncCol: func(self.data) else: raise Exception('data["Mode"] should be set to "Train" or "Test"') - + # Observe state, store result in self.data self.data["Observation Function"](self.data) - + return self.data["Agent Observations"] -def assign(data, key, value): - data[key] = value \ No newline at end of file + def render(self): + scale=.5 + nPois= self.data["Number of POIs"]//2 + if (self.data["World Index"] ==0): + plt.ion() + plt.clf() + plt.xlim(-self.data["World Width"]*scale,self.data["World Width"]*(1.0+scale)) + plt.ylim(-self.data["World Length"]*scale,self.data["World Length"]*(1.0+scale)) + + plt.scatter(self.data["Agent Positions"][:,0],self.data["Agent Positions"][:,1]) + + if ("Sequential" in self.data and self.data["Sequential"]): + plt.scatter(self.data["Poi Positions"][nPois:,0],self.data["Poi Positions"][nPois:,1]) + plt.scatter(self.data["Poi Positions"][:nPois,0],self.data["Poi Positions"][:nPois,1]) + elif ("Number of POI Types" in self.data): + + ntypes=self.data["Number of POI Types"] + xpoints=[[] for i in range(ntypes)] + ypoints=[[] for i in range(ntypes)] + for i in range(len(self.data["Poi Positions"])): + xpoints[i%ntypes].append(self.data["Poi Positions"][i,0]) + ypoints[i%ntypes].append(self.data["Poi Positions"][i,1]) + for i in range(ntypes): + plt.scatter(xpoints[i],ypoints[i],label=str(i)) + + + else: + plt.scatter(self.data["Poi Positions"][:,0],self.data["Poi Positions"][:,1]) + + plt.draw() + plt.pause(1.0/30.0) diff --git a/teaming/__init__.py b/teaming/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/teaming/ccea.pyx b/teaming/ccea.pyx new file mode 100644 index 0000000..1ad9152 --- /dev/null +++ b/teaming/ccea.pyx @@ -0,0 +1,260 @@ +import numpy as np +import random +cimport cython + +cdef extern from "math.h": + double tanh(double m) + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +cdef mul(double[:, :] mat, double[:] vec, double[:] out): + cdef int colIndex, rowIndex + cdef double sum = 0 + for rowIndex in range(mat.shape[0]): + sum = 0 + for colIndex in range(mat.shape[1]): + sum += mat[rowIndex, colIndex] * vec[colIndex] + out[rowIndex] = sum + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. 
+cdef addInPlace(double[:] vec, double[:] other): + cdef int index + for index in range(vec.shape[0]): + vec[index] += other[index] + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +cdef addInPlaceMat(double[:,:] mat, double[:,:] other): + cdef int colIndex, rowIndex + for rowIndex in range(mat.shape[0]): + for colIndex in range(mat.shape[1]): + mat[rowIndex, colIndex] += other[rowIndex, colIndex] + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +cdef tanhInPlace(double[:] vec): + cdef int index + for index in range(vec.shape[0]): + vec[index] = tanh(vec[index]) + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +cdef reluInPlace(double[:] vec): + cdef int index + for index in range(vec.shape[0]): + vec[index] = vec[index] * (vec[index] > 0) + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +cdef mutate(double[:] vec, double m, double mr): + shape = [vec.shape[0]] + npMutation = np.random.standard_cauchy(shape) + npMutation *= np.random.uniform(0, 1, shape) < mr + cdef double[:] mutation = npMutation + addInPlace(vec, mutation) + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +cdef mutateMat(double[:,:] mat, double m, double mr): + shape = [mat.shape[0], mat.shape[1]] + npMutation = m * np.random.standard_cauchy(shape) + npMutation *= np.random.uniform(0, 1, shape) < mr + cdef double[:,:] mutation = npMutation + addInPlaceMat(mat, mutation) + +@cython.auto_pickle(True) +cdef class Evo_MLP: + cdef public double[:,:] inToHiddenMat + cdef public double[:] inToHiddenBias + cdef public double[:,:] hiddenToOutMat + cdef public double[:] hiddenToOutBias + cdef public double[:] hidden + cdef public double[:] out + cdef public object npInToHiddenMat + cdef public object npInToHiddenBias + cdef public object npHiddenToOutMat + cdef public object npHiddenToOutBias + cdef public object npHidden + cdef public object npOut + cdef public int input_shape + cdef public int num_outputs + cdef public int num_units + cdef public double fitness + + def __init__(self, input_shape, num_outputs, num_units=16): + self.input_shape = input_shape + self.num_outputs = num_outputs + self.num_units = num_units + self.fitness = 0 + + # XAVIER INITIALIZATION + stdev = (3/ input_shape) ** 0.5 + self.npInToHiddenMat = np.random.uniform(-stdev, stdev, (num_units, input_shape)) + self.npInToHiddenBias = np.random.uniform(-stdev, stdev, num_units) + stdev = (3/ num_units) ** 0.5 + self.npHiddenToOutMat = np.random.uniform(-stdev, stdev, (num_outputs, num_units)) + self.npHiddenToOutBias = np.random.uniform(-stdev, stdev, num_outputs) + + self.npHidden = np.zeros(num_units) + self.npOut = np.zeros(num_outputs) + + self.inToHiddenMat = self.npInToHiddenMat + self.inToHiddenBias = self.npInToHiddenBias + self.hiddenToOutMat = self.npHiddenToOutMat + self.hiddenToOutBias = self.npHiddenToOutBias + self.hidden = self.npHidden + self.out = self.npOut + def __getstate__(self): + return (self.inToHiddenMat.base, + self.inToHiddenBias.base, + self.hiddenToOutMat.base, + self.hiddenToOutBias.base, + self.hidden.base, + self.out.base, + self.input_shape, + self.num_outputs, + self.num_units + ) + def __setstate__(self,x): + self.inToHiddenMat 
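The mutate/mutateMat helpers above add heavy-tailed Cauchy noise to each parameter independently with probability mr. One detail worth flagging: the vector version omits the scale factor m that the matrix version applies. The NumPy sketch below mirrors the matrix version (scale applied in both cases), treating that as the intended behavior; that is an assumption, not something the diff states.

import numpy as np

def cauchy_mutate(weights, m=1.0, mr=0.01, rng=None):
    # Add Cauchy noise of scale m to each weight independently with probability mr.
    rng = np.random.default_rng() if rng is None else rng
    noise = m * rng.standard_cauchy(weights.shape)
    mask = rng.uniform(0.0, 1.0, weights.shape) < mr
    return weights + noise * mask

W = np.zeros((4, 8))
W2 = cauchy_mutate(W, m=1.0, mr=0.5, rng=np.random.default_rng(0))
print(np.count_nonzero(W2), "of", W2.size, "weights perturbed")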
,self.inToHiddenBias,self.hiddenToOutMat,self.hiddenToOutBias, self.hidden,self.out,self.input_shape,self.num_outputs,self.num_units = x + cpdef get_action(self, double[:] state): + mul(self.inToHiddenMat, state, self.hidden) + addInPlace(self.hidden, self.inToHiddenBias) + reluInPlace(self.hidden) + mul(self.hiddenToOutMat, self.hidden, self.out) + addInPlace(self.out, self.hiddenToOutBias) + tanhInPlace(self.out) + return np.argmax(self.npOut) #change + + cpdef mutate(self): + cdef double m = 1 + cdef double mr = 0.01 + mutateMat(self.inToHiddenMat, m, mr) + mutate(self.inToHiddenBias, m, mr) + mutateMat(self.hiddenToOutMat, m, mr) + mutate(self.hiddenToOutBias, m, mr) + + + cpdef copyFrom(self, other): + self.input_shape = other.input_shape + self.num_outputs = other.num_outputs + self.num_units = other.num_units + + cdef double[:,:] newInToHiddenMat = other.npInToHiddenMat + self.inToHiddenMat[:] = newInToHiddenMat + cdef double[:] newInToHiddenBias = other.npInToHiddenBias + self.inToHiddenBias[:] = newInToHiddenBias + cdef double[:,:] newHiddenToOutMat = other.npHiddenToOutMat + self.hiddenToOutMat[:] = newHiddenToOutMat + cdef double[:] newHiddenToOutBias = other.npHiddenToOutBias + self.hiddenToOutBias[:] = newHiddenToOutBias + + + + +def initCcea(input_shape, num_outputs, num_units=16): + def initCceaGo(data): + number_agents = data['Number of Agents'] + + populationCol = [[Evo_MLP(input_shape,num_outputs,num_units) for i in range(data['Trains per Episode'])] for j in range(number_agents)] + data['Agent Populations'] = populationCol + return initCceaGo + +def initCcea2(input_shape, num_outputs, num_units=16): + def initCceaGo(data): + number_agents = data['Number of Agents'] + policyCount = data['Number of Policies'] + populationCol = [[Evo_MLP(input_shape,num_outputs,num_units) for i in range(policyCount)] for j in range(number_agents)] + data['Agent Populations'] = populationCol + return initCceaGo + +def clearFitness(data): + populationCol = data['Agent Populations'] + number_agents = data['Number of Agents'] + + for agentIndex in range(number_agents): + for policy in populationCol[agentIndex]: + policy.fitness = 0 + +def assignCceaPolicies(data): + number_agents = data['Number of Agents'] + populationCol = data['Agent Populations'] + worldIndex = data["World Index"] + policyCol = [None] * number_agents + for agentIndex in range(number_agents): + policyCol[agentIndex] = populationCol[agentIndex][worldIndex] + data["Agent Policies"] = policyCol + +def assignCceaPolicies2(data): + number_agents = data['Number of Agents'] + populationCol = data['Agent Populations'] + worldIndex = data["World Index"] + policyCount = len(populationCol[0]) + policyCol = [None] * number_agents + for agentIndex in range(number_agents): + policyCol[agentIndex] = populationCol[agentIndex][worldIndex % policyCount] + data["Agent Policies"] = policyCol + +def assignBestCceaPolicies(data): + number_agents = data['Number of Agents'] + populationCol = data['Agent Populations'] + policyCol = [None] * number_agents + for agentIndex in range(number_agents): + policyCol[agentIndex] = max(populationCol[agentIndex], key = lambda policy: policy.fitness) + #policyCol[agentIndex] = populationCol[agentIndex][0] + data["Agent Policies"] = policyCol + +def assignCceaPoliciesHOF(data): + assignBestCceaPolicies(data) + curAgent = data["Current Agent"] + worldIndex = data["World Index"] + populationCol = data['Agent Populations'] + + data["Agent Policies"][curAgent] = populationCol[curAgent][worldIndex] + +def 
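Evo_MLP.get_action above is a two-layer forward pass: ReLU hidden layer, tanh output layer, argmax over the outputs to pick a discrete action index. A plain NumPy mirror of that pass, using the same uniform initialization ranges and the 8/20/6 layer sizes that initCcea is given later in this diff:

import numpy as np

def get_action(state, W1, b1, W2, b2):
    hidden = np.maximum(W1 @ state + b1, 0.0)   # reluInPlace
    out = np.tanh(W2 @ hidden + b2)             # tanhInPlace
    return int(np.argmax(out))                  # discrete action index

rng = np.random.default_rng(0)
input_shape, num_units, num_outputs = 8, 20, 6
stdev = (3.0 / input_shape) ** 0.5
W1 = rng.uniform(-stdev, stdev, (num_units, input_shape))
b1 = rng.uniform(-stdev, stdev, num_units)
stdev = (3.0 / num_units) ** 0.5
W2 = rng.uniform(-stdev, stdev, (num_outputs, num_units))
b2 = rng.uniform(-stdev, stdev, num_outputs)

print(get_action(rng.normal(size=input_shape), W1, b1, W2, b2))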
rewardCceaPolicies(data): + policyCol = data["Agent Policies"] + number_agents = data['Number of Agents'] + rewardCol = data["Agent Rewards"] + for agentIndex in range(number_agents): + policyCol[agentIndex].fitness = rewardCol[agentIndex] + +def rewardCceaPoliciesHOF(data,G): + policyCol = data["Agent Policies"] + curAgent = data["Current Agent"] + number_agents = data['Number of Agents'] + rewardCol = data["Agent Rewards"] + + policyCol[curAgent].fitness = G + +def rewardCceaPolicies2(data): + policyCol = data["Agent Policies"] + number_agents = data['Number of Agents'] + rewardCol = data["Agent Rewards"] + for agentIndex in range(number_agents): + policyCol[agentIndex].fitness += rewardCol[agentIndex] + +cpdef evolveCceaPolicies(data): + cdef int number_agents = data['Number of Agents'] + populationCol = data['Agent Populations'] + cdef int agentIndex, matchIndex, halfPopLen + halfPopLen = int(len(populationCol[0])//2) + for agentIndex in range(number_agents): + population = populationCol[agentIndex] + + # Binary Tournament, replace loser with copy of winner, then mutate copy + for matchIndex in range(halfPopLen): + + if population[2 * matchIndex].fitness > population[2 * matchIndex + 1].fitness: + population[2 * matchIndex + 1].copyFrom(population[2 * matchIndex]) + else: + population[2 * matchIndex].copyFrom(population[2 * matchIndex + 1]) + + population[2 * matchIndex + 1].mutate() + + random.shuffle(population) + data['Agent Populations'][agentIndex] = population + + + \ No newline at end of file diff --git a/teaming/ddpg.py b/teaming/ddpg.py new file mode 100644 index 0000000..030be60 --- /dev/null +++ b/teaming/ddpg.py @@ -0,0 +1,244 @@ +import tensorflow as tf +import numpy as np +from collections import deque +from random import sample + +class noise: + def __init__(self,size,th=.3*2,sig=.15*2,mu=0.0,dt=1e-2): + self.size=size + + self.th=th + self.dt=dt + self.sig=sig + self.mu=mu + self.reset() + + def sample(self): + + #return 0.2*np.random.normal(size=self.size) + self.state=self.state \ + + self.th*(self.mu-self.state)*self.dt \ + + self.sig*(self.dt**0.5)*np.random.normal(size=self.size) + out=self.state.copy() + #out[0]+=1 # forward vel + out[1]*=0.5 # angled vel + return out + + def reset(self): + self.state=np.zeros(self.size) + + +class agent: + def __init__(self, sess, s_dim, a_dim, hc_dim,ha_dim, lr, batch,gamma,dueling=0): + self.sess=sess + self.tau=0.001 + self.s_dim=s_dim + self.a_dim=a_dim + self.batch=batch + self.gamma=gamma + self.dueling=dueling + + self.base_line=False + + self.var=0.05 + self.a_hidden=ha_dim + self.c_hidden=hc_dim + + self.s=tf.placeholder(tf.float32,[None,s_dim]) + self.a=tf.placeholder(tf.float32,[None,a_dim]) + self.r=tf.placeholder(tf.float32,[None,1]) + + self.hist=[deque(maxlen=100000) for i in range(2)] + + self.actor0, self.a_params0 = self.gen_actor() + self.critic0,self.c_params0 = self.gen_critic() + + + self.actor1, self.a_params1 = self.gen_actor() + self.critic1,self.c_params1 = self.gen_critic() + + + self.tactor0, self.ta_params0 = self.gen_actor() + self.tcritic0,self.tc_params0 = self.gen_critic() + + + self.tactor1, self.ta_params1 = self.gen_actor() + self.tcritic1,self.tc_params1 = self.gen_critic() + + self.actor, self.a_params = [self.actor0,self.actor1], [self.a_params0, self.a_params1] + self.critic,self.c_params = [self.critic0,self.critic1],[self.c_params0,self.c_params1] + + self.tactor, self.ta_params = [self.tactor0,self.tactor1], [self.ta_params0, self.ta_params1] + self.tcritic,self.tc_params = 
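evolveCceaPolicies above runs a pairwise binary tournament: within each adjacent pair the loser is overwritten with a copy of the winner, the odd-index slot is then mutated (whichever member of the pair it currently holds), and the population is shuffled so the pairings differ next generation. A self-contained sketch; Policy is a hypothetical stand-in for Evo_MLP with only the pieces the tournament touches.

import random

class Policy:
    def __init__(self, fitness):
        self.fitness = fitness
        self.genome = [random.gauss(0.0, 1.0) for _ in range(4)]

    def copyFrom(self, other):
        self.genome = list(other.genome)   # fitness is re-earned next episode

    def mutate(self):
        self.genome = [g + 0.1 * random.gauss(0.0, 1.0) for g in self.genome]

def evolve_population(population):
    half = len(population) // 2
    for m in range(half):
        even, odd = population[2 * m], population[2 * m + 1]
        if even.fitness > odd.fitness:
            odd.copyFrom(even)
        else:
            even.copyFrom(odd)
        odd.mutate()            # the odd slot is mutated in either branch
    random.shuffle(population)
    return population

pop = [Policy(fitness=random.random()) for _ in range(10)]
pop = evolve_population(pop)
print(len(pop))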
[self.tcritic0,self.tcritic1],[self.tc_params0,self.tc_params1] + + if self.dueling: + self.action_grads = [tf.gradients(self.critic[i]-self.critic[j], self.a) for i,j in [[0,1],[1,0]] ] + else: + self.action_grads = [tf.gradients(self.critic[i], self.a) for i in range(2) ] + + + self.a_grad= tf.placeholder(tf.float32, [None, self.a_dim]) + + self.actor_gradients_ = [tf.gradients(self.actor[i], self.a_params[i], -self.a_grad) for i in range(2) ] + self.actor_gradients = [list(map(lambda x: tf.div(x, self.batch), self.actor_gradients_[i])) for i in range(2)] + + + self.a_opt = [tf.train.AdamOptimizer(lr*0.1).apply_gradients(zip(self.actor_gradients[i], self.a_params[i])) for i in range(2)] + + + self.loss = [tf.losses.mean_squared_error(self.critic[i],self.r) for i in range(2)] + self.c_opt = [tf.train.AdamOptimizer(lr).minimize(self.loss[i]) for i in range(2)] + + self.update_ta = \ + [[self.ta_params[j][i].assign(tf.multiply(self.a_params[j][i], self.tau) + \ + tf.multiply(self.ta_params[j][i], 1. - self.tau)) + for i in range(len(self.a_params[0]))] for j in range(2)] + + self.update_tc = \ + [[self.tc_params[j][i].assign(tf.multiply(self.c_params[j][i], self.tau) + \ + tf.multiply(self.tc_params[j][i], 1. - self.tau)) + for i in range(len(self.c_params[0]))] for j in range(2)] + + def activate(self,x): + return tf.nn.tanh(x) + + def gen_critic(self): + with tf.name_scope("critic") as scope: + w1=tf.Variable(tf.random_normal([self.s_dim, self.c_hidden],stddev=self.var)) + b1=tf.Variable(tf.random_normal([self.c_hidden],stddev=self.var)) + netc=tf.matmul(self.s,w1)+b1 + + w2=tf.Variable(tf.random_normal([self.a_dim, self.c_hidden],stddev=self.var)) + #b2=tf.Variable(tf.random_normal([self.c_hidden],stddev=self.var)) + neta=tf.matmul(self.a,w2) + + net=self.activate(neta+netc) + + + + w3=tf.Variable(tf.random_normal([self.c_hidden,1],stddev=self.var)) + b3=tf.Variable(tf.random_normal([1],stddev=self.var)) + #net=self.activate(tf.matmul(net,w3)+b3) + net=tf.matmul(net,w3)+b3 + return net, [w1,w2,w3,b1,b3] + + + def gen_actor(self): + with tf.name_scope("actor") as scope: + w1=tf.Variable(tf.random_normal([self.s_dim, self.a_hidden],stddev=self.var)) + b1=tf.Variable(tf.random_normal([self.a_hidden],stddev=self.var)) + #net=self.activate(tf.matmul(self.s,w1)+b1) + + net=self.activate(tf.matmul(self.s,w1)+b1) + + w2=tf.Variable(tf.random_normal([self.a_hidden,self.a_dim],stddev=self.var)) + b2=tf.Variable(tf.random_normal([self.a_dim],stddev=self.var)) + net=self.activate(tf.matmul(net,w2)+b2) + + return net, [w1,w2,b1,b2] + + def store(self,s,a,r,sp,done,idx): + self.hist[idx].append([s,a,r,sp,done]) + #for h in zip(s,a,r,sp,done): + # self.hist[idx].append(h) + + def actor_train(self,s,a,idx): + grads=self.sess.run(self.action_grads[idx],feed_dict={self.s:s,self.a:a}) + #print(grads) + self.sess.run(self.a_opt[idx],feed_dict={self.s:s,self.a_grad:grads[0]}) + + def critic_train(self,s,a,r,idx): + _,loss = self.sess.run([self.c_opt[idx],self.loss[idx]],feed_dict={self.s:s,self.a:a,self.r:r}) + return loss + + def train_all(self,idx): + if len(self.hist[idx])np.pi: + trn-=2*np.pi + if trn<-np.pi: + trn+=2*np.pi + + spd=1.7#min(2.0,dst) + + a=[spd,trn] + A_.append(a) + + return A_ + + + def test(self,env,itrs=50): + print("test") + old_team=self.team + ''' + if self.dueling: + idxs=[episode%2]*self.types + #idxs=np.random.randint(2,size=self.types) #which of the pair of policies to use + else: + ''' + # + self.log.clear("position") + self.log.clear("types") + + self.log.clear("poi") + 
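The update_ta / update_tc ops above are the standard DDPG soft target update, target <- tau * online + (1 - tau) * target, with tau = 0.001. A NumPy sketch of the same Polyak averaging applied parameter by parameter:

import numpy as np

def soft_update(target_params, online_params, tau=0.001):
    # target <- tau * online + (1 - tau) * target, in place.
    for t, o in zip(target_params, online_params):
        t *= (1.0 - tau)
        t += tau * o

online = [np.ones((3, 3)), np.ones(3)]
target = [np.zeros((3, 3)), np.zeros(3)]
for _ in range(10):
    soft_update(target, online, tau=0.1)
print(round(float(target[1][0]), 4))   # 1 - 0.9**10, creeping toward the online value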
self.log.store("poi",np.array(env.data["Poi Positions"])) + self.log.clear("poi vals") + self.log.store("poi vals",np.array(env.data['Poi Static Values'])) + Rs=[] + teams=self.all_teams(self.nagents) + + for i in range(len(teams)): + + team=np.array(teams[i]) + + self.team=team + #for i in range(itrs): + + #self.randomize() + s=env.reset() + done=False + R=[] + i=0 + self.log.store("types",self.team.copy(),i) + + while not done: + + self.log.store("position",np.array(env.data["Agent Positions"]),i) + if i%self.pol_freq==0: + a=self.act(s,False) + + action=self.idx2a(env,a) + sp, r, done, info = env.step(action) + R.append(r[0]) + g=env.data["Global Reward"] + s=sp + i+=1 + Rs.append(g) + self.log.store("test",Rs) + + self.team=old_team + + def quick(self,env,episode,render=False): + s=env.reset() + + for i in range(100): + a=[[0,0] for i in range(self.nagents)] + sp, r, done, info = env.step(a) + return [0.0] + + def all_teams(self,k): + teams=[] + for i in range(k+1): + for j in range(k-i+1): + team=[0]*i+[1]*j+[2]*(k-j-i) + teams.append(team) + #print(teams) + return teams \ No newline at end of file diff --git a/teaming/learner3.py b/teaming/learner3.py new file mode 100644 index 0000000..76ab098 --- /dev/null +++ b/teaming/learner3.py @@ -0,0 +1,213 @@ +import numpy as np +#import tensorflow as tf +import numpy as np + + +from .logger import logger +import pyximport +from .ccea import * + +class learner: + def __init__(self,team,sess,sim): + self.log=logger() + self.nagents=len(team) + + self.itr=0 + self.update_freq=20 + self.pol_freq=50 + self.types=max(team)+1 + self.team=team + self.team_trials=1 + self.every_team=self.all_teams(self.nagents) + initCcea(input_shape=8, num_outputs=6, num_units=20)(sim.data) + #self.agents=[net(sess,[8,20,6], 0.0005) for i in range(self.types)] + + def act(self,S,data,trial): + policyCol=data["Agent Policies"] + A=[] + for s,t in zip(S,self.team[trial]): + + a = policyCol[t].get_action(s) + + A.append(a) + return A + + def store(self,S,A,R): + for s,a,t,r in zip(S,A,self.team,R): + + self.agents[t].store(s,a,r) + + def learn(self,share=False): + loss=[] + for i in range(self.types): + + L=self.agents[i].batch_train() + loss.append(L) + if share: + L=self.agents[i-1].batch_train(self.agents[i].buffer) + loss.append(L) + return loss + + def randomize(self): + length=len(self.every_team) + teams=[] + for i in range(self.team_trials): + idx=np.random.choice(length) + teams.append(self.every_team[idx].copy()) + self.team=teams + #self.team=np.random.randint(0,self.types,self.nagents) + + def save(self,fname="log.pkl"): + print("saved") + self.log.save(fname) + + def run(self,env,episode,render=False): + populationSize=len(env.data['Agent Populations'][0]) + for worldIndex in range(populationSize): + env.data["World Index"]=worldIndex + + for agent_idx in range(self.types): + G=[] + for trial in range(self.team_trials): + env.data["Current Agent"]=agent_idx #another loop needed + + s = env.reset() + + assignCceaPoliciesHOF(env.data) + #mods.assignHomogeneousPolicy(sim) + + done=False + + #self.log.clear("position") + i=0 + while not done: + #self.log.store("position",np.array(env.data["Agent Positions"]),episode) + self.itr+=1 + + + + if i%self.pol_freq==0: + #print(i) + a=self.act(s,env.data,trial) + #print(a) + + action=self.idx2a(env,a) + + sp, r, done, info = env.step(action) + if r[0]==0: + r-=0.0 + #r=r[0] + g=env.data["Global Reward"] + + s=sp + + if render: + env.render() + + G.append(g) + + rewardCceaPoliciesHOF(env.data,sum(G)) + 
evolveCceaPolicies(env.data) + + self.log.store("reward",g) + return 0,g + def put(self,key,data): + self.log.store(key,data) + + def idx2a(self,env,idx): + A_=[] + for j in range(self.nagents): + i=idx[j] + + loc=env.data["Poi Positions"][i] + ang=env.data["Agent Orientations"][j] + pos=env.data["Agent Positions"][j] + + heading=[loc[0]-pos[0],loc[1]-pos[1]] + dst=(heading[0]**2.0+heading[1]**2.0)**0.5 + #trn=np.arccos( (heading[0]*ang[0]+heading[1]*ang[1])/( np.sqrt(heading[0]**2+heading[1]**2))* np.sqrt(ang[0]**2+ang[1]**2) ) + + trn= np.arctan2( heading[1], heading[0] ) - np.arctan2(ang[1],ang[0]) + + if trn>np.pi: + trn-=2*np.pi + if trn<-np.pi: + trn+=2*np.pi + + spd=1.7#min(2.0,dst) + + a=[spd,trn] + A_.append(a) + + return A_ + + + def test(self,env,itrs=50): + print("test") + old_team=self.team + ''' + if self.dueling: + idxs=[episode%2]*self.types + #idxs=np.random.randint(2,size=self.types) #which of the pair of policies to use + else: + ''' + # + assignBestCceaPolicies(env.data) + + self.log.clear("position") + self.log.clear("types") + + self.log.clear("poi") + self.log.store("poi",np.array(env.data["Poi Positions"])) + self.log.clear("poi vals") + self.log.store("poi vals",np.array(env.data['Poi Static Values'])) + Rs=[] + teams=self.all_teams(self.nagents) + + for i in range(len(teams)): + + team=np.array(teams[i]) + + self.team=[team] + #for i in range(itrs): + + #self.randomize() + s=env.reset() + done=False + R=[] + i=0 + self.log.store("types",self.team[0].copy(),i) + + while not done: + + self.log.store("position",np.array(env.data["Agent Positions"]),i) + if i%self.pol_freq==0: + a=self.act(s,env.data,0) + + action=self.idx2a(env,a) + sp, r, done, info = env.step(action) + R.append(r[0]) + g=env.data["Global Reward"] + s=sp + i+=1 + Rs.append(g) + self.log.store("test",Rs) + + self.team=old_team + + def quick(self,env,episode,render=False): + s=env.reset() + + for i in range(100): + a=[[0,0] for i in range(self.nagents)] + sp, r, done, info = env.step(a) + return [0.0] + + def all_teams(self,k): + teams=[] + for i in range(k+1): + for j in range(k-i+1): + team=[0]*i+[1]*j+[2]*(k-j-i) + teams.append(team) + #print(teams) + return teams \ No newline at end of file diff --git a/teaming/logger.py b/teaming/logger.py new file mode 100644 index 0000000..285f9a8 --- /dev/null +++ b/teaming/logger.py @@ -0,0 +1,35 @@ +import pickle as pkl +import gzip + +class logger: + def __init__(self): + self.data={} + + def store(self,tag,data,idx=None): + + if not tag in self.data: + self.data[tag]=[] + + if idx is None: + self.data[tag].append(data) + elif idx<0: + self.data[tag]=data + else: + if len(self.data[tag])<=idx: + self.data[tag].append([]) + #print(idx,self.data[tag]) + self.data[tag][idx].append(data) + + + def pull(self,tag): + return self.data[tag] + + def clear(self,tag): + self.data[tag]=[] + + def save(self,fname): + with gzip.open(fname,"wb") as f: + pkl.dump( self.data,f) + def load(self,fname): + with gzip.open(fname,"rb") as f: + self.data=pkl.load(f) \ No newline at end of file diff --git a/teaming/qnet.py b/teaming/qnet.py new file mode 100644 index 0000000..4bbe892 --- /dev/null +++ b/teaming/qnet.py @@ -0,0 +1,95 @@ +import tensorflow as tf +import numpy as np +from matplotlib import pyplot as plt +from collections import deque +from random import sample +class net: + + def __init__(self,sess, size,lr=0.0001): + + self.nlayers=len(size) + self.size=size + self.sess=sess + self.epsilon=0.1 + self.nactions=size[-1] + self.buffer=deque(maxlen=100000) + 
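idx2a above turns a chosen POI index into a [speed, turn] command: the turn is the difference between the bearing to the POI and the agent's current heading, computed with arctan2 and wrapped into [-pi, pi]. Unlike an arccos of the normalized dot product, this keeps the sign of the turn, so the agent knows which way to rotate. A standalone sketch of just the angle computation:

import numpy as np

def turn_toward(poi_xy, agent_xy, heading_xy):
    # Signed turn from the agent's heading to the bearing of the target, in [-pi, pi].
    hx, hy = poi_xy[0] - agent_xy[0], poi_xy[1] - agent_xy[1]
    trn = np.arctan2(hy, hx) - np.arctan2(heading_xy[1], heading_xy[0])
    if trn > np.pi:
        trn -= 2 * np.pi
    if trn < -np.pi:
        trn += 2 * np.pi
    return trn

# Agent at the origin facing +x, POI straight above it: a 90 degree left turn.
print(round(turn_toward((0.0, 5.0), (0.0, 0.0), (1.0, 0.0)), 3))   # ~1.571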
self.batch=32 + + + self.x = tf.compat.v1.placeholder(tf.float32, [None, size[0]]) + + y=self.x + for i in range(self.nlayers-1): + W = tf.Variable(tf.compat.v1.random_normal([ size[i], size[i+1] ],stddev=0.1)) + b = tf.Variable(tf.compat.v1.random_normal([ size[i+1] ],stddev=0.1)) + y=tf.matmul(y, W) + b + #if i0.9 or (TRAIN_POLICY_IDX == 4 and score>-2.5): + + controller.save('saves/genetic.pickle',TRAIN_POLICY_IDX) + controller.policy_train(rewards,TRAIN_POLICY_IDX) + + + diff --git a/thesis2.py b/thesis2.py new file mode 100644 index 0000000..f5df425 --- /dev/null +++ b/thesis2.py @@ -0,0 +1,126 @@ +""" +An example using the rover domain gym-style interface and the standard, included CCEA learning algorithms. +This is a minimal example, showing the minimal Gym interface. +""" +import numpy as np + +from rover_domain_core_gym import RoverDomainGym +import code.ccea_2 as ccea +import code.agent_domain_2 as domain +import mods +from multiq.learner import learner +from sys import argv +import pickle + +#heap +#paralell q + +def discount(r,gamma): + length=len(r) + for i in range(2,length+1): + r[-i][0]+=r[-i+1][0]*gamma + return r + +def reduce_state(state): + state=np.asarray(state) + return state[:,-4:] + +episodeCount = 15000 # Number of learning episodes +populationSize = 50 +nagents=8 +RENDER=0 +TEST=0 +minr=0.05 +#TRAIN_POLICY_IDX=4 + +sim = RoverDomainGym(nagents,100) +mods.recipePoi(sim) +obs=sim.reset() +#obs=reduce_state(obs) + +sim.data["Coupling"]=3 +sim.data['Number of Agents']=nagents +obs_size=len(obs[0]) +act_size=4 +print(obs_size) +nPolicies=4 + + +controller = learner(nagents,obs_size,act_size, nPolicies,populationSize,minr) + +fname="saves/qnet.pickle" +DATA = str(nagents)+"agent/data"+argv[1]+".txt" +DATA2= str(nagents)+"agent/data"+argv[1]+".pkl" +DATA3= str(nagents)+"agent/data"+argv[1]+".ckpt" + + + +for idx in range(nPolicies): + controller.load('saves/genetic.pickle',idx) + +open(DATA, 'w').close() +max_score=0.0 + +for episodeIndex in range(episodeCount): + rewards=[] + actions=[[] for i in range(nagents)] + states =[[] for i in range(nagents)] +# for worldIndex in range(populationSize): + obs = sim.reset() + + done = False + stepCount = 0 + while not done: + # obs=reduce_state(obs) + obs=np.asarray(obs) + if TEST: + if stepCount >0: + controller.idxs=[0 for i in range(12)] + if stepCount >50: + controller.idxs=[1 for i in range(12)] + if stepCount >100: + controller.idxs=[2 for i in range(12)] + if stepCount >150: + controller.idxs=[3 for i in range(12)] + + obs_ = tr=np.sum(obs[:,-4:],axis=0)/nagents + jointAction,idxs=controller.action2(obs,obs_) + + obs2, reward, done, info = sim.step(jointAction) + #print(np.max(obs2[:,:4]),np.max(obs2[:,4:20])) + + + rewards.append([reward[0]]) + for a in range(nagents): + states[a].append( (obs[a],obs_) ) + actions[a].append(idxs[a]) + + old_obs=obs + obs=obs2 + + stepCount += 1 + if (RENDER == True) and episodeIndex%100==0: + sim.render() + + tr=np.sum(old_obs[:,-4:],axis=0) + rewards=discount(rewards,1.0) + rewards=np.array(rewards)/(float(nagents*6)) +.01 + score,prob=rewards[-1][0],controller.rand_prob + rewards=[rewards for i in range(nagents)] + + err=controller.train2(states,actions,rewards,5)#,np.random.randint(0,nagents)) + print(tr,episodeIndex, score,prob,err) + #controller.prent() + if score>max_score: + max_score=score + with open(DATA2, 'wb') as handle: + pickle.dump(np.asarray(sim.data["Agent Position History"]), handle, protocol=pickle.HIGHEST_PROTOCOL) + controller.saveq(DATA3) + + with open(DATA, "a") as 
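discount() in thesis2.py above accumulates reward-to-go in place, walking backward through the episode; the scripts call it with gamma = 1.0, which reduces it to a plain suffix sum. A worked example with gamma = 0.99 to make the accumulation visible:

import numpy as np

def discount(r, gamma):
    # r[t] <- r[t] + gamma * r[t+1], walking backward from the end of the episode.
    for i in range(2, len(r) + 1):
        r[-i][0] += r[-i + 1][0] * gamma
    return r

rewards = np.array([[0.0], [0.0], [0.0], [1.0]])
print(discount(rewards, 0.99).ravel())   # approximately [0.9703 0.9801 0.99 1.0]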
myfile: + myfile.write( ",".join([str(f) for f in [episodeIndex, score,prob,err]])) + myfile.write('\n') + + + + diff --git a/thesis3.py b/thesis3.py new file mode 100644 index 0000000..94206f1 --- /dev/null +++ b/thesis3.py @@ -0,0 +1,127 @@ +""" +An example using the rover domain gym-style interface and the standard, included CCEA learning algorithms. +This is a minimal example, showing the minimal Gym interface. +""" +import numpy as np + +from rover_domain_core_gym import RoverDomainGym +import code.ccea_2 as ccea +import code.agent_domain_2 as domain +import mods +from multiq.learner import learner +from sys import argv +import pickle + +#heap +#paralell q + +def discount(r,gamma): + length=len(r) + for i in range(2,length+1): + r[-i][0]+=r[-i+1][0]*gamma + return r + +def reduce_state(state): + state=np.asarray(state) + return state[:,-4:] + +episodeCount = 15000 # Number of learning episodes +populationSize = 50 +nagents=8 +RENDER=0 +TEST=0 +minr=1.0 +#TRAIN_POLICY_IDX=4 + +sim = RoverDomainGym(nagents,100) +mods.recipePoi(sim) +obs=sim.reset() +#obs=reduce_state(obs) + +sim.data["Coupling"]=3 +sim.data['Number of Agents']=nagents +obs_size=len(obs[0]) +act_size=4 +print(obs_size) +nPolicies=4 + + +controller = learner(nagents,obs_size,act_size, nPolicies,populationSize,minr) + +fname="saves/qnet.pickle" +DATA = str(nagents)+"agent/bad"+argv[1]+".txt" +DATA2= str(nagents)+"agent/bad"+argv[1]+".pkl" +DATA3= str(nagents)+"agent/bad"+argv[1]+".ckpt" + + + +for idx in range(nPolicies): + controller.load('saves/genetic.pickle',idx) + +open(DATA, 'w').close() +max_score=0.0 + +for episodeIndex in range(episodeCount): + rewards=[] + actions=[[] for i in range(nagents)] + states =[[] for i in range(nagents)] +# for worldIndex in range(populationSize): + obs = sim.reset() + + done = False + stepCount = 0 + while not done: + # obs=reduce_state(obs) + obs=np.asarray(obs) + if TEST: + if stepCount >0: + controller.idxs=[0 for i in range(12)] + if stepCount >50: + controller.idxs=[1 for i in range(12)] + if stepCount >100: + controller.idxs=[2 for i in range(12)] + if stepCount >150: + controller.idxs=[3 for i in range(12)] + + obs_ = tr=np.sum(obs[:,-4:],axis=0)/nagents + jointAction,idxs=controller.action2(obs,obs_) + + obs2, reward, done, info = sim.step(jointAction) + #print(np.max(obs2[:,:4]),np.max(obs2[:,4:20])) + + + rewards.append([reward[0]]) + for a in range(nagents): + states[a].append( (obs[a],obs_) ) + actions[a].append(idxs[a]) + + old_obs=obs + obs=obs2 + + stepCount += 1 + if (RENDER == True) and episodeIndex%100==0: + sim.render() + + tr=np.sum(old_obs[:,-4:],axis=0) + rewards=discount(rewards,1.0) + rewards=np.array(rewards)/(float(nagents*6)) +.01 + score,prob=rewards[-1][0],controller.rand_prob + rewards=[rewards for i in range(nagents)] + + #err=controller.train2(states,actions,rewards,5)#,np.random.randint(0,nagents)) + err=0.0 + print(tr,episodeIndex, score,prob,err) + #controller.prent() + if score>max_score: + max_score=score + with open(DATA2, 'wb') as handle: + pickle.dump(np.asarray(sim.data["Agent Position History"]), handle, protocol=pickle.HIGHEST_PROTOCOL) + controller.saveq(DATA3) + + with open(DATA, "a") as myfile: + myfile.write( ",".join([str(f) for f in [episodeIndex, score,prob,err]])) + myfile.write('\n') + + + + diff --git a/transfer/__init__.py b/transfer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/transfer/agent.py b/transfer/agent.py new file mode 100644 index 0000000..1e58553 --- /dev/null +++ b/transfer/agent.py @@ -0,0 +1,65 
@@ +import numpy as np +import tensorflow as tf +from .ddpg import noise,agent +#from .ddpg2 import agent + +class multi: + + def __init__(self,n,a,s,rand=True): + BATCH=32 + LR=1e-3 + self.epsilon=0.15 + self.NOISE_RATIO=1.0 + + self.sess=tf.InteractiveSession() + + self.rand=rand + self.n_agents=n + self.noise=[noise(a) for i in range(n)] + self.agents=[agent(self.sess,s,a,LR,BATCH) for i in range(n)] + + self.sess.run(tf.global_variables_initializer()) + self.set_idxs() + + def set_idxs(self): + for n in self.noise: + n.reset() + self.idxs=[] + for i in range(self.n_agents): + if np.random.random()BEST: + BEST=summ + player.save("save/test0") + R=disc(R,.99,100.0) + + print(i,round(R[0][0],3),round(R[-1][0],3),round(summ,3),j) + R_hist.append(summ) + if len(L)==0: L=[1.0] + L_hist.append(np.mean(L)) + break + if j%5: + l=player.train_all() + if l!=None: + L.append(l) + player.store(S,A,R) + print(max(R),min(R)) + if i%50==0: + plt.clf() + plt.subplot(3,1,1) + plt.plot(R_hist) + conv = np.convolve(np.ones(25)/25.0,R_hist,"valid") + plt.subplot(3,1,2) + plt.plot(conv) + plt.subplot(3,1,3) + plt.plot(np.log10(L_hist)) + plt.pause(0.01) diff --git a/transfer/env_test.py b/transfer/env_test.py new file mode 100644 index 0000000..5708810 --- /dev/null +++ b/transfer/env_test.py @@ -0,0 +1,41 @@ +import tensorflow as tf +import numpy as np +from ddpg import agent,noise +import gym +import matplotlib.pyplot as plt + +env = gym.make('BipedalWalker-v2') +s_size=len(env.observation_space.low) +a_size=len(env.action_space.low) +print(env.action_space.low,env.action_space.high) + +print(s_size,a_size) + + +BEST=-80.0 + + +with tf.Session() as sess: + + player=agent(sess,s_size,a_size,0.001,64) + + sess.run(tf.global_variables_initializer()) + player.load("save/test0") + for i in range(10000): + state=env.reset() + R=0.0 + for j in range(1000): + + env.render() + act=player.act(np.array([state]))[0] + + action=act + + + state, reward, done, info = env.step(action) + R+=reward + if done or j==999: + print(i,round(R,4)) + break + + diff --git a/transfer/ideas.txt b/transfer/ideas.txt new file mode 100644 index 0000000..3f96884 --- /dev/null +++ b/transfer/ideas.txt @@ -0,0 +1,22 @@ +ideas for transfer: + + +l2 regularize old weights + +mse difference transfer + + + +Inter-agent transfer learning + +-- few critic many actors + +-- policy swapping (save examples in replay buffer) + +-- 1 critic?? + +-- advantage estimate (V_global-Q_agent) + +-- linear combination of gradients/ Q? (probs bad) + +-- diff --git a/transfer/notes.txt b/transfer/notes.txt new file mode 100644 index 0000000..637e2bd --- /dev/null +++ b/transfer/notes.txt @@ -0,0 +1,95 @@ +action transfer learning + + +policy transfer learning + + +replay transfer + + +noise from other network + + + +inter agent transfer learning + +shouldnt learn better but faster + +policy transfer, critic transfer, buffer transfer, reward shaping + + +sample other policies for exploration for critic to learn + +policy learns from multiple critics + + +other: + +might could would = bad +first paragraph = what paper is about +specific application - dont be vague +agents have ____ - give example +avoid human comparison + + +1- intro first paragraph: +define problem +what is it +why important + +2- define difficulty (related work) +-general difficulty, how addressed, second more specific difficulty + +3 - "in this paper..." what you are going to do / what are you pitching (approach) +"the key contribution/ idea" + +4 - how does solution solve difficulty? 
+" what is cool about this " + +5 - what is the impact/ results / contributions / results (results) (can use page six terms) +speedups? performance? what leads into? + +expand into paper +shrink into abstract + +that used for differentiat +,which , for additional info + +advice + +dont: start scentence with "there are" +"can be" too wishy washy - does is better +avoid filler like "very, extreme" +repetition is worse than clarity + except math to intuition +"propose" is not direct enough "what you did" better +dont use vague wording + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/transfer/save/checkpoint b/transfer/save/checkpoint new file mode 100644 index 0000000..4cd6bde --- /dev/null +++ b/transfer/save/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "test0.ckpt" +all_model_checkpoint_paths: "test0.ckpt" diff --git a/transfer/save/test.ckpt.data-00000-of-00001 b/transfer/save/test.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..8a4fc96 Binary files /dev/null and b/transfer/save/test.ckpt.data-00000-of-00001 differ diff --git a/transfer/save/test.ckpt.index b/transfer/save/test.ckpt.index new file mode 100644 index 0000000..b004a2c Binary files /dev/null and b/transfer/save/test.ckpt.index differ diff --git a/transfer/save/test.ckpt.meta b/transfer/save/test.ckpt.meta new file mode 100644 index 0000000..bac6fa4 Binary files /dev/null and b/transfer/save/test.ckpt.meta differ diff --git a/transfer/save/test0.ckpt.data-00000-of-00001 b/transfer/save/test0.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..85e4527 Binary files /dev/null and b/transfer/save/test0.ckpt.data-00000-of-00001 differ diff --git a/transfer/save/test0.ckpt.index b/transfer/save/test0.ckpt.index new file mode 100644 index 0000000..8832031 Binary files /dev/null and b/transfer/save/test0.ckpt.index differ diff --git a/transfer/save/test0.ckpt.meta b/transfer/save/test0.ckpt.meta new file mode 100644 index 0000000..0e808a5 Binary files /dev/null and b/transfer/save/test0.ckpt.meta differ diff --git a/transfer/sims.py b/transfer/sims.py new file mode 100644 index 0000000..9a8a8d0 --- /dev/null +++ b/transfer/sims.py @@ -0,0 +1,47 @@ +import numpy as np + +class predpred: + + def __init__(self,npred,nprey,speed): + self.npred=npred + self.nprey=nprey + self.speedprey=speed + self.speedpred=1.0 + self.init_pred=np.random.normal(0,0.1,(2,npred)) + self.init_pred_dir=np.random.random(npred) + self.init_prey=np.random.normal(2.0,0.1,(2,nprey)) + + self.sight=5.0 + + def reset(self): + self.pred_loc=self.init_pred.copy() + self.prey_loc=self.init_prey_dir.copy() + self.pred_dir=self.init_pred.copy() + self.prey_dir=self.init_prey_dir.copy() + + def closest(self,loc): + d=1e9 + close=None + x,y=loc + for i in range(self.npred): + X,Y=self.pred_loc[i] + D=(X-x)**2+(Y-y)**2 + if D