6060console = Console ()
6161
6262parser = argparse .ArgumentParser (description = "Minimal tree morphology + brain joint evolution (multiprocessing)" )
63- parser .add_argument ("--budget" , type = int , default = 20 , help = "Morphology generations" )
64- parser .add_argument ("--pop" , type = int , default = 16 , help = "Morphology population" )
65- parser .add_argument ("--dur" , type = float , default = 5 .0 , help = "Active control duration" )
63+ parser .add_argument ("--budget" , type = int , default = 5 , help = "Morphology generations" )
64+ parser .add_argument ("--pop" , type = int , default = 5 , help = "Morphology population" )
65+ parser .add_argument ("--dur" , type = float , default = 10 .0 , help = "Active control duration" )
6666parser .add_argument (
6767 "--eval-delay" ,
6868 type = float ,
7575 default = 2.0 ,
7676 help = "Penalty weight for vertical (z-axis) motion during active control" ,
7777)
78- parser .add_argument ("--learn-budget" , type = int , default = 4 , help = "CMA iterations per morphology" )
79- parser .add_argument ("--learn-pop" , type = int , default = 16 , help = "CMA population per iteration" )
78+ parser .add_argument ("--learn-budget" , type = int , default = 20 , help = "CMA iterations per morphology" )
79+ parser .add_argument ("--learn-pop" , type = int , default = 5 , help = "CMA population per iteration" )
8080parser .add_argument (
8181 "--eval-workers" ,
8282 type = int ,
8383 default = max (1 , os .cpu_count () or 1 ),
8484 help = "Worker processes for parallel individual evaluation" ,
8585)
8686
87- parser .add_argument ("--max-modules" , type = int , default = 10 , help = "Max modules in tree" )
87+ parser .add_argument ("--max-modules" , type = int , default = 20 , help = "Max modules in tree" )
8888parser .add_argument ("--max-depth" , type = int , default = 12 , help = "Max tree depth" )
8989parser .add_argument ("--morph-weight" , type = float , default = 0.3 , help = "Weight of morphology term" )
9090parser .add_argument (
@@ -226,14 +226,14 @@ def _learn_brain_progress_for_genome(
226226 learn_pop : int ,
227227 target_position : np .ndarray ,
228228 z_penalty_weight : float ,
229- ) -> tuple [float , list [float ]]:
229+ ) -> tuple [float , list [float ], list [ float ] ]:
230230 try :
231231 world , model , data = _spawn_with_fallback (genome_dict , SPAWN_POSITION )
232232 except Exception :
233- return - float ("inf" ), []
233+ return - float ("inf" ), [], []
234234
235235 if model .nu == 0 :
236- return - float ("inf" ), []
236+ return - float ("inf" ), [], []
237237
238238 net = Network (
239239 input_size = len (get_state_from_data (data )) + 2 ,
@@ -254,9 +254,11 @@ def _learn_brain_progress_for_genome(
254254
255255 best_score = - float ("inf" )
256256 best_vec : list [float ] = []
257+ iteration_scores : list [float ] = []
257258
258259 for _ in range (learn_budget ):
259260 candidates = [learner .ask () for _ in range (learn_pop )]
261+ iteration_best_score = - float ("inf" )
260262 for candidate in candidates :
261263 vec = candidate .value
262264 fill_parameters (net , vec )
@@ -289,16 +291,20 @@ def _learn_brain_progress_for_genome(
289291 if score > best_score :
290292 best_score = score
291293 best_vec = vec .tolist ()
294+ if score > iteration_best_score :
295+ iteration_best_score = score
292296
293- return best_score , best_vec
297+ iteration_scores . append ( iteration_best_score )
294298
299+ return best_score , best_vec , iteration_scores
295300
296- def _evaluate_individual_process (task : tuple [dict , float , float , int , int , float , float ]) -> tuple [float , list [float ]]:
301+
302+ def _evaluate_individual_process (task : tuple [dict , float , float , int , int , float , float ]) -> tuple [float , list [float ], list [float ]]:
297303 genome_dict , duration , eval_delay , learn_budget , learn_pop , morph_weight , z_penalty_weight = task
298304 try :
299305 genome = TreeGenome .from_dict (genome_dict )
300306 morph_term = morphology_fitness_term (genome )
301- score , best_vec = _learn_brain_progress_for_genome (
307+ score , best_vec , iteration_scores = _learn_brain_progress_for_genome (
302308 genome_dict ,
303309 duration ,
304310 eval_delay ,
@@ -309,12 +315,20 @@ def _evaluate_individual_process(task: tuple[dict, float, float, int, int, float
309315 )
310316
311317 if not np .isfinite (morph_term ) or not np .isfinite (score ):
312- return float ("inf" ), best_vec
318+ return float ("inf" ), best_vec , []
313319
314320 fit = - score + morph_weight * morph_term
315- return fit , best_vec
321+ # Compute deltas (improvement at each iteration)
322+ deltas = []
323+ prev_score = - float ("inf" )
324+ for iter_score in iteration_scores :
325+ delta = iter_score - prev_score
326+ deltas .append (delta )
327+ prev_score = iter_score
328+
329+ return fit , best_vec , deltas
316330 except Exception :
317- return float ("inf" ), []
331+ return float ("inf" ), [], []
318332
319333
320334class MinimalJointEvolution :
@@ -479,9 +493,10 @@ def evaluate(self, population: Population) -> Population:
479493
480494 if EVAL_WORKERS == 1 :
481495 for ind , task in track (zip (to_eval , tasks ), total = len (to_eval ), description = "Learning + Evaluating..." ):
482- fit , best_vec = _evaluate_individual_process (task )
496+ fit , best_vec , deltas = _evaluate_individual_process (task )
483497 ind .fitness = fit
484498 ind .tags ["last_brain" ] = best_vec
499+ ind .tags ["learning_deltas" ] = deltas
485500 ind .requires_eval = False
486501 return population
487502
@@ -494,11 +509,12 @@ def evaluate(self, population: Population) -> Population:
494509 for fut in as_completed (future_to_ind ):
495510 ind = future_to_ind [fut ]
496511 try :
497- fit , best_vec = fut .result ()
512+ fit , best_vec , deltas = fut .result ()
498513 except Exception :
499- fit , best_vec = float ("inf" ), []
514+ fit , best_vec , deltas = float ("inf" ), [] , []
500515 ind .fitness = fit
501516 ind .tags ["last_brain" ] = best_vec
517+ ind .tags ["learning_deltas" ] = deltas
502518 ind .requires_eval = False
503519
504520 return population
0 commit comments