From 24dfecf48a8138594a6ddae6ef1bae921d779c4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=92=B2=E6=BA=90?= <2402552459@qq.com>
Date: Tue, 1 Apr 2025 21:35:28 +0800
Subject: [PATCH 1/2] v0.2.0

---
 CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++
 README.md    |  2 +-
 README.zh.md |  2 +-
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fd3a83efb..03763e9ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,41 @@
+2025.04.01 (v0.2.0)
+- env: Add Metadrive environment and configurations (#192)
+- env: Add Sampled MuZero/UniZero and DMC environment with related configurations (#260)
+- env: Polish Chess environment and its render method; add unittests and configurations (#272)
+- env: Add Jericho environment and its configurations (#307)
+- algo: Add Harmony Dream loss balance in MuZero (#242)
+- algo: Adopt AlphaZero for non-zero-sum games (#245)
+- algo: Add AlphaZero CTree unittest (#306)
+- algo: Add recent MCTS-related papers (#324)
+- algo: Introduce rope to use true timestep index as pos_index (#266)
+- algo: Add Jericho DDP configuration (#337)
+- feat: Add LightZero Sphinx documentation (#237)
+- feat: Add Wandb support (#294)
+- feat: Add Atari100k metric utilities (#295)
+- feat: Add eval_benchmark tests (#296)
+- feat: Add save_replay and collect_episode_data options in Jericho (#333)
+- feat: Add an MCTS TicTacToe demo in one single file (#315)
+- fix: Fix DownSample for different observation shapes (#254)
+- fix: Fix wrong chance values in Stochastic MuZero (#275)
+- fix: Use display_frames_as_gif in CartPole (#288)
+- fix: Fix chance encoder in stochastic_muzero_model_mlp.py (#284)
+- fix: Correct typo in model/utils.py (#290)
+- fix: Fix SMZ compile_args and num_simulations bug in world_model (#297)
+- fix: Fix reward type bug in 2048 and OS import issue in CartPole (#304)
+- fix: Switch to macos-13 in action (#319)
+- fix: Fix SMZ & SEZ config for pixel-based DMC (#322)
+- fix: Fix update_per_collect in DDP setting (#321)
+- fix: Fix obs_shape tuple bug in initialize_zeros_batch (#327)
+- fix: Fix prepare_obs_stack_for_unizero (#328)
+- fix: Fix random_policy when len(ready_env_id)

Date: Wed, 2 Apr 2025 17:14:23 +0800
Subject: [PATCH 2/2] how to fix the bug of loading trained model for evaluation

---
 zoo/jericho/configs/jericho_unizero_config.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/zoo/jericho/configs/jericho_unizero_config.py b/zoo/jericho/configs/jericho_unizero_config.py
index 13155dfd5..b7e6d2096 100644
--- a/zoo/jericho/configs/jericho_unizero_config.py
+++ b/zoo/jericho/configs/jericho_unizero_config.py
@@ -185,13 +185,19 @@ def main(env_id: str = 'detective.z5', seed: int = 0, max_env_step: int = int(1e
         f"nlayer{num_layers}_embed{embed_dim}_Htrain{num_unroll_steps}-"
         f"Hinfer{infer_context_length}_bs{batch_size}_seed{seed}"
     )
-    from lzero.entry import train_unizero
+    from lzero.entry import train_unizero, eval_muzero
+    main_config.policy.model_path = '/mnt/afs/niuyazhe/xiongjyu/LightZero/data_lz/data_unizero_jericho/bge-base-en-v1.5/rr_0.1_mtd_F/uz_detectiv_ms50_ass-10_nlayer2_embed768_Htrain10-Hinfer4_bs64_seed0'
     # Launch the training process
-    train_unizero(
+    # train_unizero(
+    #     [main_config, create_config],
+    #     seed=seed,
+    #     model_path=main_config.policy.model_path,
+    #     max_env_step=max_env_step,
+    # )
+    eval_muzero(
         [main_config, create_config],
         seed=seed,
         model_path=main_config.policy.model_path,
-        max_env_step=max_env_step,
     )


@@ -221,4 +227,4 @@ def main(env_id: str = 'detective.z5', seed: int = 0, max_env_step: int = int(1e
 # def run(max_env_step: int):
 #     main(args.env, args.seed, max_env_step=max_env_step)
 # import cProfile
-# cProfile.run(f"run({10000})", filename="./zoo/jericho/detective_unizero_cprofile_10k_envstep", sort="cumulative")
\ No newline at end of file
+# cProfile.run(f"run({10000})", filename="./zoo/jericho/detective_unizero_cprofile_10k_envstep", sort="cumulative")
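
Note on PATCH 2/2: rather than commenting out the training call and hard-coding an absolute checkpoint path in the config file, the same train/evaluate switch could be expressed with a small wrapper. The sketch below is a minimal illustration, assuming only the entry points and call arguments already used in the diff (train_unizero and eval_muzero from lzero.entry); the run_jericho() helper, its evaluate flag, and the placeholder checkpoint path are illustrative and not part of the repository.

from typing import Optional

# Entry points imported exactly as in the patched config file.
from lzero.entry import train_unizero, eval_muzero


def run_jericho(main_config, create_config, seed: int, max_env_step: int,
                evaluate: bool = False, model_path: Optional[str] = None) -> None:
    """Train by default; evaluate an existing checkpoint when evaluate=True."""
    if model_path is not None:
        # Point the policy at the trained checkpoint (same field the patch sets).
        main_config.policy.model_path = model_path

    if evaluate:
        # Evaluation-only path, mirroring the eval_muzero call in the diff.
        eval_muzero(
            [main_config, create_config],
            seed=seed,
            model_path=main_config.policy.model_path,
        )
    else:
        # Original training path, unchanged from the pre-patch config.
        train_unizero(
            [main_config, create_config],
            seed=seed,
            model_path=main_config.policy.model_path,
            max_env_step=max_env_step,
        )

With this shape, the evaluation run from this commit would be invoked as run_jericho(main_config, create_config, seed=0, max_env_step=int(1e6), evaluate=True, model_path='<path/to/trained/ckpt>'), and the training path keeps working without re-editing the file.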