Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
278 changes: 278 additions & 0 deletions _posts/2026-04-27-revisiting-the-nle.md

Large diffs are not rendered by default.

90 changes: 90 additions & 0 deletions assets/bibliography/2026-04-27-revisiting-the-nle.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
% MiniHack sandbox paper (arXiv preprint).
% The product name is brace-protected so sentence-casing styles keep its capitals.
% eprint/archiveprefix/url carry the arXiv identifier in structured form; the
% journal string is kept so styles that ignore eprint render as before.
@article{samvelyan2021minihack,
  title         = {{MiniHack} the planet: A sandbox for open-ended reinforcement learning research},
  author        = {Samvelyan, Mikayel and Kirk, Robert and Kurin, Vitaly and Parker-Holder, Jack and Jiang, Minqi and Hambro, Eric and Petroni, Fabio and K{\"u}ttler, Heinrich and Grefenstette, Edward and Rockt{\"a}schel, Tim},
  journal       = {arXiv preprint arXiv:2109.13202},
  year          = {2021},
  eprint        = {2109.13202},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2109.13202}
}

% Online intrinsic rewards from LLM feedback (arXiv preprint).
% eprint/archiveprefix/url carry the arXiv identifier in structured form; the
% journal string is kept so styles that ignore eprint render as before.
@article{zheng2024online,
  title         = {Online intrinsic rewards for decision making agents from large language model feedback},
  author        = {Zheng, Qinqing and Henaff, Mikael and Zhang, Amy and Grover, Aditya and Amos, Brandon},
  journal       = {arXiv preprint arXiv:2410.23022},
  year          = {2024},
  eprint        = {2410.23022},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2410.23022}
}

% Scalable Option Learning (arXiv preprint).
% eprint/archiveprefix/url carry the arXiv identifier in structured form; the
% journal string is kept so styles that ignore eprint render as before.
@article{henaff2025scalable,
  title         = {Scalable Option Learning in High-Throughput Environments},
  author        = {Henaff, Mikael and Fujimoto, Scott and Matthews, Michael and Rabbat, Michael},
  journal       = {arXiv preprint arXiv:2509.00338},
  year          = {2025},
  eprint        = {2509.00338},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2509.00338}
}

% MaestroMotif skill-design paper (arXiv preprint).
% The method name is brace-protected so sentence-casing styles keep its capitals.
% eprint/archiveprefix/url carry the arXiv identifier in structured form; the
% journal string is kept so styles that ignore eprint render as before.
@article{klissarov2024maestromotif,
  title         = {{MaestroMotif}: Skill design from artificial intelligence feedback},
  author        = {Klissarov, Martin and Henaff, Mikael and Raileanu, Roberta and Sodhani, Shagun and Vincent, Pascal and Zhang, Amy and Bacon, Pierre-Luc and Precup, Doina and Machado, Marlos C. and D'Oro, Pierluca},
  journal       = {arXiv preprint arXiv:2412.08542},
  year          = {2024},
  eprint        = {2412.08542},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.08542}
}

% Motif intrinsic-motivation paper (arXiv preprint).
% The method name is brace-protected so it keeps its capital under any casing style.
% eprint/archiveprefix/url carry the arXiv identifier in structured form; the
% journal string is kept so styles that ignore eprint render as before.
@article{klissarov2023motif,
  title         = {{Motif}: Intrinsic motivation from artificial intelligence feedback},
  author        = {Klissarov, Martin and D'Oro, Pierluca and Sodhani, Shagun and Raileanu, Roberta and Bacon, Pierre-Luc and Vincent, Pascal and Zhang, Amy and Henaff, Mikael},
  journal       = {arXiv preprint arXiv:2310.00166},
  year          = {2023},
  eprint        = {2310.00166},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2310.00166}
}

% Scaling laws for imitation learning (arXiv preprint).
% eprint/archiveprefix/url carry the arXiv identifier in structured form; the
% journal string is kept so styles that ignore eprint render as before.
@article{tuyls2023scaling,
  title         = {Scaling laws for imitation learning in single-agent games},
  author        = {Tuyls, Jens and Madeka, Dhruv and Torkkola, Kari and Foster, Dean and Narasimhan, Karthik and Kakade, Sham},
  journal       = {arXiv preprint arXiv:2307.09423},
  year          = {2023},
  eprint        = {2307.09423},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2307.09423}
}

% NetHack imitation-learning paper, NeurIPS volume 36.
% The game name is brace-protected so sentence-casing styles keep its capitals.
@article{piterbarg2023nethack,
  title   = {{NetHack} is hard to hack},
  author  = {Piterbarg, Ulyana and Pinto, Lerrel and Fergus, Rob},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {36},
  pages   = {37540--37566},
  year    = {2023}
}

% Report on the NeurIPS 2021 NetHack Challenge (competition-track proceedings).
% Acronym and game name are brace-protected so sentence-casing styles keep them.
% PMLR is the proceedings publisher, so it belongs in publisher rather than
% organization (which denotes a sponsoring body).
% NOTE(review): the author written as Daejin, Jo may have family and given
% names swapped by the exporter - confirm against the paper before changing.
@inproceedings{hambro2022insights,
  title     = {Insights from the {NeurIPS} 2021 {NetHack} challenge},
  author    = {Hambro, Eric and Mohanty, Sharada and Babaev, Dmitrii and Byeon, Minwoo and Chakraborty, Dipam and Grefenstette, Edward and Jiang, Minqi and Daejin, Jo and Kanervisto, Anssi and Kim, Jongmin and others},
  booktitle = {{NeurIPS} 2021 Competitions and Demonstrations Track},
  pages     = {41--52},
  year      = {2022},
  publisher = {PMLR}
}

% Fine-tuning RL models as forgetting mitigation (arXiv preprint).
% eprint/archiveprefix/url carry the arXiv identifier in structured form; the
% journal string is kept so styles that ignore eprint render as before.
@article{wolczyk2024fine,
  title         = {Fine-tuning reinforcement learning models is secretly a forgetting mitigation problem},
  author        = {Wo{\l}czyk, Maciej and Cupia{\l}, Bart{\l}omiej and Ostaszewski, Mateusz and Bortkiewicz, Micha{\l} and Zaj{\k{a}}c, Micha{\l} and Pascanu, Razvan and Kuci{\'n}ski, {\L}ukasz and Mi{\l}o{\'s}, Piotr},
  journal       = {arXiv preprint arXiv:2402.02868},
  year          = {2024},
  eprint        = {2402.02868},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2402.02868}
}

% Large-scale NetHack dataset paper, NeurIPS volume 35.
% The game name is brace-protected so sentence-casing styles keep its capitals.
@article{hambro2022dungeons,
  title   = {Dungeons and data: A large-scale {NetHack} dataset},
  author  = {Hambro, Eric and Raileanu, Roberta and Rothermel, Danielle and Mella, Vegard and Rockt{\"a}schel, Tim and K{\"u}ttler, Heinrich and Murray, Naila},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {24864--24878},
  year    = {2022}
}

% Original NetHack Learning Environment paper, NeurIPS volume 33.
% The environment name is a proper noun, so it is brace-protected as a unit to
% survive sentence-casing styles.
@article{kuttler2020nethack,
  title   = {The {NetHack Learning Environment}},
  author  = {K{\"u}ttler, Heinrich and Nardelli, Nantas and Miller, Alexander and Raileanu, Roberta and Selvatici, Marco and Grefenstette, Edward and Rockt{\"a}schel, Tim},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {7671--7684},
  year    = {2020}
}
% BALROG benchmark paper (arXiv preprint).
% Benchmark name and acronyms are brace-protected so sentence-casing styles keep them.
% The Wolczyk surname now uses the same l-stroke escape as the wolczyk2024fine
% entry in this file, so the same person is spelled one way throughout.
% eprint/archiveprefix/url carry the arXiv identifier in structured form; the
% journal string is kept so styles that ignore eprint render as before.
@article{paglieri2024balrog,
  title         = {{BALROG}: Benchmarking agentic {LLM} and {VLM} reasoning on games},
  author        = {Paglieri, Davide and Cupia{\l}, Bart{\l}omiej and Coward, Samuel and Piterbarg, Ulyana and Wo{\l}czyk, Maciej and Khan, Akbir and Pignatelli, Eduardo and Kuci{\'n}ski, {\L}ukasz and Pinto, Lerrel and Fergus, Rob and others},
  journal       = {arXiv preprint arXiv:2411.13543},
  year          = {2024},
  eprint        = {2411.13543},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2411.13543}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading