Commit 16b70be

[BugFix] Fix various test failures (#2994)
1 parent 78b6026 commit 16b70be

File tree: 7 files changed (+12, −15 lines)

sota-implementations/grpo/grpo_utils.py

Lines changed: 0 additions & 1 deletion
@@ -4,7 +4,6 @@
 # LICENSE file in the root directory of this source tree.
 from __future__ import annotations
 
-import os
 from typing import Any, Literal
 
 import torch

test/test_cost.py

Lines changed: 1 addition & 0 deletions
@@ -14328,6 +14328,7 @@ def _forward_value_estimator_keys(self, **kwargs) -> None:
 
 
 class TestValues:
+    @pytest.mark.skipif(not _has_gym, reason="requires gym")
     def test_gae_multi_done(self):
 
         # constants
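
The added decorator is the standard pytest pattern for gating a test on an optional dependency: when gym is not installed, the test is reported as skipped instead of failing at call time. A minimal, self-contained sketch of the pattern; the `_has_gym` flag and the test body below are illustrative, not taken from TorchRL's test suite.

import importlib.util

import pytest

# Detect the optional dependency once, at import time.
_has_gym = importlib.util.find_spec("gym") is not None


class TestValues:
    @pytest.mark.skipif(not _has_gym, reason="requires gym")
    def test_gae_multi_done(self):
        # Runs only when gym is available; otherwise pytest marks it as skipped.
        import gym

        env = gym.make("CartPole-v1")
        assert env.observation_space is not None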

torchrl/data/llm/chat.py

Lines changed: 1 addition & 1 deletion
@@ -430,7 +430,7 @@ def append(
             history = history.copy().clear_device_()
         else:
             history = history.to(self.device)
-        return torch.stack(list(self.unbind(dim)) + [history], dim=dim)
+        return lazy_stack(list(self.unbind(dim)) + [history], dim=dim)
 
     def extend(
         self, history: History, *, inplace: bool = True, dim: int = 0
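
Swapping torch.stack for tensordict's lazy_stack matters when the stacked items are heterogeneous, as chat turns typically are (different numbers of tokens per message): a lazy stack keeps each element as-is rather than requiring a dense, same-shape stack. A rough sketch of the difference using plain TensorDicts instead of History objects (illustrative only):

import torch
from tensordict import TensorDict, lazy_stack

# Two "turns" whose token tensors have different lengths: stacking the
# underlying tensors densely would fail, a lazy stack does not.
a = TensorDict({"tokens": torch.arange(3)}, batch_size=[])
b = TensorDict({"tokens": torch.arange(5)}, batch_size=[])

stacked = lazy_stack([a, b], dim=0)  # LazyStackedTensorDict: no copy, no shape check
print(stacked[0]["tokens"].shape)  # torch.Size([3])
print(stacked[1]["tokens"].shape)  # torch.Size([5])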

torchrl/envs/common.py

Lines changed: 3 additions & 2 deletions
@@ -16,7 +16,6 @@
 import torch.nn as nn
 from tensordict import (
     is_tensor_collection,
-    lazy_stack,
     LazyStackedTensorDict,
     TensorDictBase,
     unravel_key,
@@ -3326,7 +3325,9 @@ def rollout(
                )
                raise
            else:
                out_td = lazy_stack(tensordicts, len(batch_size), out=out)
+                out_td = LazyStackedTensorDict.maybe_dense_stack(
+                    tensordicts, len(batch_size), out=out
+                )
        if set_truncated:
            found_truncated = False
            for key in self.done_keys:
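
As I read tensordict's API, LazyStackedTensorDict.maybe_dense_stack stacks the per-step tensordicts densely when their structures match and only falls back to a lazy stack when they do not, so the usual rollout path keeps its dense result while heterogeneous steps (e.g. LLM envs) still work. A small sketch of that expected behaviour, separate from the rollout code itself:

import torch
from tensordict import LazyStackedTensorDict, TensorDict

uniform = [
    TensorDict({"obs": torch.zeros(4)}, batch_size=[]),
    TensorDict({"obs": torch.ones(4)}, batch_size=[]),
]
# Matching keys and shapes: expected to come back as a dense stack.
dense = LazyStackedTensorDict.maybe_dense_stack(uniform, 0)

ragged = [
    TensorDict({"obs": torch.zeros(4)}, batch_size=[]),
    TensorDict({"obs": torch.zeros(5)}, batch_size=[]),
]
# Mismatched shapes: expected to fall back to a LazyStackedTensorDict.
lazy = LazyStackedTensorDict.maybe_dense_stack(ragged, 0)

print(type(dense).__name__, type(lazy).__name__)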

torchrl/envs/llm/transforms/browser.py

Lines changed: 4 additions & 8 deletions
@@ -8,17 +8,11 @@
 from __future__ import annotations
 
 import asyncio
-import json
-import re
-import signal
-from contextlib import asynccontextmanager
-from typing import Any, Optional
+from typing import Any
 from urllib.parse import urlparse
 
-from playwright.async_api import async_playwright
 from tensordict import TensorDictBase
 
-from torchrl.data.llm import History
 from torchrl.envs.llm.transforms.tools import MCPToolTransform
 
 # Schema for the browser tool
@@ -147,6 +141,8 @@ def __init__(
 
     async def _init_browser(self):
         """Initialize the browser if not already initialized."""
+        from playwright.async_api import async_playwright
+
         if self.browser is None:
             playwright = await async_playwright().start()
             self.browser = await playwright.chromium.launch(headless=self.headless)
@@ -213,7 +209,7 @@ async def _scroll(self, amount: int) -> dict[str, Any]:
             return {"success": False, "error": str(e)}
 
     async def _extract(
-        self, selector: str, extract_type: str, attribute: Optional[str] = None
+        self, selector: str, extract_type: str, attribute: str | None = None
     ) -> dict[str, Any]:
         """Extract content from the page."""
         try:
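
Moving the playwright import from module level into _init_browser is the usual way to make a heavy or optional dependency lazy: the module stays importable without the package installed, and the import cost (or ImportError) is only paid when the browser feature is actually used. A generic sketch of the pattern; the class and method names below are illustrative, not the transform's real API.

class BrowserSession:
    """Wraps a headless browser; playwright is only needed once a session starts."""

    def __init__(self, headless: bool = True):
        self.headless = headless
        self.browser = None

    async def start(self):
        # Deferred import: importing this module never requires playwright,
        # only calling start() does.
        try:
            from playwright.async_api import async_playwright
        except ImportError as err:
            raise ImportError("playwright is required for the browser tool") from err
        playwright = await async_playwright().start()
        self.browser = await playwright.chromium.launch(headless=self.headless)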

torchrl/envs/llm/transforms/tools.py

Lines changed: 2 additions & 1 deletion
@@ -8,7 +8,6 @@
 import os
 import queue
 import re
-import signal
 import subprocess
 import tempfile
 import threading
@@ -310,6 +309,8 @@ def execute(self, prompt: str) -> dict[str, any]:
 
     def cleanup(self):
         """Clean up the persistent process."""
+        import signal
+
         if self.process:
             try:
                 self.process.send_signal(signal.SIGTERM)
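
The same deferred-import idea is applied to signal here; the cleanup itself is the usual terminate-then-kill sequence for a persistent worker process. A minimal sketch with an illustrative timeout (not the transform's actual cleanup code):

from __future__ import annotations

import subprocess


def cleanup(process: subprocess.Popen | None) -> None:
    """Terminate a persistent worker process, escalating to SIGKILL if needed."""
    import signal  # deferred, mirroring the change above

    if process is None:
        return
    try:
        process.send_signal(signal.SIGTERM)
        process.wait(timeout=5)  # give the process a chance to exit cleanly
    except subprocess.TimeoutExpired:
        process.kill()  # SIGKILL as a last resort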

tutorials/sphinx-tutorials/llm_browser.py

Lines changed: 1 addition & 2 deletions
@@ -63,7 +63,6 @@
 from __future__ import annotations
 
 import warnings
-from pprint import pprint
 
 import torch
 
@@ -210,7 +209,7 @@ def execute_tool_action(
     print(action)
     print("\nEnvironment Response:")
     print("--------------------")
-    pprint(s_["history"].apply_chat_template(tokenizer=env.tokenizer))
+    torchrl_logger.info(s_["history"].apply_chat_template(tokenizer=env.tokenizer))
 
     return s, s_
