pytorch
diff --git a/docs/requirements.txt
Lines changed: 4 additions & 0 deletions b/docs/requirements.txt
Lines changed: 4 additions & 0 deletions
diff --git a/docs/source/_static/img/rollout-llm.png
318 KB b/docs/source/_static/img/rollout-llm.png
318 KB
diff --git a/docs/source/index.rst
Lines changed: 1 addition & 0 deletions b/docs/source/index.rst
Lines changed: 1 addition & 0 deletions
diff --git a/docs/source/reference/envs.rst
Lines changed: 2 additions & 0 deletions b/docs/source/reference/envs.rst
Lines changed: 2 additions & 0 deletions
diff --git a/docs/source/reference/trainers.rst
Lines changed: 2 additions & 2 deletions b/docs/source/reference/trainers.rst
Lines changed: 2 additions & 2 deletions
diff --git a/test/mocking_classes.py
Lines changed: 7 additions & 3 deletions b/test/mocking_classes.py
Lines changed: 7 additions & 3 deletions
diff --git a/test/test_env.py
Lines changed: 8 additions & 3 deletions b/test/test_env.py
Lines changed: 8 additions & 3 deletions
diff --git a/torchrl/_utils.py
Lines changed: 1 addition & 0 deletions b/torchrl/_utils.py
Lines changed: 1 addition & 0 deletions
diff --git a/torchrl/data/map/hash.py
Lines changed: 2 additions & 1 deletion b/torchrl/data/map/hash.py
Lines changed: 2 additions & 1 deletion
diff --git a/torchrl/data/map/tdstorage.py
Lines changed: 17 additions & 3 deletions b/torchrl/data/map/tdstorage.py
Lines changed: 17 additions & 3 deletions
@@ -28,3 +28,7 @@ vmas
 onnxscript
 onnxruntime
 onnx
+plotly
+igraph
+transformers
+datasets
@@ -105,6 +105,7 @@ Intermediate
    tutorials/dqn_with_rnn
    tutorials/rb_tutorial
    tutorials/export
+   tutorials/beam_search_with_gpt
 
 Advanced
 --------
 
@@ -347,6 +347,8 @@ TorchRL offers a series of custom built-in environments.
 
     PendulumEnv
     TicTacToeEnv
+    LLMHashingEnv
+
 
 Multi-agent environments
 ------------------------
 
@@ -79,7 +79,7 @@ Hooks can be split into 3 categories: **data processing** (``"batch_process"`` a
 
 - **Logging** hooks take a batch of data presented as a ``TensorDict`` and write in the logger
   some information retrieved from that data. Examples include the ``LogValidationReward`` hook, the reward
-  logger (``LogScaler``) and such. Hooks should return a dictionary (or a None value) containing the
+  logger (``LogScalar``) and such. Hooks should return a dictionary (or a None value) containing the
   data to log. The key ``"log_pbar"`` is reserved for boolean values indicating whether the logged value
   should be displayed on the progress bar printed in the training log.
 
@@ -174,7 +174,7 @@ Trainer and hooks
     BatchSubSampler
     ClearCudaCache
     CountFramesLog
-    LogScaler
+    LogScalar
     OptimizerHook
     LogValidationReward
     ReplayBufferTrainer
 
@@ -1931,14 +1931,18 @@ def __init__(self):
             tensor=Unbounded(3),
             non_tensor=NonTensor(shape=()),
         )
+        self._saved_obs_spec = self.observation_spec.clone()
         self.state_spec = Composite(
             non_tensor=NonTensor(shape=()),
         )
+        self._saved_state_spec = self.state_spec.clone()
         self.reward_spec = Unbounded(1)
+        self._saved_full_reward_spec = self.full_reward_spec.clone()
         self.action_spec = Unbounded(1)
+        self._saved_full_action_spec = self.full_action_spec.clone()
 
     def _reset(self, tensordict):
-        data = self.observation_spec.zero()
+        data = self._saved_obs_spec.zero()
         data.set_non_tensor("non_tensor", 0)
         data.update(self.full_done_spec.zero())
         return data
@@ -1947,10 +1951,10 @@ def _step(
         self,
         tensordict: TensorDictBase,
     ) -> TensorDictBase:
-        data = self.observation_spec.zero()
+        data = self._saved_obs_spec.zero()
         data.set_non_tensor("non_tensor", tensordict["non_tensor"] + 1)
         data.update(self.full_done_spec.zero())
-        data.update(self.full_reward_spec.zero())
+        data.update(self._saved_full_reward_spec.zero())
         return data
 
     def _set_seed(self, seed: Optional[int]):
 
@@ -3528,8 +3528,13 @@ def test_single_env_spec():
     assert env.input_spec.is_in(env.input_spec_unbatched.zeros(env.shape))
 
 
-def test_auto_spec():
-    env = CountingEnv()
+@pytest.mark.parametrize("env_type", [CountingEnv, EnvWithMetadata])
+def test_auto_spec(env_type):
+    if env_type is EnvWithMetadata:
+        obs_vals = ["tensor", "non_tensor"]
+    else:
+        obs_vals = "observation"
+    env = env_type()
     td = env.reset()
 
     policy = lambda td, action_spec=env.full_action_spec.clone(): td.update(
@@ -3552,7 +3557,7 @@ def test_auto_spec():
         shape=env.full_state_spec.shape, device=env.full_state_spec.device
     )
     env._action_keys = ["action"]
-    env.auto_specs_(policy, tensordict=td.copy())
+    env.auto_specs_(policy, tensordict=td.copy(), observation_key=obs_vals)
     env.check_env_specs(tensordict=td.copy())
 
 
 
@@ -829,6 +829,7 @@ def _can_be_pickled(obj):
 def _make_ordinal_device(device: torch.device):
     if device is None:
         return device
+    device = torch.device(device)
     if device.type == "cuda" and device.index is None:
         return torch.device("cuda", index=torch.cuda.current_device())
     if device.type == "mps" and device.index is None:
 
@@ -75,7 +75,8 @@ def forward(self, features: torch.Tensor) -> torch.Tensor:
 class SipHash(Module):
     """A Module to Compute SipHash values for given tensors.
 
-    A hash function module based on SipHash implementation in python.
+    A hash function module based on a SipHash implementation in Python. Input tensors should have shape ``[batch_size, num_features]``
+    and the output shape will be ``[batch_size]``.
 
     Args:
         as_tensor (bool, optional): if ``True``, the bytes will be turned into integers
 
@@ -177,7 +177,7 @@ def from_tensordict_pair(
         collate_fn: Callable[[Any], Any] | None = None,
         write_fn: Callable[[Any, Any], Any] | None = None,
         consolidated: bool | None = None,
-    ):
+    ) -> TensorDictMap:
         """Creates a new TensorDictMap from a pair of tensordicts (source and dest) using pre-defined rules of thumb.
 
         Args:
@@ -308,7 +308,23 @@ def __setitem__(self, item: TensorDictBase, value: TensorDictBase):
         if not self._has_lazy_out_keys():
             # TODO: make this work with pytrees and avoid calling select if keys match
             value = value.select(*self.out_keys, strict=False)
+        item, value = self._maybe_add_batch(item, value)
+        index = self._to_index(item, extend=True)
+        if index.unique().numel() < index.numel():
+            # If multiple values point to the same place in the storage, we cannot process them by batch
+            # There could be a better way to deal with this, using unique ids.
+            vals = []
+            for it, val in zip(item.split(1), value.split(1)):
+                self[it] = val
+                vals.append(val)
+            # __setitem__ may affect the content of the input data
+            value.update(TensorDictBase.lazy_stack(vals))
+            return
         if self.write_fn is not None:
+            # We use this block in the following context: the value written in the storage is already present,
+            # but it needs to be updated.
+            # We first check if the value is already there using `contains`. If so, we pass the new value and the
+            # previous one to write_fn. The values that are not present are passed alone.
             if len(self):
                 modifiable = self.contains(item)
                 if modifiable.any():
@@ -322,8 +338,6 @@ def __setitem__(self, item: TensorDictBase, value: TensorDictBase):
                     value = self.write_fn(value)
             else:
                 value = self.write_fn(value)
-        item, value = self._maybe_add_batch(item, value)
-        index = self._to_index(item, extend=True)
         self.storage.set(index, value)
 
     def __len__(self):