rsl_rl/algorithms/ppo.py (5 additions & 1 deletion)

@@ -39,6 +39,7 @@ def __init__(
         desired_kl=0.01,
         device="cpu",
         normalize_advantage_per_mini_batch=False,
+        reshuffle_each_epoch=False,
         # RND parameters
         rnd_cfg: dict | None = None,
         # Symmetry parameters
@@ -114,6 +115,7 @@ def __init__(
         self.schedule = schedule
         self.learning_rate = learning_rate
         self.normalize_advantage_per_mini_batch = normalize_advantage_per_mini_batch
+        self.reshuffle_each_epoch = reshuffle_each_epoch

     def init_storage(
         self, training_type, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, actions_shape
@@ -204,7 +206,9 @@ def update(self):  # noqa: C901
         if self.policy.is_recurrent:
             generator = self.storage.recurrent_mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
         else:
-            generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
+            generator = self.storage.mini_batch_generator(
+                self.num_mini_batches, self.num_learning_epochs, self.reshuffle_each_epoch
+            )

         # iterate over batches
         for (
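For orientation, here is how the new option might be switched on from a training script. This is a hypothetical sketch: only `reshuffle_each_epoch` comes from this PR, while the other constructor arguments are assumptions based on the partial signature above.

# Hypothetical usage sketch: only `reshuffle_each_epoch` is from this PR;
# the remaining arguments are assumed from the partial signature above.
from rsl_rl.algorithms import PPO

ppo = PPO(
    policy,                     # actor-critic module, assumed to be built elsewhere
    num_learning_epochs=5,      # epochs per update (assumed parameter name)
    num_mini_batches=4,         # mini-batches per epoch (assumed parameter name)
    reshuffle_each_epoch=True,  # new flag: redraw mini-batch indices every epoch
    device="cpu",
)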
rsl_rl/storage/rollout_storage.py (4 additions & 1 deletion)

@@ -181,7 +181,7 @@ def generator(self):
             ], self.dones[i]

     # for reinforcement learning with feedforward networks
-    def mini_batch_generator(self, num_mini_batches, num_epochs=8):
+    def mini_batch_generator(self, num_mini_batches, num_epochs=8, reshuffle_each_epoch: bool = False):
         if self.training_type != "rl":
             raise ValueError("This function is only available for reinforcement learning training.")
         batch_size = self.num_envs * self.num_transitions_per_env
@@ -210,6 +210,9 @@ def mini_batch_generator(self, num_mini_batches, num_epochs=8):
             rnd_state = self.rnd_state.flatten(0, 1)

         for epoch in range(num_epochs):
+            if reshuffle_each_epoch and epoch > 0:
+                indices = torch.randperm(num_mini_batches * mini_batch_size, requires_grad=False, device=self.device)
+
             for i in range(num_mini_batches):
                 # Select the indices for the mini-batch
                 start = i * mini_batch_size
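Net effect of the two hunks: with the flag off, `mini_batch_generator` keeps its existing behavior of drawing one random permutation and reusing it for every learning epoch; with the flag on, the permutation is redrawn at the start of each epoch after the first, so consecutive epochs group transitions into different mini-batches. A minimal self-contained sketch of that index bookkeeping follows; the function name and standalone structure are illustrative, not the PR's code verbatim.

# Standalone sketch of the reshuffle-each-epoch index logic; it mirrors the
# bookkeeping in rollout_storage.py but is not the PR's code verbatim.
import torch


def mini_batch_index_generator(batch_size, num_mini_batches, num_epochs, reshuffle_each_epoch=False, device="cpu"):
    mini_batch_size = batch_size // num_mini_batches
    # One permutation is drawn up front; with the flag off it is reused every epoch.
    indices = torch.randperm(num_mini_batches * mini_batch_size, device=device)
    for epoch in range(num_epochs):
        if reshuffle_each_epoch and epoch > 0:
            # Redraw so this epoch sees a different mini-batch composition
            # (epoch 0 keeps the initial draw, matching the diff's `epoch > 0` guard).
            indices = torch.randperm(num_mini_batches * mini_batch_size, device=device)
        for i in range(num_mini_batches):
            start = i * mini_batch_size
            yield indices[start : start + mini_batch_size]

The `epoch > 0` guard also avoids a redundant redraw on the first pass, since `indices` is freshly drawn just before the epoch loop.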