diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 20d29a392e..ce86ff7154 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -4244,7 +4244,8 @@ static void compute_rewards(Drive *env, int i) { // Rl-align (GIGAFLOW): min(cos,0) + vel_align*min(cos*v,0) + 0.0025*(1-|θ|/(π/2)) float against_lane_penalty = fminf(cos_theta, 0.0f); // negative when >90 degrees off - float vel_aligned_penalty = agent->reward_coefs[REWARD_COEF_VEL_ALIGN] * fminf(cos_theta * agent->sim_speed, 0.0f); + float vel_aligned_penalty + = agent->reward_coefs[REWARD_COEF_VEL_ALIGN] * fminf(cos_theta * agent->sim_speed_signed, 0.0f); float alignment_bonus = 0.0025f * (1.0f - theta_f / (M_PI / 2.0f)); float lane_align_reward = agent->reward_coefs[REWARD_COEF_LANE_ALIGN] * env->dt