Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions config/ocean/blastar.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,38 @@ package = ocean
env_name = puffer_blastar
policy_name = Policy
rnn_name = Recurrent
vec = multiprocessing

[env]
num_envs = 2048
num_envs = 4096

[train]
total_timesteps = 100_000_000
anneal_lr = False
batch_size = 131072 # 65536
bptt_horizon = 8
checkpoint_interval = 100
clip_coef = 0.2
clip_vloss = True
compile = False
compile_mode = reduce-overhead
cpu_offload = False
data_dir = experiments
device = cuda
ent_coef = 0.002511261007200052
env_batch_size = 1
gae_lambda = 0.9212875655241286
gamma = 0.9410283509696092
learning_rate = 0.0010700459984905535
max_grad_norm = 0.9702296257019044
minibatch_size = 4096
norm_adv = True
num_envs = 1
num_workers = 1
torch_deterministic = True
total_timesteps = 800000000
update_epochs = 1
vf_clip_coef = 0.2
vf_coef = 0.47285471525325906
zero_copy = True

[sweep.metric]
name = environment/enemy_crossed_screen
Expand Down
22 changes: 22 additions & 0 deletions pufferlib/ocean/blastar/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include "blastar.h"
#define Env Blastar
#include "../env_binding.h"

/*
 * Binding hook that configures a single environment from Python keyword
 * arguments.
 *
 * Looks up "num_obs" in kwargs through unpack(), stores the result on the
 * environment, then calls init() with the environment and that stored value.
 * The positional args object is not consulted.
 *
 * Always returns 0.
 */
static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
    (void)args;  // positional arguments are unused by this environment

    env->num_obs = unpack(kwargs, "num_obs");
    init(env, env->num_obs);

    return 0;
}

/*
 * Binding hook that exports the fields of a Log record into a Python dict.
 *
 * Each field is handed to assign_to_dict() under a key spelled exactly like
 * the struct member, in the order listed below. Always returns 0.
 */
// Expands to assign_to_dict(dict, "<field>", log-><field>) so the key string
// and the struct member name cannot drift apart.
#define BLASTAR_LOG_FIELD(field) assign_to_dict(dict, #field, log->field)
static int my_log(PyObject* dict, Log* log) {
    BLASTAR_LOG_FIELD(episode_return);
    BLASTAR_LOG_FIELD(episode_length);
    BLASTAR_LOG_FIELD(lives);
    BLASTAR_LOG_FIELD(score);
    BLASTAR_LOG_FIELD(vertical_closeness_rew);
    BLASTAR_LOG_FIELD(fired_bullet_rew);
    // kill_streak is the one field explicitly cast to float before export,
    // so it is spelled out instead of going through the helper macro.
    assign_to_dict(dict, "kill_streak", (float)log->kill_streak);
    BLASTAR_LOG_FIELD(hit_enemy_with_bullet_rew);
    BLASTAR_LOG_FIELD(avg_score_difference);
    return 0;
}
#undef BLASTAR_LOG_FIELD
10 changes: 4 additions & 6 deletions pufferlib/ocean/blastar/blastar.c
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
#include "blastar.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include "puffernet.h"

// Location of the policy weights file that demo() passes to load_weights().
const char* WEIGHTS_PATH = "resources/blastar/blastar_weights.bin";
// Used as the environment's num_obs and as the third argument to
// make_linearlstm() (presumably the network input size; confirm in puffernet.h).
#define OBSERVATIONS_SIZE 10
// Fourth argument to make_linearlstm() (presumably the number of action
// logits; confirm in puffernet.h).
#define ACTIONS_SIZE 6
// Second argument to load_weights(); presumably the number of values stored in
// the weights file, so it must match the file. TODO confirm.
#define NUM_WEIGHTS 134407

void get_input(BlastarEnv* env) {
void get_input(Blastar* env) {
if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) {
env->actions[0] = 1; // Left
} else if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) {
Expand All @@ -31,7 +29,7 @@ void get_input(BlastarEnv* env) {
int demo() {
Weights* weights = load_weights(WEIGHTS_PATH, NUM_WEIGHTS);
LinearLSTM* net = make_linearlstm(weights, 1, OBSERVATIONS_SIZE, ACTIONS_SIZE);
BlastarEnv env = {
Blastar env = {
.num_obs = OBSERVATIONS_SIZE,
};
allocate(&env, env.num_obs);
Expand All @@ -47,7 +45,7 @@ int demo() {
forward_linearlstm(net, env.observations, env.actions); // AI input
}
c_step(&env);
c_render(client, &env);
c_render(&env);
if (WindowShouldClose() || env.game_over) {
running = 0;
}
Expand All @@ -60,7 +58,7 @@ int demo() {
}

void perftest(float test_time) {
BlastarEnv env = {
Blastar env = {
.num_obs = OBSERVATIONS_SIZE,
};
allocate(&env, env.num_obs);
Expand Down
Loading
Loading