Skip to content

Commit

Permalink
Helper functions for main_tests, create output csv
Browse files Browse the repository at this point in the history
  • Loading branch information
e-aakash committed May 16, 2021
1 parent c235f11 commit cb4dfa2
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 9 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@ build/*
*.cubin
*.fatbin
tests/*_tests
data/*
138 changes: 138 additions & 0 deletions src/fhp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,141 @@ initialize_grid(u8* device_grid, u8* device_obstacle, double* probability,
return;
}

__global__
void
evolve_non_local(u8* device_grid, curandState_t* randstate, int width, int height, int timesteps,
double* device_channels, double* mx, double *my, double* ocpy,
double* amx, double* amy, double* aoc)
{
__shared__ u8 sdm[default_bh+2][default_bw+2];
__shared__ float spx;
__shared__ float spy;
__shared__ float soc;

const auto local_row = threadIdx.y+1;
const auto local_col = threadIdx.x+1;
const auto row = blockIdx.y * blockDim.y + threadIdx.y;
const auto col = blockIdx.x * blockDim.x + threadIdx.x;
if (row >= height || col >= width)
return;
curandState_t localstate = randstate[row*width + col];
__syncthreads();

double px = 0, py = 0;
double ocp = 0;
__syncthreads();


for (size_t t = 0; t < timesteps; t++)
{

// 1. load into shared memory
sdm[local_row][local_col] = device_grid[row*width + col];
// As such all these bool values can be stored in the same
// register. Is nvcc smart enough to do this if the number
// of registers at hand are low?
const auto ubound_y {local_row == blockDim.y-1};
const auto ubound_x {local_col == blockDim.x-1};
const auto lbound_y {local_row == 1};
const auto lbound_x {local_col == 1};
size_t pad_row = row, pad_col = col;
size_t local_pad_row = local_row, local_pad_col = local_col;

// printf("row %d col %d: lr %d lbx %d\n", row, col, local_row, lbound_x);
// NSight is going to roast tf out of this kernel
if (lbound_y)
{
pad_row = (row == 0) ? height-1 : row-1;
local_pad_row = 0;
sdm[local_pad_row][local_col] = device_grid[pad_row * width + col];
} else if (ubound_y)
{
pad_row = (row+1) % height;
local_pad_row = blockDim.y+1;
sdm[blockDim.y+1][local_col] = device_grid[pad_row * width + col];
}
// __syncthreads();
if (lbound_x)
{
pad_col = (col == 0) ? width-1 : col-1;
local_pad_col = 0;
sdm[local_row][local_pad_col] = device_grid[row * width + pad_col];
} else if (ubound_x)
{
pad_col = (col+1) % width;
local_pad_col = blockDim.x+1;
sdm[local_row][local_pad_col] = device_grid[row * width + pad_col];
}
// __syncthreads();

if (pad_row != row && pad_col != col)
{
sdm[local_pad_row][local_pad_col] = device_grid[pad_row*width + pad_col];
}
__syncthreads();

// 2. Streaming
u8 state = stream<u8, 6, 8, 8>(local_row, local_col, sdm);
// if (row == 0 && col == 2){
// printf("row %d, col %d: state: %d\n", row, col, state);
// printf("%d %d %d\n", sdm[local_row][local_col-1], sdm[local_row+1][local_col], sdm[local_row+1][local_col+1]);
// printf("%d %d %d\n", sdm[local_row][local_col+1], sdm[local_row-1][local_col], sdm[local_row-1][local_col-1]);
// }

// printf("[%d, %d]: state %d\n", row, col, state);
__syncthreads();
// 3. Collision
u8 size = d_eq_class_size[state];
u8 base_index = d_state_to_eq[state];

// This is from [0,...,size-1]
float rand = curand_uniform(&localstate);
rand *= size-0.00001;
u8 random_index = (u8)(rand) % size; // Require curand_init for each thread

state = d_eq_classes[base_index + random_index];

device_grid[row*width + col] = state;
// printf("row %d, col %d: collide: %d\n", row, col, state);

// Add to momentum and occupancy matrices
px += momentum_x<u8, 6>(state, device_channels);
py += momentum_y<u8, 6>(state, device_channels);
ocp += occupancy<u8, 6>(state);

atomicAdd(&spx, px);
atomicAdd(&spy, py);
atomicAdd(&soc, ocp);
__syncthreads();
if (local_row == 1 && local_col == 1)
{
spx /= 64;
spy /= 64;
soc/= 64;
}
}
__syncthreads();

mx[row*width + col] = px / timesteps;
my[row*width + col] = py / timesteps;

ocpy[row*width+col] = ocp / timesteps;

if (local_col == 1 && local_row == 1){
const auto br = blockIdx.y;
const auto bc = blockIdx.x;
const auto addr = br*gridDim.x+bc;
// if (br==1)
// printf("[%d %d]\tbr: %d, bc: %d bdx: %d bdy %d index: %d px: %f\n",
// row, col, br, bc, gridDim.x, gridDim.x, addr, spx / timesteps);

amx[addr] = spx / timesteps;
amy[addr] = spy / timesteps;
aoc[addr] = soc / timesteps;
}
// if (row == height-1 && col == width-1){
// printf("[%d %d] br: %d, bc: %d px: %f\n", row, col, blockIdx.y, blockIdx.x, spx / timesteps);
// }
return;
}

19 changes: 19 additions & 0 deletions src/fhp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,19 @@ struct fhp_grid
}
stream << "\n";
}
void
create_csv(std::ofstream &stream, double *buf, const int& w, const int& h)
{
for(int i=0; i<h; i++)
{
int j;
for(j=0; j<w-1; j++)
stream << buf[i*w + j] << ", " ;
stream << buf[i*w + j];
stream << "\n";
}
stream << "\n";
}

};

Expand Down Expand Up @@ -275,3 +288,9 @@ __global__
void
initialize_grid(u8* device_grid, u8* device_obstacle, double* probability,
curandState_t *randstate, int width);

__global__
void
evolve_non_local(u8* device_grid, curandState_t* randstate, int width, int height, int timesteps,
double* device_channels, double* mx, double *my, double* ocpy,
double* amx, double* amy, double* aoc);
6 changes: 3 additions & 3 deletions src/obstacle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ init_cyl_obst_kernel(word *buffer, size_t width, size_t height,
const auto c_dist = lattice_vector_length_d(d_1, d_2, u1_x, u1_y, u2_x, u2_y);
buffer[row*width + col] = (c_dist <= radius ? 1 : 0);

// if (row == 0 || row == height-1){
// buffer[row*width + col] = 1;
// }
if (row == 0 || row == height-1){
buffer[row*width + col] = 1;
}
}

return;
Expand Down
108 changes: 103 additions & 5 deletions tests/main_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,57 @@

constexpr std::array<double, 2> base_velocity_vec { 1.0, 0.0 };

void
aligned_rect(int width, int height, u8* buffer)
{
const auto centre_x = width / 2;
const auto centre_y = height / 2;

for(int i=0; i<height; i++)
{
for(int j=0; j<width; j++)
buffer[i*width+j] = 0;
}

for(int i= centre_y - 100; i<centre_y+100; i++)
{
for(int j=centre_x-5; j<centre_x+5; j++)
buffer[i*width+j] = 1;
}
return;
}

void
true_rect(int width, int height, u8* buffer)
{
const auto centre_x = width / 2;
const auto centre_y = height / 2;

int rh = 63, rw = 9;

for(int i=0; i<height; i++)
{
for(int j=0; j<width; j++)
buffer[i*width+j] = 0;
}

int i = centre_y+rh/2, j = centre_x;
int base = centre_x;
for(int x=0; x<rh/2; x++){
for(j = -rw/2; j<= +rw/2; j++)
buffer[i*width + base + j] = 1;
i = i-1;
for(j = -rw/2; j< +rw/2; j++)
buffer[i*width + base + j] = 1;
i = i-1;
base = base-1;
}
for(j = -rw/2; j<= +rw/2; j++)
{
buffer[i*width + base + j] = 1;
}
return;
}

int main(int argc, char *argv[])
{
Expand Down Expand Up @@ -38,7 +89,7 @@ int main(int argc, char *argv[])
const auto base_v = velocity2(base_velocity_vec);
const auto ch = generate_fhp1_velocities(base_v);

long seed = 1000;
long seed = 1024;

// buffer for storing obstacle information
u8 *buffer = new u8 [width * height];
Expand All @@ -47,10 +98,13 @@ int main(int argc, char *argv[])
double *my = new double[width*height];

// generating an obstacle
initialize_cylindrical_obstacle<u8>(buffer, width, height, centre_x, centre_y, radius);
// initialize_cylindrical_obstacle<u8>(buffer, width, height, centre_x, centre_y, radius);
true_rect(width, height, buffer);
// aligned_rect(width, height, buffer);


// channel-wise occupancy probabilities for initialization
double h_prob[] = { 0.8, 0.7, 0.4, 0.1, 0.4, 0.7 };
double h_prob[] = { 0.7, 0.5, 0.2, 0.1, 0.2, 0.5 };

const dim3 block_config(8, 8);
const dim3 grid_config = make_tiles(block_config, width, height);
Expand All @@ -75,9 +129,15 @@ int main(int argc, char *argv[])
fhp.create_csv(ipy, my);
fhp.create_csv(iocpy, occup);

double *avx, *avy, *aoc;
cudaMalloc((void **) &avx, grid_config.x*grid_config.y*sizeof(double));
cudaMalloc((void **) &avy, grid_config.x*grid_config.y*sizeof(double));
cudaMalloc((void **) &aoc, grid_config.x*grid_config.y*sizeof(double));

// time evolution
evolve<<<grid_config, block_config>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height,
timesteps, fhp.device_channels, fhp.mx, fhp.my, fhp.ocpy);
evolve_non_local<<<grid_config, block_config>>>(fhp.device_grid, fhp.state, fhp.width,
fhp.height, timesteps, fhp.device_channels, fhp.mx, fhp.my, fhp.ocpy,
avx, avy, aoc);
cudaDeviceSynchronize();
gpuErrchk(cudaGetLastError());

Expand All @@ -94,7 +154,45 @@ int main(int argc, char *argv[])
fhp.create_csv(py, my);
fhp.create_csv(ocpy, occup);


std::ofstream apx("data/avx.csv"), apy("data/avy.csv"), aocp("data/aocpy.csv");

double* hpx = new double[grid_config.x*grid_config.y];
double* hpy = new double[grid_config.x*grid_config.y];
double* hoc = new double[grid_config.x*grid_config.y];

cudaMemcpy(hpx, avx, grid_config.x*grid_config.y*sizeof(double),
cudaMemcpyDeviceToHost);
cudaMemcpy(hpy, avy, grid_config.x*grid_config.y*sizeof(double),
cudaMemcpyDeviceToHost);
cudaMemcpy(hoc, aoc, grid_config.x*grid_config.y*sizeof(double),
cudaMemcpyDeviceToHost);

// for(int i=0; i< grid_config.x*grid_config.y; i++)
// printf("%lf ", hpx[i]);
// printf("\n");
fhp.create_csv(apx, hpx, grid_config.x, grid_config.y);
fhp.create_csv(apy, hpy, grid_config.x, grid_config.y);
fhp.create_csv(aocp, hoc, grid_config.x, grid_config.y);

// TODO print output
momentum<<<grid_config, block_config>>>(fhp.device_grid, fhp.device_channels,
fhp.mx, fhp.my, fhp.ocpy, fhp.width);
cudaDeviceSynchronize();
gpuErrchk(cudaGetLastError( ));

std::ofstream fpx("data/fpx.csv"), fpy("data/fpy.csv"), focpy("data/foccupancy.csv");

fhp.get_output(buffer, mx, my, occup);

fhp.create_csv(fpx, mx);
fhp.create_csv(fpy, my);
fhp.create_csv(focpy, occup);

cudaFree(avx);
cudaFree(avy);
cudaFree(aoc);
delete[] hpx; delete[] hpy; delete[] hoc;
delete[] occup;
delete[] buffer;
return 0;
Expand Down

0 comments on commit cb4dfa2

Please sign in to comment.