Skip to content

Commit

Permalink
Add time averaging in evolve kernel
Browse files: browse the repository at this point in the history
  • Loading branch information
e-aakash committed May 15, 2021
1 parent b46f882 commit 55143a8
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 17 deletions.
23 changes: 21 additions & 2 deletions src/fhp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ setup_kernel(curandState *state, size_t width, size_t height, long seed)

__global__
void
evolve(u8* device_grid, curandState* randstate, int width, int height, int timesteps)
evolve(u8* device_grid, curandState* randstate, int width, int height, int timesteps,
double* device_channels, double* mx, double *my, double* ocpy)
{
__shared__ u8 sdm[default_bh+2][default_bw+2];
const auto local_row = threadIdx.y+1;
Expand All @@ -129,6 +130,13 @@ evolve(u8* device_grid, curandState* randstate, int width, int height, int times
curandState localstate = randstate[row*width + col];
__syncthreads();

mx[row*width + col] = 0;
my[row*width + col] = 0;

ocpy[row*width+col] = 0;
__syncthreads();


for (size_t t = 0; t < timesteps; t++)
{

Expand Down Expand Up @@ -198,7 +206,18 @@ evolve(u8* device_grid, curandState* randstate, int width, int height, int times

device_grid[row*width + col] = state;
// printf("row %d, col %d: collide: %d\n", row, col, device_grid[row*width + col]);

// Add to momentum and occupancy matrices
mx[row*width + col] = mx[row*width + col] + momentum_x<u8, 6>(state, device_channels);
my[row*width + col] = my[row*width + col] + momentum_y<u8, 6>(state, device_channels);

ocpy[row*width+col] = ocpy[row*width+col] + occupancy<u8, 6>(state);
}

mx[row*width + col] = mx[row*width + col] / timesteps;
my[row*width + col] = my[row*width + col] / timesteps;

ocpy[row*width+col] = ocpy[row*width+col] / timesteps;
return;
}

Expand Down Expand Up @@ -347,7 +366,7 @@ setup_constants(fhp1_grid *grid)

__global__
void
momentum(u8* device_grid, double* device_channels, double* mx, double *my, u8* ocpy, int width)
momentum(u8* device_grid, double* device_channels, double* mx, double *my, double* ocpy, int width)
{
const auto row = blockIdx.y * blockDim.y + threadIdx.y;
const auto col = blockIdx.x * blockDim.x + threadIdx.x;
Expand Down
11 changes: 6 additions & 5 deletions src/fhp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct fhp_grid
double *device_channels;
curandState *state;
double *mx, *my;
word* ocpy;
double* ocpy;
double *probability = nullptr;

const size_t width, height;
Expand Down Expand Up @@ -120,7 +120,7 @@ struct fhp_grid

cudaMalloc((void **) &device_channels, channel_mem_sz);
cudaMalloc((void **) &device_grid, mem_sz);
cudaMalloc((void **) &ocpy, mem_sz);
cudaMalloc((void **) &ocpy, grid_sz*sizeof(double));
cudaMalloc((void **) &mx, grid_sz*sizeof(double));
cudaMalloc((void **) &my, grid_sz*sizeof(double));
cudaMalloc((void **) &state, width*height*sizeof(curandState));
Expand Down Expand Up @@ -183,7 +183,7 @@ struct fhp_grid
number_of_particles(word n);

void
get_output(word* output, double *p_x, double* p_y, word* o)
get_output(word* output, double *p_x, double* p_y, double* o)
{
assert(output != NULL);
assert(p_x != NULL);
Expand Down Expand Up @@ -256,11 +256,12 @@ auto momentum_y(word state, double *device_channels)->double;
// kernels
__global__
void
evolve(u8* device_grid, curandState* randstate, int width, int height, int timesteps);
evolve(u8* device_grid, curandState* randstate, int width, int height, int timesteps,
double* device_channels, double* mx, double *my, double* ocpy);

__global__
void
momentum(u8* device_grid, double* device_channels, double* mx, double *my, u8* ocpy, int width);
momentum(u8* device_grid, double* device_channels, double* mx, double *my, double* ocpy, int width);

__global__
void
Expand Down
9 changes: 6 additions & 3 deletions tests/fhp_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ const char *test_fhp_1step()

dim3 block(8, 8);
dim3 grid(width/8, height/8);
evolve<<<grid, block>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height, 1);
evolve<<<grid, block>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height, 1,
fhp.device_channels, fhp.mx, fhp.my, fhp.ocpy);
cudaDeviceSynchronize();
gpuErrchk(cudaGetLastError( ));

Expand Down Expand Up @@ -117,7 +118,8 @@ const char *fhp_all1()

dim3 block(8, 8);
dim3 grid(width/8, height/8);
evolve<<<grid, block>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height, 3);
evolve<<<grid, block>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height, 3,
fhp.device_channels, fhp.mx, fhp.my, fhp.ocpy);
momentum<<<grid, block>>>(fhp.device_grid, fhp.device_channels,
fhp.mx, fhp.my, fhp.ocpy, fhp.width);

Expand Down Expand Up @@ -202,7 +204,8 @@ const char *fhp_all3()

dim3 block(8, 8);
dim3 grid(width/8, height/8);
evolve<<<grid, block>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height, 1000);
evolve<<<grid, block>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height, 1000,
fhp.device_channels, fhp.mx, fhp.my, fhp.ocpy);
momentum<<<grid, block>>>(fhp.device_grid, fhp.device_channels,
fhp.mx, fhp.my, fhp.ocpy, fhp.width);

Expand Down
10 changes: 3 additions & 7 deletions tests/main_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,16 @@ int main(int argc, char *argv[])
// initializing grid
fhp1_grid fhp(width, height, ch, seed, h_prob, buffer);
// time evolution
evolve<<<grid_config, block_config>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height, 1000);
cudaDeviceSynchronize();
gpuErrchk(cudaGetLastError());

momentum<<<grid_config, block_config>>>(fhp.device_grid, fhp.device_channels,
fhp.mx, fhp.my, fhp.ocpy, fhp.width);
evolve<<<grid_config, block_config>>>(fhp.device_grid, fhp.state, fhp.width, fhp.height, 1000,
fhp.device_channels, fhp.mx, fhp.my, fhp.ocpy);
cudaDeviceSynchronize();
gpuErrchk(cudaGetLastError());

// copying back to buffer
cudaMemcpy(buffer, fhp.device_grid, grid_sz * sizeof(u8), cudaMemcpyDeviceToHost);
gpuErrchk(cudaGetLastError());

u8 *occup = new u8[width*height];
double *occup = new double[width*height];
double *mx = new double[width*height];
double *my = new double[width*height];

Expand Down

0 comments on commit 55143a8

Please sign in to comment.