Skip to content

Commit

Permalink
mop_seed_latlonz
Browse files Browse the repository at this point in the history
  • Loading branch information
hguo committed Jan 30, 2024
1 parent 63971cf commit cf3651e
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 59 deletions.
4 changes: 4 additions & 0 deletions include/ftk/basic/kd_lite.hh
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ inline I kdlite_nearest(I n, const F *X, const I *heap, const F *x)
const I i = S[--top]; // pop stack

const I xid = heap[i];
#ifdef __CUDACC__
const I depth = 31 - __clz(i+1); // see https://forums.developer.nvidia.com/t/integer-logarithm-to-the-base-2-how/3722/6
#else
const I depth = std::log2(i+1);
#endif
const I axis = depth % nd;
I next, other;

Expand Down
116 changes: 71 additions & 45 deletions include/ftk/filters/particle_tracer_mpas_ocean.hh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ protected:
#if FTK_HAVE_CUDA
mop_ctx_t *ctx = NULL;
#endif
void load_particles_cuda();
void load_particles_cuda(); // this function is no longer needed now that all seeding are done on gpu
};

////
Expand All @@ -91,49 +91,77 @@ inline void particle_tracer_mpas_ocean::initialize_particles_latlonz(
fprintf(stderr, "initializing geo particles: nlat=%d, nlon=%d, nz=%d, lat0=%f, lat1=%f, lon0=%f, lon1=%f, z0=%f, z1=%f\n",
nlat, nlon, nz, lat0, lat1, lon0, lon1, z0, z1);

const double dz = nz == 1 ? 0.0 : (z1 - z0) / (nz - 1),
dlat = nlat == 1 ? 0.0 : (lat1 - lat0) / (nlat - 1),
dlon = nlon == 1 ? 0.0 : (lon1 - lon0) / (nlon - 1);
int hint = 0; // TODO: hint should be thread-local if openmp is used

#pragma omp parallel for collapse(3)
for (int i = 0; i < nlat; i ++) {
for (int j = 0; j < nlon; j ++) {
for (int k = 0; k < nz; k ++) {
const double lat = deg2rad(i * dlat + lat0);
const double slat = std::sin(lat),
clat = std::cos(lat);

const double lon = deg2rad(j * dlon + lon0);
const double clon = std::cos(lon),
slon = std::sin(lon);

const double z = k * dz + z0;
const double r = earth_radius + z;

feature_curve_t curve;
curve.id = i*nlat*nz + j*nz + k;

feature_point_t p;
p.x[0] = r * clon * clat;
p.x[1] = r * slon * clat;
p.x[2] = r * slat;

p.id = curve.id;
const int ci = m->locate_cell_i({p.x[0], p.x[1], p.x[2]}, hint);
p.tag = m->i2cid(ci);

if (xl == FTK_XL_CUDA) {
#if FTK_HAVE_CUDA
mop_seed_latlonz(ctx,
nlat, lat0, lat1,
nlon, lon0, lon1,
nz, z0, z1);

for (int i = 0; i < ctx->nparticles; i ++) {
feature_point_lite_t &p = ctx->hparts[i];
feature_point_t pt(p);

pt.v[0] = p.scalar[0];
pt.v[1] = p.scalar[1];
pt.v[2] = p.scalar[2];
pt.scalar[0] = p.scalar[3]; // vertVel
pt.scalar[1] = p.scalar[4]; // salinity
pt.scalar[2] = p.scalar[5]; // temperature
pt.id = i;

feature_curve_t curve;
curve.id = i;
curve.emplace_back(p);

trajectories.add(curve);
}
#endif
} else {
const double dz = nz == 1 ? 0.0 : (z1 - z0) / (nz - 1),
dlat = nlat == 1 ? 0.0 : (lat1 - lat0) / (nlat - 1),
dlon = nlon == 1 ? 0.0 : (lon1 - lon0) / (nlon - 1);
int hint = 0; // TODO: hint should be thread-local if openmp is used

#pragma omp parallel for collapse(3)
for (int i = 0; i < nlat; i ++) {
for (int j = 0; j < nlon; j ++) {
for (int k = 0; k < nz; k ++) {
const double lat = deg2rad(i * dlat + lat0);
const double slat = std::sin(lat),
clat = std::cos(lat);

const double lon = deg2rad(j * dlon + lon0);
const double clon = std::cos(lon),
slon = std::sin(lon);

const double z = k * dz + z0;
const double r = earth_radius + z;

feature_curve_t curve;
curve.id = i*nlat*nz + j*nz + k;

feature_point_t p;
p.x[0] = r * clon * clat;
p.x[1] = r * slon * clat;
p.x[2] = r * slat;

p.id = curve.id;
const int ci = m->locate_cell_i({p.x[0], p.x[1], p.x[2]}, hint);
p.tag = m->i2cid(ci);

#if 0
fprintf(stderr, "lat=%f, lon=%f, z=%f, x=%f, %f, %f, tag=%llu\n",
rad2deg(lat), rad2deg(lon), z,
p.x[0], p.x[1], p.x[2], p.tag);
fprintf(stderr, "lat=%f, lon=%f, z=%f, x=%f, %f, %f, tag=%llu\n",
rad2deg(lat), rad2deg(lon), z,
p.x[0], p.x[1], p.x[2], p.tag);
#endif

if (ci >= 0) {
#pragma omp critical
{
curve.push_back(p);
trajectories.add(curve);
if (ci >= 0) {
#pragma omp critical
{
curve.push_back(p);
trajectories.add(curve);
}
}
}
}
Expand Down Expand Up @@ -194,8 +222,8 @@ inline void particle_tracer_mpas_ocean::initialize_particles_at_grid_points(std:
fprintf(stderr, "#trajectories=%zu\n", trajectories.size());

if (xl == FTK_XL_CUDA) {
// fprintf(stderr, "loading particles to gpu...\n");
// load_particles_cuda();
fprintf(stderr, "loading particles to gpu...\n");
load_particles_cuda();
}
}

Expand Down Expand Up @@ -336,8 +364,6 @@ inline void particle_tracer_mpas_ocean::push_field_data_snapshot(std::shared_ptr

// std::cerr << m->coeffsReconstruct.shape() << std::endl;
mop_load_e2c_interpolants(ctx, m->coeffsReconstruct.data());

load_particles_cuda();
}

typedef std::chrono::high_resolution_clock clock_type;
Expand Down
1 change: 1 addition & 0 deletions include/ftk/mesh/mpas_mesh.hh
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ ndarray<F> mpas_mesh<I, F>::zTop_from_zMid_and_thickness(const ndarray<F>& zMid,
zTop.reshape(zMid);
const auto nlayers = zMid.dimf(1);

#pragma omp parallel for collapse(2)
for (auto i = 0; i < n_cells(); i ++)
for (auto j = 0; j < nlayers; j ++) {
zTop.f(0, j, i) = zMid.f(0, j, i) + layerThickness.f(0, j, i) * 0.5;
Expand Down
26 changes: 15 additions & 11 deletions src/cli/ftk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,22 +328,26 @@ void initialize_particle_tracer_mpas_ocean(diy::mpi::communicator comm)
// tracker_particle_mpas_ocean->set_nsteps_per_checkpoint(pt_nsteps_per_checkpoint);
tracker_particle_mpas_ocean->set_delta_t( pt_delta_t );

tracker_particle_mpas_ocean->initialize();
if (pt_seed_box.size() > 0)
tracker_particle_mpas_ocean->initialize_particles_latlonz(
pt_seed_strides[0], pt_seed_box[0], pt_seed_box[1],
pt_seed_strides[1], pt_seed_box[2], pt_seed_box[3],
pt_seed_strides[2], pt_seed_box[4], pt_seed_box[5]);
else
tracker_particle_mpas_ocean->initialize_particles_at_grid_points(pt_seed_strides);

for (int i = 0; i < stream->total_timesteps(); i ++) {
auto g = stream->read(i);
g->print_info(std::cerr);

tracker_particle_mpas_ocean->push_field_data_snapshot(g); // field_data);

if (i != 0) tracker_particle_mpas_ocean->advance_timestep();

if (i == 0) { // initialize here; mainly because the cuda tracer needs to know the data precision first
tracker_particle_mpas_ocean->initialize();
if (pt_seed_box.size() > 0)
tracker_particle_mpas_ocean->initialize_particles_latlonz(
pt_seed_strides[0], pt_seed_box[0], pt_seed_box[1],
pt_seed_strides[1], pt_seed_box[2], pt_seed_box[3],
pt_seed_strides[2], pt_seed_box[4], pt_seed_box[5]);
else
tracker_particle_mpas_ocean->initialize_particles_at_grid_points(pt_seed_strides);
} else { // if (i != 0)
tracker_particle_mpas_ocean->advance_timestep();
}

// the last step
if (i == stream->total_timesteps() - 1) tracker_particle_mpas_ocean->update_timestep();
}

Expand Down
18 changes: 15 additions & 3 deletions src/filters/particle_tracer_mpas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,15 @@ static int locate_cell_global( // global cell search
const int *verts_on_cell)
{
const int c = ftk::kdlite_nearest<3, int, Fm>(
ncells, Xc, kdheap, x);
ncells, Xc, kdheap, x) + 1; // to cell id

if (point_in_cell<F, Fm>(
c, x, iv, xv,
max_edges, Xv, nedges_on_cell, verts_on_cell))
max_edges, Xv, nedges_on_cell, verts_on_cell)) {

// printf("cell=%d, x=%f, %f, %f\n", c, x[0], x[1], x[2]);
return c;
}
else
return -1;
}
Expand Down Expand Up @@ -1179,7 +1182,12 @@ void mop_seed_latlonz(mop_ctx_t *c,

if (nBlocks >= maxGridDim) gridSize = dim3(idivup(nBlocks, maxGridDim), maxGridDim);
else gridSize = dim3(nBlocks);

// allocate
realloc_both(&c->hparts, &c->dparts, c->nparticles, ntasks);
c->nparticles = ntasks;

// seed
seed_latlonz<double, double><<<gridSize, blockSize>>>(
c->dparts,
nlat, lat0, lat1,
Expand All @@ -1194,6 +1202,10 @@ void mop_seed_latlonz(mop_ctx_t *c,
c->d_cells_on_cell,
c->d_verts_on_cell);

cudaMemcpy(c->hparts, c->dparts,
c->nparticles * sizeof(ftk::feature_point_lite_t),
cudaMemcpyDeviceToHost);

checkLastCudaError("[FTK-CUDA] mop_seed_latlonz");
}

Expand Down

0 comments on commit cf3651e

Please sign in to comment.