Skip to content

Commit

Permalink
Merge branch 'no_ndarray' of github.com:hguo/ftk into no_ndarray
Browse files Browse the repository at this point in the history
  • Loading branch information
Hanqi Guo committed Feb 12, 2024
2 parents a80c6b9 + 2a39497 commit 3dd54f0
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 22 deletions.
29 changes: 29 additions & 0 deletions include/ftk/numeric/rk4.hh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,35 @@ void angular_stepping(
xn[3] = xn[3] + h;
}

template <int VERT=1, typename T=double>
__device__ __host__
void angular_and_vertical_stepping( // composition of angular and vertical velocities
const T *x,
const T *v, // horizontal
const T vv, // vertical
const T h,
T *xn)
{
const T R = vector_2norm<3, T>(x); // radius

T axis[3]; // rotation axis
cross_product(x, v, axis);

const T vm = vector_2norm<3>(v); // velocity magnitude
const T w = vm / R; // angular velocity
const T dw = h * w; // angular step

axis_rotate_vector(axis, dw, x, xn);

if (VERT) {
const T R1 = R + vv * h; // new radius
for (int i = 0; i < 3; i ++)
xn[i] = xn[i] * R1 / R;
}

xn[3] = x[3] + h; // new time
}

template <typename T=double>
__device__ __host__
void angular_and_vertical_stepping( // composition of angular and vertical velocities
Expand Down
100 changes: 78 additions & 22 deletions src/filters/particle_tracer_mpas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ inline static bool mpas_eval(
nlayers, hint_c, hint_l);
}

template <int ORDER=1, typename F=double, typename Fm=double, typename Fv=double>
template <int ORDER=1, int VERT=1, typename F=double, typename Fm=double, typename Fv=double>
__device__
inline static bool spherical_rk_with_vertical_velocity(
const F h,
Expand All @@ -375,7 +375,7 @@ inline static bool spherical_rk_with_vertical_velocity(
if (ORDER == 1) {
const F alpha = (F)istep / nsteps;

F v[4];
F v[3];
if (!mpas_eval(p.x, v, vv, f, alpha,
V, Vv, Z, nattrs, A,
Xv, max_edges, nedges_on_cell, cells_on_cell, verts_on_cell,
Expand All @@ -385,17 +385,56 @@ inline static bool spherical_rk_with_vertical_velocity(
for (int k = 0; k < 3; k ++)
v0[k] = v[k];

ftk::angular_stepping(p.x, v, h, p.x);
#if 0
const F R = ftk::vector_2norm<3, F>(p.x); // radius
const F R1 = R + vv[0] * h; // new radius
for (int k = 0; k < 3; k ++)
p.x[k] = p.x[k] * R1 / R;
#endif

ftk::angular_and_vertical_stepping(p.x, v, *vv, h, p.x);
return true;
} else if (ORDER == 4) {
const F alpha = (F)istep / nsteps;
const F half = 0.5 / nsteps;

F k1[3], k2[3], k3[3], k4[3];
F k1v, k2v, k3v, k4v; // vertical

F x1[4] = {p.x[0], p.x[1], p.x[2], p.t};
if (!mpas_eval(x1, k1, &k1v, f, alpha,
V, Vv, Z, nattrs, A,
Xv, max_edges, nedges_on_cell, cells_on_cell, verts_on_cell,
nlayers, hint_c, hint_l))
return false;

F x2[4];
ftk::angular_and_vertical_stepping<VERT>(x1, k1, k1v, h/2, x2);
if (!mpas_eval(x2, k2, &k2v, f, alpha+half,
V, Vv, Z, nattrs, A,
Xv, max_edges, nedges_on_cell, cells_on_cell, verts_on_cell,
nlayers, hint_c, hint_l))
return false;

F x3[4];
ftk::angular_and_vertical_stepping<VERT>(x1, k2, k2v, h/2, x3);
if (!mpas_eval(x3, k3, &k3v, f, alpha+half,
V, Vv, Z, nattrs, A,
Xv, max_edges, nedges_on_cell, cells_on_cell, verts_on_cell,
nlayers, hint_c, hint_l))
return false;

F x4[4];
ftk::angular_and_vertical_stepping<VERT>(x1, k3, k3v, h, x4);
if (!mpas_eval(x4, k4, &k4v, f, alpha+half*2,
V, Vv, Z, nattrs, A,
Xv, max_edges, nedges_on_cell, cells_on_cell, verts_on_cell,
nlayers, hint_c, hint_l))
return false;

F w[3], wv = (k1v + 2*k2v + 2*k3v + k4v) / 6.0;
for (int i = 0; i < 3; i ++) {
w[i] = (k1[i] + 2*k2[i] * 2*k3[i] + k4[i]) / 6.0;
v0[i] = k1[i];
}

ftk::angular_and_vertical_stepping<VERT>(x1, w, wv, h, p.x);

return true;

} else
return false;

Expand Down Expand Up @@ -598,6 +637,7 @@ static void derive_z_from_zMid(
template <typename F=double, typename Fm=double>
__global__
static void seed_latlonz(
int *nparticles,
ftk::feature_point_lite_t* particles,
const int nlat, const F lat0, const F lat1,
const int nlon, const F lon0, const F lon1,
Expand Down Expand Up @@ -633,16 +673,25 @@ static void seed_latlonz(
const F z = k * dz + z0;
const F r = R0 + z;

ftk::feature_point_lite_t &p = particles[id];
p.x[0] = r * clon * clat;
p.x[1] = r * slon * clat;
p.x[2] = r * slat;
p.t = 0.0;

int iv[7];
F xv[7][3];
p.tag /* hint_c */ = locate_cell_global(p.x, iv, xv, ncells, kdheap,
const F x[3] = {
r * clon * clat,
r * slon * clat,
r * slat
};
int iv[7]; // not used
F xv[7][3]; // not used

const int c = locate_cell_global(x, iv, xv, ncells, kdheap,
Xc, Xv, max_edges, nedges_on_cell, cells_on_cell, verts_on_cell);

if (c >= 0) {
const int ii = atomicAdd(nparticles, 1);

ftk::feature_point_lite_t &p = particles[ii];
memcpy(p.x, x, sizeof(F)*3);
p.t = 0.0;
p.tag /* hint_c */ = c;
}
}

template <typename F=double, typename Fm=double, typename Fv=double>
Expand Down Expand Up @@ -685,7 +734,7 @@ static void mpas_trace(
unsigned int &hint_l = p.type;

for (int j = isubstart; j < nsubsteps; j ++) {
bool succ = spherical_rk_with_vertical_velocity<1>(
bool succ = spherical_rk_with_vertical_velocity<4, false>(
h, nsteps, j,
p, v0, vv, f,
V, Vv, Z, nattrs, A, Xv,
Expand Down Expand Up @@ -1233,10 +1282,14 @@ void mop_seed_latlonz(mop_ctx_t *c,

// allocate
realloc_both(&c->hparts, &c->dparts, c->nparticles, ntasks);
c->nparticles = ntasks;
// c->nparticles = ntasks;

int *d_nparticles;
cudaMalloc(&d_nparticles, sizeof(int));

// seed
seed_latlonz<double, double><<<gridSize, blockSize>>>(
d_nparticles,
c->dparts,
nlat, lat0, lat1,
nlon, lon0, lon1,
Expand All @@ -1249,7 +1302,10 @@ void mop_seed_latlonz(mop_ctx_t *c,
c->d_nedges_on_cell,
c->d_cells_on_cell,
c->d_verts_on_cell);


cudaMemcpy(&c->nparticles, d_nparticles, sizeof(int), cudaMemcpyDeviceToHost);
cudaFree(d_nparticles);

cudaMemcpy(c->hparts, c->dparts,
c->nparticles * sizeof(ftk::feature_point_lite_t),
cudaMemcpyDeviceToHost);
Expand Down

0 comments on commit 3dd54f0

Please sign in to comment.