Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions src/common/include/3dHardcodedIC.fpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#:def Hardcoded3DVariables()
! Place any declaration of intermediate variables here
real(wp) :: rhoH, rhoL, pRef, pInt, h, lam, wl, amp, intH, alph
real(wp) :: rhoH, rhoL, pRef, pInt, h, lam, wl, amp, intH, alph, Mach

real(wp) :: eps

Expand Down Expand Up @@ -94,10 +94,11 @@
! This patch is hard-coded for test suite optimization used in the
! 3D_TaylorGreenVortex case:
! This analytic patch used geometry 9
Mach = 0.1
if (patch_id == 1) then
q_prim_vf(E_idx)%sf(i, j, k) = 101325 + (1*37.6636429464809**2/16)*(cos(2*x_cc(i)/1) + cos(2*y_cc(j)/1))*(cos(2*z_cc(k)/1) + 2)
q_prim_vf(momxb + 0)%sf(i, j, k) = 37.6636429464809*sin(x_cc(i)/1)*cos(y_cc(j)/1)*sin(z_cc(k)/1)
q_prim_vf(momxb + 1)%sf(i, j, k) = -37.6636429464809*cos(x_cc(i)/1)*sin(y_cc(j)/1)*sin(z_cc(k)/1)
q_prim_vf(E_idx)%sf(i, j, k) = 101325 + (Mach**2*376.636429464809**2/16)*(cos(2*x_cc(i)/1) + cos(2*y_cc(j)/1))*(cos(2*z_cc(k)/1) + 2)
q_prim_vf(momxb + 0)%sf(i, j, k) = Mach*376.636429464809*sin(x_cc(i)/1)*cos(y_cc(j)/1)*sin(z_cc(k)/1)
q_prim_vf(momxb + 1)%sf(i, j, k) = -Mach*376.636429464809*cos(x_cc(i)/1)*sin(y_cc(j)/1)*sin(z_cc(k)/1)
end if

case default
Expand Down
184 changes: 114 additions & 70 deletions src/common/m_mpi_common.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -1174,117 +1174,161 @@ contains
if (n > 0) then

if (p > 0) then
if (fft_wrt) then

if (cyl_coord .and. p > 0) then
! Implement pencil processor blocking if using cylindrical coordinates so
! that all cells in azimuthal direction are stored on a single processor.
! This is necessary for efficient application of Fourier filter near axis.

! Initial values of the processor factorization optimization
! Initial estimate of optimal processor topology
num_procs_x = 1
num_procs_y = num_procs
num_procs_z = 1
num_procs_y = 1
num_procs_z = num_procs
ierr = -1

! Computing minimization variable for these initial values
tmp_num_procs_x = num_procs_x
! Benchmarking the quality of this initial guess
tmp_num_procs_y = num_procs_y
tmp_num_procs_z = num_procs_z
fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y)
fct_min = 10._wp*abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z)

! Searching for optimal computational domain distribution
! Optimization of the initial processor topology
do i = 1, num_procs

if (mod(num_procs, i) == 0 &
.and. &
(m + 1)/i >= num_stcls_min*recon_order) then
(n + 1)/i >= num_stcls_min*recon_order) then

tmp_num_procs_x = i
tmp_num_procs_y = num_procs/i
tmp_num_procs_y = i
tmp_num_procs_z = num_procs/i

if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y) &
if (fct_min >= abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z) &
.and. &
(n + 1)/tmp_num_procs_y &
(p + 1)/tmp_num_procs_z &
>= &
num_stcls_min*recon_order) then

num_procs_x = i
num_procs_y = num_procs/i
fct_min = abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y)
num_procs_y = i
num_procs_z = num_procs/i
fct_min = abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z)
ierr = 0

end if

end if

end do

else

! Initial estimate of optimal processor topology
num_procs_x = 1
num_procs_y = 1
num_procs_z = num_procs
ierr = -1
if (cyl_coord .and. p > 0) then
! Implement pencil processor blocking if using cylindrical coordinates so
! that all cells in azimuthal direction are stored on a single processor.
! This is necessary for efficient application of Fourier filter near axis.

! Benchmarking the quality of this initial guess
tmp_num_procs_x = num_procs_x
tmp_num_procs_y = num_procs_y
tmp_num_procs_z = num_procs_z
fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y) &
+ 10._wp*abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z)
! Initial values of the processor factorization optimization
num_procs_x = 1
num_procs_y = num_procs
num_procs_z = 1
ierr = -1

! Optimization of the initial processor topology
do i = 1, num_procs
! Computing minimization variable for these initial values
tmp_num_procs_x = num_procs_x
tmp_num_procs_y = num_procs_y
tmp_num_procs_z = num_procs_z
fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y)

if (mod(num_procs, i) == 0 &
.and. &
(m + 1)/i >= num_stcls_min*recon_order) then
! Searching for optimal computational domain distribution
do i = 1, num_procs

if (mod(num_procs, i) == 0 &
.and. &
(m + 1)/i >= num_stcls_min*recon_order) then

do j = 1, num_procs/i
tmp_num_procs_x = i
tmp_num_procs_y = num_procs/i

if (mod(num_procs/i, j) == 0 &
if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y) &
.and. &
(n + 1)/j >= num_stcls_min*recon_order) then
(n + 1)/tmp_num_procs_y &
>= &
num_stcls_min*recon_order) then

tmp_num_procs_x = i
tmp_num_procs_y = j
tmp_num_procs_z = num_procs/(i*j)
num_procs_x = i
num_procs_y = num_procs/i
fct_min = abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y)
ierr = 0

if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y) &
+ abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z) &
end if

end if

end do

else

! Initial estimate of optimal processor topology
num_procs_x = 1
num_procs_y = 1
num_procs_z = num_procs
ierr = -1

! Benchmarking the quality of this initial guess
tmp_num_procs_x = num_procs_x
tmp_num_procs_y = num_procs_y
tmp_num_procs_z = num_procs_z
fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y) &
+ 10._wp*abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z)

! Optimization of the initial processor topology
do i = 1, num_procs

if (mod(num_procs, i) == 0 &
.and. &
(m + 1)/i >= num_stcls_min*recon_order) then

do j = 1, num_procs/i

if (mod(num_procs/i, j) == 0 &
.and. &
(p + 1)/tmp_num_procs_z &
>= &
num_stcls_min*recon_order) &
then

num_procs_x = i
num_procs_y = j
num_procs_z = num_procs/(i*j)
fct_min = abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y) &
+ abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z)
ierr = 0
(n + 1)/j >= num_stcls_min*recon_order) then

tmp_num_procs_x = i
tmp_num_procs_y = j
tmp_num_procs_z = num_procs/(i*j)

if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y) &
+ abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z) &
.and. &
(p + 1)/tmp_num_procs_z &
>= &
num_stcls_min*recon_order) &
then

num_procs_x = i
num_procs_y = j
num_procs_z = num_procs/(i*j)
fct_min = abs((m + 1)/tmp_num_procs_x &
- (n + 1)/tmp_num_procs_y) &
+ abs((n + 1)/tmp_num_procs_y &
- (p + 1)/tmp_num_procs_z)
ierr = 0

end if

end if

end if

end do
end do

end if
end if

end do
end do

end if
end if

! Verifying that a valid decomposition of the computational
Expand Down
18 changes: 17 additions & 1 deletion src/post_process/m_checker.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ module m_checker

implicit none

private; public :: s_check_inputs
private; public :: s_check_inputs, s_check_inputs_fft

contains

Expand Down Expand Up @@ -111,6 +111,22 @@ contains
@:PROHIBIT(any(omega_wrt) .and. fd_order == dflt_int, "fd_order must be set for omega_wrt")
end subroutine s_check_inputs_vorticity

!> Checks constraints on fft_wrt
!! Every condition below is and-ed with fft_wrt, so none of the checks can
!! trigger when fft_wrt is false. The two integer divisions that fill
!! num_procs_y and num_procs_z are evaluated regardless of fft_wrt.
!! NOTE(review): PROHIBIT is a macro defined outside this view; presumably it
!! aborts with the given message when its condition is true - confirm.
impure subroutine s_check_inputs_fft
! Ratios (n_glb + 1)/(n + 1) and (p_glb + 1)/(p + 1), assigned further down
integer :: num_procs_y, num_procs_z

! Reject the case where either n or p is zero
@:PROHIBIT(fft_wrt .and. (n == 0 .or. p == 0), "FFT WRT only in 3D")
! Reject cylindrical coordinates
@:PROHIBIT(fft_wrt .and. cyl_coord, "FFT WRT incompatible with cylindrical coordinates")
! Reject odd values of m_glb + 1, n_glb + 1 or p_glb + 1
@:PROHIBIT(fft_wrt .and. (MOD(m_glb+1,2) == 1 .or. MOD(n_glb+1,2) == 1 .or. MOD(p_glb+1,2) == 1), "FFT WRT requires global dimensions divisible by 2")
! n_glb + 1 must be an exact multiple of n + 1, and p_glb + 1 of p + 1.
! NOTE(review): presumably n and p are the per-rank extents and n_glb, p_glb
! the global ones, which is how the messages' num_procs wording would follow
! from these conditions - confirm against the caller.
@:PROHIBIT(fft_wrt .and. MOD(n_glb+1,n+1) /= 0, "FFT WRT requires n_glb to be divisible by num_procs_y")
@:PROHIBIT(fft_wrt .and. MOD(p_glb+1,p+1) /= 0, "FFT WRT requires p_glb to be divisible by num_procs_z")
! Integer quotients; exact whenever the two divisibility conditions above hold
num_procs_y = (n_glb + 1)/(n + 1)
num_procs_z = (p_glb + 1)/(p + 1)
! m_glb + 1 must be a multiple of num_procs_y and n_glb + 1 a multiple of
! num_procs_z. The messages name m_glb and n_glb, while the conditions test
! m_glb + 1 and n_glb + 1.
@:PROHIBIT(fft_wrt .and. MOD(m_glb+1,num_procs_y) /= 0, "FFT WRT requires m_glb to be divisible by num_procs_y")
@:PROHIBIT(fft_wrt .and. MOD(n_glb+1,num_procs_z) /= 0, "FFT WRT requires n_glb to be divisible by num_procs_z")
! Reject any boundary-condition code below -1 on any of the six faces.
! NOTE(review): presumably -1 is the periodic code; codes greater than -1 are
! not rejected by this condition - confirm that is intended.
@:PROHIBIT(fft_wrt .and. (bc_x%beg < -1 .or. bc_y%beg < -1 .or. bc_z%beg < -1 .or. bc_x%end < -1 .or. bc_y%end < -1 .or. bc_z%end < -1), "FFT WRT requires periodic BCs")
end subroutine s_check_inputs_fft

!> Checks constraints on Q-criterion parameters
impure subroutine s_check_inputs_qm
@:PROHIBIT(n == 0 .and. qm_wrt)
Expand Down
2 changes: 2 additions & 0 deletions src/post_process/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ module m_global_parameters
integer :: flux_lim
logical, dimension(3) :: flux_wrt
logical :: E_wrt
logical :: fft_wrt
logical :: pres_wrt
logical, dimension(num_fluids_max) :: alpha_wrt
logical :: gamma_wrt
Expand Down Expand Up @@ -441,6 +442,7 @@ contains
parallel_io = .false.
file_per_process = .false.
E_wrt = .false.
fft_wrt = .false.
pres_wrt = .false.
alpha_wrt = .false.
gamma_wrt = .false.
Expand Down
2 changes: 1 addition & 1 deletion src/post_process/m_mpi_proxy.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ contains
& 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', &
& 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', &
& 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', &
& 'down_sample' ]
& 'down_sample','fft_wrt' ]
call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
#:endfor

Expand Down
Loading
Loading