MFlowCode · sbryngelson · Oct 21, 2025 · Sep 8, 2025 · Sep 8, 2025 · Sep 8, 2025
@@ -1,6 +1,6 @@
 #:def Hardcoded3DVariables()
     ! Place any declaration of intermediate variables here
-    real(wp) :: rhoH, rhoL, pRef, pInt, h, lam, wl, amp, intH, alph
+    real(wp) :: rhoH, rhoL, pRef, pInt, h, lam, wl, amp, intH, alph, Mach
 
     real(wp) :: eps
 
@@ -94,10 +94,11 @@
         ! This patch is hard-coded for test suite optimization used in the
         ! 3D_TaylorGreenVortex case:
         ! This analytic patch used geometry 9
+        Mach = 0.1
         if (patch_id == 1) then
-            q_prim_vf(E_idx)%sf(i, j, k) = 101325 + (1*37.6636429464809**2/16)*(cos(2*x_cc(i)/1) + cos(2*y_cc(j)/1))*(cos(2*z_cc(k)/1) + 2)
-            q_prim_vf(momxb + 0)%sf(i, j, k) = 37.6636429464809*sin(x_cc(i)/1)*cos(y_cc(j)/1)*sin(z_cc(k)/1)
-            q_prim_vf(momxb + 1)%sf(i, j, k) = -37.6636429464809*cos(x_cc(i)/1)*sin(y_cc(j)/1)*sin(z_cc(k)/1)
+            q_prim_vf(E_idx)%sf(i, j, k) = 101325 + (Mach**2*376.636429464809**2/16)*(cos(2*x_cc(i)/1) + cos(2*y_cc(j)/1))*(cos(2*z_cc(k)/1) + 2)
+            q_prim_vf(momxb + 0)%sf(i, j, k) = Mach*376.636429464809*sin(x_cc(i)/1)*cos(y_cc(j)/1)*sin(z_cc(k)/1)
+            q_prim_vf(momxb + 1)%sf(i, j, k) = -Mach*376.636429464809*cos(x_cc(i)/1)*sin(y_cc(j)/1)*sin(z_cc(k)/1)
         end if
 
     case default

@@ -1174,117 +1174,161 @@ contains
         if (n > 0) then
 
             if (p > 0) then
+                if (fft_wrt) then
 
-                if (cyl_coord .and. p > 0) then
-                    ! Implement pencil processor blocking if using cylindrical coordinates so
-                    ! that all cells in azimuthal direction are stored on a single processor.
-                    ! This is necessary for efficient application of Fourier filter near axis.
-
-                    ! Initial values of the processor factorization optimization
+                    ! Initial estimate of optimal processor topology
                     num_procs_x = 1
-                    num_procs_y = num_procs
-                    num_procs_z = 1
+                    num_procs_y = 1
+                    num_procs_z = num_procs
                     ierr = -1
 
-                    ! Computing minimization variable for these initial values
-                    tmp_num_procs_x = num_procs_x
+                    ! Benchmarking the quality of this initial guess
                     tmp_num_procs_y = num_procs_y
                     tmp_num_procs_z = num_procs_z
-                    fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
-                                         - (n + 1)/tmp_num_procs_y)
+                    fct_min = 10._wp*abs((n + 1)/tmp_num_procs_y &
+                                         - (p + 1)/tmp_num_procs_z)
 
-                    ! Searching for optimal computational domain distribution
+                    ! Optimization of the initial processor topology
                     do i = 1, num_procs
 
                         if (mod(num_procs, i) == 0 &
                             .and. &
-                            (m + 1)/i >= num_stcls_min*recon_order) then
+                            (n + 1)/i >= num_stcls_min*recon_order) then
 
-                            tmp_num_procs_x = i
-                            tmp_num_procs_y = num_procs/i
+                            tmp_num_procs_y = i
+                            tmp_num_procs_z = num_procs/i
 
-                            if (fct_min >= abs((m + 1)/tmp_num_procs_x &
-                                               - (n + 1)/tmp_num_procs_y) &
+                            if (fct_min >= abs((n + 1)/tmp_num_procs_y &
+                                               - (p + 1)/tmp_num_procs_z) &
                                 .and. &
-                                (n + 1)/tmp_num_procs_y &
+                                (p + 1)/tmp_num_procs_z &
                                 >= &
                                 num_stcls_min*recon_order) then
 
-                                num_procs_x = i
-                                num_procs_y = num_procs/i
-                                fct_min = abs((m + 1)/tmp_num_procs_x &
-                                              - (n + 1)/tmp_num_procs_y)
+                                num_procs_y = i
+                                num_procs_z = num_procs/i
+                                fct_min = abs((n + 1)/tmp_num_procs_y &
+                                              - (p + 1)/tmp_num_procs_z)
                                 ierr = 0
 
                             end if
 
                         end if
 
                     end do
-
                 else
 
-                    ! Initial estimate of optimal processor topology
-                    num_procs_x = 1
-                    num_procs_y = 1
-                    num_procs_z = num_procs
-                    ierr = -1
+                    if (cyl_coord .and. p > 0) then
+                        ! Implement pencil processor blocking if using cylindrical coordinates so
+                        ! that all cells in azimuthal direction are stored on a single processor.
+                        ! This is necessary for efficient application of Fourier filter near axis.
 
-                    ! Benchmarking the quality of this initial guess
-                    tmp_num_procs_x = num_procs_x
-                    tmp_num_procs_y = num_procs_y
-                    tmp_num_procs_z = num_procs_z
-                    fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
-                                         - (n + 1)/tmp_num_procs_y) &
-                              + 10._wp*abs((n + 1)/tmp_num_procs_y &
-                                           - (p + 1)/tmp_num_procs_z)
+                        ! Initial values of the processor factorization optimization
+                        num_procs_x = 1
+                        num_procs_y = num_procs
+                        num_procs_z = 1
+                        ierr = -1
 
-                    ! Optimization of the initial processor topology
-                    do i = 1, num_procs
+                        ! Computing minimization variable for these initial values
+                        tmp_num_procs_x = num_procs_x
+                        tmp_num_procs_y = num_procs_y
+                        tmp_num_procs_z = num_procs_z
+                        fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
+                                             - (n + 1)/tmp_num_procs_y)
 
-                        if (mod(num_procs, i) == 0 &
-                            .and. &
-                            (m + 1)/i >= num_stcls_min*recon_order) then
+                        ! Searching for optimal computational domain distribution
+                        do i = 1, num_procs
+
+                            if (mod(num_procs, i) == 0 &
+                                .and. &
+                                (m + 1)/i >= num_stcls_min*recon_order) then
 
-                            do j = 1, num_procs/i
+                                tmp_num_procs_x = i
+                                tmp_num_procs_y = num_procs/i
 
-                                if (mod(num_procs/i, j) == 0 &
+                                if (fct_min >= abs((m + 1)/tmp_num_procs_x &
+                                                   - (n + 1)/tmp_num_procs_y) &
                                     .and. &
-                                    (n + 1)/j >= num_stcls_min*recon_order) then
+                                    (n + 1)/tmp_num_procs_y &
+                                    >= &
+                                    num_stcls_min*recon_order) then
 
-                                    tmp_num_procs_x = i
-                                    tmp_num_procs_y = j
-                                    tmp_num_procs_z = num_procs/(i*j)
+                                    num_procs_x = i
+                                    num_procs_y = num_procs/i
+                                    fct_min = abs((m + 1)/tmp_num_procs_x &
+                                                  - (n + 1)/tmp_num_procs_y)
+                                    ierr = 0
 
-                                    if (fct_min >= abs((m + 1)/tmp_num_procs_x &
-                                                       - (n + 1)/tmp_num_procs_y) &
-                                        + abs((n + 1)/tmp_num_procs_y &
-                                              - (p + 1)/tmp_num_procs_z) &
+                                end if
+
+                            end if
+
+                        end do
+
+                    else
+
+                        ! Initial estimate of optimal processor topology
+                        num_procs_x = 1
+                        num_procs_y = 1
+                        num_procs_z = num_procs
+                        ierr = -1
+
+                        ! Benchmarking the quality of this initial guess
+                        tmp_num_procs_x = num_procs_x
+                        tmp_num_procs_y = num_procs_y
+                        tmp_num_procs_z = num_procs_z
+                        fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
+                                             - (n + 1)/tmp_num_procs_y) &
+                                  + 10._wp*abs((n + 1)/tmp_num_procs_y &
+                                               - (p + 1)/tmp_num_procs_z)
+
+                        ! Optimization of the initial processor topology
+                        do i = 1, num_procs
+
+                            if (mod(num_procs, i) == 0 &
+                                .and. &
+                                (m + 1)/i >= num_stcls_min*recon_order) then
+
+                                do j = 1, num_procs/i
+
+                                    if (mod(num_procs/i, j) == 0 &
                                         .and. &
-                                        (p + 1)/tmp_num_procs_z &
-                                        >= &
-                                        num_stcls_min*recon_order) &
-                                        then
-
-                                        num_procs_x = i
-                                        num_procs_y = j
-                                        num_procs_z = num_procs/(i*j)
-                                        fct_min = abs((m + 1)/tmp_num_procs_x &
-                                                      - (n + 1)/tmp_num_procs_y) &
-                                                  + abs((n + 1)/tmp_num_procs_y &
-                                                        - (p + 1)/tmp_num_procs_z)
-                                        ierr = 0
+                                        (n + 1)/j >= num_stcls_min*recon_order) then
+
+                                        tmp_num_procs_x = i
+                                        tmp_num_procs_y = j
+                                        tmp_num_procs_z = num_procs/(i*j)
+
+                                        if (fct_min >= abs((m + 1)/tmp_num_procs_x &
+                                                           - (n + 1)/tmp_num_procs_y) &
+                                            + abs((n + 1)/tmp_num_procs_y &
+                                                  - (p + 1)/tmp_num_procs_z) &
+                                            .and. &
+                                            (p + 1)/tmp_num_procs_z &
+                                            >= &
+                                            num_stcls_min*recon_order) &
+                                            then
+
+                                            num_procs_x = i
+                                            num_procs_y = j
+                                            num_procs_z = num_procs/(i*j)
+                                            fct_min = abs((m + 1)/tmp_num_procs_x &
+                                                          - (n + 1)/tmp_num_procs_y) &
+                                                      + abs((n + 1)/tmp_num_procs_y &
+                                                            - (p + 1)/tmp_num_procs_z)
+                                            ierr = 0
+
+                                        end if
 
                                     end if
 
-                                end if
-
-                            end do
+                                end do
 
-                        end if
+                            end if
 
-                    end do
+                        end do
 
+                    end if
                 end if
 
                 ! Verifying that a valid decomposition of the computational

@@ -17,7 +17,7 @@ module m_checker
 
     implicit none
 
-    private; public :: s_check_inputs
+    private; public :: s_check_inputs, s_check_inputs_fft
 
 contains
 
@@ -111,6 +111,22 @@ contains
         @:PROHIBIT(any(omega_wrt) .and. fd_order == dflt_int, "fd_order must be set for omega_wrt")
     end subroutine s_check_inputs_vorticity
 
+    !> Checks constraints on fft_wrt: 3D, no cyl_coord, even global extents, evenly dividing y/z split, no bc code < -1
+    impure subroutine s_check_inputs_fft
+        integer :: num_procs_y, num_procs_z  ! global-to-local extent ratios in y and z (presumably the rank counts; confirm)
+
+        @:PROHIBIT(fft_wrt .and. (n == 0 .or. p == 0), "FFT WRT only in 3D")
+        @:PROHIBIT(fft_wrt .and. cyl_coord, "FFT WRT incompatible with cylindrical coordinates")
+        @:PROHIBIT(fft_wrt .and. (MOD(m_glb+1,2) == 1 .or. MOD(n_glb+1,2) == 1 .or. MOD(p_glb+1,2) == 1), "FFT WRT requires global dimensions divisible by 2")
+        @:PROHIBIT(fft_wrt .and. MOD(n_glb+1,n+1) /= 0, "FFT WRT requires n_glb to be divisible by num_procs_y")
+        @:PROHIBIT(fft_wrt .and. MOD(p_glb+1,p+1) /= 0, "FFT WRT requires p_glb to be divisible by num_procs_z")
+        num_procs_y = (n_glb + 1)/(n + 1)  ! integer division; a nonzero remainder is flagged by the n_glb check above when fft_wrt is set
+        num_procs_z = (p_glb + 1)/(p + 1)  ! integer division; a nonzero remainder is flagged by the p_glb check above when fft_wrt is set
+        @:PROHIBIT(fft_wrt .and. MOD(m_glb+1,num_procs_y) /= 0, "FFT WRT requires m_glb to be divisible by num_procs_y")
+        @:PROHIBIT(fft_wrt .and. MOD(n_glb+1,num_procs_z) /= 0, "FFT WRT requires n_glb to be divisible by num_procs_z")
+        @:PROHIBIT(fft_wrt .and. (bc_x%beg < -1 .or. bc_y%beg < -1 .or. bc_z%beg < -1 .or. bc_x%end < -1 .or. bc_y%end < -1 .or. bc_z%end < -1), "FFT WRT requires periodic BCs")
+    end subroutine s_check_inputs_fft
+
     !> Checks constraints on Q-criterion parameters
     impure subroutine s_check_inputs_qm
         @:PROHIBIT(n == 0 .and. qm_wrt)

@@ -240,6 +240,7 @@ module m_global_parameters
     integer :: flux_lim
     logical, dimension(3) :: flux_wrt
     logical :: E_wrt
+    logical :: fft_wrt
     logical :: pres_wrt
     logical, dimension(num_fluids_max) :: alpha_wrt
     logical :: gamma_wrt
@@ -441,6 +442,7 @@ contains
         parallel_io = .false.
         file_per_process = .false.
         E_wrt = .false.
+        fft_wrt = .false.
         pres_wrt = .false.
         alpha_wrt = .false.
         gamma_wrt = .false.

@@ -105,7 +105,7 @@ contains
             & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt',          &
             & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange',        &
             & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io',   &
-            & 'down_sample' ]
+            & 'down_sample','fft_wrt' ]
             call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
         #:endfor