From 7b06d1a336099e4e1cb90bb0bf69de6eafc9650a Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Wed, 25 Jun 2025 16:56:06 +0200 Subject: [PATCH 1/8] part1, making omp-reproducible work again --- src/gen_support.F90 | 12 ++++++++++++ src/ice_EVP.F90 | 8 ++++++++ src/ice_fct.F90 | 28 ++++++++++++++++++++++++++++ src/ice_maEVP.F90 | 24 ++++++++++++++++++++++++ src/oce_adv_tra_fct.F90 | 6 +++++- src/oce_dyn.F90 | 20 ++++++++++++++++++++ src/oce_muscl_adv.F90 | 4 ++++ 7 files changed, 101 insertions(+), 1 deletion(-) diff --git a/src/gen_support.F90 b/src/gen_support.F90 index a7a62dfde..1bab6c7bf 100644 --- a/src/gen_support.F90 +++ b/src/gen_support.F90 @@ -374,7 +374,11 @@ subroutine integrate_nod_3D(data, int3D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) +#endif do row=1, myDim_nod2D lval_row = 0. do k=ulevels_nod2D(row), nlevels_nod2D(row)-1 @@ -663,7 +667,11 @@ subroutine integrate_elem_3D(data, int3D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) +#endif do row=1, myDim_elem2D if(elem2D_nodes(1, row) > myDim_nod2D) cycle lval_row = 0. 
@@ -707,7 +715,11 @@ subroutine integrate_elem_2D(data, int2D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row) REDUCTION(+: lval) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row) REDUCTION(+: lval) +#endif do row=1, myDim_elem2D if(elem2D_nodes(1, row) > myDim_nod2D) cycle #if defined(__openmp_reproducible) diff --git a/src/ice_EVP.F90 b/src/ice_EVP.F90 index b646bc6da..0d409e336 100755 --- a/src/ice_EVP.F90 +++ b/src/ice_EVP.F90 @@ -231,7 +231,11 @@ subroutine stress2rhs(ice, partit, mesh) !$ACC UPDATE SELF(u_rhs_ice, v_rhs_ice, sigma11, sigma12, sigma22) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #endif do el=1,myDim_elem2D ! ===== Skip if ice is absent @@ -709,7 +713,11 @@ subroutine EVPdynamics(ice, partit, mesh) ! apply sea ice velocity boundary condition #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else ! 
With the binary data of np2 goes only inside the first if !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index 5e0b87093..d785d08e2 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -755,7 +755,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes) DEFAULT(PRESENT) @@ -897,7 +901,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes) DEFAULT(PRESENT) @@ -955,7 +963,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #if !defined(DISABLE_OPENACC_ATOMICS) @@ -1014,7 +1026,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #if !defined(DISABLE_OPENACC_ATOMICS) @@ -1074,7 +1090,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #endif @@ -1161,7 +1181,11 @@ SUBROUTINE ice_mass_matrix_fill(ice, partit, mesh) ! ! 
a) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, row, elem, elnodes, q, offset, ipos, aa) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO elem=1,myDim_elem2D elnodes=elem2D_nodes(:,elem) @@ -1320,7 +1344,11 @@ subroutine ice_TG_rhs_div(ice, partit, mesh) #ifndef ENABLE_OPENACC !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(diff, entries, um, vm, vol, dx, dy, n, q, row, elem, elnodes, c1, c2, c3, c4, cx1, cx2, cx3, cx4, entries2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes, dx, dy, entries, entries2) DEFAULT(PRESENT) diff --git a/src/ice_maEVP.F90 b/src/ice_maEVP.F90 index 62fa9c0a0..364921184 100644 --- a/src/ice_maEVP.F90 +++ b/src/ice_maEVP.F90 @@ -245,7 +245,11 @@ subroutine ssh2rhs(ice, partit, mesh) !_____________________________________________________________________________ ! use floating sea ice for zlevel and zstar if (use_floatice .and. .not. 
trim(which_ale)=='linfs') then +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -285,7 +289,11 @@ subroutine ssh2rhs(ice, partit, mesh) end do !$OMP END DO else +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -371,7 +379,11 @@ subroutine stress2rhs_m(ice, partit, mesh) !$OMP END PARALLEL DO !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(elem, elnodes, k, row, dx, dy, vol, mf, aa, bb, mass, cluster_area, elevation_elem) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -545,7 +557,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) ! use floating sea ice for zlevel and zstar !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(el, elnodes, vol, dx, dy, p_ice, n, bb, aa) if (use_floatice .and. .not. trim(which_ale)=='linfs') then +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2d elnodes=elem2D_nodes(:,el) @@ -588,7 +604,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) !_____________________________________________________________________________ ! use levitating sea ice for linfs, zlevel and zstar else +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2d elnodes=elem2D_nodes(:,el) !_______________________________________________________________________ @@ -685,7 +705,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) ! 
SD, 30.07.2014 !_______________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(el, i, ed, row, elnodes, dx, dy, meancos, eps1, eps2, delta, pressure, umod, drag, rhsu, rhsv, det, n) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2D if (ulevels(el)>1) cycle diff --git a/src/oce_adv_tra_fct.F90 b/src/oce_adv_tra_fct.F90 index f9a382787..9722b22ac 100644 --- a/src/oce_adv_tra_fct.F90 +++ b/src/oce_adv_tra_fct.F90 @@ -267,7 +267,7 @@ subroutine oce_tra_adv_fct(dt, ttf, lo, adf_h, adf_v, fct_ttf_min, fct_ttf_max, #endif !Vertical #ifndef ENABLE_OPENACC -!$OMP DO +!$OMP DO #else !$ACC PARALLEL LOOP GANG DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) #endif @@ -288,7 +288,11 @@ subroutine oce_tra_adv_fct(dt, ttf, lo, adf_h, adf_v, fct_ttf_min, fct_ttf_max, #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !Horizontal #if !defined(DISABLE_OPENACC_ATOMICS) diff --git a/src/oce_dyn.F90 b/src/oce_dyn.F90 index b632a68a9..f9466a2d0 100755 --- a/src/oce_dyn.F90 +++ b/src/oce_dyn.F90 @@ -323,7 +323,11 @@ SUBROUTINE visc_filt_bcksct(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, nz, ed, el, nelem, k, elem, nzmin, nzmax, update_u, update_v) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -473,7 +477,11 @@ SUBROUTINE visc_filt_bilapl(dynamics, partit, mesh) !___________________________________________________________________________ ! 
Sum up velocity differences over edge with respect to elemtnal index !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, ed, el, nz, nzmin, nzmax) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el = edge_tri(:,ed) @@ -533,7 +541,11 @@ SUBROUTINE visc_filt_bilapl(dynamics, partit, mesh) !$OMP BARRIER !___________________________________________________________________________ +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -627,7 +639,11 @@ SUBROUTINE visc_filt_bidiff(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, ed, el, nz, nzmin, nzmax, update_u, update_v) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -675,7 +691,11 @@ SUBROUTINE visc_filt_bidiff(dynamics, partit, mesh) !$OMP BARRIER !___________________________________________________________________________ +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) diff --git a/src/oce_muscl_adv.F90 b/src/oce_muscl_adv.F90 index 2a1270f7f..7d050c486 100755 --- a/src/oce_muscl_adv.F90 +++ b/src/oce_muscl_adv.F90 @@ -92,7 +92,11 @@ subroutine muscl_adv_init(twork, partit, mesh) allocate(twork%nboundary_lay(myDim_nod2D+eDim_nod2D)) !node n becomes a boundary node after layer twork%nboundary_lay(n) twork%nboundary_lay=nl-1 !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, n1, n2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do n=1, myDim_edge2D ! 
n1 and n2 are local indices n1=edges(1,n) From 74cc3f6fe55f152632b356a13742353fe98aeb8c Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Wed, 25 Jun 2025 21:17:02 +0200 Subject: [PATCH 2/8] more order --- src/gen_modules_diag.F90 | 10 ++++++- src/oce_ale_ssh_splitexpl_subcycl.F90 | 40 +++++++++++++++++++++------ 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/gen_modules_diag.F90 b/src/gen_modules_diag.F90 index 41b937a43..a5a70c44d 100644 --- a/src/gen_modules_diag.F90 +++ b/src/gen_modules_diag.F90 @@ -3251,7 +3251,11 @@ subroutine dvd_add_difflux_bhvisc(do_SDdvd, tr_num, dvd_tot, tr, trstar, gamma0_ ! first round !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, nz, ednodes, edelem, elnodes_l, elnodes_r, & !$OMP nu1, nl1, du, dv, dt, len, vi) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D!+eDim_edge2D ! skip boundary edges only consider inner edges if (myList_edge2D(edge) > edge2D_in) cycle @@ -3298,7 +3302,11 @@ subroutine dvd_add_difflux_bhvisc(do_SDdvd, tr_num, dvd_tot, tr, trstar, gamma0_ !___________________________________________________________________________ ! second round: +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D!+eDim_edge2D ! 
skip boundary edges only consider inner edges if (myList_edge2D(edge) > edge2D_in) cycle diff --git a/src/oce_ale_ssh_splitexpl_subcycl.F90 b/src/oce_ale_ssh_splitexpl_subcycl.F90 index f2d795865..4eb7827e0 100644 --- a/src/oce_ale_ssh_splitexpl_subcycl.F90 +++ b/src/oce_ale_ssh_splitexpl_subcycl.F90 @@ -110,7 +110,11 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(node, elem, ed, nz, nl1, ul1, nl2, ul2, nl12, ul12, & !$OMP uv1, uv2, uv12, qc, qu, qd, wu, wv, & !$OMP ednodes, edelem, un1, un2) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do node=1, myDim_nod2D ul1 = ulevels_nod2D(node) nl1 = nlevels_nod2D(node) @@ -179,7 +183,11 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !___________________________________________________________________________ ! 2nd. compute horizontal advection component: u*du/dx, u*dv/dx & v*du/dy, v*dv/dy ! loop over triangle edges -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed ednodes = edges(:,ed) @@ -303,7 +311,7 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !_______________________________________________________________________ ! if edelem(2)==0 than edge is boundary edge - else ! --> if(edelem(2)>0) then + else !___________________________________________________________________ !NR add contribution to first edge node --> ednodes(1) !NR Do not calculate on Halo nodes, as the result will not be used. @@ -338,13 +346,13 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) call omp_unset_lock(partit%plock(ednodes(2))) #endif end if - end if ! --> if(edelem(2)>0) then + end if #if defined(__openmp_reproducible) !$OMP END ORDERED #endif - end do ! 
--> do ed=1, myDim_edge2D + end do !$OMP END DO !___________________________________________________________________________ @@ -847,7 +855,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! remove viscosity !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, edelem, ednodes, hh, len, & !$OMP vi, update_ubt, update_vbt) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D+eDim_edge2D ! if ed is an outer boundary edge, skip it @@ -902,7 +914,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! remove bottom drag if (dynamics%se_bottdrag) then !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(elem, elnodes, nzmax, hh) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do elem=1, myDim_elem2D elnodes= elem2D_nodes(:,elem) nzmax = nlevels(elem) @@ -952,7 +968,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) !___________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, edelem, ednodes, hh, len, & !$OMP vi, update_ubt, update_vbt) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D+eDim_edge2D ! if ed is an outer boundary edge, skip it @@ -1138,7 +1158,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! and advance ssh --> eta^(n+(m+1)/M) = eta^(n+(m)/M) - dt/M * div_H * [...] 
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, ednodes, edelem, c1, c2, & !$OMP deltaX1, deltaX2, deltaY1, deltaY2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D ednodes = edges(:,edge) edelem = edge_tri(:,edge) From 88cd8de857fc50532fe3884ba708c483581f11a9 Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Wed, 25 Jun 2025 22:02:45 +0200 Subject: [PATCH 3/8] move +src/oce_ale_ssh_splitexpl_subcycl.F90OMP ORDERED to outside if block --- src/oce_ale_ssh_splitexpl_subcycl.F90 | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/oce_ale_ssh_splitexpl_subcycl.F90 b/src/oce_ale_ssh_splitexpl_subcycl.F90 index 4eb7827e0..aae2f737e 100644 --- a/src/oce_ale_ssh_splitexpl_subcycl.F90 +++ b/src/oce_ale_ssh_splitexpl_subcycl.F90 @@ -207,6 +207,13 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) un1(ul1:nl1) = ( UVh(2, ul1:nl1, edelem(1))*edge_cross_dxdy(1,ed) & - UVh(1, ul1:nl1, edelem(1))*edge_cross_dxdy(2,ed)) + !___________________________________________________________________ + ! ensure openmp numerical reproducability + ! block had to be moved outside of if statement, otherwise the else + ! part was missing the !$OMP ORDERED +#if defined(__openmp_reproducible) +!$OMP ORDERED +#endif !_______________________________________________________________________ ! if edelem(2)==0 than edge is boundary edge if(edelem(2)>0) then @@ -229,11 +236,6 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) ul12 = max(ul1, ul2) nl12 = min(nl1, nl2) - !___________________________________________________________________ - ! ensure openmp numerical reproducability -#if defined(__openmp_reproducible) -!$OMP ORDERED -#endif !___________________________________________________________________ !NR add contribution to first edge node --> ednodes(1) !NR Do not calculate on Halo nodes, as the result will not be used. 
From 5b3d73e5884c9992db6e22bc12134c89630976fd Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Wed, 25 Jun 2025 22:15:33 +0200 Subject: [PATCH 4/8] compiles with __openmp_reproducible and gcc 13 --- src/oce_adv_tra_driver.F90 | 8 ++++++++ src/oce_ale.F90 | 21 ++++++++++++++++++++- src/oce_ale_tracer.F90 | 12 ++++++++++++ src/oce_ale_vel_rhs.F90 | 11 +++++++---- 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/oce_adv_tra_driver.F90 b/src/oce_adv_tra_driver.F90 index 8ef6e9a4c..49ad31d1b 100644 --- a/src/oce_adv_tra_driver.F90 +++ b/src/oce_adv_tra_driver.F90 @@ -131,7 +131,11 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(e, enodes, el, nl1, nu1, nl2, nu2, nu12, nl12, nz) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(e, enodes, el, nl1, nu1, nl2, nu2, nu12, nl12, nz) +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG PRIVATE(enodes, el) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) @@ -489,7 +493,11 @@ subroutine oce_tra_adv_flux2dtracer(dt, dttf_h, dttf_v, flux_h, flux_v, partit, #endif ! Horizontal #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG PRIVATE(enodes, el) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) diff --git a/src/oce_ale.F90 b/src/oce_ale.F90 index 48f76ce4e..6ef200dc4 100644 --- a/src/oce_ale.F90 +++ b/src/oce_ale.F90 @@ -1709,8 +1709,11 @@ subroutine update_stiff_mat_ale(partit, mesh) ! to finite volumes" --> stiff matrix part ! 
loop over lcal edges factor=g*dt*alpha*theta - +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, i, j, k, row, ed, n2, enodes, elnodes, el, elem, npos, offset, nini, nend, fx, fy) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, i, j, k, row, ed, n2, enodes, elnodes, el, elem, npos, offset, nini, nend, fx, fy) +#endif do ed=1,myDim_edge2D !! Attention ! enodes ... local node indices of nodes that edge ed enodes=edges(:,ed) @@ -1840,7 +1843,11 @@ subroutine compute_ssh_rhs_ale(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, el, enodes, n, nz, nzmin, nzmax, c1, c2, deltaX1, deltaX2, deltaY1, deltaY2, & !$OMP dumc1_1, dumc1_2, dumc2_1, dumc2_2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -1992,7 +1999,11 @@ subroutine compute_hbar_ale(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, el, enodes, elem, elnodes, n, nz, nzmin, nzmax, & !$OMP c1, c2, deltaX1, deltaX2, deltaY1, deltaY2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -2171,7 +2182,11 @@ subroutine vert_vel_ale(dynamics, partit, mesh) !$OMP END PARALLEL DO !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, enodes, el, deltaX1, deltaY1, nz, nzmin, nzmax, deltaX2, deltaY2, c1, c2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! 
local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -2725,7 +2740,11 @@ subroutine compute_vert_vel_transpv(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, ednodes, edelem, nz, nzmin, nzmax, & !$OMP deltaX1, deltaY1, deltaX2, deltaY2, c1, c2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed ednodes=edges(:,ed) diff --git a/src/oce_ale_tracer.F90 b/src/oce_ale_tracer.F90 index 2f4a77ec4..63ac07f26 100644 --- a/src/oce_ale_tracer.F90 +++ b/src/oce_ale_tracer.F90 @@ -1193,7 +1193,11 @@ subroutine diff_part_hor_redi(tracers, partit, mesh) !$OMP nl1, ul1, nl2, ul2, nl12, ul12, nz, el, elnodes, enodes, & !$OMP c, Fx, Fy, Tx, Ty, Tx_z, Ty_z, SxTz, SyTz, Tz, & !$OMP rhs1, rhs2, Kh, dz) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D rhs1=0.0_WP rhs2=0.0_WP @@ -1366,7 +1370,11 @@ SUBROUTINE diff_part_bh(tr_num, dynamics, tracers, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz, ed, el, en, k, elem, nzmin, nzmax, u1, v1, len, vi, tt, ww, & !$OMP elnodes1, elnodes2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D!+eDim_edge2D if (myList_edge2D(ed) > edge2D_in) cycle el=edge_tri(:,ed) @@ -1412,7 +1420,11 @@ SUBROUTINE diff_part_bh(tr_num, dynamics, tracers, partit, mesh) ! =========== ! Second round: ! 
=========== +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D!+eDim_edge2D if (myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) diff --git a/src/oce_ale_vel_rhs.F90 b/src/oce_ale_vel_rhs.F90 index 6d5d76a35..a4f61e410 100644 --- a/src/oce_ale_vel_rhs.F90 +++ b/src/oce_ale_vel_rhs.F90 @@ -423,7 +423,11 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) !___________________________________________________________________________ ! 2nd. compute horizontal advection component: u*du/dx, u*dv/dx & v*du/dy, v*dv/dy ! loop over triangle edges +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D nod = edges(:,ed) el1 = edge_tri(1,ed) @@ -454,6 +458,9 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) ! compute horizontal normal velocity with respect to the edge from triangle ! centroid towards triangel edge mid-pointe for element el2 when it is valid ! --> if its a boundary triangle el2 will be not valid +#if defined(__openmp_reproducible) +!$OMP ORDERED +#endif if (el2>0) then ! --> el2 is valid element nl2 = nlevels(el2)-1 ul2 = ulevels(el2) @@ -469,10 +476,6 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) un1(1:ul1-1) = 0._WP un2(1:ul2-1) = 0._WP -#if defined(__openmp_reproducible) -!$OMP ORDERED -#endif - ! first edge node ! Do not calculate on Halo nodes, as the result will not be used. ! The "if" is cheaper than the avoided computiations. 
From 3d9edc8dbfa7850dabcc690b19e59a354159361a Mon Sep 17 00:00:00 2001 From: Vera Sidorenko Date: Sun, 2 Nov 2025 22:42:10 +0100 Subject: [PATCH 5/8] runs with Intel --- src/gen_support.F90 | 45 ++++++++++++++++++++------- src/ice_fct.F90 | 2 +- src/oce_mesh.F90 | 76 ++++++++++++++++++++++----------------------- src/solver.F90 | 2 +- 4 files changed, 74 insertions(+), 51 deletions(-) diff --git a/src/gen_support.F90 b/src/gen_support.F90 index 1bab6c7bf..87e4663a4 100644 --- a/src/gen_support.F90 +++ b/src/gen_support.F90 @@ -337,13 +337,16 @@ subroutine integrate_nod_2D(data, int2D, partit, mesh) #if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row) !$OMP DO REDUCTION (+: lval) -#endif do row=1, myDim_nod2D lval=lval+data(row)*areasvol(ulevels_nod2D(row),row) end do -#if !defined(__openmp_reproducible) !$OMP END DO !$OMP END PARALLEL +#else + ! Use serial computation for reproducible results + do row=1, myDim_nod2D + lval=lval+data(row)*areasvol(ulevels_nod2D(row),row) + end do #endif int2D=0.0_WP call MPI_AllREDUCE(lval, int2D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -375,7 +378,8 @@ subroutine integrate_nod_3D(data, int3D, partit, mesh) lval=0.0_WP #if defined(__openmp_reproducible) -!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) ORDERED +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row, k, lval_row) +!$OMP DO ORDERED #else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) #endif @@ -386,13 +390,19 @@ subroutine integrate_nod_3D(data, int3D, partit, mesh) end do #if defined(__openmp_reproducible) !$OMP ORDERED -#endif +!$OMP ATOMIC UPDATE lval = lval + lval_row -#if defined(__openmp_reproducible) !$OMP END ORDERED +#else + lval = lval + lval_row #endif end do +#if defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int3D=0.0_WP call MPI_AllREDUCE(lval, int3D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -443,7 +453,7 @@ subroutine 
extrap_nod3D(arr, partit, mesh) !_______________________________________________________________ ! loop over local vertices n - do n=1, myDim_nod2D+eDim_nod2D + do n=1, myDim_nod2D!+eDim_nod2D ! found node n that has to be extrapolated if ( (work_array(n)>0.99_WP*dummy) .and. (nlevels_nod2D(n)>nz)) then cnt=0 @@ -579,7 +589,7 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) character(3), intent(in) :: what real(kind=WP), optional :: nan !to be implemented upon the need (for masked arrays) real(kind=WP) :: omp_min_max_sum2 - real(kind=WP) :: val, vmasked, val_part(pos11:pos12) + real(kind=WP) :: val, vmasked, val_part(pos21:pos22) integer :: i, j @@ -668,7 +678,8 @@ subroutine integrate_elem_3D(data, int3D, partit, mesh) lval=0.0_WP #if defined(__openmp_reproducible) -!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) ORDERED +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row, k, lval_row) +!$OMP DO ORDERED #else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) #endif @@ -680,13 +691,19 @@ subroutine integrate_elem_3D(data, int3D, partit, mesh) end do #if defined(__openmp_reproducible) !$OMP ORDERED -#endif +!$OMP ATOMIC UPDATE lval = lval + lval_row -#if defined(__openmp_reproducible) !$OMP END ORDERED +#else + lval = lval + lval_row #endif end do +#if defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int3D=0.0_WP call MPI_AllREDUCE(lval, int3D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -716,7 +733,8 @@ subroutine integrate_elem_2D(data, int2D, partit, mesh) lval=0.0_WP #if defined(__openmp_reproducible) -!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row) REDUCTION(+: lval) ORDERED +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row) +!$OMP DO ORDERED #else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row) REDUCTION(+: lval) #endif @@ -730,7 +748,12 @@ subroutine integrate_elem_2D(data, int2D, partit, mesh) !$OMP END ORDERED #endif end do +#if 
defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int2D=0.0_WP call MPI_AllREDUCE(lval, int2D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index d785d08e2..a4529115b 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -1180,7 +1180,7 @@ SUBROUTINE ice_mass_matrix_fill(ice, partit, mesh) mass_matrix => ice%work%fct_massmatrix(:) ! ! a) -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, row, elem, elnodes, q, offset, ipos, aa) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, row, elem, elnodes, q, offset, ipos, aa, flag, iflag) #if defined(__openmp_reproducible) !$OMP DO ORDERED #else diff --git a/src/oce_mesh.F90 b/src/oce_mesh.F90 index 6e6a83b90..9096ea819 100755 --- a/src/oce_mesh.F90 +++ b/src/oce_mesh.F90 @@ -6,9 +6,9 @@ subroutine read_mesh(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine read_mesh end interface -end module +end module read_mesh_interface module find_levels_interface interface subroutine find_levels(partit, mesh) @@ -17,9 +17,9 @@ subroutine find_levels(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine find_levels end interface -end module +end module find_levels_interface module find_levels_cavity_interface interface subroutine find_levels_cavity(partit, mesh) @@ -28,9 +28,9 @@ subroutine find_levels_cavity(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine find_levels_cavity end interface -end module +end module find_levels_cavity_interface module test_tri_interface interface subroutine test_tri(partit, mesh) @@ -39,9 +39,9 @@ subroutine test_tri(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), 
target :: partit - end subroutine + end subroutine test_tri end interface -end module +end module test_tri_interface module load_edges_interface interface subroutine load_edges(partit, mesh) @@ -50,9 +50,9 @@ subroutine load_edges(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine load_edges end interface -end module +end module load_edges_interface module find_neighbors_interface interface subroutine find_neighbors(partit, mesh) @@ -61,9 +61,9 @@ subroutine find_neighbors(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine find_neighbors end interface -end module +end module find_neighbors_interface module mesh_areas_interface interface subroutine mesh_areas(partit, mesh) @@ -72,9 +72,9 @@ subroutine mesh_areas(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine mesh_areas end interface -end module +end module mesh_areas_interface module elem_center_interface interface subroutine elem_center(elem, x, y, mesh) @@ -84,9 +84,9 @@ subroutine elem_center(elem, x, y, mesh) integer :: elem real(kind=WP), intent(inout) :: x, y type(t_mesh), intent(inout), target :: mesh - end subroutine + end subroutine elem_center end interface -end module +end module elem_center_interface module edge_center_interface interface subroutine edge_center(n1, n2, x, y, mesh) @@ -96,9 +96,9 @@ subroutine edge_center(n1, n2, x, y, mesh) integer :: n1, n2 real(kind=WP), intent(inout):: x, y type(t_mesh), intent(inout), target :: mesh - end subroutine + end subroutine edge_center end interface -end module +end module edge_center_interface module mesh_auxiliary_arrays_interface interface subroutine mesh_auxiliary_arrays(partit, mesh) @@ -107,9 +107,9 @@ subroutine mesh_auxiliary_arrays(partit, 
mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine mesh_auxiliary_arrays end interface -end module +end module mesh_auxiliary_arrays_interface module find_levels_min_e2n_interface interface subroutine find_levels_min_e2n(partit, mesh) @@ -118,9 +118,9 @@ subroutine find_levels_min_e2n(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine find_levels_min_e2n end interface -end module +end module find_levels_min_e2n_interface module check_total_volume_interface interface subroutine check_total_volume(partit, mesh) @@ -129,9 +129,9 @@ subroutine check_total_volume(partit, mesh) USE MOD_PARSUP type(t_mesh), intent(inout), target :: mesh type(t_partit), intent(inout), target :: partit - end subroutine + end subroutine check_total_volume end interface -end module +end module check_total_volume_interface ! Driving routine. The distributed mesh information and mesh proper ! are read from files. @@ -489,12 +489,12 @@ SUBROUTINE read_mesh(partit, mesh) write(*,*) 'reading '// trim(file_name) end if call MPI_BCast(mesh%elem2d, 1, MPI_INTEGER, 0, MPI_COMM_FESOM, ierror) - allocate(mesh%elem2D_nodes(3, myDim_elem2D+eDim_elem2D+eXDim_elem2D)) + allocate(mesh%elem2D_nodes(3, myDim_elem2D)) !+eDim_elem2D+eXDim_elem2D ! 
0 proc reads the data in chunks and distributes it between other procs do nchunk=0, (mesh%elem2D-1)/chunk_size mapping(1:chunk_size)=0 - do n=1, myDim_elem2D+eDim_elem2D+eXDim_elem2D + do n=1, myDim_elem2D!+eDim_elem2D+eXDim_elem2D ipos=(myList_elem2D(n)-1)/chunk_size if (ipos==nchunk) then iofs=myList_elem2D(n)-nchunk*chunk_size @@ -532,7 +532,7 @@ SUBROUTINE read_mesh(partit, mesh) mapping(iofs)=n end if end do - do n=1, myDim_elem2D+eDim_elem2D+eXDim_elem2D + do n=1, myDim_elem2D!+eDim_elem2D+eXDim_elem2D do m=1,3 nn=mesh%elem2D_nodes(m, n) ipos=(nn-1)/chunk_size @@ -2510,7 +2510,7 @@ SUBROUTINE mesh_auxiliary_arrays(partit, mesh) center_x(n)=ax center_y(n)=ay mesh%elem_cos(n)=cos(ay) - mesh%metric_factor=tan(ay)/r_earth + mesh%metric_factor(n)=tan(ay)/r_earth END DO call exchange_elem(mesh%metric_factor, partit) @@ -2731,16 +2731,16 @@ SUBROUTINE mesh_auxiliary_arrays(partit, mesh) END DO deallocate(center_y, center_x) -! !array of 2D boundary conditions is used in ice_maEVP -! if (whichEVP > 0) then -! allocate(mesh%bc_index_nod2D(myDim_nod2D+eDim_nod2D)) -! mesh%bc_index_nod2D=1._WP -! do n=1, myDim_edge2D -! ed=mesh%edges(:, n) -! if (myList_edge2D(n) <= mesh%edge2D_in) cycle -! mesh%bc_index_nod2D(ed)=0._WP -! end do -! end if + !!array of 2D boundary conditions is used in ice_maEVP + !if (ice%whichEVP > 0) then + ! allocate(mesh%bc_index_nod2D(myDim_nod2D+eDim_nod2D)) + ! mesh%bc_index_nod2D=1._WP + ! do n=1, myDim_edge2D + ! ed=mesh%edges(:, n) + ! if (myList_edge2D(n) <= mesh%edge2D_in) cycle + ! mesh%bc_index_nod2D(ed)=0._WP + ! 
end do + !end if #if defined (__oasis) nn=0 diff --git a/src/solver.F90 b/src/solver.F90 index 7b1c7d503..ce8fe09cf 100644 --- a/src/solver.F90 +++ b/src/solver.F90 @@ -252,7 +252,7 @@ subroutine ssh_solve_cg(x, rhs, solverinfo, partit, mesh) !$OMP END PARALLEL DO #else sprod(1) = sum(rr(1:myDim_nod2D) * zz(1:myDim_nod2D)) - sprod(1) = sum(rr(1:myDim_nod2D) * rr(1:myDim_nod2D)) + sprod(2) = sum(rr(1:myDim_nod2D) * rr(1:myDim_nod2D)) #endif call MPI_Allreduce(MPI_IN_PLACE, sprod, 2, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr) From b81ac048fad672ce59e7d1d5776dfaf4c734a225 Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Thu, 27 Nov 2025 10:42:49 +0900 Subject: [PATCH 6/8] Fix OpenMP warnings: correct privatization and reduction clauses - icb_step.F90: Change area_ib_tot from PRIVATE to REDUCTION(+:) for proper parallel accumulation (fixes ftn-7208 warnings at lines 574 and 798) - oce_ale_pressure_bv.F90: Change f_min from PRIVATE to FIRSTPRIVATE to preserve initialization value (fixes ftn-7208 warning at line 3037) - write_step_info.F90: Remove loc_deta from REDUCTION clause as its result is unused (fixes ftn-7276 warning at line 117) --- src/icb_step.F90 | 4 ++-- src/oce_ale_pressure_bv.F90 | 2 +- src/write_step_info.F90 | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/icb_step.F90 b/src/icb_step.F90 index bac9cff66..62521a85d 100644 --- a/src/icb_step.F90 +++ b/src/icb_step.F90 @@ -567,7 +567,7 @@ subroutine iceberg_step1(ice, mesh, partit, dynamics, ib, height_ib_single,lengt case(2) area_ib_tot = length_ib_single*width_ib_single*scaling(ib) end select -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(idx, area_ib_tot) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(idx) REDUCTION(+:area_ib_tot) !$OMP DO do idx = 1, size(elem_block) if (elem_block(idx) == iceberg_elem) then @@ -790,7 +790,7 @@ subroutine iceberg_step2(mesh, partit,arr, elem_from_block, ib, height_ib_single case(2) area_ib_tot = length_ib_single*width_ib_single*scaling(ib) end 
select -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(idx, area_ib_tot) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(idx) REDUCTION(+:area_ib_tot) !$OMP DO do idx = 1, size(elem_block_red) if (elem_block_red(idx) == iceberg_elem) then diff --git a/src/oce_ale_pressure_bv.F90 b/src/oce_ale_pressure_bv.F90 index 2dbb2aa72..281eee270 100644 --- a/src/oce_ale_pressure_bv.F90 +++ b/src/oce_ale_pressure_bv.F90 @@ -2973,7 +2973,7 @@ subroutine compute_neutral_slope(partit, mesh) eps=5.0e-6_WP !PS S_cr=1.0e-2_WP !PS S_d=1.0e-3_WP -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, deltaX1, deltaY1, deltaX2, deltaY2, n, nz, nl1, ul1, el, elnodes, enodes, c, ro_z_inv, f_min, dep_scale, rssby, c1, c2) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, deltaX1, deltaY1, deltaX2, deltaY2, n, nz, nl1, ul1, el, elnodes, enodes, c, ro_z_inv, dep_scale, rssby, c1, c2) FIRSTPRIVATE(f_min) !$OMP DO do n=1, myDim_nod2D slope_tapered(: , :, n)=0._WP diff --git a/src/write_step_info.F90 b/src/write_step_info.F90 index c43f7c631..9ad384bf5 100644 --- a/src/write_step_info.F90 +++ b/src/write_step_info.F90 @@ -114,7 +114,7 @@ subroutine write_step_info(istep, outfreq, ice, dynamics, tracers, partit, mesh) !_______________________________________________________________________ #if !defined(__openmp_reproducible) -!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n) REDUCTION(+:loc_eta, loc_hbar, loc_deta, loc_dhbar, loc_wflux) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n) REDUCTION(+:loc_eta, loc_hbar, loc_dhbar, loc_wflux) #endif do n=1, myDim_nod2D loc_eta = loc_eta + areasvol(ulevels_nod2D(n), n)*eta_n(n) From 0d8cd77b25534c5d195f49f718a24a5bd856951f Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Thu, 27 Nov 2025 10:49:22 +0900 Subject: [PATCH 7/8] Fix preprocessor directive nesting in ice_EVP.F90 Fix 'unterminated #else' error by correctly placing __openmp_reproducible check inside the ENABLE_OPENACC=false branch. The nested #ifndef was incorrectly placed inside the #else block, causing a preprocessor error. 
--- src/ice_EVP.F90 | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/ice_EVP.F90 b/src/ice_EVP.F90 index 7750b0ab2..beff8f684 100755 --- a/src/ice_EVP.F90 +++ b/src/ice_EVP.F90 @@ -227,7 +227,11 @@ subroutine stress2rhs(ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) #if !defined(DISABLE_OPENACC_ATOMICS) @@ -235,12 +239,6 @@ subroutine stress2rhs(ice, partit, mesh) #else !$ACC UPDATE SELF(u_rhs_ice, v_rhs_ice, sigma11, sigma12, sigma22) #endif -#ifndef ENABLE_OPENACC -#if defined(__openmp_reproducible) -!$OMP DO ORDERED -#else -!$OMP DO -#endif #endif do el=1,myDim_elem2D ! ===== Skip if ice is absent From ea47b289844a3c502346aa88b6937a1c03702a15 Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Sun, 25 Jan 2026 20:53:03 +0100 Subject: [PATCH 8/8] save wip --- src/gen_support.F90 | 16 ++++++++++++++++ src/oce_ale_pressure_bv.F90 | 22 ++++++++++++++++++++++ src/oce_muscl_adv.F90 | 5 +++++ 3 files changed, 43 insertions(+) diff --git a/src/gen_support.F90 b/src/gen_support.F90 index 8047641cf..93345156c 100644 --- a/src/gen_support.F90 +++ b/src/gen_support.F90 @@ -553,6 +553,7 @@ FUNCTION omp_min_max_sum1(arr, pos1, pos2, what, partit, nan) CASE ('min') val=arr(1) +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n) !$OMP DO REDUCTION(min: val) do n=pos1, pos2 @@ -560,9 +561,13 @@ FUNCTION omp_min_max_sum1(arr, pos1, pos2, what, partit, nan) end do !$OMP END DO !$OMP END PARALLEL +#else + val = minval(arr(pos1:pos2)) +#endif CASE ('max') val=arr(1) +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n) !$OMP DO REDUCTION(max: val) do n=pos1, pos2 @@ -570,6 +575,9 @@ FUNCTION omp_min_max_sum1(arr, pos1, pos2, what, partit, nan) end do !$OMP END DO !$OMP END PARALLEL +#else + val = maxval(arr(pos1:pos2)) +#endif CASE DEFAULT if (partit%mype==0) write(*,*) 
trim(what), ' is not implemented in omp_min_max_sum case!' @@ -602,6 +610,7 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) CASE ('min') if (.not. present(nan)) vmasked=huge(vmasked) !just some crazy number val=arr(1,1) +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i, j) !$OMP DO REDUCTION(min: val) do j=pos21, pos22 @@ -611,10 +620,14 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) end do !$OMP END DO !$OMP END PARALLEL +#else + val = minval(arr(pos11:pos12,pos21:pos22), mask=(arr(pos11:pos12,pos21:pos22)/=vmasked)) +#endif CASE ('max') if (.not. present(nan)) vmasked=tiny(vmasked) !just some crazy number val=arr(1,1) +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i, j) !$OMP DO REDUCTION(max: val) do j=pos21, pos22 @@ -624,6 +637,9 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) end do !$OMP END DO !$OMP END PARALLEL +#else + val = maxval(arr(pos11:pos12,pos21:pos22), mask=(arr(pos11:pos12,pos21:pos22)/=vmasked)) +#endif CASE ('sum') if (.not. present(nan)) vmasked=huge(vmasked) !just some crazy number diff --git a/src/oce_ale_pressure_bv.F90 b/src/oce_ale_pressure_bv.F90 index e66abfe85..0ec89c50d 100644 --- a/src/oce_ale_pressure_bv.F90 +++ b/src/oce_ale_pressure_bv.F90 @@ -232,6 +232,7 @@ subroutine pressure_bv(tracers, partit, mesh) !___________________________________________________________________________ ! Screen salinity a =0.0_WP +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(node, nz, nzmin, nzmax) !$OMP DO REDUCTION(min: a) do node=1, myDim_nod2D+eDim_nod2D @@ -243,6 +244,10 @@ subroutine pressure_bv(tracers, partit, mesh) enddo !$OMP END DO !$OMP END PARALLEL +#else + ! Reproducible sequential minval + a = minval(salt(1:nl-1, 1:myDim_nod2D+eDim_nod2D)) +#endif !___________________________________________________________________________ ! 
model explodes, no OpenMP parallelization ! @@ -3238,6 +3243,7 @@ subroutine init_ref_density_advanced(tracers, partit, mesh) vol1D=0.0_WP ref_temp1D=0.0_WP ref_salt1D=0.0_WP +#if !defined(__openmp_reproducible) !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(node, nz, nzmin, nzmax) REDUCTION(+:vol1D) do node=1,myDim_nod2d x=geo_coord_nod2D(1,node)/rad @@ -3253,6 +3259,22 @@ subroutine init_ref_density_advanced(tracers, partit, mesh) end do end do !$OMP END PARALLEL DO +#else +! Reproducible sequential summation +do node=1,myDim_nod2d + x=geo_coord_nod2D(1,node)/rad + y=geo_coord_nod2D(2,node)/rad + if ((x>=-6.) .AND. (x<=42) .AND. (y>=30.15) .AND. (y<=42)) CYCLE + if ((x>= 2.) .AND. (x<=42) .AND. (y>=41) .AND. (y<=48)) CYCLE + nzmin = 1 + nzmax = nlevels_nod2d(node)-1 + do nz=nzmin,nzmax + ref_temp1D(nz)=ref_temp1D(nz)+areasvol(nz,node)*temp(nz,node) + ref_salt1D(nz)=ref_salt1D(nz)+areasvol(nz,node)*salt(nz,node) + vol1D(nz)=vol1D(nz)+areasvol(nz,node) + end do +end do +#endif call MPI_Allreduce(MPI_IN_PLACE, ref_temp1D, mesh%nl-1, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr) call MPI_Allreduce(MPI_IN_PLACE, ref_salt1D, mesh%nl-1, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr) call MPI_Allreduce(MPI_IN_PLACE, vol1D, mesh%nl-1, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr) diff --git a/src/oce_muscl_adv.F90 b/src/oce_muscl_adv.F90 index 997a51560..67075689a 100755 --- a/src/oce_muscl_adv.F90 +++ b/src/oce_muscl_adv.F90 @@ -58,6 +58,7 @@ subroutine muscl_adv_init(twork, partit, mesh) !___________________________________________________________________________ nn_size=0 k=0 +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n) !$OMP DO REDUCTION(max: k) do n=1, myDim_nod2D @@ -73,6 +74,10 @@ subroutine muscl_adv_init(twork, partit, mesh) end do !$OMP END DO !$OMP END PARALLEL +#else + ! 
Reproducible sequential maxval + k = maxval(SSH_stiff%rowptr(2:myDim_nod2D+1) - SSH_stiff%rowptr(1:myDim_nod2D)) +#endif nn_size=k !___________________________________________________________________________ allocate(mesh%nn_num(myDim_nod2D), mesh%nn_pos(nn_size,myDim_nod2D))