From 7d77de70a11f9785d19315897c39a151cd80e517 Mon Sep 17 00:00:00 2001
From: Edward Yang <edward_yang_125@hotmail.com>
Date: Thu, 28 Aug 2025 17:55:26 +1000
Subject: [PATCH 01/45] add gpu2gpu mpi transer with flag for do_group_update

---
 mpp/include/group_update_pack.inc   | 27 +++++++++++++++------------
 mpp/include/group_update_unpack.inc | 28 ++++++++++++++++------------
 mpp/include/mpp_group_update.fh     | 21 +++++++++++++++------
 mpp/include/mpp_transmit.inc        | 10 ++++++----
 mpp/include/mpp_transmit_mpi.fh     | 29 ++++++++++++++++++++++++-----
 5 files changed, 76 insertions(+), 39 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index 142106aece..b1a55b68e3 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -18,9 +18,6 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
-!$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
-!$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
-!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k)
   do n = 1, npack
      buffer_pos = group%pack_buffer_pos(n) + buffer_start_pos
      pos  = buffer_pos
@@ -32,14 +29,17 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+              !$omp target teams distribute parallel do private(idx) &
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
               do k = 1, ksize
                  do j = js, je
                     do i = is, ie
-                       pos = pos + 1
-                       buffer(pos) = field(i,j,k)
+                       idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                       buffer(idx) = field(i,j,k)
                     end do
                  end do
               enddo
+              pos = pos + ksize*(je-js+1)*(ie-is+1)
            enddo
         case( MINUS_NINETY )
            do l=1,group%nscalar  ! loop over number of fields
@@ -83,14 +83,17 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
+              !$omp target teams distribute parallel do private(idx) &
+              !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
               do k = 1, ksize
                  do j = js, je
                     do i = is, ie
-                       pos = pos + 1
-                       buffer(pos) = fieldx(i,j,k)
+                       idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                       buffer(idx) = fieldx(i,j,k)
                     end do
                  end do
               end do
+              pos = pos + ksize*(je-js+1)*(ie-is+1)
            end do
         case( MINUS_NINETY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
@@ -162,14 +165,17 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
+              !$omp target teams distribute parallel do private(idx) &
+              !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
               do k = 1, ksize
                  do j = js, je
                     do i = is, ie
-                       pos = pos + 1
-                       buffer(pos) = fieldy(i,j,k)
+                       idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                       buffer(idx) = fieldy(i,j,k)
                     end do
                  end do
               end do
+              pos = pos + ksize*(je-js+1)*(ie-is+1)
            end do
         case( MINUS_NINETY )
            do l=1,nvector  ! loop over number of fields
@@ -239,9 +245,6 @@ if( group%k_loop_inside ) then
      endif
   enddo
 else
-!$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
-!$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
-!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k)
   do nk = 1, npack*ksize
      n = (nk-1)/ksize + 1
      k = mod((nk-1), ksize) + 1
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index 7f60ed93df..c18275e9bb 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -18,9 +18,6 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
-!$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
-!$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
-!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy, n,k )
   do n = nunpack, 1, -1
      buffer_pos = group%unpack_buffer_pos(n) + buffer_start_pos
      pos = buffer_pos
@@ -29,45 +26,52 @@ if( group%k_loop_inside ) then
      if( group%unpack_type(n) == FIELD_S ) then
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)
+           !$omp target teams distribute parallel do if(use_device_ptr) private(idx) &
+           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) &
+           !$omp   map(from: field(is:ie,js:je,1:ksize))
            do k = 1, ksize
               do j = js, je
                  do i = is, ie
-                    pos = pos + 1
-                    field(i,j,k) = buffer(pos)
+                    idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                    field(i,j,k) = buffer(idx)
                  end do
               end do
            end do
+           pos = pos + ksize*(je-js+1)*(ie-is+1)
         end do
      else if( group%unpack_type(n) == FIELD_X ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldx = group%addrs_x(l)
+           !$omp target teams distribute parallel do private(idx) &
+           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) map(from: fieldx(is:ie,js:je,1:ksize)) if(use_device_ptr)
            do k = 1, ksize
               do j = js, je
                  do i = is, ie
-                    pos = pos + 1
-                    fieldx(i,j,k) = buffer(pos)
+                    idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                    fieldx(i,j,k) = buffer(idx)
                  end do
               end do
            end do
+           pos = pos + ksize*(je-js+1)*(ie-is+1)
         end do
      else if( group%unpack_type(n) == FIELD_Y ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldy = group%addrs_y(l)
+           !$omp target teams distribute parallel do private(idx) &
+           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) map(from: fieldy(is:ie,js:je,1:ksize)) if(use_device_ptr)
            do k = 1, ksize
               do j = js, je
                  do i = is, ie
-                    pos = pos + 1
-                    fieldy(i,j,k) = buffer(pos)
+                    idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                    fieldy(i,j,k) = buffer(idx)
                  end do
               end do
            end do
+           pos = pos + ksize*(je-js+1)*(ie-is+1)
         end do
      endif
   enddo
 else
-!$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
-!$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
-!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k)
   do nk = nunpack*ksize, 1, -1
      n = (nk-1)/ksize + 1
      k = mod((nk-1), ksize) + 1
diff --git a/mpp/include/mpp_group_update.fh b/mpp/include/mpp_group_update.fh
index 0f04c06c3b..8464fa55f5 100644
--- a/mpp/include/mpp_group_update.fh
+++ b/mpp/include/mpp_group_update.fh
@@ -419,20 +419,22 @@ subroutine MPP_CREATE_GROUP_UPDATE_4D_V_( group, fieldx, fieldy, domain, flags,
 end subroutine MPP_CREATE_GROUP_UPDATE_4D_V_
 
 
-subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type)
+subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
   type(mpp_group_update_type), intent(inout) :: group
   type(domain2D),              intent(inout) :: domain
   MPP_TYPE_,                   intent(in)    :: d_type
+  logical, optional, intent(in) :: omp_offload
 
   integer   :: nscalar, nvector, nlist
   logical   :: recv_y(8)
   integer   :: nsend, nrecv, flags_v
   integer   :: msgsize
-  integer   :: from_pe, to_pe, buffer_pos, pos
+  integer   :: from_pe, to_pe, buffer_pos, pos, idx
   integer   :: ksize, is, ie, js, je
   integer   :: n, l, m, i, j, k, buffer_start_pos, nk
   integer   :: shift, gridtype, midpoint
   integer   :: npack, nunpack, rotation, isd
+  logical   :: use_device_ptr
 
   MPP_TYPE_ :: buffer(mpp_domains_stack_size)
   MPP_TYPE_ :: field (group%is_s:group%ie_s,group%js_s:group%je_s, group%ksize_s)
@@ -448,6 +450,9 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type)
   nlist   = size(domain%list(:))
   gridtype = group%gridtype
 
+  use_device_ptr = .false.
+  if (present(omp_offload)) use_device_ptr = omp_offload
+
   !--- ksize_s must equal ksize_v
   if(nvector > 0 .AND. nscalar > 0) then
      if(group%ksize_s .NE. group%ksize_v) then
@@ -476,13 +481,14 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type)
 
   !---pre-post receive.
   call mpp_clock_begin(group_recv_clock)
+  !$omp target enter data map(alloc: buffer) if(use_device_ptr)
   do m = 1, nrecv
      msgsize = group%recv_size(m)
      from_pe = group%from_pe(m)
      if( msgsize .GT. 0 )then
         buffer_pos = group%buffer_pos_recv(m)
         call mpp_recv( buffer(buffer_pos+1), glen=msgsize, from_pe=from_pe, block=.false., &
-             tag=COMM_TAG_1)
+             tag=COMM_TAG_1, omp_offload=omp_offload)
      end if
   end do
 
@@ -504,7 +510,7 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type)
      if( msgsize .GT. 0 )then
         buffer_pos = group%buffer_pos_send(n)
         to_pe = group%to_pe(n)
-        call mpp_send( buffer(buffer_pos+1), plen=msgsize, to_pe=to_pe, tag=COMM_TAG_1)
+        call mpp_send( buffer(buffer_pos+1), plen=msgsize, to_pe=to_pe, tag=COMM_TAG_1, omp_offload=omp_offload)
      endif
   enddo
   call mpp_clock_end(group_send_clock)
@@ -519,6 +525,7 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type)
   nunpack = group%nunpack
   call mpp_clock_begin(group_unpk_clock)
 #include <group_update_unpack.inc>
+  !$omp target exit data map(release: buffer) if(use_device_ptr)
   call mpp_clock_end(group_unpk_clock)
 
   ! ---northern boundary fold
@@ -644,10 +651,11 @@ subroutine MPP_START_GROUP_UPDATE_(group, domain, d_type, reuse_buffer)
   integer   :: nscalar, nvector
   integer   :: nsend, nrecv, flags_v
   integer   :: msgsize, npack, rotation
-  integer   :: from_pe, to_pe, buffer_pos, pos
+  integer   :: from_pe, to_pe, buffer_pos, pos, idx
   integer   :: ksize, is, ie, js, je
   integer   :: n, l, m, i, j, k, buffer_start_pos, nk
   logical   :: reuse_buf_pos
+  logical, parameter :: use_device_ptr = .false. ! placeholder
   character(len=8)            :: text
 
   MPP_TYPE_ :: buffer(size(mpp_domains_stack_nonblock(:)))
@@ -749,11 +757,12 @@ subroutine MPP_COMPLETE_GROUP_UPDATE_(group, domain, d_type)
   MPP_TYPE_,                   intent(in)    :: d_type
 
   integer   :: nsend, nrecv, nscalar, nvector
-  integer   :: k, buffer_pos, pos, m, n, l
+  integer   :: k, buffer_pos, pos, m, n, l, idx
   integer   :: is, ie, js, je, ksize, i, j
   integer   :: shift, gridtype, midpoint, flags_v
   integer   :: nunpack, rotation, buffer_start_pos, nk, isd
   logical   :: recv_y(8)
+  logical, parameter :: use_device_ptr = .false. ! placeholder
   MPP_TYPE_ :: buffer(size(mpp_domains_stack_nonblock(:)))
   MPP_TYPE_ :: field (group%is_s:group%ie_s,group%js_s:group%je_s, group%ksize_s)
   MPP_TYPE_ :: fieldx(group%is_x:group%ie_x,group%js_x:group%je_x, group%ksize_v)
diff --git a/mpp/include/mpp_transmit.inc b/mpp/include/mpp_transmit.inc
index 24d0cc437f..560de01c18 100644
--- a/mpp/include/mpp_transmit.inc
+++ b/mpp/include/mpp_transmit.inc
@@ -171,7 +171,7 @@
       call mpp_transmit( put_data, put_len, to_pe, dummy, 1, NULL_PE, tag=tag, send_request=request )
     end subroutine MPP_SEND_
 
-    subroutine MPP_RECV_SCALAR_( get_data, from_pe, glen, block, tag, request )
+    subroutine MPP_RECV_SCALAR_( get_data, from_pe, glen, block, tag, request, omp_offload )
 !a mpp_transmit with null arguments on the put side
       integer, intent(in) :: from_pe
       MPP_TYPE_, intent(out) :: get_data
@@ -180,6 +180,7 @@
       integer, intent(out), optional :: request
 
       integer, optional, intent(in) :: glen
+      logical, optional, intent(in) :: omp_offload
       integer                       :: get_len
       MPP_TYPE_ :: get_data1D(1)
       MPP_TYPE_ :: dummy(1)
@@ -189,17 +190,18 @@
 
       ptr = LOC(get_data)
       get_len=1; if(PRESENT(glen))get_len=glen
-      call mpp_transmit( dummy, 1, NULL_PE, get_data1D, get_len, from_pe, block, tag, recv_request=request )
+      call mpp_transmit( dummy, 1, NULL_PE, get_data1D, get_len, from_pe, block, tag, recv_request=request, omp_offload=omp_offload )
 
     end subroutine MPP_RECV_SCALAR_
 
-    subroutine MPP_SEND_SCALAR_( put_data, to_pe, plen, tag, request)
+    subroutine MPP_SEND_SCALAR_( put_data, to_pe, plen, tag, request, omp_offload)
 !a mpp_transmit with null arguments on the get side
       integer, intent(in) :: to_pe
       MPP_TYPE_, intent(in) :: put_data
       integer, optional, intent(in) :: plen
       integer, intent(in), optional :: tag
       integer, intent(out), optional :: request
+      logical, optional, intent(in) :: omp_offload
       integer                       :: put_len
       MPP_TYPE_ :: put_data1D(1)
       MPP_TYPE_ :: dummy(1)
@@ -207,7 +209,7 @@
       pointer( ptr, put_data1D )
       ptr = LOC(put_data)
       put_len=1; if(PRESENT(plen))put_len=plen
-      call mpp_transmit( put_data1D, put_len, to_pe, dummy, 1, NULL_PE, tag = tag, send_request=request )
+      call mpp_transmit( put_data1D, put_len, to_pe, dummy, 1, NULL_PE, tag = tag, send_request=request, omp_offload=omp_offload )
 
     end subroutine MPP_SEND_SCALAR_
 
diff --git a/mpp/include/mpp_transmit_mpi.fh b/mpp/include/mpp_transmit_mpi.fh
index 023c2d5124..2d2fef19e6 100644
--- a/mpp/include/mpp_transmit_mpi.fh
+++ b/mpp/include/mpp_transmit_mpi.fh
@@ -37,7 +37,7 @@
 !!(avoiding f90 rank conformance check)
 !!caller is responsible for completion checks (mpp_sync_self) before and after
     subroutine MPP_TRANSMIT_( put_data, put_len, to_pe, get_data, get_len, from_pe, block, tag, recv_request, &
-                            &  send_request )
+                            &  send_request, omp_offload )
 
       integer, intent(in) :: put_len, to_pe, get_len, from_pe
       MPP_TYPE_, intent(in)  :: put_data(*)
@@ -45,10 +45,15 @@
       logical, intent(in),  optional :: block
       integer, intent(in),  optional :: tag
       integer, intent(out), optional :: recv_request, send_request
+      logical, intent(in),  optional :: omp_offload
       logical                       :: block_comm
       integer                       :: i
       integer                       :: comm_tag
       integer                       :: rsize
+      logical                       :: use_device_ptr
+
+      use_device_ptr = .false.
+      if (present(omp_offload)) use_device_ptr = omp_offload
 
       if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_TRANSMIT: You must first call mpp_init.' )
       if( to_pe.EQ.NULL_PE .AND. from_pe.EQ.NULL_PE )return
@@ -82,8 +87,15 @@
              if( cur_send_request > max_request ) &
              call mpp_error(FATAL, &
                   & "MPP_TRANSMIT: cur_send_request is greater than max_request, increase mpp_nml request_multiply")
-             call MPI_ISEND( put_data, put_len, MPI_TYPE_, to_pe, comm_tag, mpp_comm_private, &
-                             request_send(cur_send_request), error)
+             if (use_device_ptr) then
+                 !$omp target data use_device_ptr(put_data)
+                 call MPI_ISEND( put_data, put_len, MPI_TYPE_, to_pe, comm_tag, mpp_comm_private, &
+                                 request_send(cur_send_request), error)
+                 !$omp end target data
+             else
+                 call MPI_ISEND( put_data, put_len, MPI_TYPE_, to_pe, comm_tag, mpp_comm_private, &
+                                 request_send(cur_send_request), error)
+             endif
           endif
           if (debug .and. (current_clock.NE.0)) call increment_current_clock(EVENT_SEND, put_len*MPP_TYPE_BYTELEN_)
       else if (to_pe.EQ.ALL_PES) then !this is a broadcast from from_pe
@@ -130,8 +142,15 @@
                 if( cur_recv_request > max_request ) &
                 call mpp_error(FATAL, &
                      "MPP_TRANSMIT: cur_recv_request is greater than max_request, increase mpp_nml request_multiply")
-                call MPI_IRECV( get_data, get_len, MPI_TYPE_, from_pe, comm_tag, mpp_comm_private, &
-                     request_recv(cur_recv_request), error )
+                if (use_device_ptr) then
+                    !$omp target data use_device_ptr(get_data)
+                    call MPI_IRECV( get_data, get_len, MPI_TYPE_, from_pe, comm_tag, mpp_comm_private, &
+                                    request_recv(cur_recv_request), error )
+                    !$omp end target data
+                else
+                    call MPI_IRECV( get_data, get_len, MPI_TYPE_, from_pe, comm_tag, mpp_comm_private, &
+                                    request_recv(cur_recv_request), error )
+                endif
                 size_recv(cur_recv_request) = get_len
                 type_recv(cur_recv_request) = MPI_TYPE_
              endif

From 5a85ab4809d2a2f83f74b029ecf6f0565a58266b Mon Sep 17 00:00:00 2001
From: Edward Yang <edward_yang_125@hotmail.com>
Date: Tue, 2 Sep 2025 10:12:37 +1000
Subject: [PATCH 02/45] add missing collapse(3) clauses

---
 mpp/include/group_update_pack.inc   |  6 +++---
 mpp/include/group_update_unpack.inc | 12 +++++++-----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index b1a55b68e3..d08335d1e6 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -29,7 +29,7 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-              !$omp target teams distribute parallel do private(idx) &
+              !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
               do k = 1, ksize
                  do j = js, je
@@ -83,7 +83,7 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
-              !$omp target teams distribute parallel do private(idx) &
+              !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
               do k = 1, ksize
                  do j = js, je
@@ -165,7 +165,7 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
-              !$omp target teams distribute parallel do private(idx) &
+              !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
               do k = 1, ksize
                  do j = js, je
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index c18275e9bb..f242cd3452 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -26,7 +26,7 @@ if( group%k_loop_inside ) then
      if( group%unpack_type(n) == FIELD_S ) then
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)
-           !$omp target teams distribute parallel do if(use_device_ptr) private(idx) &
+           !$omp target teams distribute parallel do collapse(3) if(use_device_ptr) private(idx) &
            !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) &
            !$omp   map(from: field(is:ie,js:je,1:ksize))
            do k = 1, ksize
@@ -42,8 +42,9 @@ if( group%k_loop_inside ) then
      else if( group%unpack_type(n) == FIELD_X ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldx = group%addrs_x(l)
-           !$omp target teams distribute parallel do private(idx) &
-           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) map(from: fieldx(is:ie,js:je,1:ksize)) if(use_device_ptr)
+           !$omp target teams distribute parallel do collapse(3) private(idx) &
+           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) &
+           !$omp   map(from: fieldx(is:ie,js:je,1:ksize)) if(use_device_ptr)
            do k = 1, ksize
               do j = js, je
                  do i = is, ie
@@ -57,8 +58,9 @@ if( group%k_loop_inside ) then
      else if( group%unpack_type(n) == FIELD_Y ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldy = group%addrs_y(l)
-           !$omp target teams distribute parallel do private(idx) &
-           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) map(from: fieldy(is:ie,js:je,1:ksize)) if(use_device_ptr)
+           !$omp target teams distribute parallel do collapse(3) private(idx) &
+           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) &
+           !$omp   map(from: fieldy(is:ie,js:je,1:ksize)) if(use_device_ptr)
            do k = 1, ksize
               do j = js, je
                  do i = is, ie

From aa8f22b6610679440309f07ad716f53a7b24ef20 Mon Sep 17 00:00:00 2001
From: Edward Yang <edward_yang_125@hotmail.com>
Date: Fri, 10 Oct 2025 13:14:15 +1100
Subject: [PATCH 03/45] Use __NVCOMPILER macro for target regions

---
 mpp/include/group_update_pack.inc   | 28 +++++++++++++++++-----------
 mpp/include/group_update_unpack.inc | 28 +++++++++++++++++-----------
 mpp/include/mpp_group_update.fh     | 10 +++++++---
 3 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index d08335d1e6..43e2af5bf5 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -21,25 +21,27 @@ if( group%k_loop_inside ) then
   do n = 1, npack
      buffer_pos = group%pack_buffer_pos(n) + buffer_start_pos
      pos  = buffer_pos
-     is = group%pack_is(n); ie = group%pack_ie(n)
-     js = group%pack_js(n); je = group%pack_je(n)
+     is = group%pack_is(n); ie = group%pack_ie(n); ni = ie-is+1
+     js = group%pack_js(n); je = group%pack_je(n); nj = je-js+1
      rotation = group%pack_rotation(n)
      if( group%pack_type(n) == FIELD_S ) then
         select case( rotation )
         case(ZERO)
            do l=1, group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
               do k = 1, ksize
                  do j = js, je
                     do i = is, ie
-                       idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                       idx = pos + (k-1)*nj*ni + (j-js)*ni + (i-is) + 1
                        buffer(idx) = field(i,j,k)
                     end do
                  end do
               enddo
-              pos = pos + ksize*(je-js+1)*(ie-is+1)
+              pos = pos + ksize*nj*ni
            enddo
         case( MINUS_NINETY )
            do l=1,group%nscalar  ! loop over number of fields
@@ -83,17 +85,19 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
+              !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
               do k = 1, ksize
                  do j = js, je
                     do i = is, ie
-                       idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                       idx = pos + (k-1)*nj*ni + (j-js)*ni + (i-is) + 1
                        buffer(idx) = fieldx(i,j,k)
                     end do
                  end do
               end do
-              pos = pos + ksize*(je-js+1)*(ie-is+1)
+              pos = pos + ksize*nj*ni
            end do
         case( MINUS_NINETY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
@@ -165,17 +169,19 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) if(use_device_ptr)
+              !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
               do k = 1, ksize
                  do j = js, je
                     do i = is, ie
-                       idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                       idx = pos + (k-1)*nj*ni + (j-js)*ni + (i-is) + 1
                        buffer(idx) = fieldy(i,j,k)
                     end do
                  end do
               end do
-              pos = pos + ksize*(je-js+1)*(ie-is+1)
+              pos = pos + ksize*nj*ni
            end do
         case( MINUS_NINETY )
            do l=1,nvector  ! loop over number of fields
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index f242cd3452..3f0b08faeb 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -21,55 +21,61 @@ if( group%k_loop_inside ) then
   do n = nunpack, 1, -1
      buffer_pos = group%unpack_buffer_pos(n) + buffer_start_pos
      pos = buffer_pos
-     is = group%unpack_is(n); ie = group%unpack_ie(n)
-     js = group%unpack_js(n); je = group%unpack_je(n)
+     is = group%unpack_is(n); ie = group%unpack_ie(n); ni = ie-is+1
+     js = group%unpack_js(n); je = group%unpack_je(n); nj = je-js+1
      if( group%unpack_type(n) == FIELD_S ) then
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
            !$omp target teams distribute parallel do collapse(3) if(use_device_ptr) private(idx) &
-           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) &
+           !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: field(is:ie,js:je,1:ksize))
+#endif
            do k = 1, ksize
               do j = js, je
                  do i = is, ie
-                    idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                    idx = pos + (k-1)*nj*ni + (j-js)*ni + (i-is) + 1
                     field(i,j,k) = buffer(idx)
                  end do
               end do
            end do
-           pos = pos + ksize*(je-js+1)*(ie-is+1)
+           pos = pos + ksize*nj*ni
         end do
      else if( group%unpack_type(n) == FIELD_X ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
            !$omp target teams distribute parallel do collapse(3) private(idx) &
-           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) &
+           !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: fieldx(is:ie,js:je,1:ksize)) if(use_device_ptr)
+#endif
            do k = 1, ksize
               do j = js, je
                  do i = is, ie
-                    idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                    idx = pos + (k-1)*nj*ni + (j-js)*ni + (i-is) + 1
                     fieldx(i,j,k) = buffer(idx)
                  end do
               end do
            end do
-           pos = pos + ksize*(je-js+1)*(ie-is+1)
+           pos = pos + ksize*nj*ni
         end do
      else if( group%unpack_type(n) == FIELD_Y ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
            !$omp target teams distribute parallel do collapse(3) private(idx) &
-           !$omp   map(to: buffer(pos+1:pos+ksize*(je-js+1)*(ie-is+1))) &
+           !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: fieldy(is:ie,js:je,1:ksize)) if(use_device_ptr)
+#endif
            do k = 1, ksize
               do j = js, je
                  do i = is, ie
-                    idx = pos + (k-1)*(je-js+1)*(ie-is+1) + (j-js)*(ie-is+1) + (i-is) + 1
+                    idx = pos + (k-1)*nj*ni + (j-js)*ni + (i-is) + 1
                     fieldy(i,j,k) = buffer(idx)
                  end do
               end do
            end do
-           pos = pos + ksize*(je-js+1)*(ie-is+1)
+           pos = pos + ksize*nj*ni
         end do
      endif
   enddo
diff --git a/mpp/include/mpp_group_update.fh b/mpp/include/mpp_group_update.fh
index 8464fa55f5..8f9251b10f 100644
--- a/mpp/include/mpp_group_update.fh
+++ b/mpp/include/mpp_group_update.fh
@@ -431,7 +431,7 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
   integer   :: msgsize
   integer   :: from_pe, to_pe, buffer_pos, pos, idx
   integer   :: ksize, is, ie, js, je
-  integer   :: n, l, m, i, j, k, buffer_start_pos, nk
+  integer   :: n, l, m, i, j, k, buffer_start_pos, ni, nj, nk
   integer   :: shift, gridtype, midpoint
   integer   :: npack, nunpack, rotation, isd
   logical   :: use_device_ptr
@@ -481,7 +481,9 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
 
   !---pre-post receive.
   call mpp_clock_begin(group_recv_clock)
+#ifdef __NVCOMPILER
   !$omp target enter data map(alloc: buffer) if(use_device_ptr)
+#endif
   do m = 1, nrecv
      msgsize = group%recv_size(m)
      from_pe = group%from_pe(m)
@@ -525,7 +527,9 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
   nunpack = group%nunpack
   call mpp_clock_begin(group_unpk_clock)
 #include <group_update_unpack.inc>
+#ifdef __NVCOMPILER
   !$omp target exit data map(release: buffer) if(use_device_ptr)
+#endif
   call mpp_clock_end(group_unpk_clock)
 
   ! ---northern boundary fold
@@ -653,7 +657,7 @@ subroutine MPP_START_GROUP_UPDATE_(group, domain, d_type, reuse_buffer)
   integer   :: msgsize, npack, rotation
   integer   :: from_pe, to_pe, buffer_pos, pos, idx
   integer   :: ksize, is, ie, js, je
-  integer   :: n, l, m, i, j, k, buffer_start_pos, nk
+  integer   :: n, l, m, i, j, k, buffer_start_pos, ni, nj, nk
   logical   :: reuse_buf_pos
   logical, parameter :: use_device_ptr = .false. ! placeholder
   character(len=8)            :: text
@@ -760,7 +764,7 @@ subroutine MPP_COMPLETE_GROUP_UPDATE_(group, domain, d_type)
   integer   :: k, buffer_pos, pos, m, n, l, idx
   integer   :: is, ie, js, je, ksize, i, j
   integer   :: shift, gridtype, midpoint, flags_v
-  integer   :: nunpack, rotation, buffer_start_pos, nk, isd
+  integer   :: nunpack, rotation, buffer_start_pos, ni, nj, nk, isd
   logical   :: recv_y(8)
   logical, parameter :: use_device_ptr = .false. ! placeholder
   MPP_TYPE_ :: buffer(size(mpp_domains_stack_nonblock(:)))

From 1e8274712f7c26c4a3df1b07865ffd9d89cab1a0 Mon Sep 17 00:00:00 2001
From: Edward Yang <edward_yang_125@hotmail.com>
Date: Fri, 10 Oct 2025 13:19:55 +1100
Subject: [PATCH 04/45] add back old omp directive wrapped in #ifndef
 __NVCOMPILER

---
 mpp/include/group_update_pack.inc   | 10 ++++++++++
 mpp/include/group_update_unpack.inc | 14 ++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index 43e2af5bf5..6fa8866e91 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -18,6 +18,11 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
+#ifndef __NVCOMPILER
+!$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
+!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
+#endif
   do n = 1, npack
      buffer_pos = group%pack_buffer_pos(n) + buffer_start_pos
      pos  = buffer_pos
@@ -251,6 +256,11 @@ if( group%k_loop_inside ) then
      endif
   enddo
 else
+#ifndef __NVCOMPILER
+!$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
+!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
+#endif
   do nk = 1, npack*ksize
      n = (nk-1)/ksize + 1
      k = mod((nk-1), ksize) + 1
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index 3f0b08faeb..894d6b110c 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -18,6 +18,11 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
+#ifndef __NVCOMPILER
+!$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
+!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy, n,k,ni,nj,idx)
+#endif
   do n = nunpack, 1, -1
      buffer_pos = group%unpack_buffer_pos(n) + buffer_start_pos
      pos = buffer_pos
@@ -80,13 +85,18 @@ if( group%k_loop_inside ) then
      endif
   enddo
 else
+#ifndef __NVCOMPILER
+!$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
+!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
+#endif
   do nk = nunpack*ksize, 1, -1
      n = (nk-1)/ksize + 1
      k = mod((nk-1), ksize) + 1
      buffer_pos = group%unpack_buffer_pos(n) + buffer_start_pos
      pos = buffer_pos + (k-1)*group%unpack_size(n)
-     is = group%unpack_is(n); ie = group%unpack_ie(n)
-     js = group%unpack_js(n); je = group%unpack_je(n)
+     is = group%unpack_is(n); ie = group%unpack_ie(n); ni = ie-is+1
+     js = group%unpack_js(n); je = group%unpack_je(n); nj = je-js+1
      if( group%unpack_type(n) == FIELD_S ) then
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)

From dc36e574ef3069e752b9e50050539e0feb504d26 Mon Sep 17 00:00:00 2001
From: Edward Yang <edward.yang@anu.edu.au>
Date: Fri, 10 Oct 2025 15:46:53 +1100
Subject: [PATCH 05/45] port remaining un/pack loops

---
 mpp/include/group_update_pack.inc   | 263 ++++++++++++++++++++++------
 mpp/include/group_update_unpack.inc |  27 ++-
 2 files changed, 226 insertions(+), 64 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index 6fa8866e91..27ed8b9438 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -51,38 +51,53 @@ if( group%k_loop_inside ) then
         case( MINUS_NINETY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
               do k = 1, ksize
                  do i = is, ie
                     do j = je, js, -1
-                       pos = pos + 1
-                       buffer(pos) = field(i,j,k)
+                       idx = pos + (k-1)*nj*ni + (i-is)*nj + (je-j) + 1
+                       buffer(idx) = field(i,j,k)
                     end do
                  end do
               end do
+              pos = pos + ksize*nj*ni
            end do
         case( NINETY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
               do k = 1, ksize
                  do i = ie, is, -1
                     do j = js, je
-                       pos = pos + 1
-                       buffer(pos) = field(i,j,k)
+                       idx = pos + (k-1)*nj*ni + (ie-i)*nj + (j-js) + 1
+                       buffer(idx) = field(i,j,k)
                     end do
                  end do
               end do
+              pos = pos + ksize*nj*ni
            end do
         case( ONE_HUNDRED_EIGHTY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
               do k = 1, ksize
                  do j = je, js, -1
                     do i = ie, is, -1
-                       pos = pos + 1
-                       buffer(pos) = field(i,j,k)
+                       idx = pos + (k-1)*nj*ni + (je-j)*ni + (ie-i) + 1
+                       buffer(idx) = field(i,j,k)
                     end do
                  end do
               end do
+              pos = pos + ksize*nj*ni
            end do
         end select
      else if( group%pack_type(n) == FIELD_X  ) then
@@ -108,64 +123,91 @@ if( group%k_loop_inside ) then
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
                  do k = 1, ksize
                     do i = is, ie
                        do j = je, js, -1
-                          pos = pos + 1
-                          buffer(pos) = fieldy(i,j,k)
+                        idx = pos + (k-1)*nj*ni + (i-is)*nj + (je-j) + 1
+                        buffer(idx) = fieldy(i,j,k)
                        end do
                     end do
                  end do
+                 pos = pos + ksize*nj*ni
               end do
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
                  do k = 1, ksize
                     do i = is, ie
                        do j = je, js, -1
-                          pos = pos + 1
-                          buffer(pos) = -fieldy(i,j,k)
+                          idx = pos + (k-1)*nj*ni + (i-is)*nj + (je-j) + 1
+                          buffer(idx) = -fieldy(i,j,k)
                        end do
                     end do
                  end do
+                 pos = pos + ksize*nj*ni
               end do
            end if
         case( NINETY )
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
               do k = 1, ksize
                  do i = ie, is, -1
                     do j = js, je
-                       pos = pos + 1
-                       buffer(pos) = fieldy(i,j,k)
+                     !   pos = pos + 1
+                     !   buffer(pos) = fieldy(i,j,k)
+                       idx = pos + (k-1)*nj*ni + (ie-i)*nj + (j-js) + 1
+                       buffer(idx) = fieldy(i,j,k)
                     end do
                  end do
               end do
+              pos = pos + ksize*nj*ni
            end do
         case( ONE_HUNDRED_EIGHTY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
                  do k = 1, ksize
                     do j = je, js, -1
                        do i = ie, is, -1
-                          pos = pos + 1
-                          buffer(pos) =  fieldx(i,j,k)
+                          idx = pos + (k-1)*nj*ni + (je-j)*ni + (ie-i) + 1
+                          buffer(idx) = fieldx(i,j,k)
                        end do
                     end do
                  end do
+                 pos = pos + ksize*nj*ni
               end do
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
                  do k = 1, ksize
                     do j = je, js, -1
                        do i = ie, is, -1
-                          pos = pos + 1
-                          buffer(pos) =  -fieldx(i,j,k)
+                          idx = pos + (k-1)*nj*ni + (je-j)*ni + (ie-i) + 1
+                          buffer(idx) = -fieldx(i,j,k)
                        end do
                     end do
                  end do
+                 pos = pos + ksize*nj*ni
               end do
            end if
         end select ! select case( rotation(n) )
@@ -191,65 +233,90 @@ if( group%k_loop_inside ) then
         case( MINUS_NINETY )
            do l=1,nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
               do k = 1, ksize
                  do i = is, ie
                     do j = je, js, -1
-                       pos = pos + 1
-                       buffer(pos) = fieldx(i,j,k)
+                     idx = pos + (k-1)*nj*ni + (i-is)*nj + (je-j) + 1
+                     buffer(idx) = fieldx(i,j,k)
                     end do
                  end do
               end do
+              pos = pos + ksize*nj*ni
            end do
         case( NINETY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1, nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
                  do k = 1, ksize
                     do i = ie, is, -1
                        do j = js, je
-                          pos = pos + 1
-                          buffer(pos) = fieldx(i,j,k)
+                          idx = pos + (k-1)*nj*ni + (ie-i)*nj + (j-js) + 1
+                          buffer(idx) = fieldx(i,j,k)
                        end do
                     end do
                  end do
+                 pos = pos + ksize*nj*ni
               end do
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
                  do k = 1, ksize
                     do i = ie, is, -1
                        do j = js, je
-                          pos = pos + 1
-                          buffer(pos) = -fieldx(i,j,k)
+                          idx = pos + (k-1)*nj*ni + (ie-i)*nj + (j-js) + 1
+                          buffer(idx) = -fieldx(i,j,k)
                        end do
                     end do
                  end do
+                 pos = pos + ksize*nj*ni
               end do
            end if
         case( ONE_HUNDRED_EIGHTY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
                  do k = 1, ksize
                     do j = je, js, -1
                        do i = ie, is, -1
-                          pos = pos + 1
-                          buffer(pos) =  fieldy(i,j,k)
+                          idx = pos + (k-1)*nj*ni + (je-j)*ni + (ie-i) + 1
+                          buffer(idx) = fieldy(i,j,k)
                        end do
                     end do
                  end do
+                 pos = pos + ksize*nj*ni
               end do
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+#endif
                  do k = 1, ksize
                     do j = je, js, -1
                        do i = ie, is, -1
-                          pos = pos + 1
-                          buffer(pos) =  -fieldy(i,j,k)
+                          idx = pos + (k-1)*nj*ni + (je-j)*ni + (ie-i) + 1
+                          buffer(idx) = -fieldy(i,j,k)
                        end do
                     end do
                  end do
+                 pos = pos + ksize*nj*ni
               end do
            end if
         end select ! select case( rotation(n) )
@@ -274,42 +341,62 @@ else
         case(ZERO)
            do l=1, group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
               do j = js, je
                  do i = is, ie
-                    pos = pos + 1
-                    buffer(pos) = field(i,j,k)
+                    idx = pos + (j-js)*ni + (i-is) + 1
+                    buffer(idx) = field(i,j,k)
                  end do
               end do
+              pos = pos + nj*ni
            enddo
         case( MINUS_NINETY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
               do i = is, ie
                  do j = je, js, -1
-                    pos = pos + 1
-                    buffer(pos) = field(i,j,k)
+                    idx = pos + (i-is)*nj + (je-j) + 1
+                    buffer(idx) = field(i,j,k)
                  end do
               end do
+              pos = pos + nj*ni
            end do
         case( NINETY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
               do i = ie, is, -1
                  do j = js, je
-                    pos = pos + 1
-                    buffer(pos) = field(i,j,k)
+                    idx = pos + (ie-i)*nj + (j-js) + 1
+                    buffer(idx) = field(i,j,k)
                  end do
               end do
+              pos = pos + nj*ni
            end do
         case( ONE_HUNDRED_EIGHTY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
               do j = je, js, -1
                  do i = ie, is, -1
-                    pos = pos + 1
-                    buffer(pos) = field(i,j,k)
+                    idx = pos + (je-j)*ni + (ie-i) + 1
+                    buffer(idx) = field(i,j,k)
                  end do
               end do
+              pos = pos + nj*ni
            end do
         end select
      else if( group%pack_type(n) == FIELD_X  ) then
@@ -317,65 +404,95 @@ else
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
               do j = js, je
                  do i = is, ie
-                    pos = pos + 1
-                    buffer(pos) = fieldx(i,j,k)
+                    idx = pos + (j-js)*ni + (i-is) + 1
+                    buffer(idx) = fieldx(i,j,k)
                  end do
               end do
+              pos = pos + nj*ni
            end do
         case( MINUS_NINETY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
                  do i = is, ie
                     do j = je, js, -1
-                       pos = pos + 1
-                       buffer(pos) = fieldy(i,j,k)
+                       idx = pos + (i-is)*nj + (je-j) + 1
+                       buffer(idx) = fieldy(i,j,k)
                     end do
                  end do
+                 pos = pos + nj*ni
               end do
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
                  do i = is, ie
                     do j = je, js, -1
-                       pos = pos + 1
-                       buffer(pos) = -fieldy(i,j,k)
+                       idx = pos + (i-is)*nj + (je-j) + 1
+                       buffer(idx) = -fieldy(i,j,k)
                     end do
                  end do
+                 pos = pos + nj*ni
               end do
            end if
         case( NINETY )
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
               do i = ie, is, -1
                  do j = js, je
-                    pos = pos + 1
-                    buffer(pos) = fieldy(i,j,k)
+                    idx = pos + (ie-i)*nj + (j-js) + 1
+                    buffer(idx) = fieldy(i,j,k)
                  end do
               end do
+              pos = pos + nj*ni
            end do
         case( ONE_HUNDRED_EIGHTY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
                  do j = je, js, -1
                     do i = ie, is, -1
-                       pos = pos + 1
-                       buffer(pos) =  fieldx(i,j,k)
+                       idx = pos + (je-j)*ni + (ie-i) + 1
+                       buffer(idx) = fieldx(i,j,k)
                     end do
                  end do
+                 pos = pos + nj*ni
               end do
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
                  do j = je, js, -1
                     do i = ie, is, -1
-                       pos = pos + 1
-                       buffer(pos) =  -fieldx(i,j,k)
+                       idx = pos + (je-j)*ni + (ie-i) + 1
+                       buffer(idx) = -fieldx(i,j,k)
                     end do
                  end do
+                 pos = pos + nj*ni
               end do
            end if
         end select ! select case( rotation(n) )
@@ -384,65 +501,95 @@ else
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
               do j = js, je
                  do i = is, ie
-                    pos = pos + 1
-                    buffer(pos) = fieldy(i,j,k)
+                    idx = pos + (j-js)*ni + (i-is) + 1
+                    buffer(idx) = fieldy(i,j,k)
                  end do
               end do
+              pos = pos + nj*ni
            end do
         case( MINUS_NINETY )
            do l=1,nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
               do i = is, ie
                  do j = je, js, -1
-                    pos = pos + 1
-                    buffer(pos) = fieldx(i,j,k)
+                    idx = pos + (i-is)*nj + (je-j) + 1
+                    buffer(idx) = fieldx(i,j,k)
                  end do
               end do
+              pos = pos + nj*ni
            end do
         case( NINETY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1, nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
                  do i = ie, is, -1
                     do j = js, je
-                       pos = pos + 1
-                       buffer(pos) = fieldx(i,j,k)
+                       idx = pos + (ie-i)*nj + (j-js) + 1
+                       buffer(idx) = fieldx(i,j,k)
                     end do
                  end do
+                 pos = pos + nj*ni
               end do
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
                  do i = ie, is, -1
                     do j = js, je
-                       pos = pos + 1
-                       buffer(pos) = -fieldx(i,j,k)
+                       idx = pos + (ie-i)*nj + (j-js) + 1
+                       buffer(idx) = -fieldx(i,j,k)
                     end do
                  end do
+                 pos = pos + nj*ni
               end do
            end if
         case( ONE_HUNDRED_EIGHTY )
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
                  do j = je, js, -1
                     do i = ie, is, -1
-                       pos = pos + 1
-                       buffer(pos) =  fieldy(i,j,k)
+                       idx = pos + (je-j)*ni + (ie-i) + 1
+                       buffer(idx) = fieldy(i,j,k)
                     end do
                  end do
+                 pos = pos + nj*ni
               end do
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+#endif
                  do j = je, js, -1
                     do i = ie, is, -1
-                       pos = pos + 1
-                       buffer(pos) =  -fieldy(i,j,k)
+                       idx = pos + (je-j)*ni + (ie-i) + 1
+                       buffer(idx) = -fieldy(i,j,k)
                     end do
                  end do
+                 pos = pos + nj*ni
               end do
            end if
         end select ! select case( rotation(n) )
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index 894d6b110c..3fa214c7bc 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -100,32 +100,47 @@ else
      if( group%unpack_type(n) == FIELD_S ) then
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)
+#ifdef __NVCOMPILER
+           !$omp target teams distribute parallel do collapse(2) private(idx) &
+           !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: field(is:ie,js:je,k)) if(use_device_ptr)
+#endif
            do j = js, je
               do i = is, ie
-                 pos = pos + 1
-                 field(i,j,k) = buffer(pos)
+                 idx = pos + (j-js)*ni + (i-is) + 1
+                 field(i,j,k) = buffer(idx)
               end do
            end do
+           pos = pos + ni*nj
         end do
      else if( group%unpack_type(n) == FIELD_X ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldx = group%addrs_x(l)
+#ifdef __NVCOMPILER__
+           !$omp target teams distribute parallel do collapse(2) private(idx) &
+           !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: fieldx(is:ie,js:je,k)) if(use_device_ptr)
+#endif
            do j = js, je
               do i = is, ie
-                 pos = pos + 1
-                 fieldx(i,j,k) = buffer(pos)
+                 idx = pos + (j-js)*ni + (i-is) + 1
+                 fieldx(i,j,k) = buffer(idx)
               end do
            end do
+           pos = pos + ni*nj
         end do
      else if( group%unpack_type(n) == FIELD_Y ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldy = group%addrs_y(l)
+#ifdef __NVCOMPILER
+           !$omp target teams distribute parallel do collapse(2) private(idx) &
+           !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: fieldy(is:ie,js:je,k)) if(use_device_ptr)
+#endif
            do j = js, je
               do i = is, ie
-                 pos = pos + 1
-                 fieldy(i,j,k) = buffer(pos)
+               idx = pos + (j-js)*ni + (i-is) + 1
+               fieldy(i,j,k) = buffer(idx)
               end do
            end do
+           pos = pos + ni*nj
         end do
      endif
   enddo

From 972a97c77cf46f7794345d6982c23c7ef05f823c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jorge=20Luis=20G=C3=A1lvez=20Vallejo?=
 <jorgegalvez1694@gmail.com>
Date: Thu, 30 Oct 2025 16:51:55 +1100
Subject: [PATCH 06/45] add multi gpu support (#2)

* add multi gpu support

* address review comments, add helpful comment for the acc/mp runbtime call
---
 mpp/include/mpp_comm_mpi.inc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/mpp/include/mpp_comm_mpi.inc b/mpp/include/mpp_comm_mpi.inc
index 928f9fcb92..7b20d43044 100644
--- a/mpp/include/mpp_comm_mpi.inc
+++ b/mpp/include/mpp_comm_mpi.inc
@@ -31,6 +31,7 @@
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !> @brief Initialize the @ref mpp_mod module. Must be called before any usage.
   subroutine mpp_init( flags, localcomm, test_level, alt_input_nml_path )
+  !$use omp_lib
   integer, optional, intent(in) :: flags !< Flags for debug output, can be MPP_VERBOSE or MPP_DEBUG
   integer, optional, intent(in) :: localcomm !< Id of MPI communicator used to initialize
   integer, optional, intent(in) :: test_level !< Used to exit initialization at certain stages
@@ -54,6 +55,14 @@
   call MPI_COMM_RANK( mpp_comm_private, pe,   error )
   call MPI_COMM_SIZE( mpp_comm_private, npes, error )
 
+  ! set default device to enable multi GPU parallelism 
+  ! calls to both OpenACC and OpenMP runtimes are needed 
+  ! because we use both do-concurrent and openmp 
+  ! if you remove either, the code will run multiple 
+  ! ranks on a _single_ GPU. Be careful out there!
+  !$call omp_set_default_device(pe)
+  !$acc set device_num(pe)
+
   module_is_initialized = .TRUE.
   if (present(test_level)) then
     t_level = test_level

From 78b436d9792f62893666023c8d6fa03b878d5cd1 Mon Sep 17 00:00:00 2001
From: Edward Yang <edward.yang@anu.edu.au>
Date: Tue, 14 Oct 2025 13:20:54 +1100
Subject: [PATCH 07/45] sub __NVCOMPILER with __NVCOMPILER_OPENMP_GPU

---
 mpp/include/group_update_pack.inc   | 68 ++++++++++++++---------------
 mpp/include/group_update_unpack.inc | 16 +++----
 mpp/include/mpp_group_update.fh     |  4 +-
 3 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index 27ed8b9438..4c5b7be7e3 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -18,7 +18,7 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
-#ifndef __NVCOMPILER
+#ifndef __NVCOMPILER_OPENMP_GPU
 !$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
 !$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
 !$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
@@ -34,7 +34,7 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -51,7 +51,7 @@ if( group%k_loop_inside ) then
         case( MINUS_NINETY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -68,7 +68,7 @@ if( group%k_loop_inside ) then
         case( NINETY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -85,7 +85,7 @@ if( group%k_loop_inside ) then
         case( ONE_HUNDRED_EIGHTY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -105,7 +105,7 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -123,7 +123,7 @@ if( group%k_loop_inside ) then
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
                  !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -140,7 +140,7 @@ if( group%k_loop_inside ) then
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
                  !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -158,7 +158,7 @@ if( group%k_loop_inside ) then
         case( NINETY )
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -178,7 +178,7 @@ if( group%k_loop_inside ) then
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
                  !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -195,7 +195,7 @@ if( group%k_loop_inside ) then
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
                  !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -216,7 +216,7 @@ if( group%k_loop_inside ) then
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -233,7 +233,7 @@ if( group%k_loop_inside ) then
         case( MINUS_NINETY )
            do l=1,nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
               !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -251,7 +251,7 @@ if( group%k_loop_inside ) then
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1, nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
                  !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -268,7 +268,7 @@ if( group%k_loop_inside ) then
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
                  !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -287,7 +287,7 @@ if( group%k_loop_inside ) then
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
                  !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -304,7 +304,7 @@ if( group%k_loop_inside ) then
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
                  !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
 #endif
@@ -323,7 +323,7 @@ if( group%k_loop_inside ) then
      endif
   enddo
 else
-#ifndef __NVCOMPILER
+#ifndef __NVCOMPILER_OPENMP_GPU
 !$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
 !$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
 !$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
@@ -341,7 +341,7 @@ else
         case(ZERO)
            do l=1, group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
               !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -356,7 +356,7 @@ else
         case( MINUS_NINETY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
               !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -371,7 +371,7 @@ else
         case( NINETY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
               !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -386,7 +386,7 @@ else
         case( ONE_HUNDRED_EIGHTY )
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
               !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -404,7 +404,7 @@ else
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
               !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -420,7 +420,7 @@ else
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
                 !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -435,7 +435,7 @@ else
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
                  !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -451,7 +451,7 @@ else
         case( NINETY )
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
               !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -467,7 +467,7 @@ else
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
                  !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -482,7 +482,7 @@ else
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
                  !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -501,7 +501,7 @@ else
         case(ZERO)
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
               !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -516,7 +516,7 @@ else
         case( MINUS_NINETY )
            do l=1,nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
               !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -532,7 +532,7 @@ else
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1, nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
                  !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -547,7 +547,7 @@ else
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
                  !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -564,7 +564,7 @@ else
            if( BTEST(group%flags_v,SCALAR_BIT) ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
                  !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
@@ -579,7 +579,7 @@ else
            else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
                  !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
 #endif
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index 3fa214c7bc..286f07bc83 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -18,7 +18,7 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
-#ifndef __NVCOMPILER
+#ifndef __NVCOMPILER_OPENMP_GPU
 !$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
 !$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
 !$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy, n,k,ni,nj,idx)
@@ -31,7 +31,7 @@ if( group%k_loop_inside ) then
      if( group%unpack_type(n) == FIELD_S ) then
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
            !$omp target teams distribute parallel do collapse(3) if(use_device_ptr) private(idx) &
            !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: field(is:ie,js:je,1:ksize))
@@ -49,7 +49,7 @@ if( group%k_loop_inside ) then
      else if( group%unpack_type(n) == FIELD_X ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
            !$omp target teams distribute parallel do collapse(3) private(idx) &
            !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: fieldx(is:ie,js:je,1:ksize)) if(use_device_ptr)
@@ -67,7 +67,7 @@ if( group%k_loop_inside ) then
      else if( group%unpack_type(n) == FIELD_Y ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
            !$omp target teams distribute parallel do collapse(3) private(idx) &
            !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: fieldy(is:ie,js:je,1:ksize)) if(use_device_ptr)
@@ -85,7 +85,7 @@ if( group%k_loop_inside ) then
      endif
   enddo
 else
-#ifndef __NVCOMPILER
+#ifndef __NVCOMPILER_OPENMP_GPU
 !$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
 !$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
 !$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
@@ -100,7 +100,7 @@ else
      if( group%unpack_type(n) == FIELD_S ) then
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
            !$omp target teams distribute parallel do collapse(2) private(idx) &
            !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: field(is:ie,js:je,k)) if(use_device_ptr)
 #endif
@@ -115,7 +115,7 @@ else
      else if( group%unpack_type(n) == FIELD_X ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldx = group%addrs_x(l)
-#ifdef __NVCOMPILER__
+#ifdef __NVCOMPILER_OPENMP_GPU
            !$omp target teams distribute parallel do collapse(2) private(idx) &
            !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: fieldx(is:ie,js:je,k)) if(use_device_ptr)
 #endif
@@ -130,7 +130,7 @@ else
      else if( group%unpack_type(n) == FIELD_Y ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldy = group%addrs_y(l)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
            !$omp target teams distribute parallel do collapse(2) private(idx) &
            !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: fieldy(is:ie,js:je,k)) if(use_device_ptr)
 #endif
diff --git a/mpp/include/mpp_group_update.fh b/mpp/include/mpp_group_update.fh
index 8f9251b10f..4f4c668b7d 100644
--- a/mpp/include/mpp_group_update.fh
+++ b/mpp/include/mpp_group_update.fh
@@ -481,7 +481,7 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
 
   !---pre-post receive.
   call mpp_clock_begin(group_recv_clock)
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
   !$omp target enter data map(alloc: buffer) if(use_device_ptr)
 #endif
   do m = 1, nrecv
@@ -527,7 +527,7 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
   nunpack = group%nunpack
   call mpp_clock_begin(group_unpk_clock)
 #include <group_update_unpack.inc>
-#ifdef __NVCOMPILER
+#ifdef __NVCOMPILER_OPENMP_GPU
   !$omp target exit data map(release: buffer) if(use_device_ptr)
 #endif
   call mpp_clock_end(group_unpk_clock)

From 5ac547f974ea671ff938dd59ec632c0409c40007 Mon Sep 17 00:00:00 2001
From: Edward Yang <edward.yang@anu.edu.au>
Date: Tue, 16 Dec 2025 16:42:46 +1100
Subject: [PATCH 08/45] allow choice of gpu or cpu parallel

To enable this,  had to be removed -
otherwise segfaults happen on the GPU.
---
 mpp/include/group_update_pack.inc   | 9 ++++-----
 mpp/include/group_update_unpack.inc | 9 ++++-----
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index 4c5b7be7e3..8c421ba906 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -18,11 +18,10 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
-#ifndef __NVCOMPILER_OPENMP_GPU
-!$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
-!$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
-!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
-#endif
+!$OMP parallel do shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP            private(buffer_pos,pos,m,is,ie,js,je,rotation, &
+!$OMP                    ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx) &
+!$OMP                if (.not.use_device_ptr)
   do n = 1, npack
      buffer_pos = group%pack_buffer_pos(n) + buffer_start_pos
      pos  = buffer_pos
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index 286f07bc83..a745982cf0 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -18,11 +18,10 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
-#ifndef __NVCOMPILER_OPENMP_GPU
-!$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
-!$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
-!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy, n,k,ni,nj,idx)
-#endif
+!$OMP parallel do shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP            private(buffer_pos,pos,m,is, ie, js, je,rotation, &
+!$OMP                    ptr_field, ptr_fieldx, ptr_fieldy, n,k,ni,nj,idx) &
+!$OMP                if (.not.use_device_ptr)
   do n = nunpack, 1, -1
      buffer_pos = group%unpack_buffer_pos(n) + buffer_start_pos
      pos = buffer_pos

From 85ae7efbc81e6c4053e9c519e82a1ccc1032a7df Mon Sep 17 00:00:00 2001
From: Edward Yang <edward_yang_125@hotmail.com>
Date: Thu, 15 Jan 2026 02:31:34 +0000
Subject: [PATCH 09/45] fix omp set device call

---
 mpp/include/mpp_comm_mpi.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mpp/include/mpp_comm_mpi.inc b/mpp/include/mpp_comm_mpi.inc
index 7b20d43044..42d30f308f 100644
--- a/mpp/include/mpp_comm_mpi.inc
+++ b/mpp/include/mpp_comm_mpi.inc
@@ -60,7 +60,7 @@
   ! because we use both do-concurrent and openmp 
   ! if you remove either, the code will run multiple 
   ! ranks on a _single_ GPU. Be careful out there!
-  !$call omp_set_default_device(pe)
+  !$ call omp_set_default_device(pe)
   !$acc set device_num(pe)
 
   module_is_initialized = .TRUE.

From b4993f5dcce9bff06ee3aae21921e30e044f0d33 Mon Sep 17 00:00:00 2001
From: Edward Yang <edward.yang@anu.edu.au>
Date: Tue, 17 Feb 2026 09:54:24 +1100
Subject: [PATCH 10/45] Revert "allow choice of gpu or cpu parallel"

This reverts commit 0cc2a77ee70851ad08458a52a1e4f6307e163463.
Having both the CPU and GPU OpenMP directives compiled caused
a significant slowdown in GPU packing/unpacking performance -
even if parallelism is controlled using OpenMP "if" clause.
---
 mpp/include/group_update_pack.inc   | 9 +++++----
 mpp/include/group_update_unpack.inc | 9 +++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index 8c421ba906..4c5b7be7e3 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -18,10 +18,11 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
-!$OMP parallel do shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
-!$OMP            private(buffer_pos,pos,m,is,ie,js,je,rotation, &
-!$OMP                    ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx) &
-!$OMP                if (.not.use_device_ptr)
+#ifndef __NVCOMPILER_OPENMP_GPU
+!$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
+!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
+#endif
   do n = 1, npack
      buffer_pos = group%pack_buffer_pos(n) + buffer_start_pos
      pos  = buffer_pos
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index a745982cf0..286f07bc83 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -18,10 +18,11 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
-!$OMP parallel do shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
-!$OMP            private(buffer_pos,pos,m,is, ie, js, je,rotation, &
-!$OMP                    ptr_field, ptr_fieldx, ptr_fieldy, n,k,ni,nj,idx) &
-!$OMP                if (.not.use_device_ptr)
+#ifndef __NVCOMPILER_OPENMP_GPU
+!$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
+!$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy, n,k,ni,nj,idx)
+#endif
   do n = nunpack, 1, -1
      buffer_pos = group%unpack_buffer_pos(n) + buffer_start_pos
      pos = buffer_pos

From baa8d88cc0df5d5e2ca20dca1d3d507d96c4196d Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Mon, 16 Mar 2026 19:09:09 +0000
Subject: [PATCH 11/45] OMP MPI: Minor cleanups

Some very minor changes to the OpenMP target MPI PR:

* use_device_ptr -> use_device_addr

  This appears to be the updated form (or at least nvfortran says it is)

* Whitespace added to `!$ use omp_lib`

  Does not seem crucial but from our previous discussion it appears more
  correct.

* Removal of some trailing whitespace.
---
 mpp/include/mpp_comm_mpi.inc    | 10 +++++-----
 mpp/include/mpp_transmit_mpi.fh |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/mpp/include/mpp_comm_mpi.inc b/mpp/include/mpp_comm_mpi.inc
index 42d30f308f..142473497e 100644
--- a/mpp/include/mpp_comm_mpi.inc
+++ b/mpp/include/mpp_comm_mpi.inc
@@ -31,7 +31,7 @@
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !> @brief Initialize the @ref mpp_mod module. Must be called before any usage.
   subroutine mpp_init( flags, localcomm, test_level, alt_input_nml_path )
-  !$use omp_lib
+  !$ use omp_lib
   integer, optional, intent(in) :: flags !< Flags for debug output, can be MPP_VERBOSE or MPP_DEBUG
   integer, optional, intent(in) :: localcomm !< Id of MPI communicator used to initialize
   integer, optional, intent(in) :: test_level !< Used to exit initialization at certain stages
@@ -55,10 +55,10 @@
   call MPI_COMM_RANK( mpp_comm_private, pe,   error )
   call MPI_COMM_SIZE( mpp_comm_private, npes, error )
 
-  ! set default device to enable multi GPU parallelism 
-  ! calls to both OpenACC and OpenMP runtimes are needed 
-  ! because we use both do-concurrent and openmp 
-  ! if you remove either, the code will run multiple 
+  ! set default device to enable multi GPU parallelism
+  ! calls to both OpenACC and OpenMP runtimes are needed
+  ! because we use both do-concurrent and openmp
+  ! if you remove either, the code will run multiple
   ! ranks on a _single_ GPU. Be careful out there!
   !$ call omp_set_default_device(pe)
   !$acc set device_num(pe)
diff --git a/mpp/include/mpp_transmit_mpi.fh b/mpp/include/mpp_transmit_mpi.fh
index 2d2fef19e6..ce70b19812 100644
--- a/mpp/include/mpp_transmit_mpi.fh
+++ b/mpp/include/mpp_transmit_mpi.fh
@@ -88,7 +88,7 @@
              call mpp_error(FATAL, &
                   & "MPP_TRANSMIT: cur_send_request is greater than max_request, increase mpp_nml request_multiply")
              if (use_device_ptr) then
-                 !$omp target data use_device_ptr(put_data)
+                 !$omp target data use_device_addr(put_data)
                  call MPI_ISEND( put_data, put_len, MPI_TYPE_, to_pe, comm_tag, mpp_comm_private, &
                                  request_send(cur_send_request), error)
                  !$omp end target data
@@ -143,7 +143,7 @@
                 call mpp_error(FATAL, &
                      "MPP_TRANSMIT: cur_recv_request is greater than max_request, increase mpp_nml request_multiply")
                 if (use_device_ptr) then
-                    !$omp target data use_device_ptr(get_data)
+                    !$omp target data use_device_addr(get_data)
                     call MPI_IRECV( get_data, get_len, MPI_TYPE_, from_pe, comm_tag, mpp_comm_private, &
                                     request_recv(cur_recv_request), error )
                     !$omp end target data

From 8d83224b1feb87d5160a80b07b9926f36e225f76 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Mon, 16 Mar 2026 19:47:25 +0000
Subject: [PATCH 12/45] OMP target MPI: line length compliance

This patch refactors several lines to keep within the 121-character line
length limit prescribed by the FMS style guidelines.
---
 mpp/include/group_update_pack.inc | 128 ++++++++++++++++++++++--------
 mpp/include/mpp_transmit.inc      |   9 ++-
 2 files changed, 102 insertions(+), 35 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index 4c5b7be7e3..2422c1d8fa 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -36,7 +36,9 @@ if( group%k_loop_inside ) then
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) &
+              !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do k = 1, ksize
                  do j = js, je
@@ -53,7 +55,9 @@ if( group%k_loop_inside ) then
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) &
+              !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do k = 1, ksize
                  do i = is, ie
@@ -70,7 +74,9 @@ if( group%k_loop_inside ) then
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) &
+              !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do k = 1, ksize
                  do i = ie, is, -1
@@ -87,7 +93,9 @@ if( group%k_loop_inside ) then
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: field(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+              !$omp   map(to: field(is:ie,js:je,1:ksize)) &
+              !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do k = 1, ksize
                  do j = je, js, -1
@@ -107,7 +115,9 @@ if( group%k_loop_inside ) then
               ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+              !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
+              !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do k = 1, ksize
                  do j = js, je
@@ -125,7 +135,9 @@ if( group%k_loop_inside ) then
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
-                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
+                 !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do k = 1, ksize
                     do i = is, ie
@@ -142,7 +154,9 @@ if( group%k_loop_inside ) then
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
-                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
+                 !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do k = 1, ksize
                     do i = is, ie
@@ -160,7 +174,9 @@ if( group%k_loop_inside ) then
               ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+              !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
+              !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do k = 1, ksize
                  do i = ie, is, -1
@@ -180,7 +196,9 @@ if( group%k_loop_inside ) then
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
-                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
+                 !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+                 !$omp   if(use_device_ptr)
 #endif
                  do k = 1, ksize
                     do j = je, js, -1
@@ -197,7 +215,9 @@ if( group%k_loop_inside ) then
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
-                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
+                 !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do k = 1, ksize
                     do j = je, js, -1
@@ -218,7 +238,9 @@ if( group%k_loop_inside ) then
               ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+              !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
+              !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do k = 1, ksize
                  do j = js, je
@@ -235,7 +257,9 @@ if( group%k_loop_inside ) then
               ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(3) private(idx) &
-              !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+              !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
+              !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do k = 1, ksize
                  do i = is, ie
@@ -253,7 +277,9 @@ if( group%k_loop_inside ) then
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
-                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
+                 !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do k = 1, ksize
                     do i = ie, is, -1
@@ -270,7 +296,9 @@ if( group%k_loop_inside ) then
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
-                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
+                 !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do k = 1, ksize
                     do i = ie, is, -1
@@ -289,7 +317,9 @@ if( group%k_loop_inside ) then
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
-                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
+                 !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do k = 1, ksize
                     do j = je, js, -1
@@ -306,7 +336,9 @@ if( group%k_loop_inside ) then
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(3) private(idx) &
-                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) map(from: buffer(pos+1:pos+ksize*nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
+                 !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
+                 !$omp   if(use_device_ptr)
 #endif
                  do k = 1, ksize
                     do j = je, js, -1
@@ -343,7 +375,9 @@ else
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
-              !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+              !$omp   map(to: field(k, is:ie, js:je)) &
+              !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do j = js, je
                  do i = is, ie
@@ -358,7 +392,9 @@ else
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
-              !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+              !$omp   map(to: field(k, is:ie, js:je)) &
+              !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do i = is, ie
                  do j = je, js, -1
@@ -373,7 +409,9 @@ else
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
-              !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+              !$omp   map(to: field(k, is:ie, js:je)) &
+              !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do i = ie, is, -1
                  do j = js, je
@@ -388,7 +426,9 @@ else
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
-              !$omp   map(to: field(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+              !$omp   map(to: field(k, is:ie, js:je)) &
+              !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do j = je, js, -1
                  do i = ie, is, -1
@@ -406,7 +446,9 @@ else
               ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
-              !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+              !$omp   map(to: fieldx(k, is:ie, js:je)) &
+              !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do j = js, je
                  do i = is, ie
@@ -422,7 +464,9 @@ else
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
-                !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldy(k, is:ie, js:je)) &
+                 !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do i = is, ie
                     do j = je, js, -1
@@ -437,7 +481,9 @@ else
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
-                 !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldy(k, is:ie, js:je)) &
+                 !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do i = is, ie
                     do j = je, js, -1
@@ -453,7 +499,9 @@ else
               ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
-              !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+              !$omp   map(to: fieldy(k, is:ie, js:je)) &
+              !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do i = ie, is, -1
                  do j = js, je
@@ -469,7 +517,9 @@ else
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
-                 !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldx(k, is:ie, js:je)) &
+                 !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do j = je, js, -1
                     do i = ie, is, -1
@@ -484,7 +534,9 @@ else
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
-                 !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldx(k, is:ie, js:je)) &
+                 !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do j = je, js, -1
                     do i = ie, is, -1
@@ -503,7 +555,9 @@ else
               ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
-              !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+              !$omp   map(to: fieldy(k, is:ie, js:je)) &
+              !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+              !$omp   if (use_device_ptr)
 #endif
               do j = js, je
                  do i = is, ie
@@ -518,7 +572,9 @@ else
               ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
               !$omp target teams distribute parallel do collapse(2) private(idx) &
-              !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+              !$omp   map(to: fieldx(k, is:ie, js:je)) &
+              !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+              !$omp   if(use_device_ptr)
 #endif
               do i = is, ie
                  do j = je, js, -1
@@ -534,7 +590,9 @@ else
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
-                 !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldx(k, is:ie, js:je)) &
+                 !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do i = ie, is, -1
                     do j = js, je
@@ -549,7 +607,9 @@ else
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
-                 !$omp   map(to: fieldx(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldx(k, is:ie, js:je)) &
+                 !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do i = ie, is, -1
                     do j = js, je
@@ -566,7 +626,9 @@ else
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
-                 !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldy(k, is:ie, js:je)) &
+                 !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do j = je, js, -1
                     do i = ie, is, -1
@@ -581,7 +643,9 @@ else
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
                  !$omp target teams distribute parallel do collapse(2) private(idx) &
-                 !$omp   map(to: fieldy(k, is:ie, js:je)) map(from: buffer(pos+1:pos+nj*ni)) if(use_device_ptr)
+                 !$omp   map(to: fieldy(k, is:ie, js:je)) &
+                 !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
+                 !$omp   if (use_device_ptr)
 #endif
                  do j = je, js, -1
                     do i = ie, is, -1
diff --git a/mpp/include/mpp_transmit.inc b/mpp/include/mpp_transmit.inc
index 560de01c18..71b0d97f0c 100644
--- a/mpp/include/mpp_transmit.inc
+++ b/mpp/include/mpp_transmit.inc
@@ -190,7 +190,8 @@
 
       ptr = LOC(get_data)
       get_len=1; if(PRESENT(glen))get_len=glen
-      call mpp_transmit( dummy, 1, NULL_PE, get_data1D, get_len, from_pe, block, tag, recv_request=request, omp_offload=omp_offload )
+      call mpp_transmit( dummy, 1, NULL_PE, get_data1D, get_len, from_pe, &
+          block, tag, recv_request=request, omp_offload=omp_offload )
 
     end subroutine MPP_RECV_SCALAR_
 
@@ -209,7 +210,8 @@
       pointer( ptr, put_data1D )
       ptr = LOC(put_data)
       put_len=1; if(PRESENT(plen))put_len=plen
-      call mpp_transmit( put_data1D, put_len, to_pe, dummy, 1, NULL_PE, tag = tag, send_request=request, omp_offload=omp_offload )
+      call mpp_transmit( put_data1D, put_len, to_pe, dummy, 1, NULL_PE, &
+          tag=tag, send_request=request, omp_offload=omp_offload )
 
     end subroutine MPP_SEND_SCALAR_
 
@@ -222,7 +224,8 @@
       integer, intent(out), optional :: request
 
       MPP_TYPE_ :: dummy(1,1)
-      call mpp_transmit( dummy, 1, NULL_PE, get_data, get_len, from_pe, block, tag, recv_request=request )
+      call mpp_transmit( dummy, 1, NULL_PE, get_data, get_len, from_pe, &
+          block, tag, recv_request=request )
     end subroutine MPP_RECV_2D_
 
     subroutine MPP_SEND_2D_( put_data, put_len, to_pe, tag, request )

From 236aef32a060abce0acff34048a94de39abcf244 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Tue, 17 Mar 2026 14:14:53 +0000
Subject: [PATCH 13/45] OMP MPI: Update nocomm interface

The no-comm (no MPI) interface has been updated to support the new
omp_offload argument.
---
 mpp/include/mpp_transmit_mpi.fh    | 6 +++---
 mpp/include/mpp_transmit_nocomm.fh | 5 ++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/mpp/include/mpp_transmit_mpi.fh b/mpp/include/mpp_transmit_mpi.fh
index ce70b19812..4ce6af6148 100644
--- a/mpp/include/mpp_transmit_mpi.fh
+++ b/mpp/include/mpp_transmit_mpi.fh
@@ -52,12 +52,12 @@
       integer                       :: rsize
       logical                       :: use_device_ptr
 
-      use_device_ptr = .false.
-      if (present(omp_offload)) use_device_ptr = omp_offload
-
       if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_TRANSMIT: You must first call mpp_init.' )
       if( to_pe.EQ.NULL_PE .AND. from_pe.EQ.NULL_PE )return
 
+      use_device_ptr = .false.
+      if (present(omp_offload)) use_device_ptr = omp_offload
+
       block_comm = .true.
       if(PRESENT(block)) block_comm = block
 
diff --git a/mpp/include/mpp_transmit_nocomm.fh b/mpp/include/mpp_transmit_nocomm.fh
index ca132a4dc8..ee08d53a53 100644
--- a/mpp/include/mpp_transmit_nocomm.fh
+++ b/mpp/include/mpp_transmit_nocomm.fh
@@ -36,7 +36,7 @@
 !!words from an array of any rank to be passed (avoiding f90 rank conformance check)
 !!caller is responsible for completion checks (mpp_sync_self) before and after
     subroutine MPP_TRANSMIT_( put_data, put_len, to_pe, get_data, get_len, from_pe, block, tag, recv_request, &
-                            &  send_request )
+                            &  send_request, omp_offload )
 
       integer, intent(in) :: put_len, to_pe, get_len, from_pe
       MPP_TYPE_, intent(in)  :: put_data(*)
@@ -44,6 +44,8 @@
       logical, intent(in), optional :: block
       integer, intent(in), optional :: tag
       integer, intent(out), optional :: recv_request, send_request
+      logical, intent(in), optional :: omp_offload
+        ! NOTE: omp_offload is unused in this function
 
       integer :: i, outunit
       MPP_TYPE_, allocatable, save :: local_data(:) !local copy used by non-parallel code (no SHMEM or MPI)
@@ -55,6 +57,7 @@
       if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_TRANSMIT: You must first call mpp_init.' )
       if( to_pe.EQ.NULL_PE .AND. from_pe.EQ.NULL_PE )return
 
+
       outunit = stdout()
       if( debug )then
           call SYSTEM_CLOCK(tick)

From e3116ef234591c1c6772b96e68d86ada970c64ed Mon Sep 17 00:00:00 2001
From: Edward Yang <edward.yang@anu.edu.au>
Date: Fri, 17 Apr 2026 13:48:32 +1000
Subject: [PATCH 14/45] use openmp cpu if ompoffload=.false.

This ensures that (un)packing steps in do_group_update is performed
with openmp cpu parallelism if ompoffload=.false.. Previously it
would only do serial. This is implemented by undefining the GPU
macro (currently __NVCOMPILER_OPENMP_GPU) and re-including the
(un)packing files.

To make this work, the default(shared) was used in all the relevant
OpenMP directives. If default(none) is used, the loops would hang
or segfault.
---
 mpp/include/group_update_pack.inc   | 105 +++++++++++++++++++---------
 mpp/include/group_update_unpack.inc |  27 ++++---
 mpp/include/mpp_group_update.fh     |  47 +++++++++++++
 3 files changed, 137 insertions(+), 42 deletions(-)

diff --git a/mpp/include/group_update_pack.inc b/mpp/include/group_update_pack.inc
index 2422c1d8fa..a6ee72d7b8 100644
--- a/mpp/include/group_update_pack.inc
+++ b/mpp/include/group_update_pack.inc
@@ -18,8 +18,13 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
+! nvfortran + cray pointers imposes some restrictions on the loops below:
+!   * the compiler cannot privatise OpenMP cray pointers in offloaded loops. Hence, inner loops
+!     must be ported rather than the whole outer loop.
+!   * the more verbose form of openmp offload loops must be used. Would prefer "target teams loop".
+!   * default(shared) must be used otherwise loops hang or segfault. Would prefer "default(none)".
 #ifndef __NVCOMPILER_OPENMP_GPU
-!$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP parallel do default(shared) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
 !$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
 !$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
 #endif
@@ -35,7 +40,8 @@ if( group%k_loop_inside ) then
            do l=1, group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp target teams distribute parallel do collapse(3) default(shared) &
+              !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_field,ptr) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) &
               !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -54,7 +60,8 @@ if( group%k_loop_inside ) then
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp target teams distribute parallel do collapse(3) default(shared) &
+              !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_field,ptr) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) &
               !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -73,7 +80,8 @@ if( group%k_loop_inside ) then
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp target teams distribute parallel do collapse(3) default(shared) &
+              !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_field,ptr) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) &
               !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -92,7 +100,8 @@ if( group%k_loop_inside ) then
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp target teams distribute parallel do collapse(3) default(shared) &
+              !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_field,ptr) &
               !$omp   map(to: field(is:ie,js:je,1:ksize)) &
               !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -114,7 +123,8 @@ if( group%k_loop_inside ) then
            do l=1, nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp target teams distribute parallel do collapse(3) default(shared) &
+              !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_fieldx,ptr) &
               !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
               !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -134,7 +144,8 @@ if( group%k_loop_inside ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp target teams distribute parallel do collapse(3) default(shared) &
+                 !$omp   private(i,j,k,idx) shared(ksize,is,ie,je,js,pos,nj,ni,ptr_fieldy,ptr) &
                  !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
                  !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -153,7 +164,8 @@ if( group%k_loop_inside ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp target teams distribute parallel do collapse(3) default(shared) &
+                 !$omp   private(i,j,k,idx) shared(ksize,is,ie,je,js,pos,nj,ni,ptr_fieldy,ptr) &
                  !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
                  !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -173,7 +185,8 @@ if( group%k_loop_inside ) then
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp target teams distribute parallel do collapse(3) default(shared) &
+              !$omp   private(i,j,k,idx) shared(ksize,is,ie,je,js,pos,nj,ni,ptr_fieldy,ptr) &
               !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
               !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -195,7 +208,8 @@ if( group%k_loop_inside ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp target teams distribute parallel do collapse(3) default(shared) &
+                 !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_fieldx,ptr) &
                  !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
                  !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
                  !$omp   if(use_device_ptr)
@@ -214,7 +228,8 @@ if( group%k_loop_inside ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp target teams distribute parallel do collapse(3) default(shared) &
+                 !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_fieldx,ptr) &
                  !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
                  !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -237,7 +252,8 @@ if( group%k_loop_inside ) then
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp target teams distribute parallel do collapse(3) default(shared) &
+              !$omp   private(i,j,k,idx) shared(ksize,is,ie,je,js,pos,nj,ni,ptr_fieldy,ptr) &
               !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
               !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -256,7 +272,8 @@ if( group%k_loop_inside ) then
            do l=1,nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(3) private(idx) &
+              !$omp target teams distribute parallel do collapse(3) default(shared) &
+              !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_fieldx,ptr) &
               !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
               !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -276,7 +293,8 @@ if( group%k_loop_inside ) then
               do l=1, nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp target teams distribute parallel do collapse(3) default(shared) &
+                 !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_fieldx,ptr) &
                  !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
                  !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -295,7 +313,8 @@ if( group%k_loop_inside ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp target teams distribute parallel do collapse(3) default(shared) &
+                 !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_fieldx,ptr) &
                  !$omp   map(to: fieldx(is:ie,js:je,1:ksize)) &
                  !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -316,7 +335,8 @@ if( group%k_loop_inside ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp target teams distribute parallel do collapse(3) default(shared) &
+                 !$omp   private(i,j,k,idx) shared(ksize,is,ie,je,js,pos,nj,ni,ptr_fieldy,ptr) &
                  !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
                  !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -335,7 +355,8 @@ if( group%k_loop_inside ) then
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(3) private(idx) &
+                 !$omp target teams distribute parallel do collapse(3) default(shared) &
+                 !$omp   private(i,j,k,idx) shared(ksize,is,ie,je,js,pos,nj,ni,ptr_fieldy,ptr) &
                  !$omp   map(to: fieldy(is:ie,js:je,1:ksize)) &
                  !$omp   map(from: buffer(pos+1:pos+ksize*nj*ni)) &
                  !$omp   if(use_device_ptr)
@@ -356,7 +377,7 @@ if( group%k_loop_inside ) then
   enddo
 else
 #ifndef __NVCOMPILER_OPENMP_GPU
-!$OMP parallel do default(none) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP parallel do default(shared) shared(npack,group,ptr,nvector,ksize,buffer_start_pos) &
 !$OMP                          private(buffer_pos,pos,m,is,ie,js,je,rotation, &
 !$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
 #endif
@@ -374,7 +395,8 @@ else
            do l=1, group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp target teams distribute parallel do collapse(2) default(shared) &
+              !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_field,ptr) &
               !$omp   map(to: field(k, is:ie, js:je)) &
               !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -391,7 +413,8 @@ else
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp target teams distribute parallel do collapse(2) default(shared) &
+              !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_field,ptr) &
               !$omp   map(to: field(k, is:ie, js:je)) &
               !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -408,7 +431,8 @@ else
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp target teams distribute parallel do collapse(2) default(shared) &
+              !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_field,ptr) &
               !$omp   map(to: field(k, is:ie, js:je)) &
               !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -425,7 +449,8 @@ else
            do l=1,group%nscalar  ! loop over number of fields
               ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp target teams distribute parallel do collapse(2) default(shared) &
+              !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_field,ptr) &
               !$omp   map(to: field(k, is:ie, js:je)) &
               !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -445,7 +470,8 @@ else
            do l=1, nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp target teams distribute parallel do collapse(2) default(shared) &
+              !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldx,ptr) &
               !$omp   map(to: fieldx(k, is:ie, js:je)) &
               !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -463,7 +489,8 @@ else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp target teams distribute parallel do collapse(2) default(shared) &
+                 !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldy,ptr) &
                  !$omp   map(to: fieldy(k, is:ie, js:je)) &
                  !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -480,7 +507,8 @@ else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp target teams distribute parallel do collapse(2) default(shared) &
+                 !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldy,ptr) &
                  !$omp   map(to: fieldy(k, is:ie, js:je)) &
                  !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -498,7 +526,8 @@ else
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp target teams distribute parallel do collapse(2) default(shared) &
+              !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldy,ptr) &
               !$omp   map(to: fieldy(k, is:ie, js:je)) &
               !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -516,7 +545,8 @@ else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp target teams distribute parallel do collapse(2) default(shared) &
+                 !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldx,ptr) &
                  !$omp   map(to: fieldx(k, is:ie, js:je)) &
                  !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -533,7 +563,8 @@ else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp target teams distribute parallel do collapse(2) default(shared) &
+                 !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldx,ptr) &
                  !$omp   map(to: fieldx(k, is:ie, js:je)) &
                  !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -554,7 +585,8 @@ else
            do l=1, nvector  ! loop over number of fields
               ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp target teams distribute parallel do collapse(2) default(shared) &
+              !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldy,ptr) &
               !$omp   map(to: fieldy(k, is:ie, js:je)) &
               !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
               !$omp   if (use_device_ptr)
@@ -571,7 +603,8 @@ else
            do l=1,nvector  ! loop over number of fields
               ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-              !$omp target teams distribute parallel do collapse(2) private(idx) &
+              !$omp target teams distribute parallel do collapse(2) default(shared) &
+              !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldx,ptr) &
               !$omp   map(to: fieldx(k, is:ie, js:je)) &
               !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
               !$omp   if(use_device_ptr)
@@ -589,7 +622,8 @@ else
               do l=1, nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp target teams distribute parallel do collapse(2) default(shared) &
+                 !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldx,ptr) &
                  !$omp   map(to: fieldx(k, is:ie, js:je)) &
                  !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -606,7 +640,8 @@ else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp target teams distribute parallel do collapse(2) default(shared) &
+                 !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldx,ptr) &
                  !$omp   map(to: fieldx(k, is:ie, js:je)) &
                  !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -625,7 +660,8 @@ else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp target teams distribute parallel do collapse(2) default(shared) &
+                 !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldy,ptr) &
                  !$omp   map(to: fieldy(k, is:ie, js:je)) &
                  !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
                  !$omp   if (use_device_ptr)
@@ -642,7 +678,8 @@ else
               do l=1,nvector  ! loop over number of fields
                  ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-                 !$omp target teams distribute parallel do collapse(2) private(idx) &
+                 !$omp target teams distribute parallel do collapse(2) default(shared) &
+                 !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldy,ptr) &
                  !$omp   map(to: fieldy(k, is:ie, js:je)) &
                  !$omp   map(from: buffer(pos+1:pos+nj*ni)) &
                  !$omp   if (use_device_ptr)
diff --git a/mpp/include/group_update_unpack.inc b/mpp/include/group_update_unpack.inc
index 286f07bc83..25a31ee8fc 100644
--- a/mpp/include/group_update_unpack.inc
+++ b/mpp/include/group_update_unpack.inc
@@ -18,8 +18,13 @@
 !***********************************************************************
 
 if( group%k_loop_inside ) then
+! nvfortran + cray pointers imposes some restrictions on the loops below:
+!   * the compiler cannot privatise OpenMP cray pointers in offloaded loops. Hence, inner loops
+!     must be ported rather than the whole outer loop.
+!   * the more verbose form of openmp offload loops must be used. Would prefer "target teams loop".
+!   * default(shared) must be used otherwise loops hang or segfault. Would prefer "default(none)".
 #ifndef __NVCOMPILER_OPENMP_GPU
-!$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP parallel do default(shared) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
 !$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
 !$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy, n,k,ni,nj,idx)
 #endif
@@ -32,7 +37,8 @@ if( group%k_loop_inside ) then
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-           !$omp target teams distribute parallel do collapse(3) if(use_device_ptr) private(idx) &
+           !$omp target teams distribute parallel do collapse(3) if(use_device_ptr) default(shared) &
+           !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_field,ptr) &
            !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: field(is:ie,js:je,1:ksize))
 #endif
@@ -50,7 +56,8 @@ if( group%k_loop_inside ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-           !$omp target teams distribute parallel do collapse(3) private(idx) &
+           !$omp target teams distribute parallel do collapse(3) default(shared) &
+           !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_fieldx,ptr) &
            !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: fieldx(is:ie,js:je,1:ksize)) if(use_device_ptr)
 #endif
@@ -68,7 +75,8 @@ if( group%k_loop_inside ) then
         do l=1,nvector  ! loop over number of fields
            ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-           !$omp target teams distribute parallel do collapse(3) private(idx) &
+           !$omp target teams distribute parallel do collapse(3) default(shared) &
+           !$omp   private(i,j,k,idx) shared(ksize,js,je,is,ie,pos,nj,ni,ptr_fieldy,ptr) &
            !$omp   map(to: buffer(pos+1:pos+ksize*nj*ni)) &
            !$omp   map(from: fieldy(is:ie,js:je,1:ksize)) if(use_device_ptr)
 #endif
@@ -86,7 +94,7 @@ if( group%k_loop_inside ) then
   enddo
 else
 #ifndef __NVCOMPILER_OPENMP_GPU
-!$OMP parallel do default(none) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
+!$OMP parallel do default(shared) shared(nunpack,group,nscalar,ptr,nvector,ksize,buffer_start_pos) &
 !$OMP                          private(buffer_pos,pos,m,is, ie, js, je,rotation, &
 !$OMP                                  ptr_field, ptr_fieldx, ptr_fieldy,n,k,ni,nj,idx)
 #endif
@@ -101,7 +109,8 @@ else
         do l=1,nscalar  ! loop over number of fields
            ptr_field = group%addrs_s(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-           !$omp target teams distribute parallel do collapse(2) private(idx) &
+           !$omp target teams distribute parallel do collapse(2) default(shared) &
+           !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_field,ptr) &
            !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: field(is:ie,js:je,k)) if(use_device_ptr)
 #endif
            do j = js, je
@@ -116,7 +125,8 @@ else
         do l=1,nvector  ! loop over number of fields
            ptr_fieldx = group%addrs_x(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-           !$omp target teams distribute parallel do collapse(2) private(idx) &
+           !$omp target teams distribute parallel do collapse(2) default(shared) &
+           !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldx,ptr) &
            !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: fieldx(is:ie,js:je,k)) if(use_device_ptr)
 #endif
            do j = js, je
@@ -131,7 +141,8 @@ else
         do l=1,nvector  ! loop over number of fields
            ptr_fieldy = group%addrs_y(l)
 #ifdef __NVCOMPILER_OPENMP_GPU
-           !$omp target teams distribute parallel do collapse(2) private(idx) &
+           !$omp target teams distribute parallel do collapse(2) default(shared) &
+           !$omp   private(i,j,idx) shared(k,js,je,is,ie,pos,ni,ptr_fieldy,ptr) &
            !$omp   map(to: buffer(pos+1:pos+nj*ni)) map(from: fieldy(is:ie,js:je,k)) if(use_device_ptr)
 #endif
            do j = js, je
diff --git a/mpp/include/mpp_group_update.fh b/mpp/include/mpp_group_update.fh
index 4f4c668b7d..b8aaadabe3 100644
--- a/mpp/include/mpp_group_update.fh
+++ b/mpp/include/mpp_group_update.fh
@@ -503,7 +503,19 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
   call mpp_clock_begin(group_pack_clock)
   !pack the data
   buffer_start_pos = 0
+  ! below switch runs OpenMP offloaded packing if ompoffload is .true. and compiled with 
+  ! OpenMP offload support. Otherwise, run OpenMP CPU by undefining GPU macro if defined
+  if (use_device_ptr) then
 #include <group_update_pack.inc>
+  else
+#ifdef __NVCOMPILER_OPENMP_GPU
+#undef __NVCOMPILER_OPENMP_GPU
+#include <group_update_pack.inc>
+#define __NVCOMPILER_OPENMP_GPU
+#else
+#include <group_update_pack.inc>
+#endif
+  endif
   call mpp_clock_end(group_pack_clock)
 
   call mpp_clock_begin(group_send_clock)
@@ -526,10 +538,20 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
   !---unpack the buffer
   nunpack = group%nunpack
   call mpp_clock_begin(group_unpk_clock)
+  ! below switch runs OpenMP offloaded unpacking if ompoffload is .true. and compiled with 
+  ! OpenMP offload support. Otherwise, run OpenMP CPU by undefining GPU macro if defined
+  if (use_device_ptr) then
 #include <group_update_unpack.inc>
+  else
 #ifdef __NVCOMPILER_OPENMP_GPU
+#undef __NVCOMPILER_OPENMP_GPU
+#include <group_update_unpack.inc>
+#define __NVCOMPILER_OPENMP_GPU
   !$omp target exit data map(release: buffer) if(use_device_ptr)
+#else
+#include <group_update_unpack.inc>
 #endif
+  endif
   call mpp_clock_end(group_unpk_clock)
 
   ! ---northern boundary fold
@@ -738,7 +760,19 @@ subroutine MPP_START_GROUP_UPDATE_(group, domain, d_type, reuse_buffer)
   call mpp_clock_begin(nonblock_group_pack_clock)
   npack = group%npack
   buffer_start_pos = group%buffer_start_pos
+  ! below switch runs OpenMP offloaded packing if ompoffload is .true. and compiled with 
+  ! OpenMP offload support. Otherwise, run OpenMP CPU by undefining GPU macro if defined
+  if (use_device_ptr) then
 #include <group_update_pack.inc>
+  else
+#ifdef __NVCOMPILER_OPENMP_GPU
+#undef __NVCOMPILER_OPENMP_GPU
+#include <group_update_pack.inc>
+#define __NVCOMPILER_OPENMP_GPU
+#else
+#include <group_update_pack.inc>
+#endif
+  endif
   call mpp_clock_end(nonblock_group_pack_clock)
 
   call mpp_clock_begin(nonblock_group_send_clock)
@@ -807,7 +841,20 @@ subroutine MPP_COMPLETE_GROUP_UPDATE_(group, domain, d_type)
 
   call mpp_clock_begin(nonblock_group_unpk_clock)
   buffer_start_pos = group%buffer_start_pos
+  ! below switch runs OpenMP offloaded unpacking if ompoffload is .true. and compiled with 
+  ! OpenMP offload support. Otherwise, run OpenMP CPU by undefining GPU macro if defined
+  if (use_device_ptr) then
 #include <group_update_unpack.inc>
+  else
+#ifdef __NVCOMPILER_OPENMP_GPU
+#undef __NVCOMPILER_OPENMP_GPU
+#include <group_update_unpack.inc>
+#define __NVCOMPILER_OPENMP_GPU
+  !$omp target exit data map(release: buffer) if(use_device_ptr)
+#else
+#include <group_update_unpack.inc>
+#endif
+  endif
   call mpp_clock_end(nonblock_group_unpk_clock)
 
   ! ---northern boundary fold

From 41521155b8177650acbe605ef3f4d2e192e94e75 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Wed, 6 May 2026 12:33:16 -0600
Subject: [PATCH 15/45] Linting clean-up

Removed trailing whitespace, replaced tabs with spaces, and kept all lines at
121 lines or fewer
---
 constants/cesm_constants.fh     |  26 ++++---
 diag_manager/diag_manager.F90   |  19 +++--
 diag_manager/diag_util.F90      |   8 +-
 diag_manager/mppnccombine.c     | 128 ++++++++++++++++----------------
 fms2_io/netcdf_io.F90           |   4 +-
 mpp/include/mpp_group_update.fh |   8 +-
 6 files changed, 101 insertions(+), 92 deletions(-)

diff --git a/constants/cesm_constants.fh b/constants/cesm_constants.fh
index db2ba4bb6c..eee816ce88 100644
--- a/constants/cesm_constants.fh
+++ b/constants/cesm_constants.fh
@@ -27,7 +27,7 @@ character(len=18), public, parameter :: constants_version = 'FMSConstants: CESM'
 ! Constants below use CESM shr values
 
 real(R8),   public, parameter :: KAPPA  = RDGAS/CP_AIR             !< RDGAS / CP_AIR [dimensionless]
-real(R8),   public, parameter :: RHO0R   = 1.0_r8/RHO0        	   !< Reciprocal of average density of sea water [m^3/kg]
+real(R8),   public, parameter :: RHO0R   = 1.0_r8/RHO0             !< Reciprocal of average density of sea water [m^3/kg]
 real(R8),   public, parameter :: RHO_CP  = RHO0*CP_OCEAN           !< (kg/m^3)*(cal/kg/deg C)(joules/cal) =
                                                                    !<(joules/m^3/deg C) [J/m^3/deg]
 real(R8),   public, parameter :: ES0 = 1.0_r8                      !< Humidity factor. Controls the humidity content of the
@@ -49,15 +49,21 @@ real(R8),   public, parameter :: SECONDS_PER_HOUR   = 3600._r8     !< Seconds in
 real(R8),   public, parameter :: SECONDS_PER_MINUTE = 60._r8       !< Seconds in a minute [s]
 real(R8),   public, parameter :: RAD_TO_DEG         = 180._r8/PI   !< Degrees per radian [deg/rad]
 real(R8),   public, parameter :: DEG_TO_RAD         = PI/180._r8   !< Radians per degree [rad/deg]
-real(R8),   public, parameter :: RADIAN             = RAD_TO_DEG   !< Equal to RAD_TO_DEG for backward compatability. [rad/deg]
-real(R8),   public, parameter :: ALOGMIN            = -50.0_r8     !< Minimum value allowed as argument to log function [N/A]
-real(R8),   public, parameter :: EPSLN              = 1.0e-40_r8   !< A small number to prevent divide by zero exceptions [N/A]
-real(R8),   public, parameter :: RADCON = ((1.0E+02_r8*GRAV)/(1.0E+04_r8*CP_AIR))*SECONDS_PER_DAY !< convert flux divergence
-                                                                   !to heating rate in degrees per day [deg sec/(cm day)]
+real(R8),   public, parameter :: RADIAN             = RAD_TO_DEG   !< Equal to RAD_TO_DEG for backward compatability.
+                                                                   !! [rad/deg]
+real(R8),   public, parameter :: ALOGMIN            = -50.0_r8     !< Minimum value allowed as argument to log function
+                                                                   !! [N/A]
+real(R8),   public, parameter :: EPSLN              = 1.0e-40_r8   !< A small number to prevent divide by zero exceptions
+                                                                   !! [N/A]
+real(R8),   public, parameter :: RADCON = ((1.0E+02_r8*GRAV)/(1.0E+04_r8*CP_AIR))*SECONDS_PER_DAY !< convert flux
+                                                                   !! divergence to heating rate in degrees per day
+                                                                   !! [deg sec/(cm day)]
 real(R8),   public, parameter :: RADCON_MKS  = (GRAV/CP_AIR)*SECONDS_PER_DAY !< Factor used to convert flux divergence to
-                                                                   !< heating rate in degrees per day [deg sec/(m day)]
-real(R8),   public, parameter :: O2MIXRAT    = 2.0953E-01_r8       !< Mixing ratio of molecular oxygen in air [dimensionless]
-real(R8),   public, parameter :: C2DBARS     = 1.e-4_r8            !< rho*g*z(mks) to dbars: 1dbar = 10^4 (kg/m^3)(m/s^2)m [dbars]
+                                                                   !! heating rate in degrees per day [deg sec/(m day)]
+real(R8),   public, parameter :: O2MIXRAT    = 2.0953E-01_r8       !< Mixing ratio of molecular oxygen in air
+                                                                   !! [dimensionless]
+real(R8),   public, parameter :: C2DBARS     = 1.e-4_r8            !< rho*g*z(mks) to dbars: 1dbar = 10^4
+                                                                   !! (kg/m^3)(m/s^2)m [dbars]
 real(R8),   public, parameter :: KELVIN      = 273.15_r8           !< Degrees Kelvin at zero Celsius [K]
 
 #ifdef SMALL_EARTH
@@ -70,4 +76,4 @@ real(R8),   public, parameter :: KELVIN      = 273.15_r8           !< Degrees Ke
 #endif
 #else
  real, public, parameter :: small_fac = 1._r8
-#endif
\ No newline at end of file
+#endif
diff --git a/diag_manager/diag_manager.F90 b/diag_manager/diag_manager.F90
index d0d6e4a3ad..335e805d7b 100644
--- a/diag_manager/diag_manager.F90
+++ b/diag_manager/diag_manager.F90
@@ -239,7 +239,8 @@ MODULE diag_manager_mod
   USE fms_diag_outfield_mod, ONLY: fmsDiagOutfieldIndex_type, fmsDiagOutfield_type
   USE fms_diag_fieldbuff_update_mod, ONLY: fieldbuff_update, fieldbuff_copy_missvals, &
    & fieldbuff_copy_fieldvals
-  use netcdf_io_mod, ONLY: filepath_list_type, partitioned_global_files, partitioned_section_files, append_to_filepath_list
+  use netcdf_io_mod, ONLY: filepath_list_type, partitioned_global_files, partitioned_section_files, &
+   & append_to_filepath_list
 
 #ifdef use_netCDF
   USE netcdf, ONLY: NF90_INT, NF90_FLOAT, NF90_CHAR
@@ -376,7 +377,7 @@ MODULE diag_manager_mod
   END INTERFACE diag_field_add_attribute
 
 ! ----- interface to the C function -----
-interface 
+interface
   function exec_mppnccombine(outfile) bind(C)
     use, intrinsic :: iso_c_binding, only: c_int, c_char
     implicit none
@@ -3733,7 +3734,7 @@ subroutine combine_files()
     integer(c_int) :: smallest_pix ! The smallest IO PE index of the set of IO PEs writing the current section file.
     integer(c_int) :: ireturn      ! Return code from mppnccombine
     integer(c_int) :: niopes       ! Number of IO PEs participating in writing of global files
-    integer :: f                   ! File index for the global diagnostic files          
+    integer :: f                   ! File index for the global diagnostic files
     integer :: pix_order           ! 0-based order of the IO PE in the list of all IO PEs writing the file.
     type(filepath_list_type), pointer :: current
     type(filepath_list_type), pointer :: files_to_combine ! list of files to combined by this PE
@@ -3754,8 +3755,8 @@ subroutine combine_files()
       filepath = trim(adjustl(current%path))
       outfile = filepath(1:len(filepath)-5) // c_null_char
 
-      ! get the number of files to combine (for the first global file only). The number of files is the 
-      ! same for all global files. Similarly, get pix and pix_order for the first file only, since for 
+      ! get the number of files to combine (for the first global file only). The number of files is the
+      ! same for all global files. Similarly, get pix and pix_order for the first file only, since for
       ! all global files, the pix and pix_order are the same.
       if (niopes == 0) then
          niopes = num_partitioned_files(outfile)
@@ -3806,7 +3807,9 @@ subroutine combine_files()
        do while (associated(current))
           !write(stdout_unit,*) ' Combining file ' // filepath(1:len(filepath)-5)
           ireturn = exec_mppnccombine(current%path)
-          if (ireturn /= 0) call error_mesg('diag_manager_mod::combine_files', 'mppnccombine failed for file ' // trim(current%path), FATAL)
+          if (ireturn /= 0) &
+            call error_mesg('diag_manager_mod::combine_files', 'mppnccombine failed for file ' // &
+                trim(current%path), FATAL)
           current => current%next
        end do
     end if
@@ -3825,10 +3828,10 @@ function get_pix_order(filename, num_files, pix) result(pix_order)
       ! local
       integer :: pix_order       ! 0-based order of the pix in the list of all IO PEs writing the file
       character(len=4) :: suffix ! 0000, 0001, etc.
-      integer :: npes            ! total number of all PEs 
+      integer :: npes            ! total number of all PEs
       integer :: i, f
       logical :: exists
-   
+
       npes = mpp_npes()
       pix_order = -1
 
diff --git a/diag_manager/diag_util.F90 b/diag_manager/diag_util.F90
index e42c2d7639..1e3c6f7557 100644
--- a/diag_manager/diag_util.F90
+++ b/diag_manager/diag_util.F90
@@ -1726,8 +1726,8 @@ SUBROUTINE opening_file(file, time, filename_time)
     INTEGER, ALLOCATABLE  :: axesc(:) ! indices if compressed axes associated with the field
     LOGICAL :: time_ops, aux_present, match_aux_name, req_present, match_req_fields
     CHARACTER(len=7) :: avg_name = 'average'
-    CHARACTER(len=MAX_NAME_LENGTH) :: time_units, timeb_units, avg, error_string, filename, aux_name, req_fields, fieldname
-    CHARACTER(len=MAX_NAME_LENGTH) :: suffix, base_name
+    CHARACTER(len=MAX_NAME_LENGTH) :: time_units, timeb_units, avg, error_string, filename, aux_name, req_fields
+    CHARACTER(len=MAX_NAME_LENGTH) :: fieldname, suffix, base_name
     CHARACTER(len=32) :: time_name, timeb_name,time_longname, timeb_longname, cart_name
     CHARACTER(len=MAX_FILENAME_LENGTH) :: fname
     CHARACTER(len=24) :: start_date
@@ -2290,10 +2290,10 @@ integer function find_first_fms_percent(filename)
     integer :: i
     integer :: first_percent
     integer :: first_percent_loc
-    
+
     first_percent = INDEX(filename, '%1')
     do i=2,9
-       write(first_fms_percent,"('%',i1)") i 
+       write(first_fms_percent,"('%',i1)") i
        first_percent_loc = INDEX(filename, first_fms_percent)
        if (first_percent_loc > 0) then
           if (first_percent == 0) then
diff --git a/diag_manager/mppnccombine.c b/diag_manager/mppnccombine.c
index bf0546c673..4dfb1c7877 100644
--- a/diag_manager/mppnccombine.c
+++ b/diag_manager/mppnccombine.c
@@ -142,19 +142,19 @@
  #include <unistd.h>
  #include <glob.h>
  #include <limits.h>
- 
+
  #ifndef MAX_BF
  #define MAX_BF 100 /* maximum blocking factor */
  #endif
  #ifndef DEFAULT_BF /* default blocking factor, if none set */
  #define DEFAULT_BF 1
  #endif
- 
+
  /* Block size for NetCDF file open/reads */
  #ifndef NC_BLKSZ
  #define NC_BLKSZ 65536
  #endif
- 
+
  /* Information structure for a file */
  struct fileinfo
  {
@@ -190,20 +190,20 @@ int flush_decomp (struct fileinfo *, int, int, int, unsigned char);
  void print_debug (struct fileinfo *, unsigned char);
  char *nc_type_to_str (nc_type);
  int min (int, int);
- 
+
  static void ***varbuf
      = NULL; /* Buffers for multiple records of decomposed var */
- 
+
  struct rusage ruse; /* structure used to store values from getrusage() */
  static unsigned long maxrss = 0; /* maximum memory used so far in kilobytes */
  static int print_mem_usage = 0;
  static unsigned long mem_allocated = 0; /* memory allocated so far */
- 
+
  static const char version[] = "2024.02";
- 
+
  static unsigned long estimated_maxrss = 0; /* see option: -x */
  static int mem_dry_run = 0;                /* set if -x option is used */
- 
+
  static inline void
  check_mem_usage (void)
  {
@@ -233,7 +233,7 @@ int flush_decomp (struct fileinfo *, int, int, int, unsigned char);
    prev_rss = rss;
    return;
  }
- 
+
  static void
  print_estimated_mem_footprint (int verbose)
  {
@@ -586,7 +586,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
        if (!strcmp (strptr, ".0000"))
          outfilename[outlen - 5] = '\0';
      }
- 
+
    /* if -x (estimate memory usage) is set, k will be automatically set to 1 */
    if (mem_dry_run)
      {
@@ -598,10 +598,10 @@ char** find_partitioned_files(const char* filepath, int* count) {
          printf ("This run will estimate peak memory resident size. No output "
                  "file will be created.\n");
      }
- 
+
    /* Disable fatal returns from netCDF library functions */
    ncopts = 0;
- 
+
    if (!mem_dry_run)
      {
        /* Create a new netCDF output file */
@@ -642,7 +642,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
              }
          }
      }
- 
+
    /* No input files are specified on the command-line */
    if (inputarg == (-1))
      {
@@ -904,7 +904,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
                  varbuf[k][v] = NULL;
                }
        }
- 
+
    /* Cleanup and check for any input or output file errors */
    if (ncsync (ncoutfile->ncfid) == (-1))
      outfileerrors++;
@@ -973,7 +973,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
        return (1);
      }
  }
- 
+
  /* Print the usage message for mppnccombine */
  void
  usage ()
@@ -1071,7 +1071,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
            "value of 1\n");
    printf ("otherwise.\n");
  }
- 
+
  int
  min (int a, int b)
  {
@@ -1079,7 +1079,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
      return a;
    return b;
  }
- 
+
  /* Open an input file and get some information about it, define the   */
  /* structure of the output file if necessary, prepare to copy all the */
  /* variables for the current block to memory (and non-decomposed variables */
@@ -1103,10 +1103,10 @@ char** find_partitioned_files(const char* filepath, int* count) {
    char attname[MAX_NC_NAME];       /* Name of a global or variable attribute */
    unsigned char ncinfileerror = 0; /* Were there any file errors? */
    size_t blksz = NC_BLKSZ;         /* netCDF block size */
- 
+
    if (print_mem_usage)
      check_mem_usage ();
- 
+
    /* Information for netCDF input file */
    if ((ncinfile = (struct fileinfo *)malloc (sizeof (struct fileinfo)))
        == NULL)
@@ -1114,7 +1114,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
        fprintf (stderr, "Error: cannot allocate enough memory!\n");
        return (1);
      }
- 
+
    /* Open an input netCDF file */
    if ((ncinfile->ncfid = ncopen (ncname, NC_NOWRITE)) == (-1))
      {
@@ -1122,7 +1122,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
        free (ncinfile);
        return (1);
      }
- 
+
    /* Determine the number of files in the decomposed domain */
    if (ncattget (ncinfile->ncfid, NC_GLOBAL, "NumFilesInSet", (void *)&nfiles2)
        == (-1))
@@ -1141,7 +1141,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
          }
      }
    *nfiles = nfiles2;
- 
+
    /* Get some general information about the input netCDF file */
    if (ncinquire (ncinfile->ncfid, &(ncinfile->ndims), &(ncinfile->nvars),
                   &(ncinfile->ngatts), &(ncinfile->recdim))
@@ -1152,7 +1152,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
        free (ncinfile);
        return (1);
      }
- 
+
    /* Get some information about the dimensions */
    for (d = 0; d < ncinfile->ndims; d++)
      {
@@ -1170,14 +1170,14 @@ char** find_partitioned_files(const char* filepath, int* count) {
        ncinfile->dimstart[d] = 1;
        ncinfile->dimend[d] = (-1);
      }
- 
+
    /* Save some information for the output file */
    if ((block == 0) && (!mem_dry_run))
      {
        ncoutfile->nvars = ncinfile->nvars;
        ncoutfile->recdim = ncinfile->recdim;
      }
- 
+
    /* Get some information about the variables */
    for (v = 0; v < ncinfile->nvars; v++)
      {
@@ -1191,7 +1191,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
            free (ncinfile);
            return (1);
          }
- 
+
        /* If the variable is also a dimension then get decomposition info */
        if ((dimid = ncdimid (ncinfile->ncfid, ncinfile->varname[v])) != (-1))
          {
@@ -1213,14 +1213,14 @@ char** find_partitioned_files(const char* filepath, int* count) {
              }
          }
      }
- 
+
    /* Get some additional information about the variables */
    for (v = 0; v < ncinfile->nvars; v++)
      {
- 
+
        /* start by assuming the variable has no decomposed dimension */
        ncinfile->vardecomp[v] = 0;
- 
+
        /* now, iterate over the variable's dimensions and mark the */
        /* variable as a decomposed variable if any dimension of */
        /* the variable is decomposed */
@@ -1233,7 +1233,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
                break;
              }
          }
- 
+
        /* Save some information for the output file */
        /* This only needs to be done once per output file */
        if ((!appendnc) && (!mem_dry_run))
@@ -1248,13 +1248,13 @@ char** find_partitioned_files(const char* filepath, int* count) {
            ncoutfile->varmiss[v] = 0;
          }
      }
- 
+
    /* If the output netCDF file was just created then define its structure */
    if ((!appendnc) && (!mem_dry_run))
      {
        if (verbose)
          printf ("    Creating output \"%s\"\n", outncname);
- 
+
        /* Determine the format of the input netCDF file */
        if (nc_inq_format (ncinfile->ncfid, &ncinformat) == (-1))
          {
@@ -1263,7 +1263,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
            free (ncinfile);
            return (1);
          }
- 
+
        /* Determine the format of the output netCDF file */
        if (nc_inq_format (ncoutfile->ncfid, &ncoutformat) == (-1))
          {
@@ -1272,11 +1272,11 @@ char** find_partitioned_files(const char* filepath, int* count) {
            free (ncinfile);
            return (1);
          }
- 
+
        if (verbose)
          printf ("    ncinformat=%d, ncoutformat=%d\n", ncinformat,
                  ncoutformat);
- 
+
        /* If the format option (-64 or -n4) for the output netCDF file
         * is not specified then recreate the output netCDF file based
         * upon the format of the input netCDF file. */
@@ -1320,7 +1320,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
              }
            ncsetfill (ncoutfile->ncfid, NC_NOFILL);
          }
- 
+
        /* Define the dimensions */
        for (d = 0; d < ncinfile->ndims; d++)
          {
@@ -1330,7 +1330,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
              ncdimdef (ncoutfile->ncfid, ncinfile->dimname[d],
                        ncinfile->dimfullsize[d]);
          }
- 
+
        /* Define the variables and copy their attributes */
        for (v = 0; v < ncinfile->nvars; v++)
          {
@@ -1367,7 +1367,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
                  }
              }
          }
- 
+
        /* Copy the global attributes */
        for (n = 0; n < ncinfile->ngatts; n++)
          {
@@ -1390,7 +1390,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
                  }
              }
          }
- 
+
        if (deflate == 1 && deflation > 0)
          {
            for (v = 0; v < ncinfile->nvars; v++)
@@ -1405,11 +1405,11 @@ char** find_partitioned_files(const char* filepath, int* count) {
                  }
              }
          }
- 
+
        /* Definitions done */
        nc__enddef (ncoutfile->ncfid, headerpad, 4, 0, 4);
      }
- 
+
    /* Copy all data values of the dimensions and variables to memory */
    /* For non-decomposed variables, process_vars will write them to the */
    /* output file. Decomposed variables for N records from this file will */
@@ -1436,13 +1436,13 @@ char** find_partitioned_files(const char* filepath, int* count) {
        r < min (
            ((block + 1) * (*bf)),
            *nrecs)); // r is a minimum of the next block start point and nrecs
- 
+
    /* Done */
    ncclose (ncinfile->ncfid);
    free (ncinfile);
    return (ncinfileerror);
  }
- 
+
  /* Decomposed variables from an input file and record will be written to memory
   */
  /* non-decomposed variables will be written to the output file */
@@ -1468,17 +1468,17 @@ char** find_partitioned_files(const char* filepath, int* count) {
    int recdimsize; /* Using a local value to correct issue when netcdf file does
                       not have a record dimension */
    long long varbufsize;
- 
+
    if (ncinfile->recdim < 0)
      recdimsize = 1;
    else
      recdimsize = ncinfile->dimsize[ncinfile->recdim];
- 
+
    /* Check the number of records */
    if (*nrecs == 1)
      {
        *nrecs = recdimsize;
- 
+
        /* adjust bf */
        if ((*bf) >= 1)
          {
@@ -1504,7 +1504,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
          *nblocks = (int)((*nrecs) / (*bf)) + 1;
        else
          *nblocks = (int)((*nrecs) / (*bf));
- 
+
        if (verbose)
          fprintf (stderr,
                   "blocking factor=%d, num. blocks=%d, num. records=%d\n", *bf,
@@ -1517,7 +1517,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
            "Error: different number of records than the first input file!\n");
        return (1);
      }
- 
+
    /* Allocate memory for the decomposed variables, if none has been allocated
       yet We use an optimized algorithm to malloc and set up a double dimension
       array using a single malloc call. We do the cross-linking after the
@@ -1563,13 +1563,13 @@ char** find_partitioned_files(const char* filepath, int* count) {
          }
        */
      } /* end of memory allocation, done once per block */
- 
+
    /* Loop over all the variables */
    for (v = 0; v < ncinfile->nvars; v++)
      {
        if (verbose > 1)
          printf ("    variable = %s\n", ncinfile->varname[v]);
- 
+
        /* Get read/write dimension sizes for the variable */
        recsize = 1;
        recfullsize = 1;
@@ -1597,7 +1597,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
              printf ("      dim %d:  instart=%ld  outstart=%ld  count=%ld\n", d,
                      instart[d], outstart[d], count[d]);
          }
- 
+
        /* Prevent unnecessary reads/writes */
        if (r > 0)
          {
@@ -1618,11 +1618,11 @@ char** find_partitioned_files(const char* filepath, int* count) {
              {
                /* Prevent unnecessary reads/writes of non-decomposed variables
                if (ncinfile->vardecomp[v]!=1 && appendnc) continue; */
- 
+
                /* Non-record variables */
                if (varrecdim == (-1))
                  continue;
- 
+
                /* Non-decomposed record variables */
                if (ncinfile->vardecomp[v] != 1 && f > 0)
                  continue;
@@ -1633,7 +1633,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
            if (ncinfile->vardecomp[v] != 1 && appendnc)
              continue;
          }
- 
+
        /* Allocate a buffer for the variable's record */
        if ((values = malloc (nctypelen (ncinfile->datatype[v]) * recsize))
            == NULL)
@@ -1645,7 +1645,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
                     ncinfile->varname[v]);
            return (1);
          }
- 
+
        /* Read the variable */
        if (varrecdim != (-1))
          instart[varrecdim] = outstart[varrecdim] = r;
@@ -1655,7 +1655,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
                     ncinfile->varname[v]);
            return (1);
          }
- 
+
        /* Write the buffered variable immediately if it's not decomposed */
        if ((ncinfile->vardecomp[v] != 1) && (!mem_dry_run))
          {
@@ -1740,7 +1740,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
            if (verbose > 1)
              printf ("      writing %lld bytes to memory\n",
                      nctypelen (ncinfile->datatype[v]) * recsize);
- 
+
            imax = ncinfile
                       ->dimsize[ncinfile->vardim[v][ncinfile->varndims[v] - 1]];
            if (ncinfile->varndims[v] > 1)
@@ -1776,7 +1776,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
            if (verbose > 1)
              printf ("      imap=%d  jmax=%d  kmax=%d  lmax=%d\n", imax, jmax,
                      kmax, lmax);
- 
+
            imaxfull
                = ncinfile->dimfullsize[ncinfile->vardim[v][ncinfile->varndims[v]
                                                            - 1]];
@@ -1808,7 +1808,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
                      imaxfull, jmaxfull, kmaxfull, lmaxfull);
            imaxjmaxfull = imaxfull * jmaxfull;
            imaxjmaxkmaxfull = imaxfull * jmaxfull * kmaxfull;
- 
+
            ioffset = outstart[ncinfile->varndims[v] - 0 - 1];
            if (ncinfile->varndims[v] > 1)
              joffset = outstart[ncinfile->varndims[v] - 1 - 1];
@@ -1939,7 +1939,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
                break;
              }
          }
- 
+
        /* Deallocate the decomposed variable's buffer */
        if (values != NULL)
          free (values);
@@ -1947,7 +1947,7 @@ char** find_partitioned_files(const char* filepath, int* count) {
    first = 0;
    return (0);
  }
- 
+
  /* Write all the buffered decomposed variables to the output file */
  int
  flush_decomp (struct fileinfo *ncoutfile, int nfiles, int r, int bf,
@@ -1957,12 +1957,12 @@ char** find_partitioned_files(const char* filepath, int* count) {
    long outstart[MAX_NC_DIMS]; /* Data array sizes */
    long count[MAX_NC_DIMS];    /*        "         */
    int varrecdim;              /* Position of a variable's record dimension */
- 
+
    if (verbose > 1)
      {
        printf ("    nvars=%d\n", ncoutfile->nvars);
      }
- 
+
    /* Write out all the decomposed variables */
    for (v = 0; v < ncoutfile->nvars; v++)
      {
@@ -2002,4 +2002,4 @@ char** find_partitioned_files(const char* filepath, int* count) {
          }
      }
    return (0);
- }
\ No newline at end of file
+ }
diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90
index e3c120b8e8..bdfba1f843 100644
--- a/fms2_io/netcdf_io.F90
+++ b/fms2_io/netcdf_io.F90
@@ -247,7 +247,7 @@ module netcdf_io_mod
 public :: read_restart_bc
 public :: flush_file
 public :: partitioned_global_files
-public :: partitioned_section_files 
+public :: partitioned_section_files
 public :: append_to_filepath_list
 
 !> @ingroup netcdf_io_mod
@@ -703,7 +703,7 @@ subroutine append_to_filepath_list(filepath, filepath_list)
   else
     current => filepath_list
     do
-      ! If file is already in the list, return. This happens when new_file_freq (e.g., daily) 
+      ! If file is already in the list, return. This happens when new_file_freq (e.g., daily)
       ! is more frequent than the date suffix (e.g., %4yr-%2mo), leading to overriding the file.
       if (string_compare(current%path, trim(filepath))) return
       ! If we reach the end of the list, exit the loop to add the new file.
diff --git a/mpp/include/mpp_group_update.fh b/mpp/include/mpp_group_update.fh
index b8aaadabe3..5bbde5ce6a 100644
--- a/mpp/include/mpp_group_update.fh
+++ b/mpp/include/mpp_group_update.fh
@@ -503,7 +503,7 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
   call mpp_clock_begin(group_pack_clock)
   !pack the data
   buffer_start_pos = 0
-  ! below switch runs OpenMP offloaded packing if ompoffload is .true. and compiled with 
+  ! below switch runs OpenMP offloaded packing if ompoffload is .true. and compiled with
   ! OpenMP offload support. Otherwise, run OpenMP CPU by undefining GPU macro if defined
   if (use_device_ptr) then
 #include <group_update_pack.inc>
@@ -538,7 +538,7 @@ subroutine MPP_DO_GROUP_UPDATE_(group, domain, d_type, omp_offload)
   !---unpack the buffer
   nunpack = group%nunpack
   call mpp_clock_begin(group_unpk_clock)
-  ! below switch runs OpenMP offloaded unpacking if ompoffload is .true. and compiled with 
+  ! below switch runs OpenMP offloaded unpacking if ompoffload is .true. and compiled with
   ! OpenMP offload support. Otherwise, run OpenMP CPU by undefining GPU macro if defined
   if (use_device_ptr) then
 #include <group_update_unpack.inc>
@@ -760,7 +760,7 @@ subroutine MPP_START_GROUP_UPDATE_(group, domain, d_type, reuse_buffer)
   call mpp_clock_begin(nonblock_group_pack_clock)
   npack = group%npack
   buffer_start_pos = group%buffer_start_pos
-  ! below switch runs OpenMP offloaded packing if ompoffload is .true. and compiled with 
+  ! below switch runs OpenMP offloaded packing if ompoffload is .true. and compiled with
   ! OpenMP offload support. Otherwise, run OpenMP CPU by undefining GPU macro if defined
   if (use_device_ptr) then
 #include <group_update_pack.inc>
@@ -841,7 +841,7 @@ subroutine MPP_COMPLETE_GROUP_UPDATE_(group, domain, d_type)
 
   call mpp_clock_begin(nonblock_group_unpk_clock)
   buffer_start_pos = group%buffer_start_pos
-  ! below switch runs OpenMP offloaded unpacking if ompoffload is .true. and compiled with 
+  ! below switch runs OpenMP offloaded unpacking if ompoffload is .true. and compiled with
   ! OpenMP offload support. Otherwise, run OpenMP CPU by undefining GPU macro if defined
   if (use_device_ptr) then
 #include <group_update_unpack.inc>

From d26f787f40f77f0eecfe9dd3640a433bb417d599 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Wed, 6 May 2026 12:39:38 -0600
Subject: [PATCH 16/45] One more lint clean-up

Missed one of the long lines in cesm_constants.fh
---
 constants/cesm_constants.fh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/constants/cesm_constants.fh b/constants/cesm_constants.fh
index eee816ce88..ed8cb6f197 100644
--- a/constants/cesm_constants.fh
+++ b/constants/cesm_constants.fh
@@ -30,9 +30,10 @@ real(R8),   public, parameter :: KAPPA  = RDGAS/CP_AIR             !< RDGAS / CP
 real(R8),   public, parameter :: RHO0R   = 1.0_r8/RHO0             !< Reciprocal of average density of sea water [m^3/kg]
 real(R8),   public, parameter :: RHO_CP  = RHO0*CP_OCEAN           !< (kg/m^3)*(cal/kg/deg C)(joules/cal) =
                                                                    !<(joules/m^3/deg C) [J/m^3/deg]
-real(R8),   public, parameter :: ES0 = 1.0_r8                      !< Humidity factor. Controls the humidity content of the
-                                                                   !< atmosphere through the Saturation Vapour Pressure
-                                                                   !< expression when using DO_SIMPLE. [dimensionless]
+real(R8),   public, parameter :: ES0 = 1.0_r8                      !< Humidity factor. Controls the humidity content of
+                                                                   !! the atmosphere through the Saturation Vapour
+                                                                   !! Pressure expression when using DO_SIMPLE.
+                                                                   !! [dimensionless]
 real(R8),   public, parameter :: HLS = HLV + HLF                   !< Latent heat of sublimation [J/kg]
 real(R8),   public, parameter :: WTMOZONE =  47.99820_r8           !< Molecular weight of ozone [AMU]
 real(R8),   public, parameter :: WTMC     =  12.00000_r8           !< Molecular weight of carbon [AMU]

From 6b71b2d53eba9460aaa8713b006f95dce67adbf6 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Wed, 6 May 2026 12:55:39 -0600
Subject: [PATCH 17/45] One more linting commit

I thought I shortened all the long lines in cesm_constants.fh but my awk
command to find the line length was off-by-one
---
 constants/cesm_constants.fh | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/constants/cesm_constants.fh b/constants/cesm_constants.fh
index ed8cb6f197..2904b0e4f9 100644
--- a/constants/cesm_constants.fh
+++ b/constants/cesm_constants.fh
@@ -27,7 +27,8 @@ character(len=18), public, parameter :: constants_version = 'FMSConstants: CESM'
 ! Constants below use CESM shr values
 
 real(R8),   public, parameter :: KAPPA  = RDGAS/CP_AIR             !< RDGAS / CP_AIR [dimensionless]
-real(R8),   public, parameter :: RHO0R   = 1.0_r8/RHO0             !< Reciprocal of average density of sea water [m^3/kg]
+real(R8),   public, parameter :: RHO0R   = 1.0_r8/RHO0             !< Reciprocal of average density of sea water
+                                                                   !!  [m^3/kg]
 real(R8),   public, parameter :: RHO_CP  = RHO0*CP_OCEAN           !< (kg/m^3)*(cal/kg/deg C)(joules/cal) =
                                                                    !<(joules/m^3/deg C) [J/m^3/deg]
 real(R8),   public, parameter :: ES0 = 1.0_r8                      !< Humidity factor. Controls the humidity content of
@@ -54,13 +55,14 @@ real(R8),   public, parameter :: RADIAN             = RAD_TO_DEG   !< Equal to R
                                                                    !! [rad/deg]
 real(R8),   public, parameter :: ALOGMIN            = -50.0_r8     !< Minimum value allowed as argument to log function
                                                                    !! [N/A]
-real(R8),   public, parameter :: EPSLN              = 1.0e-40_r8   !< A small number to prevent divide by zero exceptions
-                                                                   !! [N/A]
+real(R8),   public, parameter :: EPSLN              = 1.0e-40_r8   !< A small number to prevent divide by zero
+                                                                   !! exceptions [N/A]
 real(R8),   public, parameter :: RADCON = ((1.0E+02_r8*GRAV)/(1.0E+04_r8*CP_AIR))*SECONDS_PER_DAY !< convert flux
                                                                    !! divergence to heating rate in degrees per day
                                                                    !! [deg sec/(cm day)]
-real(R8),   public, parameter :: RADCON_MKS  = (GRAV/CP_AIR)*SECONDS_PER_DAY !< Factor used to convert flux divergence to
-                                                                   !! heating rate in degrees per day [deg sec/(m day)]
+real(R8),   public, parameter :: RADCON_MKS  = (GRAV/CP_AIR)*SECONDS_PER_DAY !< Factor used to convert flux divergence
+                                                                   !! to heating rate in degrees per day
+                                                                   !! [deg sec/(m day)]
 real(R8),   public, parameter :: O2MIXRAT    = 2.0953E-01_r8       !< Mixing ratio of molecular oxygen in air
                                                                    !! [dimensionless]
 real(R8),   public, parameter :: C2DBARS     = 1.e-4_r8            !< rho*g*z(mks) to dbars: 1dbar = 10^4

From 1d377f23289cbf0fc18e83c773f217982e94467f Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Fri, 8 May 2026 14:04:17 -0600
Subject: [PATCH 18/45] Update github actions

Use a container with gcc 15.1.0
---
 .github/workflows/build_cmake_gnu.yml  |  2 +-
 .github/workflows/build_ubuntu_gnu.yml | 27 +++++++++++++-------------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/build_cmake_gnu.yml b/.github/workflows/build_cmake_gnu.yml
index f649345d8a..a86329faa3 100644
--- a/.github/workflows/build_cmake_gnu.yml
+++ b/.github/workflows/build_cmake_gnu.yml
@@ -10,7 +10,7 @@ jobs:
         omp-flags: [ -DOPENMP=on, -DOPENMP=off ]
         libyaml-flag: [ "", -DWITH_YAML=on ]
     container:
-      image: noaagfdl/hpc-me.ubuntu-minimal:cmake
+      image: ghcr.io/noaa-gfdl/fms/fms-ci-rocky-gnu:15.1.0
       env:
         CMAKE_FLAGS: "${{ matrix.omp-flags }} ${{ matrix.libyaml-flag }} -D64BIT=on"
     steps:
diff --git a/.github/workflows/build_ubuntu_gnu.yml b/.github/workflows/build_ubuntu_gnu.yml
index f4dc48225f..b5a866c5dc 100644
--- a/.github/workflows/build_ubuntu_gnu.yml
+++ b/.github/workflows/build_ubuntu_gnu.yml
@@ -10,27 +10,26 @@ jobs:
         shell: bash
     strategy:
       matrix:
-        conf-flags: [--disable-openmp, --enable-mixed-mode, --disable-setting-flags, --with-mpi=no]
-        input-flag:  [--with-yaml, --enable-test-input=/home/unit_tests_input]
+        conf-flag: [ --disable-openmp, --with-mpi=no, --disable-r8-default]
+        input-flag:  [--enable-test-input=/home/unit_tests_input]
+        exclude:
+          - conf-flag: --with-mpi=no
+            input-flag: --enable-test-input=/home/unit_tests_input
     container:
-      image: noaagfdl/hpc-me.ubuntu-minimal:gnu-input
+      image: ghcr.io/noaa-gfdl/fms/fms-ci-rocky-gnu:15.1.0
       env:
-        TEST_VERBOSE: 1
-        DISTCHECK_CONFIGURE_FLAGS: "${{ matrix.conf-flags }} ${{ matrix.input-flag }}"
+        DISTCHECK_CONFIGURE_FLAGS: "${{ matrix.conf-flag }} ${{ matrix.input-flag }}"
+        DEBUG_FLAGS: "-O0 -g -fbounds-check -ffpe-trap=invalid,zero,overflow"
     steps:
     - name: Checkout code
-      uses: actions/checkout@v2
+      uses: actions/checkout@v6.0.1
     - name: Prepare GNU autoconf for build
       run: autoreconf -if
     - name: Configure the build
-      if: ${{ matrix.conf-flags != '--disable-setting-flags' }}
-      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} || cat config.log
-    - name: Configure the build with compiler flags
-      if: ${{ matrix.conf-flags == '--disable-setting-flags' }}
-      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="-fdefault-real-8 -fdefault-double-8 -fcray-pointer -ffree-line-length-none -I/usr/include $FCFLAGS" || cat config.log
-    - name: Build the library
+      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="$FCFLAGS $DEBUG_FLAGS" || cat config.log
+    - name: Run distcheck
       run: make distcheck
-      if: ${{ matrix.conf-flags != '--with-mpi=no' }}
+      if: ${{ matrix.conf-flag != '--with-mpi=no' }}
     - name: Build the library (without test suite for serial build)
       run: make
-      if: ${{ matrix.conf-flags == '--with-mpi=no' }}
+      if: ${{ matrix.conf-flag == '--with-mpi=no' }}

From 695a88b422843cd7c107705ff226b12b87557224 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Fri, 8 May 2026 15:56:41 -0600
Subject: [PATCH 19/45] Turn off autoconf CI testing

Also, don't define CESM_CONSTANTS macro (will let us drop CESM share code from
the turbo-stack submodules, and is bit-for-bit)
---
 .github/workflows/build_ubuntu_gnu.yml | 35 --------------------------
 constants/fmsconstants.F90             |  1 -
 2 files changed, 36 deletions(-)
 delete mode 100644 .github/workflows/build_ubuntu_gnu.yml

diff --git a/.github/workflows/build_ubuntu_gnu.yml b/.github/workflows/build_ubuntu_gnu.yml
deleted file mode 100644
index b5a866c5dc..0000000000
--- a/.github/workflows/build_ubuntu_gnu.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Build libFMS test with autotools
-
-on: [push, pull_request]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        conf-flag: [ --disable-openmp, --with-mpi=no, --disable-r8-default]
-        input-flag:  [--enable-test-input=/home/unit_tests_input]
-        exclude:
-          - conf-flag: --with-mpi=no
-            input-flag: --enable-test-input=/home/unit_tests_input
-    container:
-      image: ghcr.io/noaa-gfdl/fms/fms-ci-rocky-gnu:15.1.0
-      env:
-        DISTCHECK_CONFIGURE_FLAGS: "${{ matrix.conf-flag }} ${{ matrix.input-flag }}"
-        DEBUG_FLAGS: "-O0 -g -fbounds-check -ffpe-trap=invalid,zero,overflow"
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v6.0.1
-    - name: Prepare GNU autoconf for build
-      run: autoreconf -if
-    - name: Configure the build
-      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="$FCFLAGS $DEBUG_FLAGS" || cat config.log
-    - name: Run distcheck
-      run: make distcheck
-      if: ${{ matrix.conf-flag != '--with-mpi=no' }}
-    - name: Build the library (without test suite for serial build)
-      run: make
-      if: ${{ matrix.conf-flag == '--with-mpi=no' }}
diff --git a/constants/fmsconstants.F90 b/constants/fmsconstants.F90
index c167a9ffd9..c28b82de01 100644
--- a/constants/fmsconstants.F90
+++ b/constants/fmsconstants.F90
@@ -55,7 +55,6 @@ module FMSconstants
 
   use platform_mod, only: r4_kind, r8_kind
 
-#define CESM_CONSTANTS
 #if defined(CESM_CONSTANTS)
 use shr_kind_mod,   only : R8 => shr_kind_r8
 use shr_const_mod,  only : &

From a440407498e1f615c1be6ccbf6b8c4208d0ca2e4 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Fri, 8 May 2026 16:02:15 -0600
Subject: [PATCH 20/45] Update containers for intel and coupler CI tests

---
 .github/workflows/coupler.yml  | 13 ++++++-------
 .github/workflows/intel_pr.yml | 16 +++++++---------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/coupler.yml b/.github/workflows/coupler.yml
index ea24899b95..1dc2e8d647 100644
--- a/.github/workflows/coupler.yml
+++ b/.github/workflows/coupler.yml
@@ -5,21 +5,20 @@ jobs:
   coupler-build:
     runs-on: ubuntu-latest
     container:
-      image: ryanmulhall/hpc-me.ubuntu-minimal:coupler
+      image: ghcr.io/noaa-gfdl/fms/fms-ci-rocky-gnu:15.1.0
       env:
         CC: mpicc
         FC: mpif90
-        CPPFLAGS: '-I/usr/include -Duse_LARGEFILE -DMAXFIELDMETHODS_=500'
-        FCFLAGS: '-fcray-pointer -fdefault-double-8 -fdefault-real-8 -Waliasing -ffree-line-length-none -fno-range-check -I/usr/include'
-        LDFLAGS: '-L/usr/lib'
-        VERBOSE: 1
+        CPPFLAGS: '-I/opt/view/include'
+        FFLAGS: '-fallow-argument-mismatch' # mkmf uses FFLAGS instead of FC
+        LDFLAGS: '-L/opt/view/lib'
     steps:
     - name: Checkout FMS
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4.2.2
       with:
         path: FMS
     - name: Checkout FMScoupler
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4.2.2
       with:
         repository: 'NOAA-GFDL/FMScoupler'
         path: FMScoupler
diff --git a/.github/workflows/intel_pr.yml b/.github/workflows/intel_pr.yml
index d95519fbf2..e609c03044 100644
--- a/.github/workflows/intel_pr.yml
+++ b/.github/workflows/intel_pr.yml
@@ -3,17 +3,17 @@ jobs:
   intel-autotools:
     runs-on: ubuntu-latest
     container:
-      image: intel/oneapi-hpckit:2022.2-devel-ubuntu20.04
+      image: intel/oneapi-hpckit:2025.2.2-0-devel-ubuntu24.04
       env:
-        CC: mpiicc
-        FC: mpiifort
+        CC: mpicc
+        FC: mpiifx
         CFLAGS:  "-I/libs/include"
         FCFLAGS: "-I/libs/include -g -traceback"
         LDFLAGS: "-L/libs/lib"
         TEST_VERBOSE: 1
         I_MPI_FABRICS: "shm" # needed for mpi in image
         # intel bug causes some failures with shm option(required in container)
-        SKIP_TESTS: "test_mpp_update_domains.1 test_update_domains_performance.1 test_diag_manager2.23"
+        SKIP_TESTS: "test_fms2_io.1 test_mpp_update_domains.1 test_update_domains_performance.1 test_diag_manager2.23"
     steps:
     - name: Cache dependencies
       id: cache
@@ -22,7 +22,7 @@ jobs:
         path: /libs
         key: ${{ runner.os }}-intel-libs
     - name: Install packages for building
-      run: apt update && apt install -y autoconf libtool automake zlibc zlib1g-dev
+      run: apt update && apt install -y autoconf libtool automake zlib1g-dev
     - if: steps.cache.outputs.cache-hit != 'true'
       name: Build netcdf
       run: |
@@ -33,20 +33,18 @@ jobs:
         make -j install && cd ..
         wget https://github.com/Unidata/netcdf-c/archive/refs/tags/v4.8.1.tar.gz
         tar xf v4.8.1.tar.gz && cd netcdf-c-4.8.1
-        ./configure --prefix=/libs --enable-remote-fortran-bootstrap
+        ./configure --prefix=/libs --enable-remote-fortran-bootstrap --disable-libxml2
         make -j install
         # sets this here to pass embeded configure checks
         export LD_LIBRARY_PATH="/libs/lib:$LD_LIBRARY_PATH"
         make -j -k build-netcdf-fortran
         make -j install-netcdf-fortran
-        wget https://github.com/yaml/libyaml/releases/download/0.2.5/yaml-0.2.5.tar.gz
-        tar xf yaml-0.2.5.tar.gz && cd yaml-0.2.5
         ./configure --prefix=/libs
         make -j install && cd
     - name: checkout
       uses: actions/checkout@v2
     - name: Configure
-      run: autoreconf -if ./configure.ac && ./configure --with-yaml
+      run: autoreconf -if ./configure.ac && ./configure
     - name: Compile
       run: make -j || make
     - name: Run test suite

From 207a4658c91f9047ee4c36da34b9b9cf25d4d773 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Fri, 8 May 2026 16:11:12 -0600
Subject: [PATCH 21/45] Drop FMS coupler CI test

I think our version of FMS is much older than what the FMS coupler is
expecting, so tests are failing... but we don't want to update our branch to
include newer FMS code so we should drop the test instead
---
 .github/workflows/coupler.yml | 26 --------------------------
 1 file changed, 26 deletions(-)
 delete mode 100644 .github/workflows/coupler.yml

diff --git a/.github/workflows/coupler.yml b/.github/workflows/coupler.yml
deleted file mode 100644
index 1dc2e8d647..0000000000
--- a/.github/workflows/coupler.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-name: Test coupler build
-on: [pull_request]
-
-jobs:
-  coupler-build:
-    runs-on: ubuntu-latest
-    container:
-      image: ghcr.io/noaa-gfdl/fms/fms-ci-rocky-gnu:15.1.0
-      env:
-        CC: mpicc
-        FC: mpif90
-        CPPFLAGS: '-I/opt/view/include'
-        FFLAGS: '-fallow-argument-mismatch' # mkmf uses FFLAGS instead of FC
-        LDFLAGS: '-L/opt/view/lib'
-    steps:
-    - name: Checkout FMS
-      uses: actions/checkout@v4.2.2
-      with:
-        path: FMS
-    - name: Checkout FMScoupler
-      uses: actions/checkout@v4.2.2
-      with:
-        repository: 'NOAA-GFDL/FMScoupler'
-        path: FMScoupler
-    - name: Test Null build
-      run: FMScoupler/t/null_model_build.sh --local-fms

From f88a0955c1dd07185dadd5f673d2fbb3c266bb46 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 09:22:00 -0600
Subject: [PATCH 22/45] Drop YAML from configure.ac

---
 configure.ac | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/configure.ac b/configure.ac
index 61675e71f7..6cf92e14f2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -66,12 +66,6 @@ AC_ARG_WITH([mpi],
 AS_IF([test ${with_mpi:-yes} = yes],
   [with_mpi=yes],
   [with_mpi=no])
-AC_ARG_WITH([yaml],
-  [AS_HELP_STRING([--with-yaml],
-    [Build with YAML support.  This option will be ignored if --disable-fortran-flag-setting is also given. (Default no)])])
-AS_IF([test ${with_yaml:-no} = no],
-  [with_yaml=no],
-  [with_yaml=yes])
 AC_ARG_ENABLE([setting-flags],
   [AS_HELP_STRING([--enable-setting-flags],
     [Allow configure to set some compiler flags.  Disabling this will also disable any other --with or --enable options that set flags, and will only use user-provided falgs. (Default yes)])])
@@ -181,19 +175,6 @@ if test $with_mpi = yes; then
   GX_MPI()
 fi
 
-# Require yaml
-if test $with_yaml = yes; then
-  AC_CHECK_HEADERS([yaml.h], [], [AC_MSG_ERROR(["Can't find the libYAML C header file.  Set CC/CPPFLAGS/CFLAGS"])])
-  AC_SEARCH_LIBS([yaml_parser_initialize], [yaml], [], [AC_MSG_ERROR(["Can't find the libYAML C library.  Set CC/LDFLAGS/LIBS"])])
-
-  #If the test pass, define use_yaml macro
-  AC_DEFINE([use_yaml], [1], [This is required to use yaml parser])
-
-  AM_CONDITIONAL([SKIP_PARSER_TESTS], false )
-else
-  AM_CONDITIONAL([SKIP_PARSER_TESTS], true )
-fi
-
 # Require netCDF
 AC_CHECK_HEADERS([netcdf.h], [], [AC_MSG_ERROR([Can't find the netCDF C header file.  Set CPPFLAGS/CFLAGS])])
 AC_SEARCH_LIBS([nc_create], [netcdf], [], [AC_MSG_ERROR([Can't find the netCDF C library.  Set LDFLAGS/LIBS])])
@@ -335,10 +316,6 @@ if test $enable_setting_flags = yes; then
     FCFLAGS="$FCFLAGS -prof-gen=srcpos"
     CFLAGS="$CFLAGS -prof-gen=srcpos"
   fi
-  # add yaml flag
-  if test $with_yaml = yes; then
-    LDFLAGS="$LDFLAGS -lyaml"
-  fi
 fi
 
 # Find which mpi launcher to use

From b01892162e38023003b3450f5f2802d305fa0941 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 09:32:14 -0600
Subject: [PATCH 23/45] Drop SKIP_PARSER_TESTS from makefiles

I'm trying to make FMS configure look more like TIM configure, and some I
removed some stuff from configure.ac that is now breaking Makefiles
---
 test_fms/data_override/Makefile.am | 4 ----
 test_fms/parser/Makefile.am        | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/test_fms/data_override/Makefile.am b/test_fms/data_override/Makefile.am
index df990995f0..a622ceb5e7 100644
--- a/test_fms/data_override/Makefile.am
+++ b/test_fms/data_override/Makefile.am
@@ -36,11 +36,7 @@ test_data_override_SOURCES = test_data_override.F90
 test_data_override_ongrid_SOURCES = test_data_override_ongrid.F90
 test_get_grid_v1_SOURCES = test_get_grid_v1.F90
 
-if SKIP_PARSER_TESTS
-skipflag="skip"
-else
 skipflag=""
-endif
 
 TEST_EXTENSIONS = .sh
 SH_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) \
diff --git a/test_fms/parser/Makefile.am b/test_fms/parser/Makefile.am
index 05fbcd737c..e941af705e 100644
--- a/test_fms/parser/Makefile.am
+++ b/test_fms/parser/Makefile.am
@@ -49,11 +49,7 @@ TEST_EXTENSIONS = .sh
 SH_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) \
                   $(abs_top_srcdir)/test_fms/tap-driver.sh
 
-if SKIP_PARSER_TESTS
-skipflag="skip"
-else
 skipflag=""
-endif
 
 TESTS_ENVIRONMENT = parser_skip=${skipflag}
 # Clean up

From 67cafb5a6ac89e78d343046061703d2c697225d0 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 10:01:19 -0600
Subject: [PATCH 24/45] Add mppnccombine.c to diag_manager makefile

This c file is included in the TIM version of Makefile.ac but not the FMS2
version, and may be responsible for failing CI build
---
 diag_manager/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/diag_manager/Makefile.am b/diag_manager/Makefile.am
index 37759e838f..8c19b996d7 100644
--- a/diag_manager/Makefile.am
+++ b/diag_manager/Makefile.am
@@ -33,6 +33,7 @@ noinst_LTLIBRARIES = libdiag_manager.la
 libdiag_manager_la_SOURCES = \
   diag_axis.F90 \
   diag_data.F90 \
+  mppnccombine.c \
   diag_grid.F90 \
   diag_manager.F90 \
   diag_output.F90 \

From 9369bf04d2386aef13c1ca50b270230b3a5a893f Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 10:18:59 -0600
Subject: [PATCH 25/45] Remove bad test

test_gather2DV does not exist in TIM or in more recent FMS commits, and it has
some code that doesn't compile... I suspect that's related to why the test was
removed in subsequent PRs
---
 test_fms/mpp/test_mpp_gatscat.F90 | 118 ------------------------------
 1 file changed, 118 deletions(-)

diff --git a/test_fms/mpp/test_mpp_gatscat.F90 b/test_fms/mpp/test_mpp_gatscat.F90
index 47ff6cf81c..3ebacf61d9 100644
--- a/test_fms/mpp/test_mpp_gatscat.F90
+++ b/test_fms/mpp/test_mpp_gatscat.F90
@@ -779,122 +779,4 @@ subroutine test_gatherV(npes,pe,root,out_unit)
      deallocate(sdata,rdata,ref)
   end subroutine test_gatherV
 
-  !> @brief  Test the 2D vector mpp_gather routine.
-  !> @todo   This is a legacy routine which does not work in all conditions. For the gcc version,
-  !> the use of cray pointers is suspect to causing a crash at the call to mpp_gather.
-subroutine test_gather2DV(npes,pe,root,out_unit)
-  implicit none
-     integer, intent(in) :: npes,pe,root,out_unit
-
-     integer :: pelist(npes),rsize(npes)
-     integer :: pelist2(npes),rsize2(npes)
-     integer :: i,j,k,l,nz,ssize,nelems
-     real,allocatable,dimension(:,:) :: data, cdata, sbuff,rbuff
-     real,allocatable :: ref(:,:)
-     integer, parameter :: KSIZE=10
-
-     real :: sbuff1D(size(sbuff))
-     real :: rbuff1D(size(rbuff))
-     pointer(sptr,sbuff1D); pointer(rptr,rbuff1D)
-
-
-     if(npes < 3)then
-       call mpp_error(FATAL, "Test_gather2DV: minimum of 3 ranks required. Not testing gather; too few ranks.")
-     elseif(npes > 9999)then
-       call mpp_error(FATAL, "Test_gather2DV: maximum of 9999 ranks supported. Not testing gather2DV; too many ranks.")
-       return
-     endif
-     write(out_unit,*)
-
-     ssize = pe+1
-     allocate(data(ssize,KSIZE))
-     do k=1,KSIZE; do i=1,ssize
-       data(i,k) = 10000.0*k + pe + 0.0001*i
-     enddo; enddo
-     do i=1,npes
-       pelist(i) = i-1
-       rsize(i) = i
-     enddo
-
-     nz = KSIZE
-     nelems = sum(rsize(:))
-
-     allocate(rbuff(nz,nelems)); rbuff = -1.0
-     allocate(ref(nelems,nz),cdata(nelems,nz))
-     ref = 0.0; cdata = 0.0
-     if(pe == root)then
-       do k=1,KSIZE
-       l=1
-       do j=1,npes
-         do i=1,rsize(j)
-            ref(l,k) = 10000.0*k + pelist(j) + 0.0001*i
-            l = l+1
-       enddo; enddo;enddo
-     endif
-     allocate(sbuff(nz,ssize))
-     ! this matrix inversion makes for easy gather to the IO root
-     ! and a clear, concise unpack
-     do j=1,ssize
-       do i=1,nz
-         sbuff(i,j) = data(j,i)
-     enddo; enddo
-
-  !  Note that the gatherV implied here is asymmetric; only root needs to know the vector of recv size
-     sptr = LOC(sbuff); rptr = LOC(rbuff)
-     call mpp_gather(sbuff1D,size(sbuff),rbuff1D,nz*rsize(:))
-
-     if(pe == root)then
-        do j=1,nz
-           do i=1,nelems
-             cdata(i,j) = rbuff(j,i)
-        enddo; enddo
-        do j=1,nz
-           do i=1,nelems
-            if(cdata(i,j) /= ref(i,j))then
-               write(6,*) "Gathered data ",cdata(i,j), " NE reference ",ref(i,j), "at i,j=",i,j
-               call mpp_error(FATAL, "Test gather2DV global pelist failed")
-            endif
-       enddo;enddo
-     endif
-
-     call mpp_sync()
-     write(out_unit,*) "Test gather2DV with global pelist successful"
-
-     do i=1,npes
-       pelist2(i) = pelist(npes-i+1)
-       rsize2(i) = rsize(npes-i+1)
-     enddo
-
-     rbuff = -1.0
-     ref = 0.0; cdata = 0.0
-     if(pe == pelist2(1))then
-       do k=1,KSIZE
-       l=1
-       do j=1,npes
-         do i=1,rsize2(j)
-            ref(l,k) = 10000.0*k + pelist2(j) + 0.0001*i
-            l = l+1
-       enddo; enddo;enddo
-     endif
-
-     call mpp_gather(sbuff1D,size(sbuff),rbuff1D,nz*rsize2(:),pelist2)
-
-     if(pe == pelist2(1))then
-        do j=1,nz
-           do i=1,nelems
-             cdata(i,j) = rbuff(j,i)
-        enddo; enddo
-        do j=1,nz
-           do i=1,nelems
-            if(cdata(i,j) /= ref(i,j))then
-               write(6,*) "Gathered data ",cdata(i,j), " NE reference ",ref(i,j), "at i,j=",i,j
-               call mpp_error(FATAL, "Test gather2DV with reversed pelist failed")
-            endif
-       enddo;enddo
-     endif
-     call mpp_sync()
-     write(out_unit,*) "Test gather2DV with reversed pelist successful"
-     deallocate(data,sbuff,rbuff,cdata,ref)
-  end subroutine test_gather2DV
-
 end program test_mpp_gatscat

From 9a76ecebffb7862038ad399602bd96742d0a216f Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 10:36:47 -0600
Subject: [PATCH 26/45] Increase filename size

test_simple_domain.nc does not fit into character(len=20); increased to len=25
just to have some breathing room
---
 test_fms/fms2_io/test_domain_io.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_fms/fms2_io/test_domain_io.F90 b/test_fms/fms2_io/test_domain_io.F90
index 07a3e2845a..8b170f3819 100644
--- a/test_fms/fms2_io/test_domain_io.F90
+++ b/test_fms/fms2_io/test_domain_io.F90
@@ -46,7 +46,7 @@ program test_domain_read
   integer                               :: xhalo = 3           !< Number of halo points in X
   integer                               :: yhalo = 2           !< Number of halo points in Y
   integer                               :: nz = 2              !< Number of points in the z dimension
-  character(len=20)                     :: filename="test.nc"  !< Name of the file
+  character(len=25)                     :: filename="test.nc"  !< Name of the file
   logical                               :: use_edges=.false.   !< Use North and East domain positions
 
   integer                               :: ndim4               !< Number of points in dim4

From e68d0034516ca6af8e99cef114473bde59c9ef76 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 10:40:35 -0600
Subject: [PATCH 27/45] Revert "Drop SKIP_PARSER_TESTS from makefiles"

This reverts commit b01892162e38023003b3450f5f2802d305fa0941.
---
 test_fms/data_override/Makefile.am | 4 ++++
 test_fms/parser/Makefile.am        | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/test_fms/data_override/Makefile.am b/test_fms/data_override/Makefile.am
index a622ceb5e7..df990995f0 100644
--- a/test_fms/data_override/Makefile.am
+++ b/test_fms/data_override/Makefile.am
@@ -36,7 +36,11 @@ test_data_override_SOURCES = test_data_override.F90
 test_data_override_ongrid_SOURCES = test_data_override_ongrid.F90
 test_get_grid_v1_SOURCES = test_get_grid_v1.F90
 
+if SKIP_PARSER_TESTS
+skipflag="skip"
+else
 skipflag=""
+endif
 
 TEST_EXTENSIONS = .sh
 SH_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) \
diff --git a/test_fms/parser/Makefile.am b/test_fms/parser/Makefile.am
index e941af705e..05fbcd737c 100644
--- a/test_fms/parser/Makefile.am
+++ b/test_fms/parser/Makefile.am
@@ -49,7 +49,11 @@ TEST_EXTENSIONS = .sh
 SH_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) \
                   $(abs_top_srcdir)/test_fms/tap-driver.sh
 
+if SKIP_PARSER_TESTS
+skipflag="skip"
+else
 skipflag=""
+endif
 
 TESTS_ENVIRONMENT = parser_skip=${skipflag}
 # Clean up

From 5284362d371df19afb5da96e9d605801ac2d556f Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 10:40:54 -0600
Subject: [PATCH 28/45] Revert "Drop YAML from configure.ac"

This reverts commit f88a0955c1dd07185dadd5f673d2fbb3c266bb46.
---
 configure.ac | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/configure.ac b/configure.ac
index 6cf92e14f2..61675e71f7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -66,6 +66,12 @@ AC_ARG_WITH([mpi],
 AS_IF([test ${with_mpi:-yes} = yes],
   [with_mpi=yes],
   [with_mpi=no])
+AC_ARG_WITH([yaml],
+  [AS_HELP_STRING([--with-yaml],
+    [Build with YAML support.  This option will be ignored if --disable-fortran-flag-setting is also given. (Default no)])])
+AS_IF([test ${with_yaml:-no} = no],
+  [with_yaml=no],
+  [with_yaml=yes])
 AC_ARG_ENABLE([setting-flags],
   [AS_HELP_STRING([--enable-setting-flags],
     [Allow configure to set some compiler flags.  Disabling this will also disable any other --with or --enable options that set flags, and will only use user-provided falgs. (Default yes)])])
@@ -175,6 +181,19 @@ if test $with_mpi = yes; then
   GX_MPI()
 fi
 
+# Require yaml
+if test $with_yaml = yes; then
+  AC_CHECK_HEADERS([yaml.h], [], [AC_MSG_ERROR(["Can't find the libYAML C header file.  Set CC/CPPFLAGS/CFLAGS"])])
+  AC_SEARCH_LIBS([yaml_parser_initialize], [yaml], [], [AC_MSG_ERROR(["Can't find the libYAML C library.  Set CC/LDFLAGS/LIBS"])])
+
+  #If the test pass, define use_yaml macro
+  AC_DEFINE([use_yaml], [1], [This is required to use yaml parser])
+
+  AM_CONDITIONAL([SKIP_PARSER_TESTS], false )
+else
+  AM_CONDITIONAL([SKIP_PARSER_TESTS], true )
+fi
+
 # Require netCDF
 AC_CHECK_HEADERS([netcdf.h], [], [AC_MSG_ERROR([Can't find the netCDF C header file.  Set CPPFLAGS/CFLAGS])])
 AC_SEARCH_LIBS([nc_create], [netcdf], [], [AC_MSG_ERROR([Can't find the netCDF C library.  Set LDFLAGS/LIBS])])
@@ -316,6 +335,10 @@ if test $enable_setting_flags = yes; then
     FCFLAGS="$FCFLAGS -prof-gen=srcpos"
     CFLAGS="$CFLAGS -prof-gen=srcpos"
   fi
+  # add yaml flag
+  if test $with_yaml = yes; then
+    LDFLAGS="$LDFLAGS -lyaml"
+  fi
 fi
 
 # Find which mpi launcher to use

From 5fb51e27b1015e42c8a69350cc1a2c9851dd9962 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 11:02:01 -0600
Subject: [PATCH 29/45] Autoconf tests might work with gnu again

I needed to clean up the autoconf build system to get a couple of tests to pass
with intel, maybe this will fix whatever issue was happening with gnu 15 as
well? If not, I'll revert this commit and call the PR good
---
 .github/workflows/build_ubuntu_gnu.yml | 36 ++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 .github/workflows/build_ubuntu_gnu.yml

diff --git a/.github/workflows/build_ubuntu_gnu.yml b/.github/workflows/build_ubuntu_gnu.yml
new file mode 100644
index 0000000000..419bb0b52e
--- /dev/null
+++ b/.github/workflows/build_ubuntu_gnu.yml
@@ -0,0 +1,36 @@
+name: Build libFMS test with autotools
+
+on: [push, pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash
+    strategy:
+      matrix:
+        conf-flags: [--disable-openmp, --enable-mixed-mode, --disable-setting-flags, --with-mpi=no]
+        input-flag:  [--with-yaml, --enable-test-input=/home/unit_tests_input]
+    container:
+      image: ghcr.io/noaa-gfdl/fms/fms-ci-rocky-gnu:15.1.0
+      env:
+        TEST_VERBOSE: 1
+        DISTCHECK_CONFIGURE_FLAGS: "${{ matrix.conf-flags }} ${{ matrix.input-flag }}"
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v2
+    - name: Prepare GNU autoconf for build
+      run: autoreconf -if
+    - name: Configure the build
+      if: ${{ matrix.conf-flags != '--disable-setting-flags' }}
+      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} || cat config.log
+    - name: Configure the build with compiler flags
+      if: ${{ matrix.conf-flags == '--disable-setting-flags' }}
+      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="-fdefault-real-8 -fdefault-double-8 -fcray-pointer -ffree-line-length-none -I/usr/include $FCFLAGS" || cat config.log
+    - name: Build the library
+      run: make distcheck
+      if: ${{ matrix.conf-flags != '--with-mpi=no' }}
+    - name: Build the library (without test suite for serial build)
+      run: make
+      if: ${{ matrix.conf-flags == '--with-mpi=no' }}

From 79e7cbc1de54911f84b4f8644d1ab7a923c4e909 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 11:08:53 -0600
Subject: [PATCH 30/45] Add exclude flags to ubuntu tests

---
 .github/workflows/build_ubuntu_gnu.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/build_ubuntu_gnu.yml b/.github/workflows/build_ubuntu_gnu.yml
index 419bb0b52e..fb00742122 100644
--- a/.github/workflows/build_ubuntu_gnu.yml
+++ b/.github/workflows/build_ubuntu_gnu.yml
@@ -12,6 +12,9 @@ jobs:
       matrix:
         conf-flags: [--disable-openmp, --enable-mixed-mode, --disable-setting-flags, --with-mpi=no]
         input-flag:  [--with-yaml, --enable-test-input=/home/unit_tests_input]
+        exclude:
+          - conf-flags: --with-mpi=no
+            input-flag: --enable-test-input=/home/unit_tests_input
     container:
       image: ghcr.io/noaa-gfdl/fms/fms-ci-rocky-gnu:15.1.0
       env:

From f8696f51bd6bb1f8bf08a7ce97baafedf9aaaff5 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 11:11:41 -0600
Subject: [PATCH 31/45] More CI cleanup

Try replacing build_ubuntu_gnu.yml with github_autotools_gnu.yml from GFDL main
branch
---
 .github/workflows/build_ubuntu_gnu.yml | 41 +++++++++++++++-----------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/build_ubuntu_gnu.yml b/.github/workflows/build_ubuntu_gnu.yml
index fb00742122..ba7fc8e8f1 100644
--- a/.github/workflows/build_ubuntu_gnu.yml
+++ b/.github/workflows/build_ubuntu_gnu.yml
@@ -5,35 +5,40 @@ on: [push, pull_request]
 jobs:
   build:
     runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash
     strategy:
       matrix:
-        conf-flags: [--disable-openmp, --enable-mixed-mode, --disable-setting-flags, --with-mpi=no]
-        input-flag:  [--with-yaml, --enable-test-input=/home/unit_tests_input]
+        conf-flag: [ --disable-openmp, --with-mpi=no, --disable-r8-default]
+        input-flag: [--with-yaml, --enable-test-input=/home/unit_tests_input]
         exclude:
-          - conf-flags: --with-mpi=no
+          - conf-flag: --with-mpi=no
             input-flag: --enable-test-input=/home/unit_tests_input
     container:
       image: ghcr.io/noaa-gfdl/fms/fms-ci-rocky-gnu:15.1.0
       env:
-        TEST_VERBOSE: 1
-        DISTCHECK_CONFIGURE_FLAGS: "${{ matrix.conf-flags }} ${{ matrix.input-flag }}"
+        DISTCHECK_CONFIGURE_FLAGS: "${{ matrix.conf-flag }} ${{ matrix.input-flag }}"
+        DEBUG_FLAGS: "-O0 -g -fbounds-check -ffpe-trap=invalid,zero,overflow" # debug compiler flags taken from the mkmf template
+        # diag manager openmp + logical mask tests fail with gcc, these are reproducible outside the CI
+        # test_mpp_clock_begin_end_id is an expected fail that is passing, only happens in the CI
+        # test_time_interp2 tests fail from file issues, only happens in the CI
+        SKIP_TESTS: "test_time_none.10 test_time_sum.10 test_time_avg.10 test_time_min.10 test_time_max.10 test_time_pow.10 test_time_rms.10 test_mpp_clock_begin_end_id.10 test_time_interp2.7 test_time_interp2.8"
     steps:
     - name: Checkout code
-      uses: actions/checkout@v2
+      uses: actions/checkout@v6.0.1
     - name: Prepare GNU autoconf for build
       run: autoreconf -if
     - name: Configure the build
-      if: ${{ matrix.conf-flags != '--disable-setting-flags' }}
-      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} || cat config.log
-    - name: Configure the build with compiler flags
-      if: ${{ matrix.conf-flags == '--disable-setting-flags' }}
-      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="-fdefault-real-8 -fdefault-double-8 -fcray-pointer -ffree-line-length-none -I/usr/include $FCFLAGS" || cat config.log
-    - name: Build the library
-      run: make distcheck
-      if: ${{ matrix.conf-flags != '--with-mpi=no' }}
+      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="$FCFLAGS $DEBUG_FLAGS" || cat config.log
+    - name: Run distcheck (compiles, tests, and packages)
+      run: make distcheck 2>&1 > distcheck.log
+      if: ${{ matrix.conf-flag != '--with-mpi=no' }}
+    - name: Output errors on failure
+      run: grep -E "^FAIL:|^XPASS:" distcheck.log
+      if: failure()
+    - name: Upload log on failure
+      uses: actions/upload-artifact@v7.0.0
+      if: failure()
+      with:
+        path: distcheck.log
     - name: Build the library (without test suite for serial build)
       run: make
-      if: ${{ matrix.conf-flags == '--with-mpi=no' }}
+      if: ${{ matrix.conf-flag == '--with-mpi=no' }}

From b9bfbf36247d064caef08b6e4de45def72b7bc6a Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 11:20:39 -0600
Subject: [PATCH 32/45] CI should report error with configure

The "Configure the build" step contained the command

./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="$FCFLAGS $DEBUG_FLAGS" || cat config.log

And the "||" meant this line would always return status 0 (either from
configure or from the cat command). This hides the true cause of the failure
(not finding netcdf)
---
 .github/workflows/build_ubuntu_gnu.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_ubuntu_gnu.yml b/.github/workflows/build_ubuntu_gnu.yml
index ba7fc8e8f1..54da07a208 100644
--- a/.github/workflows/build_ubuntu_gnu.yml
+++ b/.github/workflows/build_ubuntu_gnu.yml
@@ -27,7 +27,7 @@ jobs:
     - name: Prepare GNU autoconf for build
       run: autoreconf -if
     - name: Configure the build
-      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="$FCFLAGS $DEBUG_FLAGS" || cat config.log
+      run: ./configure ${DISTCHECK_CONFIGURE_FLAGS} FCFLAGS="$FCFLAGS $DEBUG_FLAGS" || (cat config.log && exit 1)
     - name: Run distcheck (compiles, tests, and packages)
       run: make distcheck 2>&1 > distcheck.log
       if: ${{ matrix.conf-flag != '--with-mpi=no' }}

From 2a87ebba1b77e0b52e6bc98bc1b5a1ab3bafab20 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 11:27:13 -0600
Subject: [PATCH 33/45] Bring in new configure.ac

Maybe the reason configure is failing is something that changed in this file?
Grasping at straws...
---
 configure.ac | 181 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 136 insertions(+), 45 deletions(-)

diff --git a/configure.ac b/configure.ac
index 61675e71f7..1aaf5c8f9b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,20 +1,19 @@
 #***********************************************************************
-#*                   GNU Lesser General Public License
+#*                             Apache License 2.0
 #*
 #* This file is part of the GFDL Flexible Modeling System (FMS).
 #*
-#* FMS is free software: you can redistribute it and/or modify it under
-#* the terms of the GNU Lesser General Public License as published by
-#* the Free Software Foundation, either version 3 of the License, or (at
-#* your option) any later version.
+#* Licensed under the Apache License, Version 2.0 (the "License");
+#* you may not use this file except in compliance with the License.
+#* You may obtain a copy of the License at
 #*
-#* FMS is distributed in the hope that it will be useful, but WITHOUT
-#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-#* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#* for more details.
+#*     http://www.apache.org/licenses/LICENSE-2.0
 #*
-#* You should have received a copy of the GNU Lesser General Public
-#* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
+#* FMS is distributed in the hope that it will be useful, but WITHOUT
+#* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
+#* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#* PARTICULAR PURPOSE. See the License for the specific language
+#* governing permissions and limitations under the License.
 #***********************************************************************
 
 # This is the main configure file for the FMS package.
@@ -25,7 +24,7 @@ AC_PREREQ([2.69])
 
 # Initialize with name, version, and support email address.
 AC_INIT([GFDL FMS Library],
-  [2023.01.01],
+  [2026.01.01-dev],
   [gfdl.climate.model.info@noaa.gov],
   [FMS],
   [https://www.github.com/NOAA-GFDL/FMS])
@@ -54,12 +53,6 @@ AS_IF([test x${CRAYPE_VERSION:+yes} = "xyes"],[
 
 
 # Process user optons.
-AC_ARG_ENABLE([mixed-mode],
-  [AS_HELP_STRING([--enable-mixed-mode],
-    [Build using mixed mode.  Enables both 64-bit and 32-bit reals in Fortran. This option will be ignored if --disable-fortran-flag-setting is also given.])])
-AS_IF([test ${enable_mixed_mode:-no} = no],
-  [enable_mixed_mode=no],
-  [enable_mixed_mode=yes])
 AC_ARG_WITH([mpi],
   [AS_HELP_STRING([--with-mpi],
     [Build with MPI support.  This option will be ignored if --disable-fortran-flag-setting is also given. (Default yes)])])
@@ -85,12 +78,6 @@ AS_IF([test ${enable_code_coverage:-no} = no],
   [enable_code_coverage=no],
   [enable_code_coverage=yes])
 # individual mixed precision overload macros
-AC_ARG_ENABLE([overload-r4],
-  [AS_HELP_STRING([--enable-overload-r4],
-    [Enables the OVERLOAD_R4 macro to compile with 4 byte real routine overloads. (Default no)])])
-AS_IF([test ${enable_overload_r4:-no} = yes],
-  [enable_overload_r4=yes],
-  [enable_overload_r4=no])
 AC_ARG_ENABLE([overload-c4],
   [AS_HELP_STRING([--enable-overload-c4],
     [Enables the OVERLOAD_C4 macro to compile with 4 byte complex routine overloads. (Default no)])])
@@ -110,6 +97,20 @@ AS_IF([test ${enable_8byte_int:-no} = yes],
   [enable_8byte_int=yes],
   [enable_8byte_int=no])
 
+AC_ARG_ENABLE([r8-default],
+  [AS_HELP_STRING([--disable-r8-default],
+    [Disables the build from adding the 8 byte default real kind flag during compilation (default no)])])
+AS_IF([test ${enable_r8_default:-yes} = yes],
+  [enable_r8_default=yes],
+  [enable_r8_default=no])
+
+AC_ARG_ENABLE([portable-kinds],
+  [AS_HELP_STRING([--enable-portable-kinds],
+    [Enables compilation with -DPORTABLE_KINDS with iso_c_binding KIND type parameters])])
+AS_IF([test ${enable_portable_kinds:-no} = yes],
+  [enable_portable_kinds=yes],
+  [enable_portable_kinds=no])
+
 # user enabled testing with input files
 AC_MSG_CHECKING([whether to enable tests with input files])
 AC_ARG_ENABLE([test-input],
@@ -189,9 +190,9 @@ if test $with_yaml = yes; then
   #If the test pass, define use_yaml macro
   AC_DEFINE([use_yaml], [1], [This is required to use yaml parser])
 
-  AM_CONDITIONAL([SKIP_PARSER_TESTS], false )
+  AM_CONDITIONAL([USING_YAML], true)
 else
-  AM_CONDITIONAL([SKIP_PARSER_TESTS], true )
+  AM_CONDITIONAL([USING_YAML], false)
 fi
 
 # Require netCDF
@@ -203,7 +204,6 @@ AC_MSG_CHECKING([if netCDF was built with HDF5])
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[
 #include <netcdf_meta.h>
 #if !(NC_HAS_NC4)
-      choke me
 #endif]])], [nc_has_nc4=yes], [nc_has_nc4=no])
 AC_MSG_RESULT([$nc_has_nc4])
 if test $nc_has_nc4 = no; then
@@ -226,6 +226,21 @@ AC_CHECK_FUNCS([sched_getaffinity], [], [])
 
 # Check if the compiler needs special OpenMP flags
 AC_OPENMP()
+
+# Check if building with OpenACC support.  Default is 'no'
+AC_ARG_ENABLE([openacc],
+  [AS_HELP_STRING([--enable-openacc],
+    [Builds with support for OpenACC directives. (default no)])])
+AS_IF([test ${enable_openacc:-no} = yes],
+  [enable_openacc=yes],
+  [enable_openacc=no])
+
+# If openacc support enabled, test for correct flags
+AS_IF([test ${enable_openacc} = yes],
+      [ACX_LANG_OPENACC_FLAG(
+				[AS_VAR_APPEND([CFLAGS], [" $acx_cv_c_openacc_flag"])], []
+			 )])
+
 AC_LANG_POP(C)
 
 # Fortran specific checks
@@ -258,6 +273,28 @@ GX_FC_CHECK_MOD([netcdf], [], [], [AC_MSG_ERROR([Can't find the netCDF Fortran m
 GX_FORTRAN_SEARCH_LIBS([nf90_create], [netcdff], [use netcdf], [iret = nf90_create('foo.nc', 1, ncid)], [],
   [AC_MSG_ERROR([Can't find the netCDF Fortran library.  Set LDFLAGS/LIBS])])
 
+# Check if we get a floating point exception with netcdf
+# this will only get triggered if you have FPE traps enabled via FCFLAGS
+AC_MSG_CHECKING([if HDF5 version causes floating point exceptions with set flags])
+AC_RUN_IFELSE([AC_LANG_PROGRAM([], [[
+      use netcdf
+      integer i, j
+      j = nf90_create("test.nc", NC_WRITE, i)
+]])], [hdf5_fpe_bug=no], [hdf5_fpe_bug=yes])
+AC_MSG_RESULT([$hdf5_fpe_bug])
+if test $hdf5_fpe_bug = yes; then
+  AC_MSG_ERROR([The HDF5 version used to build netcdf is incompatible with the set FCFLAGS. dnl
+NetCDF must be built with a HDF5 version other than 1.14.3 to support floating point exception traps.])
+fi
+
+rm -f test.nc
+
+# Check if we need a flag to not capitalize module output (needed with cray compiler)
+GX_FC_MOD_CASE_FLAG([FCFLAGS="$FCFLAGS $FC_MOD_CASE_FLAG"])
+
+# Check if new unit is supported
+GX_FC_08_OPEN_NEWUNIT([], [AC_MSG_ERROR([Compiler support for use of newunit argument is required to build FMS])])
+
 # Check if Fortran compiler has the Class, Character array assign bug
 GX_FC_CLASS_CHAR_ARRAY_BUG_CHECK()
 
@@ -267,20 +304,32 @@ GX_FC_CRAY_POINTER_FLAG()
 # Check if Fortran compiler and system have quad precision support
 GX_FC_QUAD_PRECISION()
 
-# Check if Fortran compiler supports reading namelists from internal files
-GX_FC_INTERNAL_FILE_NML()
-
 # Check if the compiler needs special OpenMP flags
 AC_OPENMP()
 
+# need to set preprocessor for openacc check below
+AC_FC_PP_SRCEXT([F90])
+
+# Check for openacc fortran flags
+AS_IF([test ${enable_openacc} = yes],
+      [ACX_LANG_OPENACC_FLAG(
+				[AS_VAR_APPEND([FCFLAGS], [" $acx_cv_fc_openacc_flag"])], []
+			 )])
+
 AC_LANG_POP(Fortran)
 
 # We passed all the tests.  Set the required defines.
-AC_DEFINE([use_netCDF], [1], [This is required for the library to build])
 if test $with_mpi = yes; then
   AC_DEFINE([use_libMPI], [1], [This is required for the library to build])
 fi
 
+# Builds with r8 default unless disable flag is given
+if test $enable_r8_default = yes; then
+  AM_CONDITIONAL([SKIP_MOSAIC_TESTS], false)
+else
+  AM_CONDITIONAL([SKIP_MOSAIC_TESTS], true)
+fi
+
 # Set any required compile flags.  This will not be done if the user wants to
 # define all their own flags.
 if test $enable_setting_flags = yes; then
@@ -292,20 +341,13 @@ if test $enable_setting_flags = yes; then
   # necessary fortran flags.
   AC_FC_LINE_LENGTH([unlimited])
 
-  # Will we build with default 64-bit reals in Fortran, or do mixed mode?
-  if test $enable_mixed_mode = yes; then
-    GX_FC_DEFAULT_REAL_KIND4_FLAG([dnl
-      FCFLAGS="$FCFLAGS $FC_DEFAULT_REAL_KIND8_FLAG"])
-    AC_DEFINE([OVERLOAD_R4], [1], [Set to overload the R4 Fortran routines])
-    AC_DEFINE([OVERLOAD_R8], [1], [Set to overload the R8 Fortran routines])
-  else
+  # Builds with r8 default unless disable flag is given
+  if test $enable_r8_default = yes; then
     GX_FC_DEFAULT_REAL_KIND8_FLAG([dnl
       FCFLAGS="$FCFLAGS $FC_DEFAULT_REAL_KIND8_FLAG"])
   fi
+
   # individual mixed precision overloads
-  if test $enable_overload_r4 = yes; then
-    AC_DEFINE([OVERLOAD_R4], [1], [Set to overload with the R4 Fortran routines])
-  fi
   if test $enable_overload_c4 = yes; then
     AC_DEFINE([OVERLOAD_C4], [1], [Set to overload with the C4 Fortran routines])
   fi
@@ -316,6 +358,11 @@ if test $enable_setting_flags = yes; then
     AC_DEFINE([no_8byte_integers], [1], [Set to disable 8 byte integer Fortran routines])
   fi
 
+  # Builds with C data types
+  if test $enable_portable_kinds = yes; then
+    AC_DEFINE([PORTABLE_KINDS], [1], [Set to define KIND parameters to iso_c_binding KIND parameters])
+  fi
+
   # Add Cray Pointer support flag
   if test ! -z "$FC_CRAY_POINTER_FLAG"; then
     FCFLAGS="$FCFLAGS $FC_CRAY_POINTER_FLAG"
@@ -345,9 +392,12 @@ fi
 AC_CHECK_PROGS([MPI_LAUNCHER],[srun aprun mpirun])
 
 # Check if the launcher can oversubscribe the MPI processes
-AS_IF([$MPI_LAUNCHER --oversubscribe hostname >/dev/null 2>&1], \
+AS_IF([$MPI_LAUNCHER --oversubscribe nproc >/dev/null 2>&1], \
       [ AC_SUBST([OVERSUBSCRIBE], [--oversubscribe])])
 
+# newer openmpi versions take a different flag
+AS_IF([$MPI_LAUNCHER --map-by :OVERSUBSCRIBE nproc >/dev/null 2>&1], \
+      [ AC_SUBST([OVERSUBSCRIBE], [--map-by :OVERSUBSCRIBE])])
 
 # Compiler with version information. This consists of the full path
 # name of the compiler and the reported version number.
@@ -399,6 +449,32 @@ else
   AC_MSG_RESULT([no])
 fi
 
+# Check if intel is 2025.1 for ICE
+AC_MSG_CHECKING([if using Intel 2025.1])
+if [ test -n "`$FC --version | grep ifx | grep 2025\.1\..*`" ]; then
+  AC_MSG_RESULT([yes])
+  AC_MSG_ERROR([Compilation with Oneapi 2025.1 is unsupported \
+by this version of FMS due to a bug in the compiler. Please use a different version of oneapi.])
+else
+  AC_MSG_RESULT([no])
+fi
+
+AC_MSG_CHECKING([if netCDF was built with HDF5 parallel I/O features])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[
+#include <netcdf_meta.h>
+#if !defined(NC_HAS_PARALLEL4) || NC_HAS_PARALLEL4 == 0
+      choke me
+#endif]])], [nc_has_parallel4=yes], [nc_has_parallel4=no])
+AC_MSG_RESULT([$nc_has_parallel4])
+
+if test $nc_has_parallel4 = no; then
+  AC_MSG_WARN(netCDF was not build with HDF5 parallel I/O features, so collective netcdf io is not allowed)
+  AC_DEFINE([NO_NC_PARALLEL4], [1], [HDF5 parallel I/O features not allowed])
+  AM_CONDITIONAL([SKIP_PARALLEL_TEST], true)
+else
+  AM_CONDITIONAL([SKIP_PARALLEL_TEST], false)
+fi
+
 #####
 # Create output variables from various
 # shell variables, for use in generating
@@ -418,6 +494,10 @@ AC_SUBST([NOUNDEFINED])
 # Used in Makefiles.
 AC_SUBST([MODDIR],[\$\(top_builddir\)/.mods])
 
+# Define CMake variables used in test-lib.sh.in as empty strings
+AC_SUBST([CMAKE_CURRENT_SOURCE_DIR], [])
+AC_SUBST([USING_CMAKE],[])
+
 # These files will be created when the configure script is run.
 AC_CONFIG_FILES([
   Makefile
@@ -434,7 +514,7 @@ AC_CONFIG_FILES([
   tridiagonal/Makefile
   tracer_manager/Makefile
   topography/Makefile
-  mosaic/Makefile
+  grid_utils/Makefile
   mosaic2/Makefile
   monin_obukhov/Makefile
   memutils/Makefile
@@ -453,6 +533,7 @@ AC_CONFIG_FILES([
   diag_integral/Makefile
   sat_vapor_pres/Makefile
   random_numbers/Makefile
+  offloading/Makefile
   libFMS/Makefile
   docs/Makefile
   parser/Makefile
@@ -460,6 +541,8 @@ AC_CONFIG_FILES([
   test_fms/test-lib.sh
   test_fms/intel_coverage.sh
   test_fms/Makefile
+  test_fms/common/Makefile
+  test_fms/astronomy/Makefile
   test_fms/diag_manager/Makefile
   test_fms/data_override/Makefile
   test_fms/exchange/Makefile
@@ -469,17 +552,25 @@ AC_CONFIG_FILES([
   test_fms/fms2_io/Makefile
   test_fms/fms/Makefile
   test_fms/mpp/Makefile
-  test_fms/mpp_io/Makefile
   test_fms/time_interp/Makefile
   test_fms/time_manager/Makefile
   test_fms/horiz_interp/Makefile
   test_fms/field_manager/Makefile
   test_fms/axis_utils/Makefile
-  test_fms/mosaic/Makefile
+  test_fms/mosaic2/Makefile
   test_fms/affinity/Makefile
   test_fms/coupler/Makefile
   test_fms/parser/Makefile
   test_fms/string_utils/Makefile
+  test_fms/tridiagonal/Makefile
+  test_fms/sat_vapor_pres/Makefile
+  test_fms/diag_integral/Makefile
+  test_fms/tracer_manager/Makefile
+  test_fms/random_numbers/Makefile
+  test_fms/topography/Makefile
+  test_fms/column_diagnostics/Makefile
+  test_fms/offloading/Makefile
+  test_fms/block_control/Makefile
   FMS.pc
   ])
 

From 2ec746f3648b3808087ad13eb6fade9a9b08b34e Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 11:42:28 -0600
Subject: [PATCH 34/45] Take two with configure.ac

Went back to dev/turbo configure.ac file, then grabbed a few specific changes
from main that looked appropriate
---
 configure.ac | 142 ++++++++++++++-------------------------------------
 1 file changed, 39 insertions(+), 103 deletions(-)

diff --git a/configure.ac b/configure.ac
index 1aaf5c8f9b..3217017bf7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,19 +1,20 @@
 #***********************************************************************
-#*                             Apache License 2.0
+#*                   GNU Lesser General Public License
 #*
 #* This file is part of the GFDL Flexible Modeling System (FMS).
 #*
-#* Licensed under the Apache License, Version 2.0 (the "License");
-#* you may not use this file except in compliance with the License.
-#* You may obtain a copy of the License at
-#*
-#*     http://www.apache.org/licenses/LICENSE-2.0
+#* FMS is free software: you can redistribute it and/or modify it under
+#* the terms of the GNU Lesser General Public License as published by
+#* the Free Software Foundation, either version 3 of the License, or (at
+#* your option) any later version.
 #*
 #* FMS is distributed in the hope that it will be useful, but WITHOUT
-#* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
-#* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-#* PARTICULAR PURPOSE. See the License for the specific language
-#* governing permissions and limitations under the License.
+#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+#* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+#* for more details.
+#*
+#* You should have received a copy of the GNU Lesser General Public
+#* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
 #***********************************************************************
 
 # This is the main configure file for the FMS package.
@@ -24,7 +25,7 @@ AC_PREREQ([2.69])
 
 # Initialize with name, version, and support email address.
 AC_INIT([GFDL FMS Library],
-  [2026.01.01-dev],
+  [2023.01.01],
   [gfdl.climate.model.info@noaa.gov],
   [FMS],
   [https://www.github.com/NOAA-GFDL/FMS])
@@ -53,6 +54,12 @@ AS_IF([test x${CRAYPE_VERSION:+yes} = "xyes"],[
 
 
 # Process user optons.
+AC_ARG_ENABLE([mixed-mode],
+  [AS_HELP_STRING([--enable-mixed-mode],
+    [Build using mixed mode.  Enables both 64-bit and 32-bit reals in Fortran. This option will be ignored if --disable-fortran-flag-setting is also given.])])
+AS_IF([test ${enable_mixed_mode:-no} = no],
+  [enable_mixed_mode=no],
+  [enable_mixed_mode=yes])
 AC_ARG_WITH([mpi],
   [AS_HELP_STRING([--with-mpi],
     [Build with MPI support.  This option will be ignored if --disable-fortran-flag-setting is also given. (Default yes)])])
@@ -78,6 +85,12 @@ AS_IF([test ${enable_code_coverage:-no} = no],
   [enable_code_coverage=no],
   [enable_code_coverage=yes])
 # individual mixed precision overload macros
+AC_ARG_ENABLE([overload-r4],
+  [AS_HELP_STRING([--enable-overload-r4],
+    [Enables the OVERLOAD_R4 macro to compile with 4 byte real routine overloads. (Default no)])])
+AS_IF([test ${enable_overload_r4:-no} = yes],
+  [enable_overload_r4=yes],
+  [enable_overload_r4=no])
 AC_ARG_ENABLE([overload-c4],
   [AS_HELP_STRING([--enable-overload-c4],
     [Enables the OVERLOAD_C4 macro to compile with 4 byte complex routine overloads. (Default no)])])
@@ -97,20 +110,6 @@ AS_IF([test ${enable_8byte_int:-no} = yes],
   [enable_8byte_int=yes],
   [enable_8byte_int=no])
 
-AC_ARG_ENABLE([r8-default],
-  [AS_HELP_STRING([--disable-r8-default],
-    [Disables the build from adding the 8 byte default real kind flag during compilation (default no)])])
-AS_IF([test ${enable_r8_default:-yes} = yes],
-  [enable_r8_default=yes],
-  [enable_r8_default=no])
-
-AC_ARG_ENABLE([portable-kinds],
-  [AS_HELP_STRING([--enable-portable-kinds],
-    [Enables compilation with -DPORTABLE_KINDS with iso_c_binding KIND type parameters])])
-AS_IF([test ${enable_portable_kinds:-no} = yes],
-  [enable_portable_kinds=yes],
-  [enable_portable_kinds=no])
-
 # user enabled testing with input files
 AC_MSG_CHECKING([whether to enable tests with input files])
 AC_ARG_ENABLE([test-input],
@@ -190,9 +189,9 @@ if test $with_yaml = yes; then
   #If the test pass, define use_yaml macro
   AC_DEFINE([use_yaml], [1], [This is required to use yaml parser])
 
-  AM_CONDITIONAL([USING_YAML], true)
+  AM_CONDITIONAL([SKIP_PARSER_TESTS], false )
 else
-  AM_CONDITIONAL([USING_YAML], false)
+  AM_CONDITIONAL([SKIP_PARSER_TESTS], true )
 fi
 
 # Require netCDF
@@ -226,21 +225,6 @@ AC_CHECK_FUNCS([sched_getaffinity], [], [])
 
 # Check if the compiler needs special OpenMP flags
 AC_OPENMP()
-
-# Check if building with OpenACC support.  Default is 'no'
-AC_ARG_ENABLE([openacc],
-  [AS_HELP_STRING([--enable-openacc],
-    [Builds with support for OpenACC directives. (default no)])])
-AS_IF([test ${enable_openacc:-no} = yes],
-  [enable_openacc=yes],
-  [enable_openacc=no])
-
-# If openacc support enabled, test for correct flags
-AS_IF([test ${enable_openacc} = yes],
-      [ACX_LANG_OPENACC_FLAG(
-				[AS_VAR_APPEND([CFLAGS], [" $acx_cv_c_openacc_flag"])], []
-			 )])
-
 AC_LANG_POP(C)
 
 # Fortran specific checks
@@ -304,18 +288,12 @@ GX_FC_CRAY_POINTER_FLAG()
 # Check if Fortran compiler and system have quad precision support
 GX_FC_QUAD_PRECISION()
 
+# Check if Fortran compiler supports reading namelists from internal files
+GX_FC_INTERNAL_FILE_NML()
+
 # Check if the compiler needs special OpenMP flags
 AC_OPENMP()
 
-# need to set preprocessor for openacc check below
-AC_FC_PP_SRCEXT([F90])
-
-# Check for openacc fortran flags
-AS_IF([test ${enable_openacc} = yes],
-      [ACX_LANG_OPENACC_FLAG(
-				[AS_VAR_APPEND([FCFLAGS], [" $acx_cv_fc_openacc_flag"])], []
-			 )])
-
 AC_LANG_POP(Fortran)
 
 # We passed all the tests.  Set the required defines.
@@ -323,11 +301,11 @@ if test $with_mpi = yes; then
   AC_DEFINE([use_libMPI], [1], [This is required for the library to build])
 fi
 
-# Builds with r8 default unless disable flag is given
+# builds with r8 default unless disable flag is given
 if test $enable_r8_default = yes; then
-  AM_CONDITIONAL([SKIP_MOSAIC_TESTS], false)
+  am_conditional([skip_mosaic_tests], false)
 else
-  AM_CONDITIONAL([SKIP_MOSAIC_TESTS], true)
+  am_conditional([skip_mosaic_tests], true)
 fi
 
 # Set any required compile flags.  This will not be done if the user wants to
@@ -346,8 +324,10 @@ if test $enable_setting_flags = yes; then
     GX_FC_DEFAULT_REAL_KIND8_FLAG([dnl
       FCFLAGS="$FCFLAGS $FC_DEFAULT_REAL_KIND8_FLAG"])
   fi
-
   # individual mixed precision overloads
+  if test $enable_overload_r4 = yes; then
+    AC_DEFINE([OVERLOAD_R4], [1], [Set to overload with the R4 Fortran routines])
+  fi
   if test $enable_overload_c4 = yes; then
     AC_DEFINE([OVERLOAD_C4], [1], [Set to overload with the C4 Fortran routines])
   fi
@@ -392,12 +372,9 @@ fi
 AC_CHECK_PROGS([MPI_LAUNCHER],[srun aprun mpirun])
 
 # Check if the launcher can oversubscribe the MPI processes
-AS_IF([$MPI_LAUNCHER --oversubscribe nproc >/dev/null 2>&1], \
+AS_IF([$MPI_LAUNCHER --oversubscribe hostname >/dev/null 2>&1], \
       [ AC_SUBST([OVERSUBSCRIBE], [--oversubscribe])])
 
-# newer openmpi versions take a different flag
-AS_IF([$MPI_LAUNCHER --map-by :OVERSUBSCRIBE nproc >/dev/null 2>&1], \
-      [ AC_SUBST([OVERSUBSCRIBE], [--map-by :OVERSUBSCRIBE])])
 
 # Compiler with version information. This consists of the full path
 # name of the compiler and the reported version number.
@@ -449,32 +426,6 @@ else
   AC_MSG_RESULT([no])
 fi
 
-# Check if intel is 2025.1 for ICE
-AC_MSG_CHECKING([if using Intel 2025.1])
-if [ test -n "`$FC --version | grep ifx | grep 2025\.1\..*`" ]; then
-  AC_MSG_RESULT([yes])
-  AC_MSG_ERROR([Compilation with Oneapi 2025.1 is unsupported \
-by this version of FMS due to a bug in the compiler. Please use a different version of oneapi.])
-else
-  AC_MSG_RESULT([no])
-fi
-
-AC_MSG_CHECKING([if netCDF was built with HDF5 parallel I/O features])
-AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[
-#include <netcdf_meta.h>
-#if !defined(NC_HAS_PARALLEL4) || NC_HAS_PARALLEL4 == 0
-      choke me
-#endif]])], [nc_has_parallel4=yes], [nc_has_parallel4=no])
-AC_MSG_RESULT([$nc_has_parallel4])
-
-if test $nc_has_parallel4 = no; then
-  AC_MSG_WARN(netCDF was not build with HDF5 parallel I/O features, so collective netcdf io is not allowed)
-  AC_DEFINE([NO_NC_PARALLEL4], [1], [HDF5 parallel I/O features not allowed])
-  AM_CONDITIONAL([SKIP_PARALLEL_TEST], true)
-else
-  AM_CONDITIONAL([SKIP_PARALLEL_TEST], false)
-fi
-
 #####
 # Create output variables from various
 # shell variables, for use in generating
@@ -494,10 +445,6 @@ AC_SUBST([NOUNDEFINED])
 # Used in Makefiles.
 AC_SUBST([MODDIR],[\$\(top_builddir\)/.mods])
 
-# Define CMake variables used in test-lib.sh.in as empty strings
-AC_SUBST([CMAKE_CURRENT_SOURCE_DIR], [])
-AC_SUBST([USING_CMAKE],[])
-
 # These files will be created when the configure script is run.
 AC_CONFIG_FILES([
   Makefile
@@ -514,7 +461,7 @@ AC_CONFIG_FILES([
   tridiagonal/Makefile
   tracer_manager/Makefile
   topography/Makefile
-  grid_utils/Makefile
+  mosaic/Makefile
   mosaic2/Makefile
   monin_obukhov/Makefile
   memutils/Makefile
@@ -533,7 +480,6 @@ AC_CONFIG_FILES([
   diag_integral/Makefile
   sat_vapor_pres/Makefile
   random_numbers/Makefile
-  offloading/Makefile
   libFMS/Makefile
   docs/Makefile
   parser/Makefile
@@ -541,8 +487,6 @@ AC_CONFIG_FILES([
   test_fms/test-lib.sh
   test_fms/intel_coverage.sh
   test_fms/Makefile
-  test_fms/common/Makefile
-  test_fms/astronomy/Makefile
   test_fms/diag_manager/Makefile
   test_fms/data_override/Makefile
   test_fms/exchange/Makefile
@@ -552,25 +496,17 @@ AC_CONFIG_FILES([
   test_fms/fms2_io/Makefile
   test_fms/fms/Makefile
   test_fms/mpp/Makefile
+  test_fms/mpp_io/Makefile
   test_fms/time_interp/Makefile
   test_fms/time_manager/Makefile
   test_fms/horiz_interp/Makefile
   test_fms/field_manager/Makefile
   test_fms/axis_utils/Makefile
-  test_fms/mosaic2/Makefile
+  test_fms/mosaic/Makefile
   test_fms/affinity/Makefile
   test_fms/coupler/Makefile
   test_fms/parser/Makefile
   test_fms/string_utils/Makefile
-  test_fms/tridiagonal/Makefile
-  test_fms/sat_vapor_pres/Makefile
-  test_fms/diag_integral/Makefile
-  test_fms/tracer_manager/Makefile
-  test_fms/random_numbers/Makefile
-  test_fms/topography/Makefile
-  test_fms/column_diagnostics/Makefile
-  test_fms/offloading/Makefile
-  test_fms/block_control/Makefile
   FMS.pc
   ])
 

From a8b0038c44328e755beb096d8ded3ffca125df9e Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 11:51:38 -0600
Subject: [PATCH 35/45] More mixed-mode -> r8_default cleanup

---
 configure.ac | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index 3217017bf7..7bb6a736ea 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,12 +54,6 @@ AS_IF([test x${CRAYPE_VERSION:+yes} = "xyes"],[
 
 
 # Process user optons.
-AC_ARG_ENABLE([mixed-mode],
-  [AS_HELP_STRING([--enable-mixed-mode],
-    [Build using mixed mode.  Enables both 64-bit and 32-bit reals in Fortran. This option will be ignored if --disable-fortran-flag-setting is also given.])])
-AS_IF([test ${enable_mixed_mode:-no} = no],
-  [enable_mixed_mode=no],
-  [enable_mixed_mode=yes])
 AC_ARG_WITH([mpi],
   [AS_HELP_STRING([--with-mpi],
     [Build with MPI support.  This option will be ignored if --disable-fortran-flag-setting is also given. (Default yes)])])
@@ -110,6 +104,13 @@ AS_IF([test ${enable_8byte_int:-no} = yes],
   [enable_8byte_int=yes],
   [enable_8byte_int=no])
 
+AC_ARG_ENABLE([r8-default],
+  [AS_HELP_STRING([--disable-r8-default],
+    [Disables the build from adding the 8 byte default real kind flag during compilation (default no)])])
+AS_IF([test ${enable_r8_default:-yes} = yes],
+  [enable_r8_default=yes],
+  [enable_r8_default=no])
+
 # user enabled testing with input files
 AC_MSG_CHECKING([whether to enable tests with input files])
 AC_ARG_ENABLE([test-input],

From 2e25a51599c0e5cf94c6deb8850cc654f1c0007a Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 12:03:44 -0600
Subject: [PATCH 36/45] Update intel versions

Grabbed what GFDL uses in github_autotools_intel_openapi.yml (but I skip
building with YAML support for now)
---
 .github/workflows/intel_pr.yml | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/intel_pr.yml b/.github/workflows/intel_pr.yml
index e609c03044..0f2fa589ba 100644
--- a/.github/workflows/intel_pr.yml
+++ b/.github/workflows/intel_pr.yml
@@ -27,24 +27,27 @@ jobs:
       name: Build netcdf
       run: |
         mkdir /libs
-        wget https://hdf-wordpress-1.s3.amazonaws.com/wp-content/uploads/manual/HDF5/HDF5_1_12_2/source/hdf5-1.12.2.tar.gz
-        tar xf hdf5-1.12.2.tar.gz && cd hdf5-1.12.2
+        # hdf5
+        wget https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_6/downloads/hdf5-1.14.6.tar.gz
+        tar xf hdf5-1.14.6.tar.gz && cd hdf5-1.14.6
         ./configure --prefix=/libs
         make -j install && cd ..
-        wget https://github.com/Unidata/netcdf-c/archive/refs/tags/v4.8.1.tar.gz
-        tar xf v4.8.1.tar.gz && cd netcdf-c-4.8.1
-        ./configure --prefix=/libs --enable-remote-fortran-bootstrap --disable-libxml2
-        make -j install
-        # sets this here to pass embeded configure checks
-        export LD_LIBRARY_PATH="/libs/lib:$LD_LIBRARY_PATH"
-        make -j -k build-netcdf-fortran
-        make -j install-netcdf-fortran
+        # netcdf-c
+        wget https://github.com/Unidata/netcdf-c/archive/refs/tags/v4.9.3.tar.gz
+        tar xf v4.9.3.tar.gz && cd netcdf-c-4.9.3
+        ./configure --prefix=/libs --disable-libxml2 --enable-netcdf-4 --enable-shared
+        make -j install && cd ..
+        # netcdf-fortran
+        wget https://github.com/Unidata/netcdf-fortran/archive/refs/tags/v4.6.2.tar.gz
+        tar xf v4.6.2.tar.gz && cd netcdf-fortran-4.6.2
         ./configure --prefix=/libs
-        make -j install && cd
+        make -j install && cd ..
     - name: checkout
       uses: actions/checkout@v2
     - name: Configure
-      run: autoreconf -if ./configure.ac && ./configure
+      run: autoreconf -if ./configure.ac
+           export LD_LIBRARY_PATH="/libs/lib:$LD_LIBRARY_PATH"
+           ./configure
     - name: Compile
       run: make -j || make
     - name: Run test suite

From 630d6c37de4825a70193493a8e7d7d1664886ba2 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 12:56:58 -0600
Subject: [PATCH 37/45] Run multiple commands for configure

---
 .github/workflows/intel_pr.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/intel_pr.yml b/.github/workflows/intel_pr.yml
index 0f2fa589ba..9dd292c9e0 100644
--- a/.github/workflows/intel_pr.yml
+++ b/.github/workflows/intel_pr.yml
@@ -45,9 +45,10 @@ jobs:
     - name: checkout
       uses: actions/checkout@v2
     - name: Configure
-      run: autoreconf -if ./configure.ac
-           export LD_LIBRARY_PATH="/libs/lib:$LD_LIBRARY_PATH"
-           ./configure
+      run: |
+        autoreconf -if ./configure.ac
+        export LD_LIBRARY_PATH="/libs/lib:$LD_LIBRARY_PATH"
+        ./configure
     - name: Compile
       run: make -j || make
     - name: Run test suite

From de4f21035574e5d9d217b8ebcdf2fa4e54f2fbd6 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 13:06:19 -0600
Subject: [PATCH 38/45] Add more YAML to configure.ac

I'm not sure why configure is failing; it's complaining about

./configure: line 19069: syntax error near unexpected token `FCFLAGS="$FCFLAGS $FC_MOD_CASE_FLAG"'
./configure: line 19069: `GX_FC_MOD_CASE_FLAG(FCFLAGS="$FCFLAGS $FC_MOD_CASE_FLAG")'

But I didn't change anything in the GX_FC_MOD_CASE part of the file
---
 configure.ac | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure.ac b/configure.ac
index 7bb6a736ea..70e84a0ee5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -191,8 +191,10 @@ if test $with_yaml = yes; then
   AC_DEFINE([use_yaml], [1], [This is required to use yaml parser])
 
   AM_CONDITIONAL([SKIP_PARSER_TESTS], false )
+  AM_CONDITIONAL([USING_YAML], true)
 else
   AM_CONDITIONAL([SKIP_PARSER_TESTS], true )
+  AM_CONDITIONAL([USING_YAML], false)
 fi
 
 # Require netCDF

From fed3dfa067b2609beb6d465d6a5b4df14ec1c53d Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 13:12:28 -0600
Subject: [PATCH 39/45] Update gx_fortran_options

I think I found where GX_FC_MOD_CASE_FLAG should be set
---
 m4/gx_fortran_options.m4 | 50 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/m4/gx_fortran_options.m4 b/m4/gx_fortran_options.m4
index 2bb5e7b90e..4f79d1670f 100644
--- a/m4/gx_fortran_options.m4
+++ b/m4/gx_fortran_options.m4
@@ -405,3 +405,53 @@ AC_DEFUN([_GX_FORTRAN_PP_SRCEXT_POP], [
 AS_VAR_SET_IF([_gx_fortran_pp_srcext_save], [ac_ext=${_gx_fortran_pp_srcext_save}], [ac_ext="f"])
 unset _gx_fortran_pp_srcext_save
 ])# _GX_FORTRAN_PP_SRCEXT_POP
+
+
+# GX_FC_MOD_CASE_FLAG([ACTION-IF-SUCCESS], [ACTION-IF-FAILURE])
+# -----------------------------------------------------------------------------
+# Checks if the output .mod files are in uppercase by default.
+# This has only been seen with cray compiler. A flag is required to disable the uppercase output
+# and instead follow the case of the module name as written in the source file.
+#
+# Sets the variable FC_MOD_CASE_FLAG to hold the flag that disables the behavior.
+#
+AC_DEFUN([GX_FC_MOD_CASE_FLAG],[
+AC_LANG_PUSH([Fortran])
+AC_FC_SRCEXT(F90)
+AC_CACHE_CHECK([if Fortran flag needed for module output case], [gx_cv_fc_mod_case_flag],[
+gx_cv_fc_mod_case_flag="unknown"
+gx_mod_case_flag_FCFLAGS_save=$FCFLAGS
+
+FCFLAGS="$gx_mod_case_flag_FCFLAGS_save -c"
+AC_COMPILE_IFELSE([[module foo
+end module foo]],
+                  [],
+                  AC_MSG_ERROR(["Failed to compile test module with -c"]))
+
+# if output .mod file is capitalized, add the flag and check that it works
+if test -f "FOO.${FC_MODEXT}"; then
+  FCFLAGS="$gx_mod_case_flag_FCFLAGS_save -c -ef"
+  AC_COMPILE_IFELSE([[module foo
+                    end module mod]],
+    [])
+  if test -f "foo.${FC_MODEXT}"; then
+    gx_cv_fc_mod_case_flag="-ef"
+  fi
+else
+  gx_cv_fc_mod_case_flag=no
+fi
+
+rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
+FCFLAGS=$gx_mod_case_flag_FCFLAGS_save
+])
+
+if test "x$gx_cv_fc_mod_case_flag" = "xunknown"; then
+  m4_default([$2],
+              [AC_MSG_ERROR([No working flag found to disable module filename capitalization])])
+elif test "x$gx_cv_fc_mod_case_flag" != "xno"; then
+  FC_MOD_CASE_FLAG=$gx_cv_fc_mod_case_flag
+  AC_SUBST([FC_MOD_CASE_FLAG])
+  $1
+fi
+AC_LANG_POP([Fortran])
+])

From a7a762bf69111ef16b2b6ba0ad74c060bdcef2ff Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 14:27:25 -0600
Subject: [PATCH 40/45] Drop skip_mosaic_tests stuff

---
 configure.ac | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/configure.ac b/configure.ac
index 70e84a0ee5..2abcb17b00 100644
--- a/configure.ac
+++ b/configure.ac
@@ -304,13 +304,6 @@ if test $with_mpi = yes; then
   AC_DEFINE([use_libMPI], [1], [This is required for the library to build])
 fi
 
-# builds with r8 default unless disable flag is given
-if test $enable_r8_default = yes; then
-  am_conditional([skip_mosaic_tests], false)
-else
-  am_conditional([skip_mosaic_tests], true)
-fi
-
 # Set any required compile flags.  This will not be done if the user wants to
 # define all their own flags.
 if test $enable_setting_flags = yes; then

From 6acf0a216b2fbdc8058805769d62790edaf30dec Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 14:41:58 -0600
Subject: [PATCH 41/45] drop nc_strerror

The main branch of FMS doesn't have the mosaic/ directory (just mosaic2/); I'm
trying to get this to compile without completely removing this subdirectory
because I think the directory name shows up quite a bit in autoconf files
---
 mosaic/read_mosaic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mosaic/read_mosaic.c b/mosaic/read_mosaic.c
index b855d4267b..a2f1172b6b 100644
--- a/mosaic/read_mosaic.c
+++ b/mosaic/read_mosaic.c
@@ -41,7 +41,7 @@ void handle_netcdf_error(const char *msg, int status )
 {
   char errmsg[512];
 
-  sprintf( errmsg, "%s: %s", msg, (char *)nc_strerror(status) );
+  sprintf( errmsg, "%s: %d", msg, status );
   error_handler(errmsg);
 
 } /* handle_netcdf_error */

From 6d60cc38022beb4244710562d949130eccd8bda6 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 14:56:17 -0600
Subject: [PATCH 42/45] Remove mosaic (leave mosaic2)

---
 CMakeLists.txt                  |    8 -
 Makefile.am                     |    1 -
 configure.ac                    |    2 -
 mosaic/Makefile.am              |   60 -
 mosaic/constant.h               |   20 -
 mosaic/create_xgrid.c           | 3088 -------------------------------
 mosaic/create_xgrid.h           |  157 --
 mosaic/gradient.F90             |  161 --
 mosaic/gradient_c2l.c           |  470 -----
 mosaic/gradient_c2l.h           |   56 -
 mosaic/grid.F90                 | 1036 -----------
 mosaic/interp.c                 |  394 ----
 mosaic/interp.h                 |   43 -
 mosaic/mosaic.F90               |  496 -----
 mosaic/mosaic_util.c            | 1368 --------------
 mosaic/mosaic_util.h            |  170 --
 mosaic/read_mosaic.c            |  838 ---------
 mosaic/read_mosaic.h            |  119 --
 test_fms/mosaic/Makefile.am     |   48 -
 test_fms/mosaic/test_mosaic.F90 |  145 --
 test_fms/mosaic/test_mosaic2.sh |   38 -
 21 files changed, 8718 deletions(-)
 delete mode 100644 mosaic/Makefile.am
 delete mode 100644 mosaic/constant.h
 delete mode 100644 mosaic/create_xgrid.c
 delete mode 100644 mosaic/create_xgrid.h
 delete mode 100644 mosaic/gradient.F90
 delete mode 100644 mosaic/gradient_c2l.c
 delete mode 100644 mosaic/gradient_c2l.h
 delete mode 100644 mosaic/grid.F90
 delete mode 100644 mosaic/interp.c
 delete mode 100644 mosaic/interp.h
 delete mode 100644 mosaic/mosaic.F90
 delete mode 100644 mosaic/mosaic_util.c
 delete mode 100644 mosaic/mosaic_util.h
 delete mode 100644 mosaic/read_mosaic.c
 delete mode 100644 mosaic/read_mosaic.h
 delete mode 100644 test_fms/mosaic/Makefile.am
 delete mode 100644 test_fms/mosaic/test_mosaic.F90
 delete mode 100755 test_fms/mosaic/test_mosaic2.sh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 473d8b91f9..41ff6885ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -159,9 +159,6 @@ list(APPEND fms_fortran_src_files
   memutils/memutils.F90
   monin_obukhov/monin_obukhov_inter.F90
   monin_obukhov/monin_obukhov.F90
-  mosaic/gradient.F90
-  mosaic/grid.F90
-  mosaic/mosaic.F90
   mosaic2/grid2.F90
   mosaic2/mosaic2.F90
   mpp/mpp.F90
@@ -195,11 +192,6 @@ list(APPEND fms_fortran_src_files
 # Collect FMS C source files
 list(APPEND fms_c_src_files
   affinity/affinity.c
-  mosaic/create_xgrid.c
-  mosaic/gradient_c2l.c
-  mosaic/interp.c
-  mosaic/mosaic_util.c
-  mosaic/read_mosaic.c
   mpp/mpp_memuse.c
   parser/yaml_parser_binding.c
   parser/yaml_output_functions.c
diff --git a/Makefile.am b/Makefile.am
index ffb12344ea..9f6b8cbf7b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -46,7 +46,6 @@ SUBDIRS = \
   fms \
   parser \
   affinity \
-  mosaic \
   time_manager \
   axis_utils \
   diag_manager \
diff --git a/configure.ac b/configure.ac
index 2abcb17b00..bab10f2ebc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -457,7 +457,6 @@ AC_CONFIG_FILES([
   tridiagonal/Makefile
   tracer_manager/Makefile
   topography/Makefile
-  mosaic/Makefile
   mosaic2/Makefile
   monin_obukhov/Makefile
   memutils/Makefile
@@ -498,7 +497,6 @@ AC_CONFIG_FILES([
   test_fms/horiz_interp/Makefile
   test_fms/field_manager/Makefile
   test_fms/axis_utils/Makefile
-  test_fms/mosaic/Makefile
   test_fms/affinity/Makefile
   test_fms/coupler/Makefile
   test_fms/parser/Makefile
diff --git a/mosaic/Makefile.am b/mosaic/Makefile.am
deleted file mode 100644
index d097207105..0000000000
--- a/mosaic/Makefile.am
+++ /dev/null
@@ -1,60 +0,0 @@
-#***********************************************************************
-#*                   GNU Lesser General Public License
-#*
-#* This file is part of the GFDL Flexible Modeling System (FMS).
-#*
-#* FMS is free software: you can redistribute it and/or modify it under
-#* the terms of the GNU Lesser General Public License as published by
-#* the Free Software Foundation, either version 3 of the License, or (at
-#* your option) any later version.
-#*
-#* FMS is distributed in the hope that it will be useful, but WITHOUT
-#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-#* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#* for more details.
-#*
-#* You should have received a copy of the GNU Lesser General Public
-#* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
-#***********************************************************************
-
-# This is an automake file for the mosaic directory of the FMS
-# package.
-
-# Ed Hartnett 2/22/19
-
-# Include .h and .mod files.
-AM_CPPFLAGS = -I$(top_srcdir)/include
-AM_FCFLAGS = $(FC_MODINC). $(FC_MODOUT)$(MODDIR)
-
-# Build these uninstalled convenience libraries.
-
-noinst_LTLIBRARIES = libmosaic.la
-
-libmosaic_la_SOURCES = \
-mosaic.F90 \
-grid.F90 \
-gradient.F90 \
-create_xgrid.c \
-gradient_c2l.c \
-interp.c \
-mosaic_util.c \
-read_mosaic.c \
-constant.h \
-create_xgrid.h \
-gradient_c2l.h \
-interp.h \
-mosaic_util.h \
-read_mosaic.h
-
-# Some mods are dependant on other mods in this dir.
-grid_mod.$(FC_MODEXT): mosaic_mod.$(FC_MODEXT)
-
-# Mod files are built and then installed as headers.
-MODFILES = \
-  mosaic_mod.$(FC_MODEXT) \
-  grid_mod.$(FC_MODEXT) \
-  gradient_mod.$(FC_MODEXT)
-nodist_include_HEADERS = $(MODFILES)
-BUILT_SOURCES = $(MODFILES)
-
-include $(top_srcdir)/mkmods.mk
diff --git a/mosaic/constant.h b/mosaic/constant.h
deleted file mode 100644
index 7dc75e3526..0000000000
--- a/mosaic/constant.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#define RADIUS        (6371000.)
-#define STRING        255
diff --git a/mosaic/create_xgrid.c b/mosaic/create_xgrid.c
deleted file mode 100644
index 7698303b92..0000000000
--- a/mosaic/create_xgrid.c
+++ /dev/null
@@ -1,3088 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include "mosaic_util.h"
-#include "create_xgrid.h"
-#include "constant.h"
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
-
-#define AREA_RATIO_THRESH (1.e-6)
-#define MASK_THRESH       (0.5)
-#define EPSLN8            (1.e-8)
-#define EPSLN30           (1.0e-30)
-#define EPSLN10           (1.0e-10)
-#define R2D (180/M_PI)
-#define TPI (2.0*M_PI)
-
-/** \file
- *  \ingroup mosaic
- *  \brief Grid creation and calculation functions for use in @ref mosaic_mod
- * /
-
-/*******************************************************************************
-  int get_maxxgrid
-  return constants MAXXGRID.
-*******************************************************************************/
-int get_maxxgrid(void)
-{
-  return MAXXGRID;
-}
-
-int get_maxxgrid_(void)
-{
-  return get_maxxgrid();
-}
-
-
-/*******************************************************************************
-void get_grid_area(const int *nlon, const int *nlat, const double *lon, const double *lat, const double *area)
-  return the grid area.
-*******************************************************************************/
-void get_grid_area_(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area)
-{
-  get_grid_area(nlon, nlat, lon, lat, area);
-}
-
-void get_grid_area(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area)
-{
-  int nx, ny, nxp, i, j, n_in;
-  double x_in[20], y_in[20];
-
-  nx = *nlon;
-  ny = *nlat;
-  nxp = nx + 1;
-
-  for(j=0; j<ny; j++) for(i=0; i < nx; i++) {
-      x_in[0] = lon[j*nxp+i];
-      x_in[1] = lon[j*nxp+i+1];
-      x_in[2] = lon[(j+1)*nxp+i+1];
-      x_in[3] = lon[(j+1)*nxp+i];
-      y_in[0] = lat[j*nxp+i];
-      y_in[1] = lat[j*nxp+i+1];
-      y_in[2] = lat[(j+1)*nxp+i+1];
-      y_in[3] = lat[(j+1)*nxp+i];
-      n_in = fix_lon(x_in, y_in, 4, M_PI);
-      area[j*nx+i] = poly_area(x_in, y_in, n_in);
-    }
-
-}  /* get_grid_area */
-
-
-/*******************************************************************************
-void get_grid_area_ug(const int *npts, const double *lon, const double *lat, const double *area)
-  return the grid area.
-*******************************************************************************/
-void get_grid_area_ug_(const int *npts, const double *lon, const double *lat, double *area)
-{
-  get_grid_area_ug(npts, lon, lat, area);
-}
-
-void get_grid_area_ug(const int *npts, const double *lon, const double *lat, double *area)
-{
-  int nl, l, n_in, nv;
-  double x_in[20], y_in[20];
-
-  nl = *npts;
-  nv = 4;
-
-  for(l=0; l<nl; l++) {
-      x_in[0] = lon[l*nv];
-      x_in[1] = lon[l*nv+1];
-      x_in[2] = lon[l*nv+2];
-      x_in[3] = lon[l*nv+3];
-      y_in[0] = lat[l*nv];
-      y_in[1] = lat[l*nv+1];
-      y_in[2] = lat[l*nv+2];
-      y_in[3] = lat[l*nv+3];
-      n_in = fix_lon(x_in, y_in, nv, M_PI);
-      area[l] = poly_area(x_in, y_in, n_in);
-    }
-
-}  /* get_grid_area_ug */
-
-
-void get_grid_great_circle_area_(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area)
-{
-  get_grid_great_circle_area(nlon, nlat, lon, lat, area);
-
-}
-
-void get_grid_great_circle_area(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area)
-{
-  int nx, ny, nxp, nyp, i, j;
-  int n0, n1, n2, n3;
-  struct Node *grid=NULL;
-  double *x=NULL, *y=NULL, *z=NULL;
-
-
-  nx = *nlon;
-  ny = *nlat;
-  nxp = nx + 1;
-  nyp = ny + 1;
-
-  x = (double *)malloc(nxp*nyp*sizeof(double));
-  y = (double *)malloc(nxp*nyp*sizeof(double));
-  z = (double *)malloc(nxp*nyp*sizeof(double));
-
-  latlon2xyz(nxp*nyp, lon, lat, x, y, z);
-
-  for(j=0; j<ny; j++) for(i=0; i < nx; i++) {
-      /* clockwise */
-      n0 = j*nxp+i;
-      n1 = (j+1)*nxp+i;
-      n2 = (j+1)*nxp+i+1;
-      n3 = j*nxp+i+1;
-      rewindList();
-      grid = getNext();
-      addEnd(grid, x[n0], y[n0], z[n0], 0, 0, 0, -1);
-      addEnd(grid, x[n1], y[n1], z[n1], 0, 0, 0, -1);
-      addEnd(grid, x[n2], y[n2], z[n2], 0, 0, 0, -1);
-      addEnd(grid, x[n3], y[n3], z[n3], 0, 0, 0, -1);
-      area[j*nx+i] = gridArea(grid);
-    }
-
-  free(x);
-  free(y);
-  free(z);
-
-}  /* get_grid_great_circle_area */
-
-void get_grid_great_circle_area_ug_(const int *npts, const double *lon, const double *lat, double *area)
-{
-  get_grid_great_circle_area_ug(npts, lon, lat, area);
-
-}
-
-void get_grid_great_circle_area_ug(const int *npts, const double *lon, const double *lat, double *area)
-{
-  int l, nl, nv;
-  int n0, n1, n2, n3;
-  struct Node *grid=NULL;
-  double *x=NULL, *y=NULL, *z=NULL;
-
-  nl = *npts;
-  nv = 4;
-
-  x = (double *)malloc(nl*nv*sizeof(double));
-  y = (double *)malloc(nl*nv*sizeof(double));
-  z = (double *)malloc(nl*nv*sizeof(double));
-
-  latlon2xyz(nl*nv, lon, lat, x, y, z);
-
-  for(l=0; l<nv; l++) {
-      /* clockwise */
-      n0 = l*nv;
-      n1 = l*nv+1;
-      n2 = l*nv+2;
-      n3 = l*nv+3;
-      rewindList();
-      grid = getNext();
-      addEnd(grid, x[n0], y[n0], z[n0], 0, 0, 0, -1);
-      addEnd(grid, x[n1], y[n1], z[n1], 0, 0, 0, -1);
-      addEnd(grid, x[n2], y[n2], z[n2], 0, 0, 0, -1);
-      addEnd(grid, x[n3], y[n3], z[n3], 0, 0, 0, -1);
-      area[l] = gridArea(grid);
-    }
-
-  free(x);
-  free(y);
-  free(z);
-
-}  /* get_grid_great_circle_area_ug */
-
-void get_grid_area_dimensionless(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area)
-{
-  int nx, ny, nxp, i, j, n_in;
-  double x_in[20], y_in[20];
-
-  nx = *nlon;
-  ny = *nlat;
-  nxp = nx + 1;
-
-  for(j=0; j<ny; j++) for(i=0; i < nx; i++) {
-      x_in[0] = lon[j*nxp+i];
-      x_in[1] = lon[j*nxp+i+1];
-      x_in[2] = lon[(j+1)*nxp+i+1];
-      x_in[3] = lon[(j+1)*nxp+i];
-      y_in[0] = lat[j*nxp+i];
-      y_in[1] = lat[j*nxp+i+1];
-      y_in[2] = lat[(j+1)*nxp+i+1];
-      y_in[3] = lat[(j+1)*nxp+i];
-      n_in = fix_lon(x_in, y_in, 4, M_PI);
-      area[j*nx+i] = poly_area_dimensionless(x_in, y_in, n_in);
-    }
-
-}  /* get_grid_area */
-
-
-
-void get_grid_area_no_adjust(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area)
-{
-  int nx, ny, nxp, i, j, n_in;
-  double x_in[20], y_in[20];
-
-  nx = *nlon;
-  ny = *nlat;
-  nxp = nx + 1;
-
-  for(j=0; j<ny; j++) for(i=0; i < nx; i++) {
-      x_in[0] = lon[j*nxp+i];
-      x_in[1] = lon[j*nxp+i+1];
-      x_in[2] = lon[(j+1)*nxp+i+1];
-      x_in[3] = lon[(j+1)*nxp+i];
-      y_in[0] = lat[j*nxp+i];
-      y_in[1] = lat[j*nxp+i+1];
-      y_in[2] = lat[(j+1)*nxp+i+1];
-      y_in[3] = lat[(j+1)*nxp+i];
-      n_in = 4;
-      area[j*nx+i] = poly_area_no_adjust(x_in, y_in, n_in);
-    }
-
-}  /* get_grid_area_no_adjust */
-
-/*******************************************************************************
-  void create_xgrid_1dx2d_order1
-  This routine generate exchange grids between two grids for the first order
-  conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell
-  and lon_in,lat_in are 1-D grid bounds, lon_out,lat_out are geographic grid location of grid cell bounds.
-*******************************************************************************/
-int create_xgrid_1dx2d_order1_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, double *xgrid_area)
-{
-  int nxgrid;
-
-  nxgrid = create_xgrid_1dx2d_order1(nlon_in, nlat_in, nlon_out, nlat_out, lon_in, lat_in, lon_out, lat_out, mask_in,
-                                     i_in, j_in, i_out, j_out, xgrid_area);
-  return nxgrid;
-
-}
-
-int create_xgrid_1dx2d_order1(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in,
-                              const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out,
-                              int *j_out, double *xgrid_area)
-{
-
-  int nx1, ny1, nx2, ny2, nx1p, nx2p;
-  int i1, j1, i2, j2, nxgrid;
-  double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV];
-  double *area_in, *area_out, min_area;
-  double *tmpx, *tmpy;
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nx2 = *nlon_out;
-  ny2 = *nlat_out;
-
-  nxgrid = 0;
-  nx1p = nx1 + 1;
-  nx2p = nx2 + 1;
-
-  area_in = (double *)malloc(nx1*ny1*sizeof(double));
-  area_out = (double *)malloc(nx2*ny2*sizeof(double));
-  tmpx = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double));
-  tmpy = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double));
-  for(j1=0; j1<=ny1; j1++) for(i1=0; i1<=nx1; i1++) {
-      tmpx[j1*nx1p+i1] = lon_in[i1];
-      tmpy[j1*nx1p+i1] = lat_in[j1];
-    }
-  /* This is just a temporary fix to solve the issue that there is one point in zonal direction */
-  if(nx1 > 1)
-    get_grid_area(nlon_in, nlat_in, tmpx, tmpy, area_in);
-  else
-    get_grid_area_no_adjust(nlon_in, nlat_in, tmpx, tmpy, area_in);
-
-  get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out);
-  free(tmpx);
-  free(tmpy);
-
-  for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-
-        ll_lon = lon_in[i1];   ll_lat = lat_in[j1];
-        ur_lon = lon_in[i1+1]; ur_lat = lat_in[j1+1];
-        for(j2=0; j2<ny2; j2++) for(i2=0; i2<nx2; i2++) {
-            int n_in, n_out;
-            double Xarea;
-
-            y_in[0] = lat_out[j2*nx2p+i2];
-            y_in[1] = lat_out[j2*nx2p+i2+1];
-            y_in[2] = lat_out[(j2+1)*nx2p+i2+1];
-            y_in[3] = lat_out[(j2+1)*nx2p+i2];
-            if (  (y_in[0]<=ll_lat) && (y_in[1]<=ll_lat)
-                  && (y_in[2]<=ll_lat) && (y_in[3]<=ll_lat) ) continue;
-            if (  (y_in[0]>=ur_lat) && (y_in[1]>=ur_lat)
-                  && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue;
-
-            x_in[0] = lon_out[j2*nx2p+i2];
-            x_in[1] = lon_out[j2*nx2p+i2+1];
-            x_in[2] = lon_out[(j2+1)*nx2p+i2+1];
-            x_in[3] = lon_out[(j2+1)*nx2p+i2];
-            n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2);
-
-            if ( (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) {
-              Xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]);
-              if( Xarea/min_area > AREA_RATIO_THRESH ) {
-                xgrid_area[nxgrid] = Xarea;
-                i_in[nxgrid]    = i1;
-                j_in[nxgrid]    = j1;
-                i_out[nxgrid]   = i2;
-                j_out[nxgrid]   = j2;
-                ++nxgrid;
-                if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID");
-              }
-            }
-          }
-      }
-
-  free(area_in);
-  free(area_out);
-
-  return nxgrid;
-
-} /* create_xgrid_1dx2d_order1 */
-
-
-/*******************************************************************************
-  void create_xgrid_1dx2d_order1_ug
-  This routine generate exchange grids between two grids for the first order
-  conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell
-  and lon_in,lat_in are 1-D grid bounds, lon_out,lat_out are geographic grid location of grid cell bounds.
-*******************************************************************************/
-int create_xgrid_1dx2d_order1_ug_(const int *nlon_in, const int *nlat_in, const int *npts_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *l_out, double *xgrid_area)
-{
-  int nxgrid;
-
-  nxgrid = create_xgrid_1dx2d_order1_ug(nlon_in, nlat_in, npts_out, lon_in, lat_in, lon_out, lat_out, mask_in,
-                                     i_in, j_in, l_out, xgrid_area);
-  return nxgrid;
-
-}
-
-int create_xgrid_1dx2d_order1_ug(const int *nlon_in, const int *nlat_in, const int *npts_out, const double *lon_in,
-                              const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *l_out, double *xgrid_area)
-{
-
-  int nx1, ny1, nx1p, nv, npts2;
-  int i1, j1, l2, nxgrid;
-  double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV];
-  double *area_in, *area_out, min_area;
-  double *tmpx, *tmpy;
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nv  = 4;
-  npts2 = *npts_out;
-
-  nxgrid = 0;
-  nx1p = nx1 + 1;
-
-  area_in = (double *)malloc(nx1*ny1*sizeof(double));
-  area_out = (double *)malloc(npts2*sizeof(double));
-  tmpx = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double));
-  tmpy = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double));
-  for(j1=0; j1<=ny1; j1++) for(i1=0; i1<=nx1; i1++) {
-      tmpx[j1*nx1p+i1] = lon_in[i1];
-      tmpy[j1*nx1p+i1] = lat_in[j1];
-    }
-  /* This is just a temporary fix to solve the issue that there is one point in zonal direction */
-  if(nx1 > 1)
-    get_grid_area(nlon_in, nlat_in, tmpx, tmpy, area_in);
-  else
-    get_grid_area_no_adjust(nlon_in, nlat_in, tmpx, tmpy, area_in);
-
-  get_grid_area_ug(npts_out, lon_out, lat_out, area_out);
-  free(tmpx);
-  free(tmpy);
-
-  for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-
-        ll_lon = lon_in[i1];   ll_lat = lat_in[j1];
-        ur_lon = lon_in[i1+1]; ur_lat = lat_in[j1+1];
-        for(l2=0; l2<npts2; l2++) {
-            int n_in, n_out;
-            double Xarea;
-
-            y_in[0] = lat_out[l2*nv];
-            y_in[1] = lat_out[l2*nv+1];
-            y_in[2] = lat_out[l2*nv+2];
-            y_in[3] = lat_out[l2*nv+3];
-            if (  (y_in[0]<=ll_lat) && (y_in[1]<=ll_lat)
-                  && (y_in[2]<=ll_lat) && (y_in[3]<=ll_lat) ) continue;
-            if (  (y_in[0]>=ur_lat) && (y_in[1]>=ur_lat)
-                  && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue;
-
-            x_in[0] = lon_out[l2*nv];
-            x_in[1] = lon_out[l2*nv+1];
-            x_in[2] = lon_out[l2*nv+2];
-            x_in[3] = lon_out[l2*nv+3];
-            n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2);
-
-            if ( (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) {
-              Xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area_in[j1*nx1+i1], area_out[l2]);
-              if( Xarea/min_area > AREA_RATIO_THRESH ) {
-                xgrid_area[nxgrid] = Xarea;
-                i_in[nxgrid]    = i1;
-                j_in[nxgrid]    = j1;
-                l_out[nxgrid]   = l2;
-                ++nxgrid;
-                if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID");
-              }
-            }
-          }
-      }
-
-  free(area_in);
-  free(area_out);
-
-  return nxgrid;
-
-} /* create_xgrid_1dx2d_order1_ug */
-
-/********************************************************************************
-  void create_xgrid_1dx2d_order2
-  This routine generate exchange grids between two grids for the second order
-  conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell
-  and lon_in,lat_in are 1-D grid bounds, lon_out,lat_out are geographic grid location of grid cell bounds.
-********************************************************************************/
-int create_xgrid_1dx2d_order2_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-  int nxgrid;
-  nxgrid = create_xgrid_1dx2d_order2(nlon_in, nlat_in, nlon_out, nlat_out, lon_in, lat_in, lon_out, lat_out, mask_in, i_in,
-                                     j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat);
-  return nxgrid;
-
-}
-int create_xgrid_1dx2d_order2(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-
-  int nx1, ny1, nx2, ny2, nx1p, nx2p;
-  int i1, j1, i2, j2, nxgrid;
-  double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV];
-  double *area_in, *area_out, min_area;
-  double *tmpx, *tmpy;
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nx2 = *nlon_out;
-  ny2 = *nlat_out;
-
-  nxgrid = 0;
-  nx1p = nx1 + 1;
-  nx2p = nx2 + 1;
-
-  area_in      = (double *)malloc(nx1*ny1*sizeof(double));
-  area_out     = (double *)malloc(nx2*ny2*sizeof(double));
-  tmpx = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double));
-  tmpy = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double));
-  for(j1=0; j1<=ny1; j1++) for(i1=0; i1<=nx1; i1++) {
-      tmpx[j1*nx1p+i1] = lon_in[i1];
-      tmpy[j1*nx1p+i1] = lat_in[j1];
-    }
-  get_grid_area(nlon_in, nlat_in, tmpx, tmpy, area_in);
-  get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out);
-  free(tmpx);
-  free(tmpy);
-
-  for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-
-        ll_lon = lon_in[i1];   ll_lat = lat_in[j1];
-        ur_lon = lon_in[i1+1]; ur_lat = lat_in[j1+1];
-        for(j2=0; j2<ny2; j2++) for(i2=0; i2<nx2; i2++) {
-            int n_in, n_out;
-            double xarea, lon_in_avg;
-
-            y_in[0] = lat_out[j2*nx2p+i2];
-            y_in[1] = lat_out[j2*nx2p+i2+1];
-            y_in[2] = lat_out[(j2+1)*nx2p+i2+1];
-            y_in[3] = lat_out[(j2+1)*nx2p+i2];
-            if (  (y_in[0]<=ll_lat) && (y_in[1]<=ll_lat)
-                  && (y_in[2]<=ll_lat) && (y_in[3]<=ll_lat) ) continue;
-            if (  (y_in[0]>=ur_lat) && (y_in[1]>=ur_lat)
-                  && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue;
-
-            x_in[0] = lon_out[j2*nx2p+i2];
-            x_in[1] = lon_out[j2*nx2p+i2+1];
-            x_in[2] = lon_out[(j2+1)*nx2p+i2+1];
-            x_in[3] = lon_out[(j2+1)*nx2p+i2];
-            n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2);
-            lon_in_avg = avgval_double(n_in, x_in);
-
-            if (  (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) {
-              xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]);
-              if(xarea/min_area > AREA_RATIO_THRESH ) {
-                xgrid_area[nxgrid] = xarea;
-                xgrid_clon[nxgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg);
-                xgrid_clat[nxgrid] = poly_ctrlat (x_out, y_out, n_out );
-                i_in[nxgrid]    = i1;
-                j_in[nxgrid]    = j1;
-                i_out[nxgrid]   = i2;
-                j_out[nxgrid]   = j2;
-                ++nxgrid;
-                if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID");
-              }
-            }
-          }
-      }
-  free(area_in);
-  free(area_out);
-
-  return nxgrid;
-
-} /* create_xgrid_1dx2d_order2 */
-
-/*******************************************************************************
-  void create_xgrid_2dx1d_order1
-  This routine generate exchange grids between two grids for the first order
-  conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell
-  and lon_out,lat_out are 1-D grid bounds, lon_in,lat_in are geographic grid location of grid cell bounds.
-  mask is on grid lon_in/lat_in.
-*******************************************************************************/
-int create_xgrid_2dx1d_order1_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out,
-                               int *j_out, double *xgrid_area)
-{
-  int nxgrid;
-
-  nxgrid = create_xgrid_2dx1d_order1(nlon_in, nlat_in, nlon_out, nlat_out, lon_in, lat_in, lon_out, lat_out, mask_in,
-                                     i_in, j_in, i_out, j_out, xgrid_area);
-  return nxgrid;
-
-}
-int create_xgrid_2dx1d_order1(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in,
-                              const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out,
-                              int *j_out, double *xgrid_area)
-{
-
-  int nx1, ny1, nx2, ny2, nx1p, nx2p;
-  int i1, j1, i2, j2, nxgrid;
-  double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV];
-  double *area_in, *area_out, min_area;
-  double *tmpx, *tmpy;
-  int n_in, n_out;
-  double Xarea;
-
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nx2 = *nlon_out;
-  ny2 = *nlat_out;
-
-  nxgrid = 0;
-  nx1p = nx1 + 1;
-  nx2p = nx2 + 1;
-  area_in = (double *)malloc(nx1*ny1*sizeof(double));
-  area_out = (double *)malloc(nx2*ny2*sizeof(double));
-  tmpx = (double *)malloc((nx2+1)*(ny2+1)*sizeof(double));
-  tmpy = (double *)malloc((nx2+1)*(ny2+1)*sizeof(double));
-  for(j2=0; j2<=ny2; j2++) for(i2=0; i2<=nx2; i2++) {
-      tmpx[j2*nx2p+i2] = lon_out[i2];
-      tmpy[j2*nx2p+i2] = lat_out[j2];
-    }
-  get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in);
-  get_grid_area(nlon_out, nlat_out, tmpx, tmpy, area_out);
-
-  free(tmpx);
-  free(tmpy);
-
-  for(j2=0; j2<ny2; j2++) for(i2=0; i2<nx2; i2++) {
-
-      ll_lon = lon_out[i2];   ll_lat = lat_out[j2];
-      ur_lon = lon_out[i2+1]; ur_lat = lat_out[j2+1];
-      for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-
-            y_in[0] = lat_in[j1*nx1p+i1];
-            y_in[1] = lat_in[j1*nx1p+i1+1];
-            y_in[2] = lat_in[(j1+1)*nx1p+i1+1];
-            y_in[3] = lat_in[(j1+1)*nx1p+i1];
-            if (  (y_in[0]<=ll_lat) && (y_in[1]<=ll_lat)
-                  && (y_in[2]<=ll_lat) && (y_in[3]<=ll_lat) ) continue;
-            if (  (y_in[0]>=ur_lat) && (y_in[1]>=ur_lat)
-                  && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue;
-
-            x_in[0] = lon_in[j1*nx1p+i1];
-            x_in[1] = lon_in[j1*nx1p+i1+1];
-            x_in[2] = lon_in[(j1+1)*nx1p+i1+1];
-            x_in[3] = lon_in[(j1+1)*nx1p+i1];
-
-            n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2);
-
-            if ( (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) {
-              Xarea = poly_area ( x_out, y_out, n_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]);
-              if( Xarea/min_area > AREA_RATIO_THRESH ) {
-                xgrid_area[nxgrid] = Xarea;
-                i_in[nxgrid]    = i1;
-                j_in[nxgrid]    = j1;
-                i_out[nxgrid]   = i2;
-                j_out[nxgrid]   = j2;
-                ++nxgrid;
-                if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID");
-              }
-            }
-          }
-    }
-
-  free(area_in);
-  free(area_out);
-
-  return nxgrid;
-
-} /* create_xgrid_2dx1d_order1 */
-
-
-/********************************************************************************
-  void create_xgrid_2dx1d_order2
-  This routine generate exchange grids between two grids for the second order
-  conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell
-  and lon_out,lat_out are 1-D grid bounds, lon_in,lat_in are geographic grid location of grid cell bounds.
-  mask is on grid lon_in/lat_in.
-********************************************************************************/
-int create_xgrid_2dx1d_order2_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-  int nxgrid;
-  nxgrid = create_xgrid_2dx1d_order2(nlon_in, nlat_in, nlon_out, nlat_out, lon_in, lat_in, lon_out, lat_out, mask_in, i_in,
-                                     j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat);
-  return nxgrid;
-
-}
-
-int create_xgrid_2dx1d_order2(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-
-  int nx1, ny1, nx2, ny2, nx1p, nx2p;
-  int i1, j1, i2, j2, nxgrid;
-  double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV];
-  double *tmpx, *tmpy;
-  double *area_in, *area_out, min_area;
-  double  lon_in_avg;
-  int n_in, n_out;
-  double xarea;
-
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nx2 = *nlon_out;
-  ny2 = *nlat_out;
-
-  nxgrid = 0;
-  nx1p = nx1 + 1;
-  nx2p = nx2 + 1;
-
-  area_in      = (double *)malloc(nx1*ny1*sizeof(double));
-  area_out     = (double *)malloc(nx2*ny2*sizeof(double));
-  tmpx = (double *)malloc((nx2+1)*(ny2+1)*sizeof(double));
-  tmpy = (double *)malloc((nx2+1)*(ny2+1)*sizeof(double));
-  for(j2=0; j2<=ny2; j2++) for(i2=0; i2<=nx2; i2++) {
-      tmpx[j2*nx2p+i2] = lon_out[i2];
-      tmpy[j2*nx2p+i2] = lat_out[j2];
-    }
-  get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in);
-  get_grid_area(nlon_out, nlat_out, tmpx, tmpy, area_out);
-
-  free(tmpx);
-  free(tmpy);
-
-  for(j2=0; j2<ny2; j2++) for(i2=0; i2<nx2; i2++) {
-
-      ll_lon = lon_out[i2];   ll_lat = lat_out[j2];
-      ur_lon = lon_out[i2+1]; ur_lat = lat_out[j2+1];
-      for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-
-            y_in[0] = lat_in[j1*nx1p+i1];
-            y_in[1] = lat_in[j1*nx1p+i1+1];
-            y_in[2] = lat_in[(j1+1)*nx1p+i1+1];
-            y_in[3] = lat_in[(j1+1)*nx1p+i1];
-            if (  (y_in[0]<=ll_lat) && (y_in[1]<=ll_lat)
-                  && (y_in[2]<=ll_lat) && (y_in[3]<=ll_lat) ) continue;
-            if (  (y_in[0]>=ur_lat) && (y_in[1]>=ur_lat)
-                  && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue;
-
-            x_in[0] = lon_in[j1*nx1p+i1];
-            x_in[1] = lon_in[j1*nx1p+i1+1];
-            x_in[2] = lon_in[(j1+1)*nx1p+i1+1];
-            x_in[3] = lon_in[(j1+1)*nx1p+i1];
-
-            n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2);
-            lon_in_avg = avgval_double(n_in, x_in);
-
-            if (  (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) {
-              xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]);
-              if(xarea/min_area > AREA_RATIO_THRESH ) {
-                xgrid_area[nxgrid] = xarea;
-                xgrid_clon[nxgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg);
-                xgrid_clat[nxgrid] = poly_ctrlat (x_out, y_out, n_out );
-                i_in[nxgrid]  = i1;
-                j_in[nxgrid]  = j1;
-                i_out[nxgrid] = i2;
-                j_out[nxgrid] = j2;
-                ++nxgrid;
-                if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID");
-              }
-            }
-          }
-    }
-
-  free(area_in);
-  free(area_out);
-
-  return nxgrid;
-
-} /* create_xgrid_2dx1d_order2 */
-
-/*******************************************************************************
-  void create_xgrid_2DX2D_order1
-  This routine generate exchange grids between two grids for the first order
-  conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell
-  and lon_in,lat_in, lon_out,lat_out are geographic grid location of grid cell bounds.
-  mask is on grid lon_in/lat_in.
-*******************************************************************************/
-int create_xgrid_2dx2d_order1_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out,
-                               int *j_out, double *xgrid_area)
-{
-  int nxgrid;
-
-  nxgrid = create_xgrid_2dx2d_order1(nlon_in, nlat_in, nlon_out, nlat_out, lon_in, lat_in, lon_out, lat_out, mask_in,
-                                     i_in, j_in, i_out, j_out, xgrid_area);
-  return nxgrid;
-
-}
-int create_xgrid_2dx2d_order1(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out,
-                              int *j_out, double *xgrid_area)
-{
-
-#define MAX_V 8
-  int nx1, nx2, ny1, ny2, nx1p, nx2p, nxgrid;
-  double *area_in, *area_out;
-  int nblocks =1;
-  int *istart2=NULL, *iend2=NULL;
-  int npts_left, nblks_left, pos, m, npts_my, ij;
-  double *lon_out_min_list,*lon_out_max_list,*lon_out_avg,*lat_out_min_list,*lat_out_max_list;
-  double *lon_out_list, *lat_out_list;
-  int *pnxgrid=NULL, *pstart;
-  int *pi_in=NULL, *pj_in=NULL, *pi_out=NULL, *pj_out=NULL;
-  double *pxgrid_area=NULL;
-  int    *n2_list;
-  int nthreads, nxgrid_block_max;
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nx2 = *nlon_out;
-  ny2 = *nlat_out;
-  nx1p = nx1 + 1;
-  nx2p = nx2 + 1;
-
-  area_in  = (double *)malloc(nx1*ny1*sizeof(double));
-  area_out = (double *)malloc(nx2*ny2*sizeof(double));
-  get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in);
-  get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out);
-
-  nthreads = 1;
-#if defined(_OPENMP)
-#pragma omp parallel
-  nthreads = omp_get_num_threads();
-#endif
-
-  nblocks = nthreads;
-
-  istart2 = (int *)malloc(nblocks*sizeof(int));
-  iend2 = (int *)malloc(nblocks*sizeof(int));
-
-  pstart = (int *)malloc(nblocks*sizeof(int));
-  pnxgrid = (int *)malloc(nblocks*sizeof(int));
-
-  nxgrid_block_max = MAXXGRID/nblocks;
-
-  for(m=0; m<nblocks; m++) {
-    pnxgrid[m] = 0;
-    pstart[m] = m*nxgrid_block_max;
-  }
-
-  if(nblocks == 1) {
-    pi_in = i_in;
-    pj_in = j_in;
-    pi_out = i_out;
-    pj_out = j_out;
-    pxgrid_area = xgrid_area;
-  }
-  else {
-    pi_in = (int *)malloc(MAXXGRID*sizeof(int));
-    pj_in = (int *)malloc(MAXXGRID*sizeof(int));
-    pi_out = (int *)malloc(MAXXGRID*sizeof(int));
-    pj_out = (int *)malloc(MAXXGRID*sizeof(int));
-    pxgrid_area = (double *)malloc(MAXXGRID*sizeof(double));
-  }
-
-  npts_left = nx2*ny2;
-  nblks_left = nblocks;
-  pos = 0;
-  for(m=0; m<nblocks; m++) {
-    istart2[m] = pos;
-    npts_my = npts_left/nblks_left;
-    iend2[m] = istart2[m] + npts_my - 1;
-    pos = iend2[m] + 1;
-    npts_left -= npts_my;
-    nblks_left--;
-  }
-
-  lon_out_min_list = (double *)malloc(nx2*ny2*sizeof(double));
-  lon_out_max_list = (double *)malloc(nx2*ny2*sizeof(double));
-  lat_out_min_list = (double *)malloc(nx2*ny2*sizeof(double));
-  lat_out_max_list = (double *)malloc(nx2*ny2*sizeof(double));
-  lon_out_avg = (double *)malloc(nx2*ny2*sizeof(double));
-  n2_list     = (int *)malloc(nx2*ny2*sizeof(int));
-  lon_out_list = (double *)malloc(MAX_V*nx2*ny2*sizeof(double));
-  lat_out_list = (double *)malloc(MAX_V*nx2*ny2*sizeof(double));
-#if defined(_OPENMP)
-#pragma omp parallel for default(none) shared(nx2,ny2,nx2p,lon_out,lat_out,lat_out_min_list, \
-                                              lat_out_max_list,lon_out_min_list,lon_out_max_list, \
-                                              lon_out_avg,n2_list,lon_out_list,lat_out_list)
-#endif
-  for(ij=0; ij<nx2*ny2; ij++){
-    int i2, j2, n, n0, n1, n2, n3, n2_in, l;
-    double x2_in[MV], y2_in[MV];
-    i2 = ij%nx2;
-    j2 = ij/nx2;
-    n = j2*nx2+i2;
-    n0 = j2*nx2p+i2; n1 = j2*nx2p+i2+1;
-    n2 = (j2+1)*nx2p+i2+1; n3 = (j2+1)*nx2p+i2;
-    x2_in[0] = lon_out[n0]; y2_in[0] = lat_out[n0];
-    x2_in[1] = lon_out[n1]; y2_in[1] = lat_out[n1];
-    x2_in[2] = lon_out[n2]; y2_in[2] = lat_out[n2];
-    x2_in[3] = lon_out[n3]; y2_in[3] = lat_out[n3];
-
-    lat_out_min_list[n] = minval_double(4, y2_in);
-    lat_out_max_list[n] = maxval_double(4, y2_in);
-    n2_in = fix_lon(x2_in, y2_in, 4, M_PI);
-    if(n2_in > MAX_V) error_handler("create_xgrid.c: n2_in is greater than MAX_V");
-    lon_out_min_list[n] = minval_double(n2_in, x2_in);
-    lon_out_max_list[n] = maxval_double(n2_in, x2_in);
-    lon_out_avg[n] = avgval_double(n2_in, x2_in);
-    n2_list[n] = n2_in;
-    for(l=0; l<n2_in; l++) {
-      lon_out_list[n*MAX_V+l] = x2_in[l];
-      lat_out_list[n*MAX_V+l] = y2_in[l];
-    }
-  }
-
-  nxgrid = 0;
-
-#if defined(_OPENMP)
-#pragma omp parallel for default(none) shared(nblocks,nx1,ny1,nx1p,mask_in,lon_in,lat_in, \
-                                              istart2,iend2,nx2,lat_out_min_list,lat_out_max_list, \
-                                              n2_list,lon_out_list,lat_out_list,lon_out_min_list, \
-                                              lon_out_max_list,lon_out_avg,area_in,area_out, \
-                                              pxgrid_area,pnxgrid,pi_in,pj_in,pi_out,pj_out,pstart,nthreads)
-#endif
-  for(m=0; m<nblocks; m++) {
-    int i1, j1, ij;
-    for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-          int n0, n1, n2, n3, l,n1_in;
-          double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg;
-          double x1_in[MV], y1_in[MV], x_out[MV], y_out[MV];
-
-          n0 = j1*nx1p+i1;       n1 = j1*nx1p+i1+1;
-          n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1;
-          x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0];
-          x1_in[1] = lon_in[n1]; y1_in[1] = lat_in[n1];
-          x1_in[2] = lon_in[n2]; y1_in[2] = lat_in[n2];
-          x1_in[3] = lon_in[n3]; y1_in[3] = lat_in[n3];
-          lat_in_min = minval_double(4, y1_in);
-          lat_in_max = maxval_double(4, y1_in);
-          n1_in = fix_lon(x1_in, y1_in, 4, M_PI);
-          lon_in_min = minval_double(n1_in, x1_in);
-          lon_in_max = maxval_double(n1_in, x1_in);
-          lon_in_avg = avgval_double(n1_in, x1_in);
-          for(ij=istart2[m]; ij<=iend2[m]; ij++) {
-            int n_out, i2, j2, n2_in;
-            double xarea, dx, lon_out_min, lon_out_max;
-            double x2_in[MAX_V], y2_in[MAX_V];
-
-            i2 = ij%nx2;
-            j2 = ij/nx2;
-
-            if(lat_out_min_list[ij] >= lat_in_max || lat_out_max_list[ij] <= lat_in_min ) continue;
-            /* adjust x2_in according to lon_in_avg*/
-            n2_in = n2_list[ij];
-            for(l=0; l<n2_in; l++) {
-              x2_in[l] = lon_out_list[ij*MAX_V+l];
-              y2_in[l] = lat_out_list[ij*MAX_V+l];
-            }
-            lon_out_min = lon_out_min_list[ij];
-            lon_out_max = lon_out_max_list[ij];
-            dx = lon_out_avg[ij] - lon_in_avg;
-            if(dx < -M_PI ) {
-              lon_out_min += TPI;
-              lon_out_max += TPI;
-              for (l=0; l<n2_in; l++) x2_in[l] += TPI;
-            }
-            else if (dx >  M_PI) {
-              lon_out_min -= TPI;
-              lon_out_max -= TPI;
-              for (l=0; l<n2_in; l++) x2_in[l] -= TPI;
-            }
-
-            /* x2_in should in the same range as x1_in after lon_fix, so no need to
-               consider cyclic condition
-            */
-            if(lon_out_min >= lon_in_max || lon_out_max <= lon_in_min ) continue;
-            if (  (n_out = clip_2dx2d( x1_in, y1_in, n1_in, x2_in, y2_in, n2_in, x_out, y_out )) > 0) {
-              double min_area;
-              int    nn;
-              xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]);
-              if( xarea/min_area > AREA_RATIO_THRESH ) {
-                pnxgrid[m]++;
-                if(pnxgrid[m]>= MAXXGRID/nthreads)
-                  error_handler("nxgrid is greater than MAXXGRID/nthreads, increase MAXXGRID, decrease nthreads, or increase number of MPI ranks");
-                nn = pstart[m] + pnxgrid[m]-1;
-
-                pxgrid_area[nn] = xarea;
-                pi_in[nn]       = i1;
-                pj_in[nn]       = j1;
-                pi_out[nn]      = i2;
-                pj_out[nn]      = j2;
-              }
-
-            }
-
-          }
-        }
-  }
-
-  /*copy data if nblocks > 1 */
-  if(nblocks == 1) {
-    nxgrid = pnxgrid[0];
-    pi_in = NULL;
-    pj_in = NULL;
-    pi_out = NULL;
-    pj_out = NULL;
-    pxgrid_area = NULL;
-  }
-  else {
-    int nn, i;
-    nxgrid = 0;
-    for(m=0; m<nblocks; m++) {
-      for(i=0; i<pnxgrid[m]; i++) {
-        nn = pstart[m] + i;
-        i_in[nxgrid] = pi_in[nn];
-        j_in[nxgrid] = pj_in[nn];
-        i_out[nxgrid] = pi_out[nn];
-        j_out[nxgrid] = pj_out[nn];
-        xgrid_area[nxgrid] = pxgrid_area[nn];
-        nxgrid++;
-      }
-    }
-    free(pi_in);
-    free(pj_in);
-    free(pi_out);
-    free(pj_out);
-    free(pxgrid_area);
-  }
-
-  free(area_in);
-  free(area_out);
-  free(lon_out_min_list);
-  free(lon_out_max_list);
-  free(lat_out_min_list);
-  free(lat_out_max_list);
-  free(lon_out_avg);
-  free(n2_list);
-  free(lon_out_list);
-  free(lat_out_list);
-
-  return nxgrid;
-
-}/* get_xgrid_2Dx2D_order1 */
-
-/********************************************************************************
-  void create_xgrid_2dx1d_order2
-  This routine generate exchange grids between two grids for the second order
-  conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell
-  and lon_in,lat_in, lon_out,lat_out are geographic grid location of grid cell bounds.
-  mask is on grid lon_in/lat_in.
-********************************************************************************/
-int create_xgrid_2dx2d_order2_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-  int nxgrid;
-  nxgrid = create_xgrid_2dx2d_order2(nlon_in, nlat_in, nlon_out, nlat_out, lon_in, lat_in, lon_out, lat_out, mask_in, i_in,
-                                     j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat);
-  return nxgrid;
-
-}
-int create_xgrid_2dx2d_order2(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-
-#define MAX_V 8
-  int nx1, nx2, ny1, ny2, nx1p, nx2p, nxgrid;
-  double *area_in, *area_out;
-  int nblocks =1;
-  int *istart2=NULL, *iend2=NULL;
-  int npts_left, nblks_left, pos, m, npts_my, ij;
-  double *lon_out_min_list,*lon_out_max_list,*lon_out_avg,*lat_out_min_list,*lat_out_max_list;
-  double *lon_out_list, *lat_out_list;
-  int *pnxgrid=NULL, *pstart;
-  int *pi_in=NULL, *pj_in=NULL, *pi_out=NULL, *pj_out=NULL;
-  double *pxgrid_area=NULL, *pxgrid_clon=NULL, *pxgrid_clat=NULL;
-  int    *n2_list;
-  int nthreads, nxgrid_block_max;
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nx2 = *nlon_out;
-  ny2 = *nlat_out;
-  nx1p = nx1 + 1;
-  nx2p = nx2 + 1;
-
-  area_in  = (double *)malloc(nx1*ny1*sizeof(double));
-  area_out = (double *)malloc(nx2*ny2*sizeof(double));
-  get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in);
-  get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out);
-
-  nthreads = 1;
-#if defined(_OPENMP)
-#pragma omp parallel
-  nthreads = omp_get_num_threads();
-#endif
-
-  nblocks = nthreads;
-
-  istart2 = (int *)malloc(nblocks*sizeof(int));
-  iend2 = (int *)malloc(nblocks*sizeof(int));
-
-  pstart = (int *)malloc(nblocks*sizeof(int));
-  pnxgrid = (int *)malloc(nblocks*sizeof(int));
-
-  nxgrid_block_max = MAXXGRID/nblocks;
-
-  for(m=0; m<nblocks; m++) {
-    pnxgrid[m] = 0;
-    pstart[m] = m*nxgrid_block_max;
-  }
-
-  if(nblocks == 1) {
-    pi_in = i_in;
-    pj_in = j_in;
-    pi_out = i_out;
-    pj_out = j_out;
-    pxgrid_area = xgrid_area;
-    pxgrid_clon = xgrid_clon;
-    pxgrid_clat = xgrid_clat;
-  }
-  else {
-    pi_in = (int *)malloc(MAXXGRID*sizeof(int));
-    pj_in = (int *)malloc(MAXXGRID*sizeof(int));
-    pi_out = (int *)malloc(MAXXGRID*sizeof(int));
-    pj_out = (int *)malloc(MAXXGRID*sizeof(int));
-    pxgrid_area = (double *)malloc(MAXXGRID*sizeof(double));
-    pxgrid_clon = (double *)malloc(MAXXGRID*sizeof(double));
-    pxgrid_clat = (double *)malloc(MAXXGRID*sizeof(double));
-  }
-
-  npts_left = nx2*ny2;
-  nblks_left = nblocks;
-  pos = 0;
-  for(m=0; m<nblocks; m++) {
-    istart2[m] = pos;
-    npts_my = npts_left/nblks_left;
-    iend2[m] = istart2[m] + npts_my - 1;
-    pos = iend2[m] + 1;
-    npts_left -= npts_my;
-    nblks_left--;
-  }
-
-  lon_out_min_list = (double *)malloc(nx2*ny2*sizeof(double));
-  lon_out_max_list = (double *)malloc(nx2*ny2*sizeof(double));
-  lat_out_min_list = (double *)malloc(nx2*ny2*sizeof(double));
-  lat_out_max_list = (double *)malloc(nx2*ny2*sizeof(double));
-  lon_out_avg = (double *)malloc(nx2*ny2*sizeof(double));
-  n2_list     = (int *)malloc(nx2*ny2*sizeof(int));
-  lon_out_list = (double *)malloc(MAX_V*nx2*ny2*sizeof(double));
-  lat_out_list = (double *)malloc(MAX_V*nx2*ny2*sizeof(double));
-#if defined(_OPENMP)
-#pragma omp parallel for default(none) shared(nx2,ny2,nx2p,lon_out,lat_out,lat_out_min_list, \
-                                              lat_out_max_list,lon_out_min_list,lon_out_max_list, \
-                                              lon_out_avg,n2_list,lon_out_list,lat_out_list)
-#endif
-  for(ij=0; ij<nx2*ny2; ij++){
-    int i2, j2, n, n0, n1, n2, n3, n2_in, l;
-    double x2_in[MV], y2_in[MV];
-    i2 = ij%nx2;
-    j2 = ij/nx2;
-    n = j2*nx2+i2;
-    n0 = j2*nx2p+i2; n1 = j2*nx2p+i2+1;
-    n2 = (j2+1)*nx2p+i2+1; n3 = (j2+1)*nx2p+i2;
-    x2_in[0] = lon_out[n0]; y2_in[0] = lat_out[n0];
-    x2_in[1] = lon_out[n1]; y2_in[1] = lat_out[n1];
-    x2_in[2] = lon_out[n2]; y2_in[2] = lat_out[n2];
-    x2_in[3] = lon_out[n3]; y2_in[3] = lat_out[n3];
-
-    lat_out_min_list[n] = minval_double(4, y2_in);
-    lat_out_max_list[n] = maxval_double(4, y2_in);
-    n2_in = fix_lon(x2_in, y2_in, 4, M_PI);
-    if(n2_in > MAX_V) error_handler("create_xgrid.c: n2_in is greater than MAX_V");
-    lon_out_min_list[n] = minval_double(n2_in, x2_in);
-    lon_out_max_list[n] = maxval_double(n2_in, x2_in);
-    lon_out_avg[n] = avgval_double(n2_in, x2_in);
-    n2_list[n] = n2_in;
-    for(l=0; l<n2_in; l++) {
-      lon_out_list[n*MAX_V+l] = x2_in[l];
-      lat_out_list[n*MAX_V+l] = y2_in[l];
-    }
-  }
-
-  nxgrid = 0;
-
-#if defined(_OPENMP)
-#pragma omp parallel for default(none) shared(nblocks,nx1,ny1,nx1p,mask_in,lon_in,lat_in, \
-                                              istart2,iend2,nx2,lat_out_min_list,lat_out_max_list, \
-                                              n2_list,lon_out_list,lat_out_list,lon_out_min_list, \
-                                              lon_out_max_list,lon_out_avg,area_in,area_out, \
-                                              pxgrid_area,pnxgrid,pxgrid_clon,pxgrid_clat,pi_in, \
-                                              pj_in,pi_out,pj_out,pstart,nthreads)
-#endif
-  for(m=0; m<nblocks; m++) {
-    int i1, j1, ij;
-    for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-          int n0, n1, n2, n3, l,n1_in;
-          double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg;
-          double x1_in[MV], y1_in[MV], x_out[MV], y_out[MV];
-
-          n0 = j1*nx1p+i1;       n1 = j1*nx1p+i1+1;
-          n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1;
-          x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0];
-          x1_in[1] = lon_in[n1]; y1_in[1] = lat_in[n1];
-          x1_in[2] = lon_in[n2]; y1_in[2] = lat_in[n2];
-          x1_in[3] = lon_in[n3]; y1_in[3] = lat_in[n3];
-          lat_in_min = minval_double(4, y1_in);
-          lat_in_max = maxval_double(4, y1_in);
-          n1_in = fix_lon(x1_in, y1_in, 4, M_PI);
-          lon_in_min = minval_double(n1_in, x1_in);
-          lon_in_max = maxval_double(n1_in, x1_in);
-          lon_in_avg = avgval_double(n1_in, x1_in);
-          for(ij=istart2[m]; ij<=iend2[m]; ij++) {
-            int n_out, i2, j2, n2_in;
-            double xarea, dx, lon_out_min, lon_out_max;
-            double x2_in[MAX_V], y2_in[MAX_V];
-
-            i2 = ij%nx2;
-            j2 = ij/nx2;
-
-            if(lat_out_min_list[ij] >= lat_in_max || lat_out_max_list[ij] <= lat_in_min ) continue;
-            /* adjust x2_in according to lon_in_avg*/
-            n2_in = n2_list[ij];
-            for(l=0; l<n2_in; l++) {
-              x2_in[l] = lon_out_list[ij*MAX_V+l];
-              y2_in[l] = lat_out_list[ij*MAX_V+l];
-            }
-            lon_out_min = lon_out_min_list[ij];
-            lon_out_max = lon_out_max_list[ij];
-            dx = lon_out_avg[ij] - lon_in_avg;
-            if(dx < -M_PI ) {
-              lon_out_min += TPI;
-              lon_out_max += TPI;
-              for (l=0; l<n2_in; l++) x2_in[l] += TPI;
-            }
-            else if (dx >  M_PI) {
-              lon_out_min -= TPI;
-              lon_out_max -= TPI;
-              for (l=0; l<n2_in; l++) x2_in[l] -= TPI;
-            }
-
-            /* x2_in should in the same range as x1_in after lon_fix, so no need to
-               consider cyclic condition
-            */
-            if(lon_out_min >= lon_in_max || lon_out_max <= lon_in_min ) continue;
-            if (  (n_out = clip_2dx2d( x1_in, y1_in, n1_in, x2_in, y2_in, n2_in, x_out, y_out )) > 0) {
-              double min_area;
-              int nn;
-              xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]);
-              if( xarea/min_area > AREA_RATIO_THRESH ) {
-                pnxgrid[m]++;
-                if(pnxgrid[m]>= MAXXGRID/nthreads)
-                  error_handler("nxgrid is greater than MAXXGRID/nthreads, increase MAXXGRID, decrease nthreads, or increase number of MPI ranks");
-                nn = pstart[m] + pnxgrid[m]-1;
-                pxgrid_area[nn] = xarea;
-                pxgrid_clon[nn] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg);
-                pxgrid_clat[nn] = poly_ctrlat (x_out, y_out, n_out );
-                pi_in[nn]       = i1;
-                pj_in[nn]       = j1;
-                pi_out[nn]      = i2;
-                pj_out[nn]      = j2;
-              }
-            }
-          }
-        }
-  }
-
-  /*copy data if nblocks > 1 */
-  if(nblocks == 1) {
-    nxgrid = pnxgrid[0];
-    pi_in = NULL;
-    pj_in = NULL;
-    pi_out = NULL;
-    pj_out = NULL;
-    pxgrid_area = NULL;
-    pxgrid_clon = NULL;
-    pxgrid_clat = NULL;
-  }
-  else {
-    int nn, i;
-    nxgrid = 0;
-    for(m=0; m<nblocks; m++) {
-      for(i=0; i<pnxgrid[m]; i++) {
-        nn = pstart[m] + i;
-        i_in[nxgrid] = pi_in[nn];
-        j_in[nxgrid] = pj_in[nn];
-        i_out[nxgrid] = pi_out[nn];
-        j_out[nxgrid] = pj_out[nn];
-        xgrid_area[nxgrid] = pxgrid_area[nn];
-        xgrid_clon[nxgrid] = pxgrid_clon[nn];
-        xgrid_clat[nxgrid] = pxgrid_clat[nn];
-        nxgrid++;
-      }
-    }
-    free(pi_in);
-    free(pj_in);
-    free(pi_out);
-    free(pj_out);
-    free(pxgrid_area);
-    free(pxgrid_clon);
-    free(pxgrid_clat);
-  }
-
-  free(area_in);
-  free(area_out);
-  free(lon_out_min_list);
-  free(lon_out_max_list);
-  free(lat_out_min_list);
-  free(lat_out_max_list);
-  free(lon_out_avg);
-  free(n2_list);
-  free(lon_out_list);
-  free(lat_out_list);
-
-  return nxgrid;
-
-}/* get_xgrid_2Dx2D_order2 */
-
-
-/*******************************************************************************
-   Sutherland-Hodgeman algorithm sequentially clips parts outside 4 boundaries
-*******************************************************************************/
-
-int clip(const double lon_in[], const double lat_in[], int n_in, double ll_lon, double ll_lat,
-         double ur_lon, double ur_lat, double lon_out[], double lat_out[])
-{
-  double x_tmp[MV], y_tmp[MV], x_last, y_last;
-  int i_in, i_out, n_out, inside_last, inside;
-
-  /* clip polygon with LEFT boundary - clip V_IN to V_TMP */
-  x_last = lon_in[n_in-1];
-  y_last = lat_in[n_in-1];
-  inside_last = (x_last >= ll_lon);
-  for (i_in=0,i_out=0;i_in<n_in;i_in++) {
-
-    /* if crossing LEFT boundary - output intersection */
-    if ((inside=(lon_in[i_in] >= ll_lon))!=inside_last) {
-      x_tmp[i_out] = ll_lon;
-      y_tmp[i_out++] = y_last + (ll_lon - x_last) * (lat_in[i_in] - y_last) / (lon_in[i_in] - x_last);
-    }
-
-    /* if "to" point is right of LEFT boundary, output it */
-    if (inside) {
-      x_tmp[i_out]   = lon_in[i_in];
-      y_tmp[i_out++] = lat_in[i_in];
-    }
-    x_last = lon_in[i_in];
-    y_last = lat_in[i_in];
-    inside_last = inside;
-  }
-  if (!(n_out=i_out)) return(0);
-
-  /* clip polygon with RIGHT boundary - clip V_TMP to V_OUT */
-  x_last = x_tmp[n_out-1];
-  y_last = y_tmp[n_out-1];
-  inside_last = (x_last <= ur_lon);
-  for (i_in=0,i_out=0;i_in<n_out;i_in++) {
-
-    /* if crossing RIGHT boundary - output intersection */
-    if ((inside=(x_tmp[i_in] <= ur_lon))!=inside_last) {
-      lon_out[i_out]   = ur_lon;
-      lat_out[i_out++] = y_last + (ur_lon - x_last) * (y_tmp[i_in] - y_last)
-          / (x_tmp[i_in] - x_last);
-    }
-
-    /* if "to" point is left of RIGHT boundary, output it */
-    if (inside) {
-      lon_out[i_out]   = x_tmp[i_in];
-      lat_out[i_out++] = y_tmp[i_in];
-    }
-
-    x_last = x_tmp[i_in];
-    y_last = y_tmp[i_in];
-    inside_last = inside;
-  }
-  if (!(n_out=i_out)) return(0);
-
-  /* clip polygon with BOTTOM boundary - clip V_OUT to V_TMP */
-  x_last = lon_out[n_out-1];
-  y_last = lat_out[n_out-1];
-  inside_last = (y_last >= ll_lat);
-  for (i_in=0,i_out=0;i_in<n_out;i_in++) {
-
-    /* if crossing BOTTOM boundary - output intersection */
-    if ((inside=(lat_out[i_in] >= ll_lat))!=inside_last) {
-      y_tmp[i_out]   = ll_lat;
-      x_tmp[i_out++] = x_last + (ll_lat - y_last) * (lon_out[i_in] - x_last) / (lat_out[i_in] - y_last);
-    }
-
-    /* if "to" point is above BOTTOM boundary, output it */
-    if (inside) {
-      x_tmp[i_out]   = lon_out[i_in];
-      y_tmp[i_out++] = lat_out[i_in];
-    }
-    x_last = lon_out[i_in];
-    y_last = lat_out[i_in];
-    inside_last = inside;
-  }
-  if (!(n_out=i_out)) return(0);
-
-  /* clip polygon with TOP boundary - clip V_TMP to V_OUT */
-  x_last = x_tmp[n_out-1];
-  y_last = y_tmp[n_out-1];
-  inside_last = (y_last <= ur_lat);
-  for (i_in=0,i_out=0;i_in<n_out;i_in++) {
-
-    /* if crossing TOP boundary - output intersection */
-    if ((inside=(y_tmp[i_in] <= ur_lat))!=inside_last) {
-      lat_out[i_out]   = ur_lat;
-      lon_out[i_out++] = x_last + (ur_lat - y_last) * (x_tmp[i_in] - x_last) / (y_tmp[i_in] - y_last);
-    }
-
-    /* if "to" point is below TOP boundary, output it */
-    if (inside) {
-      lon_out[i_out]   = x_tmp[i_in];
-      lat_out[i_out++] = y_tmp[i_in];
-    }
-    x_last = x_tmp[i_in];
-    y_last = y_tmp[i_in];
-    inside_last = inside;
-  }
-  return(i_out);
-} /* clip */
-
-
-/*******************************************************************************
-   Revise Sutherland-Hodgeman algorithm to find the vertices of the overlapping
-   between any two grid boxes. It return the number of vertices for the exchange grid.
-*******************************************************************************/
-
-int clip_2dx2d(const double lon1_in[], const double lat1_in[], int n1_in,
-               const double lon2_in[], const double lat2_in[], int n2_in,
-               double lon_out[], double lat_out[])
-{
-  double lon_tmp[MV], lat_tmp[MV];
-  double x1_0, y1_0, x1_1, y1_1, x2_0, y2_0, x2_1, y2_1;
-  double dx1, dy1, dx2, dy2, determ, ds1, ds2;
-  int i_out, n_out, inside_last, inside, i1, i2;
-
-  /* clip polygon with each boundary of the polygon */
-  /* We treat lon1_in/lat1_in as clip polygon and lon2_in/lat2_in as subject polygon */
-  n_out = n1_in;
-  for(i1=0; i1<n1_in; i1++) {
-    lon_tmp[i1] = lon1_in[i1];
-    lat_tmp[i1] = lat1_in[i1];
-  }
-  x2_0 = lon2_in[n2_in-1];
-  y2_0 = lat2_in[n2_in-1];
-  for(i2=0; i2<n2_in; i2++) {
-    x2_1 = lon2_in[i2];
-    y2_1 = lat2_in[i2];
-    x1_0 = lon_tmp[n_out-1];
-    y1_0 = lat_tmp[n_out-1];
-    inside_last = inside_edge( x2_0, y2_0, x2_1, y2_1, x1_0, y1_0);
-    for(i1=0, i_out=0; i1<n_out; i1++) {
-      x1_1 = lon_tmp[i1];
-      y1_1 = lat_tmp[i1];
-      if((inside = inside_edge(x2_0, y2_0, x2_1, y2_1, x1_1, y1_1)) != inside_last ) {
-        /* there is intersection, the line between <x1_0,y1_0> and  <x1_1,y1_1>
-           should not parallel to the line between <x2_0,y2_0> and  <x2_1,y2_1>
-           may need to consider truncation error */
-        dy1 = y1_1-y1_0;
-        dy2 = y2_1-y2_0;
-        dx1 = x1_1-x1_0;
-        dx2 = x2_1-x2_0;
-        ds1 = y1_0*x1_1 - y1_1*x1_0;
-        ds2 = y2_0*x2_1 - y2_1*x2_0;
-        determ = dy2*dx1 - dy1*dx2;
-        if(fabs(determ) < EPSLN30) {
-          error_handler("the line between <x1_0,y1_0> and  <x1_1,y1_1> should not parallel to "
-                        "the line between <x2_0,y2_0> and  <x2_1,y2_1>");
-        }
-        lon_out[i_out]   = (dx2*ds1 - dx1*ds2)/determ;
-        lat_out[i_out++] = (dy2*ds1 - dy1*ds2)/determ;
-
-
-      }
-      if(inside) {
-        lon_out[i_out]   = x1_1;
-        lat_out[i_out++] = y1_1;
-      }
-      x1_0 = x1_1;
-      y1_0 = y1_1;
-      inside_last = inside;
-    }
-    if(!(n_out=i_out)) return 0;
-    for(i1=0; i1<n_out; i1++) {
-      lon_tmp[i1] = lon_out[i1];
-      lat_tmp[i1] = lat_out[i1];
-    }
-    /* shift the starting point */
-    x2_0 = x2_1;
-    y2_0 = y2_1;
-  }
-  return(n_out);
-} /* clip */
-
-/*#define debug_test_create_xgrid*/
-
-int create_xgrid_great_circle_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-  int nxgrid;
-  nxgrid = create_xgrid_great_circle(nlon_in, nlat_in, nlon_out, nlat_out, lon_in, lat_in, lon_out, lat_out,
-                                     mask_in, i_in, j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat);
-
-  return nxgrid;
-}
-
-int create_xgrid_great_circle(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-
-  int nx1, nx2, ny1, ny2, nx1p, nx2p, ny1p, ny2p, nxgrid, n1_in, n2_in;
-  int n0, n1, n2, n3, i1, j1, i2, j2;
-  double x1_in[MV], y1_in[MV], z1_in[MV];
-  double x2_in[MV], y2_in[MV], z2_in[MV];
-  double x_out[MV], y_out[MV], z_out[MV];
-  double *x1=NULL, *y1=NULL, *z1=NULL;
-  double *x2=NULL, *y2=NULL, *z2=NULL;
-
-  double *area1, *area2, min_area;
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nx2 = *nlon_out;
-  ny2 = *nlat_out;
-  nxgrid = 0;
-  nx1p = nx1 + 1;
-  nx2p = nx2 + 1;
-  ny1p = ny1 + 1;
-  ny2p = ny2 + 1;
-
-  /* first convert lon-lat to cartesian coordinates */
-  x1 = (double *)malloc(nx1p*ny1p*sizeof(double));
-  y1 = (double *)malloc(nx1p*ny1p*sizeof(double));
-  z1 = (double *)malloc(nx1p*ny1p*sizeof(double));
-  x2 = (double *)malloc(nx2p*ny2p*sizeof(double));
-  y2 = (double *)malloc(nx2p*ny2p*sizeof(double));
-  z2 = (double *)malloc(nx2p*ny2p*sizeof(double));
-
-  latlon2xyz(nx1p*ny1p, lon_in, lat_in, x1, y1, z1);
-  latlon2xyz(nx2p*ny2p, lon_out, lat_out, x2, y2, z2);
-
-  area1  = (double *)malloc(nx1*ny1*sizeof(double));
-  area2 = (double *)malloc(nx2*ny2*sizeof(double));
-  get_grid_great_circle_area(nlon_in, nlat_in, lon_in, lat_in, area1);
-  get_grid_great_circle_area(nlon_out, nlat_out, lon_out, lat_out, area2);
-  n1_in = 4;
-  n2_in = 4;
-
-  for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-        /* clockwise */
-        n0 = j1*nx1p+i1;       n1 = (j1+1)*nx1p+i1;
-        n2 = (j1+1)*nx1p+i1+1; n3 = j1*nx1p+i1+1;
-        x1_in[0] = x1[n0]; y1_in[0] = y1[n0]; z1_in[0] = z1[n0];
-        x1_in[1] = x1[n1]; y1_in[1] = y1[n1]; z1_in[1] = z1[n1];
-        x1_in[2] = x1[n2]; y1_in[2] = y1[n2]; z1_in[2] = z1[n2];
-        x1_in[3] = x1[n3]; y1_in[3] = y1[n3]; z1_in[3] = z1[n3];
-
-        for(j2=0; j2<ny2; j2++) for(i2=0; i2<nx2; i2++) {
-            int n_out;
-            double xarea;
-
-            n0 = j2*nx2p+i2;       n1 = (j2+1)*nx2p+i2;
-            n2 = (j2+1)*nx2p+i2+1; n3 = j2*nx2p+i2+1;
-            x2_in[0] = x2[n0]; y2_in[0] = y2[n0]; z2_in[0] = z2[n0];
-            x2_in[1] = x2[n1]; y2_in[1] = y2[n1]; z2_in[1] = z2[n1];
-            x2_in[2] = x2[n2]; y2_in[2] = y2[n2]; z2_in[2] = z2[n2];
-            x2_in[3] = x2[n3]; y2_in[3] = y2[n3]; z2_in[3] = z2[n3];
-
-            if (  (n_out = clip_2dx2d_great_circle( x1_in, y1_in, z1_in, n1_in, x2_in, y2_in, z2_in, n2_in,
-                                                    x_out, y_out, z_out)) > 0) {
-              xarea = great_circle_area ( n_out, x_out, y_out, z_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area1[j1*nx1+i1], area2[j2*nx2+i2]);
-              if( xarea/min_area > AREA_RATIO_THRESH ) {
-#ifdef debug_test_create_xgrid
-                printf("(i2,j2)=(%d,%d), (i1,j1)=(%d,%d), xarea=%g\n", i2, j2, i1, j1, xarea);
-#endif
-                xgrid_area[nxgrid] = xarea;
-                xgrid_clon[nxgrid] = 0; /*z1l: will be developed very soon */
-                xgrid_clat[nxgrid] = 0;
-                i_in[nxgrid]       = i1;
-                j_in[nxgrid]       = j1;
-                i_out[nxgrid]      = i2;
-                j_out[nxgrid]      = j2;
-                ++nxgrid;
-                if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID");
-              }
-            }
-          }
-      }
-
-
-  free(area1);
-  free(area2);
-
-  free(x1);
-  free(y1);
-  free(z1);
-  free(x2);
-  free(y2);
-  free(z2);
-
-  return nxgrid;
-
-}/* create_xgrid_great_circle */
-
-int create_xgrid_great_circle_ug_(const int *nlon_in, const int *nlat_in, const int *npts_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *l_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-  int nxgrid;
-  nxgrid = create_xgrid_great_circle_ug(nlon_in, nlat_in, npts_out, lon_in, lat_in, lon_out, lat_out,
-                                     mask_in, i_in, j_in, l_out, xgrid_area, xgrid_clon, xgrid_clat);
-
-  return nxgrid;
-}
-
-int create_xgrid_great_circle_ug(const int *nlon_in, const int *nlat_in, const int *npts_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *l_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
-{
-
-  int nx1, ny1, npts2, nx1p, ny1p, nxgrid, n1_in, n2_in, nv;
-  int n0, n1, n2, n3, i1, j1, l2;
-  double x1_in[MV], y1_in[MV], z1_in[MV];
-  double x2_in[MV], y2_in[MV], z2_in[MV];
-  double x_out[MV], y_out[MV], z_out[MV];
-  double *x1=NULL, *y1=NULL, *z1=NULL;
-  double *x2=NULL, *y2=NULL, *z2=NULL;
-
-  double *area1, *area2, min_area;
-
-  nx1 = *nlon_in;
-  ny1 = *nlat_in;
-  nv = 4;
-  npts2 = *npts_out;
-  nxgrid = 0;
-  nx1p = nx1 + 1;
-  ny1p = ny1 + 1;
-
-  /* first convert lon-lat to cartesian coordinates */
-  x1 = (double *)malloc(nx1p*ny1p*sizeof(double));
-  y1 = (double *)malloc(nx1p*ny1p*sizeof(double));
-  z1 = (double *)malloc(nx1p*ny1p*sizeof(double));
-  x2 = (double *)malloc(npts2*nv*sizeof(double));
-  y2 = (double *)malloc(npts2*nv*sizeof(double));
-  z2 = (double *)malloc(npts2*nv*sizeof(double));
-
-  latlon2xyz(nx1p*ny1p, lon_in, lat_in, x1, y1, z1);
-  latlon2xyz(npts2*nv, lon_out, lat_out, x2, y2, z2);
-
-  area1  = (double *)malloc(nx1*ny1*sizeof(double));
-  area2 = (double *)malloc(npts2*sizeof(double));
-  get_grid_great_circle_area(nlon_in, nlat_in, lon_in, lat_in, area1);
-  get_grid_great_circle_area_ug(npts_out, lon_out, lat_out, area2);
-  n1_in = 4;
-  n2_in = 4;
-
-  for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
-        /* clockwise */
-        n0 = j1*nx1p+i1;       n1 = (j1+1)*nx1p+i1;
-        n2 = (j1+1)*nx1p+i1+1; n3 = j1*nx1p+i1+1;
-        x1_in[0] = x1[n0]; y1_in[0] = y1[n0]; z1_in[0] = z1[n0];
-        x1_in[1] = x1[n1]; y1_in[1] = y1[n1]; z1_in[1] = z1[n1];
-        x1_in[2] = x1[n2]; y1_in[2] = y1[n2]; z1_in[2] = z1[n2];
-        x1_in[3] = x1[n3]; y1_in[3] = y1[n3]; z1_in[3] = z1[n3];
-
-        for(l2=0; l2<npts2; l2++) {
-            int n_out;
-            double xarea;
-
-            n0 = l2*nv;   n1 = l2*nv+1;
-            n2 = l2*nv+2; n3 = l2*nv+3;
-            x2_in[0] = x2[n0]; y2_in[0] = y2[n0]; z2_in[0] = z2[n0];
-            x2_in[1] = x2[n1]; y2_in[1] = y2[n1]; z2_in[1] = z2[n1];
-            x2_in[2] = x2[n2]; y2_in[2] = y2[n2]; z2_in[2] = z2[n2];
-            x2_in[3] = x2[n3]; y2_in[3] = y2[n3]; z2_in[3] = z2[n3];
-
-            if (  (n_out = clip_2dx2d_great_circle( x1_in, y1_in, z1_in, n1_in, x2_in, y2_in, z2_in, n2_in,
-                                                    x_out, y_out, z_out)) > 0) {
-              xarea = great_circle_area ( n_out, x_out, y_out, z_out ) * mask_in[j1*nx1+i1];
-              min_area = min(area1[j1*nx1+i1], area2[l2]);
-              if( xarea/min_area > AREA_RATIO_THRESH ) {
-#ifdef debug_test_create_xgrid
-                printf("(l2)=(%d,%d), (i1,j1)=(%d,%d), xarea=%g\n", l2, i1, j1, xarea);
-#endif
-                xgrid_area[nxgrid] = xarea;
-                xgrid_clon[nxgrid] = 0; /*z1l: will be developed very soon */
-                xgrid_clat[nxgrid] = 0;
-                i_in[nxgrid]       = i1;
-                j_in[nxgrid]       = j1;
-                l_out[nxgrid]      = l2;
-                ++nxgrid;
-                if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID");
-              }
-            }
-          }
-      }
-
-
-  free(area1);
-  free(area2);
-
-  free(x1);
-  free(y1);
-  free(z1);
-  free(x2);
-  free(y2);
-  free(z2);
-
-  return nxgrid;
-
-}/* create_xgrid_great_circle_ug */
-
-
-/*******************************************************************************
-   Revise Sutherland-Hodgeman algorithm to find the vertices of the overlapping
-   between any two grid boxes. It return the number of vertices for the exchange grid.
-   Each edge of grid box is a part of great circle. All the points are cartesian
-   coordinates. Here we are assuming each polygon is convex.
-   RANGE_CHECK_CRITERIA is used to determine if the two grid boxes are possible to be
-   overlap. The size should be between 0 and 0.5. The larger the range_check_criteria,
-   the more expensive of the computatioin. When the value is close to 0,
-   some small exchange grid might be lost. Suggest to use value 0.05 for C48.
-*******************************************************************************/
-
-int clip_2dx2d_great_circle(const double x1_in[], const double y1_in[], const double z1_in[], int n1_in,
-                            const double x2_in[], const double y2_in[], const double z2_in [], int n2_in,
-                            double x_out[], double y_out[], double z_out[])
-{
-  struct Node *grid1List=NULL;
-  struct Node *grid2List=NULL;
-  struct Node *intersectList=NULL;
-  struct Node *polyList=NULL;
-  struct Node *curList=NULL;
-  struct Node *firstIntersect=NULL, *curIntersect=NULL;
-  struct Node *temp1=NULL, *temp2=NULL, *temp=NULL;
-
-  int    i1, i2, i1p, i2p, i2p2, npts1, npts2;
-  int    nintersect, n_out;
-  int    maxiter1, maxiter2, iter1, iter2;
-  int    found1, found2, curListNum;
-  int    has_inbound, inbound;
-  double pt1[MV][3], pt2[MV][3];
-  double *p1_0=NULL, *p1_1=NULL;
-  double *p2_0=NULL, *p2_1=NULL, *p2_2=NULL;
-  double intersect[3];
-  double u1, u2;
-  double min_x1, max_x1, min_y1, max_y1, min_z1, max_z1;
-  double min_x2, max_x2, min_y2, max_y2, min_z2, max_z2;
-
-
-  /* first check the min and max of (x1_in, y1_in, z1_in) with (x2_in, y2_in, z2_in) */
-  min_x1 = minval_double(n1_in, x1_in);
-  max_x2 = maxval_double(n2_in, x2_in);
-  if(min_x1 >= max_x2+RANGE_CHECK_CRITERIA) return 0;
-  max_x1 = maxval_double(n1_in, x1_in);
-  min_x2 = minval_double(n2_in, x2_in);
-  if(min_x2 >= max_x1+RANGE_CHECK_CRITERIA) return 0;
-
-  min_y1 = minval_double(n1_in, y1_in);
-  max_y2 = maxval_double(n2_in, y2_in);
-  if(min_y1 >= max_y2+RANGE_CHECK_CRITERIA) return 0;
-  max_y1 = maxval_double(n1_in, y1_in);
-  min_y2 = minval_double(n2_in, y2_in);
-  if(min_y2 >= max_y1+RANGE_CHECK_CRITERIA) return 0;
-
-  min_z1 = minval_double(n1_in, z1_in);
-  max_z2 = maxval_double(n2_in, z2_in);
-  if(min_z1 >= max_z2+RANGE_CHECK_CRITERIA) return 0;
-  max_z1 = maxval_double(n1_in, z1_in);
-  min_z2 = minval_double(n2_in, z2_in);
-  if(min_z2 >= max_z1+RANGE_CHECK_CRITERIA) return 0;
-
-  rewindList();
-
-  grid1List = getNext();
-  grid2List = getNext();
-  intersectList = getNext();
-  polyList = getNext();
-
-  /* insert points into SubjList and ClipList */
-  for(i1=0; i1<n1_in; i1++) addEnd(grid1List, x1_in[i1], y1_in[i1], z1_in[i1], 0, 0, 0, -1);
-  for(i2=0; i2<n2_in; i2++) addEnd(grid2List, x2_in[i2], y2_in[i2], z2_in[i2], 0, 0, 0, -1);
-  npts1 = length(grid1List);
-  npts2 = length(grid2List);
-
-  n_out = 0;
-  /* set the inside value */
-#ifdef debug_test_create_xgrid
-  printf("\nNOTE from clip_2dx2d_great_circle: begin to set inside value grid1List\n");
-#endif
-  /* first check number of points in grid1 is inside grid2 */
-
-  temp = grid1List;
-  while(temp) {
-    if(insidePolygon(temp, grid2List))
-      temp->isInside = 1;
-    else
-      temp->isInside = 0;
-    temp = getNextNode(temp);
-  }
-
-#ifdef debug_test_create_xgrid
-  printf("\nNOTE from clip_2dx2d_great_circle: begin to set inside value of grid2List\n");
-#endif
-  /* check if grid2List is inside grid1List */
-  temp = grid2List;
-
-  while(temp) {
-    if(insidePolygon(temp, grid1List))
-      temp->isInside = 1;
-    else
-      temp->isInside = 0;
-    temp = getNextNode(temp);
-  }
-
-  /* make sure the grid box is clockwise */
-
-  /*make sure each polygon is convex, which is equivalent that the great_circle_area is positive */
-  if( gridArea(grid1List) <= 0 )
-    error_handler("create_xgrid.c(clip_2dx2d_great_circle): grid box 1 is not convex");
-  if( gridArea(grid2List) <= 0 )
-    error_handler("create_xgrid.c(clip_2dx2d_great_circle): grid box 2 is not convex");
-
-#ifdef debug_test_create_xgrid
-  printNode(grid1List, "grid1List");
-  printNode(grid2List, "grid2List");
-#endif
-
-  /* get the coordinates from grid1List and grid2List.
-     Please not npts1 might not equal n1_in, npts2 might not equal n2_in because of pole
-  */
-
-  temp = grid1List;
-  for(i1=0; i1<npts1; i1++) {
-    getCoordinates(temp, pt1[i1]);
-    temp = temp->Next;
-  }
-  temp = grid2List;
-  for(i2=0; i2<npts2; i2++) {
-    getCoordinates(temp, pt2[i2]);
-    temp = temp->Next;
-  }
-
-  firstIntersect=getNext();
-  curIntersect = getNext();
-
-#ifdef debug_test_create_xgrid
-  printf("\n\n************************ Start line_intersect_2D_3D ******************************\n");
-#endif
-  /* first find all the intersection points */
-  nintersect = 0;
-  for(i1=0; i1<npts1; i1++) {
-    i1p = (i1+1)%npts1;
-    p1_0 = pt1[i1];
-    p1_1 = pt1[i1p];
-    for(i2=0; i2<npts2; i2++) {
-      i2p = (i2+1)%npts2;
-      i2p2 = (i2+2)%npts2;
-      p2_0 = pt2[i2];
-      p2_1 = pt2[i2p];
-      p2_2 = pt2[i2p2];
-#ifdef debug_test_create_xgrid
-      printf("\n******************************************************************************\n");
-      printf(" i1 = %d, i2 = %d \n", i1, i2);
-      printf("********************************************************************************\n");
-#endif
-      if( line_intersect_2D_3D(p1_0, p1_1, p2_0, p2_1, p2_2, intersect, &u1, &u2, &inbound) ) {
-        /* from the value of u1, u2 and inbound, we can partially decide if a point is inside or outside of polygon */
-
-        /* add the intersection into intersetList, The intersection might already be in
-           intersectList and will be taken care addIntersect
-        */
-        if(addIntersect(intersectList, intersect[0], intersect[1], intersect[2], 1, u1, u2, inbound, i1, i1p, i2, i2p)) {
-          /* add the intersection into the grid1List */
-
-          if(u1 == 1) {
-            insertIntersect(grid1List, intersect[0], intersect[1], intersect[2], 0.0, u2, inbound, p1_1[0], p1_1[1], p1_1[2]);
-          }
-          else
-            insertIntersect(grid1List, intersect[0], intersect[1], intersect[2], u1, u2, inbound, p1_0[0], p1_0[1], p1_0[2]);
-          /* when u1 == 0 or 1, need to adjust the vertice to intersect value for roundoff error */
-          if(u1==1) {
-            p1_1[0] = intersect[0];
-            p1_1[1] = intersect[1];
-            p1_1[2] = intersect[2];
-          }
-          else if(u1 == 0) {
-            p1_0[0] = intersect[0];
-            p1_0[1] = intersect[1];
-            p1_0[2] = intersect[2];
-          }
-          /* add the intersection into the grid2List */
-          if(u2==1)
-            insertIntersect(grid2List, intersect[0], intersect[1], intersect[2], 0.0, u1, 0, p2_1[0], p2_1[1], p2_1[2]);
-          else
-            insertIntersect(grid2List, intersect[0], intersect[1], intersect[2], u2, u1, 0, p2_0[0], p2_0[1], p2_0[2]);
-          /* when u2 == 0 or 1, need to adjust the vertice to intersect value for roundoff error */
-          if(u2==1) {
-            p2_1[0] = intersect[0];
-            p2_1[1] = intersect[1];
-            p2_1[2] = intersect[2];
-          }
-          else if(u2 == 0) {
-            p2_0[0] = intersect[0];
-            p2_0[1] = intersect[1];
-            p2_0[2] = intersect[2];
-          }
-        }
-      }
-    }
-  }
-
-  /* set inbound value for the points in intersectList that has inbound == 0,
-     this will also set some inbound value of the points in grid1List
-  */
-
-  /* get the first point in intersectList has inbound = 2, if not, set inbound value */
-  has_inbound = 0;
-  /* loop through intersectList to see if there is any has inbound=1 or 2 */
-  temp = intersectList;
-  nintersect = length(intersectList);
-  if(nintersect > 1) {
-    getFirstInbound(intersectList, firstIntersect);
-    if(firstIntersect->initialized) {
-      has_inbound = 1;
-    }
-  }
-
-  /* when has_inbound == 0, get the grid1List and grid2List */
-  if( !has_inbound && nintersect > 1) {
-    setInbound(intersectList, grid1List);
-    getFirstInbound(intersectList, firstIntersect);
-    if(firstIntersect->initialized) has_inbound = 1;
-  }
-
-  /* if has_inbound = 1, find the overlapping */
-  n_out = 0;
-
-  if(has_inbound) {
-    maxiter1 = nintersect;
-#ifdef debug_test_create_xgrid
-    printf("\nNOTE from clip_2dx2d_great_circle: number of intersect is %d\n", nintersect);
-    printf("\n size of grid2List is %d, size of grid1List is %d\n", length(grid2List), length(grid1List));
-    printNode(intersectList, "beginning intersection list");
-    printNode(grid2List, "beginning clip list");
-    printNode(grid1List, "beginning subj list");
-    printf("\n************************ End line_intersect_2D_3D **********************************\n\n");
-#endif
-    temp1 = getNode(grid1List, *firstIntersect);
-    if( temp1 == NULL) {
-      double lon[10], lat[10];
-      int i;
-      xyz2latlon(n1_in, x1_in, y1_in, z1_in, lon, lat);
-      for(i=0; i< n1_in; i++) printf("lon1 = %g, lat1 = %g\n", lon[i]*R2D, lat[i]*R2D);
-      printf("\n");
-      xyz2latlon(n2_in, x2_in, y2_in, z2_in, lon, lat);
-      for(i=0; i< n2_in; i++) printf("lon2 = %g, lat2 = %g\n", lon[i]*R2D, lat[i]*R2D);
-      printf("\n");
-
-      error_handler("firstIntersect is not in the grid1List");
-    }
-    addNode(polyList, *firstIntersect);
-    nintersect--;
-#ifdef debug_test_create_xgrid
-    printNode(polyList, "polyList at stage 1");
-#endif
-
-    /* Loop over the grid1List and grid2List to find again the firstIntersect */
-    curList = grid1List;
-    curListNum = 0;
-
-    /* Loop through curList to find the next intersection, the loop will end
-       when come back to firstIntersect
-    */
-    copyNode(curIntersect, *firstIntersect);
-    iter1 = 0;
-    found1 = 0;
-
-    while( iter1 < maxiter1 ) {
-#ifdef debug_test_create_xgrid
-      printf("\n----------- At iteration = %d\n\n", iter1+1 );
-      printNode(curIntersect, "curIntersect at the begining of iter1");
-#endif
-      /* find the curIntersect in curList and get the next intersection points */
-      temp1 =  getNode(curList, *curIntersect);
-      temp2 = temp1->Next;
-      if( temp2 == NULL ) temp2 = curList;
-
-      maxiter2 = length(curList);
-      found2 = 0;
-      iter2  = 0;
-      /* Loop until find the next intersection */
-      while( iter2 < maxiter2 ) {
-        int temp2IsIntersect;
-
-        temp2IsIntersect = 0;
-        if( isIntersect( *temp2 ) ) { /* copy the point and switch to the grid2List */
-          struct Node *temp3;
-
-          /* first check if temp2 is the firstIntersect */
-          if( sameNode( *temp2, *firstIntersect) ) {
-            found1 = 1;
-            break;
-          }
-
-          temp3 = temp2->Next;
-          if( temp3 == NULL) temp3 = curList;
-          if( temp3 == NULL) error_handler("creat_xgrid.c: temp3 can not be NULL");
-          found2 = 1;
-          /* if next node is inside or an intersection,
-             need to keep on curList
-          */
-          temp2IsIntersect = 1;
-          if( isIntersect(*temp3) || (temp3->isInside == 1)  ) found2 = 0;
-        }
-        if(found2) {
-          copyNode(curIntersect, *temp2);
-          break;
-        }
-        else {
-          addNode(polyList, *temp2);
-#ifdef debug_test_create_xgrid
-          printNode(polyList, "polyList at stage 2");
-#endif
-          if(temp2IsIntersect) {
-            nintersect--;
-          }
-        }
-        temp2 = temp2->Next;
-        if( temp2 == NULL ) temp2 = curList;
-        iter2 ++;
-      }
-      if(found1) break;
-
-      if( !found2 ) error_handler(" not found the next intersection ");
-
-      /* if find the first intersection, the poly found */
-      if( sameNode( *curIntersect, *firstIntersect) ) {
-        found1 = 1;
-        break;
-      }
-
-      /* add curIntersect to polyList and remove it from intersectList and curList */
-      addNode(polyList, *curIntersect);
-#ifdef debug_test_create_xgrid
-      printNode(polyList, "polyList at stage 3");
-#endif
-      nintersect--;
-
-
-      /* switch curList */
-      if( curListNum == 0) {
-        curList = grid2List;
-        curListNum = 1;
-      }
-      else {
-        curList = grid1List;
-        curListNum = 0;
-      }
-      iter1++;
-    }
-    if(!found1) error_handler("not return back to the first intersection");
-
-    /* currently we are only clipping convex polygon to convex polygon */
-    if( nintersect > 0) error_handler("After clipping, nintersect should be 0");
-
-    /* copy the polygon to x_out, y_out, z_out */
-    temp1 = polyList;
-    while (temp1 != NULL) {
-      getCoordinate(*temp1, x_out+n_out, y_out+n_out, z_out+n_out);
-      temp1 = temp1->Next;
-      n_out++;
-    }
-
-    /* if(n_out < 3) error_handler(" The clipped region has < 3 vertices"); */
-    if( n_out < 3) n_out = 0;
-#ifdef debug_test_create_xgrid
-    printNode(polyList, "polyList after clipping");
-#endif
-  }
-
-  /* check if grid1 is inside grid2 */
-  if(n_out==0){
-    /* first check number of points in grid1 is inside grid2 */
-    int n, n1in2;
-    /* One possible is that grid1List is inside grid2List */
-#ifdef debug_test_create_xgrid
-    printf("\nNOTE from clip_2dx2d_great_circle: check if grid1 is inside grid2\n");
-#endif
-    n1in2 = 0;
-    temp = grid1List;
-    while(temp) {
-      if(temp->intersect != 1) {
-#ifdef debug_test_create_xgrid
-        printf("grid1->isInside = %d\n", temp->isInside);
-#endif
-        if( temp->isInside == 1) n1in2++;
-      }
-      temp = getNextNode(temp);
-    }
-    if(npts1==n1in2) { /* grid1 is inside grid2 */
-      n_out = npts1;
-      n = 0;
-      temp = grid1List;
-      while( temp ) {
-        getCoordinate(*temp, &x_out[n], &y_out[n], &z_out[n]);
-        n++;
-        temp = getNextNode(temp);
-      }
-    }
-    if(n_out>0) return n_out;
-  }
-
-  /* check if grid2List is inside grid1List */
-  if(n_out ==0){
-    int n, n2in1;
-#ifdef debug_test_create_xgrid
-    printf("\nNOTE from clip_2dx2d_great_circle: check if grid2 is inside grid1\n");
-#endif
-
-    temp = grid2List;
-    n2in1 = 0;
-    while(temp) {
-      if(temp->intersect != 1) {
-#ifdef debug_test_create_xgrid
-        printf("grid2->isInside = %d\n", temp->isInside);
-#endif
-        if( temp->isInside == 1) n2in1++;
-      }
-      temp = getNextNode(temp);
-    }
-
-    if(npts2==n2in1) { /* grid2 is inside grid1 */
-      n_out = npts2;
-      n = 0;
-      temp = grid2List;
-      while( temp ) {
-        getCoordinate(*temp, &x_out[n], &y_out[n], &z_out[n]);
-        n++;
-        temp = getNextNode(temp);
-      }
-
-    }
-  }
-
-
-  return n_out;
-}
-
-
-/* Intersects between the line a and the seqment s
-   where both line and segment are great circle lines on the sphere represented by
-   3D cartesian points.
-   [sin sout] are the ends of a line segment
-   returns true if the lines could be intersected, false otherwise.
-   inbound means the direction of (a1,a2) go inside or outside of (q1,q2,q3)
-*/
-
-int line_intersect_2D_3D(double *a1, double *a2, double *q1, double *q2, double *q3,
-                         double *intersect, double *u_a, double *u_q, int *inbound){
-
-  /* Do this intersection by reprsenting the line a1 to a2 as a plane through the
-     two line points and the origin of the sphere (0,0,0). This is the
-     definition of a great circle arc.
-  */
-  double plane[9];
-  double plane_p[2];
-  double u;
-  double p1[3], v1[3], v2[3];
-  double c1[3], c2[3], c3[3];
-  double coincident, sense, norm;
-  int    i;
-  int is_inter1, is_inter2;
-
-  *inbound = 0;
-
-  /* first check if any vertices are the same */
-  if(samePoint(a1[0], a1[1], a1[2], q1[0], q1[1], q1[2])) {
-    *u_a = 0;
-    *u_q = 0;
-    intersect[0] = a1[0];
-    intersect[1] = a1[1];
-    intersect[2] = a1[2];
-#ifdef debug_test_create_xgrid
-    printf("\nNOTE from line_intersect_2D_3D: u_a = %19.15f, u_q=%19.15f, inbound=%d\n", *u_a, *u_q, *inbound);
-#endif
-    return 1;
-  }
-  else if (samePoint(a1[0], a1[1], a1[2], q2[0], q2[1], q2[2])) {
-    *u_a = 0;
-    *u_q = 1;
-    intersect[0] = a1[0];
-    intersect[1] = a1[1];
-    intersect[2] = a1[2];
-#ifdef debug_test_create_xgrid
-    printf("\nNOTE from line_intersect_2D_3D: u_a = %19.15f, u_q=%19.15f, inbound=%d\n", *u_a, *u_q, *inbound);
-#endif
-    return 1;
-  }
-  else if(samePoint(a2[0], a2[1], a2[2], q1[0], q1[1], q1[2])) {
-#ifdef debug_test_create_xgrid
-    printf("\nNOTE from line_intersect_2D_3D: u_a = %19.15f, u_q=%19.15f, inbound=%d\n", *u_a, *u_q, *inbound);
-#endif
-    *u_a = 1;
-    *u_q = 0;
-    intersect[0] = a2[0];
-    intersect[1] = a2[1];
-    intersect[2] = a2[2];
-    return 1;
-  }
-  else if (samePoint(a2[0], a2[1], a2[2], q2[0], q2[1], q2[2])) {
-#ifdef debug_test_create_xgrid
-    printf("\nNOTE from line_intersect_2D_3D: u_a = %19.15f, u_q=%19.15f, inbound=%d\n", *u_a, *u_q, *inbound);
-#endif
-    *u_a = 1;
-    *u_q = 1;
-    intersect[0] = a2[0];
-    intersect[1] = a2[1];
-    intersect[2] = a2[2];
-    return 1;
-  }
-
-
-  /* Load points defining plane into variable (these are supposed to be in counterclockwise order) */
-  plane[0]=q1[0];
-  plane[1]=q1[1];
-  plane[2]=q1[2];
-  plane[3]=q2[0];
-  plane[4]=q2[1];
-  plane[5]=q2[2];
-  plane[6]=0.0;
-  plane[7]=0.0;
-  plane[8]=0.0;
-
-  /* Intersect the segment with the plane */
-  is_inter1 = intersect_tri_with_line(plane, a1, a2, plane_p, u_a);
-
-  if(!is_inter1)
-    return 0;
-
-  if(fabs(*u_a) < EPSLN8) *u_a = 0;
-  if(fabs(*u_a-1) < EPSLN8) *u_a = 1;
-
-
-#ifdef debug_test_create_xgrid
-  printf("\nNOTE from line_intersect_2D_3D: u_a = %19.15f\n", *u_a);
-#endif
-
-
-  if( (*u_a < 0) || (*u_a > 1) ) return 0;
-
-  /* Load points defining plane into variable (these are supposed to be in counterclockwise order) */
-  plane[0]=a1[0];
-  plane[1]=a1[1];
-  plane[2]=a1[2];
-  plane[3]=a2[0];
-  plane[4]=a2[1];
-  plane[5]=a2[2];
-  plane[6]=0.0;
-  plane[7]=0.0;
-  plane[8]=0.0;
-
-  /* Intersect the segment with the plane */
-  is_inter2 = intersect_tri_with_line(plane, q1, q2, plane_p, u_q);
-
-  if(!is_inter2)
-    return 0;
-
-  if(fabs(*u_q) < EPSLN8) *u_q = 0;
-  if(fabs(*u_q-1) < EPSLN8) *u_q = 1;
-#ifdef debug_test_create_xgrid
-  printf("\nNOTE from line_intersect_2D_3D: u_q = %19.15f\n", *u_q);
-#endif
-
-
-  if( (*u_q < 0) || (*u_q > 1) ) return 0;
-
-  u =*u_a;
-
-  /* The two planes are coincidental */
-  vect_cross(a1, a2, c1);
-  vect_cross(q1, q2, c2);
-  vect_cross(c1, c2, c3);
-  coincident = metric(c3);
-
-  if(fabs(coincident) < EPSLN30) return 0;
-
-  /* Calculate point of intersection */
-  intersect[0]=a1[0] + u*(a2[0]-a1[0]);
-  intersect[1]=a1[1] + u*(a2[1]-a1[1]);
-  intersect[2]=a1[2] + u*(a2[2]-a1[2]);
-
-  norm = metric( intersect );
-  for(i = 0; i < 3; i ++) intersect[i] /= norm;
-
-  /* when u_q =0 or u_q =1, the following could not decide the inbound value */
-  if(*u_q != 0 && *u_q != 1){
-
-    p1[0] = a2[0]-a1[0];
-    p1[1] = a2[1]-a1[1];
-    p1[2] = a2[2]-a1[2];
-    v1[0] = q2[0]-q1[0];
-    v1[1] = q2[1]-q1[1];
-    v1[2] = q2[2]-q1[2];
-    v2[0] = q3[0]-q2[0];
-    v2[1] = q3[1]-q2[1];
-    v2[2] = q3[2]-q2[2];
-
-    vect_cross(v1, v2, c1);
-    vect_cross(v1, p1, c2);
-
-    sense = dot(c1, c2);
-    *inbound = 1;
-    if(sense > 0) *inbound = 2; /* v1 going into v2 in CCW sense */
-  }
-#ifdef debug_test_create_xgrid
-  printf("\nNOTE from line_intersect_2D_3D: inbound=%d\n", *inbound);
-#endif
-
-  return 1;
-}
-
-
-/*------------------------------------------------------------------------------
-  double poly_ctrlat(const double x[], const double y[], int n)
-  This routine is used to calculate the latitude of the centroid
-  ---------------------------------------------------------------------------*/
-
-double poly_ctrlat(const double x[], const double y[], int n)
-{
-  double ctrlat = 0.0;
-  int    i;
-
-  for (i=0;i<n;i++) {
-    int ip = (i+1) % n;
-    double dx = (x[ip]-x[i]);
-    double dy, avg_y, hdy;
-    double lat1, lat2;
-    lat1 = y[ip];
-    lat2 = y[i];
-    dy = lat2 - lat1;
-    hdy = dy*0.5;
-    avg_y = (lat1+lat2)*0.5;
-    if      (dx==0.0) continue;
-    if(dx > M_PI)  dx = dx - 2.0*M_PI;
-    if(dx < -M_PI) dx = dx + 2.0*M_PI;
-
-    if ( fabs(hdy)< SMALL_VALUE ) /* cheap area calculation along latitude */
-      ctrlat -= dx*(2*cos(avg_y) + lat2*sin(avg_y) - cos(lat1) );
-    else
-      ctrlat -= dx*( (sin(hdy)/hdy)*(2*cos(avg_y) + lat2*sin(avg_y)) - cos(lat1) );
-  }
-  return (ctrlat*RADIUS*RADIUS);
-} /* poly_ctrlat */
-
-/*------------------------------------------------------------------------------
-  double poly_ctrlon(const double x[], const double y[], int n, double clon)
-  This routine is used to calculate the lontitude of the centroid.
-  ---------------------------------------------------------------------------*/
-double poly_ctrlon(const double x[], const double y[], int n, double clon)
-{
-  double ctrlon = 0.0;
-  int    i;
-
-  for (i=0;i<n;i++) {
-    int ip = (i+1) % n;
-    double phi1, phi2, dphi, lat1, lat2, dphi1, dphi2;
-    double f1, f2, fac, fint;
-    phi1   = x[ip];
-    phi2   = x[i];
-    lat1 = y[ip];
-    lat2 = y[i];
-    dphi   = phi1 - phi2;
-
-    if      (dphi==0.0) continue;
-
-    f1 = 0.5*(cos(lat1)*sin(lat1)+lat1);
-    f2 = 0.5*(cos(lat2)*sin(lat2)+lat2);
-
-    /* this will make sure longitude of centroid is at
-       the same interval as the center of any grid */
-    if(dphi > M_PI)  dphi = dphi - 2.0*M_PI;
-    if(dphi < -M_PI) dphi = dphi + 2.0*M_PI;
-    dphi1 = phi1 - clon;
-    if( dphi1 > M_PI) dphi1 -= 2.0*M_PI;
-    if( dphi1 <-M_PI) dphi1 += 2.0*M_PI;
-    dphi2 = phi2 -clon;
-    if( dphi2 > M_PI) dphi2 -= 2.0*M_PI;
-    if( dphi2 <-M_PI) dphi2 += 2.0*M_PI;
-
-    if(fabs(dphi2 -dphi1) < M_PI) {
-      ctrlon -= dphi * (dphi1*f1+dphi2*f2)/2.0;
-    }
-    else {
-      if(dphi1 > 0.0)
-        fac = M_PI;
-      else
-        fac = -M_PI;
-      fint = f1 + (f2-f1)*(fac-dphi1)/fabs(dphi);
-      ctrlon -= 0.5*dphi1*(dphi1-fac)*f1 - 0.5*dphi2*(dphi2+fac)*f2
-          + 0.5*fac*(dphi1+dphi2)*fint;
-    }
-
-  }
-  return (ctrlon*RADIUS*RADIUS);
-}   /* poly_ctrlon */
-
-/* -----------------------------------------------------------------------------
-   double box_ctrlat(double ll_lon, double ll_lat, double ur_lon, double ur_lat)
-   This routine is used to calculate the latitude of the centroid.
-   ---------------------------------------------------------------------------*/
-double box_ctrlat(double ll_lon, double ll_lat, double ur_lon, double ur_lat)
-{
-  double dphi = ur_lon-ll_lon;
-  double ctrlat;
-
-  if(dphi > M_PI)  dphi = dphi - 2.0*M_PI;
-  if(dphi < -M_PI) dphi = dphi + 2.0*M_PI;
-  ctrlat = dphi*(cos(ur_lat) + ur_lat*sin(ur_lat)-(cos(ll_lat) + ll_lat*sin(ll_lat)));
-  return (ctrlat*RADIUS*RADIUS);
-} /* box_ctrlat */
-
-/*------------------------------------------------------------------------------
-  double box_ctrlon(double ll_lon, double ll_lat, double ur_lon, double ur_lat, double clon)
-  This routine is used to calculate the lontitude of the centroid
-  ----------------------------------------------------------------------------*/
-double box_ctrlon(double ll_lon, double ll_lat, double ur_lon, double ur_lat, double clon)
-{
-  double phi1, phi2, dphi, lat1, lat2, dphi1, dphi2;
-  double f1, f2, fac, fint;
-  double ctrlon  = 0.0;
-  int i;
-  for( i =0; i<2; i++) {
-    if(i == 0) {
-      phi1 = ur_lon;
-      phi2 = ll_lon;
-      lat1 = lat2 = ll_lat;
-    }
-    else {
-      phi1 = ll_lon;
-      phi2 = ur_lon;
-      lat1 = lat2 = ur_lat;
-    }
-    dphi   = phi1 - phi2;
-    f1 = 0.5*(cos(lat1)*sin(lat1)+lat1);
-    f2 = 0.5*(cos(lat2)*sin(lat2)+lat2);
-
-    if(dphi > M_PI)  dphi = dphi - 2.0*M_PI;
-    if(dphi < -M_PI) dphi = dphi + 2.0*M_PI;
-    /* make sure the center is in the same grid box. */
-    dphi1 = phi1 - clon;
-    if( dphi1 > M_PI) dphi1 -= 2.0*M_PI;
-    if( dphi1 <-M_PI) dphi1 += 2.0*M_PI;
-    dphi2 = phi2 -clon;
-    if( dphi2 > M_PI) dphi2 -= 2.0*M_PI;
-    if( dphi2 <-M_PI) dphi2 += 2.0*M_PI;
-
-    if(fabs(dphi2 -dphi1) < M_PI) {
-      ctrlon -= dphi * (dphi1*f1+dphi2*f2)/2.0;
-    }
-    else {
-      if(dphi1 > 0.0)
-        fac = M_PI;
-      else
-        fac = -M_PI;
-      fint = f1 + (f2-f1)*(fac-dphi1)/fabs(dphi);
-      ctrlon -= 0.5*dphi1*(dphi1-fac)*f1 - 0.5*dphi2*(dphi2+fac)*f2
-          + 0.5*fac*(dphi1+dphi2)*fint;
-    }
-  }
-  return (ctrlon*RADIUS*RADIUS);
-} /* box_ctrlon */
-
-/*******************************************************************************
-  double grid_box_radius(double *x, double *y, double *z, int n);
-  Find the radius of the grid box, the radius is defined the
-  maximum distance between any two vertices
-*******************************************************************************/
-double grid_box_radius(const double *x, const double *y, const double *z, int n)
-{
-  double radius;
-  int i, j;
-
-  radius = 0;
-  for(i=0; i<n-1; i++) {
-    for(j=i+1; j<n; j++) {
-      radius = max(radius, pow(x[i]-x[j],2.)+pow(y[i]-y[j],2.)+pow(z[i]-z[j],2.));
-    }
-  }
-
-  radius = sqrt(radius);
-
-  return (radius);
-
-} /* grid_box_radius */
-
-/*******************************************************************************
-  double dist_between_boxes(const double *x1, const double *y1, const double *z1, int n1,
-                            const double *x2, const double *y2, const double *z2, int n2);
-  Find the distance between any two grid boxes. The distance is defined by the maximum
-  distance between any vertices of these two box
-*******************************************************************************/
-double dist_between_boxes(const double *x1, const double *y1, const double *z1, int n1,
-                          const double *x2, const double *y2, const double *z2, int n2)
-{
-  double dist;
-  int i, j;
-
-  dist = 0.0;
-  for(i=0; i<n1; i++) {
-    for(j=0; j<n2; j++) {
-      dist = max(dist, pow(x1[i]-x2[j],2.)+pow(y1[i]-y2[j],2.)+pow(z1[i]-z2[j],2.));
-    }
-  }
-
-  dist = sqrt(dist);
-  return (dist);
-
-} /* dist_between_boxes */
-
-/*******************************************************************************
- int inside_edge(double x0, double y0, double x1, double y1, double x, double y)
- determine a point(x,y) is inside or outside a given edge with vertex,
- (x0,y0) and (x1,y1). return 1 if inside and 0 if outside. <y1-y0, -(x1-x0)> is
- the outward edge normal from vertex <x0,y0> to <x1,y1>. <x-x0,y-y0> is the vector
- from <x0,y0> to <x,y>.
- if Inner produce <x-x0,y-y0>*<y1-y0, -(x1-x0)> > 0, outside, otherwise inside.
- inner product value = 0 also treate as inside.
-*******************************************************************************/
-int inside_edge(double x0, double y0, double x1, double y1, double x, double y)
-{
-  const double SMALL = 1.e-12;
-  double product;
-
-  product = ( x-x0 )*(y1-y0) + (x0-x1)*(y-y0);
-  return (product<=SMALL) ? 1:0;
-
-} /* inside_edge */
-
-
-/* The following is a test program to test subroutines in create_xgrid.c */
-
-#ifdef test_create_xgrid
-
-#include "create_xgrid.h"
-#include <math.h>
-
-#define D2R (M_PI/180)
-#define R2D (180/M_PI)
-#define MAXPOINT 1000
-
-int main(int argc, char* argv[])
-{
-
-  double lon1_in[MAXPOINT], lat1_in[MAXPOINT];
-  double lon2_in[MAXPOINT], lat2_in[MAXPOINT];
-  double x1_in[MAXPOINT], y1_in[MAXPOINT], z1_in[MAXPOINT];
-  double x2_in[MAXPOINT], y2_in[MAXPOINT], z2_in[MAXPOINT];
-  double lon_out[20], lat_out[20];
-  double x_out[20], y_out[20], z_out[20];
-  int    n1_in, n2_in, n_out, i, j;
-  int    nlon1=0, nlat1=0, nlon2=0, nlat2=0;
-  int    n;
-  int    ntest = 11;
-
-
-  for(n=11; n<=ntest; n++) {
-
-    switch (n) {
-      case 1:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 1:
-       box 1: (20,10), (20,12), (22,12), (22,10)
-       box 2: (21,11), (21,14), (24,14), (24,11)
-       out  : (21, 12.0018), (22, 12), (22, 11.0033), (21, 11)
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-        /* first a simple lat-lon grid box to clip another lat-lon grid box */
-        lon1_in[0] = 20; lat1_in[0] = 10;
-        lon1_in[1] = 20; lat1_in[1] = 12;
-        lon1_in[2] = 22; lat1_in[2] = 12;
-        lon1_in[3] = 22; lat1_in[3] = 10;
-        lon2_in[0] = 21; lat2_in[0] = 11;
-        lon2_in[1] = 21; lat2_in[1] = 14;
-        lon2_in[2] = 24; lat2_in[2] = 14;
-        lon2_in[3] = 24; lat2_in[3] = 11;
-        break;
-
-      case 2:
-        /****************************************************************
-
-        test clip_2dx2d_great_cirle case 2: two identical box
-        box 1: (20,10), (20,12), (22,12), (22,10)
-        box 2: (20,10), (20,12), (22,12), (22,10)
-        out  : (20,10), (20,12), (22,12), (22,10)
-
-        ****************************************************************/
-        lon1_in[0] = 20; lat1_in[0] = 10;
-        lon1_in[1] = 20; lat1_in[1] = 12;
-        lon1_in[2] = 22; lat1_in[2] = 12;
-        lon1_in[3] = 22; lat1_in[3] = 10;
-
-        for(i=0; i<n2_in; i++) {
-          lon2_in[i] = lon1_in[i];
-          lat2_in[i] = lat1_in[i];
-        }
-        break;
-
-      case 3:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 3: one cubic sphere grid close to the pole with lat-lon grid.
-       box 1: (251.7, 88.98), (148.3, 88.98), (57.81, 88.72), (342.2, 88.72)
-       box 2: (150, 88), (150, 90), (152.5, 90), (152.5, 88)
-       out  : (152.5, 89.0642), (150, 89.0165), (0, 90)
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-        /* first a simple lat-lon grid box to clip another lat-lon grid box */
-        lon1_in[0] = 251.7; lat1_in[0] = 88.98;
-        lon1_in[1] = 148.3; lat1_in[1] = 88.98;
-        lon1_in[2] = 57.81; lat1_in[2] = 88.72;
-        lon1_in[3] = 342.2; lat1_in[3] = 88.72;
-
-        lon2_in[0] = 150; lat2_in[0] = 88;
-        lon2_in[1] = 150; lat2_in[1] = 90;
-        lon2_in[2] = 152.5; lat2_in[2] = 90;
-        lon2_in[3] = 152.5; lat2_in[3] = 88;
-        /*
-          for(i=0; i<4; i++) {
-          lon2_in[i] = lon1_in[i];
-          lat2_in[i] = lat1_in[i];
-          }
-        */
-        break;
-
-      case 4:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 4: One box contains the pole
-       box 1: (-160, 88.5354), (152.011, 87.8123) , (102.985, 88.4008), (20, 89.8047)
-       box 2: (145,88), (145,90), (150,90), (150,88)
-       out  : (145.916, 88.0011), (145, 88.0249), (0, 90), (150, 88)
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-        /* first a simple lat-lon grid box to clip another lat-lon grid box */
-
-        lon1_in[0] = -160;  lat1_in[0] = 88.5354;
-        lon1_in[1] = 152.011; lat1_in[1] = 87.8123;
-        lon1_in[2] = 102.985; lat1_in[2] = 88.4008;
-        lon1_in[3] = 20; lat1_in[3] = 89.8047;
-
-        lon2_in[0] = 145; lat2_in[0] = 88;
-        lon2_in[1] = 145; lat2_in[1] = 90;
-        lon2_in[2] = 150; lat2_in[2] = 90;
-        lon2_in[3] = 150; lat2_in[3] = 88;
-        break;
-
-      case 5:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 5: One tripolar grid around the pole with lat-lon grid.
-       box 1: (-202.6, 87.95), (-280, 89.56), (-100, 90), (-190, 88)
-       box 2: (21,11), (21,14), (24,14), (24,11)
-       out  : (150, 88.7006), (145,  88.9507), (0, 90)
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-        /* first a simple lat-lon grid box to clip another lat-lon grid box */
-
-        lon1_in[0] = -202.6;  lat1_in[0] = 87.95;
-        lon1_in[1] = -280.;   lat1_in[1] = 89.56;
-        lon1_in[2] = -100.0; lat1_in[2] = 90;
-        lon1_in[3] = -190.; lat1_in[3] = 88;
-
-        lon2_in[0] = 145; lat2_in[0] = 88;
-        lon2_in[1] = 145; lat2_in[1] = 90;
-        lon2_in[2] = 150; lat2_in[2] = 90;
-        lon2_in[3] = 150; lat2_in[3] = 88;
-        break;
-
-      case 6:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 6: One cubic sphere grid arounc the pole with one tripolar grid box
-                                       around the pole.
-       box 1: (-160, 88.5354), (152.011, 87.8123) , (102.985, 88.4008), (20, 89.8047)
-       box 2: (-202.6, 87.95), (-280, 89.56), (-100, 90), (-190, 88)
-       out  : (170, 88.309), (157.082, 88.0005), (83.714, 89.559), (80, 89.6094), (0, 90), (200, 88.5354)
-
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-        /* first a simple lat-lon grid box to clip another lat-lon grid box */
-
-        lon1_in[0] = -160;  lat1_in[0] = 88.5354;
-        lon1_in[1] = 152.011; lat1_in[1] = 87.8123;
-        lon1_in[2] = 102.985; lat1_in[2] = 88.4008;
-        lon1_in[3] = 20; lat1_in[3] = 89.8047;
-
-        lon2_in[0] = -202.6;  lat2_in[0] = 87.95;
-        lon2_in[1] = -280.;   lat2_in[1] = 89.56;
-        lon2_in[2] = -100.0;  lat2_in[2] = 90;
-        lon2_in[3] = -190.;   lat2_in[3] = 88;
-        break;
-
-      case 7:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 7: One small grid box inside a big grid box.
-       box 1: (20,10), (20,12), (22,12), (22,10)
-       box 2: (18,8), (18,14), (24,14), (24,8)
-       out  : (20,10), (20,12), (22,12), (22,10)
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-        /* first a simple lat-lon grid box to clip another lat-lon grid box */
-        lon1_in[0] = 20; lat1_in[0] = 10;
-        lon1_in[1] = 20; lat1_in[1] = 12;
-        lon1_in[2] = 22; lat1_in[2] = 12;
-        lon1_in[3] = 22; lat1_in[3] = 10;
-        lon2_in[0] = 18; lat2_in[0] = 8;
-        lon2_in[1] = 18; lat2_in[1] = 14;
-        lon2_in[2] = 24; lat2_in[2] = 14;
-        lon2_in[3] = 24; lat2_in[3] = 8;
-        break;
-
-      case 8:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 8: Cubic sphere grid at tile = 1, point (i=25,j=1)
-          with N45 at (i=141,j=23)
-       box 1:
-       box 2:
-       out  : None
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-        /* first a simple lat-lo
-           n grid box to clip another lat-lon grid box */
-        lon1_in[0] = 350.0; lat1_in[0] = -45;
-        lon1_in[1] = 350.0; lat1_in[1] = -43.43;
-        lon1_in[2] = 352.1; lat1_in[2] = -43.41;
-        lon1_in[3] = 352.1; lat1_in[3] = -44.98;
-        lon2_in[0] = 350.0;   lat2_in[0] = -46;
-        lon2_in[1] = 350.0;   lat2_in[1] = -44;
-        lon2_in[2] = 352.5; lat2_in[2] = -44;
-        lon2_in[3] = 352.5; lat2_in[3] = -46;
-        break;
-
-      case 9:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 9: Cubic sphere grid at tile = 1, point (i=1,j=1)
-          with N45 at (i=51,j=61)
-       box 1:
-       box 2:
-       out  : None
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-
-        lon1_in[0] = 305.0; lat1_in[0] = -35.26;
-        lon1_in[1] = 305.0; lat1_in[1] = -33.80;
-        lon1_in[2] = 306.6; lat1_in[2] = -34.51;
-        lon1_in[3] = 306.6; lat1_in[3] = -35.99;
-        lon2_in[0] = 125;   lat2_in[0] = 32;
-        lon2_in[1] = 125;   lat2_in[1] = 34;
-        lon2_in[2] = 127.5; lat2_in[2] = 34;
-        lon2_in[3] = 127.5; lat2_in[3] = 32;
-        break;
-
-      case 10:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 10: Cubic sphere grid at tile = 3, point (i=24,j=1)
-          with N45 at (i=51,j=46)
-       box 1:
-       box 2:
-       out  : None
-
-        ****************************************************************/
-        n1_in = 4; n2_in = 4;
-
-        lon1_in[0] = 125.0; lat1_in[0] = 1.46935;
-        lon1_in[1] = 126.573; lat1_in[1] = 1.5091;
-        lon1_in[2] = 126.573; lat1_in[2] = 0;
-        lon1_in[3] = 125.0; lat1_in[3] = 0;
-        lon2_in[0] = 125;   lat2_in[0] = 0;
-        lon2_in[1] = 125;   lat2_in[1] = 2;
-        lon2_in[2] = 127.5; lat2_in[2] = 2;
-        lon2_in[3] = 127.5; lat2_in[3] = 0;
-        break;
-
-      case 11:
-        /****************************************************************
-
-       test clip_2dx2d_great_cirle case 10: Cubic sphere grid at tile = 3, point (i=24,j=1)
-          with N45 at (i=51,j=46)
-       box 1:
-       box 2:
-       out  :
-
-        ****************************************************************/
-        nlon1 = 1;
-        nlat1 = 1;
-        nlon2 = 1;
-        nlat2 = 1;
-        n1_in = (nlon1+1)*(nlat1+1);
-        n2_in = (nlon2+1)*(nlat2+1);
-
-        lon1_in[0] = 350.0; lat1_in[0] = 90.00;
-        lon1_in[1] = 170.0; lat1_in[1] = 87.92;
-        lon1_in[2] = 260.0; lat1_in[2] = 87.92;
-        lon1_in[3] = 215.0;  lat1_in[3] = 87.06;
-
-        /*       lon1_in[0] = 35.0; lat1_in[0] = 87.06; */
-        /*       lon1_in[1] = 80.0; lat1_in[1] = 87.92; */
-        /*       lon1_in[2] = 125.0; lat1_in[2] = 87.06; */
-        /*       lon1_in[3] = 350.0; lat1_in[3] = 87.92; */
-        /*       lon1_in[4] = 350.0; lat1_in[4] = 90.00; */
-        /*       lon1_in[5] = 170.0; lat1_in[5] = 87.92; */
-        /*       lon1_in[6] = 305.0; lat1_in[6] = 87.06; */
-        /*       lon1_in[7] = 260.0; lat1_in[7] = 87.92; */
-        /*       lon1_in[8] = 215.0;  lat1_in[8] = 87.06; */
-
-        lon2_in[0] = 167.5; lat2_in[0] = 88;
-        lon2_in[1] = 170;   lat2_in[1] = 88;
-        lon2_in[2] = 167.5; lat2_in[2] = 90;
-        lon2_in[3] = 170;   lat2_in[3] = 90;
-
-        /*       nlon1 = 3; */
-        /*       nlat1 = 2; */
-        /*       nlon2 = 1; */
-        /*       nlat2 = 1; */
-        /*       n1_in = (nlon1+1)*(nlat1+1); */
-        /*       n2_in = (nlon2+1)*(nlat2+1); */
-
-        /*       lon1_in[0] = 35.00;     lat1_in[0] = -59.90; */
-        /*       lon1_in[1] = 37.64;     lat1_in[1] = -58.69; */
-        /*       lon1_in[2] = 40.07;     lat1_in[2] = -57.44; */
-        /*       lon1_in[3] = 42.32;     lat1_in[3] = -56.15; */
-        /*       lon1_in[4] = 32.36;     lat1_in[4] = -58.69; */
-        /*       lon1_in[5] = 35.00;     lat1_in[5] = -57.56; */
-        /*       lon1_in[6] = 37.45;     lat1_in[6] = -56.39; */
-        /*       lon1_in[7] = 39.74;     lat1_in[7] = -55.18; */
-        /*       lon1_in[8] = 29.93;     lat1_in[8] = -57.44; */
-        /*       lon1_in[9] = 32.55;     lat1_in[9] = -56.39; */
-        /*       lon1_in[10] = 35.00;     lat1_in[10] = -55.29; */
-        /*       lon1_in[11] = 37.30;     lat1_in[11] = -54.16; */
-        /*       lon2_in[0] = 35;   lat2_in[0] = -58; */
-        /*       lon2_in[1] = 37.5; lat2_in[1] = -58; */
-        /*       lon2_in[2] = 35;   lat2_in[2] = -56; */
-        /*       lon2_in[3] = 37.5; lat2_in[3] = -56; */
-
-        /*       nlon1 = 1; */
-        /*       nlat1 = 1; */
-        /*       nlon2 = 1; */
-        /*       nlat2 = 1; */
-        /*       n1_in = (nlon1+1)*(nlat1+1); */
-        /*       n2_in = (nlon2+1)*(nlat2+1); */
-
-        /*       lon1_in[0] = 305;     lat1_in[0] = -35.26; */
-        /*       lon1_in[1] = 306;     lat1_in[1] = -35.99; */
-        /*       lon1_in[2] = 305;     lat1_in[2] = -33.80; */
-        /*       lon1_in[3] = 306;     lat1_in[3] = -34.51; */
-        /*       lon2_in[0] = 305;   lat2_in[0] = -34; */
-        /*       lon2_in[1] = 307.5; lat2_in[1] = -34; */
-        /*       lon2_in[2] = 305;   lat2_in[2] = -32; */
-        /*       lon2_in[3] = 307.5; lat2_in[3] = -32; */
-
-        nlon1 = 2;
-        nlat1 = 2;
-        nlon2 = 1;
-        nlat2 = 1;
-        n1_in = (nlon1+1)*(nlat1+1);
-        n2_in = (nlon2+1)*(nlat2+1);
-
-        lon1_in[0] = 111.3; lat1_in[0] = 1.591;
-        lon1_in[1] = 109.7; lat1_in[1] = 2.926;
-        lon1_in[2] = 108.2; lat1_in[2] = 4.256;
-        lon1_in[3] = 110.0; lat1_in[3] = 0.000;
-        lon1_in[4] = 108.4; lat1_in[4] = 1.335;
-        lon1_in[5] = 106.8; lat1_in[5] = 2.668;
-        lon1_in[6] = 108.7; lat1_in[6] = -1.591;
-        lon1_in[7] = 107.1; lat1_in[7] = -0.256;
-        lon1_in[8] = 105.5;  lat1_in[8] = 1.078;
-
-        lon2_in[0] = 107.5; lat2_in[0] = 0;
-        lon2_in[1] = 110;   lat2_in[1] = 0;
-        lon2_in[2] = 107.5; lat2_in[2] = 2;
-        lon2_in[3] = 110;   lat2_in[3] = 2;
-
-        break;
-
-      case 12:
-        /****************************************************************
-
-       test : create_xgrid_great_circle
-       box 1: (20,10), (20,12), (22,12), (22,10)
-       box 2: (21,11), (21,14), (24,14), (24,11)
-       out  : (21, 12.0018), (22, 12), (22, 11.0033), (21, 11)
-
-        ****************************************************************/
-        nlon1 = 2;
-        nlat1 = 2;
-        nlon2 = 3;
-        nlat2 = 3;
-        n1_in = (nlon1+1)*(nlat1+1);
-        n2_in = (nlon2+1)*(nlat2+1);
-
-        /* first a simple lat-lon grid box to clip another lat-lon grid box */
-        for(j=0; j<=nlat1; j++) for(i=0; i<=nlon1; i++){
-            lon1_in[j*(nlon1+1)+i] = 20.0 + (i-1)*2.0;
-            lat1_in[j*(nlon1+1)+i] = 10.0 + (j-1)*2.0;
-          }
-        for(j=0; j<=nlat2; j++) for(i=0; i<=nlon2; i++){
-            lon2_in[j*(nlon2+1)+i] = 19.0 + (i-1)*2.0;
-            lat2_in[j*(nlon2+1)+i] = 9.0 + (j-1)*2.0;
-          }
-
-        break;
-
-      case 13:
-
-        nlon1 = 1;
-        nlat1 = 1;
-        nlon2 = 1;
-        nlat2 = 1;
-        n1_in = (nlon1+1)*(nlat1+1);
-        n2_in = (nlon2+1)*(nlat2+1);
-
-        /*       lon1_in[0] = ; lat1_in[0] = ; */
-        /*       lon1_in[1] = ; lat1_in[1] = ; */
-        /*       lon1_in[2] = ; lat1_in[2] = ; */
-        /*       lon1_in[3] = ; lat1_in[3] = ; */
-        /*       lon2_in[0] = ; lat2_in[0] = ; */
-        /*       lon2_in[1] = ; lat2_in[1] = ; */
-        /*       lon2_in[2] = ; lat2_in[2] = ; */
-        /*       lon2_in[3] = ; lat2_in[3] = ;     */
-
-        /*       lon1_in[0] = 1.35536; lat1_in[0] = 1.16251; */
-        /*       lon1_in[1] = 1.36805; lat1_in[1] = 1.15369; */
-        /*       lon1_in[2] = 1.37843; lat1_in[2] = 1.16729; */
-        /*       lon1_in[3] = 1.39048; lat1_in[3] = 1.15826; */
-        /*       lon2_in[0] = 1.34611; lat2_in[0] = 1.16372; */
-        /*       lon2_in[1] = 1.35616; lat2_in[1] = 1.15802;    */
-        /*       lon2_in[2] = 1.35143; lat2_in[2] = 1.16509; */
-        /*       lon2_in[3] = 1.36042; lat2_in[3] = 1.15913; */
-
-        /*       lon1_in[0] = 12.508065121288551; lat1_in[0] = -87.445883646793547; */
-        /*       lon1_in[1] = 325.425637772; lat1_in[1] = -86.481216821859505; */
-        /*       lon1_in[2] = 97.5; lat1_in[2] = -89.802136057677174; */
-        /*       lon1_in[3] = 277.5; lat1_in[3] = -87.615232005344637; */
-
-        /*       for(j=0; j<=nlat2; j++) for(i=0; i<=nlon2; i++) { */
-        /*      lon2_in[j*(nlon2+1)+i] = -280.0 + i*1.0; */
-        /*      lat2_in[j*(nlon2+1)+i] = -90.0 + j*8.0; */
-        /*       } */
-        lon1_in[0] = 120.369397984526174; lat1_in[0] = 16.751543427495864;
-        lon1_in[1] = 119.999999999999986; lat1_in[1] = 16.751871929590038;
-        lon1_in[2] = 120.369397846883501; lat1_in[2] = 16.397797979598028;
-        lon1_in[3] = 119.999999999999986; lat1_in[3] = 16.398120477217255;
-        lon2_in[0] = 120.369415056522087; lat2_in[0] = 16.752176828509153;
-        lon2_in[1] = 119.999999999999986; lat2_in[1] = 16.752505523196167;
-        lon2_in[2] = 120.369415056522087; lat2_in[2] = 16.397797949548146;
-        lon2_in[3] = 119.999999999999986; lat2_in[3] = 16.398120477217255;
-
-        break;
-
-
-      default:
-        error_handler("test_create_xgrid: incorrect case number");
-    }
-
-    /* convert to radian */
-
-    for(i=0; i<n1_in; i++) {
-      lon1_in[i] *= D2R; lat1_in[i] *=D2R;
-    }
-    for(i=0; i<n2_in; i++) {
-      lon2_in[i] *= D2R; lat2_in[i] *=D2R;
-    }
-
-
-    printf("\n*********************************************************\n");
-    printf("\n               Case %d                                    \n", n);
-    printf("\n*********************************************************\n");
-
-
-    if( n > 10 ) {
-      int nxgrid;
-      int *i1, *j1, *i2, *j2;
-      double *xarea, *xclon, *xclat, *mask1;
-
-      mask1 = (double *)malloc(nlon1*nlat1*sizeof(double));
-      i1    = (int    *)malloc(MAXXGRID*sizeof(int));
-      j1    = (int    *)malloc(MAXXGRID*sizeof(int));
-      i2    = (int    *)malloc(MAXXGRID*sizeof(int));
-      j2    = (int    *)malloc(MAXXGRID*sizeof(int));
-      xarea = (double *)malloc(MAXXGRID*sizeof(double));
-      xclon = (double *)malloc(MAXXGRID*sizeof(double));
-      xclat = (double *)malloc(MAXXGRID*sizeof(double));
-
-      for(i=0; i<nlon1*nlat1; i++) mask1[i] = 1.0;
-
-      nxgrid = create_xgrid_great_circle(&nlon1, &nlat1, &nlon2, &nlat2, lon1_in, lat1_in,
-                                         lon2_in, lat2_in, mask1, i1, j1, i2, j2,
-                                         xarea, xclon, xclat);
-      printf("\n*********************************************************\n");
-      printf("\n     First input grid box longitude, latitude   \n \n");
-      for(i=0; i<n1_in; i++) printf(" %g,  %g \n", lon1_in[i]*R2D, lat1_in[i]*R2D);
-
-      printf("\n     Second input grid box longitude, latitude \n \n");
-      for(i=0; i<n2_in; i++) printf(" %g,  %g \n", lon2_in[i]*R2D, lat2_in[i]*R2D);
-
-      printf("\n  Number of exchange grid is %d\n", nxgrid);
-      for(i=0; i<nxgrid; i++) {
-        printf("(i1,j1)=(%d,%d), (i2,j2)=(%d, %d), xgrid_area=%g, xgrid_clon=%g, xgrid_clat=%g\n",
-               i1[i], j1[i], i2[i], j2[i], xarea[i], xclon[i], xclat[i]);
-      }
-
-      /* comparing the area sum of exchange grid and grid1 area */
-      {
-        double *x1, *y1, *z1, *area1;
-        double area_sum;
-        int    i;
-        area_sum = 0.0;
-
-        for(i=0; i<nxgrid; i++) {
-          area_sum+= xarea[i];
-        }
-
-        area1 = (double *)malloc((nlon1)*(nlat1)*sizeof(double));
-        get_grid_great_circle_area_(&nlon1, &nlat1, lon1_in, lat1_in, area1);
-
-        printf("xgrid area sum is %g, grid 1 area is %g\n", area_sum, area1[0]);
-      }
-
-      printf("\n");
-      free(i1);
-      free(i2);
-      free(j1);
-      free(j2);
-      free(xarea);
-      free(xclon);
-      free(xclat);
-      free(mask1);
-    }
-    else {
-      latlon2xyz(n1_in, lon1_in, lat1_in, x1_in, y1_in, z1_in);
-      latlon2xyz(n2_in, lon2_in, lat2_in, x2_in, y2_in, z2_in);
-
-      n_out = clip_2dx2d_great_circle(x1_in, y1_in, z1_in, 4, x2_in, y2_in, z2_in, n2_in,
-                                      x_out, y_out,  z_out);
-      xyz2latlon(n_out, x_out, y_out, z_out, lon_out, lat_out);
-
-      printf("\n*********************************************************\n");
-      printf("\n     First input grid box longitude, latitude   \n \n");
-      for(i=0; i<n1_in; i++) printf(" %g,  %g \n", lon1_in[i]*R2D, lat1_in[i]*R2D);
-
-      printf("\n     Second input grid box longitude, latitude \n \n");
-      for(i=0; i<n2_in; i++) printf(" %g,  %g \n", lon2_in[i]*R2D, lat2_in[i]*R2D);
-
-      printf("\n     output clip grid box longitude, latitude for case 1 \n \n");
-      for(i=0; i<n_out; i++) printf(" %g,  %g \n", lon_out[i]*R2D, lat_out[i]*R2D);
-      printf("\n");
-    }
-  }
-}
-
-
-#endif
diff --git a/mosaic/create_xgrid.h b/mosaic/create_xgrid.h
deleted file mode 100644
index 90c0338b93..0000000000
--- a/mosaic/create_xgrid.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#ifndef CREATE_XGRID_H_
-#define CREATE_XGRID_H_
-
-#ifndef MAXXGRID
-#define MAXXGRID 1e6
-#endif
-
-#define MV 50
-/* this value is small compare to earth area */
-
-double grid_box_radius(const double *x, const double *y, const double *z, int n);
-
-double dist_between_boxes(const double *x1, const double *y1, const double *z1, int n1,
-                          const double *x2, const double *y2, const double *z2, int n2);
-
-int inside_edge(double x0, double y0, double x1, double y1, double x, double y);
-
-int line_intersect_2D_3D(double *a1, double *a2, double *q1, double *q2, double *q3,
-                         double *intersect, double *u_a, double *u_q, int *inbound);
-
-double poly_ctrlon(const double lon[], const double lat[], int n, double clon);
-
-double poly_ctrlat(const double lon[], const double lat[], int n);
-
-double box_ctrlon(double ll_lon, double ll_lat, double ur_lon, double ur_lat, double clon);
-
-double box_ctrlat(double ll_lon, double ll_lat, double ur_lon, double ur_lat);
-
-int get_maxxgrid(void);
-
-int get_maxxgrid_(void);
-
-void get_grid_area(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area);
-
-void get_grid_great_circle_area(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area);
-
-void get_grid_area_dimensionless(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area);
-
-void get_grid_area_no_adjust(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area);
-
-int clip(const double lon_in[], const double lat_in[], int n_in, double ll_lon, double ll_lat,
-         double ur_lon, double ur_lat, double lon_out[], double lat_out[]);
-
-int clip_2dx2d(const double lon1_in[], const double lat1_in[], int n1_in,
-               const double lon2_in[], const double lat2_in[], int n2_in,
-               double lon_out[], double lat_out[]);
-
-int create_xgrid_1dx2d_order1(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in,
-                              const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out,
-                              int *j_out, double *xgrid_area);
-
-int create_xgrid_1dx2d_order1_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, double *xgrid_area);
-
-int create_xgrid_1dx2d_order2(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-
-int create_xgrid_1dx2d_order2_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-
-int create_xgrid_2dx1d_order1(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in,
-                              const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out,
-                              int *j_out, double *xgrid_area);
-
-int create_xgrid_2dx1d_order1_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out,
-                               int *j_out, double *xgrid_area);
-
-int create_xgrid_2dx1d_order2(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-
-int create_xgrid_2dx1d_order2_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-
-int create_xgrid_2dx2d_order1(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out,
-                              int *j_out, double *xgrid_area);
-
-int create_xgrid_2dx2d_order2(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-
-int clip_2dx2d_great_circle(const double x1_in[], const double y1_in[], const double z1_in[], int n1_in,
-                            const double x2_in[], const double y2_in[], const double z2_in [], int n2_in,
-                            double x_out[], double y_out[], double z_out[]);
-
-int create_xgrid_great_circle(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                              const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                              double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-
-void get_grid_area_ug(const int *npts, const double *lon, const double *lat, double *area);
-int create_xgrid_1dx2d_order1_ug(const int *nlon_in, const int *nlat_in, const int *npts_out, const double *lon_in,
-                              const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *l_out, double *xgrid_area);
-void get_grid_great_circle_area_ug(const int *npts, const double *lon, const double *lat, double *area);
-int create_xgrid_great_circle_ug(const int *nlon_in, const int *nlat_in, const int *npts_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *l_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-
-void get_grid_area_(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area);
-
-void get_grid_great_circle_area_(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area);
-
-int create_xgrid_2dx2d_order1_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out,
-                               int *j_out, double *xgrid_area);
-
-int create_xgrid_2dx2d_order2_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-void get_grid_area_ug_(const int *npts, const double *lon, const double *lat, double *area);
-int create_xgrid_1dx2d_order1_ug_(const int *nlon_in, const int *nlat_in, const int *npts_out, const double *lon_in,
-                              const double *lat_in, const double *lon_out, const double *lat_out,
-                              const double *mask_in, int *i_in, int *j_in, int *l_out, double *xgrid_area);
-void get_grid_great_circle_area_ug_(const int *npts, const double *lon, const double *lat, double *area);
-int create_xgrid_great_circle_ug_(const int *nlon_in, const int *nlat_in, const int *npts_out,
-                               const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
-                               const double *mask_in, int *i_in, int *j_in, int *l_out,
-                               double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
-
-#endif
diff --git a/mosaic/gradient.F90 b/mosaic/gradient.F90
deleted file mode 100644
index 6e1f72532d..0000000000
--- a/mosaic/gradient.F90
+++ /dev/null
@@ -1,161 +0,0 @@
-!***********************************************************************
-!*                   GNU Lesser General Public License
-!*
-!* This file is part of the GFDL Flexible Modeling System (FMS).
-!*
-!* FMS is free software: you can redistribute it and/or modify it under
-!* the terms of the GNU Lesser General Public License as published by
-!* the Free Software Foundation, either version 3 of the License, or (at
-!* your option) any later version.
-!*
-!* FMS is distributed in the hope that it will be useful, but WITHOUT
-!* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-!* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-!* for more details.
-!*
-!* You should have received a copy of the GNU Lesser General Public
-!* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
-!***********************************************************************
-!> @defgroup gradient_mod gradient_mod
-!> @ingroup mosaic
-!> @brief Implements some utility routines to calculate gradient.
-!> @author Zhi Liang
-!!
-!! Currently only gradient on cubic grid is implemented. Also a public interface
-!! is provided to calculate grid information needed to calculate gradient.
-
-!> @addtogroup gradient_mod
-!> @{
-module gradient_mod
-
-use mpp_mod,       only : mpp_error, FATAL
-use constants_mod, only : RADIUS
-use platform_mod
-
-implicit none
-private
-
-
-public :: gradient_cubic
-public :: calc_cubic_grid_info
-
-! Include variable "version" to be written to log file.
-#include<file_version.h>
-
-contains
-
-
-!#####################################################################
-!> Pin has halo size = 1.
-!! @param pin the size of pin    will be (nx+2,ny+2), T-cell center, with halo = 1
-!! @param the size of dx     will be (nx, ny+1),  N-cell center
-!! @param the size of dy     will be (nx+1, ny),  E-cell center
-!! @param the size of area   will be (nx, ny),    T-cell center.
-!! @param The size of edge_w will be (ny+1),      C-cell center
-!! @param The size of edge_e will be (ny+1),      C-cell center
-!! @param The size of edge_s will be (nx+1),      C-cell center
-!! @param The size of edge_n will be (nx+1),      C-cell center
-!! @param The size of en_n   will be (3,nx,ny+1), N-cell center
-!! @param The size of en_e   will be (3,nx+1,ny), E-cell center
-!! @param The size of vlon   will be (3,nx, ny)   T-cell center
-!! @param The size of vlat   will be (3,nx, ny),  T-cell center
-subroutine gradient_cubic(pin, dx, dy, area, edge_w, edge_e, edge_s, edge_n,    &
-                          en_n, en_e, vlon, vlat, grad_x, grad_y, on_west_edge, &
-                          on_east_edge, on_south_edge, on_north_edge)
-
-  real(r8_kind),    dimension(:,:  ), intent(in ) :: pin, dx, dy, area
-  real(r8_kind),    dimension(:    ), intent(in ) :: edge_w, edge_e, edge_s, edge_n
-  real(r8_kind),    dimension(:,:,:), intent(in ) :: en_n, en_e
-  real(r8_kind),    dimension(:,:,:), intent(in ) :: vlon, vlat
-  real(r8_kind),    dimension(:,:  ), intent(out) :: grad_x, grad_y
-  logical,                   intent(in ) :: on_west_edge, on_east_edge, on_south_edge, on_north_edge
-  integer :: nx, ny
-
-
-  nx = size(grad_x,1)
-  ny = size(grad_x,2)
-
-  if(size(pin,1) .NE. nx+2 .OR. size(pin,2) .NE. ny+2)call mpp_error(FATAL, &
-     &  "gradient_mod:size of pin should be (nx+2, ny+2)")
-  if(size(dx,1) .NE. nx .OR. size(dx,2) .NE. ny+1 ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of dx should be (nx,ny+1)")
-  if(size(dy,1) .NE. nx+1 .OR. size(dy,2) .NE. ny ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of dy should be (nx+1,ny)")
-  if(size(area,1) .NE. nx .OR. size(area,2) .NE. ny ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of area should be (nx,ny)")
-  if(size(vlon,1) .NE. 3 .OR. size(vlon,2) .NE. nx .OR. size(vlon,3) .NE. ny) &
-          call mpp_error(FATAL, "gradient_mod: size of vlon should be (3,nx,ny)")
-  if(size(vlat,1) .NE. 3 .OR. size(vlat,2) .NE. nx .OR. size(vlat,3) .NE. ny) &
-          call mpp_error(FATAL, "gradient_mod: size of vlat should be (3,nx,ny)")
-  if(size(edge_w) .NE. ny+1) call mpp_error(FATAL, "gradient_mod: size of edge_w should be (ny+1)")
-  if(size(edge_e) .NE. ny+1) call mpp_error(FATAL, "gradient_mod: size of edge_e should be (ny+1)")
-  if(size(edge_s) .NE. nx+1) call mpp_error(FATAL, "gradient_mod: size of edge_s should be (nx+1)")
-  if(size(edge_n) .NE. nx+1) call mpp_error(FATAL, "gradient_mod: size of edge_n should be (nx+1)")
-  if(size(en_n,1) .NE. 3 .OR. size(en_n,2) .NE. nx .OR.  size(en_n,3) .NE. ny+1 ) &
-       call mpp_error(FATAL, "gradient_mod:size of en_n should be (3, nx, ny+1)")
-  if(size(en_e,1) .NE. 3 .OR. size(en_e,2) .NE. nx+1 .OR.  size(en_e,3) .NE. ny ) &
-       call mpp_error(FATAL, "gradient_mod:size of en_e should be (3, nx+1, ny)")
-
-  call grad_c2l(nx, ny, pin, dx, dy, area, edge_w, edge_e, edge_s, edge_n, en_n, en_e, vlon, vlat, &
-                grad_x, grad_y, on_west_edge, on_east_edge, on_south_edge, on_north_edge)
-
-  return
-
-end subroutine gradient_cubic
-
-
-subroutine calc_cubic_grid_info(xt, yt, xc, yc, dx, dy, area, edge_w, edge_e, edge_s, edge_n, &
-                           en_n, en_e, vlon, vlat, on_west_edge, on_east_edge, on_south_edge, on_north_edge )
-  real(r8_kind),    dimension(:,:  ), intent(in ) :: xt, yt, xc, yc
-  real(r8_kind),    dimension(:,:  ), intent(out) :: dx, dy, area
-  real(r8_kind),    dimension(:    ), intent(out) :: edge_w, edge_e, edge_s, edge_n
-  real(r8_kind),    dimension(:,:,:), intent(out) :: en_n, en_e
-  real(r8_kind),    dimension(:,:,:), intent(out) :: vlon, vlat
-  logical,                   intent(in ) :: on_west_edge, on_east_edge, on_south_edge, on_north_edge
-  integer :: nx, ny, nxp, nyp
-
-
-  nx  = size(area,1)
-  ny  = size(area,2)
-  nxp = nx+1
-  nyp = ny+1
-
-  if(size(xt,1) .NE. nx+2 .OR. size(xt,2) .NE. ny+2 ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of xt should be (nx+2,ny+2)")
-  if(size(yt,1) .NE. nx+2 .OR. size(yt,2) .NE. ny+2 ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of yt should be (nx+2,ny+2)")
-  if(size(xc,1) .NE. nxp .OR. size(xc,2) .NE. nyp ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of xc should be (nx+1,ny+1)")
-  if(size(yc,1) .NE. nxp .OR. size(yc,2) .NE. nyp ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of yc should be (nx+1,ny+1)")
-  if(size(dx,1) .NE. nx .OR. size(dx,2) .NE. nyp ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of dx should be (nx,ny+1)")
-  if(size(dy,1) .NE. nxp .OR. size(dy,2) .NE. ny ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of dy should be (nx+1,ny)")
-  if(size(area,1) .NE. nx .OR. size(area,2) .NE. ny ) call mpp_error(FATAL, &
-     &  "gradient_mod: size of area should be (nx,ny)")
-  if(size(vlon,1) .NE. 3 .OR. size(vlon,2) .NE. nx .OR. size(vlon,3) .NE. ny) &
-          call mpp_error(FATAL, "gradient_mod: size of vlon should be (3,nx,ny)")
-  if(size(vlat,1) .NE. 3 .OR. size(vlat,2) .NE. nx .OR. size(vlat,3) .NE. ny) &
-          call mpp_error(FATAL, "gradient_mod: size of vlat should be (3,nx,ny)")
-  if(size(edge_w) .NE. ny+1) call mpp_error(FATAL, "gradient_mod: size of edge_w should be (ny-1)")
-  if(size(edge_e) .NE. ny+1) call mpp_error(FATAL, "gradient_mod: size of edge_e should be (ny-1)")
-  if(size(edge_s) .NE. nx+1) call mpp_error(FATAL, "gradient_mod: size of edge_s should be (nx-1)")
-  if(size(edge_n) .NE. nx+1) call mpp_error(FATAL, "gradient_mod: size of edge_n should be (nx-1)")
-  if(size(en_n,1) .NE. 3 .OR. size(en_n,2) .NE. nx .OR.  size(en_n,3) .NE. nyp ) &
-       call mpp_error(FATAL, "gradient_mod:size of en_n should be (3, nx, ny+1)")
-  if(size(en_e,1) .NE. 3 .OR. size(en_e,2) .NE. nxp .OR.  size(en_e,3) .NE. ny ) &
-       call mpp_error(FATAL, "gradient_mod:size of en_e should be (3, nx+1, ny)")
-
-
-  call calc_c2l_grid_info(nx, ny, xt, yt, xc, yc, dx, dy, area, edge_w, edge_e, edge_s, edge_n, &
-                          en_n, en_e, vlon, vlat, on_west_edge, on_east_edge, on_south_edge, on_north_edge )
-
-
-  return
-
-end subroutine calc_cubic_grid_info
-
-end module gradient_mod
-!> @}
-! close documentation grouping
diff --git a/mosaic/gradient_c2l.c b/mosaic/gradient_c2l.c
deleted file mode 100644
index 0ab1658ffe..0000000000
--- a/mosaic/gradient_c2l.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#include <math.h>
-#include <stdlib.h>
-#include "constant.h"
-#include "mosaic_util.h"
-#include "gradient_c2l.h"
-#include <stdio.h>
-
-/** \file
- *  \ingroup mosaic
- *  \ Grid utility functions for use in @ref mosaic_mod
- */
-
-/*------------------------------------------------------------------------------
-  Routine to compute gradient terms for SCRIP:
-  SJL: Oct 5, 2007
-  NOTe: pin has halo size = 1.
-  the size of pin    will be (nx+2,ny+2), T-cell center, with halo = 1
-  the size of dx     will be (nx, ny+1),  N-cell center
-  the size of dy     will be (nx+1, ny),  E-cell center
-  the size of area   will be (nx, ny),    T-cell center.
-  The size of edge_w will be (ny+1),      C-cell center
-  The size of edge_e will be (ny+1),      C-cell center
-  The size of edge_s will be (nx+1),      C-cell center
-  The size of edge_n will be (nx+1),      C-cell center
-  The size of en_n   will be (nx, ny+1,3),N-cell center
-  The size of en_e   will be (nx+1,ny,3), E-cell center
-  The size of vlon   will be (nx, ny, 3)  T-cell center
-  The size of vlat   will be (nx, ny, 3), T-cell center
-  ----------------------------------------------------------------------------*/
-void grad_c2l_(const int *nlon, const int *nlat, const double *pin, const double *dx, const double *dy, const double *area,
-               const double *edge_w, const double *edge_e, const double *edge_s, const double *edge_n,
-               const double *en_n, const double *en_e, const double *vlon, const double *vlat,
-               double *grad_x, double *grad_y, const int *on_west_edge, const int *on_east_edge,
-               const int *on_south_edge, const int *on_north_edge)
-{
-  grad_c2l(nlon, nlat, pin, dx, dy, area, edge_w, edge_e, edge_s, edge_n, en_n, en_e, vlon, vlat, grad_x, grad_y,
-           on_west_edge, on_east_edge, on_south_edge, on_north_edge);
-}
-
-void grad_c2l(const int *nlon, const int *nlat, const double *pin, const double *dx, const double *dy, const double *area,
-              const double *edge_w, const double *edge_e, const double *edge_s, const double *edge_n,
-              const double *en_n, const double *en_e, const double *vlon, const double *vlat,
-              double *grad_x, double *grad_y, const int *on_west_edge, const int *on_east_edge,
-              const int *on_south_edge, const int *on_north_edge)
-{
-
-  double *pb, *pdx, *pdy, *grad3;
-  int nx, ny, nxp, nyp, i, j, m0, m1, n;
-
-  nx    = *nlon;
-  ny    = *nlat;
-  nxp   = nx+1;
-  nyp   = ny+1;
-  pb    = (double *)malloc(nxp*nyp*sizeof(double));
-  pdx   = (double *)malloc(3*nx*(ny+1)*sizeof(double));
-  pdy   = (double *)malloc(3*(nx+1)*ny*sizeof(double));
-  grad3 = (double *)malloc(3*nx*ny*sizeof(double));
-  a2b_ord2(nx, ny, pin, edge_w, edge_e, edge_s, edge_n, pb, *on_west_edge, *on_east_edge,*on_south_edge, *on_north_edge);
-
-  for(j=0; j<nyp; j++) for(i=0; i<nx; i++) {
-      m0 = j*nx+i;
-      m1 = j*nxp+i;
-      for(n=0; n<3; n++) {
-        pdx[3*m0+n] = 0.5*(pb[m1]+pb[m1+1])*dx[m0]*en_n[3*m0+n];
-      }
-    }
-
-  for(j=0; j<ny; j++) for(i=0; i<nxp; i++) {
-      m0 = j*nxp+i;
-      for(n=0; n<3; n++) {
-        pdy[3*m0+n] = 0.5*(pb[m0]+pb[m0+nxp])*dy[m0]*en_e[3*m0+n];
-      }
-    }
-
-  /* Compute 3D grad of the input scalar field  by Green's theorem */
-  for(j=0; j<ny; j++) for(i=0; i<nx; i++) {
-      m0 = 3*(j*nx+i);
-      for(n=0; n<3; n++) {
-        grad3[m0+n] = pdx[3*((j+1)*nx+i)+n]-pdx[m0+n]-pdy[3*(j*nxp+i)+n]+pdy[3*(j*nxp+i+1)+n];
-      }
-    }
-
-  /* Compute inner product: V3 * grad (pe) */
-  for(j=0; j<ny; j++) for(i=0; i<nx; i++) {
-      m0 = j*nx+i;
-      m1 = 3*m0;
-      /* dq / d(Lamda)*/
-      grad_x[m0] = (vlon[m1]*grad3[m1] + vlon[m1+1]*grad3[m1+1] + vlon[m1+2]*grad3[m1+2])/area[m0];
-      grad_x[m0] *= RADIUS;
-      /* dq / d(theta) */
-      grad_y[m0] = (vlat[m1]*grad3[m1] + vlat[m1+1]*grad3[m1+1] + vlat[m1+2]*grad3[m1+2] )/area[m0];
-      grad_y[m0] *= RADIUS;
-    }
-
-  free(pb);
-  free(pdx);
-  free(pdy);
-  free(grad3);
-
-} /* grad_c2l */
-
-/*------------------------------------------------------------------------------
-  qin:  A-grid field, size (nx+2, ny+2)
-  qout: B-grid field, size (nx+1, ny+1)
-  ----------------------------------------------------------------------------*/
-void a2b_ord2(int nx, int ny, const double *qin, const double *edge_w, const double *edge_e,
-              const double *edge_s, const double *edge_n, double *qout,
-              int on_west_edge, int on_east_edge, int on_south_edge, int on_north_edge)
-{
-  int    nxp, nyp, i, j;
-  int    istart, iend, jstart, jend;
-  double *q1, *q2;
-  const double r3 = 1./3.;
-
-  nxp = nx+1;
-  nyp = ny+1;
-  q1 = (double *)malloc((nx+2)*sizeof(double));
-  q2 = (double *)malloc((ny+2)*sizeof(double));
-
-
-  if(on_west_edge)
-    istart = 1;
-  else
-    istart = 0;
-  if(on_east_edge)
-    iend = nx;
-  else
-    iend = nxp;
-  if(on_south_edge)
-    jstart = 1;
-  else
-    jstart = 0;
-  if(on_north_edge)
-    jend = ny;
-  else
-    jend = nyp;
-
-  /* internal region ( 1: nx-1, 1:ny-1) */
-  for(j=jstart; j<jend; j++) for(i=istart; i<iend; i++) {
-      qout[j*nxp+i] = 0.25*(qin[j*(nx+2)+i] + qin[j*(nx+2)+i+1] +
-                            qin[(j+1)*(nx+2)+i] + qin[(j+1)*(nx+2)+i+1] );
-    }
-
-  /* Fix the 4 Corners */
-  if(on_west_edge && on_south_edge)qout[        0] = r3*(qin[1* (nx+2)+1 ]+qin[1* (nx+2)    ]+qin[             1]); /* sw_corner */
-  if(on_east_edge && on_south_edge)qout[       nx] = r3*(qin[1* (nx+2)+nx]+qin[          nx ]+qin[1*  (nx+2)+nxp]); /* se_corner */
-  if(on_east_edge && on_north_edge)qout[ny*nxp+nx] = r3*(qin[ny*(nx+2)+nx]+qin[ny*(nx+2)+nxp]+qin[nyp*(nx+2)+nx ]); /* ne_corner */
-  if(on_west_edge && on_north_edge)qout[ny*nxp   ] = r3*(qin[ny*(nx+2)+1 ]+qin[ny*(nx+2)    ]+qin[nyp*(nx+2)+1  ]); /* nw_corner */
-
-  /* West Edges */
-  if(on_west_edge) {
-    for(j=jstart; j<=jend; j++){
-      q2[j] = 0.5*(qin[j*(nx+2)] + qin[j*(nx+2)+1]);
-    }
-    for(j=jstart; j<jend; j++){
-      qout[j*nxp] = edge_w[j]*q2[j] + (1-edge_w[j])*q2[j+1];
-    }
-  }
-
-  /* East Edges */
-  if(on_east_edge) {
-    for(j=jstart; j<=jend; j++){
-      q2[j] = 0.5*(qin[j*(nx+2)+nx] + qin[j*(nx+2)+nxp]);
-    }
-    for(j=jstart; j<jend; j++){
-      qout[j*nxp+nx] = edge_e[j]*q2[j] + (1-edge_e[j])*q2[j+1];
-    }
-  }
-
-  /* south edge */
-  if(on_south_edge) {
-    for(i=istart; i<=iend; i++){
-      q1[i] = 0.5*(qin[i] + qin[(nx+2)+i]);
-    }
-    for(i=istart; i<iend; i++){
-      qout[i] = edge_s[i]*q1[i] + (1 - edge_s[i])*q1[i+1];
-    }
-  }
-
-  /* north edge */
-  if(on_north_edge) {
-    for(i=istart; i<=iend; i++){
-      q1[i] = 0.5*(qin[ny*(nx+2)+i] + qin[nyp*(nx+2)+i]);
-    }
-    for(i=istart; i<iend; i++){
-      qout[ny*nxp+i] = edge_n[i]*q1[i] + (1 - edge_n[i])*q1[i+1];
-    }
-  }
-
-  free(q1);
-  free(q2);
-
-} /* a2b_ord2 */
-
-
-void get_edge(int nx, int ny, const double *lont, const double *latt,
-              const double *lonc, const double *latc, double *edge_w, double *edge_e, double *edge_s, double *edge_n,
-              int on_west_edge, int on_east_edge, int on_south_edge, int on_north_edge)
-{
-  int i, j, nxp, nyp;
-  int istart, iend, jstart, jend;
-  double p1[2], p2[2];
-  double *py, *px;
-  double d1, d2;
-
-  nxp = nx + 1;
-  nyp = ny + 1;
-
-  for(i=0; i<nxp; i++) {
-    edge_s[i] = 0.5; /* dummy value */
-    edge_n[i] = 0.5; /* dummy value */
-  }
-  for(j=0; j<nyp; j++) {
-    edge_w[j] = 0.5; /* dummy value */
-    edge_e[j] = 0.5; /* dummy value */
-  }
-
-  px = (double *)malloc(2*(nx+2)*sizeof(double));
-  py = (double *)malloc(2*(ny+2)*sizeof(double));
-
-  if(on_west_edge)
-    istart = 1;
-  else
-    istart = 0;
-  if(on_east_edge)
-    iend = nx;
-  else
-    iend = nxp;
-  if(on_south_edge)
-    jstart = 1;
-  else
-    jstart = 0;
-  if(on_north_edge)
-    jend = ny;
-  else
-    jend = nyp;
-  /* west edge */
-
-  if(on_west_edge) {
-    i=0;
-    for(j=jstart; j<=jend; j++) {
-      /* get mid point sphere */
-      p1[0] = lont[j*(nx+2)+i  ]; p1[1] = latt[j*(nx+2)+i  ];
-      p2[0] = lont[j*(nx+2)+i+1]; p2[1] = latt[j*(nx+2)+i+1];
-      mid_pt_sphere(p1, p2, &(py[2*j]));
-    }
-
-    for(j=jstart; j<jend; j++) {
-      p1[0] = lonc[j*nxp+i];
-      p1[1] = latc[j*nxp+i];
-      d1 = great_circle_distance(py+2*j, p1);
-      d2 = great_circle_distance(py+2*(j+1), p1);
-      edge_w[j] = d2/(d1+d2);
-    }
-  }
-  /* east edge */
-  if(on_east_edge) {
-    i=nx;
-    for(j=jstart; j<=jend; j++) {
-      /* get mid point sphere */
-      p1[0] = lont[j*(nx+2)+i  ]; p1[1] = latt[j*(nx+2)+i  ];
-      p2[0] = lont[j*(nx+2)+i+1]; p2[1] = latt[j*(nx+2)+i+1];
-      mid_pt_sphere(p1, p2, &(py[2*j]));
-    }
-
-    for(j=jstart; j<jend; j++) {
-      p1[0] = lonc[j*nxp+i];
-      p1[1] = latc[j*nxp+i];
-      d1 = great_circle_distance(&(py[2*j]), p1);
-      d2 = great_circle_distance(&(py[2*(j+1)]), p1);
-      edge_e[j] = d2/(d1+d2);
-    }
-  }
-
-  /* south edge */
-  if(on_south_edge) {
-    j=0;
-    for(i=istart; i<=iend; i++) {
-      p1[0] = lont[j    *(nx+2)+i]; p1[1] = latt[j    *(nx+2)+i];
-      p2[0] = lont[(j+1)*(nx+2)+i]; p2[1] = latt[(j+1)*(nx+2)+i];
-      mid_pt_sphere(p1, p2, &(px[2*i]));
-    }
-    for(i=istart; i<iend; i++) {
-      p1[0] = lonc[j*nxp+i];
-      p1[1] = latc[j*nxp+i];
-      d1 = great_circle_distance(&(px[2*i]), p1);
-      d2 = great_circle_distance(&(px[2*(i+1)]), p1);
-      edge_s[i] = d2/(d1+d2);
-    }
-  }
-  /* north edge */
-  if(on_north_edge) {
-    j=ny;
-    for(i=istart; i<=iend; i++) {
-      p1[0] = lont[j    *(nx+2)+i]; p1[1] = latt[j    *(nx+2)+i];
-      p2[0] = lont[(j+1)*(nx+2)+i]; p2[1] = latt[(j+1)*(nx+2)+i];
-      mid_pt_sphere(p1, p2, &(px[2*i]));
-    }
-    for(i=istart; i<iend; i++) {
-      p1[0] = lonc[j*nxp+i];
-      p1[1] = latc[j*nxp+i];
-      d1 = great_circle_distance(&(px[2*i]), p1);
-      d2 = great_circle_distance(&(px[2*(i+1)]), p1);
-      edge_n[i] = d2/(d1+d2);
-    }
-  }
-
-  free(px);
-  free(py);
-
-} /* get_edge */
-
-void mid_pt_sphere(const double *p1, const double *p2, double *pm)
-{
-  double e1[3], e2[3], e3[3];
-
-  latlon2xyz(1, p1, p1+1, e1, e1+1, e1+2);
-  latlon2xyz(1, p2, p2+1, e2, e2+1, e2+2);
-  mid_pt3_cart(e1, e2, e3);
-  xyz2latlon(1, e3, e3+1, e3+2, pm, pm+1);
-
-}
-
-void mid_pt3_cart(const double *p1, const double *p2, double *e)
-{
-  double dd;
-
-  e[0] = p1[0] + p2[0];
-  e[1] = p1[1] + p2[1];
-  e[2] = p1[2] + p2[2];
-  dd = sqrt( e[0]*e[0] + e[1]*e[1] + e[2]*e[2] );
-  e[0] /= dd;
-  e[1] /= dd;
-  e[2] /= dd;
-}
-
-/**********************************************************************************************
-  This routine is used to calculate grid information for second order conservative interpolation
-  from cubic grid to other grid
-  the size of xt     will be (nx+2,ny+2), T-cell center, with halo = 1
-  the size of yt     will be (nx+2,ny+2), T-cell center, with halo = 1
-  the size of xc     will be (nx+1,ny+1), C-cell center
-  the size of yc     will be (nx+1,ny+1), C-cell center
-  the size of dx     will be (nx, ny+1),  N-cell center
-  the size of dy     will be (nx+1, ny),  E-cell center
-  the size of area   will be (nx, ny),    T-cell center.
-  The size of edge_w will be (ny-1),      C-cell center, without two end point
-  The size of edge_e will be (ny-1),      C-cell center, without two end point
-  The size of edge_s will be (nx-1),      C-cell center, without two end point
-  The size of edge_n will be (nx-1),      C-cell center, without two end point
-  The size of en_n   will be (nx, ny+1,3),N-cell center
-  The size of en_e   will be (nx+1,ny,3), E-cell center
-  The size of vlon   will be (nx, ny)     T-cell center
-  The size of vlat   will be (nx, ny),    T-cell center
-**********************************************************************************************/
-void calc_c2l_grid_info_(int *nx_pt, int *ny_pt, const double *xt, const double *yt, const double *xc, const double *yc,
-                         double *dx, double *dy, double *area, double *edge_w, double *edge_e, double *edge_s,
-                         double *edge_n, double *en_n, double *en_e, double *vlon, double *vlat,
-                         int *on_west_edge, int *on_east_edge, int *on_south_edge, int *on_north_edge)
-{
-  calc_c2l_grid_info(nx_pt, ny_pt, xt, yt, xc, yc, dx, dy, area, edge_w, edge_e, edge_s, edge_n,
-                     en_n, en_e, vlon, vlat, on_west_edge, on_east_edge, on_south_edge, on_north_edge);
-
-}
-
-void calc_c2l_grid_info(int *nx_pt, int *ny_pt, const double *xt, const double *yt, const double *xc, const double *yc,
-                        double *dx, double *dy, double *area, double *edge_w, double *edge_e, double *edge_s,
-                        double *edge_n, double *en_n, double *en_e, double *vlon, double *vlat,
-                        int *on_west_edge, int *on_east_edge, int *on_south_edge, int *on_north_edge)
-{
-  double *x, *y, *z, *xt_tmp, *yt_tmp;
-  int    nx, ny, nxp, nyp, i, j;
-  double p1[3], p2[3], p3[3], p4[3];
-
-
-  nx  = *nx_pt;
-  ny  = *ny_pt;
-  nxp = nx+1;
-  nyp = ny+1;
-
-  for(j=0; j<nyp; j++) for(i=0; i<nx; i++) {
-      p1[0] = xc[j*nxp+i];
-      p1[1] = yc[j*nxp+i];
-      p2[0] = xc[j*nxp+i+1];
-      p2[1] = yc[j*nxp+i+1];
-      dx[j*nx+i] = great_circle_distance(p1, p2);
-    }
-
-  for(j=0; j<ny; j++) for(i=0; i<nxp; i++) {
-      p1[0] = xc[j*nxp+i];
-      p1[1] = yc[j*nxp+i];
-      p2[0] = xc[(j+1)*nxp+i];
-      p2[1] = yc[(j+1)*nxp+i];
-      dy[j*nxp+i] = great_circle_distance(p1, p2);
-    }
-
-  for(j=0; j<ny; j++) for(i=0; i<nx; i++) {
-      p1[0] = xc[j*nxp+i];       /* ll lon */
-      p1[1] = yc[j*nxp+i];       /* ll lat */
-      p2[0] = xc[(j+1)*nxp+i];   /* ul lon */
-      p2[1] = yc[(j+1)*nxp+i];   /* ul lat */
-      p3[0] = xc[j*nxp+i+1];     /* lr lon */
-      p3[1] = yc[j*nxp+i+1];     /* lr lat */
-      p4[0] = xc[(j+1)*nxp+i+1]; /* ur lon */
-      p4[1] = yc[(j+1)*nxp+i+1]; /* ur lat */
-      area[j*nx+i] = spherical_excess_area(p1, p2, p3, p4, RADIUS);
-    }
-
-  x = (double *)malloc(nxp*nyp*sizeof(double));
-  y = (double *)malloc(nxp*nyp*sizeof(double));
-  z = (double *)malloc(nxp*nyp*sizeof(double));
-
-  latlon2xyz(nxp*nyp, xc, yc, x, y, z);
-  for(j=0; j<nyp; j++) for(i=0; i<nx; i++) {
-      p1[0] = x[j*nxp+i];
-      p1[1] = y[j*nxp+i];
-      p1[2] = z[j*nxp+i];
-      p2[0] = x[j*nxp+i+1];
-      p2[1] = y[j*nxp+i+1];
-      p2[2] = z[j*nxp+i+1];
-      vect_cross(p1, p2, en_n+3*(j*nx+i) );
-      normalize_vect(en_n+3*(j*nx+i));
-    }
-
-  for(j=0; j<ny; j++) for(i=0; i<nxp; i++) {
-      p2[0] = x[j*nxp+i];
-      p2[1] = y[j*nxp+i];
-      p2[2] = z[j*nxp+i];
-      p1[0] = x[(j+1)*nxp+i];
-      p1[1] = y[(j+1)*nxp+i];
-      p1[2] = z[(j+1)*nxp+i];
-      vect_cross(p1, p2, en_e+3*(j*nxp+i) );
-      normalize_vect(en_e+3*(j*nxp+i));
-    }
-
-  xt_tmp = (double *)malloc(nx*ny*sizeof(double));
-  yt_tmp = (double *)malloc(nx*ny*sizeof(double));
-  for(j=0; j<ny; j++)for(i=0; i<nx; i++) {
-      xt_tmp[j*nx+i] = xt[(j+1)*(nx+2)+i+1];
-      yt_tmp[j*nx+i] = yt[(j+1)*(nx+2)+i+1];
-    }
-  unit_vect_latlon(nx*ny, xt_tmp, yt_tmp, vlon, vlat);
-  get_edge(nx, ny, xt, yt, xc, yc, edge_w, edge_e, edge_s, edge_n, *on_west_edge, *on_east_edge,
-           *on_south_edge, *on_north_edge);
-
-  free(x);
-  free(y);
-  free(z);
-  free(xt_tmp);
-  free(yt_tmp);
-
-}
diff --git a/mosaic/gradient_c2l.h b/mosaic/gradient_c2l.h
deleted file mode 100644
index 67810914c4..0000000000
--- a/mosaic/gradient_c2l.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#ifndef GRADIENT_H_
-#define GRADIENT_H_
-
-void a2b_ord2(int nx, int ny, const double *qin, const double *edge_w, const double *edge_e,
-              const double *edge_s, const double *edge_n, double *qout,
-              int on_west_edge, int on_east_edge, int on_south_edge, int on_north_edge);
-
-void grad_c2l(const int *nlon, const int *nlat, const double *pin, const double *dx, const double *dy, const double *area,
-              const double *edge_w, const double *edge_e, const double *edge_s, const double *edge_n,
-              const double *en_n, const double *en_e, const double *vlon, const double *vlat,
-              double *grad_x, double *grad_y, const int *on_west_edge, const int *on_east_edge,
-              const int *on_south_edge, const int *on_north_edge);
-
-void grad_c2l_(const int *nlon, const int *nlat, const double *pin, const double *dx, const double *dy, const double *area,
-               const double *edge_w, const double *edge_e, const double *edge_s, const double *edge_n,
-               const double *en_n, const double *en_e, const double *vlon, const double *vlat,
-               double *grad_x, double *grad_y, const int *on_west_edge, const int *on_east_edge,
-               const int *on_south_edge, const int *on_north_edge);
-
-void calc_c2l_grid_info(int *nx_pt, int *ny_pt, const double *xt, const double *yt, const double *xc, const double *yc,
-                        double *dx, double *dy, double *area, double *edge_w, double *edge_e, double *edge_s,
-                        double *edge_n, double *en_n, double *en_e, double *vlon, double *vlat,
-                        int *on_west_edge, int *on_east_edge, int *on_south_edge, int *on_north_edge);
-
-void calc_c2l_grid_info_(int *nx_pt, int *ny_pt, const double *xt, const double *yt, const double *xc, const double *yc,
-                         double *dx, double *dy, double *area, double *edge_w, double *edge_e, double *edge_s,
-                         double *edge_n, double *en_n, double *en_e, double *vlon, double *vlat,
-                         int *on_west_edge, int *on_east_edge, int *on_south_edge, int *on_north_edge);
-
-void get_edge(int nx, int ny, const double *lont, const double *latt,
-              const double *lonc, const double *latc, double *edge_w, double *edge_e, double *edge_s, double *edge_n,
-              int on_west_edge, int on_east_edge, int on_south_edge, int on_north_edge );
-
-void mid_pt_sphere(const double *p1, const double *p2, double *pm);
-
-void mid_pt3_cart(const double *p1, const double *p2, double *e);
-
-#endif
diff --git a/mosaic/grid.F90 b/mosaic/grid.F90
deleted file mode 100644
index 6c94e1b733..0000000000
--- a/mosaic/grid.F90
+++ /dev/null
@@ -1,1036 +0,0 @@
-!***********************************************************************
-!*                   GNU Lesser General Public License
-!*
-!* This file is part of the GFDL Flexible Modeling System (FMS).
-!*
-!* FMS is free software: you can redistribute it and/or modify it under
-!* the terms of the GNU Lesser General Public License as published by
-!* the Free Software Foundation, either version 3 of the License, or (at
-!* your option) any later version.
-!*
-!* FMS is distributed in the hope that it will be useful, but WITHOUT
-!* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-!* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-!* for more details.
-!*
-!* You should have received a copy of the GNU Lesser General Public
-!* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
-!***********************************************************************
-!> @defgroup grid_mod grid_mod
-!> @ingroup mosaic
-!> @brief Routines for grid calculations
-
-module grid_mod
-
-use mpp_mod, only : mpp_root_pe, uppercase, lowercase, FATAL, NOTE, mpp_error
-use constants_mod, only : PI, radius
-use fms_io_mod, only : get_great_circle_algorithm, get_global_att_value, string, &
-                       field_exist, field_size, read_data
-use mosaic_mod, only : get_mosaic_ntiles, get_mosaic_xgrid_size, get_mosaic_grid_sizes, &
-     get_mosaic_xgrid, calc_mosaic_grid_area, calc_mosaic_grid_great_circle_area
-
-! the following two use statement are only needed for define_cube_mosaic
-use mpp_domains_mod, only : domain2d, mpp_define_mosaic, mpp_get_compute_domain, &
-                            mpp_get_global_domain, domainUG, mpp_pass_SG_to_UG
-use mosaic_mod, only : get_mosaic_ncontacts, get_mosaic_contact
-
-implicit none;private
-
-! ==== public interfaces =====================================================
-! grid dimension inquiry subroutines
-public :: get_grid_ntiles
-public :: get_grid_size
-! grid geometry inquiry subroutines
-public :: get_grid_cell_centers
-public :: get_grid_cell_vertices
-! grid area inquiry subroutines
-public :: get_grid_cell_area
-public :: get_grid_comp_area
-! decompose cubed sphere domains -- probably does not belong here, but it should
-! be in some place available for component models
-public :: define_cube_mosaic
-! ==== end of public interfaces ==============================================
-
-!> returns horizontal sizes of the grid
-!> @ingroup grid_mod
-interface get_grid_size
-   module procedure get_grid_size_for_all_tiles
-   module procedure get_grid_size_for_one_tile
-end interface
-!> returns number of tiles
-!> @ingroup grid_mod
-interface get_grid_cell_vertices
-   module procedure get_grid_cell_vertices_1D
-   module procedure get_grid_cell_vertices_2D
-   module procedure get_grid_cell_vertices_UG
-end interface
-
-!> @ingroup grid_mod
-interface get_grid_cell_centers
-   module procedure get_grid_cell_centers_1D
-   module procedure get_grid_cell_centers_2D
-   module procedure get_grid_cell_centers_UG
-end interface
-
-!> @ingroup grid_mod
-interface get_grid_cell_area
-   module procedure get_grid_cell_area_SG
-   module procedure get_grid_cell_area_UG
-end interface get_grid_cell_area
-
-!> @ingroup grid_mod
-interface get_grid_comp_area
-   module procedure get_grid_comp_area_SG
-   module procedure get_grid_comp_area_UG
-end interface get_grid_comp_area
-
-!> @addtogroup grid_mod
-!> @{
-
-! ==== module constants ======================================================
-character(len=*), parameter :: &
-     module_name = 'grid_mod'
-
-! Include variable "version" to be written to log file.
-#include<file_version.h>
-
-character(len=*), parameter :: &
-     grid_dir  = 'INPUT/',     &      !< root directory for all grid files
-     grid_file = 'INPUT/grid_spec.nc' !< name of the grid spec file
-
-integer, parameter :: &
-     MAX_NAME = 256,  & !< max length of the variable names
-     MAX_FILE = 1024, & !< max length of the file names
-     VERSION_0 = 0,   &
-     VERSION_1 = 1,   &
-     VERSION_2 = 2
-
-integer, parameter :: BUFSIZE = 1048576  !< This is used to control memory usage in get_grid_comp_area
-                                         !! We may change this to a namelist variable is needed.
-
-! ==== module variables ======================================================
-integer :: grid_version = -1
-logical :: great_circle_algorithm = .FALSE.
-logical :: first_call = .TRUE.
-
-
-contains
-
-function get_grid_version()
-  integer :: get_grid_version
-
-  if(first_call) then
-     great_circle_algorithm = get_great_circle_algorithm()
-     first_call = .FALSE.
-  endif
-
-  if(grid_version<0) then
-    if(field_exist(grid_file, 'geolon_t')) then
-       grid_version = VERSION_0
-    else if(field_exist(grid_file, 'x_T')) then
-       grid_version = VERSION_1
-    else if(field_exist(grid_file, 'ocn_mosaic_file') ) then
-       grid_version = VERSION_2
-    else
-       call mpp_error(FATAL, module_name//&
-                    & '/get_grid_version: Can''t determine the version of the grid spec:'// &
-                    & ' none of "x_T", "geolon_t", or "ocn_mosaic_file" exist in file "'//trim(grid_file)//'"')
-    endif
-  endif
-  get_grid_version = grid_version
-end function get_grid_version
-
-
-! ============================================================================
-! ============================================================================
-!> Returns number of tiles for a given component
-subroutine get_grid_ntiles(component,ntiles)
-  character(len=*)     :: component
-  integer, intent(out) :: ntiles
-
-  ! local vars
-  character(len=MAX_FILE) :: component_mosaic
-
-  select case (get_grid_version())
-  case(VERSION_0,VERSION_1)
-     ntiles = 1
-  case(VERSION_2)
-     call read_data(grid_file,trim(lowercase(component))//'_mosaic_file',component_mosaic)
-     ntiles = get_mosaic_ntiles(grid_dir//trim(component_mosaic))
-  end select
-end subroutine get_grid_ntiles
-
-
-! ============================================================================
-! ============================================================================
-!> Returns size of the grid for each of the tiles
-subroutine get_grid_size_for_all_tiles(component,nx,ny)
-  character(len=*)     :: component
-  integer, intent(inout) :: nx(:),ny(:)
-
-  ! local vars
-  integer :: siz(4) ! for the size of external fields
-  character(len=MAX_NAME) :: varname1, varname2
-  character(len=MAX_FILE) :: component_mosaic
-
-  varname1 = 'AREA_'//trim(uppercase(component))
-  varname2 = trim(lowercase(component))//'_mosaic_file'
-
-  select case (get_grid_version())
-  case(VERSION_0,VERSION_1)
-     call field_size(grid_file, varname1, siz)
-     nx(1) = siz(1); ny(1)=siz(2)
-  case(VERSION_2) ! mosaic file
-     call read_data(grid_file,varname2, component_mosaic)
-     call get_mosaic_grid_sizes(grid_dir//trim(component_mosaic),nx,ny)
-  end select
-end subroutine get_grid_size_for_all_tiles
-
-
-! ============================================================================
-! ============================================================================
-!> Returns size of the grid for one of the tiles
-subroutine get_grid_size_for_one_tile(component,tile,nx,ny)
-  character(len=*)       :: component
-  integer, intent(in)    :: tile
-  integer, intent(inout) :: nx,ny
-
-  ! local vars
-  integer, allocatable :: nnx(:), nny(:)
-  integer :: ntiles
-
-  call get_grid_ntiles(component, ntiles)
-  if(tile>0.and.tile<=ntiles) then
-     allocate(nnx(ntiles),nny(ntiles))
-     call get_grid_size_for_all_tiles(component,nnx,nny)
-     nx = nnx(tile); ny = nny(tile)
-     deallocate(nnx,nny)
-  else
-     call mpp_error(FATAL, 'get_grid_size: requested tile index '// &
-                    & trim(string(tile))//' is out of bounds (1:'//trim(string(ntiles))//')')
-  endif
-end subroutine get_grid_size_for_one_tile
-
-! ============================================================================
-! ============================================================================
-!> Return grid cell area for the specified model component and tile
-subroutine get_grid_cell_area_SG(component, tile, cellarea, domain)
-  character(len=*), intent(in)    :: component
-  integer         , intent(in)    :: tile
-  real            , intent(inout) :: cellarea(:,:)
-  type(domain2d)  , intent(in), optional :: domain
-
-  ! local vars
-  integer :: nlon, nlat
-  real, allocatable :: glonb(:,:), glatb(:,:)
-
-  select case(get_grid_version())
-  case(VERSION_0,VERSION_1)
-     select case(trim(component))
-     case('LND')
-        call read_data(grid_file, 'AREA_LND_CELL', cellarea, &
-            no_domain=.not.present(domain), domain=domain)
-     case('ATM','OCN')
-        call read_data(grid_file, 'AREA_'//trim(uppercase(component)),cellarea,&
-            no_domain=.not.present(domain),domain=domain)
-     case default
-        call mpp_error(FATAL, module_name//'/get_grid_cell_area: Illegal component name "'//trim(component) &
-                            & //'": must be one of ATM, LND, or OCN')
-     end select
-     ! convert area to m2
-     cellarea = cellarea*4.*PI*radius**2
-  case(VERSION_2)
-     if (present(domain)) then
-        call mpp_get_compute_domain(domain,xsize=nlon,ysize=nlat)
-     else
-        call get_grid_size(component,tile,nlon,nlat)
-     endif
-     allocate(glonb(nlon+1,nlat+1),glatb(nlon+1,nlat+1))
-     call get_grid_cell_vertices(component, tile, glonb, glatb, domain)
-     if (great_circle_algorithm) then
-        call calc_mosaic_grid_great_circle_area(glonb*pi/180.0, glatb*pi/180.0, cellarea)
-     else
-        call calc_mosaic_grid_area(glonb*pi/180.0, glatb*pi/180.0, cellarea)
-     end if
-     deallocate(glonb,glatb)
-  end select
-
-end subroutine get_grid_cell_area_SG
-
-! ============================================================================
-! ============================================================================
-!> Get the area of the component per grid cell
-subroutine get_grid_comp_area_SG(component,tile,area,domain)
-  character(len=*) :: component
-  integer, intent(in) :: tile
-  real, intent(inout) :: area(:,:)
-  type(domain2d), intent(in), optional :: domain
-  ! local vars
-  integer :: n_xgrid_files ! number of exchange grid files in the mosaic
-  integer :: siz(4), nxgrid
-  integer :: i,j,m,n
-  integer, allocatable :: i1(:), j1(:), i2(:), j2(:)
-  real, allocatable :: xgrid_area(:)
-  real, allocatable :: rmask(:,:)
-  character(len=MAX_NAME) :: &
-     xgrid_name, & ! name of the variable holding xgrid names
-     tile_name,  & ! name of the tile
-     xgrid_file, & ! name of the current xgrid file
-     mosaic_name,& ! name of the mosaic
-     mosaic_file,&
-     tilefile
-  character(len=4096)     :: attvalue
-  character(len=MAX_NAME), allocatable :: nest_tile_name(:)
-  integer :: is,ie,js,je ! boundaries of our domain
-  integer :: i0, j0 ! offsets for x and y, respectively
-  integer :: num_nest_tile, ntiles
-  logical :: is_nest
-  integer :: found_xgrid_files ! how many xgrid files we actually found in the grid spec
-  integer :: ibegin, iend, bsize, l
-
-  select case (get_grid_version())
-  case(VERSION_0,VERSION_1)
-     select case(component)
-     case('ATM')
-        call read_data(grid_file,'AREA_ATM',area, no_domain=.not.present(domain),domain=domain)
-     case('OCN')
-        allocate(rmask(size(area,1),size(area,2)))
-        call read_data(grid_file,'AREA_OCN',area, no_domain=.not.present(domain),domain=domain)
-        call read_data(grid_file,'wet',     rmask,no_domain=.not.present(domain),domain=domain)
-        area = area*rmask
-        deallocate(rmask)
-     case('LND')
-        call read_data(grid_file,'AREA_LND',area,no_domain=.not.present(domain),domain=domain)
-     case default
-        call mpp_error(FATAL, module_name// &
-              & '/get_grid_comp_area: Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
-     end select
-  case(VERSION_2) ! mosaic gridspec
-     select case (component)
-     case ('ATM')
-        ! just read the grid cell area and return
-        call get_grid_cell_area(component,tile,area)
-        return
-     case ('LND')
-        xgrid_name = 'aXl_file'
-        call read_data(grid_file, 'lnd_mosaic', mosaic_name)
-        tile_name  = trim(mosaic_name)//'_tile'//char(tile+ichar('0'))
-     case ('OCN')
-        xgrid_name = 'aXo_file'
-        call read_data(grid_file, 'ocn_mosaic', mosaic_name)
-        tile_name  = trim(mosaic_name)//'_tile'//char(tile+ichar('0'))
-     case default
-        call mpp_error(FATAL, module_name// &
-              & '/get_grid_comp_area: Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
-     end select
-     ! get the boundaries of the requested domain
-     if(present(domain)) then
-        call mpp_get_compute_domain(domain,is,ie,js,je)
-        i0 = 1-is ; j0=1-js
-     else
-        call get_grid_size(component,tile,ie,je)
-        is = 1 ; i0 = 0
-        js = 1 ; j0 = 0
-     endif
-     if (size(area,1)/=ie-is+1.or.size(area,2)/=je-js+1) &
-        call mpp_error(FATAL, module_name// &
-                       & '/get_grid_comp_area: size of the output argument "area" is not consistent with the domain')
-
-     ! find the nest tile
-     call read_data(grid_file, 'atm_mosaic', mosaic_name)
-     call read_data(grid_file,'atm_mosaic_file',mosaic_file)
-     mosaic_file = grid_dir//trim(mosaic_file)
-     ntiles = get_mosaic_ntiles(trim(mosaic_file))
-     allocate(nest_tile_name(ntiles))
-     num_nest_tile = 0
-     do n = 1, ntiles
-        call read_data(mosaic_file, 'gridfiles', tilefile, level=n)
-        tilefile = grid_dir//trim(tilefile)
-        if( get_global_att_value(tilefile, "nest_grid", attvalue) ) then
-           if(trim(attvalue) == "TRUE") then
-              num_nest_tile = num_nest_tile + 1
-              nest_tile_name(num_nest_tile) = trim(mosaic_name)//'_tile'//char(n+ichar('0'))
-           else if(trim(attvalue) .NE. "FALSE") then
-              call mpp_error(FATAL, module_name//'/get_grid_comp_area: value of global attribute nest_grid in file'// &
-                             &  trim(tilefile)//' should be TRUE of FALSE')
-           endif
-        end if
-     end do
-     area(:,:) = 0.
-     if(field_exist(grid_file,xgrid_name)) then
-        ! get the number of the exchange-grid files
-        call field_size(grid_file,xgrid_name,siz)
-        n_xgrid_files = siz(2)
-        found_xgrid_files = 0
-        ! loop through all exchange grid files
-        do n = 1, n_xgrid_files
-           ! get the name of the current exchange grid file
-           call read_data(grid_file,xgrid_name,xgrid_file,level=n)
-           ! skip the rest of the loop if the name of the current tile isn't found
-           ! in the file name, but check this only if there is more than 1 tile
-           if(n_xgrid_files>1) then
-              if(index(xgrid_file,trim(tile_name))==0) cycle
-           endif
-           found_xgrid_files = found_xgrid_files + 1
-           !---make sure the atmosphere grid is not a nested grid
-           is_nest = .false.
-           do m = 1, num_nest_tile
-              if(index(xgrid_file, trim(nest_tile_name(m))) .NE. 0) then
-                 is_nest = .true.
-                 exit
-              end if
-           end do
-           if(is_nest) cycle
-
-           ! finally read the exchange grid
-           nxgrid = get_mosaic_xgrid_size(grid_dir//xgrid_file)
-           if(nxgrid < BUFSIZE) then
-              allocate(i1(nxgrid), j1(nxgrid), i2(nxgrid), j2(nxgrid), xgrid_area(nxgrid))
-           else
-              allocate(i1(BUFSIZE), j1(BUFSIZE), i2(BUFSIZE), j2(BUFSIZE), xgrid_area(BUFSIZE))
-           endif
-           ibegin = 1
-           do l = 1,nxgrid,BUFSIZE
-              bsize = min(BUFSIZE, nxgrid-l+1)
-              iend = ibegin + bsize - 1
-              call get_mosaic_xgrid(grid_dir//xgrid_file, i1(1:bsize), j1(1:bsize), i2(1:bsize), j2(1:bsize), &
-                                    xgrid_area(1:bsize), ibegin, iend)
-              ! and sum the exchange grid areas
-              do m = 1, bsize
-                 i = i2(m); j = j2(m)
-                 if (i<is.or.i>ie) cycle
-                 if (j<js.or.j>je) cycle
-                 area(i+i0,j+j0) = area(i+i0,j+j0) + xgrid_area(m)
-              end do
-              ibegin = iend + 1
-           enddo
-           deallocate(i1, j1, i2, j2, xgrid_area)
-        enddo
-        if (found_xgrid_files == 0) &
-           call mpp_error(FATAL, 'get_grid_comp_area: no xgrid files were found for component '// &
-                          & trim(component)//' (mosaic name is '//trim(mosaic_name)//')')
-
-     endif
-     deallocate(nest_tile_name)
-  end select ! version
-  ! convert area to m2
-  area = area*4.*PI*radius**2
-end subroutine get_grid_comp_area_SG
-
-!======================================================================
-subroutine get_grid_cell_area_UG(component, tile, cellarea, SG_domain, UG_domain)
-  character(len=*),   intent(in)    :: component
-  integer         ,   intent(in)    :: tile
-  real            ,   intent(inout) :: cellarea(:)
-  type(domain2d)  ,   intent(in)    :: SG_domain
-  type(domainUG)  ,   intent(in)    :: UG_domain
-  integer :: is, ie, js, je
-  real, allocatable :: SG_area(:,:)
-
-  call mpp_get_compute_domain(SG_domain, is, ie, js, je)
-  allocate(SG_area(is:ie, js:je))
-  call get_grid_cell_area_SG(component, tile, SG_area, SG_domain)
-  call mpp_pass_SG_to_UG(UG_domain, SG_area, cellarea)
-  deallocate(SG_area)
-
-end subroutine get_grid_cell_area_UG
-
-subroutine get_grid_comp_area_UG(component, tile, area, SG_domain, UG_domain)
-  character(len=*),   intent(in)    :: component
-  integer         ,   intent(in)    :: tile
-  real            ,   intent(inout) :: area(:)
-  type(domain2d)  ,   intent(in)    :: SG_domain
-  type(domainUG)  ,   intent(in)    :: UG_domain
-  integer :: is, ie, js, je
-  real, allocatable :: SG_area(:,:)
-
-  call mpp_get_compute_domain(SG_domain, is, ie, js, je)
-  allocate(SG_area(is:ie, js:je))
-  call get_grid_comp_area_SG(component, tile, SG_area, SG_domain)
-  call mpp_pass_SG_to_UG(UG_domain, SG_area, area)
-  deallocate(SG_area)
-
-end subroutine get_grid_comp_area_UG
-
-
-! ============================================================================
-! ============================================================================
-!> Returns arrays of global grid cell boundaries for given model component and
-!! mosaic tile number.
-!!
-!> @note In the case of non-lat-lon grid the returned coordinates may have be not so
-!! meaningful, by the very nature of such grids. But presumably these 1D coordinate
-!! arrays are good enough for diag axis and such.
-subroutine get_grid_cell_vertices_1D(component, tile, glonb, glatb)
-  character(len=*), intent(in) :: component
-  integer,          intent(in) :: tile
-  real,          intent(inout) :: glonb(:),glatb(:)
-
-  integer                      :: nlon, nlat
-  integer                      :: start(4), nread(4)
-  real, allocatable            :: tmp(:,:), x_vert_t(:,:,:), y_vert_t(:,:,:)
-  character(len=MAX_FILE)      :: filename1, filename2
-
-  call get_grid_size_for_one_tile(component, tile, nlon, nlat)
-  if (size(glonb(:))/=nlon+1) &
-       call mpp_error (FATAL,  module_name// &
-                       & '/get_grid_cell_vertices_1D: Size of argument "glonb" is not consistent with the grid size')
-  if (size(glatb(:))/=nlat+1) &
-       call mpp_error (FATAL, module_name// &
-                       & '/get_grid_cell_vertices_1D: Size of argument "glatb" is not consistent with the grid size')
-  if(trim(component) .NE. 'ATM' .AND. component .NE. 'LND' .AND. component .NE. 'OCN') then
-     call mpp_error(FATAL, module_name//'/get_grid_cell_vertices_1D: Illegal component name "'// &
-                    & trim(component)//'": must be one of ATM, LND, or OCN')
-  endif
-
-  select case(get_grid_version())
-  case(VERSION_0)
-     select case(trim(component))
-     case('ATM','LND')
-        call read_data(grid_file, 'xb'//lowercase(component(1:1)), glonb, no_domain=.true.)
-        call read_data(grid_file, 'yb'//lowercase(component(1:1)), glatb, no_domain=.true.)
-     case('OCN')
-        call read_data(grid_file, "gridlon_vert_t", glonb, no_domain=.true.)
-        call read_data(grid_file, "gridlat_vert_t", glatb, no_domain=.true.)
-     end select
-  case(VERSION_1)
-     select case(trim(component))
-     case('ATM','LND')
-        call read_data(grid_file, 'xb'//lowercase(component(1:1)), glonb, no_domain=.true.)
-        call read_data(grid_file, 'yb'//lowercase(component(1:1)), glatb, no_domain=.true.)
-     case('OCN')
-        allocate (x_vert_t(nlon,1,2), y_vert_t(1,nlat,2) )
-        start = 1; nread = 1
-        nread(1) = nlon; nread(2) = 1; start(3) = 1
-        call read_data(grid_file, "x_vert_T", x_vert_t(:,:,1), start, nread, no_domain=.TRUE.)
-        nread(1) = nlon; nread(2) = 1; start(3) = 2
-        call read_data(grid_file, "x_vert_T", x_vert_t(:,:,2), start, nread, no_domain=.TRUE.)
-
-        nread(1) = 1; nread(2) = nlat; start(3) = 1
-        call read_data(grid_file, "y_vert_T", y_vert_t(:,:,1), start, nread, no_domain=.TRUE.)
-        nread(1) = 1; nread(2) = nlat; start(3) = 4
-        call read_data(grid_file, "y_vert_T", y_vert_t(:,:,2), start, nread, no_domain=.TRUE.)
-        glonb(1:nlon) = x_vert_t(1:nlon,1,1)
-        glonb(nlon+1) = x_vert_t(nlon,1,2)
-        glatb(1:nlat) = y_vert_t(1,1:nlat,1)
-        glatb(nlat+1) = y_vert_t(1,nlat,2)
-        deallocate(x_vert_t, y_vert_t)
-     end select
-  case(VERSION_2)
-     ! get the name of the mosaic file for the component
-     call read_data(grid_file, trim(lowercase(component))//'_mosaic_file', filename1)
-     filename1=grid_dir//trim(filename1)
-     ! get the name of the grid file for the component and tile
-     call read_data(filename1, 'gridfiles', filename2, level=tile)
-     filename2 = grid_dir//trim(filename2)
-
-     start = 1; nread = 1
-     nread(1) = 2*nlon+1
-     allocate( tmp(2*nlon+1,1) )
-     call read_data(filename2, "x", tmp, start, nread, no_domain=.TRUE.)
-     glonb(1:nlon+1) = tmp(1:2*nlon+1:2,1)
-     deallocate(tmp)
-     allocate(tmp(1,2*nlat+1))
-
-     start = 1; nread = 1
-     nread(2) = 2*nlat+1
-     call read_data(filename2, "y", tmp, start, nread, no_domain=.TRUE.)
-     glatb(1:nlat+1) = tmp(1,1:2*nlat+1:2)
-     deallocate(tmp)
-  end select
-
-end subroutine get_grid_cell_vertices_1D
-
-! ============================================================================
-! ============================================================================
-!> Returns cell vertices for the specified model component and mosaic tile number
-subroutine get_grid_cell_vertices_2D(component, tile, lonb, latb, domain)
-  character(len=*),         intent(in) :: component
-  integer,                  intent(in) :: tile
-  real,                  intent(inout) :: lonb(:,:),latb(:,:)
-  type(domain2d), optional, intent(in) :: domain
-
-  ! local vars
-  character(len=MAX_FILE) :: filename1, filename2
-  integer :: nlon, nlat
-  integer :: i,j
-  real, allocatable :: buffer(:), tmp(:,:), x_vert_t(:,:,:), y_vert_t(:,:,:)
-  integer :: is,ie,js,je ! boundaries of our domain
-  integer :: i0,j0 ! offsets for coordinates
-  integer :: isg, jsg
-  integer :: start(4), nread(4)
-
-  call get_grid_size_for_one_tile(component, tile, nlon, nlat)
-  if (present(domain)) then
-    call mpp_get_compute_domain(domain,is,ie,js,je)
-  else
-    is = 1 ; ie = nlon
-    js = 1 ; je = nlat
-    !--- domain normally should be present
-    call mpp_error (NOTE, module_name//'/get_grid_cell_vertices: domain is not present, global data will be read')
-  endif
-  i0 = -is+1; j0 = -js+1
-
-  ! verify that lonb and latb sizes are consistent with the size of domain
-  if (size(lonb,1)/=ie-is+2.or.size(lonb,2)/=je-js+2) &
-       call mpp_error (FATAL, module_name// &
-                       & '/get_grid_cell_vertices: Size of argument "lonb" is not consistent with the domain size')
-  if (size(latb,1)/=ie-is+2.or.size(latb,2)/=je-js+2) &
-       call mpp_error (FATAL, module_name// &
-                       & '/get_grid_cell_vertices: Size of argument "latb" is not consistent with the domain size')
-  if(trim(component) .NE. 'ATM' .AND. component .NE. 'LND' .AND. component .NE. 'OCN') then
-     call mpp_error(FATAL, module_name//'/get_grid_cell_vertices: Illegal component name "'// &
-                    & trim(component)//'": must be one of ATM, LND, or OCN')
-  endif
-
-  select case(get_grid_version())
-  case(VERSION_0)
-     select case(component)
-     case('ATM','LND')
-        allocate(buffer(max(nlon,nlat)+1))
-        ! read coordinates of grid cell vertices
-        call read_data(grid_file, 'xb'//lowercase(component(1:1)), buffer(1:nlon+1), no_domain=.true.)
-        do j = js, je+1
-           do i = is, ie+1
-              lonb(i+i0,j+j0) = buffer(i)
-           enddo
-        enddo
-        call read_data(grid_file, 'yb'//lowercase(component(1:1)), buffer(1:nlat+1), no_domain=.true.)
-        do j = js, je+1
-           do i = is, ie+1
-              latb(i+i0,j+j0) = buffer(j)
-           enddo
-        enddo
-        deallocate(buffer)
-     case('OCN')
-        if (present(domain)) then
-           start = 1; nread = 1
-           start(1) = is; start(2) = js
-           nread(1) = ie-is+2; nread(2) = je-js+2
-           call read_data(grid_file, 'geolon_vert_t', lonb, start, nread, no_domain=.true. )
-           call read_data(grid_file, 'geolat_vert_t', latb, start, nread, no_domain=.true. )
-         else
-           call read_data(grid_file, 'geolon_vert_t', lonb, no_domain=.TRUE. )
-           call read_data(grid_file, 'geolat_vert_t', latb, no_domain=.TRUE. )
-         endif
-     end select
-  case(VERSION_1)
-     select case(component)
-     case('ATM','LND')
-        allocate(buffer(max(nlon,nlat)+1))
-        ! read coordinates of grid cell vertices
-        call read_data(grid_file, 'xb'//lowercase(component(1:1)), buffer(1:nlon+1), no_domain=.true.)
-        do j = js, je+1
-           do i = is, ie+1
-              lonb(i+i0,j+j0) = buffer(i)
-           enddo
-        enddo
-        call read_data(grid_file, 'yb'//lowercase(component(1:1)), buffer(1:nlat+1), no_domain=.true.)
-        do j = js, je+1
-           do i = is, ie+1
-              latb(i+i0,j+j0) = buffer(j)
-           enddo
-        enddo
-        deallocate(buffer)
-     case('OCN')
-        nlon=ie-is+1; nlat=je-js+1
-        allocate (x_vert_t(nlon,nlat,4), y_vert_t(nlon,nlat,4) )
-        call read_data(grid_file, 'x_vert_T', x_vert_t, no_domain=.not.present(domain), domain=domain )
-        call read_data(grid_file, 'y_vert_T', y_vert_t, no_domain=.not.present(domain), domain=domain )
-        lonb(1:nlon,1:nlat) = x_vert_t(1:nlon,1:nlat,1)
-        lonb(nlon+1,1:nlat) = x_vert_t(nlon,1:nlat,2)
-        lonb(1:nlon,nlat+1) = x_vert_t(1:nlon,nlat,4)
-        lonb(nlon+1,nlat+1) = x_vert_t(nlon,nlat,3)
-        latb(1:nlon,1:nlat) = y_vert_t(1:nlon,1:nlat,1)
-        latb(nlon+1,1:nlat) = y_vert_t(nlon,1:nlat,2)
-        latb(1:nlon,nlat+1) = y_vert_t(1:nlon,nlat,4)
-        latb(nlon+1,nlat+1) = y_vert_t(nlon,nlat,3)
-        deallocate(x_vert_t, y_vert_t)
-     end select
-  case(VERSION_2)
-     ! get the name of the mosaic file for the component
-     call read_data(grid_file, trim(lowercase(component))//'_mosaic_file', filename1)
-     filename1=grid_dir//trim(filename1)
-     ! get the name of the grid file for the component and tile
-     call read_data(filename1, 'gridfiles', filename2, level=tile)
-     filename2 = grid_dir//trim(filename2)
-     if(PRESENT(domain)) then
-        call mpp_get_global_domain(domain, xbegin=isg, ybegin=jsg)
-        start = 1; nread = 1
-        start(1) = 2*(is-isg+1) - 1; nread(1) = 2*(ie-is)+3
-        start(2) = 2*(js-jsg+1) - 1; nread(2) = 2*(je-js)+3
-        allocate(tmp(nread(1), nread(2)) )
-        call read_data(filename2, 'x', tmp, start, nread, no_domain=.TRUE.)
-        do j = 1, je-js+2
-           do i = 1, ie-is+2
-              lonb(i,j) = tmp(2*i-1,2*j-1)
-           enddo
-        enddo
-        call read_data(filename2, 'y', tmp, start, nread, no_domain=.TRUE.)
-        do j = 1, je-js+2
-           do i = 1, ie-is+2
-              latb(i,j) = tmp(2*i-1,2*j-1)
-           enddo
-        enddo
-     else
-        allocate(tmp(2*nlon+1,2*nlat+1))
-        call read_data(filename2, 'x', tmp, no_domain=.TRUE.)
-        do j = js, je+1
-           do i = is, ie+1
-              lonb(i+i0,j+j0) = tmp(2*i-1,2*j-1)
-           end do
-        end do
-        call read_data(filename2, 'y', tmp, no_domain=.TRUE.)
-        do j = js, je+1
-           do i = is, ie+1
-              latb(i+i0,j+j0) = tmp(2*i-1,2*j-1)
-           end do
-        end do
-     endif
-     deallocate(tmp)
-  end select
-
-end subroutine get_grid_cell_vertices_2D
-
-
-subroutine get_grid_cell_vertices_UG(component, tile, lonb, latb, SG_domain, UG_domain)
-  character(len=*),         intent(in) :: component
-  integer,                  intent(in) :: tile
-  real,                  intent(inout) :: lonb(:,:),latb(:,:) ! The second dimension is 4
-  type(domain2d)  ,   intent(in)       :: SG_domain
-  type(domainUG)  ,   intent(in)       :: UG_domain
-  integer :: is, ie, js, je, i, j
-  real, allocatable :: SG_lonb(:,:), SG_latb(:,:), tmp(:,:,:)
-
-  call mpp_get_compute_domain(SG_domain, is, ie, js, je)
-  allocate(SG_lonb(is:ie+1, js:je+1))
-  allocate(SG_latb(is:ie+1, js:je+1))
-  allocate(tmp(is:ie,js:je,4))
-  call get_grid_cell_vertices_2D(component, tile, SG_lonb, SG_latb, SG_domain)
-  do j = js, je
-     do i = is, ie
-        tmp(i,j,1) = SG_lonb(i,j)
-        tmp(i,j,2) = SG_lonb(i+1,j)
-        tmp(i,j,3) = SG_lonb(i+1,j+1)
-        tmp(i,j,4) = SG_lonb(i,j+1)
-     enddo
-  enddo
-  call mpp_pass_SG_to_UG(UG_domain, tmp, lonb)
-  do j = js, je
-     do i = is, ie
-        tmp(i,j,1) = SG_latb(i,j)
-        tmp(i,j,2) = SG_latb(i+1,j)
-        tmp(i,j,3) = SG_latb(i+1,j+1)
-        tmp(i,j,4) = SG_latb(i,j+1)
-     enddo
-  enddo
-  call mpp_pass_SG_to_UG(UG_domain, tmp, latb)
-
-
-  deallocate(SG_lonb, SG_latb, tmp)
-
-end subroutine get_grid_cell_vertices_UG
-
-! ============================================================================
-!> Returns global coordinate arrays fro given model component and mosaic tile number
-!! @note In the case of non-lat-lon grid those coordinates may have be not so
-!! meaningful, by the very nature of such grids. But presumably these 1D coordinate
-!! arrays are good enough for diag axis and such.
-subroutine get_grid_cell_centers_1D(component, tile, glon, glat)
-  character(len=*), intent(in) :: component
-  integer, intent(in) :: tile
-  real, intent(inout) :: glon(:),glat(:)
-  integer                      :: nlon, nlat
-  integer                      :: start(4), nread(4)
-  real, allocatable            :: tmp(:,:)
-  character(len=MAX_FILE)      :: filename1, filename2
-
-  call get_grid_size_for_one_tile(component, tile, nlon, nlat)
-  if (size(glon(:))/=nlon) &
-       call mpp_error (FATAL,  module_name// &
-                       & '/get_grid_cell_centers_1D: Size of argument "glon" is not consistent with the grid size')
-  if (size(glat(:))/=nlat) &
-       call mpp_error (FATAL,  module_name// &
-                       & '/get_grid_cell_centers_1D: Size of argument "glat" is not consistent with the grid size')
-  if(trim(component) .NE. 'ATM' .AND. component .NE. 'LND' .AND. component .NE. 'OCN') then
-     call mpp_error(FATAL, module_name//'/get_grid_cell_centers_1D: Illegal component name "'// &
-                    & trim(component)//'": must be one of ATM, LND, or OCN')
-  endif
-
-  select case(get_grid_version())
-  case(VERSION_0)
-     select case(trim(component))
-     case('ATM','LND')
-        call read_data(grid_file, 'xt'//lowercase(component(1:1)), glon, no_domain=.true.)
-        call read_data(grid_file, 'yt'//lowercase(component(1:1)), glat, no_domain=.true.)
-     case('OCN')
-        call read_data(grid_file, "gridlon_t", glon, no_domain=.true.)
-        call read_data(grid_file, "gridlat_t", glat, no_domain=.true.)
-     end select
-  case(VERSION_1)
-     select case(trim(component))
-     case('ATM','LND')
-        call read_data(grid_file, 'xt'//lowercase(component(1:1)), glon, no_domain=.true.)
-        call read_data(grid_file, 'yt'//lowercase(component(1:1)), glat, no_domain=.true.)
-     case('OCN')
-        call read_data(grid_file, "grid_x_T", glon, no_domain=.true.)
-        call read_data(grid_file, "grid_y_T", glat, no_domain=.true.)
-     end select
-  case(VERSION_2)
-     ! get the name of the mosaic file for the component
-     call read_data(grid_file, trim(lowercase(component))//'_mosaic_file', filename1)
-     filename1=grid_dir//trim(filename1)
-     ! get the name of the grid file for the component and tile
-     call read_data(filename1, 'gridfiles', filename2, level=tile)
-     filename2 = grid_dir//trim(filename2)
-
-     start = 1; nread = 1
-     nread(1) = 2*nlon+1; start(2) = 2
-     allocate( tmp(2*nlon+1,1) )
-     call read_data(filename2, "x", tmp, start, nread, no_domain=.TRUE.)
-     glon(1:nlon) = tmp(2:2*nlon:2,1)
-     deallocate(tmp)
-     allocate(tmp(1, 2*nlat+1))
-
-     start = 1; nread = 1
-     nread(2) = 2*nlat+1; start(1) = 2
-     call read_data(filename2, "y", tmp, start, nread, no_domain=.TRUE.)
-     glat(1:nlat) = tmp(1,2:2*nlat:2)
-     deallocate(tmp)
-  end select
-
-
-end subroutine get_grid_cell_centers_1D
-
-! ============================================================================
-! ============================================================================
-!> Returns grid cell centers for specified model component and mosaic tile number
-subroutine get_grid_cell_centers_2D(component, tile, lon, lat, domain)
-  character(len=*), intent(in) :: component
-  integer, intent(in) :: tile
-  real, intent(inout) :: lon(:,:),lat(:,:)
-  type(domain2d), intent(in), optional :: domain
-  ! local vars
-  character(len=MAX_FILE) :: filename1, filename2
-  integer :: nlon, nlat
-  integer :: i,j
-  real, allocatable :: buffer(:),tmp(:,:)
-  integer :: is,ie,js,je ! boundaries of our domain
-  integer :: i0,j0 ! offsets for coordinates
-  integer :: isg, jsg
-  integer :: start(4), nread(4)
-
-  call get_grid_size_for_one_tile(component, tile, nlon, nlat)
-  if (present(domain)) then
-    call mpp_get_compute_domain(domain,is,ie,js,je)
-  else
-    is = 1 ; ie = nlon
-    js = 1 ; je = nlat
-    !--- domain normally should be present
-    call mpp_error (NOTE, module_name//'/get_grid_cell_centers: domain is not present, global data will be read')
-  endif
-  i0 = -is+1; j0 = -js+1
-
-  ! verify that lon and lat sizes are consistent with the size of domain
-  if (size(lon,1)/=ie-is+1.or.size(lon,2)/=je-js+1) &
-       call mpp_error (FATAL, module_name// &
-                       & '/get_grid_cell_centers: Size of array "lon" is not consistent with the domain size')
-  if (size(lat,1)/=ie-is+1.or.size(lat,2)/=je-js+1) &
-       call mpp_error (FATAL, module_name// &
-                       & '/get_grid_cell_centers: Size of array "lat" is not consistent with the domain size')
-  if(trim(component) .NE. 'ATM' .AND. component .NE. 'LND' .AND. component .NE. 'OCN') then
-     call mpp_error(FATAL, module_name//'/get_grid_cell_vertices: Illegal component name "'// &
-                    & trim(component)//'": must be one of ATM, LND, or OCN')
-  endif
-
-  select case(get_grid_version())
-  case(VERSION_0)
-     select case (trim(component))
-     case('ATM','LND')
-        allocate(buffer(max(nlon,nlat)))
-        ! read coordinates of grid cell vertices
-        call read_data(grid_file, 'xt'//lowercase(component(1:1)), buffer(1:nlon), no_domain=.true.)
-        do j = js,je
-        do i = is,ie
-           lon(i+i0,j+j0) = buffer(i)
-        enddo
-        enddo
-        call read_data(grid_file, 'yt'//lowercase(component(1:1)), buffer(1:nlat), no_domain=.true.)
-        do j = js,je
-        do i = is,ie
-           lat(i+i0,j+j0) = buffer(j)
-        enddo
-        enddo
-        deallocate(buffer)
-     case('OCN')
-        call read_data(grid_file, 'geolon_t', lon, no_domain=.not.present(domain), domain=domain )
-        call read_data(grid_file, 'geolat_t', lat, no_domain=.not.present(domain), domain=domain )
-     end select
-  case(VERSION_1)
-     select case(trim(component))
-     case('ATM','LND')
-        allocate(buffer(max(nlon,nlat)))
-        ! read coordinates of grid cell vertices
-        call read_data(grid_file, 'xt'//lowercase(component(1:1)), buffer(1:nlon), no_domain=.true.)
-        do j = js,je
-        do i = is,ie
-           lon(i+i0,j+j0) = buffer(i)
-        enddo
-        enddo
-        call read_data(grid_file, 'yt'//lowercase(component(1:1)), buffer(1:nlat), no_domain=.true.)
-        do j = js,je
-        do i = is,ie
-           lat(i+i0,j+j0) = buffer(j)
-        enddo
-        enddo
-        deallocate(buffer)
-     case('OCN')
-        call read_data(grid_file, 'x_T', lon, no_domain=.not.present(domain), domain=domain )
-        call read_data(grid_file, 'y_T', lat, no_domain=.not.present(domain), domain=domain )
-     end select
-  case(VERSION_2) ! mosaic grid file
-     ! get the name of the mosaic file for the component
-     call read_data(grid_file, trim(lowercase(component))//'_mosaic_file', filename1)
-     filename1=grid_dir//trim(filename1)
-     ! get the name of the grid file for the component and tile
-     call read_data(filename1, 'gridfiles', filename2, level=tile)
-     filename2 = grid_dir//trim(filename2)
-     if(PRESENT(domain)) then
-        call mpp_get_global_domain(domain, xbegin=isg, ybegin=jsg)
-        start = 1; nread = 1
-        start(1) = 2*(is-isg+1) - 1; nread(1) = 2*(ie-is)+3
-        start(2) = 2*(js-jsg+1) - 1; nread(2) = 2*(je-js)+3
-        allocate(tmp(nread(1), nread(2)))
-        call read_data(filename2, 'x', tmp, start, nread, no_domain=.TRUE.)
-        do j = 1, je-js+1
-           do i = 1, ie-is+1
-              lon(i,j) = tmp(2*i,2*j)
-           enddo
-        enddo
-        call read_data(filename2, 'y', tmp, start, nread, no_domain=.TRUE.)
-        do j = 1, je-js+1
-           do i = 1, ie-is+1
-              lat(i,j) = tmp(2*i,2*j)
-           enddo
-        enddo
-     else
-        allocate(tmp(2*nlon+1,2*nlat+1))
-        call read_data(filename2, 'x', tmp, no_domain=.TRUE.)
-        do j = js,je
-           do i = is,ie
-              lon(i+i0,j+j0) = tmp(2*i,2*j)
-           end do
-        end do
-        call read_data(filename2, 'y', tmp, no_domain=.TRUE.)
-        do j = js,je
-           do i = is,ie
-              lat(i+i0,j+j0) = tmp(2*i,2*j)
-           end do
-        end do
-        deallocate(tmp)
-     endif
-  end select
-
-end subroutine get_grid_cell_centers_2D
-
-subroutine get_grid_cell_centers_UG(component, tile, lon, lat, SG_domain, UG_domain)
-  character(len=*), intent(in) :: component
-  integer, intent(in) :: tile
-  real, intent(inout) :: lon(:),lat(:)
-  type(domain2d)  ,   intent(in) :: SG_domain
-  type(domainUG)  ,   intent(in) :: UG_domain
-  integer :: is, ie, js, je
-  real, allocatable :: SG_lon(:,:), SG_lat(:,:)
-
-  call mpp_get_compute_domain(SG_domain, is, ie, js, je)
-  allocate(SG_lon(is:ie, js:je))
-  allocate(SG_lat(is:ie, js:je))
-  call get_grid_cell_centers_2D(component, tile, SG_lon, SG_lat, SG_domain)
-  call mpp_pass_SG_to_UG(UG_domain, SG_lon, lon)
-  call mpp_pass_SG_to_UG(UG_domain, SG_lat, lat)
-  deallocate(SG_lon, SG_lat)
-
-end subroutine get_grid_cell_centers_UG
-
-! ============================================================================
-! ============================================================================
-! this subroutine probably does not belong in the grid_mod
-!> Given a model component, a layout, and (optionally) a halo size, returns a
-!! domain for current processor
-subroutine define_cube_mosaic ( component, domain, layout, halo, maskmap )
-  character(len=*) , intent(in)    :: component
-  type(domain2d)   , intent(inout) :: domain
-  integer          , intent(in)    :: layout(2)
-  integer, optional, intent(in)    :: halo
-  logical, optional, intent(in)    :: maskmap(:,:,:)
-
-  ! ---- local constants
-
-  ! ---- local vars
-  character(len=MAX_NAME) :: varname
-  character(len=MAX_FILE + len(grid_dir)) :: mosaic_file
-  integer :: ntiles     ! number of tiles
-  integer :: ncontacts  ! number of contacts between mosaic tiles
-  integer :: n
-  integer :: ng, pe_pos, npes         ! halo size
-  integer, allocatable :: nlon(:), nlat(:), global_indices(:,:)
-  integer, allocatable :: pe_start(:), pe_end(:), layout_2d(:,:)
-  integer, allocatable :: tile1(:),tile2(:)
-  integer, allocatable :: is1(:),ie1(:),js1(:),je1(:)
-  integer, allocatable :: is2(:),ie2(:),js2(:),je2(:)
-
-  call get_grid_ntiles(component,ntiles)
-  allocate(nlon(ntiles), nlat(ntiles))
-  allocate(global_indices(4,ntiles))
-  allocate(pe_start(ntiles),pe_end(ntiles))
-  allocate(layout_2d(2,ntiles))
-  call get_grid_size(component,nlon,nlat)
-
-  pe_pos = mpp_root_pe()
-  do n = 1, ntiles
-     global_indices(:,n) = (/ 1, nlon(n), 1, nlat(n) /)
-     layout_2d     (:,n) = layout
-     if(present(maskmap)) then
-        npes = count(maskmap(:,:,n))
-     else
-        npes = layout(1)*layout(2)
-     endif
-     pe_start(n) = pe_pos
-     pe_end  (n) = pe_pos + npes - 1
-     pe_pos      = pe_end(n) + 1
-  enddo
-
-  varname=trim(lowercase(component))//'_mosaic_file'
-  call read_data(grid_file,varname,mosaic_file(1:MAX_FILE))
-  mosaic_file = grid_dir//mosaic_file(1:MAX_FILE)
-
-  ! get the contact information from mosaic file
-  ncontacts = get_mosaic_ncontacts(mosaic_file)
-  allocate(tile1(ncontacts),tile2(ncontacts))
-  allocate(is1(ncontacts),ie1(ncontacts),js1(ncontacts),je1(ncontacts))
-  allocate(is2(ncontacts),ie2(ncontacts),js2(ncontacts),je2(ncontacts))
-  call get_mosaic_contact(mosaic_file, tile1, tile2, &
-       is1, ie1, js1, je1, is2, ie2, js2, je2)
-
-  ng = 0
-  if(present(halo)) ng = halo
-  ! create the domain2d variable
-  call mpp_define_mosaic ( global_indices, layout_2d, domain, &
-       ntiles, ncontacts, tile1, tile2,                  &
-       is1, ie1, js1, je1, &
-       is2, ie2, js2, je2, &
-       pe_start=pe_start, pe_end=pe_end, symmetry=.true.,  &
-       shalo = ng, nhalo = ng, whalo = ng, ehalo = ng,     &
-       maskmap = maskmap,                                  &
-       name = trim(component)//'Cubic-Sphere Grid' )
-
-  deallocate(nlon,nlat,global_indices,pe_start,pe_end,layout_2d)
-  deallocate(tile1,tile2)
-  deallocate(is1,ie1,js1,je1)
-  deallocate(is2,ie2,js2,je2)
-
-end subroutine define_cube_mosaic
-
-end module grid_mod
-!> @}
-! close documentation grouping
diff --git a/mosaic/interp.c b/mosaic/interp.c
deleted file mode 100644
index 6ead747eda..0000000000
--- a/mosaic/interp.c
+++ /dev/null
@@ -1,394 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "mosaic_util.h"
-#include "interp.h"
-#include "create_xgrid.h"
-
-/** \file
- *  \ingroup mosaic
- *  \brief Grid interpolation functions for use in @ref mosaic_mod
- */
-
-/*********************************************************************
-   void cublic_spline_sp(size1, size2, grid1, grid2, data1, data2)
-
-   Calculate a shape preserving cubic spline. Monotonicity is ensured over each subinterval
-   unlike classic cubic spline interpolation.
-   It will be used to interpolation data in 1-D space.
-
-   INPUT Arguments:
-    grid1:    grid for input data grid.
-    grid2:    grid for output data grid.
-    size1:    size of input grid.
-    size2:    size of output grid.
-    data1:    input data associated with grid1.
-
-   OUTPUT ARGUMENTS:
-    data2:    output data associated with grid2. (OUTPUT)
-
-*********************************************************************/
-
-void cubic_spline_sp(int size1, int size2, const double *grid1, const double *grid2, const double *data1,
-                     double *data2 )
-{
-  double *delta=NULL, *d=NULL, *dh=NULL, *b=NULL, *c = NULL;
-  double s, w1, w2, p;
-  int i, k, n, klo, khi, kmax;
-
-  for(i=1; i<size1; i++) {
-    if( grid1[i] <= grid1[i-1] ) error_handler("cubic_spline_sp: grid1 is not monotonic increasing");
-  }
-
-  for(i=0; i<size2; i++) {
-    if( grid2[i] < grid1[0] || grid2[i] > grid1[size1-1]) error_handler("cubic_spline_sp: grid2 lies outside grid1");
-  }
-
-  if(size1 < 2) error_handler("cubic_spline_sp: the size of input grid should be at least 2");
-  if(size1 == 2) {  /* when size1 is 2, it just reduced to a linear interpolation */
-    p = (data1[1]-data1[0])/(grid1[1]-grid1[0]);
-    for(i=0; i< size2; i++) data2[i] = p*(grid2[i] - grid1[0]) + data1[0];
-    return;
-  }
-  delta = (double *)malloc((size1-1)*sizeof(double));
-  dh = (double *)malloc((size1-1)*sizeof(double));
-  d = (double *)malloc(size1*sizeof(double));
-  for(k=0;k<size1-1;k++) {
-    dh[k] = grid1[k+1]-grid1[k];
-    delta[k] = (data1[k+1]-data1[k])/(dh[k]);
-  }
-  /*
-    Interior slopes
-  */
-  for(k=1;k<size1-1;k++) {
-    if( delta[k]*delta[k-1] > 0.0 ) {
-      w1 = 2.0*dh[k] + dh[k-1];
-      w2 = dh[k] + 2.0*dh[k-1];
-      d[k] = (w1+w2)/(w1/delta[k-1]+w2/delta[k]);
-    }
-    else {
-      d[k] = 0.0;
-    }
-  }
-  /*
-    End slopes
-  */
-  kmax = size1-1;
-  d[0] = ((2.0*dh[0] + dh[1])*delta[0] - dh[0]*delta[1])/(dh[0]+dh[1]);
-
-  if ( d[0]*delta[0] < 0.0 ) {
-    d[0] = 0.0;
-  }
-  else {
-    if ( delta[0]*delta[1] < 0.0 && fabs(d[0]) > fabs(3.0*delta[0])) {
-      d[0]=3.0*delta[0];
-    }
-  }
-
-  d[kmax] = ((2.0*dh[kmax-1] + dh[kmax-2])*delta[kmax-1] - dh[kmax-1]*delta[kmax-2])/(dh[kmax-1]+dh[kmax-2]);
-  if ( d[kmax]*delta[kmax-1] < 0.0 ) {
-    d[kmax] = 0.0;
-  }
-  else {
-    if ( delta[kmax-1]*delta[kmax-2] < 0.0 && fabs(d[kmax]) > fabs(3.0*delta[kmax-1])) {
-      d[kmax]=3.0*delta[kmax-1];
-    }
-  }
-
-  /* Precalculate coefficients */
-  b = (double *)malloc((size1-1)*sizeof(double));
-  c = (double *)malloc((size1-1)*sizeof(double));
-  for (k=0; k<size1-1; k++) {
-    c[k]   = (3.0*delta[k]-2.0*d[k]-d[k+1])/dh[k];
-    b[k]   = (d[k]-2.0*delta[k]+d[k+1])/(dh[k]*dh[k]);
-  }
-  /* interpolate data onto grid2 */
-  for(k=0; k<size2; k++) {
-    n = nearest_index(grid2[k],grid1, size1);
-    if (grid1[n] < grid2[k]) {
-      klo = n;
-    }
-    else {
-      if(n==0) {
-        klo = n;
-      }
-      else {
-        klo = n -1;
-      }
-    }
-    khi = klo+1;
-    s   = grid2[k] - grid1[klo];
-    data2[k] = data1[klo]+s*(d[klo]+s*(c[klo]+s*b[klo]));
-  }
-
-  free(c);
-  free(b);
-  free(d);
-  free(dh);
-  free(delta);
-
-}/* cubic spline sp */
-
-
-/*********************************************************************
-   void cublic_spline(size1, size2, grid1, grid2, data1, data2, yp1, ypn  )
-
-   This alborithm is to get an interpolation formula that is smooth in the first
-   derivative, and continuous in the second derivative, both within an interval
-   and its boundaries. It will be used to interpolation data in 1-D space.
-
-   INPUT Arguments:
-    grid1:    grid for input data grid.
-    grid2:    grid for output data grid.
-    size1:    size of input grid.
-    size2:    size of output grid.
-    data1:    input data associated with grid1.
-    yp1:      first derivative of starting point.
-              Set to 0 to be "natural"           (INPUT)
-    ypn:      first derivative of ending point.
-              Set to 0 to be "natural"           (INPUT)
-
-   OUTPUT ARGUMENTS:
-    data2:    output data associated with grid2. (OUTPUT)
-
-*********************************************************************/
-
-
-void cubic_spline(int size1, int size2, const double *grid1, const double *grid2, const double *data1,
-                  double *data2, double yp1, double ypn  )
-{
-  double *y2=NULL, *u=NULL;
-  double p, sig, qn, un, h, a, b;
-  int i, k, n, klo, khi;
-
-  for(i=1; i<size1; i++) {
-    if( grid1[i] <= grid1[i-1] ) error_handler("cubic_spline: grid1 is not monotonic increasing");
-  }
-
-  for(i=0; i<size2; i++) {
-    if( grid2[i] < grid1[0] || grid2[i] > grid1[size1-1]) error_handler("cubic_spline: grid2 lies outside grid1");
-  }
-
-  if(size1 < 2) error_handler("cubic_spline: the size of input grid should be at least 2");
-  if(size1 == 2) {  /* when size1 is 2, it just reduced to a linear interpolation */
-    p = (data1[1]-data1[0])/(grid1[1]-grid1[0]);
-    for(i=0; i< size2; i++) data2[i] = p*(grid2[i] - grid1[0]) + data1[0];
-    return;
-  }
-  y2 = (double *)malloc(size1*sizeof(double));
-  u = (double *)malloc(size1*sizeof(double));
-  if (yp1 >.99e30) {
-    y2[0]=0.;
-    u[0]=0.;
-  }
-  else {
-    y2[0]=-0.5;
-    u[0]=(3./(grid1[1]-grid1[0]))*((data1[1]-data1[0])/(grid1[1]-grid1[0])-yp1);
-  }
-
-  for(i=1; i<size1-1; i++) {
-    sig=(grid1[i]-grid1[i-1])/(grid1[i+1]-grid1[i-1]);
-    p=sig*y2[i-1]+2.;
-    y2[i]=(sig-1.)/p;
-    u[i]=(6.*((data1[i+1]-data1[i])/(grid1[i+1]-grid1[i])-(data1[i]-data1[i-1])
-              /(grid1[i]-grid1[i-1]))/(grid1[i+1]-grid1[i-1])-sig*u[i-1])/p;
-
-  }
-
-  if (ypn > .99e30) {
-    qn=0.;
-    un=0.;
-  }
-  else {
-    qn=0.5;
-    un=(3./(grid1[size1-1]-grid1[size1-2]))*(ypn-(data1[size1-1]-data1[size1-2])/(grid1[size1-1]-grid1[size1-2]));
-  }
-
-  y2[size1-1]=(un-qn*u[size1-2])/(qn*y2[size1-2]+1.);
-
-  for(k=size1-2; k>=0; k--) y2[k] = y2[k]*y2[k+1]+u[k];
-
-  /* interpolate data onto grid2 */
-  for(k=0; k<size2; k++) {
-    n = nearest_index(grid2[k],grid1, size1);
-    if (grid1[n] < grid2[k]) {
-      klo = n;
-    }
-    else {
-      if(n==0) {
-        klo = n;
-      }
-      else {
-        klo = n -1;
-      }
-    }
-    khi = klo+1;
-    h   = grid1[khi]-grid1[klo];
-    a   = (grid1[khi] - grid2[k])/h;
-    b   = (grid2[k] - grid1[klo])/h;
-    data2[k] = a*data1[klo] + b*data1[khi]+ ((pow(a,3.0)-a)*y2[klo] + (pow(b,3.0)-b)*y2[khi])*(pow(h,2.0))/6;
-  }
-
-  free(y2);
-  free(u);
-
-}/* cubic spline */
-
-
-/*------------------------------------------------------------------------------
-  void conserve_interp()
-  conservative interpolation through exchange grid.
-  Currently only first order interpolation are implemented here.
-  ----------------------------------------------------------------------------*/
-void conserve_interp(int nx_src, int ny_src, int nx_dst, int ny_dst, const double *x_src,
-                     const double *y_src, const double *x_dst, const double *y_dst,
-                     const double *mask_src, const double *data_src, double *data_dst )
-{
-  int n, nxgrid;
-  int *xgrid_i1, *xgrid_j1, *xgrid_i2, *xgrid_j2;
-  double *xgrid_area, *dst_area, *area_frac;
-
-  /* get the exchange grid between source and destination grid. */
-  xgrid_i1   = (int    *)malloc(MAXXGRID*sizeof(int));
-  xgrid_j1   = (int    *)malloc(MAXXGRID*sizeof(int));
-  xgrid_i2   = (int    *)malloc(MAXXGRID*sizeof(int));
-  xgrid_j2   = (int    *)malloc(MAXXGRID*sizeof(int));
-  xgrid_area = (double *)malloc(MAXXGRID*sizeof(double));
-  dst_area   = (double *)malloc(nx_dst*ny_dst*sizeof(double));
-  nxgrid = create_xgrid_2dx2d_order1(&nx_src, &ny_src, &nx_dst, &ny_dst, x_src, y_src, x_dst, y_dst, mask_src,
-                                     xgrid_i1, xgrid_j1, xgrid_i2, xgrid_j2, xgrid_area );
-  /* The source grid may not cover the destination grid
-     so need to sum of exchange grid area to get dst_area
-     get_grid_area(&nx_dst, &ny_dst, x_dst, y_dst, dst_area);
-  */
-  for(n=0; n<nx_dst*ny_dst; n++) dst_area[n] = 0;
-  for(n=0; n<nxgrid; n++) dst_area[xgrid_j2[n]*nx_dst+xgrid_i2[n]] += xgrid_area[n];
-
-  area_frac = (double *)malloc(nxgrid*sizeof(double));
-  for(n=0; n<nxgrid; n++) area_frac[n] = xgrid_area[n]/dst_area[xgrid_j2[n]*nx_dst+xgrid_i2[n]];
-
-  for(n=0; n<nx_dst*ny_dst; n++) {
-    data_dst[n] = 0;
-  }
-  for(n=0; n<nxgrid; n++) {
-    data_dst[xgrid_j2[n]*nx_dst+xgrid_i2[n]] += data_src[xgrid_j1[n]*nx_src+xgrid_i1[n]]*area_frac[n];
-  }
-
-  free(xgrid_i1);
-  free(xgrid_j1);
-  free(xgrid_i2);
-  free(xgrid_j2);
-  free(xgrid_area);
-  free(dst_area);
-  free(area_frac);
-
-} /* conserve_interp */
-
-/*------------------------------------------------------------------------------
-  void conserve_interp_great_circle()
-  conservative interpolation through exchange grid.
-  Currently only first order interpolation are implemented here.
-  great_circle algorithm is used for clipping and interpolation.
-  ----------------------------------------------------------------------------*/
-void conserve_interp_great_circle(int nx_src, int ny_src, int nx_dst, int ny_dst, const double *x_src,
-                                  const double *y_src, const double *x_dst, const double *y_dst,
-                                  const double *mask_src, const double *data_src, double *data_dst )
-{
-  int n, nxgrid;
-  int *xgrid_i1, *xgrid_j1, *xgrid_i2, *xgrid_j2;
-  double *xgrid_area, *dst_area, *area_frac, *xgrid_di, *xgrid_dj;
-
-  /* get the exchange grid between source and destination grid. */
-  xgrid_i1   = (int    *)malloc(MAXXGRID*sizeof(int));
-  xgrid_j1   = (int    *)malloc(MAXXGRID*sizeof(int));
-  xgrid_i2   = (int    *)malloc(MAXXGRID*sizeof(int));
-  xgrid_j2   = (int    *)malloc(MAXXGRID*sizeof(int));
-  xgrid_area = (double *)malloc(MAXXGRID*sizeof(double));
-  xgrid_di   = (double *)malloc(MAXXGRID*sizeof(double));
-  xgrid_dj   = (double *)malloc(MAXXGRID*sizeof(double));
-  dst_area   = (double *)malloc(nx_dst*ny_dst*sizeof(double));
-  nxgrid = create_xgrid_great_circle(&nx_src, &ny_src, &nx_dst, &ny_dst, x_src, y_src, x_dst, y_dst, mask_src,
-                                     xgrid_i1, xgrid_j1, xgrid_i2, xgrid_j2, xgrid_area, xgrid_di, xgrid_dj );
-  /* The source grid may not cover the destination grid
-     so need to sum of exchange grid area to get dst_area
-     get_grid_area(&nx_dst, &ny_dst, x_dst, y_dst, dst_area);
-  */
-  for(n=0; n<nx_dst*ny_dst; n++) dst_area[n] = 0;
-  for(n=0; n<nxgrid; n++) dst_area[xgrid_j2[n]*nx_dst+xgrid_i2[n]] += xgrid_area[n];
-
-  area_frac = (double *)malloc(nxgrid*sizeof(double));
-  for(n=0; n<nxgrid; n++) area_frac[n] = xgrid_area[n]/dst_area[xgrid_j2[n]*nx_dst+xgrid_i2[n]];
-
-  for(n=0; n<nx_dst*ny_dst; n++) {
-    data_dst[n] = 0;
-  }
-  for(n=0; n<nxgrid; n++) {
-    data_dst[xgrid_j2[n]*nx_dst+xgrid_i2[n]] += data_src[xgrid_j1[n]*nx_src+xgrid_i1[n]]*area_frac[n];
-  }
-
-  free(xgrid_i1);
-  free(xgrid_j1);
-  free(xgrid_i2);
-  free(xgrid_j2);
-  free(xgrid_area);
-  free(dst_area);
-  free(area_frac);
-
-} /* conserve_interp_great_circle */
-
-
-
-void linear_vertical_interp(int nx, int ny, int nk1, int nk2, const double *grid1, const double *grid2,
-                            double *data1, double *data2)
-{
-  int n, k, l;
-  double w;
-
-  for(k=1; k<nk1; k++) {
-    if(grid1[k] <= grid1[k-1]) error_handler("interp.c: grid1 not monotonic");
-  }
-  for(k=1; k<nk2; k++) {
-    if(grid2[k] <= grid2[k-1]) error_handler("interp.c: grid2 not monotonic");
-  }
-
-  if (grid1[0] > grid2[0] ) error_handler("interp.c: grid2 lies outside grid1");
-  if (grid1[nk1-1] < grid2[nk2-1] ) error_handler("interp.c: grid2 lies outside grid1");
-
-  for(k=0; k<nk2; k++) {
-    n = nearest_index(grid2[k],grid1,nk1);
-    if (grid1[n] < grid2[k]) {
-      w = (grid2[k]-grid1[n])/(grid1[n+1]-grid1[n]);
-      for(l=0; l<nx*ny; l++) {
-        data2[k*nx*ny+l] = (1.-w)*data1[n*nx*ny+l] + w*data1[(n+1)*nx*ny+l];
-      }
-    }
-    else {
-      if(n==0)
-        for(l=0;l<nx*ny;l++) data2[k*nx*ny+l] = data1[n*nx*ny+l];
-      else {
-        w = (grid2[k]-grid1[n-1])/(grid1[n]-grid1[n-1]);
-        for(l=0; l<nx*ny; l++) {
-          data2[k*nx*ny+l] = (1.-w)*data1[(n-1)*nx*ny+l] + w*data1[n*nx*ny+l];
-        }
-      }
-    }
-  }
-}
diff --git a/mosaic/interp.h b/mosaic/interp.h
deleted file mode 100644
index 8b78357fef..0000000000
--- a/mosaic/interp.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#ifndef INTERP_H_
-#define INTERP_H_
-/*********************************************************************
-                     interp.h
-   This header files contains defition of some interpolation routine  (1-D or 2-D).
-   contact: Zhi.Liang@noaa.gov
-*********************************************************************/
-void cubic_spline_sp(int size1, int size2, const double *grid1, const double *grid2, const double *data1,
-                     double *data2 );
-
-void cubic_spline(int size1, int size2, const double *grid1, const double *grid2, const double *data1,
-                  double *data2, double yp1, double ypn  );
-
-void conserve_interp(int nx_src, int ny_src, int nx_dst, int ny_dst, const double *x_src,
-                     const double *y_src, const double *x_dst, const double *y_dst,
-                     const double *mask_src, const double *data_src, double *data_dst );
-
-void conserve_interp_great_circle(int nx_src, int ny_src, int nx_dst, int ny_dst, const double *x_src,
-                                  const double *y_src, const double *x_dst, const double *y_dst,
-                                  const double *mask_src, const double *data_src, double *data_dst );
-
-void linear_vertical_interp(int nx, int ny, int nk1, int nk2, const double *grid1, const double *grid2,
-                            double *data1, double *data2);
-
-#endif
diff --git a/mosaic/mosaic.F90 b/mosaic/mosaic.F90
deleted file mode 100644
index e8558fc8fa..0000000000
--- a/mosaic/mosaic.F90
+++ /dev/null
@@ -1,496 +0,0 @@
-!***********************************************************************
-!*                   GNU Lesser General Public License
-!*
-!* This file is part of the GFDL Flexible Modeling System (FMS).
-!*
-!* FMS is free software: you can redistribute it and/or modify it under
-!* the terms of the GNU Lesser General Public License as published by
-!* the Free Software Foundation, either version 3 of the License, or (at
-!* your option) any later version.
-!*
-!* FMS is distributed in the hope that it will be useful, but WITHOUT
-!* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-!* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-!* for more details.
-!*
-!* You should have received a copy of the GNU Lesser General Public
-!* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
-!***********************************************************************
-!> @defgroup mosaic_mod mosaic_mod
-!> @ingroup mosaic
-!> @brief Implements some utility routines to read mosaic information.
-!> @author Zhi Liang
-!> Implements some utility routines to read mosaic information.
-!!    The information includes number of tiles and contacts in the mosaic,
-!!    mosaic grid resolution of each tile, mosaic contact information, mosaic exchange
-!!    grid information. Each routine will call a C-version routine to get these information.
-
-!> @addtogroup mosaic_mod
-!> @{
-module mosaic_mod
-
-use mpp_mod,    only : mpp_error, FATAL, mpp_pe, mpp_root_pe
-use mpp_io_mod, only : MPP_MULTI
-use fms_io_mod, only : dimension_size, field_exist, read_data, read_compressed
-use constants_mod, only : PI, RADIUS
-
-implicit none
-private
-
-character(len=*), parameter :: &
-     grid_dir  = 'INPUT/'      !< root directory for all grid files
-
-integer, parameter :: &
-     MAX_NAME = 256,  & !< max length of the variable names
-     MAX_FILE = 1024, & !< max length of the file names
-     X_REFINE = 2,    & !< supergrid size/model grid size in x-direction
-     Y_REFINE = 2       !< supergrid size/model grid size in y-direction
-
-! --- public interface
-
-public :: get_mosaic_ntiles
-public :: get_mosaic_ncontacts
-public :: get_mosaic_grid_sizes
-public :: get_mosaic_contact
-public :: get_mosaic_xgrid_size
-public :: get_mosaic_xgrid
-public :: calc_mosaic_grid_area
-public :: calc_mosaic_grid_great_circle_area
-public :: is_inside_polygon
-
-logical :: module_is_initialized = .true.
-!--- external c routines
-external get_grid_area, get_grid_great_circle_area, grad_c2l, calc_c2l_grid_info
-
-! Include variable "version" to be written to log file.
-#include<file_version.h>
-
-contains
-
-!#######################################################################
-
-!> @brief Initialize the mosaic_mod.
-!!
-!! Initialization routine for the mosaic module. It writes the
-!! version information to the log file.
-subroutine mosaic_init()
-
-  if (module_is_initialized) return
-  module_is_initialized = .TRUE.
-
-!--------- write version number and namelist ------------------
-
-end subroutine mosaic_init
-
-!###############################################################################
-
-  !> @return integer for exchange grid size of mosaic xgrid file.
-  function get_mosaic_xgrid_size(xgrid_file)
-    character(len=*), intent(in)          :: xgrid_file !< File that contains exchange grid information
-    integer                               :: get_mosaic_xgrid_size
-
-    get_mosaic_xgrid_size = dimension_size(xgrid_file, "ncells", no_domain=.TRUE.)
-
-    return
-
-  end function get_mosaic_xgrid_size
-
-!###############################################################################
-  !> Get exchange grid information from mosaic xgrid file.
-  !!
-  !> <br> Example usage:
-  !! @code{.F90}
-  !! call get_mosaic_xgrid(xgrid_file, nxgrid, i1, j1, i2, j2, area)
-  !! @endcode
-  subroutine get_mosaic_xgrid(xgrid_file, i1, j1, i2, j2, area, ibegin, iend)
-    character(len=*), intent(in) :: xgrid_file !< The file that contains exchange grid information.
-    integer,       intent(inout) :: i1(:), j1(:) !< i and j-index in grid 1 of exchange field
-    integer,       intent(inout) :: i2(:), j2(:) !< i and j-index in grid 2 of exchange field
-    real,          intent(inout) :: area(:) !< area of the exchange grid. The area is sclaed to
-                                            !! represent unit earth area.
-    integer, optional, intent(in) :: ibegin, iend
-
-    integer                            :: start(4), nread(4), istart
-    real,    dimension(2, size(i1(:))) :: tile1_cell, tile2_cell
-    integer                            :: nxgrid, n
-    real                               :: garea
-    real                               :: get_global_area;
-
-    garea = get_global_area();
-
-    ! When start and nread present, make sure nread(1) is the same as the size of the data
-    if(present(ibegin) .and. present(iend)) then
-       istart = ibegin
-       nxgrid = iend - ibegin + 1
-       if(nxgrid .NE. size(i1(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(i1(:))")
-       if(nxgrid .NE. size(j1(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(j1(:))")
-       if(nxgrid .NE. size(i2(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(i2(:))")
-       if(nxgrid .NE. size(j2(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(j2(:))")
-       if(nxgrid .NE. size(area(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(area(:))")
-    else
-       istart = 1
-       nxgrid = size(i1(:))
-    endif
-
-    start  = 1; nread = 1
-    start(1) = istart; nread(1) = nxgrid
-    call read_compressed(xgrid_file, 'xgrid_area', area, start=start, nread=nread, threading=MPP_MULTI)
-    start = 1; nread = 1
-    nread(1) = 2
-    start(2) = istart; nread(2) = nxgrid
-    call read_compressed(xgrid_file, 'tile1_cell', tile1_cell, start=start, nread=nread, threading=MPP_MULTI)
-    call read_compressed(xgrid_file, 'tile2_cell', tile2_cell, start=start, nread=nread, threading=MPP_MULTI)
-
-     do n = 1, nxgrid
-       i1(n) = int(tile1_cell(1,n))
-       j1(n) = int(tile1_cell(2,n))
-       i2(n) = int(tile2_cell(1,n))
-       j2(n) = int(tile2_cell(2,n))
-       area(n) = area(n)/garea
-    end do
-
-    return
-
-  end subroutine get_mosaic_xgrid
-
-  !###############################################################################
-
-  !> Get number of tiles in the mosaic_file.
-  !!
-  !!
-  !! <br> Example usage:
-  !! @code{.F90}
-  !! ntiles = get_mosaic_ntiles( mosaic_file)
-  !! @endcode
-  function get_mosaic_ntiles(mosaic_file)
-    character(len=*), intent(in) :: mosaic_file !< The file that contains mosaic information.
-    integer                      :: get_mosaic_ntiles
-
-    get_mosaic_ntiles = dimension_size(mosaic_file, "ntiles")
-
-    return
-
-  end function get_mosaic_ntiles
-
-  !###############################################################################
-
-  !> Get number of contacts in the mosaic_file.
-  !!
-  !> <br> Example usage:
-  !! @code{.F90}
-  !! ntiles = get_mosaic_ncontacts( mosaic_file)
-  !! @endcode
-  function get_mosaic_ncontacts( mosaic_file)
-    character(len=*), intent(in) :: mosaic_file !< The file that contains mosaic information.
-    integer                      :: get_mosaic_ncontacts
-
-    if(field_exist(mosaic_file, "contacts") ) then
-      get_mosaic_ncontacts = dimension_size(mosaic_file, "ncontact", no_domain=.TRUE.)
-    else
-      get_mosaic_ncontacts = 0
-    endif
-
-    return
-
-  end function get_mosaic_ncontacts
-
-  !###############################################################################
-
-  !> Get grid size of each tile from mosaic_file
-  subroutine get_mosaic_grid_sizes( mosaic_file, nx, ny)
-    character(len=*),         intent(in) :: mosaic_file !< The file that contains mosaic information.
-    integer, dimension(:), intent(inout) :: nx !< List of grid size in x-direction of each tile.
-    integer, dimension(:), intent(inout) :: ny !< List of grid size in y-direction of each tile.
-
-    character(len=MAX_FILE) :: gridfile
-    integer                 :: ntiles, n
-
-    ntiles = get_mosaic_ntiles(mosaic_file)
-    if(ntiles .NE. size(nx(:)) .OR. ntiles .NE. size(ny(:)) ) then
-      call mpp_error(FATAL, "get_mosaic_grid_sizes: size of nx/ny does not equal to ntiles")
-    endif
-    do n = 1, ntiles
-      call read_data(mosaic_file, 'gridfiles', gridfile, level=n)
-      gridfile = grid_dir//trim(gridfile)
-      nx(n) = dimension_size(gridfile, "nx")
-      ny(n) = dimension_size(gridfile, "ny")
-      if(mod(nx(n),x_refine) .NE. 0) call mpp_error(FATAL, "get_mosaic_grid_sizes: nx is not divided by x_refine");
-      if(mod(ny(n),y_refine) .NE. 0) call mpp_error(FATAL, "get_mosaic_grid_sizes: ny is not divided by y_refine");
-      nx(n) = nx(n)/x_refine;
-      ny(n) = ny(n)/y_refine;
-    enddo
-
-    return
-
-  end subroutine get_mosaic_grid_sizes
-
-  !###############################################################################
-
-  !> Get contact information from mosaic_file
-  subroutine get_mosaic_contact( mosaic_file, tile1, tile2, istart1, iend1, jstart1, jend1, &
-                                   istart2, iend2, jstart2, jend2)
-    character(len=*),         intent(in) :: mosaic_file !< File that contains mosaic information
-    integer, dimension(:), intent(inout) :: tile1 !< list tile number in tile 1 of each contact
-    integer, dimension(:), intent(inout) :: tile2 !< list tile number in tile 2 of each contact
-    integer, dimension(:), intent(inout) :: istart1!< list starting i-index in tile 1 of each contact
-    integer, dimension(:), intent(inout) :: iend1 !< list ending i-index in tile 1 of each contact
-    integer, dimension(:), intent(inout) :: jstart1!< list starting j-index in tile 1 of each contact
-    integer, dimension(:), intent(inout) :: jend1 !< list ending j-index in tile 1 of each contact
-    integer, dimension(:), intent(inout) :: istart2!< list starting i-index in tile 2 of each contact
-    integer, dimension(:), intent(inout) :: iend2 !< list ending i-index in tile 2 of each contact
-    integer, dimension(:), intent(inout) :: jstart2!< list starting j-index in tile 2 of each contact
-    integer, dimension(:), intent(inout) :: jend2 !< list ending j-index in tile 2 of each contact
-    character(len=MAX_NAME), allocatable :: gridtiles(:)
-    character(len=MAX_NAME)              :: contacts
-    character(len=MAX_NAME)              :: strlist(8)
-    integer :: ntiles, n, m, ncontacts, nstr, ios
-    integer :: i1_type, j1_type, i2_type, j2_type
-    logical :: found
-
-    ntiles = get_mosaic_ntiles(mosaic_file)
-    allocate(gridtiles(ntiles))
-    do n = 1, ntiles
-      call read_data(mosaic_file, 'gridtiles', gridtiles(n), level=n)
-    enddo
-
-    ncontacts = get_mosaic_ncontacts(mosaic_file)
-
-    do n = 1, ncontacts
-      call read_data(mosaic_file, "contacts", contacts, level=n)
-      nstr = parse_string(contacts, ":", strlist)
-      if(nstr .NE. 4) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): number of elements in contact seperated by :/:: should be 4")
-      found = .false.
-      do m = 1, ntiles
-        if(trim(gridtiles(m)) == trim(strlist(2)) ) then !found the tile name
-          found = .true.
-          tile1(n) = m
-          exit
-        endif
-      enddo
-
-      if(.not.found) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact):the first tile name specified in contact is not found in tile list")
-
-      found = .false.
-      do m = 1, ntiles
-        if(trim(gridtiles(m)) == trim(strlist(4)) ) then !found the tile name
-          found = .true.
-          tile2(n) = m
-          exit
-        endif
-      enddo
-
-      if(.not.found) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact):the second tile name specified in contact is not found in tile list")
-
-      call read_data(mosaic_file, "contact_index", contacts, level=n)
-      nstr = parse_string(contacts, ":,", strlist)
-      if(nstr .NE. 8) then
-        if(mpp_pe()==mpp_root_pe()) then
-          print*, "nstr is ", nstr
-          print*, "contacts is ", contacts
-          do m = 1, nstr
-            print*, "strlist is ", trim(strlist(m))
-          enddo
-        endif
-        call mpp_error(FATAL, &
-               "mosaic_mod(get_mosaic_contact): number of elements in contact_index seperated by :/, should be 8")
-      endif
-      read(strlist(1), *, iostat=ios) istart1(n)
-      if(ios .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): Error in reading istart1")
-      read(strlist(2), *, iostat=ios) iend1(n)
-      if(ios .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): Error in reading iend1")
-      read(strlist(3), *, iostat=ios) jstart1(n)
-      if(ios .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): Error in reading jstart1")
-      read(strlist(4), *, iostat=ios) jend1(n)
-      if(ios .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): Error in reading jend1")
-      read(strlist(5), *, iostat=ios) istart2(n)
-      if(ios .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): Error in reading istart2")
-      read(strlist(6), *, iostat=ios) iend2(n)
-      if(ios .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): Error in reading iend2")
-      read(strlist(7), *, iostat=ios) jstart2(n)
-      if(ios .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): Error in reading jstart2")
-      read(strlist(8), *, iostat=ios) jend2(n)
-      if(ios .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): Error in reading jend2")
-
-      i1_type = transfer_to_model_index(istart1(n), iend1(n), x_refine)
-      j1_type = transfer_to_model_index(jstart1(n), jend1(n), y_refine)
-      i2_type = transfer_to_model_index(istart2(n), iend2(n), x_refine)
-      j2_type = transfer_to_model_index(jstart2(n), jend2(n), y_refine)
-
-      if( i1_type == 0 .AND. j1_type == 0 ) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): istart1==iend1 and jstart1==jend1")
-      if( i2_type == 0 .AND. j2_type == 0 ) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): istart2==iend2 and jstart2==jend2")
-      if( i1_type + j1_type .NE. i2_type + j2_type ) call mpp_error(FATAL, &
-         "mosaic_mod(get_mosaic_contact): It is not a line or overlap contact")
-
-   enddo
-
-      deallocate(gridtiles)
-
-  end subroutine get_mosaic_contact
-
-function transfer_to_model_index(istart, iend, refine_ratio)
-   integer, intent(inout) :: istart, iend
-   integer                :: refine_ratio
-   integer                :: transfer_to_model_index
-   integer                :: istart_in, iend_in
-
-   istart_in = istart
-   iend_in = iend
-
-   if( istart_in == iend_in ) then
-      transfer_to_model_index = 0
-      istart = (istart_in + 1)/refine_ratio
-      iend   = istart
-   else
-      transfer_to_model_index = 1
-      if( iend_in > istart_in ) then
-        istart = istart_in + 1
-        iend   = iend_in
-      else
-        istart = istart_in
-        iend   = iend_in + 1
-      endif
-      if( mod(istart, refine_ratio) .NE. 0 .OR. mod(iend,refine_ratio) .NE. 0) call mpp_error(FATAL, &
-         "mosaic_mod(transfer_to_model_index): mismatch between refine_ratio and istart/iend")
-      istart = istart/refine_ratio
-      iend = iend/refine_ratio
-
-   endif
-
-   return
-
-end function transfer_to_model_index
-
-  !###############################################################################
-
-  !> @brief Calculate grid cell area.
-  !!
-  !> Calculate the grid cell area. The purpose of this routine is to make
-  !! sure the consistency between model grid area and exchange grid area.
-  subroutine calc_mosaic_grid_area(lon, lat, area)
-     real, dimension(:,:), intent(in)    :: lon !< geographical longitude of grid cell vertices
-     real, dimension(:,:), intent(in)    :: lat !< geographical latitude of grid cell vertices
-     real, dimension(:,:), intent(inout) :: area !< grid cell area
-     integer                             :: nlon, nlat
-
-     nlon = size(area,1)
-     nlat = size(area,2)
-     ! make sure size of lon, lat and area are consitency
-     if( size(lon,1) .NE. nlon+1 .OR. size(lat,1) .NE. nlon+1 ) &
-        call mpp_error(FATAL, "mosaic_mod: size(lon,1) and size(lat,1) should equal to size(area,1)+1")
-     if( size(lon,2) .NE. nlat+1 .OR. size(lat,2) .NE. nlat+1 ) &
-        call mpp_error(FATAL, "mosaic_mod: size(lon,2) and size(lat,2) should equal to size(area,2)+1")
-
-     call get_grid_area( nlon, nlat, lon, lat, area)
-
-  end subroutine calc_mosaic_grid_area
-
-  !###############################################################################
-
-  !> Calculate grid cell area using great circle algorithm.
-  !!
-  !> Calculate the grid cell area. The purpose of this routine is to make
-  !! sure the consistency between model grid area and exchange grid area.
-  subroutine calc_mosaic_grid_great_circle_area(lon, lat, area)
-     real, dimension(:,:), intent(in)    :: lon !< Geographical longitude of grid cell vertices.
-     real, dimension(:,:), intent(in)    :: lat !< Geographical latitude of grid cell vertices.
-     real, dimension(:,:), intent(inout) :: area !< grid cell area
-     integer                             :: nlon, nlat
-
-
-     nlon = size(area,1)
-     nlat = size(area,2)
-     ! make sure size of lon, lat and area are consitency
-     if( size(lon,1) .NE. nlon+1 .OR. size(lat,1) .NE. nlon+1 ) &
-        call mpp_error(FATAL, "mosaic_mod: size(lon,1) and size(lat,1) should equal to size(area,1)+1")
-     if( size(lon,2) .NE. nlat+1 .OR. size(lat,2) .NE. nlat+1 ) &
-        call mpp_error(FATAL, "mosaic_mod: size(lon,2) and size(lat,2) should equal to size(area,2)+1")
-
-     call get_grid_great_circle_area( nlon, nlat, lon, lat, area)
-
-  end subroutine calc_mosaic_grid_great_circle_area
-
-  !#####################################################################
-  !> This function check if a point (lon1,lat1) is inside a polygon (lon2(:), lat2(:))
-  !! lon1, lat1, lon2, lat2 are in radians.
-  function is_inside_polygon(lon1, lat1, lon2, lat2 )
-     real, intent(in) :: lon1, lat1
-     real, intent(in) :: lon2(:), lat2(:)
-     logical          :: is_inside_polygon
-     integer                        :: npts, isinside
-     integer                        :: inside_a_polygon
-
-     npts = size(lon2(:))
-
-     isinside = inside_a_polygon(lon1, lat1, npts, lon2, lat2)
-     if(isinside == 1) then
-        is_inside_polygon = .TRUE.
-     else
-        is_inside_polygon = .FALSE.
-     endif
-
-     return
-
-  end function is_inside_polygon
-
-  function parse_string(string, set, value)
-  character(len=*),  intent(in) :: string
-  character(len=*),  intent(in) :: set
-  character(len=*), intent(out) :: value(:)
-  integer                       :: parse_string
-  integer :: nelem, length, first, last
-
-     nelem = size(value(:))
-     length = len_trim(string)
-
-     first = 1; last = 0
-     parse_string = 0
-
-     do while(first .LE. length)
-       parse_string = parse_string + 1
-       if(parse_string>nelem) then
-         call mpp_error(FATAL, "mosaic_mod(parse_string) : number of element is greater than size(value(:))")
-       endif
-       last = first - 1 + scan(string(first:length), set)
-       if(last == first-1 ) then  ! not found, end of string
-         value(parse_string) = string(first:length)
-         exit
-       else
-         if(last <= first) then
-           call mpp_error(FATAL, "mosaic_mod(parse_string) : last <= first")
-         endif
-         value(parse_string) = string(first:(last-1))
-         first = last + 1
-         ! scan to make sure the next is not the character in the set
-         do while (first == last+1)
-           last = first - 1 + scan(string(first:length), set)
-           if(last == first) then
-             first = first+1
-           else
-             exit
-           endif
-         end do
-       endif
-     enddo
-
-     return
-
-  end function parse_string
-
-end module mosaic_mod
-
-
-!> @}
-! close documentation grouping
diff --git a/mosaic/mosaic_util.c b/mosaic/mosaic_util.c
deleted file mode 100644
index c37f799f18..0000000000
--- a/mosaic/mosaic_util.c
+++ /dev/null
@@ -1,1368 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
-#ifdef use_libMPI
-#include <mpi.h>
-#endif
-#include "mosaic_util.h"
-#include "constant.h"
-
-#define HPI (0.5*M_PI)
-#define TPI (2.0*M_PI)
-#define TOLORENCE (1.e-6)
-#define EPSLN8 (1.e-8)
-#define EPSLN10 (1.e-10)
-#define EPSLN15 (1.e-15)
-#define EPSLN30 (1.e-30)
-
-/** \file
- *  \ingroup mosaic
- *  \brief Error handling and other general utilities for @ref mosaic_mod
- */
-
-/***********************************************************
-    void error_handler(char *str)
-    error handler: will print out error message and then abort
-***********************************************************/
-
-void error_handler(const char *msg)
-{
-  fprintf(stderr, "FATAL Error: %s\n", msg );
-#ifdef use_libMPI
-  MPI_Abort(MPI_COMM_WORLD, -1);
-#else
-  exit(1);
-#endif
-} /* error_handler */
-
-/*********************************************************************
-
-   int nearest_index(double value, const double *array, int ia)
-
-   return index of nearest data point within "array" corresponding to "value".
-   if "value" is outside the domain of "array" then nearest_index = 0
-   or = size(array)-1 depending on whether array(0) or array(ia-1) is
-   closest to "value"
-
-   Arguments:
-     value:  arbitrary data...same units as elements in "array"
-     array:  array of data points  (must be monotonically increasing)
-     ia   :  size of array.
-
-********************************************************************/
-int nearest_index(double value, const double *array, int ia)
-{
-  int index, i;
-  int keep_going;
-
-  for(i=1; i<ia; i++){
-    if (array[i] < array[i-1])
-      error_handler("nearest_index: array must be monotonically increasing");
-  }
-  if (value < array[0] )
-    index = 0;
-  else if ( value > array[ia-1])
-    index = ia-1;
-  else
-  {
-    i=0;
-    keep_going = 1;
-    while (i < ia && keep_going) {
-      i = i+1;
-      if (value <= array[i]) {
-        index = i;
-        if (array[i]-value > value-array[i-1]) index = i-1;
-        keep_going = 0;
-      }
-    }
-  }
-  return index;
-
-}
-
-/******************************************************************/
-
-void tokenize(const char * const string, const char *tokens, unsigned int varlen,
-              unsigned int maxvar, char * pstring, unsigned int * const nstr)
-{
-  size_t i, j, nvar, len, ntoken;
-  int found, n;
-
-  nvar = 0; j = 0;
-  len = strlen(string);
-  ntoken = strlen(tokens);
-  /* here we use the fact that C array [][] is contiguous in memory */
-  if(string[0] == 0)error_handler("Error from tokenize: to-be-parsed string is empty");
-
-  for(i = 0; i < len; i ++){
-    if(string[i] != ' ' && string[i] != '\t'){
-      found = 0;
-      for(n=0; n<ntoken; n++) {
-        if(string[i] == tokens[n] ) {
-          found = 1;
-          break;
-        }
-      }
-      if(found) {
-        if( j != 0) { /* remove :: */
-          *(pstring + (nvar++)*varlen + j) = 0;
-          j = 0;
-          if(nvar >= maxvar) error_handler("Error from tokenize: number of variables exceeds limit");
-        }
-      }
-      else {
-        *(pstring + nvar*varlen + j++) = string[i];
-        if(j >= varlen ) error_handler("error from tokenize: variable name length exceeds limit during tokenization");
-      }
-    }
-  }
-  *(pstring + nvar*varlen + j) = 0;
-
-  *nstr = ++nvar;
-
-}
-
-/*******************************************************************************
-  double maxval_double(int size, double *data)
-  get the maximum value of double array
-*******************************************************************************/
-double maxval_double(int size, const double *data)
-{
-  int n;
-  double maxval;
-
-  maxval = data[0];
-  for(n=1; n<size; n++){
-    if( data[n] > maxval ) maxval = data[n];
-  }
-
-  return maxval;
-
-} /* maxval_double */
-
-
-/*******************************************************************************
-  double minval_double(int size, double *data)
-  get the minimum value of double array
-*******************************************************************************/
-double minval_double(int size, const double *data)
-{
-  int n;
-  double minval;
-
-  minval = data[0];
-  for(n=1; n<size; n++){
-    if( data[n] < minval ) minval = data[n];
-  }
-
-  return minval;
-
-} /* minval_double */
-
-/*******************************************************************************
-  double avgval_double(int size, double *data)
-  get the average value of double array
-*******************************************************************************/
-double avgval_double(int size, const double *data)
-{
-  int n;
-  double avgval;
-
-  avgval = 0;
-  for(n=0; n<size; n++) avgval += data[n];
-  avgval /= size;
-
-  return avgval;
-
-} /* avgval_double */
-
-
-/*******************************************************************************
-  void latlon2xyz
-  Routine to map (lon, lat) to (x,y,z)
-******************************************************************************/
-void latlon2xyz(int size, const double *lon, const double *lat, double *x, double *y, double *z)
-{
-  int n;
-
-  for(n=0; n<size; n++) {
-    x[n] = cos(lat[n])*cos(lon[n]);
-    y[n] = cos(lat[n])*sin(lon[n]);
-    z[n] = sin(lat[n]);
-  }
-
-} /* latlon2xyz */
-
-/*------------------------------------------------------------
-  void xyz2laton(np, p, xs, ys)
-  Transfer cartesian coordinates to spherical coordinates
-  ----------------------------------------------------------*/
-void xyz2latlon( int np, const double *x, const double *y, const double *z, double *lon, double *lat)
-{
-
-  double xx, yy, zz;
-  double dist;
-  int i;
-
-  for(i=0; i<np; i++) {
-    xx = x[i];
-    yy = y[i];
-    zz = z[i];
-    dist = sqrt(xx*xx+yy*yy+zz*zz);
-    xx /= dist;
-    yy /= dist;
-    zz /= dist;
-
-    if ( fabs(xx)+fabs(yy)  < EPSLN10 )
-      lon[i] = 0;
-    else
-      lon[i] = atan2(yy, xx);
-    lat[i] = asin(zz);
-
-    if ( lon[i] < 0.) lon[i] = 2.*M_PI + lon[i];
-  }
-
-} /* xyz2latlon */
-
-/*------------------------------------------------------------------------------
-  double box_area(double ll_lon, double ll_lat, double ur_lon, double ur_lat)
-  return the area of a lat-lon grid box. grid is in radians.
-  ----------------------------------------------------------------------------*/
-double box_area(double ll_lon, double ll_lat, double ur_lon, double ur_lat)
-{
-  double dx = ur_lon-ll_lon;
-
-  if(dx > M_PI)  dx = dx - 2.0*M_PI;
-  if(dx < -M_PI) dx = dx + 2.0*M_PI;
-
-  return (dx*(sin(ur_lat)-sin(ll_lat))*RADIUS*RADIUS ) ;
-
-} /* box_area */
-
-
-/*------------------------------------------------------------------------------
-  double poly_area(const x[], const y[], int n)
-  obtains area of input polygon by line integrating -sin(lat)d(lon)
-  Vertex coordinates must be in degrees.
-  Vertices must be listed counter-clockwise around polygon.
-  grid is in radians.
-  ----------------------------------------------------------------------------*/
-double poly_area_dimensionless(const double x[], const double y[], int n)
-{
-  double area = 0.0;
-  int    i;
-
-  for (i=0;i<n;i++) {
-    int ip = (i+1) % n;
-    double dx = (x[ip]-x[i]);
-    double lat1, lat2;
-    double dy, dat;
-
-    lat1 = y[ip];
-    lat2 = y[i];
-    if(dx > M_PI)  dx = dx - 2.0*M_PI;
-    if(dx < -M_PI) dx = dx + 2.0*M_PI;
-    if (dx==0.0) continue;
-
-    if ( fabs(lat1-lat2) < SMALL_VALUE) /* cheap area calculation along latitude */
-      area -= dx*sin(0.5*(lat1+lat2));
-    else {
-      dy = 0.5*(lat1-lat2);
-      dat = sin(dy)/dy;
-      area -= dx*sin(0.5*(lat1+lat2))*dat;
-    }
-  }
-  if(area < 0)
-    return (-area/(4*M_PI));
-  else
-    return (area/(4*M_PI));
-
-} /* poly_area */
-
-double poly_area(const double x[], const double y[], int n)
-{
-  double area = 0.0;
-  int    i;
-
-  for (i=0;i<n;i++) {
-    int ip = (i+1) % n;
-    double dx = (x[ip]-x[i]);
-    double lat1, lat2;
-    double dy, dat;
-
-    lat1 = y[ip];
-    lat2 = y[i];
-    if(dx > M_PI)  dx = dx - 2.0*M_PI;
-    if(dx < -M_PI) dx = dx + 2.0*M_PI;
-    if (dx==0.0) continue;
-
-    if ( fabs(lat1-lat2) < SMALL_VALUE) /* cheap area calculation along latitude */
-      area -= dx*sin(0.5*(lat1+lat2));
-    else {
-      dy = 0.5*(lat1-lat2);
-      dat = sin(dy)/dy;
-      area -= dx*sin(0.5*(lat1+lat2))*dat;
-    }
-  }
-  if(area < 0)
-    return -area*RADIUS*RADIUS;
-  else
-    return area*RADIUS*RADIUS;
-
-} /* poly_area */
-
-double poly_area_no_adjust(const double x[], const double y[], int n)
-{
-  double area = 0.0;
-  int    i;
-
-  for (i=0;i<n;i++) {
-    int ip = (i+1) % n;
-    double dx = (x[ip]-x[i]);
-    double lat1, lat2;
-
-    lat1 = y[ip];
-    lat2 = y[i];
-    if (dx==0.0) continue;
-
-    if ( fabs(lat1-lat2) < SMALL_VALUE) /* cheap area calculation along latitude */
-      area -= dx*sin(0.5*(lat1+lat2));
-    else
-      area += dx*(cos(lat1)-cos(lat2))/(lat1-lat2);
-  }
-  if(area < 0)
-    return area*RADIUS*RADIUS;
-  else
-    return area*RADIUS*RADIUS;
-} /* poly_area_no_adjust */
-
-int delete_vtx(double x[], double y[], int n, int n_del)
-{
-  for (;n_del<n-1;n_del++) {
-    x[n_del] = x[n_del+1];
-    y[n_del] = y[n_del+1];
-  }
-
-  return (n-1);
-} /* delete_vtx */
-
-int insert_vtx(double x[], double y[], int n, int n_ins, double lon_in, double lat_in)
-{
-  int i;
-
-  for (i=n-1;i>=n_ins;i--) {
-    x[i+1] = x[i];
-    y[i+1] = y[i];
-  }
-
-  x[n_ins] = lon_in;
-  y[n_ins] = lat_in;
-  return (n+1);
-} /* insert_vtx */
-
-void v_print(double x[], double y[], int n)
-{
-  int i;
-
-  for (i=0;i<n;i++){
-    printf(" %20g   %20g\n", x[i], y[i]);
-  }
-} /* v_print */
-
-int fix_lon(double x[], double y[], int n, double tlon)
-{
-  double x_sum, dx, dx_;
-  int i, nn = n, pole = 0;
-
-  for (i=0;i<nn;i++) if (fabs(y[i])>=HPI-TOLORENCE) pole = 1;
-  if (0&&pole) {
-    printf("fixing pole cell\n");
-    v_print(x, y, nn);
-    printf("---------");
-  }
-
-  /* all pole points must be paired */
-  for (i=0;i<nn;i++) if (fabs(y[i])>=HPI-TOLORENCE) {
-      int im=(i+nn-1)%nn, ip=(i+1)%nn;
-
-      if (y[im]==y[i] && y[ip]==y[i]) {
-        nn = delete_vtx(x, y, nn, i);
-        i--;
-      } else if (y[im]!=y[i] && y[ip]!=y[i]) {
-        nn = insert_vtx(x, y, nn, i, x[i], y[i]);
-        i++;
-      }
-    }
-  /* first of pole pair has longitude of previous vertex */
-  /* second of pole pair has longitude of subsequent vertex */
-  for (i=0;i<nn;i++) if (fabs(y[i])>=HPI-TOLORENCE) {
-      int im=(i+nn-1)%nn, ip=(i+1)%nn;
-
-      if (y[im]!=y[i]){
-        x[i] = x[im];
-      }
-      if (y[ip]!=y[i]){
-        x[i] = x[ip];
-      }
-    }
-
-  if (nn){
-    x_sum = x[0];
-  }
-  else{
-    return(0);
-  }
-  for (i=1;i<nn;i++) {
-    dx_ = x[i]-x[i-1];
-
-    if (dx_ < -M_PI) dx_ = dx_ + TPI;
-    else if (dx_ >  M_PI) dx_ = dx_ - TPI;
-    x_sum += (x[i] = x[i-1] + dx_);
-  }
-
-  dx = (x_sum/nn)-tlon;
-  if (dx < -M_PI){
-    for (i=0;i<nn;i++){
-      x[i] += TPI;
-    }
-  }
-  else if (dx >  M_PI){
-    for (i=0;i<nn;i++){
-      x[i] -= TPI;
-    }
-  }
-
-  if (0&&pole) {
-    printf("area=%g\n", poly_area(x, y,nn));
-    v_print(x, y, nn);
-    printf("---------");
-  }
-
-  return (nn);
-} /* fix_lon */
-
-
-/*------------------------------------------------------------------------------
-  double great_circle_distance()
-  computes distance between two points along a great circle
-  (the shortest distance between 2 points on a sphere)
-  returned in units of meter
-  ----------------------------------------------------------------------------*/
-double great_circle_distance(double *p1, double *p2)
-{
-  double dist, beta;
-
-  /* This algorithm is not accurate for small distance
-     dist = RADIUS*ACOS(SIN(p1[1])*SIN(p2[1]) + COS(p1[1])*COS(p2[1])*COS(p1[0]-p2[0]));
-  */
-  beta = 2.*asin( sqrt( sin((p1[1]-p2[1])/2.)*sin((p1[1]-p2[1])/2.) +
-                        cos(p1[1])*cos(p2[1])*(sin((p1[0]-p2[0])/2.)*sin((p1[0]-p2[0])/2.)) ) );
-  dist = RADIUS*beta;
-  return dist;
-
-} /* great_circle_distance */
-
-
-/* Compute the great circle area of a polygon on a sphere */
-double great_circle_area(int n, const double *x, const double *y, const double *z) {
-  int i;
-  double pnt0[3], pnt1[3], pnt2[3];
-  double sum, area;
-
-  /* sum angles around polygon */
-  sum=0.0;
-  for ( i=0; i<n; i++) {
-    /* points that make up a side of polygon */
-    pnt0[0] = x[i];
-    pnt0[1] = y[i];
-    pnt0[2] = z[i];
-    pnt1[0] = x[(i+1)%n];
-    pnt1[1] = y[(i+1)%n];
-    pnt1[2] = z[(i+1)%n];
-    pnt2[0] = x[(i+2)%n];
-    pnt2[1] = y[(i+2)%n];
-    pnt2[2] = z[(i+2)%n];
-
-    /* compute angle for pnt1 */
-    sum += spherical_angle(pnt1, pnt2, pnt0);
-
-  }
-  area = (sum - (n-2.)*M_PI) * RADIUS* RADIUS;
-  return area;
-}
-
-/*------------------------------------------------------------------------------
-  double spherical_angle(const double *p1, const double *p2, const double *p3)
-  p3
-  /
-  /
-  p1 ---> angle
-  \
-  \
-  p2
-  -----------------------------------------------------------------------------*/
-double spherical_angle(const double *v1, const double *v2, const double *v3)
-{
-  double angle;
-  long double px, py, pz, qx, qy, qz, ddd;
-
-  /* vector product between v1 and v2 */
-  px = v1[1]*v2[2] - v1[2]*v2[1];
-  py = v1[2]*v2[0] - v1[0]*v2[2];
-  pz = v1[0]*v2[1] - v1[1]*v2[0];
-  /* vector product between v1 and v3 */
-  qx = v1[1]*v3[2] - v1[2]*v3[1];
-  qy = v1[2]*v3[0] - v1[0]*v3[2];
-  qz = v1[0]*v3[1] - v1[1]*v3[0];
-
-  ddd = (px*px+py*py+pz*pz)*(qx*qx+qy*qy+qz*qz);
-  if ( ddd <= 0.0 )
-    angle = 0. ;
-  else {
-    ddd = (px*qx+py*qy+pz*qz) / sqrtl(ddd);
-    if( fabsl(ddd-1) < EPSLN30 ) ddd = 1;
-    if( fabsl(ddd+1) < EPSLN30 ) ddd = -1;
-    if ( ddd>1. || ddd<-1. ) {
-      /*FIX (lmh) to correctly handle co-linear points (angle near pi or 0) */
-      if (ddd < 0.)
-        angle = M_PI;
-      else
-        angle = 0.;
-    }
-    else
-      angle = ((double)acosl( ddd ));
-  }
-
-  return angle;
-} /* spherical_angle */
-
-/*------------------------------------------------------------------------------
-  double spherical_excess_area(p_lL, p_uL, p_lR, p_uR)
-  get the surface area of a cell defined as a quadrilateral
-  on the sphere. Area is computed as the spherical excess
-  [area units are m^2]
-  ----------------------------------------------------------------------------*/
-double spherical_excess_area(const double* p_ll, const double* p_ul,
-                             const double* p_lr, const double* p_ur, double radius)
-{
-  double area, ang1, ang2, ang3, ang4;
-  double v1[3], v2[3], v3[3];
-
-  /*   S-W: 1   */
-  latlon2xyz(1, p_ll, p_ll+1, v1, v1+1, v1+2);
-  latlon2xyz(1, p_lr, p_lr+1, v2, v2+1, v2+2);
-  latlon2xyz(1, p_ul, p_ul+1, v3, v3+1, v3+2);
-  ang1 = spherical_angle(v1, v2, v3);
-
-  /*   S-E: 2   */
-  latlon2xyz(1, p_lr, p_lr+1, v1, v1+1, v1+2);
-  latlon2xyz(1, p_ur, p_ur+1, v2, v2+1, v2+2);
-  latlon2xyz(1, p_ll, p_ll+1, v3, v3+1, v3+2);
-  ang2 = spherical_angle(v1, v2, v3);
-
-  /*   N-E: 3   */
-  latlon2xyz(1, p_ur, p_ur+1, v1, v1+1, v1+2);
-  latlon2xyz(1, p_ul, p_ul+1, v2, v2+1, v2+2);
-  latlon2xyz(1, p_lr, p_lr+1, v3, v3+1, v3+2);
-  ang3 = spherical_angle(v1, v2, v3);
-
-  /*   N-W: 4   */
-  latlon2xyz(1, p_ul, p_ul+1, v1, v1+1, v1+2);
-  latlon2xyz(1, p_ur, p_ur+1, v2, v2+1, v2+2);
-  latlon2xyz(1, p_ll, p_ll+1, v3, v3+1, v3+2);
-  ang4 = spherical_angle(v1, v2, v3);
-
-  area = (ang1 + ang2 + ang3 + ang4 - 2.*M_PI) * radius* radius;
-
-  return area;
-
-} /* spherical_excess_area */
-
-
-/*----------------------------------------------------------------------
-  void vect_cross(e, p1, p2)
-  Perform cross products of 3D vectors: e = P1 X P2
-  -------------------------------------------------------------------*/
-
-void vect_cross(const double *p1, const double *p2, double *e )
-{
-
-  e[0] = p1[1]*p2[2] - p1[2]*p2[1];
-  e[1] = p1[2]*p2[0] - p1[0]*p2[2];
-  e[2] = p1[0]*p2[1] - p1[1]*p2[0];
-
-} /* vect_cross */
-
-
-/*----------------------------------------------------------------------
-  double* vect_cross(p1, p2)
-  return cross products of 3D vectors: = P1 X P2
-  -------------------------------------------------------------------*/
-
-double dot(const double *p1, const double *p2)
-{
-
-  return( p1[0]*p2[0] + p1[1]*p2[1] + p1[2]*p2[2] );
-
-}
-
-
-double metric(const double *p) {
-  return (sqrt(p[0]*p[0] + p[1]*p[1]+p[2]*p[2]) );
-}
-
-
-/* ----------------------------------------------------------------
-   make a unit vector
-   --------------------------------------------------------------*/
-void normalize_vect(double *e)
-{
-  double pdot;
-  int k;
-
-  pdot = e[0]*e[0] + e[1] * e[1] + e[2] * e[2];
-  pdot = sqrt( pdot );
-
-  for(k=0; k<3; k++) e[k] /= pdot;
-}
-
-
-/*------------------------------------------------------------------
-  void unit_vect_latlon(int size, lon, lat, vlon, vlat)
-
-  calculate unit vector for latlon in cartesian coordinates
-
-  ---------------------------------------------------------------------*/
-void unit_vect_latlon(int size, const double *lon, const double *lat, double *vlon, double *vlat)
-{
-  double sin_lon, cos_lon, sin_lat, cos_lat;
-  int n;
-
-  for(n=0; n<size; n++) {
-    sin_lon = sin(lon[n]);
-    cos_lon = cos(lon[n]);
-    sin_lat = sin(lat[n]);
-    cos_lat = cos(lat[n]);
-
-    vlon[3*n] = -sin_lon;
-    vlon[3*n+1] =  cos_lon;
-    vlon[3*n+2] =  0.;
-
-    vlat[3*n]   = -sin_lat*cos_lon;
-    vlat[3*n+1] = -sin_lat*sin_lon;
-    vlat[3*n+2] =  cos_lat;
-  }
-} /* unit_vect_latlon */
-
-
-/* Intersect a line and a plane
-   Intersects between the plane ( three points ) (entries in counterclockwise order)
-   and the line determined by the endpoints l1 and l2 (t=0.0 at l1 and t=1.0 at l2)
-   returns true if the two intersect and the output variables are valid
-   outputs p containing the coordinates in the tri and t the coordinate in the line
-   of the intersection.
-   NOTE: the intersection doesn't have to be inside the tri or line for this to return true
-*/
-int intersect_tri_with_line(const double *plane, const double *l1, const double *l2, double *p,
-                            double *t) {
-
-  long double M[3*3], inv_M[3*3];
-  long double V[3];
-  long double X[3];
-  int is_invert=0;
-
-  const double *pnt0=plane;
-  const double *pnt1=plane+3;
-  const double *pnt2=plane+6;
-
-  /* To do intersection just solve the set of linear equations for both
-     Setup M
-  */
-  M[0]=l1[0]-l2[0]; M[1]=pnt1[0]-pnt0[0]; M[2]=pnt2[0]-pnt0[0];
-  M[3]=l1[1]-l2[1]; M[4]=pnt1[1]-pnt0[1]; M[5]=pnt2[1]-pnt0[1];
-  M[6]=l1[2]-l2[2]; M[7]=pnt1[2]-pnt0[2]; M[8]=pnt2[2]-pnt0[2];
-
-
-  /* Invert M */
-  is_invert = invert_matrix_3x3(M,inv_M);
-  if (!is_invert) return 0;
-
-  /* Set variable holding vector */
-  V[0]=l1[0]-pnt0[0];
-  V[1]=l1[1]-pnt0[1];
-  V[2]=l1[2]-pnt0[2];
-
-  /* Calculate solution */
-  mult(inv_M, V, X);
-
-  /* Get answer out */
-  *t=((double)X[0]);
-  p[0]=((double)X[1]);
-  p[1]=((double)X[2]);
-
-  return 1;
-}
-
-
-void mult(long double m[], long double v[], long double out_v[]) {
-
-  out_v[0]=m[0]*v[0]+m[1]*v[1]+m[2]*v[2];
-  out_v[1]=m[3]*v[0]+m[4]*v[1]+m[5]*v[2];
-  out_v[2]=m[6]*v[0]+m[7]*v[1]+m[8]*v[2];
-
-}
-
-
-/* returns 1 if matrix is inverted, 0 otherwise */
-int invert_matrix_3x3(long double m[], long double m_inv[]) {
-
-
-  const long double det =  m[0] * (m[4]*m[8] - m[5]*m[7])
-      -m[1] * (m[3]*m[8] - m[5]*m[6])
-      +m[2] * (m[3]*m[7] - m[4]*m[6]);
-#ifdef test_invert_matrix_3x3
-  printf("det = %Lf\n", det);
-#endif
-  if (fabsl(det) < EPSLN15 ) return 0;
-
-  const long double deti = 1.0/det;
-
-  m_inv[0] = (m[4]*m[8] - m[5]*m[7]) * deti;
-  m_inv[1] = (m[2]*m[7] - m[1]*m[8]) * deti;
-  m_inv[2] = (m[1]*m[5] - m[2]*m[4]) * deti;
-
-  m_inv[3] = (m[5]*m[6] - m[3]*m[8]) * deti;
-  m_inv[4] = (m[0]*m[8] - m[2]*m[6]) * deti;
-  m_inv[5] = (m[2]*m[3] - m[0]*m[5]) * deti;
-
-  m_inv[6] = (m[3]*m[7] - m[4]*m[6]) * deti;
-  m_inv[7] = (m[1]*m[6] - m[0]*m[7]) * deti;
-  m_inv[8] = (m[0]*m[4] - m[1]*m[3]) * deti;
-
-  return 1;
-}
-
-#ifndef MAXNODELIST
-#define MAXNODELIST 100
-#endif
-
-struct Node *nodeList=NULL;
-int curListPos=0;
-
-void rewindList(void)
-{
-  int n;
-
-  curListPos = 0;
-  if(!nodeList) nodeList = (struct Node *)malloc(MAXNODELIST*sizeof(struct Node));
-  for(n=0; n<MAXNODELIST; n++) initNode(nodeList+n);
-
-}
-
-struct Node *getNext()
-{
-  struct Node *temp=NULL;
-  int n;
-
-  if(!nodeList) {
-    nodeList = (struct Node *)malloc(MAXNODELIST*sizeof(struct Node));
-    for(n=0; n<MAXNODELIST; n++) initNode(nodeList+n);
-  }
-
-  temp = nodeList+curListPos;
-  curListPos++;
-  if(curListPos > MAXNODELIST) error_handler("getNext: curListPos >= MAXNODELIST");
-
-  return (temp);
-}
-
-
-void initNode(struct Node *node)
-{
-  node->x = 0;
-  node->y = 0;
-  node->z = 0;
-  node->u = 0;
-  node->intersect = 0;
-  node->inbound = 0;
-  node->isInside = 0;
-  node->Next = NULL;
-  node->initialized=0;
-
-}
-
-void addEnd(struct Node *list, double x, double y, double z, int intersect, double u, int inbound, int inside)
-{
-
-  struct Node *temp=NULL;
-
-  if(list == NULL) error_handler("addEnd: list is NULL");
-
-  if(list->initialized) {
-
-    /* (x,y,z) might already in the list when intersect is true and u=0 or 1 */
-    temp = list;
-    while (temp) {
-      if(samePoint(temp->x, temp->y, temp->z, x, y, z)) return;
-      temp=temp->Next;
-    }
-    temp = list;
-    while(temp->Next)
-      temp=temp->Next;
-
-    /* Append at the end of the list.  */
-    temp->Next = getNext();
-    temp = temp->Next;
-  }
-  else {
-    temp = list;
-  }
-
-  temp->x = x;
-  temp->y = y;
-  temp->z = z;
-  temp->u = u;
-  temp->intersect = intersect;
-  temp->inbound = inbound;
-  temp->initialized=1;
-  temp->isInside = inside;
-}
-
-/* return 1 if the point (x,y,z) is added in the list, return 0 if it is already in the list */
-
-int addIntersect(struct Node *list, double x, double y, double z, int intersect, double u1, double u2, int inbound,
-                 int is1, int ie1, int is2, int ie2)
-{
-
-  double u1_cur, u2_cur;
-  int    i1_cur, i2_cur;
-  struct Node *temp=NULL;
-
-  if(list == NULL) error_handler("addEnd: list is NULL");
-
-  /* first check to make sure this point is not in the list */
-  u1_cur = u1;
-  i1_cur = is1;
-  u2_cur = u2;
-  i2_cur = is2;
-  if(u1_cur == 1) {
-    u1_cur = 0;
-    i1_cur = ie1;
-  }
-  if(u2_cur == 1) {
-    u2_cur = 0;
-    i2_cur = ie2;
-  }
-
-  if(list->initialized) {
-    temp = list;
-    while(temp) {
-      if( temp->u == u1_cur && temp->subj_index == i1_cur) return 0;
-      if( temp->u_clip == u2_cur && temp->clip_index == i2_cur) return 0;
-      if( !temp->Next ) break;
-      temp=temp->Next;
-    }
-
-    /* Append at the end of the list.  */
-    temp->Next = getNext();
-    temp = temp->Next;
-  }
-  else {
-    temp = list;
-  }
-
-  temp->x = x;
-  temp->y = y;
-  temp->z = z;
-  temp->intersect = intersect;
-  temp->inbound = inbound;
-  temp->initialized=1;
-  temp->isInside = 0;
-  temp->u = u1_cur;
-  temp->subj_index = i1_cur;
-  temp->u_clip = u2_cur;
-  temp->clip_index = i2_cur;
-
-  return 1;
-}
-
-
-int length(struct Node *list)
-{
-  struct Node *cur_ptr=NULL;
-  int count=0;
-
-  cur_ptr=list;
-
-  while(cur_ptr)
-  {
-    if(cur_ptr->initialized ==0) break;
-    cur_ptr=cur_ptr->Next;
-    count++;
-  }
-  return(count);
-}
-
-/* two points are the same if there are close enough */
-int samePoint(double x1, double y1, double z1, double x2, double y2, double z2)
-{
-  if( fabs(x1-x2) > EPSLN10 || fabs(y1-y2) > EPSLN10 || fabs(z1-z2) > EPSLN10 )
-    return 0;
-  else
-    return 1;
-}
-
-
-
-int sameNode(struct Node node1, struct Node node2)
-{
-  if( node1.x == node2.x && node1.y == node2.y && node1.z==node2.z )
-    return 1;
-  else
-    return 0;
-}
-
-
-void addNode(struct Node *list, struct Node inNode)
-{
-
-  addEnd(list, inNode.x, inNode.y, inNode.z, inNode.intersect, inNode.u, inNode.inbound, inNode.isInside);
-
-}
-
-struct Node *getNode(struct Node *list, struct Node inNode)
-{
-  struct Node *thisNode=NULL;
-  struct Node *temp=NULL;
-
-  temp = list;
-  while( temp ) {
-    if( sameNode( *temp, inNode ) ) {
-      thisNode = temp;
-      temp = NULL;
-      break;
-    }
-    temp = temp->Next;
-  }
-
-  return thisNode;
-}
-
-struct Node *getNextNode(struct Node *list)
-{
-  return list->Next;
-}
-
-void copyNode(struct Node *node_out, struct Node node_in)
-{
-
-  node_out->x = node_in.x;
-  node_out->y = node_in.y;
-  node_out->z = node_in.z;
-  node_out->u = node_in.u;
-  node_out->intersect = node_in.intersect;
-  node_out->inbound   = node_in.inbound;
-  node_out->Next = NULL;
-  node_out->initialized = node_in.initialized;
-  node_out->isInside = node_in.isInside;
-}
-
-void printNode(struct Node *list, char *str)
-{
-  struct Node *temp;
-
-  if(list == NULL) error_handler("printNode: list is NULL");
-  if(str) printf("  %s \n", str);
-  temp = list;
-  while(temp) {
-    if(temp->initialized ==0) break;
-    printf(" (x, y, z, interset, inbound, isInside) = (%19.15f,%19.15f,%19.15f,%d,%d,%d)\n",
-           temp->x, temp->y, temp->z, temp->intersect, temp->inbound, temp->isInside);
-    temp = temp->Next;
-  }
-  printf("\n");
-}
-
-int intersectInList(struct Node *list, double x, double y, double z)
-{
-  struct Node *temp;
-  int found=0;
-
-  temp = list;
-  found = 0;
-  while ( temp ) {
-    if( temp->x == x && temp->y == y && temp->z == z ) {
-      found = 1;
-      break;
-    }
-    temp=temp->Next;
-  }
-  if (!found) error_handler("intersectInList: point (x,y,z) is not found in the list");
-  if( temp->intersect == 2 )
-    return 1;
-  else
-    return 0;
-
-}
-
-
-/* The following insert a intersection after non-intersect point (x2,y2,z2), if the point
-   after (x2,y2,z2) is an intersection, if u is greater than the u value of the intersection,
-   insert after, otherwise insert before
-*/
-void insertIntersect(struct Node *list, double x, double y, double z, double u1, double u2, int inbound,
-                     double x2, double y2, double z2)
-{
-  struct Node *temp1=NULL, *temp2=NULL;
-  struct Node *temp;
-  double u_cur;
-  int found=0;
-
-  temp1 = list;
-  found = 0;
-  while ( temp1 ) {
-    if( temp1->x == x2 && temp1->y == y2 && temp1->z == z2 ) {
-      found = 1;
-      break;
-    }
-    temp1=temp1->Next;
-  }
-  if (!found) error_handler("inserAfter: point (x,y,z) is not found in the list");
-
-  /* when u = 0 or u = 1, set the grid point to be the intersection point to solve truncation error isuse */
-  u_cur = u1;
-  if(u1 == 1) {
-    u_cur = 0;
-    temp1 = temp1->Next;
-    if(!temp1) temp1 = list;
-  }
-  if(u_cur==0) {
-    temp1->intersect = 2;
-    temp1->isInside = 1;
-    temp1->u = u_cur;
-    temp1->x = x;
-    temp1->y = y;
-    temp1->z = z;
-    return;
-  }
-
-  /* when u2 != 0 and u2 !=1, can decide if one end of the point is outside depending on inbound value */
-  if(u2 != 0 && u2 != 1) {
-    if(inbound == 1) { /* goes outside, then temp1->Next is an outside point */
-      /* find the next non-intersect point */
-      temp2 = temp1->Next;
-      if(!temp2) temp2 = list;
-      while(temp2->intersect) {
-        temp2=temp2->Next;
-        if(!temp2) temp2 = list;
-      }
-
-      temp2->isInside = 0;
-    }
-    else if(inbound ==2) { /* goes inside, then temp1 is an outside point */
-      temp1->isInside = 0;
-    }
-  }
-
-  temp2 = temp1->Next;
-  while ( temp2 ) {
-    if( temp2->intersect == 1 ) {
-      if( temp2->u > u_cur ) {
-        break;
-      }
-    }
-    else
-      break;
-    temp1 = temp2;
-    temp2 = temp2->Next;
-  }
-
-  /* assign value */
-  temp = getNext();
-  temp->x = x;
-  temp->y = y;
-  temp->z = z;
-  temp->u = u_cur;
-  temp->intersect = 1;
-  temp->inbound = inbound;
-  temp->isInside = 1;
-  temp->initialized = 1;
-  temp1->Next = temp;
-  temp->Next = temp2;
-
-}
-
-double gridArea(struct Node *grid) {
-  double x[20], y[20], z[20];
-  struct Node *temp=NULL;
-  double area;
-  int n;
-
-  temp = grid;
-  n = 0;
-  while( temp ) {
-    x[n] = temp->x;
-    y[n] = temp->y;
-    z[n] = temp->z;
-    n++;
-    temp = temp->Next;
-  }
-
-  area = great_circle_area(n, x, y, z);
-
-  return area;
-
-}
-
-int isIntersect(struct Node node) {
-
-  return node.intersect;
-
-}
-
-
-int getInbound( struct Node node )
-{
-  return node.inbound;
-}
-
-struct Node *getLast(struct Node *list)
-{
-  struct Node *temp1;
-
-  temp1 = list;
-  if( temp1 ) {
-    while( temp1->Next ) {
-      temp1 = temp1->Next;
-    }
-  }
-
-  return temp1;
-}
-
-
-int getFirstInbound( struct Node *list, struct Node *nodeOut)
-{
-  struct Node *temp=NULL;
-
-  temp=list;
-
-  while(temp) {
-    if( temp->inbound == 2 ) {
-      copyNode(nodeOut, *temp);
-      return 1;
-    }
-    temp=temp->Next;
-  }
-
-  return 0;
-}
-
-void getCoordinate(struct Node node, double *x, double *y, double *z)
-{
-
-
-  *x = node.x;
-  *y = node.y;
-  *z = node.z;
-
-}
-
-void getCoordinates(struct Node *node, double *p)
-{
-
-
-  p[0] = node->x;
-  p[1] = node->y;
-  p[2] = node->z;
-
-}
-
-void setCoordinate(struct Node *node, double x, double y, double z)
-{
-
-
-  node->x = x;
-  node->y = y;
-  node->z = z;
-
-}
-
-/* set inbound value for the points in interList that has inbound =0,
-   this will also set some inbound value of the points in list1
-*/
-
-void setInbound(struct Node *interList, struct Node *list)
-{
-
-  struct Node *temp1=NULL, *temp=NULL;
-  struct Node *temp1_prev=NULL, *temp1_next=NULL;
-  int prev_is_inside, next_is_inside;
-
-  /* for each point in interList, search through list to decide the inbound value the interList point */
-  /* For each inbound point, the prev node should be outside and the next is inside. */
-  if(length(interList) == 0) return;
-
-  temp = interList;
-
-  while(temp) {
-    if( !temp->inbound) {
-      /* search in grid1 to find the prev and next point of temp, when prev point is outside and next point is inside
-         inbound = 2, else inbound = 1*/
-      temp1 = list;
-      temp1_prev = NULL;
-      temp1_next = NULL;
-      while(temp1) {
-        if(sameNode(*temp1, *temp)) {
-          if(!temp1_prev) temp1_prev = getLast(list);
-          temp1_next = temp1->Next;
-          if(!temp1_next) temp1_next = list;
-          break;
-        }
-        temp1_prev = temp1;
-        temp1 = temp1->Next;
-      }
-      if(!temp1_next) error_handler("Error from create_xgrid.c: temp is not in list1");
-      if( temp1_prev->isInside == 0 && temp1_next->isInside == 1)
-        temp->inbound = 2;   /* go inside */
-      else
-        temp->inbound = 1;
-    }
-    temp=temp->Next;
-  }
-}
-
-int isInside(struct Node *node) {
-
-  if(node->isInside == -1) error_handler("Error from mosaic_util.c: node->isInside is not set");
-  return(node->isInside);
-
-}
-
-/*  #define debug_test_create_xgrid */
-
-/* check if node is inside polygon list or not */
-int insidePolygon( struct Node *node, struct Node *list)
-{
-  int is_inside;
-  double pnt0[3], pnt1[3], pnt2[3];
-  double anglesum;
-  struct Node *p1=NULL, *p2=NULL;
-
-  anglesum = 0;
-
-  pnt0[0] = node->x;
-  pnt0[1] = node->y;
-  pnt0[2] = node->z;
-
-  p1 = list;
-  p2 = list->Next;
-  is_inside = 0;
-
-
-  while(p1) {
-    pnt1[0] = p1->x;
-    pnt1[1] = p1->y;
-    pnt1[2] = p1->z;
-    pnt2[0] = p2->x;
-    pnt2[1] = p2->y;
-    pnt2[2] = p2->z;
-    if( samePoint(pnt0[0], pnt0[1], pnt0[2], pnt1[0], pnt1[1], pnt1[2]) ){
-      return 1;
-    }
-    anglesum += spherical_angle(pnt0, pnt2, pnt1);
-    p1 = p1->Next;
-    p2 = p2->Next;
-    if(p2==NULL){
-      p2 = list;
-    }
-  }
-
-  if( fabs(anglesum - 2*M_PI) < EPSLN8 ){
-    is_inside = 1;
-  }
-  else{
-    is_inside = 0;
-  }
-
-#ifdef debug_test_create_xgrid
-  printf("anglesum-2PI is %19.15f, is_inside = %d\n", anglesum- 2*M_PI, is_inside);
-#endif
-
-  return is_inside;
-
-}
-
-int inside_a_polygon(double *lon1, double *lat1, int *npts, double *lon2, double *lat2)
-{
-
-  double x2[20], y2[20], z2[20];
-  double x1, y1, z1;
-  double min_x2, max_x2, min_y2, max_y2, min_z2, max_z2;
-  int isinside, i;
-
-  struct Node *grid1=NULL, *grid2=NULL;
-
-  /* first convert to cartesian grid */
-  latlon2xyz(*npts, lon2, lat2, x2, y2, z2);
-  latlon2xyz(1, lon1, lat1, &x1, &y1, &z1);
-
-  max_x2 = maxval_double(*npts, x2);
-  if(x1 >= max_x2+RANGE_CHECK_CRITERIA) return 0;
-  min_x2 = minval_double(*npts, x2);
-  if(min_x2 >= x1+RANGE_CHECK_CRITERIA) return 0;
-
-  max_y2 = maxval_double(*npts, y2);
-  if(y1 >= max_y2+RANGE_CHECK_CRITERIA) return 0;
-  min_y2 = minval_double(*npts, y2);
-  if(min_y2 >= y1+RANGE_CHECK_CRITERIA) return 0;
-
-  max_z2 = maxval_double(*npts, z2);
-  if(z1 >= max_z2+RANGE_CHECK_CRITERIA) return 0;
-  min_z2 = minval_double(*npts, z2);
-  if(min_z2 >= z1+RANGE_CHECK_CRITERIA) return 0;
-
-
-  /* add x2,y2,z2 to a Node */
-  rewindList();
-  grid1 = getNext();
-  grid2 = getNext();
-
-  addEnd(grid1, x1, y1, z1, 0, 0, 0, -1);
-  for(i=0; i<*npts; i++) addEnd(grid2, x2[i], y2[i], z2[i], 0, 0, 0, -1);
-
-  isinside = insidePolygon(grid1, grid2);
-
-  return isinside;
-
-}
-
-int inside_a_polygon_(double *lon1, double *lat1, int *npts, double *lon2, double *lat2)
-{
-
-  int isinside;
-
-  isinside = inside_a_polygon(lon1, lat1, npts, lon2, lat2);
-
-  return isinside;
-
-}
diff --git a/mosaic/mosaic_util.h b/mosaic/mosaic_util.h
deleted file mode 100644
index c12eb08d03..0000000000
--- a/mosaic/mosaic_util.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-/***********************************************************************
-                      mosaic_util.h
-    This header file provide some utilities routine that will be used in many tools.
-
-    contact: Zhi.Liang@noaa.gov
-***********************************************************************/
-#ifndef MOSAIC_UTIL_H_
-#define MOSAIC_UTIL_H_
-
-#ifndef RANGE_CHECK_CRITERIA
-#define RANGE_CHECK_CRITERIA 0.05
-#endif
-
-#define min(a,b) (a<b ? a:b)
-#define max(a,b) (a>b ? a:b)
-#define SMALL_VALUE ( 1.e-10 )
-
-struct Node{
-  double x, y, z, u, u_clip;
-  int intersect; /* indicate if this point is an intersection, 0 = no, 1= yes, 2=both intersect and vertices */
-  int inbound;      /* -1 uninitialized, 0 coincident, 1 outbound, 2 inbound */
-  int initialized; /* = 0 means empty list */
-  int isInside;   /* = 1 means one point is inside the other polygon, 0 is not, -1 undecided. */
-  int subj_index; /* the index of subject point that an intersection follow. */
-  int clip_index; /* the index of clip point that an intersection follow */
-  struct Node *Next;
-};
-
-
-void error_handler(const char *msg);
-
-int nearest_index(double value, const double *array, int ia);
-
-int lon_fix(double *x, double *y, int n_in, double tlon);
-
-double minval_double(int size, const double *data);
-
-double maxval_double(int size, const double *data);
-
-double avgval_double(int size, const double *data);
-
-void latlon2xyz(int size, const double *lon, const double *lat, double *x, double *y, double *z);
-
-void xyz2latlon(int size, const double *x, const double *y, const double *z, double *lon, double *lat);
-
-double box_area(double ll_lon, double ll_lat, double ur_lon, double ur_lat);
-
-double poly_area(const double lon[], const double lat[], int n);
-
-int delete_vtx(double x[], double y[], int n, int n_del);
-
-int insert_vtx(double x[], double y[], int n, int n_ins, double lon_in, double lat_in);
-
-double poly_area_dimensionless(const double lon[], const double lat[], int n);
-
-double poly_area_no_adjust(const double x[], const double y[], int n);
-
-int fix_lon(double lon[], double lat[], int n, double tlon);
-
-void tokenize(const char * const string, const char *tokens, unsigned int varlen,
-              unsigned int maxvar, char * pstring, unsigned int * const nstr);
-
-double great_circle_distance(double *p1, double *p2);
-
-double spherical_excess_area(const double* p_ll, const double* p_ul,
-                             const double* p_lr, const double* p_ur, double radius);
-
-void vect_cross(const double *p1, const double *p2, double *e );
-
-double spherical_angle(const double *v1, const double *v2, const double *v3);
-
-void normalize_vect(double *e);
-
-void unit_vect_latlon(int size, const double *lon, const double *lat, double *vlon, double *vlat);
-
-double great_circle_area(int n, const double *x, const double *y, const double *z);
-
-double * cross(const double *p1, const double *p2);
-
-double dot(const double *p1, const double *p2);
-
-int intersect_tri_with_line(const double *plane, const double *l1, const double *l2, double *p,
-                            double *t);
-
-int invert_matrix_3x3(long double m[], long double m_inv[]);
-
-void mult(long double m[], long double v[], long double out_v[]);
-
-double metric(const double *p);
-
-int insidePolygon(struct Node *node, struct Node *list );
-
-int inside_a_polygon( double *lon1, double *lat1, int *npts, double *lon2, double *lat2);
-
-void rewindList(void);
-
-struct Node *getNext();
-
-void initNode(struct Node *node);
-
-void addEnd(struct Node *list, double x, double y, double z, int intersect, double u, int inbound, int inside);
-
-int addIntersect(struct Node *list, double x, double y, double z, int intersect, double u1, double u2,
-                 int inbound, int is1, int ie1, int is2, int ie2);
-
-int length(struct Node *list);
-
-int samePoint(double x1, double y1, double z1, double x2, double y2, double z2);
-
-int sameNode(struct Node node1, struct Node node2);
-
-void addNode(struct Node *list, struct Node nodeIn);
-
-struct Node *getNode(struct Node *list, struct Node inNode);
-
-struct Node *getNextNode(struct Node *list);
-
-void copyNode(struct Node *node_out, struct Node node_in);
-
-void printNode(struct Node *list, char *str);
-
-int intersectInList(struct Node *list, double x, double y, double z);
-
-void insertIntersect(struct Node *list, double x, double y, double z, double u1, double u2, int inbound,
-                     double x2, double y2, double z2);
-
-void insertAfter(struct Node *list, double x, double y, double z, int intersect, double u, int inbound,
-                 double x2, double y2, double z2);
-
-double gridArea(struct Node *grid);
-
-int isIntersect(struct Node node);
-
-int getInbound( struct Node node );
-
-struct Node *getLast(struct Node *list);
-
-int getFirstInbound( struct Node *list, struct Node *nodeOut);
-
-void getCoordinate(struct Node node, double *x, double *y, double *z);
-
-void getCoordinates(struct Node *node, double *p);
-
-void setCoordinate(struct Node *node, double x, double y, double z);
-
-void setInbound(struct Node *interList, struct Node *list);
-
-int isInside(struct Node *node);
-
-int inside_a_polygon_(double *lon1, double *lat1, int *npts, double *lon2, double *lat2);
-
-#endif
diff --git a/mosaic/read_mosaic.c b/mosaic/read_mosaic.c
deleted file mode 100644
index a2f1172b6b..0000000000
--- a/mosaic/read_mosaic.c
+++ /dev/null
@@ -1,838 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
-#include "read_mosaic.h"
-#include "constant.h"
-#include "mosaic_util.h"
-#ifdef use_netCDF
-#include <netcdf.h>
-#endif
-
-/** \file
- *  \ingroup mosaic
- *  \brief Support for reading mosaic netcdf grid files.
- */
-
-/*********************************************************************
-    void netcdf_error( int status )
-    status is the returning value of netcdf call. this routine will
-    handle the error when status is not NC_NOERR.
-********************************************************************/
-void handle_netcdf_error(const char *msg, int status )
-{
-  char errmsg[512];
-
-  sprintf( errmsg, "%s: %d", msg, status );
-  error_handler(errmsg);
-
-} /* handle_netcdf_error */
-
-/***************************************************************************
-  void get_file_dir(const char *file, char *dir)
-  get the directory where file is located. The dir will be the complate path
-  before the last "/". If no "/" exist in file, the path will be current ".".
-***************************************************************************/
-void get_file_dir(const char *file, char *dir)
-{
-  int len;
-  const char *strptr = NULL;
-
-  /* get the diretory */
-
-  strptr = strrchr(file, '/');
-  if(strptr) {
-    len = strptr - file;
-    strncpy(dir, file, len);
-  }
-  else {
-    len = 1;
-    strcpy(dir, ".");
-  }
-  dir[len] = 0;
-
-} /* get_file_dir */
-
-
-int field_exist(const char* file, const char *name)
-{
-  int ncid, varid, status;
-  char msg[512];
-  int existed=0;
-
-#ifdef use_netCDF
-
-  status = nc_open(file, NC_NOWRITE, &ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "field_exist: in opening file %s", file);
-    handle_netcdf_error(msg, status);
-  }
-
-  status = nc_inq_varid(ncid, name, &varid);
-  if(status == NC_NOERR){
-    existed = 1;
-  }
-
-  status = nc_close(ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "field_exist: in closing file %s.", file);
-    handle_netcdf_error(msg, status);
-  }
-
-#else  /* ndef use_netCDF */
-  error_handler("read_mosaic: Add flag -Duse_netCDF when compiling");
-#endif  /* use_netcdf */
-
-  return existed;
-
-} /* field_exist */
-
-int get_dimlen(const char* file, const char *name)
-{
-  int ncid, dimid, status, len;
-  size_t size;
-  char msg[512];
-
-  len = 0;
-#ifdef use_netCDF
-  status = nc_open(file, NC_NOWRITE, &ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in opening file %s", file);
-    handle_netcdf_error(msg, status);
-  }
-
-  status = nc_inq_dimid(ncid, name, &dimid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting dimid of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-
-  status = nc_inq_dimlen(ncid, dimid, &size);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting dimension size of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_close(ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in closing file %s.", file);
-    handle_netcdf_error(msg, status);
-  }
-
-  len = size;
-  if(status != NC_NOERR) {
-    sprintf(msg, "in closing file %s", file);
-    handle_netcdf_error(msg, status);
-  }
-#else
-  error_handler("read_mosaic: Add flag -Duse_netCDF when compiling");
-#endif
-
-  return len;
-
-} /* get_dimlen */
-
-/*******************************************************************************
-   void get_string_data(const char *file, const char *name, char *data)
-   get string data of field with "name" from "file".
-******************************************************************************/
-void get_string_data(const char *file, const char *name, char *data)
-{
-  int ncid, varid, status;
-  char msg[512];
-
-#ifdef use_netCDF
-  status = nc_open(file, NC_NOWRITE, &ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in opening file %s", file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_inq_varid(ncid, name, &varid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting varid of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_get_var_text(ncid, varid, data);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting data of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_close(ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in closing file %s.", file);
-    handle_netcdf_error(msg, status);
-  }
-#else
-  error_handler("read_mosaic: Add flag -Duse_netCDF when compiling");
-#endif
-
-} /* get_string_data */
-
-/*******************************************************************************
-   void get_string_data_level(const char *file, const char *name, const size_t *start, const size_t *nread, char *data)
-   get string data of field with "name" from "file".
-******************************************************************************/
-void get_string_data_level(const char *file, const char *name, char *data, const unsigned int *level)
-{
-  int ncid, varid, status, i;
-  size_t start[4], nread[4];
-  char msg[512];
-
-#ifdef use_netCDF
-  status = nc_open(file, NC_NOWRITE, &ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in opening file %s", file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_inq_varid(ncid, name, &varid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting varid of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-  for(i=0; i<4; i++) {
-    start[i] = 0; nread[i] = 1;
-  }
-  start[0] = *level; nread[1] = STRING;
-  status = nc_get_vara_text(ncid, varid, start, nread, data);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting data of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_close(ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in closing file %s.", file);
-    handle_netcdf_error(msg, status);
-  }
-#else
-  error_handler("read_mosaic: Add flag -Duse_netCDF when compiling");
-#endif
-
-} /* get_string_data_level */
-
-
-/*******************************************************************************
-   void get_var_data(const char *file, const char *name, double *data)
-   get var data of field with "name" from "file".
-******************************************************************************/
-void get_var_data(const char *file, const char *name, void *data)
-{
-
-  int ncid, varid, status;
-  nc_type vartype;
-  char msg[512];
-
-#ifdef use_netCDF
-  status = nc_open(file, NC_NOWRITE, &ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in opening file %s", file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_inq_varid(ncid, name, &varid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting varid of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-
-  status = nc_inq_vartype(ncid, varid, &vartype);
-  if(status != NC_NOERR) {
-    sprintf(msg, "get_var_data: in getting vartype of of %s in file %s ", name, file);
-    handle_netcdf_error(msg, status);
-  }
-
-  switch (vartype) {
-    case NC_DOUBLE:case NC_FLOAT:
-#ifdef OVERLOAD_R4
-      status = nc_get_var_float(ncid, varid, (float *)data);
-#else
-      status = nc_get_var_double(ncid, varid, (double *)data);
-#endif
-      break;
-    case NC_INT:
-      status = nc_get_var_int(ncid, varid, (int *)data);
-      break;
-    default:
-      sprintf(msg, "get_var_data: field %s in file %s has an invalid type, "
-              "the type should be NC_DOUBLE, NC_FLOAT or NC_INT", name, file);
-      error_handler(msg);
-  }
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting data of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_close(ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in closing file %s.", file);
-    handle_netcdf_error(msg, status);
-  }
-#else
-  error_handler("read_mosaic: Add flag -Duse_netCDF when compiling");
-#endif
-
-} /* get_var_data */
-
-/*******************************************************************************
-   void get_var_data(const char *file, const char *name, double *data)
-   get var data of field with "name" from "file".
-******************************************************************************/
-void get_var_data_region(const char *file, const char *name, const size_t *start, const size_t *nread, void *data)
-{
-
-  int ncid, varid, status;
-  nc_type vartype;
-  char msg[512];
-
-#ifdef use_netCDF
-  status = nc_open(file, NC_NOWRITE, &ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "get_var_data_region: in opening file %s", file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_inq_varid(ncid, name, &varid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting varid of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-
-  status = nc_inq_vartype(ncid, varid, &vartype);
-  if(status != NC_NOERR) {
-    sprintf(msg, "get_var_data_region: in getting vartype of of %s in file %s ", name, file);
-    handle_netcdf_error(msg, status);
-  }
-
-  switch (vartype) {
-    case NC_DOUBLE:case NC_FLOAT:
-#ifdef OVERLOAD_R4
-      status = nc_get_vara_float(ncid, varid, start, nread, (float *)data);
-#else
-      status = nc_get_vara_double(ncid, varid, start, nread, (double *)data);
-#endif
-      break;
-    case NC_INT:
-      status = nc_get_vara_int(ncid, varid, start, nread, (int *)data);
-      break;
-    default:
-      sprintf(msg, "get_var_data_region: field %s in file %s has an invalid type, "
-              "the type should be NC_DOUBLE, NC_FLOAT or NC_INT", name, file);
-      error_handler(msg);
-  }
-
-  if(status != NC_NOERR) {
-    sprintf(msg, "get_var_data_region: in getting data of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_close(ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "get_var_data_region: in closing file %s.", file);
-    handle_netcdf_error(msg, status);
-  }
-#else
-  error_handler("read_mosaic: Add flag -Duse_netCDF when compiling");
-#endif
-
-} /* get_var_data_region */
-
-/******************************************************************************
-   void get_var_text_att(const char *file, const char *name, const char *attname, char *att)
-   get text attribute of field 'name' from 'file
-******************************************************************************/
-void get_var_text_att(const char *file, const char *name, const char *attname, char *att)
-{
-  int ncid, varid, status;
-  char msg[512];
-
-#ifdef use_netCDF
-  status = nc_open(file, NC_NOWRITE, &ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in opening file %s", file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_inq_varid(ncid, name, &varid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting varid of %s from file %s.", name, file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_get_att_text(ncid, varid, attname, att);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in getting attribute %s of %s from file %s.", attname, name, file);
-    handle_netcdf_error(msg, status);
-  }
-  status = nc_close(ncid);
-  if(status != NC_NOERR) {
-    sprintf(msg, "in closing file %s.", file);
-    handle_netcdf_error(msg, status);
-  }
-#else
-  error_handler("read_mosaic: Add flag -Duse_netCDF when compiling");
-#endif
-
-} /* get_var_text_att */
-
-/***********************************************************************
-  return number of overlapping cells.
-***********************************************************************/
-int read_mosaic_xgrid_size_( const char *xgrid_file )
-{
-  return read_mosaic_xgrid_size(xgrid_file);
-}
-
-int read_mosaic_xgrid_size( const char *xgrid_file )
-{
-  int ncells;
-
-  ncells = get_dimlen(xgrid_file, "ncells");
-  return ncells;
-}
-
-#ifdef OVERLOAD_R4
-float get_global_area(void)
-{
-  float garea;
-#else
-  double get_global_area(void)
-  {
-    double garea;
-#endif
-    garea = 4*M_PI*RADIUS*RADIUS;
-
-    return garea;
-  }
-
-#ifdef OVERLOAD_R4
-  float get_global_area_(void)
-  {
-    float garea;
-#else
-    double get_global_area_(void)
-    {
-      double garea;
-#endif
-      garea = 4*M_PI*RADIUS*RADIUS;
-
-      return garea;
-    }
-
-
-    /****************************************************************************/
-#ifdef OVERLOAD_R4
-    void read_mosaic_xgrid_order1_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area )
-#else
-        void read_mosaic_xgrid_order1_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area )
-#endif
-    {
-      read_mosaic_xgrid_order1(xgrid_file, i1, j1, i2, j2, area);
-
-    }
-
-#ifdef OVERLOAD_R4
-    void read_mosaic_xgrid_order1(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area )
-#else
-        void read_mosaic_xgrid_order1(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area )
-#endif
-    {
-      int    ncells, n;
-      int    *tile1_cell, *tile2_cell;
-#ifdef OVERLOAD_R4
-      float garea;
-#else
-      double garea;
-#endif
-
-      ncells = get_dimlen(xgrid_file, "ncells");
-
-      tile1_cell       = (int *)malloc(ncells*2*sizeof(int));
-      tile2_cell       = (int *)malloc(ncells*2*sizeof(int));
-      get_var_data(xgrid_file, "tile1_cell", tile1_cell);
-      get_var_data(xgrid_file, "tile2_cell", tile2_cell);
-
-      get_var_data(xgrid_file, "xgrid_area", area);
-
-      garea = 4*M_PI*RADIUS*RADIUS;
-
-      for(n=0; n<ncells; n++) {
-        i1[n] = tile1_cell[n*2] - 1;
-        j1[n] = tile1_cell[n*2+1] - 1;
-        i2[n] = tile2_cell[n*2] - 1;
-        j2[n] = tile2_cell[n*2+1] - 1;
-        area[n] /= garea; /* rescale the exchange grid area to unit earth area */
-      }
-
-      free(tile1_cell);
-      free(tile2_cell);
-
-    } /* read_mosaic_xgrid_order1 */
-
-
-#ifdef OVERLOAD_R4
-    void read_mosaic_xgrid_order1_region_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area, int *isc, int *iec )
-#else
-        void read_mosaic_xgrid_order1_region_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area, int *isc, int *iec )
-#endif
-    {
-      read_mosaic_xgrid_order1_region(xgrid_file, i1, j1, i2, j2, area, isc, iec);
-
-    }
-
-#ifdef OVERLOAD_R4
-    void read_mosaic_xgrid_order1_region(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area, int *isc, int *iec )
-#else
-        void read_mosaic_xgrid_order1_region(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area, int *isc, int *iec )
-#endif
-    {
-      int    ncells, n, i;
-      int    *tile1_cell, *tile2_cell;
-      size_t start[4], nread[4];
-#ifdef OVERLOAD_R4
-      float garea;
-#else
-      double garea;
-#endif
-
-      ncells = *iec-*isc+1;
-
-      tile1_cell       = (int *)malloc(ncells*2*sizeof(int));
-      tile2_cell       = (int *)malloc(ncells*2*sizeof(int));
-      for(i=0; i<4; i++) {
-        start[i] = 0; nread[i] = 1;
-      }
-
-      start[0] = *isc;
-      nread[0] = ncells;
-      nread[1] = 2;
-
-      get_var_data_region(xgrid_file, "tile1_cell", start, nread, tile1_cell);
-      get_var_data_region(xgrid_file, "tile2_cell", start, nread, tile2_cell);
-
-      nread[1] = 1;
-
-      get_var_data_region(xgrid_file, "xgrid_area", start, nread, area);
-
-      garea = 4*M_PI*RADIUS*RADIUS;
-
-      for(n=0; n<ncells; n++) {
-        i1[n] = tile1_cell[n*2] - 1;
-        j1[n] = tile1_cell[n*2+1] - 1;
-        i2[n] = tile2_cell[n*2] - 1;
-        j2[n] = tile2_cell[n*2+1] - 1;
-        area[n] /= garea; /* rescale the exchange grid area to unit earth area */
-      }
-
-      free(tile1_cell);
-      free(tile2_cell);
-
-    } /* read_mosaic_xgrid_order1 */
-
-    /* NOTE: di, dj is for tile1, */
-    /****************************************************************************/
-#ifdef OVERLOAD_R4
-    void read_mosaic_xgrid_order2_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area, float *di, float *dj )
-#else
-        void read_mosaic_xgrid_order2_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area, double *di, double *dj )
-#endif
-    {
-      read_mosaic_xgrid_order2(xgrid_file, i1, j1, i2, j2, area, di, dj);
-
-    }
-#ifdef OVERLOAD_R4
-    void read_mosaic_xgrid_order2(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area, float *di, float *dj )
-#else
-        void read_mosaic_xgrid_order2(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area, double *di, double *dj )
-#endif
-
-    {
-      int    ncells, n;
-      int    *tile1_cell, *tile2_cell;
-      double *tile1_distance;
-#ifdef OVERLOAD_R4
-      float garea;
-#else
-      double garea;
-#endif
-      ncells = get_dimlen(xgrid_file, "ncells");
-
-      tile1_cell       = (int    *)malloc(ncells*2*sizeof(int   ));
-      tile2_cell       = (int    *)malloc(ncells*2*sizeof(int   ));
-      tile1_distance   = (double *)malloc(ncells*2*sizeof(double));
-      get_var_data(xgrid_file, "tile1_cell", tile1_cell);
-      get_var_data(xgrid_file, "tile2_cell", tile2_cell);
-      get_var_data(xgrid_file, "xgrid_area", area);
-      get_var_data(xgrid_file, "tile1_distance", tile1_distance);
-
-      garea = 4*M_PI*RADIUS*RADIUS;
-
-      for(n=0; n<ncells; n++) {
-        i1[n] = tile1_cell[n*2] - 1;
-        j1[n] = tile1_cell[n*2+1] - 1;
-        i2[n] = tile2_cell[n*2] - 1;
-        j2[n] = tile2_cell[n*2+1] - 1;
-        di[n] = tile1_distance[n*2];
-        dj[n] = tile1_distance[n*2+1];
-        area[n] /= garea; /* rescale the exchange grid area to unit earth area */
-      }
-
-      free(tile1_cell);
-      free(tile2_cell);
-      free(tile1_distance);
-
-    } /* read_mosaic_xgrid_order2 */
-
-    /******************************************************************************
-  int read_mosaic_ntiles(const char *mosaic_file)
-  return number tiles in mosaic_file
-    ******************************************************************************/
-    int read_mosaic_ntiles_(const char *mosaic_file)
-    {
-      return read_mosaic_ntiles(mosaic_file);
-    }
-    int read_mosaic_ntiles(const char *mosaic_file)
-    {
-
-      int ntiles;
-
-      ntiles = get_dimlen(mosaic_file, "ntiles");
-
-      return ntiles;
-
-    } /* read_mosaic_ntiles */
-
-    /******************************************************************************
-  int read_mosaic_ncontacts(const char *mosaic_file)
-  return number of contacts in mosaic_file
-    ******************************************************************************/
-    int read_mosaic_ncontacts_(const char *mosaic_file)
-    {
-      return read_mosaic_ncontacts(mosaic_file);
-    }
-    int read_mosaic_ncontacts(const char *mosaic_file)
-    {
-
-      int ncontacts;
-
-      if(field_exist(mosaic_file, "contacts") )
-        ncontacts = get_dimlen(mosaic_file, "ncontact");
-      else
-        ncontacts = 0;
-
-      return ncontacts;
-
-    } /* read_mosaic_ncontacts */
-
-
-    /*****************************************************************************
-  void read_mosaic_grid_sizes(const char *mosaic_file, int *nx, int *ny)
-  read mosaic grid size of each tile, currently we are assuming the refinement is 2.
-  We assume the grid files are located at the same directory as mosaic_file.
-    *****************************************************************************/
-    void read_mosaic_grid_sizes_(const char *mosaic_file, int *nx, int *ny)
-    {
-      read_mosaic_grid_sizes(mosaic_file, nx, ny);
-    }
-    void read_mosaic_grid_sizes(const char *mosaic_file, int *nx, int *ny)
-    {
-      unsigned int ntiles, n;
-      char gridfile[STRING], tilefile[2*STRING];
-      char dir[STRING];
-      const int x_refine = 2, y_refine = 2;
-
-      get_file_dir(mosaic_file, dir);
-      ntiles = get_dimlen(mosaic_file, "ntiles");
-      for(n = 0; n < ntiles; n++) {
-        get_string_data_level(mosaic_file, "gridfiles", gridfile, &n);
-        sprintf(tilefile, "%s/%s", dir, gridfile);
-        nx[n] = get_dimlen(tilefile, "nx");
-        ny[n] = get_dimlen(tilefile, "ny");
-        if(nx[n]%x_refine != 0) error_handler("Error from read_mosaic_grid_sizes: nx is not divided by x_refine");
-        if(ny[n]%y_refine != 0) error_handler("Error from read_mosaic_grid_sizes: ny is not divided by y_refine");
-        nx[n] /= x_refine;
-        ny[n] /= y_refine;
-      }
-
-    } /* read_mosaic_grid_sizes */
-
-
-    /******************************************************************************
-  void read_mosaic_contact(const char *mosaic_file)
-  read mosaic contact information
-    ******************************************************************************/
-    void read_mosaic_contact_(const char *mosaic_file, int *tile1, int *tile2, int *istart1, int *iend1,
-                              int *jstart1, int *jend1, int *istart2, int *iend2, int *jstart2, int *jend2)
-    {
-      read_mosaic_contact(mosaic_file, tile1, tile2, istart1, iend1, jstart1, jend1, istart2, iend2, jstart2, jend2);
-    }
-
-    /* transfer the index from supergrid to model grid
-       return 0 if istart = iend
-       otherwise return 1.
-    */
-
-    int transfer_to_model_index(int istart_in, int iend_in, int *istart_out, int *iend_out, int refine_ratio)
-    {
-
-      int type;
-
-      if( istart_in == iend_in ) {
-        type = 0;
-        istart_out[0] = (istart_in+1)/refine_ratio-1;
-        iend_out[0]  = istart_out[0];
-      }
-      else {
-        type = 1;
-        if( iend_in > istart_in ) {
-          istart_out[0] = istart_in - 1;
-          iend_out[0]   = iend_in - refine_ratio;
-        }
-        else {
-          istart_out[0] = istart_in - refine_ratio;
-          iend_out[0]   = iend_in - 1;
-        }
-
-        if( istart_out[0]%refine_ratio || iend_out[0]%refine_ratio)
-          error_handler("Error from read_mosaic: mismatch between refine_ratio and istart_in/iend_in");
-        istart_out[0] /= refine_ratio;
-        iend_out[0]   /= refine_ratio;
-      }
-
-      return type;
-
-    }
-
-
-    void read_mosaic_contact(const char *mosaic_file, int *tile1, int *tile2, int *istart1, int *iend1,
-                             int *jstart1, int *jend1, int *istart2, int *iend2, int *jstart2, int *jend2)
-    {
-      char contacts[STRING];
-      char **gridtiles;
-#define MAXVAR 40
-      char pstring[MAXVAR][STRING];
-      unsigned int nstr, ntiles, ncontacts, n, m, l, found;
-      const int x_refine = 2, y_refine = 2;
-      int i1_type, j1_type, i2_type, j2_type;
-
-      ntiles = get_dimlen(mosaic_file, "ntiles");
-      gridtiles = (char **)malloc(ntiles*sizeof(char *));
-      for(n=0; n<ntiles; n++) {
-        gridtiles[n] = (char *)malloc(STRING*sizeof(char));
-        get_string_data_level(mosaic_file, "gridtiles", gridtiles[n], &n);
-      }
-
-      ncontacts = get_dimlen(mosaic_file, "ncontact");
-      for(n = 0; n < ncontacts; n++) {
-        get_string_data_level(mosaic_file, "contacts", contacts, &n);
-        /* parse the string contacts to get tile number */
-        tokenize( contacts, ":", STRING, MAXVAR, (char *)pstring, &nstr);
-        if(nstr != 4) error_handler("Error from read_mosaic: number of elements "
-                                    "in contact seperated by :/:: should be 4");
-        found = 0;
-        for(m=0; m<ntiles; m++) {
-          if(strcmp(gridtiles[m], pstring[1]) == 0) { /*found the tile name */
-            found = 1;
-            tile1[n] = m+1;
-            break;
-          }
-        }
-        if(!found) error_handler("error from read_mosaic: the first tile name specified "
-                                 "in contact is not found in tile list");
-        found = 0;
-        for(m=0; m<ntiles; m++) {
-          if(strcmp(gridtiles[m], pstring[3]) == 0) { /*found the tile name */
-            found = 1;
-            tile2[n] = m+1;
-            break;
-          }
-        }
-        if(!found) error_handler("error from read_mosaic: the second tile name specified "
-                                 "in contact is not found in tile list");
-        get_string_data_level(mosaic_file, "contact_index", contacts, &n);
-        /* parse the string to get contact index */
-        tokenize( contacts, ":,", STRING, MAXVAR, (char *)pstring, &nstr);
-        if(nstr != 8) error_handler("Error from read_mosaic: number of elements "
-                                    "in contact_index seperated by :/, should be 8");
-        /* make sure the string is only composed of numbers */
-        for(m=0; m<nstr; m++){
-          for(l=0; l<strlen(pstring[m]); l++){
-            if(pstring[m][l] > '9' ||  pstring[m][l] < '0' ) {
-              error_handler("Error from read_mosaic: some of the character in "
-                            "contact_indices except token is not digit number");
-            }
-          }
-        }
-        istart1[n] = atoi(pstring[0]);
-        iend1[n]   = atoi(pstring[1]);
-        jstart1[n] = atoi(pstring[2]);
-        jend1[n]   = atoi(pstring[3]);
-        istart2[n] = atoi(pstring[4]);
-        iend2[n]   = atoi(pstring[5]);
-        jstart2[n] = atoi(pstring[6]);
-        jend2[n]   = atoi(pstring[7]);
-        i1_type = transfer_to_model_index(istart1[n], iend1[n], istart1+n, iend1+n, x_refine);
-        j1_type = transfer_to_model_index(jstart1[n], jend1[n], jstart1+n, jend1+n, y_refine);
-        i2_type = transfer_to_model_index(istart2[n], iend2[n], istart2+n, iend2+n, x_refine);
-        j2_type = transfer_to_model_index(jstart2[n], jend2[n], jstart2+n, jend2+n, y_refine);
-        if( i1_type == 0 && j1_type == 0 )
-          error_handler("Error from read_mosaic_contact:istart1==iend1 and jstart1==jend1");
-        if( i2_type == 0 && j2_type == 0 )
-          error_handler("Error from read_mosaic_contact:istart2==iend2 and jstart2==jend2");
-        if( i1_type + j1_type != i2_type + j2_type )
-          error_handler("Error from read_mosaic_contact: It is not a line or overlap contact");
-
-      }
-
-      for(m=0; m<ntiles; m++) {
-        free(gridtiles[m]);
-      }
-
-      free(gridtiles);
-
-
-    } /* read_mosaic_contact */
-
-
-    /******************************************************************************
-  void read_mosaic_grid_data(const char *mosaic_file, const char *name, int nx, int ny,
-                             double *data, int level, int ioff, int joff)
-  read mosaic grid information onto model grid. We assume the refinement is 2 right now.
-  We may remove this restriction in the future. nx and ny are model grid size. level
-  is the tile number. ioff and joff to indicate grid location. ioff =0 and joff = 0
-  for C-cell. ioff=0 and joff=1 for E-cell, ioff=1 and joff=0 for N-cell,
-  ioff=1 and joff=1 for T-cell
-    ******************************************************************************/
-    void read_mosaic_grid_data(const char *mosaic_file, const char *name, int nx, int ny,
-                               double *data, unsigned int level, int ioff, int joff)
-    {
-      char   tilefile[STRING], gridfile[STRING], dir[STRING];
-      double *tmp;
-      int    ni, nj, nxp, nyp, i, j;
-
-      get_file_dir(mosaic_file, dir);
-
-      get_string_data_level(mosaic_file, "gridfiles", gridfile, &level);
-      sprintf(tilefile, "%s/%s", dir, gridfile);
-
-      ni = get_dimlen(tilefile, "nx");
-      nj = get_dimlen(tilefile, "ny");
-
-      if( ni != nx*2 || nj != ny*2) error_handler("supergrid size should be double of the model grid size");
-      tmp = (double *)malloc((ni+1)*(nj+1)*sizeof(double));
-      get_var_data( tilefile, name, tmp);
-      nxp = nx + 1 - ioff;
-      nyp = ny + 1 - joff;
-      for(j=0; j<nyp; j++) for(i=0; i<nxp; i++) data[j*nxp+i] = tmp[(2*j+joff)*(ni+1)+2*i+ioff];
-      free(tmp);
-
-    } /* read_mosaic_grid_data */
diff --git a/mosaic/read_mosaic.h b/mosaic/read_mosaic.h
deleted file mode 100644
index 3612fb7bcb..0000000000
--- a/mosaic/read_mosaic.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/***********************************************************************
- *                   GNU Lesser General Public License
- *
- * This file is part of the GFDL Flexible Modeling System (FMS).
- *
- * FMS is free software: you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * FMS is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
- **********************************************************************/
-#ifndef READ_MOSAIC_H_
-#define READ_MOSAIC_H_
-
-/* netcdf helpers */
-/* perhaps should consider making static, or breaking out into seperate file,
-   some of these names (field_exist) could pollute namespace... */
-
-void handle_netcdf_error(const char *msg, int status );
-
-void get_file_dir(const char *file, char *dir);
-
-int field_exist(const char* file, const char *name);
-
-int get_dimlen(const char* file, const char *name);
-
-void get_string_data_level(const char *file, const char *name, char *data, const unsigned int* level);
-
-void get_var_data(const char *file, const char *name, void *data);
-
-void get_var_data_region(const char *file, const char *name, const size_t *start, const size_t *nread, void *data);
-
-void get_string_data(const char *file, const char *name, char *data);
-
-void get_var_text_att(const char *file, const char *name, const char *attname, char *att);
-/* end netcdf helpers */
-
-int read_mosaic_xgrid_size( const char *xgrid_file );
-
-#ifdef OVERLOAD_R4
-
-void read_mosaic_xgrid_order1(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area );
-
-void read_mosaic_xgrid_order1_region(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area, int *isc, int *iec );
-
-void read_mosaic_xgrid_order2(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2,
-                              float *area, float *di, float *dj );
-
-float get_global_area(void);
-
-#else
-
-void read_mosaic_xgrid_order1(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area );
-
-void read_mosaic_xgrid_order1_region(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area, int *isc, int *iec );
-
-void read_mosaic_xgrid_order2(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2,
-                              double *area, double *di, double *dj );
-
-double get_global_area(void);
-
-#endif
-
-int read_mosaic_ntiles(const char *mosaic_file);
-
-int read_mosaic_ncontacts(const char *mosaic_file);
-
-void read_mosaic_grid_sizes(const char *mosaic_file, int *nx, int *ny);
-
-void read_mosaic_contact(const char *mosaic_file, int *tile1, int *tile2, int *istart1, int *iend1,
-                         int *jstart1, int *jend1, int *istart2, int *iend2, int *jstart2, int *jend2);
-
-int transfer_to_model_index(int istart_in, int iend_in, int *istart_out, int *iend_out, int refine_ratio);
-
-void read_mosaic_grid_data(const char *mosaic_file, const char *name, int nx, int ny,
-                           double *data, unsigned int level, int ioff, int joff);
-
-
-void read_mosaic_contact_(const char *mosaic_file, int *tile1, int *tile2, int *istart1, int *iend1,
-                          int *jstart1, int *jend1, int *istart2, int *iend2, int *jstart2, int *jend2);
-
-int read_mosaic_xgrid_size_( const char *xgrid_file );
-
-int read_mosaic_ntiles_(const char *mosaic_file);
-
-int read_mosaic_ncontacts_(const char *mosaic_file);
-
-void read_mosaic_grid_sizes_(const char *mosaic_file, int *nx, int *ny);
-
-#ifdef OVERLOAD_R4
-
-float get_global_area_(void);
-
-void read_mosaic_xgrid_order1_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area );
-
-void read_mosaic_xgrid_order1_region_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area, int *isc, int *iec );
-
-void read_mosaic_xgrid_order2_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, float *area, float *di, float *dj );
-
-#else
-
-double get_global_area_(void);
-
-void read_mosaic_xgrid_order1_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area );
-
-void read_mosaic_xgrid_order1_region_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area, int *isc, int *iec );
-
-void read_mosaic_xgrid_order2_(const char *xgrid_file, int *i1, int *j1, int *i2, int *j2, double *area, double *di, double *dj );
-
-#endif  /* OVERLOAD_R4 */
-
-#endif
diff --git a/test_fms/mosaic/Makefile.am b/test_fms/mosaic/Makefile.am
deleted file mode 100644
index ff36605e0a..0000000000
--- a/test_fms/mosaic/Makefile.am
+++ /dev/null
@@ -1,48 +0,0 @@
-#***********************************************************************
-#*                   GNU Lesser General Public License
-#*
-#* This file is part of the GFDL Flexible Modeling System (FMS).
-#*
-#* FMS is free software: you can redistribute it and/or modify it under
-#* the terms of the GNU Lesser General Public License as published by
-#* the Free Software Foundation, either version 3 of the License, or (at
-#* your option) any later version.
-#*
-#* FMS is distributed in the hope that it will be useful, but WITHOUT
-#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-#* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#* for more details.
-#*
-#* You should have received a copy of the GNU Lesser General Public
-#* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
-#***********************************************************************
-
-# This is an automake file for the test_fms/mosaic directory of the
-# FMS package.
-
-# uramirez, Ed Hartnett
-
-# Find the needed mod and include files.
-AM_CPPFLAGS = -I$(top_srcdir)/include -I$(MODDIR)
-
-# Link to the FMS library.
-LDADD = $(top_builddir)/libFMS/libFMS.la
-
-# Build this test program.
-check_PROGRAMS = test_mosaic
-
-# This is the source code for the test.
-test_mosaic_SOURCES = test_mosaic.F90
-
-# Run the test program.
-TESTS = test_mosaic2.sh
-
-TEST_EXTENSIONS = .sh
-SH_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) \
-                  $(abs_top_srcdir)/test_fms/tap-driver.sh
-
-# These files are also included in the distribution.
-EXTRA_DIST = test_mosaic2.sh
-
-# Clean up
-CLEANFILES = input.nml *.nc *.out *.dpi *.spi *.dyn *.spl
diff --git a/test_fms/mosaic/test_mosaic.F90 b/test_fms/mosaic/test_mosaic.F90
deleted file mode 100644
index 6c436d27f2..0000000000
--- a/test_fms/mosaic/test_mosaic.F90
+++ /dev/null
@@ -1,145 +0,0 @@
-!***********************************************************************
-!*                   GNU Lesser General Public License
-!*
-!* This file is part of the GFDL Flexible Modeling System (FMS).
-!*
-!* FMS is free software: you can redistribute it and/or modify it under
-!* the terms of the GNU Lesser General Public License as published by
-!* the Free Software Foundation, either version 3 of the License, or (at
-!* your option) any later version.
-!*
-!* FMS is distributed in the hope that it will be useful, but WITHOUT
-!* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-!* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-!* for more details.
-!*
-!* You should have received a copy of the GNU Lesser General Public
-!* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
-!***********************************************************************
-
-!> @brief  This programs tests calls to get_mosaic_ntiles, get_mosaic_ncontacts,
-!! get_mosaic_grid_sizes, get_mosaic_contact
-program test_mosaic
-
-use mosaic2_mod, only : get_mosaic_ntiles, get_mosaic_ncontacts
-use mosaic2_mod, only : get_mosaic_grid_sizes, get_mosaic_contact
-use mpp_mod,     only : mpp_init, mpp_error, FATAL, mpp_sync, mpp_npes, mpp_get_current_pelist
-use fms2_io_mod, only : open_file, close_file, FmsNetcdfFile_t
-use fms2_io_mod, only : register_axis, register_field, write_data
-use fms_mod,     only : fms_init, fms_end
-
-implicit none
-
-integer              :: ntiles         !< Number of tiles
-integer              :: ncontacts      !< Number of contacts
-integer              :: n              !< For do loops
-integer, allocatable :: tile1(:)       !< tile number for first contact
-integer, allocatable :: tile2(:)       !< tile number of the second contact
-integer, allocatable :: nx(:), ny(:)   !< Number of x/y points for each tile
-integer, allocatable :: istart1(:), iend1(:), jstart1(:), jend1(:) !< Indexes of first contact point
-integer, allocatable :: istart2(:), iend2(:), jstart2(:), jend2(:) !< Indexes of second contact point
-character(len=128)   :: mosaic_file    !< Mosaic filename
-type(FmsNetcdfFile_t):: mosaic_fileobj !< Fileobj for the file read by the test
-integer              :: answers(2, 8)  !< Expected results
-integer, allocatable :: pes(:)         !< List of pes in the current pelist
-
-call mpp_init()
-call fms_init()
-
-mosaic_file = "INPUT/ocean_mosaic.nc"
-answers(1,:) = (/1440, 1440, 1, 1080, 1, 1, 1, 1080 /)
-answers(2,:) = (/1, 720, 1080, 1080, 1440, 721, 1080, 1080 /)
-
-allocate(pes(mpp_npes()))
-call mpp_get_current_pelist(pes)
-
-call create_files(pes)
-
-!< Open the mosaic file
-if(.not. open_file(mosaic_fileobj, mosaic_file, 'read', pelist=pes)) then
-  call mpp_error(FATAL, 'test_mosaic: error in opening file '//trim(mosaic_file))
-endif
-
-ntiles = get_mosaic_ntiles(mosaic_fileobj)
-ncontacts = get_mosaic_ncontacts(mosaic_fileobj)
-allocate(nx(ntiles), ny(ntiles))
-allocate(tile1(ncontacts), tile2(ncontacts) )
-allocate(istart1(ncontacts), iend1(ncontacts), jstart1(ncontacts), jend1(ncontacts) )
-allocate(istart2(ncontacts), iend2(ncontacts), jstart2(ncontacts), jend2(ncontacts) )
-
-call get_mosaic_grid_sizes(mosaic_fileobj, nx, ny )
-call get_mosaic_contact(mosaic_fileobj, tile1, tile2, istart1, iend1, jstart1, jend1, istart2, iend2, jstart2, jend2)
-
-!< Compare with expected results:
-if (ntiles .ne. 1) call mpp_error(FATAL, "ntiles is not equal to 1")
-
-do n = 1, ntiles
-   if (nx(n) .ne. 2880/2)  call mpp_error(FATAL, "nx is not the expected result")
-   if (ny(n) .ne. 2160/2)  call mpp_error(FATAL, "ny is not the expected result")
-end do
-
-if (ncontacts .ne. 2) call mpp_error(FATAL, "ncontacts is not the expected result")
-do n = 1, ncontacts
-   if (istart1(n) .ne. answers(n,1)) call mpp_error(FATAL, "istart1 is not the expected result")
-   if (iend1(n)   .ne. answers(n,2)) call mpp_error(FATAL, "iend1 is not the expected result")
-
-   if (jstart1(n) .ne. answers(n,3)) call mpp_error(FATAL, "jstart1 is not the expected result")
-   if (jend1(n)   .ne. answers(n,4)) call mpp_error(FATAL, "jend1 is not the expected result")
-
-   if (istart2(n) .ne. answers(n,5)) call mpp_error(FATAL, "istart2 is not the expected result")
-   if (iend2(n)   .ne. answers(n,6)) call mpp_error(FATAL, "iend2 is not the expected result")
-
-   if (jstart2(n) .ne. answers(n,7)) call mpp_error(FATAL, "jstart2 is not the expected result")
-   if (jend2(n)   .ne. answers(n,8)) call mpp_error(FATAL, "jend2 is not the expected result")
-end do
-
-deallocate(tile1, tile2, nx, ny)
-deallocate(istart1, iend1, jstart1, jend1)
-deallocate(istart2, iend2, jstart2, jend2)
-
-call close_file(mosaic_fileobj)
-call fms_end()
-
-contains
-
-subroutine create_files(pes)
-   integer, intent(in)  :: pes(:)         !< List of pes
-
-   type(FmsNetcdfFile_t):: fileobj        !< Fileobj for the files written by the test
-   character(len=255)     :: str_array(2)   !< Array of strings because GNU
-
-   if( open_file(fileobj, mosaic_file, 'overwrite', pelist=pes)) then
-      call register_axis(fileobj, "ntiles", 1)
-      call register_axis(fileobj, "ncontact", 2)
-      call register_axis(fileobj, "string", 255)
-
-      str_array(1) = "string"
-      str_array(2) = "ncontact"
-      call register_field(fileobj, "contacts", "char",  dimensions=str_array)
-      call register_field(fileobj, "contact_index", "char",  dimensions=str_array)
-      call register_field(fileobj, "gridfiles", "char", dimensions=(/"string", "ntiles"/))
-      call register_field(fileobj, "gridtiles", "char", dimensions=(/"string", "ntiles"/))
-
-      call write_data(fileobj, "gridfiles", (/"ocean_hgrid.nc"/))
-      call write_data(fileobj, "gridtiles", (/"tile1"/))
-
-      str_array(1) = "2880:2880,1:2160::1:1,1:2160"
-      str_array(2) = "1:1440,2160:2160::2880:1441,2160:2160"
-      call write_data(fileobj, "contact_index", str_array)
-      call write_data(fileobj, "contacts", &
-         & (/"ocean_mosaic:tile1::ocean_mosaic:tile1", "ocean_mosaic:tile1::ocean_mosaic:tile1" /))
-
-      call close_file(fileobj)
-   endif
-   call mpp_sync()
-
-   if( open_file(fileobj, "INPUT/ocean_hgrid.nc", "overwrite", pelist=pes)) then
-      call register_axis(fileobj, "nx", 2880)
-      call register_axis(fileobj, "ny", 2160)
-
-      call close_file(fileobj)
-   endif
-   call mpp_sync()
-end subroutine create_files
-
-end program test_mosaic
diff --git a/test_fms/mosaic/test_mosaic2.sh b/test_fms/mosaic/test_mosaic2.sh
deleted file mode 100755
index f67991a9a3..0000000000
--- a/test_fms/mosaic/test_mosaic2.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/sh
-
-#***********************************************************************
-#*                   GNU Lesser General Public License
-#*
-#* This file is part of the GFDL Flexible Modeling System (FMS).
-#*
-#* FMS is free software: you can redistribute it and/or modify it under
-#* the terms of the GNU Lesser General Public License as published by
-#* the Free Software Foundation, either version 3 of the License, or (at
-#* your option) any later version.
-#*
-#* FMS is distributed in the hope that it will be useful, but WITHOUT
-#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-#* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#* for more details.
-#*
-#* You should have received a copy of the GNU Lesser General Public
-#* License along with FMS.  If not, see <http://www.gnu.org/licenses/>.
-#***********************************************************************
-
-# This is part of the GFDL FMS package. This is a shell script to
-# execute tests in the test_fms/field_manager directory.
-
-# Ed Hartnett 11/29/19
-
-# Set common test settings.
-. ../test-lib.sh
-
-# Copy files for test.
-touch input.nml
-rm -rf INPUT
-mkdir INPUT
-test_expect_success "test mosaic" '
-  mpirun -n 2 ./test_mosaic
-'
-rm -rf INPUT
-test_done

From 738445026adf212361fd4da49685d6fc322a06e2 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 14:59:39 -0600
Subject: [PATCH 43/45] remove one more mosaic

---
 test_fms/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_fms/Makefile.am b/test_fms/Makefile.am
index 4472863ad2..e941b6a58f 100644
--- a/test_fms/Makefile.am
+++ b/test_fms/Makefile.am
@@ -25,7 +25,7 @@ ACLOCAL_AMFLAGS = -I m4
 
 # Make targets will be run in each subdirectory. Order is significant.
 SUBDIRS = coupler diag_manager data_override exchange monin_obukhov drifters \
-mosaic interpolator fms mpp mpp_io time_interp time_manager \
+interpolator fms mpp mpp_io time_interp time_manager \
 horiz_interp field_manager axis_utils affinity fms2_io parser string_utils
 
 # testing utility scripts to distribute

From 29205057e050839a313072b79cb8cd033c6a72eb Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 15:06:45 -0600
Subject: [PATCH 44/45] More mosaic clean-up

---
 libFMS/Makefile.am            |   1 -
 mosaic2/Makefile.am           |  13 +-
 mosaic2/include/grid2.inc     | 769 ++++++++++++++++++++++++++++++++++
 mosaic2/include/grid2_r4.fh   |  58 +++
 mosaic2/include/grid2_r8.fh   |  58 +++
 mosaic2/include/mosaic2.inc   | 164 ++++++++
 mosaic2/include/mosaic2_r4.fh |  40 ++
 mosaic2/include/mosaic2_r8.fh |  40 ++
 8 files changed, 1137 insertions(+), 6 deletions(-)
 create mode 100644 mosaic2/include/grid2.inc
 create mode 100644 mosaic2/include/grid2_r4.fh
 create mode 100644 mosaic2/include/grid2_r8.fh
 create mode 100644 mosaic2/include/mosaic2.inc
 create mode 100644 mosaic2/include/mosaic2_r4.fh
 create mode 100644 mosaic2/include/mosaic2_r8.fh

diff --git a/libFMS/Makefile.am b/libFMS/Makefile.am
index e7690d9b15..27bf0fcb1b 100644
--- a/libFMS/Makefile.am
+++ b/libFMS/Makefile.am
@@ -39,7 +39,6 @@ libFMS_la_LIBADD += $(top_builddir)/memutils/libmemutils.la
 libFMS_la_LIBADD += $(top_builddir)/fms/libfms.la
 libFMS_la_LIBADD += $(top_builddir)/fms2_io/libfms2_io.la
 libFMS_la_LIBADD += $(top_builddir)/affinity/libfms_affinity.la
-libFMS_la_LIBADD += $(top_builddir)/mosaic/libmosaic.la
 libFMS_la_LIBADD += $(top_builddir)/mosaic2/libmosaic2.la
 libFMS_la_LIBADD += $(top_builddir)/coupler/libcoupler.la
 libFMS_la_LIBADD += $(top_builddir)/drifters/libdrifters.la
diff --git a/mosaic2/Makefile.am b/mosaic2/Makefile.am
index 4830823af2..fbb28069a8 100644
--- a/mosaic2/Makefile.am
+++ b/mosaic2/Makefile.am
@@ -23,7 +23,7 @@
 # Ed Hartnett 2/22/19
 
 # Include .h and .mod files.
-AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/mosaic
+AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/mosaic2/include
 AM_FCFLAGS = $(FC_MODINC). $(FC_MODOUT)$(MODDIR)
 
 # Build these uninstalled convenience libraries.
@@ -32,14 +32,17 @@ noinst_LTLIBRARIES = libmosaic2.la
 
 libmosaic2_la_SOURCES = \
 mosaic2.F90 \
-grid2.F90
+grid2.F90 \
+include/mosaic2_r4.fh  include/mosaic2_r8.fh  include/mosaic2.inc \
+include/grid2_r4.fh  include/grid2_r8.fh include/grid2.inc
 
 # Some mods are dependant on other mods in this dir.
-grid2_mod.$(FC_MODEXT): mosaic2_mod.$(FC_MODEXT)
+grid2_mod.$(FC_MODEXT): mosaic2_mod.$(FC_MODEXT) include/grid2_r4.fh include/grid2_r8.fh include/grid2.inc
+mosaic2_mod.$(FC_MODEXT): include/mosaic2_r4.fh include/mosaic2_r8.fh include/mosaic2.inc
 
 MODFILES = \
-  mosaic2_mod.$(FC_MODEXT) \
-  grid2_mod.$(FC_MODEXT)
+	mosaic2_mod.$(FC_MODEXT) \
+	grid2_mod.$(FC_MODEXT)
 nodist_include_HEADERS = $(MODFILES)
 BUILT_SOURCES = $(MODFILES)
 
diff --git a/mosaic2/include/grid2.inc b/mosaic2/include/grid2.inc
new file mode 100644
index 0000000000..da237c7177
--- /dev/null
+++ b/mosaic2/include/grid2.inc
@@ -0,0 +1,769 @@
+!***********************************************************************
+!*                             Apache License 2.0
+!*
+!* This file is part of the GFDL Flexible Modeling System (FMS).
+!*
+!* Licensed under the Apache License, Version 2.0 (the "License");
+!* you may not use this file except in compliance with the License.
+!* You may obtain a copy of the License at
+!*
+!*     http://www.apache.org/licenses/LICENSE-2.0
+!*
+!* FMS is distributed in the hope that it will be useful, but WITHOUT
+!* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
+!* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+!* PARTICULAR PURPOSE. See the License for the specific language
+!* governing permissions and limitations under the License.
+!***********************************************************************
+
+!> @file
+
+!> @brief return grid cell area for the specified model component and tile
+subroutine GET_GRID_CELL_AREA_SG_(component, tile, cellarea, domain)
+  character(len=*), intent(in)    :: component !< Component model (atm, lnd, ocn)
+  integer         , intent(in)    :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_) , intent(inout) :: cellarea(:,:) !< Cell area
+  type(domain2d)  , intent(in), optional :: domain !< Domain
+
+  ! local vars
+  integer :: nlon, nlat
+  real(kind=r8_kind), allocatable :: glonb(:,:), glatb(:,:)
+  real(kind=r8_kind), allocatable :: cellarea8(:,:)
+
+  call init_checks("get_grid_cell_area")
+  allocate(cellarea8(size(cellarea,1),size(cellarea,2)))
+
+  select case(grid_version)
+  case(VERSION_GEOLON_T,VERSION_X_T)
+     select case(trim(component))
+     case('LND')
+        call read_data(gridfileobj, 'AREA_LND_CELL', cellarea8)
+     case('ATM','OCN')
+        call read_data(gridfileobj, 'AREA_'//trim(uppercase(component)),cellarea8)
+     case default
+        call mpp_error(FATAL, module_name//'/get_grid_cell_area'//&
+             'Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
+     end select
+     ! convert area to m2
+     cellarea = real( cellarea8*4.0_r8_kind*PI*RADIUS**2, FMS_MOS_KIND_)
+  case(VERSION_OCN_MOSAIC_FILE, VERSION_GRIDFILES)
+     if (present(domain)) then
+        call mpp_get_compute_domain(domain,xsize=nlon,ysize=nlat)
+     else
+        call get_grid_size(component,tile,nlon,nlat)
+     endif
+     allocate(glonb(nlon+1,nlat+1),glatb(nlon+1,nlat+1))
+     call get_grid_cell_vertices(component, tile, glonb, glatb, domain)
+     if (great_circle_algorithm) then
+        call calc_mosaic_grid_great_circle_area(glonb*PI/180.0_r8_kind, glatb*PI/180_r8_kind, cellarea8)
+        cellarea=real(cellarea8,FMS_MOS_KIND_)
+     else
+        call calc_mosaic_grid_area(glonb*PI/180.0_r8_kind, glatb*PI/180_r8_kind, cellarea8)
+        cellarea=real(cellarea8,FMS_MOS_KIND_)
+     end if
+     deallocate(glonb,glatb)
+  end select
+
+  deallocate(cellarea8)
+
+end subroutine GET_GRID_CELL_AREA_SG_
+
+!> @brief get the area of the component per grid cell
+subroutine GET_GRID_COMP_AREA_SG_(component,tile,area,domain)
+  character(len=*) :: component !< Component model (atm, lnd, ocn)
+  integer, intent(in) :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_), intent(inout) :: area(:,:) !< Area of grid cell
+  type(domain2d), intent(in), optional :: domain !< Domain
+  ! local vars
+  integer :: n_xgrid_files ! number of exchange grid files in the mosaic
+  integer :: siz(2), nxgrid
+  integer :: i,j,m,n
+  integer, allocatable :: i1(:), j1(:), i2(:), j2(:)
+  real(kind=r8_kind), allocatable :: xgrid_area(:)
+  real(kind=r8_kind), allocatable :: rmask(:,:)
+  character(len=MAX_NAME) :: &
+     xgrid_name, & ! name of the variable holding xgrid names
+     tile_name,  & ! name of the tile
+     mosaic_name ! name of the mosaic
+  character(len=FMS_PATH_LEN) :: &
+     tilefile,   & ! name of current tile file
+     xgrid_file  ! name of the current xgrid file
+  character(len=4096)     :: attvalue
+  character(len=MAX_NAME), allocatable :: nest_tile_name(:)
+  integer :: is,ie,js,je ! boundaries of our domain
+  integer :: i0, j0 ! offsets for x and y, respectively
+  integer :: num_nest_tile, ntiles
+  logical :: is_nest
+  integer :: found_xgrid_files ! how many xgrid files we actually found in the grid spec
+  integer :: ibegin, iend, bsize, l
+  type(FmsNetcdfFile_t) :: tilefileobj, xgrid_fileobj
+
+  real(r8_kind),allocatable :: area8(:,:)
+
+  call init_checks("get_grid_comp_area")
+  allocate(area8(size(area,1),size(area,2)))
+
+  select case (grid_version   )
+  case(VERSION_GEOLON_T,VERSION_X_T)
+     select case(component)
+     case('ATM')
+        call read_data(gridfileobj,'AREA_ATM',area8)
+     case('OCN')
+        allocate(rmask(size(area8,1),size(area8,2)))
+        call read_data(gridfileobj,'AREA_OCN',area8)
+        call read_data(gridfileobj,'wet',     rmask)
+        area = real(area8*rmask, FMS_MOS_KIND_)
+        deallocate(rmask)
+     case('LND')
+        call read_data(gridfileobj,'AREA_LND',area8)
+     case default
+        call mpp_error(FATAL, module_name//'/get_grid_comp_area'//&
+             'Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
+     end select
+  case(VERSION_OCN_MOSAIC_FILE, VERSION_GRIDFILES) ! mosaic gridspec
+     select case (component)
+     case ('ATM')
+        ! just read the grid cell area and return
+        call get_grid_cell_area(component,tile,area8)
+        area = real(area8, FMS_MOS_KIND_)
+        return
+     case ('LND')
+        xgrid_name = 'aXl_file'
+        call read_data(gridfileobj, 'lnd_mosaic', mosaic_name)
+        tile_name  = trim(mosaic_name)//'_tile'//char(tile+ichar('0'))
+     case ('OCN')
+        xgrid_name = 'aXo_file'
+        call read_data(gridfileobj, 'ocn_mosaic', mosaic_name)
+        tile_name  = trim(mosaic_name)//'_tile'//char(tile+ichar('0'))
+     case default
+        call mpp_error(FATAL, module_name//'/get_grid_comp_area'//&
+             'Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
+     end select
+     ! get the boundaries of the requested domain
+     if(present(domain)) then
+        call mpp_get_compute_domain(domain,is,ie,js,je)
+        i0 = 1-is ; j0=1-js
+     else
+        call get_grid_size(component,tile,ie,je)
+        is = 1 ; i0 = 0
+        js = 1 ; j0 = 0
+     endif
+     if (size(area8,1)/=ie-is+1.or.size(area8,2)/=je-js+1) &
+          call mpp_error(FATAL, module_name//'/get_grid_comp_area '//&
+          'size of the output argument "area" is not consistent with the domain')
+
+     ! find the nest tile
+     call read_data(gridfileobj, 'atm_mosaic', mosaic_name)
+     call get_grid_ntiles('atm', ntiles)
+     allocate(nest_tile_name(ntiles))
+     num_nest_tile = 0
+     do n = 1, ntiles
+        tilefile = read_file_name(mosaic_fileobj(1), 'gridfiles', n)
+        call open_grid_file(tilefileobj, grid_dir//tilefile)
+        if (global_att_exists(tilefileobj, "nest_grid")) then
+           call get_global_attribute(tilefileobj, "nest_grid", attvalue)
+           if(trim(attvalue) == "TRUE") then
+              num_nest_tile = num_nest_tile + 1
+              nest_tile_name(num_nest_tile) = trim(mosaic_name)//'_tile'//char(n+ichar('0'))
+           else if(trim(attvalue) .NE. "FALSE") then
+              call mpp_error(FATAL,module_name//'/get_grid_comp_area value of global attribute nest_grid in file'//&
+                   trim(tilefile)//' should be TRUE or FALSE')
+           endif
+        end if
+        call close_file(tilefileobj)
+     end do
+     area8(:,:) = 0.0_r8_kind
+     if(variable_exists(gridfileobj,xgrid_name)) then
+        ! get the number of the exchange-grid files
+        call get_variable_size(gridfileobj,xgrid_name,siz)
+        n_xgrid_files = siz(2)
+        found_xgrid_files = 0
+        ! loop through all exchange grid files
+        do n = 1, n_xgrid_files
+           ! get the name of the current exchange grid file
+           xgrid_file = read_file_name(gridfileobj,xgrid_name,n)
+           call open_grid_file(xgrid_fileobj, grid_dir//xgrid_file)
+           ! skip the rest of the loop if the name of the current tile isn't found
+           ! in the file name, but check this only if there is more than 1 tile
+           if(n_xgrid_files>1) then
+              if(index(xgrid_file,trim(tile_name))==0) cycle
+           endif
+           found_xgrid_files = found_xgrid_files + 1
+           !---make sure the atmosphere grid is not a nested grid
+           is_nest = .false.
+           do m = 1, num_nest_tile
+              if(index(xgrid_file, trim(nest_tile_name(m))) .NE. 0) then
+                 is_nest = .true.
+                 exit
+              end if
+           end do
+           if(is_nest) cycle
+
+           ! finally read the exchange grid
+           nxgrid = get_mosaic_xgrid_size(xgrid_fileobj)
+           if(nxgrid < BUFSIZE) then
+              allocate(i1(nxgrid), j1(nxgrid), i2(nxgrid), j2(nxgrid), xgrid_area(nxgrid))
+           else
+              allocate(i1(BUFSIZE), j1(BUFSIZE), i2(BUFSIZE), j2(BUFSIZE), xgrid_area(BUFSIZE))
+           endif
+           ibegin = 1
+           do l = 1,nxgrid,BUFSIZE
+              bsize = min(BUFSIZE, nxgrid-l+1)
+              iend = ibegin + bsize - 1
+              call get_mosaic_xgrid(xgrid_fileobj, i1(1:bsize), j1(1:bsize), i2(1:bsize), j2(1:bsize), &
+                   xgrid_area(1:bsize), ibegin, iend)
+              ! and sum the exchange grid areas
+              do m = 1, bsize
+                 i = i2(m); j = j2(m)
+                 if (i<is.or.i>ie) cycle
+                 if (j<js.or.j>je) cycle
+                 area8(i+i0,j+j0) = area8(i+i0,j+j0) + xgrid_area(m)
+              end do
+              ibegin = iend + 1
+           enddo
+           deallocate(i1, j1, i2, j2, xgrid_area)
+           call close_file(xgrid_fileobj)
+        enddo
+        if (found_xgrid_files == 0) &
+             call mpp_error(FATAL, 'get_grid_comp_area no xgrid files were found for component '&
+             //trim(component)//' (mosaic name is '//trim(mosaic_name)//')')
+
+     endif
+     deallocate(nest_tile_name)
+  end select ! version
+  ! convert area to m2
+  area = real(area8*4.0_r8_kind*PI*RADIUS**2, FMS_MOS_KIND_)
+
+  deallocate(area8)
+
+end subroutine GET_GRID_COMP_AREA_SG_
+
+!> @brief return grid cell area for the specified model component and tile on an
+!! unstructured domain
+subroutine GET_GRID_CELL_AREA_UG_(component, tile, cellarea, SG_domain, UG_domain)
+  character(len=*),   intent(in)    :: component !< Component model (atm, lnd, ocn)
+  integer         ,   intent(in)    :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_),   intent(inout) :: cellarea(:) !< Cell area
+  type(domain2d)  ,   intent(in)    :: SG_domain !< Structured Domain
+  type(domainUG)  ,   intent(in)    :: UG_domain !< Unstructured Domain
+  integer :: is, ie, js, je
+  real(kind=FMS_MOS_KIND_), allocatable :: SG_area(:,:)
+
+  call init_checks("get_grid_cell_area")
+  call mpp_get_compute_domain(SG_domain, is, ie, js, je)
+  allocate(SG_area(is:ie, js:je))
+  call get_grid_cell_area(component, tile, SG_area, SG_domain)
+  call mpp_pass_SG_to_UG(UG_domain, SG_area, cellarea)
+  deallocate(SG_area)
+end subroutine GET_GRID_CELL_AREA_UG_
+
+!> @brief get the area of the component per grid cell for an unstructured domain
+subroutine GET_GRID_COMP_AREA_UG_(component, tile, area, SG_domain, UG_domain)
+  character(len=*),   intent(in)    :: component !< Component model (atm, lnd, ocn)
+  integer         ,   intent(in)    :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_), intent(inout) :: area(:) !< Area of the component
+  type(domain2d)  ,   intent(in)    :: SG_domain !< Structured domain
+  type(domainUG)  ,   intent(in)    :: UG_domain !< Unstructured domain
+  integer :: is, ie, js, je
+  real(kind=FMS_MOS_KIND_), allocatable :: SG_area(:,:)
+
+  call init_checks("get_grid_comp_area")
+  call mpp_get_compute_domain(SG_domain, is, ie, js, je)
+  allocate(SG_area(is:ie, js:je))
+  call get_grid_comp_area(component, tile, SG_area, SG_domain)
+  call mpp_pass_SG_to_UG(UG_domain, SG_area, area)
+  deallocate(SG_area)
+
+end subroutine GET_GRID_COMP_AREA_UG_
+
+!> @brief returns arrays of global grid cell boundaries for given model component and
+!! mosaic tile number.
+subroutine GET_GRID_CELL_VERTICES_1D_(component, tile, glonb, glatb)
+  character(len=*), intent(in) :: component !< Component model (atm, lnd, ocn)
+  integer,          intent(in) :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_),intent(inout) :: glonb(:),glatb(:) !< Grid cell vertices
+
+  integer                      :: nlon, nlat
+  integer                      :: start(4), nread(4)
+  real(kind=FMS_MOS_KIND_), allocatable :: tmp(:,:), x_vert_t(:,:,:), y_vert_t(:,:,:)
+  character(len=FMS_PATH_LEN)      :: tilefile
+  type(FmsNetcdfFile_t)  :: tilefileobj
+
+  call init_checks("get_grid_cell_vertices")
+  call get_grid_size_for_one_tile(component, tile, nlon, nlat)
+  if (size(glonb(:))/=nlon+1) &
+       call mpp_error (FATAL, module_name//'/get_grid_cell_vertices_1D '//&
+       'Size of argument "glonb" is not consistent with the grid size')
+  if (size(glatb(:))/=nlat+1) &
+       call mpp_error (FATAL, module_name//'/get_grid_cell_vertices_1D '//&
+       'Size of argument "glatb" is not consistent with the grid size')
+  if(trim(component) .NE. 'ATM' .AND. component .NE. 'LND' .AND. component .NE. 'OCN') then
+     call mpp_error(FATAL, module_name//'/get_grid_cell_vertices_1D '//&
+          'Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
+  endif
+
+  select case(grid_version)
+  case(VERSION_GEOLON_T)
+     select case(trim(component))
+     case('ATM','LND')
+        call read_data(gridfileobj, 'xb'//lowercase(component(1:1)), glonb)
+        call read_data(gridfileobj, 'yb'//lowercase(component(1:1)), glatb)
+     case('OCN')
+        call read_data(gridfileobj, "gridlon_vert_t", glonb)
+        call read_data(gridfileobj, "gridlat_vert_t", glatb)
+     end select
+  case(VERSION_X_T)
+     select case(trim(component))
+     case('ATM','LND')
+        call read_data(gridfileobj, 'xb'//lowercase(component(1:1)), glonb)
+        call read_data(gridfileobj, 'yb'//lowercase(component(1:1)), glatb)
+     case('OCN')
+        allocate (x_vert_t(nlon,1,2), y_vert_t(1,nlat,2) )
+        start = 1; nread = 1
+        nread(1) = nlon; nread(2) = 1; start(3) = 1
+        call read_data(gridfileobj, "x_vert_T", x_vert_t(:,:,1), corner=start, edge_lengths=nread)
+        nread(1) = nlon; nread(2) = 1; start(3) = 2
+        call read_data(gridfileobj, "x_vert_T", x_vert_t(:,:,2), corner=start, edge_lengths=nread)
+
+        nread(1) = 1; nread(2) = nlat; start(3) = 1
+        call read_data(gridfileobj, "y_vert_T", y_vert_t(:,:,1), corner=start, edge_lengths=nread)
+        nread(1) = 1; nread(2) = nlat; start(3) = 4
+        call read_data(gridfileobj, "y_vert_T", y_vert_t(:,:,2), corner=start, edge_lengths=nread)
+        glonb(1:nlon) = x_vert_t(1:nlon,1,1)
+        glonb(nlon+1) = x_vert_t(nlon,1,2)
+        glatb(1:nlat) = y_vert_t(1,1:nlat,1)
+        glatb(nlat+1) = y_vert_t(1,nlat,2)
+        deallocate(x_vert_t, y_vert_t)
+     end select
+  case(VERSION_OCN_MOSAIC_FILE, VERSION_GRIDFILES)
+     ! get the name of the grid file for the component and tile
+     tilefile = read_file_name(mosaic_fileobj(get_component_number(trim(component))), 'gridfiles',tile)
+     call open_grid_file(tilefileobj, grid_dir//tilefile)
+
+     start = 1; nread = 1
+     nread(1) = 2*nlon+1
+     allocate( tmp(2*nlon+1,1) )
+     call read_data(tilefileobj, "x", tmp, corner=start, edge_lengths=nread)
+     glonb(1:nlon+1) = tmp(1:2*nlon+1:2,1)
+     deallocate(tmp)
+     allocate(tmp(1,2*nlat+1))
+
+     start = 1; nread = 1
+     nread(2) = 2*nlat+1
+     call read_data(tilefileobj, "y", tmp, corner=start, edge_lengths=nread)
+     glatb(1:nlat+1) = tmp(1,1:2*nlat+1:2)
+     deallocate(tmp)
+     call close_file(tilefileobj)
+  end select
+end subroutine GET_GRID_CELL_VERTICES_1D_
+
+!> @brief returns cell vertices for the specified model component and mosaic tile number
+subroutine GET_GRID_CELL_VERTICES_2D_(component, tile, lonb, latb, domain)
+  character(len=*),         intent(in) :: component !< Component model (atm, lnd, ocn)
+  integer,                  intent(in) :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_), intent(inout) :: lonb(:,:),latb(:,:) !< Cell vertices
+  type(domain2d), optional, intent(in) :: domain !< Domain
+
+  ! local vars
+  integer :: nlon, nlat
+  integer :: i,j
+  real(kind=FMS_MOS_KIND_), allocatable :: buffer(:), tmp(:,:), x_vert_t(:,:,:), y_vert_t(:,:,:)
+  integer :: is,ie,js,je ! boundaries of our domain
+  integer :: i0,j0 ! offsets for coordinates
+  integer :: isg, jsg
+  integer :: start(4), nread(4)
+  character(len=FMS_PATH_LEN)      :: tilefile
+  type(FmsNetcdfFile_t)  :: tilefileobj
+
+  call init_checks("get_grid_cell_vertices")
+  call get_grid_size_for_one_tile(component, tile, nlon, nlat)
+
+  if (present(domain)) then
+    call mpp_get_compute_domain(domain,is,ie,js,je)
+  else
+    is = 1 ; ie = nlon
+    js = 1 ; je = nlat
+    !--- domain normally should be present
+    call mpp_error (NOTE, module_name//'/get_grid_cell_vertices '//&
+       'domain is not present, global data will be read')
+  endif
+  i0 = -is+1; j0 = -js+1
+
+  ! verify that lonb and latb sizes are consistent with the size of domain
+  if (size(lonb,1)/=ie-is+2.or.size(lonb,2)/=je-js+2) &
+       call mpp_error (FATAL, module_name//'/get_grid_cell_vertices '//&
+       'Size of argument "lonb" is not consistent with the domain size')
+  if (size(latb,1)/=ie-is+2.or.size(latb,2)/=je-js+2) &
+       call mpp_error (FATAL, module_name//'/get_grid_cell_vertices '//&
+       'Size of argument "latb" is not consistent with the domain size')
+  if(trim(component) .NE. 'ATM' .AND. component .NE. 'LND' .AND. component .NE. 'OCN') then
+     call mpp_error(FATAL, module_name//'/get_grid_cell_vertices '//&
+          'Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
+  endif
+
+     !! use lonb, latb as r4
+     select case(grid_version)
+     case(VERSION_GEOLON_T)
+       select case(component)
+       case('ATM','LND')
+          allocate(buffer(max(nlon,nlat)+1))
+          ! read coordinates of grid cell vertices
+          call read_data(gridfileobj, 'xb'//lowercase(component(1:1)), buffer(1:nlon+1))
+          do j = js, je+1
+             do i = is, ie+1
+                lonb(i+i0,j+j0) = buffer(i)
+             enddo
+          enddo
+          call read_data(gridfileobj, 'yb'//lowercase(component(1:1)), buffer(1:nlat+1))
+          do j = js, je+1
+             do i = is, ie+1
+                latb(i+i0,j+j0) = buffer(j)
+             enddo
+          enddo
+          deallocate(buffer)
+       case('OCN')
+          if (present(domain)) then
+             start = 1; nread = 1
+             start(1) = is; start(2) = js
+             nread(1) = ie-is+2; nread(2) = je-js+2
+             call read_data(gridfileobj, "geolon_vert_t", lonb, corner=start, edge_lengths=nread)
+             call read_data(gridfileobj, "geolat_vert_t", latb, corner=start, edge_lengths=nread)
+           else
+             call read_data(gridfileobj, "geolon_vert_t", lonb)
+             call read_data(gridfileobj, "geolat_vert_t", latb)
+           endif
+       end select
+     case(VERSION_X_T)
+       select case(component)
+       case('ATM','LND')
+          allocate(buffer(max(nlon,nlat)+1))
+          ! read coordinates of grid cell vertices
+          call read_data(gridfileobj, 'xb'//lowercase(component(1:1)), buffer(1:nlon+1))
+          do j = js, je+1
+             do i = is, ie+1
+                lonb(i+i0,j+j0) = buffer(i)
+             enddo
+          enddo
+          call read_data(gridfileobj, 'yb'//lowercase(component(1:1)), buffer(1:nlat+1))
+          do j = js, je+1
+             do i = is, ie+1
+                latb(i+i0,j+j0) = buffer(j)
+             enddo
+          enddo
+          deallocate(buffer)
+       case('OCN')
+          nlon=ie-is+1; nlat=je-js+1
+          allocate (x_vert_t(nlon,nlat,4), y_vert_t(nlon,nlat,4) )
+          call read_data(gridfileobj, 'x_vert_T', x_vert_t)
+          call read_data(gridfileobj, 'y_vert_T', y_vert_t)
+          lonb(1:nlon,1:nlat) = x_vert_t(1:nlon,1:nlat,1)
+          lonb(nlon+1,1:nlat) = x_vert_t(nlon,1:nlat,2)
+          lonb(1:nlon,nlat+1) = x_vert_t(1:nlon,nlat,4)
+          lonb(nlon+1,nlat+1) = x_vert_t(nlon,nlat,3)
+          latb(1:nlon,1:nlat) = y_vert_t(1:nlon,1:nlat,1)
+          latb(nlon+1,1:nlat) = y_vert_t(nlon,1:nlat,2)
+          latb(1:nlon,nlat+1) = y_vert_t(1:nlon,nlat,4)
+          latb(nlon+1,nlat+1) = y_vert_t(nlon,nlat,3)
+          deallocate(x_vert_t, y_vert_t)
+       end select
+     case(VERSION_OCN_MOSAIC_FILE, VERSION_GRIDFILES)
+       ! get the name of the grid file for the component and tile
+       tilefile = read_file_name(mosaic_fileobj(get_component_number(trim(component))), 'gridfiles',tile)
+       call open_grid_file(tilefileobj, grid_dir//tilefile)
+       if(PRESENT(domain)) then
+          call mpp_get_global_domain(domain, xbegin=isg, ybegin=jsg)
+          start = 1; nread = 1
+          start(1) = 2*(is-isg+1) - 1; nread(1) = 2*(ie-is)+3
+          start(2) = 2*(js-jsg+1) - 1; nread(2) = 2*(je-js)+3
+          allocate(tmp(nread(1), nread(2)) )
+          call read_data(tilefileobj, "x", tmp, corner=start, edge_lengths=nread)
+          do j = 1, je-js+2
+             do i = 1, ie-is+2
+                lonb(i,j) = tmp(2*i-1,2*j-1)
+             enddo
+          enddo
+          call read_data(tilefileobj, "y", tmp, corner=start, edge_lengths=nread)
+          do j = 1, je-js+2
+             do i = 1, ie-is+2
+                latb(i,j) = tmp(2*i-1,2*j-1)
+             enddo
+          enddo
+       else
+          allocate(tmp(2*nlon+1,2*nlat+1))
+          call read_data(tilefileobj, "x", tmp)
+          do j = js, je+1
+             do i = is, ie+1
+                lonb(i+i0,j+j0) = tmp(2*i-1,2*j-1)
+             end do
+          end do
+          call read_data(tilefileobj, "y", tmp)
+          do j = js, je+1
+             do i = is, ie+1
+                latb(i+i0,j+j0) = tmp(2*i-1,2*j-1)
+             end do
+          end do
+       endif
+       deallocate(tmp)
+       call close_file(tilefileobj)
+    end select ! end grid_version
+  end subroutine GET_GRID_CELL_VERTICES_2D_
+
+!> @brief returns cell vertices for the specified model component and mosaic tile number for
+!! an unstructured domain
+subroutine GET_GRID_CELL_VERTICES_UG_(component, tile, lonb, latb, SG_domain, UG_domain)
+  character(len=*),         intent(in) :: component !< Component model (atm, lnd, ocn)
+  integer,                  intent(in) :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_),  intent(inout) :: lonb(:,:),latb(:,:) ! The second dimension is 4
+  type(domain2d)  ,   intent(in)       :: SG_domain !< Structured domain
+  type(domainUG)  ,   intent(in)       :: UG_domain !< Unstructured domain
+  integer :: is, ie, js, je, i, j
+  real(kind=FMS_MOS_KIND_), allocatable :: SG_lonb(:,:), SG_latb(:,:), tmp(:,:,:)
+
+  call init_checks("get_grid_cell_vertices")
+  call mpp_get_compute_domain(SG_domain, is, ie, js, je)
+  allocate(SG_lonb(is:ie+1, js:je+1))
+  allocate(SG_latb(is:ie+1, js:je+1))
+  allocate(tmp(is:ie,js:je,4))
+  call get_grid_cell_vertices(component, tile, SG_lonb, SG_latb, SG_domain)
+  do j = js, je
+     do i = is, ie
+        tmp(i,j,1) = SG_lonb(i,j)
+        tmp(i,j,2) = SG_lonb(i+1,j)
+        tmp(i,j,3) = SG_lonb(i+1,j+1)
+        tmp(i,j,4) = SG_lonb(i,j+1)
+     enddo
+  enddo
+  call mpp_pass_SG_to_UG(UG_domain, tmp, lonb)
+  do j = js, je
+     do i = is, ie
+        tmp(i,j,1) = SG_latb(i,j)
+        tmp(i,j,2) = SG_latb(i+1,j)
+        tmp(i,j,3) = SG_latb(i+1,j+1)
+        tmp(i,j,4) = SG_latb(i,j+1)
+     enddo
+  enddo
+  call mpp_pass_SG_to_UG(UG_domain, tmp, latb)
+
+
+  deallocate(SG_lonb, SG_latb, tmp)
+end subroutine GET_GRID_CELL_VERTICES_UG_
+
+!> @brief returns grid cell centers given model component and mosaic tile number
+subroutine GET_GRID_CELL_CENTERS_1D_(component, tile, glon, glat)
+  character(len=*), intent(in) :: component !< Component model (atm, lnd, ocn)
+  integer, intent(in) :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_), intent(inout) :: glon(:),glat(:) !< Grid cell centers
+
+  integer                      :: nlon, nlat
+  integer                      :: start(4), nread(4)
+  real(kind=FMS_MOS_KIND_), allocatable :: tmp(:,:)
+  character(len=FMS_PATH_LEN)      :: tilefile
+  type(FmsNetcdfFile_t)  :: tilefileobj
+
+  call init_checks("get_grid_cell_centers")
+  call get_grid_size_for_one_tile(component, tile, nlon, nlat)
+  if (size(glon(:))/=nlon) &
+       call mpp_error (FATAL, module_name//'/get_grid_cell_centers_1D '//&
+       'Size of argument "glon" is not consistent with the grid size')
+  if (size(glat(:))/=nlat) &
+       call mpp_error (FATAL, module_name//'/get_grid_cell_centers_1D '//&
+       'Size of argument "glat" is not consistent with the grid size')
+  if(trim(component) .NE. 'ATM' .AND. component .NE. 'LND' .AND. component .NE. 'OCN') then
+     call mpp_error(FATAL, module_name//'/get_grid_cell_centers_1D '//&
+          'Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
+  endif
+
+  select case(grid_version)
+  case(VERSION_GEOLON_T)
+     select case(trim(component))
+     case('ATM','LND')
+        call read_data(gridfileobj, 'xt'//lowercase(component(1:1)), glon)
+        call read_data(gridfileobj, 'yt'//lowercase(component(1:1)), glat)
+     case('OCN')
+        call read_data(gridfileobj, "gridlon_t", glon)
+        call read_data(gridfileobj, "gridlat_t", glat)
+     end select
+  case(VERSION_X_T)
+     select case(trim(component))
+     case('ATM','LND')
+        call read_data(gridfileobj, 'xt'//lowercase(component(1:1)), glon)
+        call read_data(gridfileobj, 'yt'//lowercase(component(1:1)), glat)
+     case('OCN')
+        call read_data(gridfileobj, "grid_x_T", glon)
+        call read_data(gridfileobj, "grid_y_T", glat)
+     end select
+  case(VERSION_OCN_MOSAIC_FILE, VERSION_GRIDFILES)
+     ! get the name of the grid file for the component and tile
+     tilefile = read_file_name(mosaic_fileobj(get_component_number(trim(component))), 'gridfiles',tile)
+     call open_grid_file(tilefileobj, grid_dir//tilefile)
+
+     start = 1; nread = 1
+     nread(1) = 2*nlon+1; start(2) = 2
+     allocate( tmp(2*nlon+1,1) )
+     call read_data(tilefileobj, "x", tmp, corner=start, edge_lengths=nread)
+     glon(1:nlon) = tmp(2:2*nlon:2,1)
+     deallocate(tmp)
+     allocate(tmp(1, 2*nlat+1))
+
+     start = 1; nread = 1
+     nread(2) = 2*nlat+1; start(1) = 2
+     call read_data(tilefileobj, "y", tmp, corner=start, edge_lengths=nread)
+     glat(1:nlat) = tmp(1,2:2*nlat:2)
+     deallocate(tmp)
+     call close_file(tilefileobj)
+  end select
+end subroutine GET_GRID_CELL_CENTERS_1D_
+
+!> @brief returns grid cell centers given model component and mosaic tile number
+subroutine GET_GRID_CELL_CENTERS_2D_(component, tile, lon, lat, domain)
+  character(len=*), intent(in) :: component !< Component model (atm, lnd, ocn)
+  integer, intent(in) :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_), intent(inout) :: lon(:,:),lat(:,:) !< Grid cell centers
+  type(domain2d), intent(in), optional :: domain !< Domain
+  ! local vars
+  integer :: nlon, nlat
+  integer :: i,j
+  real(kind=FMS_MOS_KIND_), allocatable :: buffer(:),tmp(:,:)
+  integer :: is,ie,js,je ! boundaries of our domain
+  integer :: i0,j0 ! offsets for coordinates
+  integer :: isg, jsg
+  integer :: start(4), nread(4)
+  character(len=FMS_PATH_LEN)      :: tilefile
+  type(FmsNetcdfFile_t)  :: tilefileobj
+
+  call init_checks("get_grid_cell_centers")
+  call get_grid_size_for_one_tile(component, tile, nlon, nlat)
+  if (present(domain)) then
+    call mpp_get_compute_domain(domain,is,ie,js,je)
+  else
+    is = 1 ; ie = nlon
+    js = 1 ; je = nlat
+    !--- domain normally should be present
+    call mpp_error (NOTE, module_name//'/get_grid_cell_centers '//&
+       'domain is not present, global data will be read')
+  endif
+  i0 = -is+1; j0 = -js+1
+
+  ! verify that lon and lat sizes are consistent with the size of domain
+  if (size(lon,1)/=ie-is+1.or.size(lon,2)/=je-js+1) &
+       call mpp_error (FATAL, module_name//'/get_grid_cell_centers '//&
+       'Size of array "lon" is not consistent with the domain size')
+  if (size(lat,1)/=ie-is+1.or.size(lat,2)/=je-js+1) &
+       call mpp_error (FATAL, module_name//'/get_grid_cell_centers '//&
+       'Size of array "lat" is not consistent with the domain size')
+  if(trim(component) .NE. 'ATM' .AND. component .NE. 'LND' .AND. component .NE. 'OCN') then
+     call mpp_error(FATAL, module_name//'/get_grid_cell_vertices '//&
+          'Illegal component name "'//trim(component)//'": must be one of ATM, LND, or OCN')
+  endif
+
+  select case(grid_version)
+  case(VERSION_GEOLON_T)
+     select case (trim(component))
+     case('ATM','LND')
+        allocate(buffer(max(nlon,nlat)))
+        ! read coordinates of grid cell vertices
+        call read_data(gridfileobj, 'xt'//lowercase(component(1:1)), buffer(1:nlon))
+        do j = js,je
+        do i = is,ie
+           lon(i+i0,j+j0) = buffer(i)
+        enddo
+        enddo
+        call read_data(gridfileobj, 'yt'//lowercase(component(1:1)), buffer(1:nlat))
+        do j = js,je
+        do i = is,ie
+           lat(i+i0,j+j0) = buffer(j)
+        enddo
+        enddo
+        deallocate(buffer)
+     case('OCN')
+        call read_data(gridfileobj, 'geolon_t', lon)
+        call read_data(gridfileobj, 'geolat_t', lat)
+     end select
+  case(VERSION_X_T)
+     select case(trim(component))
+     case('ATM','LND')
+        allocate(buffer(max(nlon,nlat)))
+        ! read coordinates of grid cell vertices
+        call read_data(gridfileobj, 'xt'//lowercase(component(1:1)), buffer(1:nlon))
+        do j = js,je
+        do i = is,ie
+           lon(i+i0,j+j0) = buffer(i)
+        enddo
+        enddo
+        call read_data(gridfileobj, 'yt'//lowercase(component(1:1)), buffer(1:nlat))
+        do j = js,je
+        do i = is,ie
+           lat(i+i0,j+j0) = buffer(j)
+        enddo
+        enddo
+        deallocate(buffer)
+     case('OCN')
+        call read_data(gridfileobj, 'x_T', lon)
+        call read_data(gridfileobj, 'y_T', lat)
+     end select
+  case(VERSION_OCN_MOSAIC_FILE, VERSION_GRIDFILES) ! mosaic grid file
+     ! get the name of the grid file for the component and tile
+     tilefile = read_file_name(mosaic_fileobj(get_component_number(trim(component))), 'gridfiles',tile)
+     call open_grid_file(tilefileobj, grid_dir//tilefile)
+
+     if(PRESENT(domain)) then
+        call mpp_get_global_domain(domain, xbegin=isg, ybegin=jsg)
+        start = 1; nread = 1
+        start(1) = 2*(is-isg+1) - 1; nread(1) = 2*(ie-is)+3
+        start(2) = 2*(js-jsg+1) - 1; nread(2) = 2*(je-js)+3
+        allocate(tmp(nread(1), nread(2)))
+        call read_data(tilefileobj, "x", tmp, corner=start, edge_lengths=nread)
+        do j = 1, je-js+1
+           do i = 1, ie-is+1
+              lon(i,j) = tmp(2*i,2*j)
+           enddo
+        enddo
+        call read_data(tilefileobj, "y", tmp, corner=start, edge_lengths=nread)
+        do j = 1, je-js+1
+           do i = 1, ie-is+1
+              lat(i,j) = tmp(2*i,2*j)
+           enddo
+        enddo
+     else
+        allocate(tmp(2*nlon+1,2*nlat+1))
+        call read_data(tilefileobj, 'x', tmp)
+        do j = js,je
+           do i = is,ie
+              lon(i+i0,j+j0) = tmp(2*i,2*j)
+           end do
+        end do
+        call read_data(tilefileobj, 'y', tmp)
+        do j = js,je
+           do i = is,ie
+              lat(i+i0,j+j0) = tmp(2*i,2*j)
+           end do
+        end do
+        deallocate(tmp)
+     endif
+     call close_file(tilefileobj)
+  end select
+end subroutine GET_GRID_CELL_CENTERS_2D_
+
+!> @brief returns grid cell centers given model component and mosaic tile number
+!! for unstructured domain
+subroutine GET_GRID_CELL_CENTERS_UG_(component, tile, lon, lat, SG_domain, UG_domain)
+  character(len=*), intent(in) :: component !< Component model (atm, lnd, ocn)
+  integer, intent(in) :: tile !< Tile number
+  real(kind=FMS_MOS_KIND_), intent(inout) :: lon(:),lat(:) !< Grid cell centers
+  type(domain2d)  ,   intent(in) :: SG_domain !< Structured domain
+  type(domainUG)  ,   intent(in) :: UG_domain !< Unstructured domain
+  integer :: is, ie, js, je
+  real(kind=FMS_MOS_KIND_), allocatable :: SG_lon(:,:), SG_lat(:,:)
+
+  call init_checks("get_grid_cell_centers")
+  call mpp_get_compute_domain(SG_domain, is, ie, js, je)
+  allocate(SG_lon(is:ie, js:je))
+  allocate(SG_lat(is:ie, js:je))
+  call get_grid_cell_centers(component, tile, SG_lon, SG_lat, SG_domain)
+  call mpp_pass_SG_to_UG(UG_domain, SG_lon, lon)
+  call mpp_pass_SG_to_UG(UG_domain, SG_lat, lat)
+  deallocate(SG_lon, SG_lat)
+end subroutine GET_GRID_CELL_CENTERS_UG_
+
+!> @}
+! close documentation grouping
diff --git a/mosaic2/include/grid2_r4.fh b/mosaic2/include/grid2_r4.fh
new file mode 100644
index 0000000000..b1740e058f
--- /dev/null
+++ b/mosaic2/include/grid2_r4.fh
@@ -0,0 +1,58 @@
+!***********************************************************************
+!*                             Apache License 2.0
+!*
+!* This file is part of the GFDL Flexible Modeling System (FMS).
+!*
+!* Licensed under the Apache License, Version 2.0 (the "License");
+!* you may not use this file except in compliance with the License.
+!* You may obtain a copy of the License at
+!*
+!*     http://www.apache.org/licenses/LICENSE-2.0
+!*
+!* FMS is distributed in the hope that it will be useful, but WITHOUT
+!* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
+!* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+!* PARTICULAR PURPOSE. See the License for the specific language
+!* governing permissions and limitations under the License.
+!***********************************************************************
+!> @file
+!> @brief include file for grid2_mod to generate subroutines/functions for r4_kind arguments
+
+!> @addtogroup grid2_mod
+!> @{
+
+#undef  FMS_MOS_KIND_
+#define FMS_MOS_KIND_ r4_kind
+
+#undef  GET_GRID_CELL_AREA_SG_
+#define GET_GRID_CELL_AREA_SG_ get_grid_cell_area_SG_r4
+
+#undef  GET_GRID_COMP_AREA_SG_
+#define GET_GRID_COMP_AREA_SG_ get_grid_comp_area_SG_r4
+
+#undef  GET_GRID_CELL_AREA_UG_
+#define GET_GRID_CELL_AREA_UG_ get_grid_cell_area_UG_r4
+
+#undef  GET_GRID_COMP_AREA_UG_
+#define GET_GRID_COMP_AREA_UG_ get_grid_comp_area_UG_r4
+
+#undef  GET_GRID_CELL_VERTICES_1D_
+#define GET_GRID_CELL_VERTICES_1D_ get_grid_cell_vertices_1D_r4
+
+#undef  GET_GRID_CELL_VERTICES_2D_
+#define GET_GRID_CELL_VERTICES_2D_ get_grid_cell_vertices_2D_r4
+
+#undef  GET_GRID_CELL_VERTICES_UG_
+#define GET_GRID_CELL_VERTICES_UG_ get_grid_cell_vertices_UG_r4
+
+#undef  GET_GRID_CELL_CENTERS_1D_
+#define GET_GRID_CELL_CENTERS_1D_ get_grid_cell_centers_1D_r4
+
+#undef  GET_GRID_CELL_CENTERS_2D_
+#define GET_GRID_CELL_CENTERS_2D_ get_grid_cell_centers_2D_r4
+
+#undef  GET_GRID_CELL_CENTERS_UG_
+#define GET_GRID_CELL_CENTERS_UG_ get_grid_cell_centers_UG_r4
+
+#include "grid2.inc"
+!> @}
diff --git a/mosaic2/include/grid2_r8.fh b/mosaic2/include/grid2_r8.fh
new file mode 100644
index 0000000000..9b3d67a898
--- /dev/null
+++ b/mosaic2/include/grid2_r8.fh
@@ -0,0 +1,58 @@
+!***********************************************************************
+!*                             Apache License 2.0
+!*
+!* This file is part of the GFDL Flexible Modeling System (FMS).
+!*
+!* Licensed under the Apache License, Version 2.0 (the "License");
+!* you may not use this file except in compliance with the License.
+!* You may obtain a copy of the License at
+!*
+!*     http://www.apache.org/licenses/LICENSE-2.0
+!*
+!* FMS is distributed in the hope that it will be useful, but WITHOUT
+!* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
+!* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+!* PARTICULAR PURPOSE. See the License for the specific language
+!* governing permissions and limitations under the License.
+!***********************************************************************
+!> @file
+!> @brief include file for grid2_mod to generate subroutines/functions for r8_kind arguments
+
+!> @addtogroup grid2_mod
+!> @{
+
+#undef  FMS_MOS_KIND_
+#define FMS_MOS_KIND_ r8_kind
+
+#undef  GET_GRID_CELL_AREA_SG_
+#define GET_GRID_CELL_AREA_SG_ get_grid_cell_area_SG_r8
+
+#undef  GET_GRID_CELL_AREA_UG_
+#define GET_GRID_CELL_AREA_UG_ get_grid_cell_area_UG_r8
+
+#undef  GET_GRID_COMP_AREA_SG_
+#define GET_GRID_COMP_AREA_SG_ get_grid_comp_area_SG_r8
+
+#undef  GET_GRID_COMP_AREA_UG_
+#define GET_GRID_COMP_AREA_UG_ get_grid_comp_area_UG_r8
+
+#undef  GET_GRID_CELL_VERTICES_1D_
+#define GET_GRID_CELL_VERTICES_1D_ get_grid_cell_vertices_1D_r8
+
+#undef  GET_GRID_CELL_VERTICES_2D_
+#define GET_GRID_CELL_VERTICES_2D_ get_grid_cell_vertices_2D_r8
+
+#undef  GET_GRID_CELL_VERTICES_UG_
+#define GET_GRID_CELL_VERTICES_UG_ get_grid_cell_vertices_UG_r8
+
+#undef  GET_GRID_CELL_CENTERS_1D_
+#define GET_GRID_CELL_CENTERS_1D_ get_grid_cell_centers_1D_r8
+
+#undef  GET_GRID_CELL_CENTERS_2D_
+#define GET_GRID_CELL_CENTERS_2D_ get_grid_cell_centers_2D_r8
+
+#undef  GET_GRID_CELL_CENTERS_UG_
+#define GET_GRID_CELL_CENTERS_UG_ get_grid_cell_centers_UG_r8
+
+#include "grid2.inc"
+!> @}
diff --git a/mosaic2/include/mosaic2.inc b/mosaic2/include/mosaic2.inc
new file mode 100644
index 0000000000..0a70bdb0be
--- /dev/null
+++ b/mosaic2/include/mosaic2.inc
@@ -0,0 +1,164 @@
+!***********************************************************************
+!*                             Apache License 2.0
+!*
+!* This file is part of the GFDL Flexible Modeling System (FMS).
+!*
+!* Licensed under the Apache License, Version 2.0 (the "License");
+!* you may not use this file except in compliance with the License.
+!* You may obtain a copy of the License at
+!*
+!*     http://www.apache.org/licenses/LICENSE-2.0
+!*
+!* FMS is distributed in the hope that it will be useful, but WITHOUT
+!* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
+!* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+!* PARTICULAR PURPOSE. See the License for the specific language
+!* governing permissions and limitations under the License.
+!***********************************************************************
+
+!> @file
+
+!> @brief Get exchange grid information from mosaic xgrid file.
+!> Example usage:
+!!
+!!           call get_mosaic_xgrid(fileobj, nxgrid, i1, j1, i2, j2, area)
+!!
+  subroutine GET_MOSAIC_XGRID_(fileobj, i1, j1, i2, j2, area, ibegin, iend)
+    type(FmsNetcdfFile_t), intent(in) :: fileobj !> The file that contains exchange grid information.
+    integer,       intent(inout) :: i1(:), j1(:), i2(:), j2(:) !> i and j indices for grids 1 and 2
+    real(kind=FMS_MOS_KIND_),  intent(inout) :: area(:) !> area of the exchange grid. The area is scaled to
+                                                        !! represent unit earth area
+    integer, optional, intent(in) :: ibegin, iend
+
+    integer                 :: start(4), nread(4), istart
+    real(kind=FMS_MOS_KIND_),  dimension(2, size(i1(:))) :: tile1_cell, tile2_cell
+    integer                 :: nxgrid, n
+    real(kind=r8_kind) :: garea
+    real(kind=r8_kind) :: get_global_area
+
+    garea = get_global_area() !< get_global_area returns a r8_kind
+
+    ! When start and nread present, make sure nread(1) is the same as the size of the data
+    if(present(ibegin) .and. present(iend)) then
+       istart = ibegin
+       nxgrid = iend - ibegin + 1
+       if(nxgrid .NE. size(i1(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(i1(:))")
+       if(nxgrid .NE. size(j1(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(j1(:))")
+       if(nxgrid .NE. size(i2(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(i2(:))")
+       if(nxgrid .NE. size(j2(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(j2(:))")
+       if(nxgrid .NE. size(area(:))) call mpp_error(FATAL, "get_mosaic_xgrid: nxgrid .NE. size(area(:))")
+    else
+       istart = 1
+       nxgrid = size(i1(:))
+    endif
+
+    start  = 1; nread = 1
+    start(1) = istart; nread(1) = nxgrid
+
+    call read_data(fileobj, 'xgrid_area', area, corner=start, edge_lengths=nread)
+
+    start = 1; nread = 1
+    nread(1) = 2
+    start(2) = istart; nread(2) = nxgrid
+
+    call read_data(fileobj, 'tile1_cell', tile1_cell, corner=start, edge_lengths=nread)
+    call read_data(fileobj, 'tile2_cell', tile2_cell, corner=start, edge_lengths=nread)
+
+     do n = 1, nxgrid
+       i1(n) = int(tile1_cell(1,n))
+       j1(n) = int(tile1_cell(2,n))
+       i2(n) = int(tile2_cell(1,n))
+       j2(n) = int(tile2_cell(2,n))
+       area(n) = real( real(area(n),r8_kind)/garea, FMS_MOS_KIND_ )
+    end do
+
+    return
+
+  end subroutine GET_MOSAIC_XGRID_
+  !###############################################################################
+  !> @brief Calculate grid cell area.
+  !> Calculate the grid cell area. The purpose of this routine is to make
+  !! sure the consistency between model grid area and exchange grid area.
+  !> @param lon geographical longitude of grid cell vertices.
+  !> @param lat geographical latitude of grid cell vertices.
+  !> @param[inout] area grid cell area.
+  !> <br>Example usage:
+  !!            call calc_mosaic_grid_area(lon, lat, area)
+  subroutine CALC_MOSAIC_GRID_AREA_(lon, lat, area)
+     real(kind=FMS_MOS_KIND_), dimension(:,:), intent(in)  :: lon
+     real(kind=FMS_MOS_KIND_), dimension(:,:), intent(in)  :: lat
+     real(kind=FMS_MOS_KIND_), dimension(:,:), intent(out) :: area
+     integer :: nlon, nlat
+
+     real(r8_kind) :: area_r8(size(area,1),size(area,2))
+
+     nlon = size(area,1)
+     nlat = size(area,2)
+     ! make sure size of lon, lat and area are consitency
+     if( size(lon,1) .NE. nlon+1 .OR. size(lat,1) .NE. nlon+1 ) &
+        call mpp_error(FATAL, "mosaic_mod: size(lon,1) and size(lat,1) should equal to size(area,1)+1")
+     if( size(lon,2) .NE. nlat+1 .OR. size(lat,2) .NE. nlat+1 ) &
+        call mpp_error(FATAL, "mosaic_mod: size(lon,2) and size(lat,2) should equal to size(area,2)+1")
+
+     ! get_grid_area only accepts double precision data
+     call get_grid_area( nlon, nlat, real(lon,r8_kind), real(lat,r8_kind), area_r8)
+
+     area=real(area_r8,FMS_MOS_KIND_)
+
+  end subroutine CALC_MOSAIC_GRID_AREA_
+  !###############################################################################
+  !> @brief Calculate grid cell area using great cirlce algorithm
+  !> Calculate the grid cell area. The purpose of this routine is to make
+  !! sure the consistency between model grid area and exchange grid area.
+  !> @param lon geographical longitude of grid cell vertices.
+  !> @param lat geographical latitude of grid cell vertices.
+  !> @param[inout] area grid cell area.
+  !> <br>Example usage:
+  !!            call calc_mosaic_grid_great_circle_area(lon, lat, area)
+  subroutine CALC_MOSAIC_GRID_GREAT_CIRCLE_AREA_(lon, lat, area)
+     real(kind=FMS_MOS_KIND_), dimension(:,:), intent(in)    :: lon
+     real(kind=FMS_MOS_KIND_), dimension(:,:), intent(in)    :: lat
+     real(kind=FMS_MOS_KIND_), dimension(:,:), intent(inout) :: area
+     integer :: nlon, nlat
+
+     real(r8_kind) :: area_r8(size(area,1),size(area,2))
+
+     nlon = size(area,1)
+     nlat = size(area,2)
+     ! make sure size of lon, lat and area are consitency
+     if( size(lon,1) .NE. nlon+1 .OR. size(lat,1) .NE. nlon+1 ) &
+        call mpp_error(FATAL, "mosaic_mod: size(lon,1) and size(lat,1) should equal to size(area,1)+1")
+     if( size(lon,2) .NE. nlat+1 .OR. size(lat,2) .NE. nlat+1 ) &
+        call mpp_error(FATAL, "mosaic_mod: size(lon,2) and size(lat,2) should equal to size(area,2)+1")
+
+     ! get_grid_great_circle_area only accepts r8_kind arguments
+     call get_grid_great_circle_area( nlon, nlat, real(lon,r8_kind), real(lat,r8_kind), area_r8)
+
+     area=real(area_r8, FMS_MOS_KIND_)
+
+   end subroutine CALC_MOSAIC_GRID_GREAT_CIRCLE_AREA_
+  !#####################################################################
+  !> This function check if a point (lon1,lat1) is inside a polygon (lon2(:), lat2(:))
+  !! lon1, lat1, lon2, lat2 are in radians.
+  function IS_INSIDE_POLYGON_(lon1, lat1, lon2, lat2 )
+     real(kind=FMS_MOS_KIND_), intent(in) :: lon1, lat1
+     real(kind=FMS_MOS_KIND_), intent(in) :: lon2(:), lat2(:)
+     logical :: IS_INSIDE_POLYGON_
+     integer :: npts, isinside
+     integer :: inside_a_polygon
+
+     npts = size(lon2(:))
+
+     !> inside_a_polygon function only accepts r8_kind real variables
+
+     isinside = inside_a_polygon(real(lon1,r8_kind), real(lat1,r8_kind), npts, real(lon2,r8_kind), real(lat2,r8_kind))
+     if(isinside == 1) then
+        IS_INSIDE_POLYGON_ = .TRUE.
+     else
+        IS_INSIDE_POLYGON_ = .FALSE.
+     endif
+
+     return
+
+  end function IS_INSIDE_POLYGON_
+!> @}
diff --git a/mosaic2/include/mosaic2_r4.fh b/mosaic2/include/mosaic2_r4.fh
new file mode 100644
index 0000000000..996275e9c7
--- /dev/null
+++ b/mosaic2/include/mosaic2_r4.fh
@@ -0,0 +1,40 @@
+!***********************************************************************
+!*                             Apache License 2.0
+!*
+!* This file is part of the GFDL Flexible Modeling System (FMS).
+!*
+!* Licensed under the Apache License, Version 2.0 (the "License");
+!* you may not use this file except in compliance with the License.
+!* You may obtain a copy of the License at
+!*
+!*     http://www.apache.org/licenses/LICENSE-2.0
+!*
+!* FMS is distributed in the hope that it will be useful, but WITHOUT
+!* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
+!* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+!* PARTICULAR PURPOSE. See the License for the specific language
+!* governing permissions and limitations under the License.
+!***********************************************************************
+!> @file
+!> @brief include file for mosaic22_mod to generate subroutines/functions for r4_kind arguments
+
+!> @addtogroup mosaic2_mod
+!> @{
+
+#undef  FMS_MOS_KIND_
+#define FMS_MOS_KIND_ r4_kind
+
+#undef  GET_MOSAIC_XGRID_
+#define GET_MOSAIC_XGRID_ get_mosaic_xgrid_r4
+
+#undef  CALC_MOSAIC_GRID_AREA_
+#define CALC_MOSAIC_GRID_AREA_ calc_mosaic_grid_area_r4
+
+#undef  CALC_MOSAIC_GRID_GREAT_CIRCLE_AREA_
+#define CALC_MOSAIC_GRID_GREAT_CIRCLE_AREA_ calc_mosaic_grid_great_circle_area_r4
+
+#undef  IS_INSIDE_POLYGON_
+#define IS_INSIDE_POLYGON_ is_inside_polygon_r4
+
+#include "mosaic2.inc"
+!> @}
diff --git a/mosaic2/include/mosaic2_r8.fh b/mosaic2/include/mosaic2_r8.fh
new file mode 100644
index 0000000000..ab7e2aa86e
--- /dev/null
+++ b/mosaic2/include/mosaic2_r8.fh
@@ -0,0 +1,40 @@
+!***********************************************************************
+!*                             Apache License 2.0
+!*
+!* This file is part of the GFDL Flexible Modeling System (FMS).
+!*
+!* Licensed under the Apache License, Version 2.0 (the "License");
+!* you may not use this file except in compliance with the License.
+!* You may obtain a copy of the License at
+!*
+!*     http://www.apache.org/licenses/LICENSE-2.0
+!*
+!* FMS is distributed in the hope that it will be useful, but WITHOUT
+!* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
+!* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+!* PARTICULAR PURPOSE. See the License for the specific language
+!* governing permissions and limitations under the License.
+!***********************************************************************
+!> @file
+!> @brief include file for mosaic2_mod to generate subroutines/functions for r8_kind arguments
+
+!> @addtogroup mosaic2_mod
+!> @{
+
+#undef  FMS_MOS_KIND_
+#define FMS_MOS_KIND_ r8_kind
+
+#undef  GET_MOSAIC_XGRID_
+#define GET_MOSAIC_XGRID_ get_mosaic_xgrid_r8
+
+#undef  CALC_MOSAIC_GRID_AREA_
+#define CALC_MOSAIC_GRID_AREA_ calc_mosaic_grid_area_r8
+
+#undef  CALC_MOSAIC_GRID_GREAT_CIRCLE_AREA_
+#define CALC_MOSAIC_GRID_GREAT_CIRCLE_AREA_ calc_mosaic_grid_great_circle_area_r8
+
+#undef  IS_INSIDE_POLYGON_
+#define IS_INSIDE_POLYGON_ is_inside_polygon_r8
+
+#include "mosaic2.inc"
+!> @}

From b7cb57a50f74225f475f84037bcb9ffd992b2347 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 11 May 2026 15:19:33 -0600
Subject: [PATCH 45/45] Strip out a bunch of netcdf ifdefs

---
 diag_manager/diag_axis.F90               |  2 --
 diag_manager/diag_data.F90               |  6 ------
 diag_manager/diag_manager.F90            |  2 --
 diag_manager/diag_output.F90             |  2 --
 diag_manager/diag_util.F90               |  2 --
 mpp/include/mpp_io_connect.inc           |  2 --
 mpp/include/mpp_io_misc.inc              |  8 --------
 mpp/include/mpp_io_read.inc              |  2 --
 mpp/include/mpp_io_unstructured_read.inc | 12 ------------
 mpp/mpp_io.F90                           |  2 --
 test_fms/mpp_io/test_io_R4_R8.F90        |  2 --
 test_fms/mpp_io/test_io_mosaic_R4_R8.F90 |  2 --
 test_fms/mpp_io/test_mpp_io.F90          |  2 --
 13 files changed, 46 deletions(-)

diff --git a/diag_manager/diag_axis.F90 b/diag_manager/diag_axis.F90
index 96221aee36..7eb5eaf686 100644
--- a/diag_manager/diag_axis.F90
+++ b/diag_manager/diag_axis.F90
@@ -40,9 +40,7 @@ MODULE diag_axis_mod
   USE diag_data_mod, ONLY: diag_axis_type, max_subaxes, max_axes,&
        & max_num_axis_sets, max_axis_attributes, debug_diag_manager,&
        & first_send_data_call, diag_atttype
-#ifdef use_netCDF
   USE netcdf, ONLY: NF90_INT, NF90_FLOAT, NF90_CHAR
-#endif
 
   IMPLICIT NONE
 
diff --git a/diag_manager/diag_data.F90 b/diag_manager/diag_data.F90
index 58a85bce91..ab7d180328 100644
--- a/diag_manager/diag_data.F90
+++ b/diag_manager/diag_data.F90
@@ -53,10 +53,8 @@ MODULE diag_data_mod
   USE fms_mod, ONLY: WARNING, write_version_number
   USE fms_diag_bbox_mod, ONLY: fmsDiagIbounds_type
 
-#ifdef use_netCDF
   ! NF90_FILL_REAL has value of 9.9692099683868690e+36.
   USE netcdf, ONLY: NF_FILL_REAL => NF90_FILL_REAL
-#endif
   use fms2_io_mod
 
   IMPLICIT NONE
@@ -339,13 +337,9 @@ MODULE diag_data_mod
 
   ! <!-- netCDF variable -->
 
-#ifdef use_netCDF
   REAL :: FILL_VALUE = NF_FILL_REAL !< Fill value used.  Value will be <TT>NF90_FILL_REAL</TT> if using the
                                     !! netCDF module, otherwise will be 9.9692099683868690e+36.
                                     ! from file /usr/local/include/netcdf.inc
-#else
-  REAL :: FILL_VALUE = 9.9692099683868690e+36
-#endif
 
   INTEGER :: pack_size = 1 !< 1 for double and 2 for float
 
diff --git a/diag_manager/diag_manager.F90 b/diag_manager/diag_manager.F90
index 335e805d7b..1ad511567a 100644
--- a/diag_manager/diag_manager.F90
+++ b/diag_manager/diag_manager.F90
@@ -242,9 +242,7 @@ MODULE diag_manager_mod
   use netcdf_io_mod, ONLY: filepath_list_type, partitioned_global_files, partitioned_section_files, &
    & append_to_filepath_list
 
-#ifdef use_netCDF
   USE netcdf, ONLY: NF90_INT, NF90_FLOAT, NF90_CHAR
-#endif
 
 !----------
 !ug support
diff --git a/diag_manager/diag_output.F90 b/diag_manager/diag_output.F90
index eef10ef7e4..e140569adb 100644
--- a/diag_manager/diag_output.F90
+++ b/diag_manager/diag_output.F90
@@ -44,9 +44,7 @@ MODULE diag_output_mod
   USE time_manager_mod, ONLY: get_calendar_type, valid_calendar_types
   USE fms_mod, ONLY: error_mesg, write_version_number, fms_error_handler, FATAL, note
 
-#ifdef use_netCDF
   USE netcdf, ONLY: NF90_INT, NF90_FLOAT, NF90_CHAR
-#endif
 
   use mpp_domains_mod, only: mpp_get_UG_io_domain
   use mpp_domains_mod, only: mpp_get_UG_domain_npes
diff --git a/diag_manager/diag_util.F90 b/diag_manager/diag_util.F90
index 1e3c6f7557..41212e632b 100644
--- a/diag_manager/diag_util.F90
+++ b/diag_manager/diag_util.F90
@@ -76,9 +76,7 @@ MODULE diag_util_mod
   USE constants_mod, ONLY: SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE
   USE fms2_io_mod
   USE fms_diag_bbox_mod, ONLY: fmsDiagIbounds_type
-#ifdef use_netCDF
   USE netcdf, ONLY: NF90_CHAR
-#endif
 
   IMPLICIT NONE
   PRIVATE
diff --git a/mpp/include/mpp_io_connect.inc b/mpp/include/mpp_io_connect.inc
index 2d8a2e0cae..0df90ea18e 100644
--- a/mpp/include/mpp_io_connect.inc
+++ b/mpp/include/mpp_io_connect.inc
@@ -828,9 +828,7 @@
       if( mpp_file(unit)%fileset.NE.MPP_MULTI )collect = .FALSE.
       if( mpp_file(unit)%opened) then
          if( mpp_file(unit)%format.EQ.MPP_NETCDF )then
-#ifdef use_netCDF
             error = NF_CLOSE(mpp_file(unit)%ncid); call netcdf_err( error, mpp_file(unit) )
-#endif
          else
             close(unit,status=status)
          end if
diff --git a/mpp/include/mpp_io_misc.inc b/mpp/include/mpp_io_misc.inc
index c949063f85..8f285c9726 100644
--- a/mpp/include/mpp_io_misc.inc
+++ b/mpp/include/mpp_io_misc.inc
@@ -178,10 +178,8 @@
       if( pe.EQ.mpp_root_pe() )then
           iunit = stdlog()  ! PGI compiler does not like stdlog() doing I/O within write call
           write( iunit,'(/a)' )'MPP_IO module '//trim(version)
-#ifdef use_netCDF
           text = NF_INQ_LIBVERS()
           write( iunit,'(/a)' )'Using netCDF library version '//trim(text)
-#endif
       endif
 
       call mpp_io_set_stack_size(131072) ! default initial value
@@ -230,12 +228,10 @@
       do unit = unit_begin,unit_end
          if( mpp_file(unit)%opened )close(unit)
       end do
-#ifdef use_netCDF
 !close all open netCDF units
       do unit = maxunits+1,2*maxunits
          if( mpp_file(unit)%opened )error = NF_CLOSE(mpp_file(unit)%ncid)
       end do
-#endif
 
 !      call mpp_max(mpp_io_stack_hwm)
 
@@ -258,7 +254,6 @@
       character(len=*), optional :: string
       character(len=256) :: errmsg
 
-#ifdef use_netCDF
       if( err.EQ.NF_NOERR )return
       errmsg = NF_STRERROR(err)
       if( PRESENT(file) )errmsg = trim(errmsg)//' File='//file%name
@@ -268,7 +263,6 @@
       if( PRESENT(string) )errmsg = trim(errmsg)//string
       call mpp_io_exit('NOSYNC')        !make sure you close all open files
       call mpp_error( FATAL, 'NETCDF ERROR: '//trim(errmsg) )
-#endif
       return
     end subroutine netcdf_err
 
@@ -284,9 +278,7 @@
          &  'MPP_FLUSH: cannot flush a file during writing of metadata.' )
 
       if( mpp_file(unit)%format.EQ.MPP_NETCDF )then
-#ifdef use_netCDF
           error = NF_SYNC(mpp_file(unit)%ncid); call netcdf_err( error, mpp_file(unit) )
-#endif
       else
           FLUSH(unit)
       end if
diff --git a/mpp/include/mpp_io_read.inc b/mpp/include/mpp_io_read.inc
index de343a4f13..bb9eec6c55 100644
--- a/mpp/include/mpp_io_read.inc
+++ b/mpp/include/mpp_io_read.inc
@@ -1282,7 +1282,6 @@
 
       tavg_info_exists = .false.
 
-#ifdef use_netCDF
       do n= 1, field%natt
          if (field%Att(n)%type .EQ. NF_CHAR) then
              if (field%Att(n)%name(1:13) == 'time_avg_info') then
@@ -1291,7 +1290,6 @@
              endif
          endif
       enddo
-#endif
       if (tavg_info_exists) then
           do n = 1, size(fields(:))
              if (trim(fields(n)%name) == 'average_T1') then
diff --git a/mpp/include/mpp_io_unstructured_read.inc b/mpp/include/mpp_io_unstructured_read.inc
index 7fc113c3aa..534ae1c669 100644
--- a/mpp/include/mpp_io_unstructured_read.inc
+++ b/mpp/include/mpp_io_unstructured_read.inc
@@ -159,7 +159,6 @@ subroutine mpp_io_unstructured_read_r8_1D(funit, &
 
    !If necessary, compute a check-sum of the read-in data.
     if (compute_chksum) then
-#ifdef use_netCDF
         if (field%type .eq. NF_INT) then
             if (field%fill .eq. MPP_FILL_DOUBLE .or. field%fill .eq. &
                 real(MPP_FILL_INT)) then
@@ -180,7 +179,6 @@ subroutine mpp_io_unstructured_read_r8_1D(funit, &
         else
             chk = mpp_chksum(fdata, mask_val=real(field%fill,KIND(fdata)))
         endif
-#endif
        !Print out the computed check-sum for the field.  This feature is
        !currently turned off.  Uncomment the following lines to turn it
        !back on.
@@ -341,7 +339,6 @@ subroutine mpp_io_unstructured_read_r8_2D(funit, &
 
    !If necessary, compute a check-sum of the read-in data.
     if (compute_chksum) then
-#ifdef use_netCDF
         if (field%type .eq. NF_INT) then
             if (field%fill .eq. MPP_FILL_DOUBLE .or. field%fill .eq. &
                 real(MPP_FILL_INT)) then
@@ -362,7 +359,6 @@ subroutine mpp_io_unstructured_read_r8_2D(funit, &
         else
             chk = mpp_chksum(fdata, mask_val=real(field%fill,KIND(fdata)))
         endif
-#endif
        !Print out the computed check-sum for the field.  This feature is
        !currently turned off.  Uncomment the following lines to turn it
        !back on.
@@ -523,7 +519,6 @@ subroutine mpp_io_unstructured_read_r8_3D(funit, &
 
    !If necessary, compute a check-sum of the read-in data.
     if (compute_chksum) then
-#ifdef use_netCDF
         if (field%type .eq. NF_INT) then
             if (field%fill .eq. MPP_FILL_DOUBLE .or. field%fill .eq. &
                 real(MPP_FILL_INT)) then
@@ -544,7 +539,6 @@ subroutine mpp_io_unstructured_read_r8_3D(funit, &
         else
             chk = mpp_chksum(fdata, mask_val=real(field%fill,KIND(fdata)))
         endif
-#endif
        !Print out the computed check-sum for the field.  This feature is
        !currently turned off.  Uncomment the following lines to turn it
        !back on.
@@ -709,7 +703,6 @@ subroutine mpp_io_unstructured_read_r4_1D(funit, &
 
    !If necessary, compute a check-sum of the read-in data.
     if (compute_chksum) then
-#ifdef use_netCDF
         if (field%type .eq. NF_INT) then
             if (field%fill .eq. MPP_FILL_DOUBLE .or. field%fill .eq. &
                 real(MPP_FILL_INT)) then
@@ -730,7 +723,6 @@ subroutine mpp_io_unstructured_read_r4_1D(funit, &
         else
             chk = mpp_chksum(fdata, mask_val=real(field%fill,KIND(fdata)))
         endif
-#endif
        !Print out the computed check-sum for the field.  This feature is
        !currently turned off.  Uncomment the following lines to turn it
        !back on.
@@ -891,7 +883,6 @@ subroutine mpp_io_unstructured_read_r4_2D(funit, &
 
    !If necessary, compute a check-sum of the read-in data.
     if (compute_chksum) then
-#ifdef use_netCDF
         if (field%type .eq. NF_INT) then
             if (field%fill .eq. MPP_FILL_DOUBLE .or. field%fill .eq. &
                 real(MPP_FILL_INT)) then
@@ -912,7 +903,6 @@ subroutine mpp_io_unstructured_read_r4_2D(funit, &
         else
             chk = mpp_chksum(fdata, mask_val=real(field%fill,KIND(fdata)))
         endif
-#endif
        !Print out the computed check-sum for the field.  This feature is
        !currently turned off.  Uncomment the following lines to turn it
        !back on.
@@ -1073,7 +1063,6 @@ subroutine mpp_io_unstructured_read_r4_3D(funit, &
 
    !If necessary, compute a check-sum of the read-in data.
     if (compute_chksum) then
-#ifdef use_netCDF
         if (field%type .eq. NF_INT) then
             if (field%fill .eq. MPP_FILL_DOUBLE .or. field%fill .eq. &
                 real(MPP_FILL_INT)) then
@@ -1094,7 +1083,6 @@ subroutine mpp_io_unstructured_read_r4_3D(funit, &
         else
             chk = mpp_chksum(fdata, mask_val=real(field%fill,KIND(fdata)))
         endif
-#endif
        !Print out the computed check-sum for the field.  This feature is
        !currently turned off.  Uncomment the following lines to turn it
        !back on.
diff --git a/mpp/mpp_io.F90 b/mpp/mpp_io.F90
index 4a8fc1bb0b..a92b926443 100644
--- a/mpp/mpp_io.F90
+++ b/mpp/mpp_io.F90
@@ -312,12 +312,10 @@ module mpp_io_mod
 
 #define _MAX_FILE_UNITS 1024
 
-#ifdef use_netCDF
 use netcdf
 use netcdf_nf_data
 use netcdf_nf_interfaces
 use netcdf4_nf_interfaces
-#endif
 
 use mpp_parameter_mod,  only : MPP_WRONLY, MPP_RDONLY, MPP_APPEND, MPP_OVERWR, MPP_ASCII
 use mpp_parameter_mod,  only : MPP_IEEE32, MPP_NATIVE, MPP_NETCDF, MPP_SEQUENTIAL
diff --git a/test_fms/mpp_io/test_io_R4_R8.F90 b/test_fms/mpp_io/test_io_R4_R8.F90
index 49c17e0b4d..c27ca1b025 100644
--- a/test_fms/mpp_io/test_io_R4_R8.F90
+++ b/test_fms/mpp_io/test_io_R4_R8.F90
@@ -41,9 +41,7 @@ program test_io_R4_R8
 
   implicit none
 
-#ifdef use_netCDF
 #include <netcdf.inc>
-#endif
 
   !--- namelist definition
   integer           :: nx=360, ny=200, nz=50, nt=2
diff --git a/test_fms/mpp_io/test_io_mosaic_R4_R8.F90 b/test_fms/mpp_io/test_io_mosaic_R4_R8.F90
index 8360bd2523..083cee8133 100644
--- a/test_fms/mpp_io/test_io_mosaic_R4_R8.F90
+++ b/test_fms/mpp_io/test_io_mosaic_R4_R8.F90
@@ -44,9 +44,7 @@ program test_io_mosaic_R4_R8
 
   implicit none
 
-#ifdef use_netCDF
 #include <netcdf.inc>
-#endif
 
   !--- namelist definition
   integer           :: nx=360, ny=200, nz=50, nt=2
diff --git a/test_fms/mpp_io/test_mpp_io.F90 b/test_fms/mpp_io/test_mpp_io.F90
index 907d45600b..d8ae2883f7 100644
--- a/test_fms/mpp_io/test_mpp_io.F90
+++ b/test_fms/mpp_io/test_mpp_io.F90
@@ -35,12 +35,10 @@ program test
   use mpp_mod,         only : input_nml_file
   use fms_mod,         only : check_nml_error
 
-#ifdef use_netCDF
   use netcdf
   use netcdf_nf_data
   use netcdf_nf_interfaces
   use netcdf4_nf_interfaces
-#endif
 
   implicit none